@wix/evalforge-evaluator 0.202.0 → 0.204.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -2412,9 +2412,9 @@ var require_debug = __commonJS({
2412
2412
  var require_follow_redirects = __commonJS({
2413
2413
  "../../node_modules/follow-redirects/index.js"(exports, module) {
2414
2414
  var url = __require("url");
2415
- var URL2 = url.URL;
2416
- var http = __require("http");
2417
- var https = __require("https");
2415
+ var URL3 = url.URL;
2416
+ var http2 = __require("http");
2417
+ var https2 = __require("https");
2418
2418
  var Writable = __require("stream").Writable;
2419
2419
  var assert = __require("assert");
2420
2420
  var debug = require_debug();
@@ -2428,7 +2428,7 @@ var require_follow_redirects = __commonJS({
2428
2428
  })();
2429
2429
  var useNativeURL = false;
2430
2430
  try {
2431
- assert(new URL2(""));
2431
+ assert(new URL3(""));
2432
2432
  } catch (error) {
2433
2433
  useNativeURL = error.code === "ERR_INVALID_URL";
2434
2434
  }
@@ -2808,7 +2808,7 @@ var require_follow_redirects = __commonJS({
2808
2808
  function parseUrl(input) {
2809
2809
  var parsed;
2810
2810
  if (useNativeURL) {
2811
- parsed = new URL2(input);
2811
+ parsed = new URL3(input);
2812
2812
  } else {
2813
2813
  parsed = validateUrl(url.parse(input));
2814
2814
  if (!isString(parsed.protocol)) {
@@ -2818,7 +2818,7 @@ var require_follow_redirects = __commonJS({
2818
2818
  return parsed;
2819
2819
  }
2820
2820
  function resolveUrl2(relative2, base) {
2821
- return useNativeURL ? new URL2(relative2, base) : parseUrl(url.resolve(base, relative2));
2821
+ return useNativeURL ? new URL3(relative2, base) : parseUrl(url.resolve(base, relative2));
2822
2822
  }
2823
2823
  function validateUrl(input) {
2824
2824
  if (/^\[/.test(input.hostname) && !/^\[[:0-9a-f]+\]$/i.test(input.hostname)) {
@@ -2897,9 +2897,9 @@ var require_follow_redirects = __commonJS({
2897
2897
  return typeof value === "object" && "length" in value;
2898
2898
  }
2899
2899
  function isURL(value) {
2900
- return URL2 && value instanceof URL2;
2900
+ return URL3 && value instanceof URL3;
2901
2901
  }
2902
- module.exports = wrap({ http, https });
2902
+ module.exports = wrap({ http: http2, https: https2 });
2903
2903
  module.exports.wrap = wrap;
2904
2904
  }
2905
2905
  });
@@ -3002,8 +3002,8 @@ var require_http = __commonJS({
3002
3002
  var settle = require_settle();
3003
3003
  var buildFullPath = require_buildFullPath();
3004
3004
  var buildURL = require_buildURL();
3005
- var http = __require("http");
3006
- var https = __require("https");
3005
+ var http2 = __require("http");
3006
+ var https2 = __require("https");
3007
3007
  var httpFollow = require_follow_redirects().http;
3008
3008
  var httpsFollow = require_follow_redirects().https;
3009
3009
  var url = __require("url");
@@ -3142,7 +3142,7 @@ var require_http = __commonJS({
3142
3142
  if (config.transport) {
3143
3143
  transport = config.transport;
3144
3144
  } else if (config.maxRedirects === 0) {
3145
- transport = isHttpsProxy ? https : http;
3145
+ transport = isHttpsProxy ? https2 : http2;
3146
3146
  } else {
3147
3147
  if (config.maxRedirects) {
3148
3148
  options.maxRedirects = config.maxRedirects;
@@ -3287,8 +3287,8 @@ var require_helpers = __commonJS({
3287
3287
  };
3288
3288
  Object.defineProperty(exports, "__esModule", { value: true });
3289
3289
  exports.req = exports.json = exports.toBuffer = void 0;
3290
- var http = __importStar2(__require("http"));
3291
- var https = __importStar2(__require("https"));
3290
+ var http2 = __importStar2(__require("http"));
3291
+ var https2 = __importStar2(__require("https"));
3292
3292
  async function toBuffer(stream) {
3293
3293
  let length = 0;
3294
3294
  const chunks = [];
@@ -3313,7 +3313,7 @@ var require_helpers = __commonJS({
3313
3313
  exports.json = json;
3314
3314
  function req(url, opts = {}) {
3315
3315
  const href = typeof url === "string" ? url : url.href;
3316
- const req2 = (href.startsWith("https:") ? https : http).request(url, opts);
3316
+ const req2 = (href.startsWith("https:") ? https2 : http2).request(url, opts);
3317
3317
  const promise = new Promise((resolve3, reject) => {
3318
3318
  req2.once("response", resolve3).once("error", reject).end();
3319
3319
  });
@@ -3361,11 +3361,11 @@ var require_dist = __commonJS({
3361
3361
  Object.defineProperty(exports, "__esModule", { value: true });
3362
3362
  exports.Agent = void 0;
3363
3363
  var net = __importStar2(__require("net"));
3364
- var http = __importStar2(__require("http"));
3364
+ var http2 = __importStar2(__require("http"));
3365
3365
  var https_1 = __require("https");
3366
3366
  __exportStar2(require_helpers(), exports);
3367
3367
  var INTERNAL = /* @__PURE__ */ Symbol("AgentBaseInternalState");
3368
- var Agent = class extends http.Agent {
3368
+ var Agent = class extends http2.Agent {
3369
3369
  constructor(opts) {
3370
3370
  super(opts);
3371
3371
  this[INTERNAL] = {};
@@ -3437,7 +3437,7 @@ var require_dist = __commonJS({
3437
3437
  const fakeSocket = this.incrementSockets(name);
3438
3438
  Promise.resolve().then(() => this.connect(req, connectOpts)).then((socket) => {
3439
3439
  this.decrementSockets(name, fakeSocket);
3440
- if (socket instanceof http.Agent) {
3440
+ if (socket instanceof http2.Agent) {
3441
3441
  try {
3442
3442
  return socket.addRequest(req, connectOpts);
3443
3443
  } catch (err) {
@@ -7401,7 +7401,7 @@ import {
7401
7401
  import { createHash } from "crypto";
7402
7402
  import path from "path";
7403
7403
  import { spawn, execFileSync } from "child_process";
7404
- var INSTALL_TIMEOUT_MS = 12e4;
7404
+ var INSTALL_TIMEOUT_MS = 18e4;
7405
7405
  var HEARTBEAT_INTERVAL_MS = 5e3;
7406
7406
  function reportRegistry(workDir, onProgress) {
7407
7407
  try {
@@ -9696,14 +9696,12 @@ function toCanonicalModelId(modelId) {
9696
9696
  const slashIndex = modelId.indexOf("/");
9697
9697
  return slashIndex > 0 ? modelId.slice(slashIndex + 1) : modelId;
9698
9698
  }
9699
- function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, executionStartTime) {
9700
- const canonicalModel = toCanonicalModelId(model);
9699
+ function isValidCost(cost) {
9700
+ return typeof cost === "number" && Number.isFinite(cost);
9701
+ }
9702
+ function groupEventsIntoTurns(timestampedEvents) {
9701
9703
  const turns = [];
9702
- let current = {
9703
- textParts: [],
9704
- reasoningParts: [],
9705
- toolCalls: []
9706
- };
9704
+ let current = { textParts: [], reasoningParts: [], toolCalls: [] };
9707
9705
  for (const { event: evt, receivedAt } of timestampedEvents) {
9708
9706
  switch (evt.type) {
9709
9707
  case "text":
@@ -9725,160 +9723,197 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
9725
9723
  current.stepFinish = sf.part;
9726
9724
  current.receivedAt = receivedAt;
9727
9725
  turns.push(current);
9728
- current = {
9729
- textParts: [],
9730
- reasoningParts: [],
9731
- toolCalls: []
9732
- };
9726
+ current = { textParts: [], reasoningParts: [], toolCalls: [] };
9733
9727
  break;
9734
9728
  }
9735
9729
  }
9736
9730
  }
9737
- if (current.textParts.length > 0 || current.reasoningParts.length > 0 || current.toolCalls.length > 0) {
9731
+ const hasTrailingContent = current.textParts.length > 0 || current.reasoningParts.length > 0 || current.toolCalls.length > 0;
9732
+ if (hasTrailingContent) {
9738
9733
  if (timestampedEvents.length > 0) {
9739
9734
  current.receivedAt = timestampedEvents[timestampedEvents.length - 1].receivedAt;
9740
9735
  }
9741
9736
  turns.push(current);
9742
9737
  }
9743
- const executionStartMs = executionStartTime.getTime();
9744
- const allSteps = turns.flatMap((turn, turnIndex) => {
9745
- const sf = turn.stepFinish;
9746
- const stepInputTokens = sf?.tokens.input ?? 0;
9747
- const stepOutputTokens = sf?.tokens.output ?? 0;
9748
- const stepCost = sf?.cost ?? 0;
9749
- const finishReason = sf?.reason ?? "unknown";
9750
- const stepModel = toCanonicalModelId(sf?.modelID || model);
9751
- const stepProvider = sf?.providerID || provider;
9752
- const turnEndMs = turn.receivedAt ?? executionStartMs + totalDurationMs;
9753
- const prevEndMs = turnIndex > 0 ? turns[turnIndex - 1].receivedAt ?? executionStartMs : executionStartMs;
9754
- const durationMs = Math.max(0, turnEndMs - prevEndMs);
9755
- const startedAt = new Date(prevEndMs).toISOString();
9756
- const text = turn.textParts.join("");
9757
- const thinking = turn.reasoningParts.join("");
9758
- const toolCallCount = turn.toolCalls.length;
9759
- const hasThinking = !!thinking;
9760
- const hasText = !!text;
9761
- const isSuccess = finishReason !== "error";
9762
- const errorMsg = finishReason === "error" ? "Generation failed" : void 0;
9763
- const subSteps = [];
9764
- const thinkingSubSteps = hasThinking && (hasText || toolCallCount > 0) ? 1 : 0;
9765
- const toolSubSteps = toolCallCount;
9766
- const textSubSteps = hasText && toolCallCount > 0 ? 1 : 0;
9767
- const totalSubSteps = thinkingSubSteps + toolSubSteps + textSubSteps || 1;
9768
- if (hasThinking && (hasText || toolCallCount > 0)) {
9738
+ return turns;
9739
+ }
9740
+ function resolveTurnCosts(turns, gatewayCosts) {
9741
+ const turnCosts = turns.map((turn, i) => {
9742
+ if (!turn.stepFinish) return 0;
9743
+ const capturedCost = gatewayCosts?.[i];
9744
+ return isValidCost(capturedCost) ? capturedCost : turn.stepFinish.cost;
9745
+ });
9746
+ if (!gatewayCosts || gatewayCosts.length === 0) return turnCosts;
9747
+ const requestTurnIndexes = turns.flatMap(
9748
+ (turn, i) => turn.stepFinish ? [i] : []
9749
+ );
9750
+ const missingCount = requestTurnIndexes.filter(
9751
+ (i) => !isValidCost(gatewayCosts[i])
9752
+ ).length;
9753
+ if (missingCount > 0) {
9754
+ console.warn(
9755
+ `[opencode] gateway cost missing for ${missingCount}/${requestTurnIndexes.length} turn(s); using OpenCode-reported cost for those`
9756
+ );
9757
+ }
9758
+ let extraCallsCost = 0;
9759
+ for (let i = requestTurnIndexes.length; i < gatewayCosts.length; i++) {
9760
+ const capturedCost = gatewayCosts[i];
9761
+ if (isValidCost(capturedCost)) extraCallsCost += capturedCost;
9762
+ }
9763
+ if (extraCallsCost > 0 && requestTurnIndexes.length > 0) {
9764
+ const lastTurnIndex = requestTurnIndexes[requestTurnIndexes.length - 1];
9765
+ turnCosts[lastTurnIndex] += extraCallsCost;
9766
+ console.warn(
9767
+ `[opencode] ${gatewayCosts.length} gateway call(s) for ${requestTurnIndexes.length} turn(s); folded $${extraCallsCost} of extra calls into the last turn`
9768
+ );
9769
+ }
9770
+ return turnCosts;
9771
+ }
9772
+ function buildTurnSteps(turn, turnIndex, ctx) {
9773
+ const {
9774
+ turns,
9775
+ turnCosts,
9776
+ totalDurationMs,
9777
+ executionStartMs,
9778
+ model,
9779
+ provider
9780
+ } = ctx;
9781
+ const sf = turn.stepFinish;
9782
+ const stepInputTokens = sf?.tokens.input ?? 0;
9783
+ const stepOutputTokens = sf?.tokens.output ?? 0;
9784
+ const stepCost = turnCosts[turnIndex];
9785
+ const finishReason = sf?.reason ?? "unknown";
9786
+ const stepModel = toCanonicalModelId(sf?.modelID || model);
9787
+ const stepProvider = sf?.providerID || provider;
9788
+ const turnEndMs = turn.receivedAt ?? executionStartMs + totalDurationMs;
9789
+ const prevEndMs = turnIndex > 0 ? turns[turnIndex - 1].receivedAt ?? executionStartMs : executionStartMs;
9790
+ const durationMs = Math.max(0, turnEndMs - prevEndMs);
9791
+ const startedAt = new Date(prevEndMs).toISOString();
9792
+ const text = turn.textParts.join("");
9793
+ const thinking = turn.reasoningParts.join("");
9794
+ const toolCallCount = turn.toolCalls.length;
9795
+ const hasThinking = !!thinking;
9796
+ const hasText = !!text;
9797
+ const isSuccess = finishReason !== "error";
9798
+ const errorMsg = finishReason === "error" ? "Generation failed" : void 0;
9799
+ const subSteps = [];
9800
+ const thinkingSubSteps = hasThinking && (hasText || toolCallCount > 0) ? 1 : 0;
9801
+ const toolSubSteps = toolCallCount;
9802
+ const textSubSteps = hasText && toolCallCount > 0 ? 1 : 0;
9803
+ const totalSubSteps = thinkingSubSteps + toolSubSteps + textSubSteps || 1;
9804
+ if (hasThinking && (hasText || toolCallCount > 0)) {
9805
+ subSteps.push({
9806
+ id: randomUUID2(),
9807
+ stepNumber: 0,
9808
+ turnIndex,
9809
+ type: LLMStepType2.THINKING,
9810
+ model: stepModel,
9811
+ provider: stepProvider,
9812
+ startedAt,
9813
+ durationMs: Math.round(durationMs / totalSubSteps),
9814
+ tokenUsage: {
9815
+ prompt: Math.round(stepInputTokens / totalSubSteps),
9816
+ completion: Math.round(stepOutputTokens / totalSubSteps),
9817
+ total: Math.round((stepInputTokens + stepOutputTokens) / totalSubSteps)
9818
+ },
9819
+ costUsd: stepCost / totalSubSteps,
9820
+ outputPreview: thinking.slice(0, 200),
9821
+ success: isSuccess,
9822
+ error: errorMsg
9823
+ });
9824
+ }
9825
+ if (toolCallCount > 0) {
9826
+ for (let tcIdx = 0; tcIdx < toolCallCount; tcIdx++) {
9827
+ const tc = turn.toolCalls[tcIdx];
9828
+ const isLast = tcIdx === toolCallCount - 1 && textSubSteps === 0;
9829
+ const toolBudgetSteps = toolSubSteps + textSubSteps;
9830
+ const toolFraction = toolBudgetSteps > 0 ? 1 / toolBudgetSteps : 1;
9831
+ const remainingFraction = (totalSubSteps - thinkingSubSteps) / totalSubSteps;
9769
9832
  subSteps.push({
9770
9833
  id: randomUUID2(),
9771
9834
  stepNumber: 0,
9772
9835
  turnIndex,
9773
- type: LLMStepType2.THINKING,
9836
+ type: LLMStepType2.TOOL_USE,
9774
9837
  model: stepModel,
9775
9838
  provider: stepProvider,
9776
9839
  startedAt,
9777
- durationMs: Math.round(durationMs / totalSubSteps),
9840
+ durationMs: isLast ? durationMs - subSteps.reduce((s, ss) => s + ss.durationMs, 0) : Math.round(durationMs * remainingFraction * toolFraction),
9778
9841
  tokenUsage: {
9779
- prompt: Math.round(stepInputTokens / totalSubSteps),
9780
- completion: Math.round(stepOutputTokens / totalSubSteps),
9842
+ prompt: Math.round(
9843
+ stepInputTokens * remainingFraction * toolFraction
9844
+ ),
9845
+ completion: Math.round(
9846
+ stepOutputTokens * remainingFraction * toolFraction
9847
+ ),
9781
9848
  total: Math.round(
9782
- (stepInputTokens + stepOutputTokens) / totalSubSteps
9849
+ (stepInputTokens + stepOutputTokens) * remainingFraction * toolFraction
9783
9850
  )
9784
9851
  },
9785
- costUsd: stepCost / totalSubSteps,
9786
- outputPreview: thinking.slice(0, 200),
9787
- success: isSuccess,
9788
- error: errorMsg
9789
- });
9790
- }
9791
- if (toolCallCount > 0) {
9792
- for (let tcIdx = 0; tcIdx < toolCallCount; tcIdx++) {
9793
- const tc = turn.toolCalls[tcIdx];
9794
- const isLast = tcIdx === toolCallCount - 1 && textSubSteps === 0;
9795
- const toolBudgetSteps = toolSubSteps + textSubSteps;
9796
- const toolFraction = toolBudgetSteps > 0 ? 1 / toolBudgetSteps : 1;
9797
- const remainingFraction = (totalSubSteps - thinkingSubSteps) / totalSubSteps;
9798
- subSteps.push({
9799
- id: randomUUID2(),
9800
- stepNumber: 0,
9801
- turnIndex,
9802
- type: LLMStepType2.TOOL_USE,
9803
- model: stepModel,
9804
- provider: stepProvider,
9805
- startedAt,
9806
- durationMs: isLast ? durationMs - subSteps.reduce((s, ss) => s + ss.durationMs, 0) : Math.round(durationMs * remainingFraction * toolFraction),
9807
- tokenUsage: {
9808
- prompt: Math.round(
9809
- stepInputTokens * remainingFraction * toolFraction
9810
- ),
9811
- completion: Math.round(
9812
- stepOutputTokens * remainingFraction * toolFraction
9813
- ),
9814
- total: Math.round(
9815
- (stepInputTokens + stepOutputTokens) * remainingFraction * toolFraction
9816
- )
9817
- },
9818
- costUsd: stepCost * remainingFraction * toolFraction,
9819
- toolName: tc.toolName,
9820
- toolArguments: JSON.stringify(tc.args),
9821
- outputPreview: tcIdx === 0 && !hasText ? (text || thinking)?.slice(0, 200) : void 0,
9822
- success: isSuccess,
9823
- error: errorMsg
9824
- });
9825
- }
9826
- }
9827
- if (hasText && toolCallCount > 0) {
9828
- subSteps.push({
9829
- id: randomUUID2(),
9830
- stepNumber: 0,
9831
- turnIndex,
9832
- type: LLMStepType2.COMPLETION,
9833
- model: stepModel,
9834
- provider: stepProvider,
9835
- startedAt,
9836
- durationMs: durationMs - subSteps.reduce((s, ss) => s + ss.durationMs, 0),
9837
- tokenUsage: {
9838
- prompt: stepInputTokens - subSteps.reduce((s, ss) => s + ss.tokenUsage.prompt, 0),
9839
- completion: stepOutputTokens - subSteps.reduce((s, ss) => s + ss.tokenUsage.completion, 0),
9840
- total: stepInputTokens + stepOutputTokens - subSteps.reduce((s, ss) => s + ss.tokenUsage.total, 0)
9841
- },
9842
- costUsd: stepCost - subSteps.reduce((s, ss) => s + ss.costUsd, 0),
9843
- outputPreview: text.slice(0, 200),
9844
- success: isSuccess,
9845
- error: errorMsg
9846
- });
9847
- }
9848
- if (subSteps.length === 0) {
9849
- const stepType = hasThinking && !hasText ? LLMStepType2.THINKING : LLMStepType2.COMPLETION;
9850
- subSteps.push({
9851
- id: randomUUID2(),
9852
- stepNumber: 0,
9853
- turnIndex,
9854
- type: stepType,
9855
- model: stepModel,
9856
- provider: stepProvider,
9857
- startedAt,
9858
- durationMs,
9859
- tokenUsage: {
9860
- prompt: stepInputTokens,
9861
- completion: stepOutputTokens,
9862
- total: stepInputTokens + stepOutputTokens
9863
- },
9864
- costUsd: stepCost,
9865
- outputPreview: (text || thinking)?.slice(0, 200),
9852
+ costUsd: stepCost * remainingFraction * toolFraction,
9853
+ toolName: tc.toolName,
9854
+ toolArguments: JSON.stringify(tc.args),
9855
+ outputPreview: tcIdx === 0 && !hasText ? (text || thinking)?.slice(0, 200) : void 0,
9866
9856
  success: isSuccess,
9867
9857
  error: errorMsg
9868
9858
  });
9869
9859
  }
9870
- return subSteps;
9871
- }).map((s, i) => ({ ...s, stepNumber: i + 1 }));
9860
+ }
9861
+ if (hasText && toolCallCount > 0) {
9862
+ subSteps.push({
9863
+ id: randomUUID2(),
9864
+ stepNumber: 0,
9865
+ turnIndex,
9866
+ type: LLMStepType2.COMPLETION,
9867
+ model: stepModel,
9868
+ provider: stepProvider,
9869
+ startedAt,
9870
+ durationMs: durationMs - subSteps.reduce((s, ss) => s + ss.durationMs, 0),
9871
+ tokenUsage: {
9872
+ prompt: stepInputTokens - subSteps.reduce((s, ss) => s + ss.tokenUsage.prompt, 0),
9873
+ completion: stepOutputTokens - subSteps.reduce((s, ss) => s + ss.tokenUsage.completion, 0),
9874
+ total: stepInputTokens + stepOutputTokens - subSteps.reduce((s, ss) => s + ss.tokenUsage.total, 0)
9875
+ },
9876
+ costUsd: stepCost - subSteps.reduce((s, ss) => s + ss.costUsd, 0),
9877
+ outputPreview: text.slice(0, 200),
9878
+ success: isSuccess,
9879
+ error: errorMsg
9880
+ });
9881
+ }
9882
+ if (subSteps.length === 0) {
9883
+ const stepType = hasThinking && !hasText ? LLMStepType2.THINKING : LLMStepType2.COMPLETION;
9884
+ subSteps.push({
9885
+ id: randomUUID2(),
9886
+ stepNumber: 0,
9887
+ turnIndex,
9888
+ type: stepType,
9889
+ model: stepModel,
9890
+ provider: stepProvider,
9891
+ startedAt,
9892
+ durationMs,
9893
+ tokenUsage: {
9894
+ prompt: stepInputTokens,
9895
+ completion: stepOutputTokens,
9896
+ total: stepInputTokens + stepOutputTokens
9897
+ },
9898
+ costUsd: stepCost,
9899
+ outputPreview: (text || thinking)?.slice(0, 200),
9900
+ success: isSuccess,
9901
+ error: errorMsg
9902
+ });
9903
+ }
9904
+ return subSteps;
9905
+ }
9906
+ function buildSummary(allSteps, turns, turnCosts, totalDurationMs, canonicalModel) {
9872
9907
  let totalPrompt = 0;
9873
9908
  let totalCompletion = 0;
9874
9909
  let totalCost = 0;
9875
- for (const turn of turns) {
9910
+ turns.forEach((turn, turnIndex) => {
9876
9911
  if (turn.stepFinish) {
9877
9912
  totalPrompt += turn.stepFinish.tokens.input;
9878
9913
  totalCompletion += turn.stepFinish.tokens.output;
9879
- totalCost += turn.stepFinish.cost;
9914
+ totalCost += turnCosts[turnIndex];
9880
9915
  }
9881
- }
9916
+ });
9882
9917
  const totalTokens = {
9883
9918
  prompt: totalPrompt,
9884
9919
  completion: totalCompletion,
@@ -9899,7 +9934,7 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
9899
9934
  stepTypeBreakdown[step.type] = entry;
9900
9935
  }
9901
9936
  const modelUsed = allSteps[0]?.model || canonicalModel;
9902
- const summary = {
9937
+ return {
9903
9938
  totalSteps: allSteps.length,
9904
9939
  totalTurns: turns.length,
9905
9940
  totalDurationMs,
@@ -9916,11 +9951,97 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
9916
9951
  modelsUsed: [modelUsed],
9917
9952
  stepTypeBreakdown
9918
9953
  };
9919
- return {
9920
- id: randomUUID2(),
9921
- steps: allSteps,
9922
- summary
9954
+ }
9955
+ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, executionStartTime, gatewayCosts) {
9956
+ const canonicalModel = toCanonicalModelId(model);
9957
+ const turns = groupEventsIntoTurns(timestampedEvents);
9958
+ const turnCosts = resolveTurnCosts(turns, gatewayCosts);
9959
+ const ctx = {
9960
+ turns,
9961
+ turnCosts,
9962
+ totalDurationMs,
9963
+ executionStartMs: executionStartTime.getTime(),
9964
+ model,
9965
+ provider
9923
9966
  };
9967
+ const allSteps = turns.flatMap((turn, turnIndex) => buildTurnSteps(turn, turnIndex, ctx)).map((step, i) => ({ ...step, stepNumber: i + 1 }));
9968
+ const summary = buildSummary(
9969
+ allSteps,
9970
+ turns,
9971
+ turnCosts,
9972
+ totalDurationMs,
9973
+ canonicalModel
9974
+ );
9975
+ return { id: randomUUID2(), steps: allSteps, summary };
9976
+ }
9977
+
9978
+ // src/run-scenario/agents/opencode/gateway-cost-interceptor.ts
9979
+ import http from "node:http";
9980
+ import https from "node:https";
9981
+ import { URL as URL2 } from "node:url";
9982
+ var TAIL_BYTES = 64 * 1024;
9983
+ var COST_RE = /"total_cost_usd"\s*:\s*(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)/g;
9984
+ function extractLastCost(text) {
9985
+ let match;
9986
+ let last = null;
9987
+ COST_RE.lastIndex = 0;
9988
+ while ((match = COST_RE.exec(text)) !== null) {
9989
+ const value = Number(match[1]);
9990
+ if (Number.isFinite(value)) last = value;
9991
+ }
9992
+ return last;
9993
+ }
9994
+ function startGatewayCostInterceptor(realGatewayUrl) {
9995
+ const base = realGatewayUrl.replace(/\/$/, "");
9996
+ const captured = [];
9997
+ const server = http.createServer((clientReq, clientRes) => {
9998
+ const slot = captured.length;
9999
+ captured.push(null);
10000
+ const target = new URL2(base + (clientReq.url ?? ""));
10001
+ const transport = target.protocol === "https:" ? https : http;
10002
+ const headers = { ...clientReq.headers };
10003
+ delete headers.host;
10004
+ headers["accept-encoding"] = "identity";
10005
+ const proxyReq = transport.request(
10006
+ {
10007
+ protocol: target.protocol,
10008
+ hostname: target.hostname,
10009
+ port: target.port,
10010
+ path: target.pathname + target.search,
10011
+ method: clientReq.method,
10012
+ headers
10013
+ },
10014
+ (proxyRes) => {
10015
+ clientRes.writeHead(proxyRes.statusCode ?? 502, proxyRes.headers);
10016
+ let tail = "";
10017
+ proxyRes.on("data", (chunk) => {
10018
+ clientRes.write(chunk);
10019
+ tail = (tail + chunk.toString("utf8")).slice(-TAIL_BYTES);
10020
+ });
10021
+ proxyRes.on("end", () => {
10022
+ clientRes.end();
10023
+ captured[slot] = extractLastCost(tail);
10024
+ });
10025
+ proxyRes.on("error", () => clientRes.destroy());
10026
+ }
10027
+ );
10028
+ proxyReq.on("error", () => {
10029
+ if (!clientRes.headersSent) clientRes.writeHead(502);
10030
+ clientRes.end();
10031
+ });
10032
+ clientReq.pipe(proxyReq);
10033
+ });
10034
+ return new Promise((resolve3) => {
10035
+ server.listen(0, "127.0.0.1", () => {
10036
+ const addr = server.address();
10037
+ const port = typeof addr === "object" && addr ? addr.port : 0;
10038
+ resolve3({
10039
+ url: `http://127.0.0.1:${port}`,
10040
+ getCapturedCosts: () => captured.slice(),
10041
+ close: () => new Promise((r) => server.close(() => r()))
10042
+ });
10043
+ });
10044
+ });
9924
10045
  }
9925
10046
 
9926
10047
  // src/run-scenario/agents/opencode/build-conversation.ts
@@ -10196,6 +10317,7 @@ function spawnOpenCodeProcess(opts) {
10196
10317
  return new Promise((resolve3) => {
10197
10318
  let resolved = false;
10198
10319
  let stderr = "";
10320
+ let rawStdout = "";
10199
10321
  let lineBuffer = "";
10200
10322
  let lastOutputTime = Date.now();
10201
10323
  let traceStepNumber = initialStepNumber;
@@ -10317,6 +10439,7 @@ function spawnOpenCodeProcess(opts) {
10317
10439
  child.stdout?.on("data", (data) => {
10318
10440
  const text = data.toString();
10319
10441
  lastOutputTime = Date.now();
10442
+ rawStdout += text;
10320
10443
  lineBuffer += text;
10321
10444
  const lines = lineBuffer.split("\n");
10322
10445
  lineBuffer = lines.pop() || "";
@@ -10360,7 +10483,7 @@ function spawnOpenCodeProcess(opts) {
10360
10483
  stderr += text;
10361
10484
  lastOutputTime = Date.now();
10362
10485
  });
10363
- child.on("close", (code) => {
10486
+ child.on("close", (code, signal) => {
10364
10487
  if (lineBuffer.trim()) {
10365
10488
  const evt = tryParseJson(lineBuffer);
10366
10489
  if (evt && evt.type) {
@@ -10368,8 +10491,18 @@ function spawnOpenCodeProcess(opts) {
10368
10491
  }
10369
10492
  }
10370
10493
  console.log(
10371
- `[executeWithOpenCode] Process exited with code ${code}, ${events.length} events collected`
10494
+ `[executeWithOpenCode] Process exited with code ${code}, signal ${signal}, ${events.length} events collected`
10372
10495
  );
10496
+ if (events.length === 0) {
10497
+ console.error(
10498
+ `[executeWithOpenCode] No events. exitCode=${code} signal=${signal}
10499
+ --- raw stdout (first 4000) ---
10500
+ ${rawStdout.slice(0, 4e3)}
10501
+ --- raw stderr (first 4000) ---
10502
+ ${stderr.slice(0, 4e3)}
10503
+ --- end raw output ---`
10504
+ );
10505
+ }
10373
10506
  if (code === 0) {
10374
10507
  finalize(true, false);
10375
10508
  } else {
@@ -10392,7 +10525,7 @@ Stderr: ${stderr.slice(0, 1e3)}`
10392
10525
  });
10393
10526
  });
10394
10527
  }
10395
- async function executeWithOpenCode(skills, scenario, options) {
10528
+ async function executeWithOpenCodeInner(skills, scenario, options, interceptor) {
10396
10529
  const skillNames = skills.map((s) => s.name).join(", ");
10397
10530
  console.log("[executeWithOpenCode] Starting execution", {
10398
10531
  skillCount: skills.length,
@@ -10411,7 +10544,9 @@ async function executeWithOpenCode(skills, scenario, options) {
10411
10544
  temperature: options.temperature,
10412
10545
  topP: options.topP,
10413
10546
  maxTurns,
10414
- aiGatewayUrl: options.aiGatewayUrl,
10547
+ // Point OpenCode at the local interceptor (which forwards to the real
10548
+ // gateway and captures the true cost); fall back to the gateway directly.
10549
+ aiGatewayUrl: interceptor?.url ?? options.aiGatewayUrl,
10415
10550
  aiGatewayHeaders: options.aiGatewayHeaders,
10416
10551
  mcps: options.mcps,
10417
10552
  rules: options.rules,
@@ -10564,13 +10699,11 @@ Attempt: ${attempt}, Events: ${accumulatedEvents.length}`
10564
10699
  }
10565
10700
  let inputTokens = 0;
10566
10701
  let outputTokens = 0;
10567
- let costUsd = 0;
10568
10702
  for (const { event: evt } of accumulatedEvents) {
10569
10703
  if (evt.type === "step_finish") {
10570
10704
  const sf = evt;
10571
10705
  inputTokens += sf.part.tokens.input;
10572
10706
  outputTokens += sf.part.tokens.output;
10573
- costUsd += sf.part.cost;
10574
10707
  }
10575
10708
  }
10576
10709
  if (traceContext) {
@@ -10596,7 +10729,8 @@ Attempt: ${attempt}, Events: ${accumulatedEvents.length}`
10596
10729
  totalDurationMs,
10597
10730
  modelStr,
10598
10731
  providerID,
10599
- startTime
10732
+ startTime,
10733
+ interceptor?.getCapturedCosts()
10600
10734
  );
10601
10735
  const conversation = buildConversation2(accumulatedEvents);
10602
10736
  return {
@@ -10608,12 +10742,26 @@ Attempt: ${attempt}, Events: ${accumulatedEvents.length}`
10608
10742
  outputTokens,
10609
10743
  totalTokens: inputTokens + outputTokens
10610
10744
  },
10611
- costUsd
10745
+ // Single source of truth: gateway-derived cost aggregated in the trace.
10746
+ costUsd: llmTrace.summary.totalCostUsd
10612
10747
  },
10613
10748
  llmTrace,
10614
10749
  conversation
10615
10750
  };
10616
10751
  }
10752
+ async function executeWithOpenCode(skills, scenario, options) {
10753
+ const interceptor = options.aiGatewayUrl ? await startGatewayCostInterceptor(options.aiGatewayUrl) : void 0;
10754
+ try {
10755
+ return await executeWithOpenCodeInner(
10756
+ skills,
10757
+ scenario,
10758
+ options,
10759
+ interceptor
10760
+ );
10761
+ } finally {
10762
+ await interceptor?.close();
10763
+ }
10764
+ }
10617
10765
 
10618
10766
  // src/run-scenario/agents/opencode/opencode-adapter.ts
10619
10767
  var OpenCodeAdapter = class {