@wix/evalforge-evaluator 0.202.0 → 0.203.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -2407,9 +2407,9 @@ var require_debug = __commonJS({
2407
2407
  var require_follow_redirects = __commonJS({
2408
2408
  "../../node_modules/follow-redirects/index.js"(exports2, module2) {
2409
2409
  var url = require("url");
2410
- var URL2 = url.URL;
2411
- var http = require("http");
2412
- var https = require("https");
2410
+ var URL3 = url.URL;
2411
+ var http2 = require("http");
2412
+ var https2 = require("https");
2413
2413
  var Writable = require("stream").Writable;
2414
2414
  var assert = require("assert");
2415
2415
  var debug = require_debug();
@@ -2423,7 +2423,7 @@ var require_follow_redirects = __commonJS({
2423
2423
  })();
2424
2424
  var useNativeURL = false;
2425
2425
  try {
2426
- assert(new URL2(""));
2426
+ assert(new URL3(""));
2427
2427
  } catch (error) {
2428
2428
  useNativeURL = error.code === "ERR_INVALID_URL";
2429
2429
  }
@@ -2803,7 +2803,7 @@ var require_follow_redirects = __commonJS({
2803
2803
  function parseUrl(input) {
2804
2804
  var parsed;
2805
2805
  if (useNativeURL) {
2806
- parsed = new URL2(input);
2806
+ parsed = new URL3(input);
2807
2807
  } else {
2808
2808
  parsed = validateUrl(url.parse(input));
2809
2809
  if (!isString(parsed.protocol)) {
@@ -2813,7 +2813,7 @@ var require_follow_redirects = __commonJS({
2813
2813
  return parsed;
2814
2814
  }
2815
2815
  function resolveUrl2(relative2, base) {
2816
- return useNativeURL ? new URL2(relative2, base) : parseUrl(url.resolve(base, relative2));
2816
+ return useNativeURL ? new URL3(relative2, base) : parseUrl(url.resolve(base, relative2));
2817
2817
  }
2818
2818
  function validateUrl(input) {
2819
2819
  if (/^\[/.test(input.hostname) && !/^\[[:0-9a-f]+\]$/i.test(input.hostname)) {
@@ -2892,9 +2892,9 @@ var require_follow_redirects = __commonJS({
2892
2892
  return typeof value === "object" && "length" in value;
2893
2893
  }
2894
2894
  function isURL(value) {
2895
- return URL2 && value instanceof URL2;
2895
+ return URL3 && value instanceof URL3;
2896
2896
  }
2897
- module2.exports = wrap({ http, https });
2897
+ module2.exports = wrap({ http: http2, https: https2 });
2898
2898
  module2.exports.wrap = wrap;
2899
2899
  }
2900
2900
  });
@@ -2997,8 +2997,8 @@ var require_http = __commonJS({
2997
2997
  var settle = require_settle();
2998
2998
  var buildFullPath = require_buildFullPath();
2999
2999
  var buildURL = require_buildURL();
3000
- var http = require("http");
3001
- var https = require("https");
3000
+ var http2 = require("http");
3001
+ var https2 = require("https");
3002
3002
  var httpFollow = require_follow_redirects().http;
3003
3003
  var httpsFollow = require_follow_redirects().https;
3004
3004
  var url = require("url");
@@ -3137,7 +3137,7 @@ var require_http = __commonJS({
3137
3137
  if (config.transport) {
3138
3138
  transport = config.transport;
3139
3139
  } else if (config.maxRedirects === 0) {
3140
- transport = isHttpsProxy ? https : http;
3140
+ transport = isHttpsProxy ? https2 : http2;
3141
3141
  } else {
3142
3142
  if (config.maxRedirects) {
3143
3143
  options.maxRedirects = config.maxRedirects;
@@ -3282,8 +3282,8 @@ var require_helpers = __commonJS({
3282
3282
  };
3283
3283
  Object.defineProperty(exports2, "__esModule", { value: true });
3284
3284
  exports2.req = exports2.json = exports2.toBuffer = void 0;
3285
- var http = __importStar2(require("http"));
3286
- var https = __importStar2(require("https"));
3285
+ var http2 = __importStar2(require("http"));
3286
+ var https2 = __importStar2(require("https"));
3287
3287
  async function toBuffer(stream) {
3288
3288
  let length = 0;
3289
3289
  const chunks = [];
@@ -3308,7 +3308,7 @@ var require_helpers = __commonJS({
3308
3308
  exports2.json = json;
3309
3309
  function req(url, opts = {}) {
3310
3310
  const href = typeof url === "string" ? url : url.href;
3311
- const req2 = (href.startsWith("https:") ? https : http).request(url, opts);
3311
+ const req2 = (href.startsWith("https:") ? https2 : http2).request(url, opts);
3312
3312
  const promise = new Promise((resolve3, reject) => {
3313
3313
  req2.once("response", resolve3).once("error", reject).end();
3314
3314
  });
@@ -3356,11 +3356,11 @@ var require_dist = __commonJS({
3356
3356
  Object.defineProperty(exports2, "__esModule", { value: true });
3357
3357
  exports2.Agent = void 0;
3358
3358
  var net = __importStar2(require("net"));
3359
- var http = __importStar2(require("http"));
3359
+ var http2 = __importStar2(require("http"));
3360
3360
  var https_1 = require("https");
3361
3361
  __exportStar2(require_helpers(), exports2);
3362
3362
  var INTERNAL = /* @__PURE__ */ Symbol("AgentBaseInternalState");
3363
- var Agent = class extends http.Agent {
3363
+ var Agent = class extends http2.Agent {
3364
3364
  constructor(opts) {
3365
3365
  super(opts);
3366
3366
  this[INTERNAL] = {};
@@ -3432,7 +3432,7 @@ var require_dist = __commonJS({
3432
3432
  const fakeSocket = this.incrementSockets(name);
3433
3433
  Promise.resolve().then(() => this.connect(req, connectOpts)).then((socket) => {
3434
3434
  this.decrementSockets(name, fakeSocket);
3435
- if (socket instanceof http.Agent) {
3435
+ if (socket instanceof http2.Agent) {
3436
3436
  try {
3437
3437
  return socket.addRequest(req, connectOpts);
3438
3438
  } catch (err) {
@@ -9650,14 +9650,12 @@ function toCanonicalModelId(modelId) {
9650
9650
  const slashIndex = modelId.indexOf("/");
9651
9651
  return slashIndex > 0 ? modelId.slice(slashIndex + 1) : modelId;
9652
9652
  }
9653
- function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, executionStartTime) {
9654
- const canonicalModel = toCanonicalModelId(model);
9653
+ function isValidCost(cost) {
9654
+ return typeof cost === "number" && Number.isFinite(cost);
9655
+ }
9656
+ function groupEventsIntoTurns(timestampedEvents) {
9655
9657
  const turns = [];
9656
- let current = {
9657
- textParts: [],
9658
- reasoningParts: [],
9659
- toolCalls: []
9660
- };
9658
+ let current = { textParts: [], reasoningParts: [], toolCalls: [] };
9661
9659
  for (const { event: evt, receivedAt } of timestampedEvents) {
9662
9660
  switch (evt.type) {
9663
9661
  case "text":
@@ -9679,160 +9677,197 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
9679
9677
  current.stepFinish = sf.part;
9680
9678
  current.receivedAt = receivedAt;
9681
9679
  turns.push(current);
9682
- current = {
9683
- textParts: [],
9684
- reasoningParts: [],
9685
- toolCalls: []
9686
- };
9680
+ current = { textParts: [], reasoningParts: [], toolCalls: [] };
9687
9681
  break;
9688
9682
  }
9689
9683
  }
9690
9684
  }
9691
- if (current.textParts.length > 0 || current.reasoningParts.length > 0 || current.toolCalls.length > 0) {
9685
+ const hasTrailingContent = current.textParts.length > 0 || current.reasoningParts.length > 0 || current.toolCalls.length > 0;
9686
+ if (hasTrailingContent) {
9692
9687
  if (timestampedEvents.length > 0) {
9693
9688
  current.receivedAt = timestampedEvents[timestampedEvents.length - 1].receivedAt;
9694
9689
  }
9695
9690
  turns.push(current);
9696
9691
  }
9697
- const executionStartMs = executionStartTime.getTime();
9698
- const allSteps = turns.flatMap((turn, turnIndex) => {
9699
- const sf = turn.stepFinish;
9700
- const stepInputTokens = sf?.tokens.input ?? 0;
9701
- const stepOutputTokens = sf?.tokens.output ?? 0;
9702
- const stepCost = sf?.cost ?? 0;
9703
- const finishReason = sf?.reason ?? "unknown";
9704
- const stepModel = toCanonicalModelId(sf?.modelID || model);
9705
- const stepProvider = sf?.providerID || provider;
9706
- const turnEndMs = turn.receivedAt ?? executionStartMs + totalDurationMs;
9707
- const prevEndMs = turnIndex > 0 ? turns[turnIndex - 1].receivedAt ?? executionStartMs : executionStartMs;
9708
- const durationMs = Math.max(0, turnEndMs - prevEndMs);
9709
- const startedAt = new Date(prevEndMs).toISOString();
9710
- const text = turn.textParts.join("");
9711
- const thinking = turn.reasoningParts.join("");
9712
- const toolCallCount = turn.toolCalls.length;
9713
- const hasThinking = !!thinking;
9714
- const hasText = !!text;
9715
- const isSuccess = finishReason !== "error";
9716
- const errorMsg = finishReason === "error" ? "Generation failed" : void 0;
9717
- const subSteps = [];
9718
- const thinkingSubSteps = hasThinking && (hasText || toolCallCount > 0) ? 1 : 0;
9719
- const toolSubSteps = toolCallCount;
9720
- const textSubSteps = hasText && toolCallCount > 0 ? 1 : 0;
9721
- const totalSubSteps = thinkingSubSteps + toolSubSteps + textSubSteps || 1;
9722
- if (hasThinking && (hasText || toolCallCount > 0)) {
9692
+ return turns;
9693
+ }
9694
+ function resolveTurnCosts(turns, gatewayCosts) {
9695
+ const turnCosts = turns.map((turn, i) => {
9696
+ if (!turn.stepFinish) return 0;
9697
+ const capturedCost = gatewayCosts?.[i];
9698
+ return isValidCost(capturedCost) ? capturedCost : turn.stepFinish.cost;
9699
+ });
9700
+ if (!gatewayCosts || gatewayCosts.length === 0) return turnCosts;
9701
+ const requestTurnIndexes = turns.flatMap(
9702
+ (turn, i) => turn.stepFinish ? [i] : []
9703
+ );
9704
+ const missingCount = requestTurnIndexes.filter(
9705
+ (i) => !isValidCost(gatewayCosts[i])
9706
+ ).length;
9707
+ if (missingCount > 0) {
9708
+ console.warn(
9709
+ `[opencode] gateway cost missing for ${missingCount}/${requestTurnIndexes.length} turn(s); using OpenCode-reported cost for those`
9710
+ );
9711
+ }
9712
+ let extraCallsCost = 0;
9713
+ for (let i = requestTurnIndexes.length; i < gatewayCosts.length; i++) {
9714
+ const capturedCost = gatewayCosts[i];
9715
+ if (isValidCost(capturedCost)) extraCallsCost += capturedCost;
9716
+ }
9717
+ if (extraCallsCost > 0 && requestTurnIndexes.length > 0) {
9718
+ const lastTurnIndex = requestTurnIndexes[requestTurnIndexes.length - 1];
9719
+ turnCosts[lastTurnIndex] += extraCallsCost;
9720
+ console.warn(
9721
+ `[opencode] ${gatewayCosts.length} gateway call(s) for ${requestTurnIndexes.length} turn(s); folded $${extraCallsCost} of extra calls into the last turn`
9722
+ );
9723
+ }
9724
+ return turnCosts;
9725
+ }
9726
+ function buildTurnSteps(turn, turnIndex, ctx) {
9727
+ const {
9728
+ turns,
9729
+ turnCosts,
9730
+ totalDurationMs,
9731
+ executionStartMs,
9732
+ model,
9733
+ provider
9734
+ } = ctx;
9735
+ const sf = turn.stepFinish;
9736
+ const stepInputTokens = sf?.tokens.input ?? 0;
9737
+ const stepOutputTokens = sf?.tokens.output ?? 0;
9738
+ const stepCost = turnCosts[turnIndex];
9739
+ const finishReason = sf?.reason ?? "unknown";
9740
+ const stepModel = toCanonicalModelId(sf?.modelID || model);
9741
+ const stepProvider = sf?.providerID || provider;
9742
+ const turnEndMs = turn.receivedAt ?? executionStartMs + totalDurationMs;
9743
+ const prevEndMs = turnIndex > 0 ? turns[turnIndex - 1].receivedAt ?? executionStartMs : executionStartMs;
9744
+ const durationMs = Math.max(0, turnEndMs - prevEndMs);
9745
+ const startedAt = new Date(prevEndMs).toISOString();
9746
+ const text = turn.textParts.join("");
9747
+ const thinking = turn.reasoningParts.join("");
9748
+ const toolCallCount = turn.toolCalls.length;
9749
+ const hasThinking = !!thinking;
9750
+ const hasText = !!text;
9751
+ const isSuccess = finishReason !== "error";
9752
+ const errorMsg = finishReason === "error" ? "Generation failed" : void 0;
9753
+ const subSteps = [];
9754
+ const thinkingSubSteps = hasThinking && (hasText || toolCallCount > 0) ? 1 : 0;
9755
+ const toolSubSteps = toolCallCount;
9756
+ const textSubSteps = hasText && toolCallCount > 0 ? 1 : 0;
9757
+ const totalSubSteps = thinkingSubSteps + toolSubSteps + textSubSteps || 1;
9758
+ if (hasThinking && (hasText || toolCallCount > 0)) {
9759
+ subSteps.push({
9760
+ id: (0, import_crypto3.randomUUID)(),
9761
+ stepNumber: 0,
9762
+ turnIndex,
9763
+ type: import_evalforge_types7.LLMStepType.THINKING,
9764
+ model: stepModel,
9765
+ provider: stepProvider,
9766
+ startedAt,
9767
+ durationMs: Math.round(durationMs / totalSubSteps),
9768
+ tokenUsage: {
9769
+ prompt: Math.round(stepInputTokens / totalSubSteps),
9770
+ completion: Math.round(stepOutputTokens / totalSubSteps),
9771
+ total: Math.round((stepInputTokens + stepOutputTokens) / totalSubSteps)
9772
+ },
9773
+ costUsd: stepCost / totalSubSteps,
9774
+ outputPreview: thinking.slice(0, 200),
9775
+ success: isSuccess,
9776
+ error: errorMsg
9777
+ });
9778
+ }
9779
+ if (toolCallCount > 0) {
9780
+ for (let tcIdx = 0; tcIdx < toolCallCount; tcIdx++) {
9781
+ const tc = turn.toolCalls[tcIdx];
9782
+ const isLast = tcIdx === toolCallCount - 1 && textSubSteps === 0;
9783
+ const toolBudgetSteps = toolSubSteps + textSubSteps;
9784
+ const toolFraction = toolBudgetSteps > 0 ? 1 / toolBudgetSteps : 1;
9785
+ const remainingFraction = (totalSubSteps - thinkingSubSteps) / totalSubSteps;
9723
9786
  subSteps.push({
9724
9787
  id: (0, import_crypto3.randomUUID)(),
9725
9788
  stepNumber: 0,
9726
9789
  turnIndex,
9727
- type: import_evalforge_types7.LLMStepType.THINKING,
9790
+ type: import_evalforge_types7.LLMStepType.TOOL_USE,
9728
9791
  model: stepModel,
9729
9792
  provider: stepProvider,
9730
9793
  startedAt,
9731
- durationMs: Math.round(durationMs / totalSubSteps),
9794
+ durationMs: isLast ? durationMs - subSteps.reduce((s, ss) => s + ss.durationMs, 0) : Math.round(durationMs * remainingFraction * toolFraction),
9732
9795
  tokenUsage: {
9733
- prompt: Math.round(stepInputTokens / totalSubSteps),
9734
- completion: Math.round(stepOutputTokens / totalSubSteps),
9796
+ prompt: Math.round(
9797
+ stepInputTokens * remainingFraction * toolFraction
9798
+ ),
9799
+ completion: Math.round(
9800
+ stepOutputTokens * remainingFraction * toolFraction
9801
+ ),
9735
9802
  total: Math.round(
9736
- (stepInputTokens + stepOutputTokens) / totalSubSteps
9803
+ (stepInputTokens + stepOutputTokens) * remainingFraction * toolFraction
9737
9804
  )
9738
9805
  },
9739
- costUsd: stepCost / totalSubSteps,
9740
- outputPreview: thinking.slice(0, 200),
9741
- success: isSuccess,
9742
- error: errorMsg
9743
- });
9744
- }
9745
- if (toolCallCount > 0) {
9746
- for (let tcIdx = 0; tcIdx < toolCallCount; tcIdx++) {
9747
- const tc = turn.toolCalls[tcIdx];
9748
- const isLast = tcIdx === toolCallCount - 1 && textSubSteps === 0;
9749
- const toolBudgetSteps = toolSubSteps + textSubSteps;
9750
- const toolFraction = toolBudgetSteps > 0 ? 1 / toolBudgetSteps : 1;
9751
- const remainingFraction = (totalSubSteps - thinkingSubSteps) / totalSubSteps;
9752
- subSteps.push({
9753
- id: (0, import_crypto3.randomUUID)(),
9754
- stepNumber: 0,
9755
- turnIndex,
9756
- type: import_evalforge_types7.LLMStepType.TOOL_USE,
9757
- model: stepModel,
9758
- provider: stepProvider,
9759
- startedAt,
9760
- durationMs: isLast ? durationMs - subSteps.reduce((s, ss) => s + ss.durationMs, 0) : Math.round(durationMs * remainingFraction * toolFraction),
9761
- tokenUsage: {
9762
- prompt: Math.round(
9763
- stepInputTokens * remainingFraction * toolFraction
9764
- ),
9765
- completion: Math.round(
9766
- stepOutputTokens * remainingFraction * toolFraction
9767
- ),
9768
- total: Math.round(
9769
- (stepInputTokens + stepOutputTokens) * remainingFraction * toolFraction
9770
- )
9771
- },
9772
- costUsd: stepCost * remainingFraction * toolFraction,
9773
- toolName: tc.toolName,
9774
- toolArguments: JSON.stringify(tc.args),
9775
- outputPreview: tcIdx === 0 && !hasText ? (text || thinking)?.slice(0, 200) : void 0,
9776
- success: isSuccess,
9777
- error: errorMsg
9778
- });
9779
- }
9780
- }
9781
- if (hasText && toolCallCount > 0) {
9782
- subSteps.push({
9783
- id: (0, import_crypto3.randomUUID)(),
9784
- stepNumber: 0,
9785
- turnIndex,
9786
- type: import_evalforge_types7.LLMStepType.COMPLETION,
9787
- model: stepModel,
9788
- provider: stepProvider,
9789
- startedAt,
9790
- durationMs: durationMs - subSteps.reduce((s, ss) => s + ss.durationMs, 0),
9791
- tokenUsage: {
9792
- prompt: stepInputTokens - subSteps.reduce((s, ss) => s + ss.tokenUsage.prompt, 0),
9793
- completion: stepOutputTokens - subSteps.reduce((s, ss) => s + ss.tokenUsage.completion, 0),
9794
- total: stepInputTokens + stepOutputTokens - subSteps.reduce((s, ss) => s + ss.tokenUsage.total, 0)
9795
- },
9796
- costUsd: stepCost - subSteps.reduce((s, ss) => s + ss.costUsd, 0),
9797
- outputPreview: text.slice(0, 200),
9798
- success: isSuccess,
9799
- error: errorMsg
9800
- });
9801
- }
9802
- if (subSteps.length === 0) {
9803
- const stepType = hasThinking && !hasText ? import_evalforge_types7.LLMStepType.THINKING : import_evalforge_types7.LLMStepType.COMPLETION;
9804
- subSteps.push({
9805
- id: (0, import_crypto3.randomUUID)(),
9806
- stepNumber: 0,
9807
- turnIndex,
9808
- type: stepType,
9809
- model: stepModel,
9810
- provider: stepProvider,
9811
- startedAt,
9812
- durationMs,
9813
- tokenUsage: {
9814
- prompt: stepInputTokens,
9815
- completion: stepOutputTokens,
9816
- total: stepInputTokens + stepOutputTokens
9817
- },
9818
- costUsd: stepCost,
9819
- outputPreview: (text || thinking)?.slice(0, 200),
9806
+ costUsd: stepCost * remainingFraction * toolFraction,
9807
+ toolName: tc.toolName,
9808
+ toolArguments: JSON.stringify(tc.args),
9809
+ outputPreview: tcIdx === 0 && !hasText ? (text || thinking)?.slice(0, 200) : void 0,
9820
9810
  success: isSuccess,
9821
9811
  error: errorMsg
9822
9812
  });
9823
9813
  }
9824
- return subSteps;
9825
- }).map((s, i) => ({ ...s, stepNumber: i + 1 }));
9814
+ }
9815
+ if (hasText && toolCallCount > 0) {
9816
+ subSteps.push({
9817
+ id: (0, import_crypto3.randomUUID)(),
9818
+ stepNumber: 0,
9819
+ turnIndex,
9820
+ type: import_evalforge_types7.LLMStepType.COMPLETION,
9821
+ model: stepModel,
9822
+ provider: stepProvider,
9823
+ startedAt,
9824
+ durationMs: durationMs - subSteps.reduce((s, ss) => s + ss.durationMs, 0),
9825
+ tokenUsage: {
9826
+ prompt: stepInputTokens - subSteps.reduce((s, ss) => s + ss.tokenUsage.prompt, 0),
9827
+ completion: stepOutputTokens - subSteps.reduce((s, ss) => s + ss.tokenUsage.completion, 0),
9828
+ total: stepInputTokens + stepOutputTokens - subSteps.reduce((s, ss) => s + ss.tokenUsage.total, 0)
9829
+ },
9830
+ costUsd: stepCost - subSteps.reduce((s, ss) => s + ss.costUsd, 0),
9831
+ outputPreview: text.slice(0, 200),
9832
+ success: isSuccess,
9833
+ error: errorMsg
9834
+ });
9835
+ }
9836
+ if (subSteps.length === 0) {
9837
+ const stepType = hasThinking && !hasText ? import_evalforge_types7.LLMStepType.THINKING : import_evalforge_types7.LLMStepType.COMPLETION;
9838
+ subSteps.push({
9839
+ id: (0, import_crypto3.randomUUID)(),
9840
+ stepNumber: 0,
9841
+ turnIndex,
9842
+ type: stepType,
9843
+ model: stepModel,
9844
+ provider: stepProvider,
9845
+ startedAt,
9846
+ durationMs,
9847
+ tokenUsage: {
9848
+ prompt: stepInputTokens,
9849
+ completion: stepOutputTokens,
9850
+ total: stepInputTokens + stepOutputTokens
9851
+ },
9852
+ costUsd: stepCost,
9853
+ outputPreview: (text || thinking)?.slice(0, 200),
9854
+ success: isSuccess,
9855
+ error: errorMsg
9856
+ });
9857
+ }
9858
+ return subSteps;
9859
+ }
9860
+ function buildSummary(allSteps, turns, turnCosts, totalDurationMs, canonicalModel) {
9826
9861
  let totalPrompt = 0;
9827
9862
  let totalCompletion = 0;
9828
9863
  let totalCost = 0;
9829
- for (const turn of turns) {
9864
+ turns.forEach((turn, turnIndex) => {
9830
9865
  if (turn.stepFinish) {
9831
9866
  totalPrompt += turn.stepFinish.tokens.input;
9832
9867
  totalCompletion += turn.stepFinish.tokens.output;
9833
- totalCost += turn.stepFinish.cost;
9868
+ totalCost += turnCosts[turnIndex];
9834
9869
  }
9835
- }
9870
+ });
9836
9871
  const totalTokens = {
9837
9872
  prompt: totalPrompt,
9838
9873
  completion: totalCompletion,
@@ -9853,7 +9888,7 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
9853
9888
  stepTypeBreakdown[step.type] = entry;
9854
9889
  }
9855
9890
  const modelUsed = allSteps[0]?.model || canonicalModel;
9856
- const summary = {
9891
+ return {
9857
9892
  totalSteps: allSteps.length,
9858
9893
  totalTurns: turns.length,
9859
9894
  totalDurationMs,
@@ -9870,11 +9905,97 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
9870
9905
  modelsUsed: [modelUsed],
9871
9906
  stepTypeBreakdown
9872
9907
  };
9873
- return {
9874
- id: (0, import_crypto3.randomUUID)(),
9875
- steps: allSteps,
9876
- summary
9908
+ }
9909
+ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, executionStartTime, gatewayCosts) {
9910
+ const canonicalModel = toCanonicalModelId(model);
9911
+ const turns = groupEventsIntoTurns(timestampedEvents);
9912
+ const turnCosts = resolveTurnCosts(turns, gatewayCosts);
9913
+ const ctx = {
9914
+ turns,
9915
+ turnCosts,
9916
+ totalDurationMs,
9917
+ executionStartMs: executionStartTime.getTime(),
9918
+ model,
9919
+ provider
9877
9920
  };
9921
+ const allSteps = turns.flatMap((turn, turnIndex) => buildTurnSteps(turn, turnIndex, ctx)).map((step, i) => ({ ...step, stepNumber: i + 1 }));
9922
+ const summary = buildSummary(
9923
+ allSteps,
9924
+ turns,
9925
+ turnCosts,
9926
+ totalDurationMs,
9927
+ canonicalModel
9928
+ );
9929
+ return { id: (0, import_crypto3.randomUUID)(), steps: allSteps, summary };
9930
+ }
9931
+
9932
+ // src/run-scenario/agents/opencode/gateway-cost-interceptor.ts
9933
+ var import_node_http = __toESM(require("node:http"));
9934
+ var import_node_https = __toESM(require("node:https"));
9935
+ var import_node_url = require("node:url");
9936
+ var TAIL_BYTES = 64 * 1024;
9937
+ var COST_RE = /"total_cost_usd"\s*:\s*(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)/g;
9938
+ function extractLastCost(text) {
9939
+ let match;
9940
+ let last = null;
9941
+ COST_RE.lastIndex = 0;
9942
+ while ((match = COST_RE.exec(text)) !== null) {
9943
+ const value = Number(match[1]);
9944
+ if (Number.isFinite(value)) last = value;
9945
+ }
9946
+ return last;
9947
+ }
9948
+ function startGatewayCostInterceptor(realGatewayUrl) {
9949
+ const base = realGatewayUrl.replace(/\/$/, "");
9950
+ const captured = [];
9951
+ const server = import_node_http.default.createServer((clientReq, clientRes) => {
9952
+ const slot = captured.length;
9953
+ captured.push(null);
9954
+ const target = new import_node_url.URL(base + (clientReq.url ?? ""));
9955
+ const transport = target.protocol === "https:" ? import_node_https.default : import_node_http.default;
9956
+ const headers = { ...clientReq.headers };
9957
+ delete headers.host;
9958
+ headers["accept-encoding"] = "identity";
9959
+ const proxyReq = transport.request(
9960
+ {
9961
+ protocol: target.protocol,
9962
+ hostname: target.hostname,
9963
+ port: target.port,
9964
+ path: target.pathname + target.search,
9965
+ method: clientReq.method,
9966
+ headers
9967
+ },
9968
+ (proxyRes) => {
9969
+ clientRes.writeHead(proxyRes.statusCode ?? 502, proxyRes.headers);
9970
+ let tail = "";
9971
+ proxyRes.on("data", (chunk) => {
9972
+ clientRes.write(chunk);
9973
+ tail = (tail + chunk.toString("utf8")).slice(-TAIL_BYTES);
9974
+ });
9975
+ proxyRes.on("end", () => {
9976
+ clientRes.end();
9977
+ captured[slot] = extractLastCost(tail);
9978
+ });
9979
+ proxyRes.on("error", () => clientRes.destroy());
9980
+ }
9981
+ );
9982
+ proxyReq.on("error", () => {
9983
+ if (!clientRes.headersSent) clientRes.writeHead(502);
9984
+ clientRes.end();
9985
+ });
9986
+ clientReq.pipe(proxyReq);
9987
+ });
9988
+ return new Promise((resolve3) => {
9989
+ server.listen(0, "127.0.0.1", () => {
9990
+ const addr = server.address();
9991
+ const port = typeof addr === "object" && addr ? addr.port : 0;
9992
+ resolve3({
9993
+ url: `http://127.0.0.1:${port}`,
9994
+ getCapturedCosts: () => captured.slice(),
9995
+ close: () => new Promise((r) => server.close(() => r()))
9996
+ });
9997
+ });
9998
+ });
9878
9999
  }
9879
10000
 
9880
10001
  // src/run-scenario/agents/opencode/build-conversation.ts
@@ -10150,6 +10271,7 @@ function spawnOpenCodeProcess(opts) {
10150
10271
  return new Promise((resolve3) => {
10151
10272
  let resolved = false;
10152
10273
  let stderr = "";
10274
+ let rawStdout = "";
10153
10275
  let lineBuffer = "";
10154
10276
  let lastOutputTime = Date.now();
10155
10277
  let traceStepNumber = initialStepNumber;
@@ -10271,6 +10393,7 @@ function spawnOpenCodeProcess(opts) {
10271
10393
  child.stdout?.on("data", (data) => {
10272
10394
  const text = data.toString();
10273
10395
  lastOutputTime = Date.now();
10396
+ rawStdout += text;
10274
10397
  lineBuffer += text;
10275
10398
  const lines = lineBuffer.split("\n");
10276
10399
  lineBuffer = lines.pop() || "";
@@ -10314,7 +10437,7 @@ function spawnOpenCodeProcess(opts) {
10314
10437
  stderr += text;
10315
10438
  lastOutputTime = Date.now();
10316
10439
  });
10317
- child.on("close", (code) => {
10440
+ child.on("close", (code, signal) => {
10318
10441
  if (lineBuffer.trim()) {
10319
10442
  const evt = tryParseJson(lineBuffer);
10320
10443
  if (evt && evt.type) {
@@ -10322,8 +10445,18 @@ function spawnOpenCodeProcess(opts) {
10322
10445
  }
10323
10446
  }
10324
10447
  console.log(
10325
- `[executeWithOpenCode] Process exited with code ${code}, ${events.length} events collected`
10448
+ `[executeWithOpenCode] Process exited with code ${code}, signal ${signal}, ${events.length} events collected`
10326
10449
  );
10450
+ if (events.length === 0) {
10451
+ console.error(
10452
+ `[executeWithOpenCode] No events. exitCode=${code} signal=${signal}
10453
+ --- raw stdout (first 4000) ---
10454
+ ${rawStdout.slice(0, 4e3)}
10455
+ --- raw stderr (first 4000) ---
10456
+ ${stderr.slice(0, 4e3)}
10457
+ --- end raw output ---`
10458
+ );
10459
+ }
10327
10460
  if (code === 0) {
10328
10461
  finalize(true, false);
10329
10462
  } else {
@@ -10346,7 +10479,7 @@ Stderr: ${stderr.slice(0, 1e3)}`
10346
10479
  });
10347
10480
  });
10348
10481
  }
10349
- async function executeWithOpenCode(skills, scenario, options) {
10482
+ async function executeWithOpenCodeInner(skills, scenario, options, interceptor) {
10350
10483
  const skillNames = skills.map((s) => s.name).join(", ");
10351
10484
  console.log("[executeWithOpenCode] Starting execution", {
10352
10485
  skillCount: skills.length,
@@ -10365,7 +10498,9 @@ async function executeWithOpenCode(skills, scenario, options) {
10365
10498
  temperature: options.temperature,
10366
10499
  topP: options.topP,
10367
10500
  maxTurns,
10368
- aiGatewayUrl: options.aiGatewayUrl,
10501
+ // Point OpenCode at the local interceptor (which forwards to the real
10502
+ // gateway and captures the true cost); fall back to the gateway directly.
10503
+ aiGatewayUrl: interceptor?.url ?? options.aiGatewayUrl,
10369
10504
  aiGatewayHeaders: options.aiGatewayHeaders,
10370
10505
  mcps: options.mcps,
10371
10506
  rules: options.rules,
@@ -10518,13 +10653,11 @@ Attempt: ${attempt}, Events: ${accumulatedEvents.length}`
10518
10653
  }
10519
10654
  let inputTokens = 0;
10520
10655
  let outputTokens = 0;
10521
- let costUsd = 0;
10522
10656
  for (const { event: evt } of accumulatedEvents) {
10523
10657
  if (evt.type === "step_finish") {
10524
10658
  const sf = evt;
10525
10659
  inputTokens += sf.part.tokens.input;
10526
10660
  outputTokens += sf.part.tokens.output;
10527
- costUsd += sf.part.cost;
10528
10661
  }
10529
10662
  }
10530
10663
  if (traceContext) {
@@ -10550,7 +10683,8 @@ Attempt: ${attempt}, Events: ${accumulatedEvents.length}`
10550
10683
  totalDurationMs,
10551
10684
  modelStr,
10552
10685
  providerID,
10553
- startTime
10686
+ startTime,
10687
+ interceptor?.getCapturedCosts()
10554
10688
  );
10555
10689
  const conversation = buildConversation2(accumulatedEvents);
10556
10690
  return {
@@ -10562,12 +10696,26 @@ Attempt: ${attempt}, Events: ${accumulatedEvents.length}`
10562
10696
  outputTokens,
10563
10697
  totalTokens: inputTokens + outputTokens
10564
10698
  },
10565
- costUsd
10699
+ // Single source of truth: gateway-derived cost aggregated in the trace.
10700
+ costUsd: llmTrace.summary.totalCostUsd
10566
10701
  },
10567
10702
  llmTrace,
10568
10703
  conversation
10569
10704
  };
10570
10705
  }
10706
+ async function executeWithOpenCode(skills, scenario, options) {
10707
+ const interceptor = options.aiGatewayUrl ? await startGatewayCostInterceptor(options.aiGatewayUrl) : void 0;
10708
+ try {
10709
+ return await executeWithOpenCodeInner(
10710
+ skills,
10711
+ scenario,
10712
+ options,
10713
+ interceptor
10714
+ );
10715
+ } finally {
10716
+ await interceptor?.close();
10717
+ }
10718
+ }
10571
10719
 
10572
10720
  // src/run-scenario/agents/opencode/opencode-adapter.ts
10573
10721
  var OpenCodeAdapter = class {