@wix/evalforge-evaluator 0.202.0 → 0.204.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/build/index.js +309 -161
- package/build/index.js.map +4 -4
- package/build/index.mjs +309 -161
- package/build/index.mjs.map +4 -4
- package/build/types/run-scenario/agents/opencode/build-trace.d.ts +15 -1
- package/build/types/run-scenario/agents/opencode/execute.d.ts +4 -8
- package/build/types/run-scenario/agents/opencode/gateway-cost-interceptor.d.ts +28 -0
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -2412,9 +2412,9 @@ var require_debug = __commonJS({
|
|
|
2412
2412
|
var require_follow_redirects = __commonJS({
|
|
2413
2413
|
"../../node_modules/follow-redirects/index.js"(exports, module) {
|
|
2414
2414
|
var url = __require("url");
|
|
2415
|
-
var
|
|
2416
|
-
var
|
|
2417
|
-
var
|
|
2415
|
+
var URL3 = url.URL;
|
|
2416
|
+
var http2 = __require("http");
|
|
2417
|
+
var https2 = __require("https");
|
|
2418
2418
|
var Writable = __require("stream").Writable;
|
|
2419
2419
|
var assert = __require("assert");
|
|
2420
2420
|
var debug = require_debug();
|
|
@@ -2428,7 +2428,7 @@ var require_follow_redirects = __commonJS({
|
|
|
2428
2428
|
})();
|
|
2429
2429
|
var useNativeURL = false;
|
|
2430
2430
|
try {
|
|
2431
|
-
assert(new
|
|
2431
|
+
assert(new URL3(""));
|
|
2432
2432
|
} catch (error) {
|
|
2433
2433
|
useNativeURL = error.code === "ERR_INVALID_URL";
|
|
2434
2434
|
}
|
|
@@ -2808,7 +2808,7 @@ var require_follow_redirects = __commonJS({
|
|
|
2808
2808
|
function parseUrl(input) {
|
|
2809
2809
|
var parsed;
|
|
2810
2810
|
if (useNativeURL) {
|
|
2811
|
-
parsed = new
|
|
2811
|
+
parsed = new URL3(input);
|
|
2812
2812
|
} else {
|
|
2813
2813
|
parsed = validateUrl(url.parse(input));
|
|
2814
2814
|
if (!isString(parsed.protocol)) {
|
|
@@ -2818,7 +2818,7 @@ var require_follow_redirects = __commonJS({
|
|
|
2818
2818
|
return parsed;
|
|
2819
2819
|
}
|
|
2820
2820
|
function resolveUrl2(relative2, base) {
|
|
2821
|
-
return useNativeURL ? new
|
|
2821
|
+
return useNativeURL ? new URL3(relative2, base) : parseUrl(url.resolve(base, relative2));
|
|
2822
2822
|
}
|
|
2823
2823
|
function validateUrl(input) {
|
|
2824
2824
|
if (/^\[/.test(input.hostname) && !/^\[[:0-9a-f]+\]$/i.test(input.hostname)) {
|
|
@@ -2897,9 +2897,9 @@ var require_follow_redirects = __commonJS({
|
|
|
2897
2897
|
return typeof value === "object" && "length" in value;
|
|
2898
2898
|
}
|
|
2899
2899
|
function isURL(value) {
|
|
2900
|
-
return
|
|
2900
|
+
return URL3 && value instanceof URL3;
|
|
2901
2901
|
}
|
|
2902
|
-
module.exports = wrap({ http, https });
|
|
2902
|
+
module.exports = wrap({ http: http2, https: https2 });
|
|
2903
2903
|
module.exports.wrap = wrap;
|
|
2904
2904
|
}
|
|
2905
2905
|
});
|
|
@@ -3002,8 +3002,8 @@ var require_http = __commonJS({
|
|
|
3002
3002
|
var settle = require_settle();
|
|
3003
3003
|
var buildFullPath = require_buildFullPath();
|
|
3004
3004
|
var buildURL = require_buildURL();
|
|
3005
|
-
var
|
|
3006
|
-
var
|
|
3005
|
+
var http2 = __require("http");
|
|
3006
|
+
var https2 = __require("https");
|
|
3007
3007
|
var httpFollow = require_follow_redirects().http;
|
|
3008
3008
|
var httpsFollow = require_follow_redirects().https;
|
|
3009
3009
|
var url = __require("url");
|
|
@@ -3142,7 +3142,7 @@ var require_http = __commonJS({
|
|
|
3142
3142
|
if (config.transport) {
|
|
3143
3143
|
transport = config.transport;
|
|
3144
3144
|
} else if (config.maxRedirects === 0) {
|
|
3145
|
-
transport = isHttpsProxy ?
|
|
3145
|
+
transport = isHttpsProxy ? https2 : http2;
|
|
3146
3146
|
} else {
|
|
3147
3147
|
if (config.maxRedirects) {
|
|
3148
3148
|
options.maxRedirects = config.maxRedirects;
|
|
@@ -3287,8 +3287,8 @@ var require_helpers = __commonJS({
|
|
|
3287
3287
|
};
|
|
3288
3288
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3289
3289
|
exports.req = exports.json = exports.toBuffer = void 0;
|
|
3290
|
-
var
|
|
3291
|
-
var
|
|
3290
|
+
var http2 = __importStar2(__require("http"));
|
|
3291
|
+
var https2 = __importStar2(__require("https"));
|
|
3292
3292
|
async function toBuffer(stream) {
|
|
3293
3293
|
let length = 0;
|
|
3294
3294
|
const chunks = [];
|
|
@@ -3313,7 +3313,7 @@ var require_helpers = __commonJS({
|
|
|
3313
3313
|
exports.json = json;
|
|
3314
3314
|
function req(url, opts = {}) {
|
|
3315
3315
|
const href = typeof url === "string" ? url : url.href;
|
|
3316
|
-
const req2 = (href.startsWith("https:") ?
|
|
3316
|
+
const req2 = (href.startsWith("https:") ? https2 : http2).request(url, opts);
|
|
3317
3317
|
const promise = new Promise((resolve3, reject) => {
|
|
3318
3318
|
req2.once("response", resolve3).once("error", reject).end();
|
|
3319
3319
|
});
|
|
@@ -3361,11 +3361,11 @@ var require_dist = __commonJS({
|
|
|
3361
3361
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3362
3362
|
exports.Agent = void 0;
|
|
3363
3363
|
var net = __importStar2(__require("net"));
|
|
3364
|
-
var
|
|
3364
|
+
var http2 = __importStar2(__require("http"));
|
|
3365
3365
|
var https_1 = __require("https");
|
|
3366
3366
|
__exportStar2(require_helpers(), exports);
|
|
3367
3367
|
var INTERNAL = /* @__PURE__ */ Symbol("AgentBaseInternalState");
|
|
3368
|
-
var Agent = class extends
|
|
3368
|
+
var Agent = class extends http2.Agent {
|
|
3369
3369
|
constructor(opts) {
|
|
3370
3370
|
super(opts);
|
|
3371
3371
|
this[INTERNAL] = {};
|
|
@@ -3437,7 +3437,7 @@ var require_dist = __commonJS({
|
|
|
3437
3437
|
const fakeSocket = this.incrementSockets(name);
|
|
3438
3438
|
Promise.resolve().then(() => this.connect(req, connectOpts)).then((socket) => {
|
|
3439
3439
|
this.decrementSockets(name, fakeSocket);
|
|
3440
|
-
if (socket instanceof
|
|
3440
|
+
if (socket instanceof http2.Agent) {
|
|
3441
3441
|
try {
|
|
3442
3442
|
return socket.addRequest(req, connectOpts);
|
|
3443
3443
|
} catch (err) {
|
|
@@ -7401,7 +7401,7 @@ import {
|
|
|
7401
7401
|
import { createHash } from "crypto";
|
|
7402
7402
|
import path from "path";
|
|
7403
7403
|
import { spawn, execFileSync } from "child_process";
|
|
7404
|
-
var INSTALL_TIMEOUT_MS =
|
|
7404
|
+
var INSTALL_TIMEOUT_MS = 18e4;
|
|
7405
7405
|
var HEARTBEAT_INTERVAL_MS = 5e3;
|
|
7406
7406
|
function reportRegistry(workDir, onProgress) {
|
|
7407
7407
|
try {
|
|
@@ -9696,14 +9696,12 @@ function toCanonicalModelId(modelId) {
|
|
|
9696
9696
|
const slashIndex = modelId.indexOf("/");
|
|
9697
9697
|
return slashIndex > 0 ? modelId.slice(slashIndex + 1) : modelId;
|
|
9698
9698
|
}
|
|
9699
|
-
function
|
|
9700
|
-
|
|
9699
|
+
/**
 * Tells whether a value can be used as a cost amount.
 *
 * @param {unknown} cost - Candidate value.
 * @returns {boolean} `true` only for finite numbers; `NaN`, `Infinity`,
 *   `-Infinity` and every non-number value yield `false`.
 */
function isValidCost(cost) {
  if (typeof cost !== "number") {
    return false;
  }
  return Number.isFinite(cost);
}
|
|
9702
|
+
function groupEventsIntoTurns(timestampedEvents) {
|
|
9701
9703
|
const turns = [];
|
|
9702
|
-
let current = {
|
|
9703
|
-
textParts: [],
|
|
9704
|
-
reasoningParts: [],
|
|
9705
|
-
toolCalls: []
|
|
9706
|
-
};
|
|
9704
|
+
let current = { textParts: [], reasoningParts: [], toolCalls: [] };
|
|
9707
9705
|
for (const { event: evt, receivedAt } of timestampedEvents) {
|
|
9708
9706
|
switch (evt.type) {
|
|
9709
9707
|
case "text":
|
|
@@ -9725,160 +9723,197 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
|
|
|
9725
9723
|
current.stepFinish = sf.part;
|
|
9726
9724
|
current.receivedAt = receivedAt;
|
|
9727
9725
|
turns.push(current);
|
|
9728
|
-
current = {
|
|
9729
|
-
textParts: [],
|
|
9730
|
-
reasoningParts: [],
|
|
9731
|
-
toolCalls: []
|
|
9732
|
-
};
|
|
9726
|
+
current = { textParts: [], reasoningParts: [], toolCalls: [] };
|
|
9733
9727
|
break;
|
|
9734
9728
|
}
|
|
9735
9729
|
}
|
|
9736
9730
|
}
|
|
9737
|
-
|
|
9731
|
+
const hasTrailingContent = current.textParts.length > 0 || current.reasoningParts.length > 0 || current.toolCalls.length > 0;
|
|
9732
|
+
if (hasTrailingContent) {
|
|
9738
9733
|
if (timestampedEvents.length > 0) {
|
|
9739
9734
|
current.receivedAt = timestampedEvents[timestampedEvents.length - 1].receivedAt;
|
|
9740
9735
|
}
|
|
9741
9736
|
turns.push(current);
|
|
9742
9737
|
}
|
|
9743
|
-
|
|
9744
|
-
|
|
9745
|
-
|
|
9746
|
-
|
|
9747
|
-
|
|
9748
|
-
const
|
|
9749
|
-
|
|
9750
|
-
|
|
9751
|
-
|
|
9752
|
-
|
|
9753
|
-
|
|
9754
|
-
|
|
9755
|
-
|
|
9756
|
-
|
|
9757
|
-
|
|
9758
|
-
|
|
9759
|
-
|
|
9760
|
-
|
|
9761
|
-
|
|
9762
|
-
|
|
9763
|
-
|
|
9764
|
-
|
|
9765
|
-
const
|
|
9766
|
-
|
|
9767
|
-
|
|
9768
|
-
|
|
9738
|
+
return turns;
|
|
9739
|
+
}
|
|
9740
|
+
/**
 * Resolves the cost attributed to every turn.
 *
 * A "request turn" is a turn that carries a `stepFinish`. The k-th request
 * turn is paired with the k-th captured gateway cost; when that capture is
 * missing or invalid the cost reported on `stepFinish` is used instead, and
 * when that one is not a finite number either the turn costs 0 (so a bad
 * value can never turn the aggregated total into `NaN`). Turns without a
 * `stepFinish` always cost 0.
 *
 * Captured costs beyond the number of request turns are summed and folded
 * into the last request turn.
 *
 * @param {Array<{stepFinish?: {cost?: number}}>} turns - Turns in order.
 * @param {Array<number|null>|undefined} gatewayCosts - Costs captured per
 *   gateway call, in call order; `null` marks a call without a captured cost.
 * @returns {number[]} One cost per entry of `turns` (same indexes).
 */
function resolveTurnCosts(turns, gatewayCosts) {
  const captured = gatewayCosts ?? [];
  const requestTurnIndexes = turns.flatMap(
    (turn, i) => turn.stepFinish ? [i] : []
  );
  const turnCosts = turns.map(() => 0);
  let missingCount = 0;
  requestTurnIndexes.forEach((turnIndex, callIndex) => {
    const capturedCost = captured[callIndex];
    if (isValidCost(capturedCost)) {
      turnCosts[turnIndex] = capturedCost;
      return;
    }
    missingCount += 1;
    // Fall back to the self-reported cost, but never let undefined/NaN through.
    const reportedCost = turns[turnIndex].stepFinish.cost;
    turnCosts[turnIndex] = isValidCost(reportedCost) ? reportedCost : 0;
  });
  if (captured.length === 0) return turnCosts;
  if (missingCount > 0) {
    console.warn(
      `[opencode] gateway cost missing for ${missingCount}/${requestTurnIndexes.length} turn(s); using OpenCode-reported cost for those`
    );
  }
  let extraCallsCost = 0;
  for (let i = requestTurnIndexes.length; i < captured.length; i++) {
    const capturedCost = captured[i];
    if (isValidCost(capturedCost)) extraCallsCost += capturedCost;
  }
  if (extraCallsCost > 0) {
    if (requestTurnIndexes.length > 0) {
      const lastTurnIndex = requestTurnIndexes[requestTurnIndexes.length - 1];
      turnCosts[lastTurnIndex] += extraCallsCost;
      console.warn(
        `[opencode] ${captured.length} gateway call(s) for ${requestTurnIndexes.length} turn(s); folded $${extraCallsCost} of extra calls into the last turn`
      );
    } else {
      // No finished turn exists to carry the cost; make the loss visible.
      console.warn(
        `[opencode] ${captured.length} gateway call(s) but no finished turn; $${extraCallsCost} of gateway cost is not attributed`
      );
    }
  }
  return turnCosts;
}
|
|
9772
|
+
/**
 * Expands one turn into its trace sub-steps.
 *
 * Sub-steps, in order:
 *   1. a THINKING step when the turn has reasoning AND (text or tool calls),
 *   2. one TOOL_USE step per tool call,
 *   3. a COMPLETION step when the turn has text AND tool calls,
 *   4. otherwise a single step (THINKING when there is reasoning but no text,
 *      COMPLETION in every other case) carrying the whole turn.
 *
 * Duration, token counts and cost of the turn are split evenly across the
 * sub-steps. The split is exact: the shares are never negative and always add
 * up to the turn's value (integer totals yield integer shares).
 *
 * @param {object} turn - Turn with `textParts`, `reasoningParts`, `toolCalls`
 *   and optional `stepFinish` / `receivedAt`.
 * @param {number} turnIndex - Index of `turn` inside `ctx.turns`.
 * @param {object} ctx - Shared context: `turns`, `turnCosts`,
 *   `totalDurationMs`, `executionStartMs`, `model`, `provider`.
 * @returns {object[]} Sub-steps with `stepNumber` set to 0 (numbered by the caller).
 */
function buildTurnSteps(turn, turnIndex, ctx) {
  const {
    turns,
    turnCosts,
    totalDurationMs,
    executionStartMs,
    model,
    provider
  } = ctx;
  const sf = turn.stepFinish;
  const stepInputTokens = sf?.tokens?.input ?? 0;
  const stepOutputTokens = sf?.tokens?.output ?? 0;
  const resolvedCost = turnCosts[turnIndex];
  // A missing / non-finite cost must not poison every sub-step with NaN.
  const stepCost = Number.isFinite(resolvedCost) ? resolvedCost : 0;
  const finishReason = sf?.reason ?? "unknown";
  const stepModel = toCanonicalModelId(sf?.modelID || model);
  const stepProvider = sf?.providerID || provider;
  // A turn spans from the end of the previous turn to its own last event.
  const turnEndMs = turn.receivedAt ?? executionStartMs + totalDurationMs;
  const prevEndMs = turnIndex > 0 ? turns[turnIndex - 1].receivedAt ?? executionStartMs : executionStartMs;
  const durationMs = Math.max(0, turnEndMs - prevEndMs);
  const startedAt = new Date(prevEndMs).toISOString();
  const text = turn.textParts.join("");
  const thinking = turn.reasoningParts.join("");
  const hasThinking = !!thinking;
  const hasText = !!text;
  const hasToolCalls = turn.toolCalls.length > 0;
  const isSuccess = finishReason !== "error";
  const errorMsg = isSuccess ? void 0 : "Generation failed";

  // Decide which sub-steps exist before distributing any numbers.
  const planned = [];
  if (hasThinking && (hasText || hasToolCalls)) {
    planned.push({
      type: LLMStepType2.THINKING,
      details: { outputPreview: thinking.slice(0, 200) }
    });
  }
  turn.toolCalls.forEach((tc, tcIdx) => {
    planned.push({
      type: LLMStepType2.TOOL_USE,
      details: {
        toolName: tc.toolName,
        toolArguments: JSON.stringify(tc.args),
        // Without text, the first tool call carries the reasoning preview.
        outputPreview: tcIdx === 0 && !hasText ? thinking.slice(0, 200) : void 0
      }
    });
  });
  if (hasText && hasToolCalls) {
    planned.push({
      type: LLMStepType2.COMPLETION,
      details: { outputPreview: text.slice(0, 200) }
    });
  }
  if (planned.length === 0) {
    planned.push({
      type: hasThinking && !hasText ? LLMStepType2.THINKING : LLMStepType2.COMPLETION,
      details: { outputPreview: (text || thinking).slice(0, 200) }
    });
  }

  const count = planned.length;
  // Even split via rounded cumulative boundaries: shares are >= 0 and sum to `total`.
  const splitExactly = (total) => {
    let allocated = 0;
    return planned.map((_, i) => {
      const boundary = i === count - 1 ? total : Math.min(total, Math.round(total * (i + 1) / count));
      const share = boundary - allocated;
      allocated = boundary;
      return share;
    });
  };
  const durations = splitExactly(durationMs);
  const promptShares = splitExactly(stepInputTokens);
  const completionShares = splitExactly(stepOutputTokens);
  const evenCost = stepCost / count;

  return planned.map(({ type, details }, i) => ({
    id: randomUUID2(),
    stepNumber: 0,
    turnIndex,
    type,
    model: stepModel,
    provider: stepProvider,
    startedAt,
    durationMs: durations[i],
    tokenUsage: {
      prompt: promptShares[i],
      completion: completionShares[i],
      total: promptShares[i] + completionShares[i]
    },
    // The last sub-step absorbs the floating-point remainder.
    costUsd: i === count - 1 ? stepCost - evenCost * (count - 1) : evenCost,
    ...details,
    success: isSuccess,
    error: errorMsg
  }));
}
|
|
9906
|
+
function buildSummary(allSteps, turns, turnCosts, totalDurationMs, canonicalModel) {
|
|
9872
9907
|
let totalPrompt = 0;
|
|
9873
9908
|
let totalCompletion = 0;
|
|
9874
9909
|
let totalCost = 0;
|
|
9875
|
-
|
|
9910
|
+
turns.forEach((turn, turnIndex) => {
|
|
9876
9911
|
if (turn.stepFinish) {
|
|
9877
9912
|
totalPrompt += turn.stepFinish.tokens.input;
|
|
9878
9913
|
totalCompletion += turn.stepFinish.tokens.output;
|
|
9879
|
-
totalCost +=
|
|
9914
|
+
totalCost += turnCosts[turnIndex];
|
|
9880
9915
|
}
|
|
9881
|
-
}
|
|
9916
|
+
});
|
|
9882
9917
|
const totalTokens = {
|
|
9883
9918
|
prompt: totalPrompt,
|
|
9884
9919
|
completion: totalCompletion,
|
|
@@ -9899,7 +9934,7 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
|
|
|
9899
9934
|
stepTypeBreakdown[step.type] = entry;
|
|
9900
9935
|
}
|
|
9901
9936
|
const modelUsed = allSteps[0]?.model || canonicalModel;
|
|
9902
|
-
|
|
9937
|
+
return {
|
|
9903
9938
|
totalSteps: allSteps.length,
|
|
9904
9939
|
totalTurns: turns.length,
|
|
9905
9940
|
totalDurationMs,
|
|
@@ -9916,11 +9951,97 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
|
|
|
9916
9951
|
modelsUsed: [modelUsed],
|
|
9917
9952
|
stepTypeBreakdown
|
|
9918
9953
|
};
|
|
9919
|
-
|
|
9920
|
-
|
|
9921
|
-
|
|
9922
|
-
|
|
9954
|
+
}
|
|
9955
|
+
/**
 * Assembles the LLM trace for one execution.
 *
 * Pipeline: group the events into turns, resolve one cost per turn, expand
 * every turn into sub-steps, number the steps consecutively starting at 1,
 * then compute the summary.
 *
 * @param {Array} timestampedEvents - Events to group into turns.
 * @param {number} totalDurationMs - Duration of the whole execution.
 * @param {string} model - Model identifier as configured.
 * @param {string} provider - Provider identifier.
 * @param {Date} executionStartTime - When the execution started.
 * @param {Array<number|null>} [gatewayCosts] - Costs captured per gateway call.
 * @returns {{id: string, steps: object[], summary: object}} The trace.
 */
function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, executionStartTime, gatewayCosts) {
  const canonicalModel = toCanonicalModelId(model);
  const turns = groupEventsIntoTurns(timestampedEvents);
  const turnCosts = resolveTurnCosts(turns, gatewayCosts);
  const executionStartMs = executionStartTime.getTime();
  const ctx = { turns, turnCosts, totalDurationMs, executionStartMs, model, provider };

  // First expand every turn, then assign the 1-based step numbers.
  const unnumbered = [];
  turns.forEach((turn, turnIndex) => {
    unnumbered.push(...buildTurnSteps(turn, turnIndex, ctx));
  });
  const allSteps = unnumbered.map((step, position) => {
    return { ...step, stepNumber: position + 1 };
  });

  const summary = buildSummary(allSteps, turns, turnCosts, totalDurationMs, canonicalModel);
  const id = randomUUID2();
  return { id, steps: allSteps, summary };
}
|
|
9977
|
+
|
|
9978
|
+
// src/run-scenario/agents/opencode/gateway-cost-interceptor.ts
|
|
9979
|
+
import http from "node:http";
|
|
9980
|
+
import https from "node:https";
|
|
9981
|
+
import { URL as URL2 } from "node:url";
|
|
9982
|
+
var TAIL_BYTES = 64 * 1024;
|
|
9983
|
+
// Matches every `"total_cost_usd": <number>` pair; the capture group is the
// numeric literal (optional sign, optional fraction, optional exponent).
var COST_RE = /"total_cost_usd"\s*:\s*(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)/g;
/**
 * Returns the value of the LAST `"total_cost_usd"` occurrence in `text`.
 *
 * @param {string} text - Text to scan.
 * @returns {number|null} The last finite cost found, or `null` when there is
 *   none. Occurrences whose number is not finite (e.g. `1e999`) are ignored.
 */
function extractLastCost(text) {
  // COST_RE is a shared /g regex: rewind it so every scan starts at offset 0.
  COST_RE.lastIndex = 0;
  let latest = null;
  for (let hit = COST_RE.exec(text); hit !== null; hit = COST_RE.exec(text)) {
    const parsed = Number(hit[1]);
    if (Number.isFinite(parsed)) {
      latest = parsed;
    }
  }
  return latest;
}
|
|
9994
|
+
/**
 * Starts a local HTTP proxy on 127.0.0.1 (ephemeral port) that forwards every
 * request to `realGatewayUrl` and records, per request, the last
 * `"total_cost_usd"` value found in the tail of the response body.
 *
 * One slot is reserved per incoming request, in arrival order; a slot stays
 * `null` when the response carried no cost or did not complete.
 *
 * @param {string} realGatewayUrl - Base URL of the real gateway (a trailing
 *   slash is ignored).
 * @returns {Promise<{url: string, getCapturedCosts: () => Array<number|null>, close: () => Promise<void>}>}
 *   Resolves once the proxy is listening; rejects when it cannot listen.
 */
function startGatewayCostInterceptor(realGatewayUrl) {
  const base = realGatewayUrl.replace(/\/$/, "");
  const captured = [];
  const server = http.createServer((clientReq, clientRes) => {
    const slot = captured.length;
    captured.push(null);
    let target;
    try {
      target = new URL2(base + (clientReq.url ?? ""));
    } catch {
      // Throwing inside a request listener is an uncaught exception that would
      // take the whole process down; answer with a gateway error instead.
      clientReq.resume();
      clientRes.writeHead(502);
      clientRes.end();
      return;
    }
    const transport = target.protocol === "https:" ? https : http;
    const headers = { ...clientReq.headers };
    delete headers.host;
    // Ask for an uncompressed body so the cost can be read from the raw bytes.
    headers["accept-encoding"] = "identity";
    const proxyReq = transport.request(
      {
        protocol: target.protocol,
        hostname: target.hostname,
        port: target.port,
        path: target.pathname + target.search,
        method: clientReq.method,
        headers
      },
      (proxyRes) => {
        clientRes.writeHead(proxyRes.statusCode ?? 502, proxyRes.headers);
        let tail = "";
        proxyRes.on("data", (chunk) => {
          // Keep only the end of the body; the cost is expected near the end.
          tail = (tail + chunk.toString("utf8")).slice(-TAIL_BYTES);
        });
        proxyRes.on("end", () => {
          captured[slot] = extractLastCost(tail);
        });
        proxyRes.on("error", () => clientRes.destroy());
        proxyRes.on("close", () => {
          // Upstream went away mid-body: do not leave the client waiting.
          if (!proxyRes.complete) clientRes.destroy();
        });
        // pipe() forwards the body and applies backpressure; it also pauses
        // the "data" listener above whenever the client is slow.
        proxyRes.pipe(clientRes);
      }
    );
    proxyReq.on("error", () => {
      if (clientRes.headersSent || clientRes.destroyed) {
        // A partially sent response must not look like a complete one.
        clientRes.destroy();
        return;
      }
      clientRes.writeHead(502);
      clientRes.end();
    });
    clientRes.on("close", () => {
      // Client disconnected before the response finished: stop the upstream call.
      if (!clientRes.writableFinished) proxyReq.destroy();
    });
    clientReq.pipe(proxyReq);
  });
  return new Promise((resolve3, reject) => {
    // Without this, a failing listen() would leave the promise pending forever.
    server.once("error", reject);
    server.listen(0, "127.0.0.1", () => {
      server.off("error", reject);
      server.on("error", (err) => {
        console.warn(`[opencode] gateway cost interceptor error: ${err?.message ?? err}`);
      });
      const addr = server.address();
      const port = typeof addr === "object" && addr ? addr.port : 0;
      resolve3({
        url: `http://127.0.0.1:${port}`,
        getCapturedCosts: () => captured.slice(),
        close: () => new Promise((r) => {
          server.close(() => r());
          // Drop lingering keep-alive sockets so close() cannot stall
          // (closeAllConnections is absent on Node < 18.2, hence the optional call).
          server.closeAllConnections?.();
        })
      });
    });
  });
}
|
|
9925
10046
|
|
|
9926
10047
|
// src/run-scenario/agents/opencode/build-conversation.ts
|
|
@@ -10196,6 +10317,7 @@ function spawnOpenCodeProcess(opts) {
|
|
|
10196
10317
|
return new Promise((resolve3) => {
|
|
10197
10318
|
let resolved = false;
|
|
10198
10319
|
let stderr = "";
|
|
10320
|
+
let rawStdout = "";
|
|
10199
10321
|
let lineBuffer = "";
|
|
10200
10322
|
let lastOutputTime = Date.now();
|
|
10201
10323
|
let traceStepNumber = initialStepNumber;
|
|
@@ -10317,6 +10439,7 @@ function spawnOpenCodeProcess(opts) {
|
|
|
10317
10439
|
child.stdout?.on("data", (data) => {
|
|
10318
10440
|
const text = data.toString();
|
|
10319
10441
|
lastOutputTime = Date.now();
|
|
10442
|
+
rawStdout += text;
|
|
10320
10443
|
lineBuffer += text;
|
|
10321
10444
|
const lines = lineBuffer.split("\n");
|
|
10322
10445
|
lineBuffer = lines.pop() || "";
|
|
@@ -10360,7 +10483,7 @@ function spawnOpenCodeProcess(opts) {
|
|
|
10360
10483
|
stderr += text;
|
|
10361
10484
|
lastOutputTime = Date.now();
|
|
10362
10485
|
});
|
|
10363
|
-
child.on("close", (code) => {
|
|
10486
|
+
child.on("close", (code, signal) => {
|
|
10364
10487
|
if (lineBuffer.trim()) {
|
|
10365
10488
|
const evt = tryParseJson(lineBuffer);
|
|
10366
10489
|
if (evt && evt.type) {
|
|
@@ -10368,8 +10491,18 @@ function spawnOpenCodeProcess(opts) {
|
|
|
10368
10491
|
}
|
|
10369
10492
|
}
|
|
10370
10493
|
console.log(
|
|
10371
|
-
`[executeWithOpenCode] Process exited with code ${code}, ${events.length} events collected`
|
|
10494
|
+
`[executeWithOpenCode] Process exited with code ${code}, signal ${signal}, ${events.length} events collected`
|
|
10372
10495
|
);
|
|
10496
|
+
if (events.length === 0) {
|
|
10497
|
+
console.error(
|
|
10498
|
+
`[executeWithOpenCode] No events. exitCode=${code} signal=${signal}
|
|
10499
|
+
--- raw stdout (first 4000) ---
|
|
10500
|
+
${rawStdout.slice(0, 4e3)}
|
|
10501
|
+
--- raw stderr (first 4000) ---
|
|
10502
|
+
${stderr.slice(0, 4e3)}
|
|
10503
|
+
--- end raw output ---`
|
|
10504
|
+
);
|
|
10505
|
+
}
|
|
10373
10506
|
if (code === 0) {
|
|
10374
10507
|
finalize(true, false);
|
|
10375
10508
|
} else {
|
|
@@ -10392,7 +10525,7 @@ Stderr: ${stderr.slice(0, 1e3)}`
|
|
|
10392
10525
|
});
|
|
10393
10526
|
});
|
|
10394
10527
|
}
|
|
10395
|
-
async function
|
|
10528
|
+
async function executeWithOpenCodeInner(skills, scenario, options, interceptor) {
|
|
10396
10529
|
const skillNames = skills.map((s) => s.name).join(", ");
|
|
10397
10530
|
console.log("[executeWithOpenCode] Starting execution", {
|
|
10398
10531
|
skillCount: skills.length,
|
|
@@ -10411,7 +10544,9 @@ async function executeWithOpenCode(skills, scenario, options) {
|
|
|
10411
10544
|
temperature: options.temperature,
|
|
10412
10545
|
topP: options.topP,
|
|
10413
10546
|
maxTurns,
|
|
10414
|
-
|
|
10547
|
+
// Point OpenCode at the local interceptor (which forwards to the real
|
|
10548
|
+
// gateway and captures the true cost); fall back to the gateway directly.
|
|
10549
|
+
aiGatewayUrl: interceptor?.url ?? options.aiGatewayUrl,
|
|
10415
10550
|
aiGatewayHeaders: options.aiGatewayHeaders,
|
|
10416
10551
|
mcps: options.mcps,
|
|
10417
10552
|
rules: options.rules,
|
|
@@ -10564,13 +10699,11 @@ Attempt: ${attempt}, Events: ${accumulatedEvents.length}`
|
|
|
10564
10699
|
}
|
|
10565
10700
|
let inputTokens = 0;
|
|
10566
10701
|
let outputTokens = 0;
|
|
10567
|
-
let costUsd = 0;
|
|
10568
10702
|
for (const { event: evt } of accumulatedEvents) {
|
|
10569
10703
|
if (evt.type === "step_finish") {
|
|
10570
10704
|
const sf = evt;
|
|
10571
10705
|
inputTokens += sf.part.tokens.input;
|
|
10572
10706
|
outputTokens += sf.part.tokens.output;
|
|
10573
|
-
costUsd += sf.part.cost;
|
|
10574
10707
|
}
|
|
10575
10708
|
}
|
|
10576
10709
|
if (traceContext) {
|
|
@@ -10596,7 +10729,8 @@ Attempt: ${attempt}, Events: ${accumulatedEvents.length}`
|
|
|
10596
10729
|
totalDurationMs,
|
|
10597
10730
|
modelStr,
|
|
10598
10731
|
providerID,
|
|
10599
|
-
startTime
|
|
10732
|
+
startTime,
|
|
10733
|
+
interceptor?.getCapturedCosts()
|
|
10600
10734
|
);
|
|
10601
10735
|
const conversation = buildConversation2(accumulatedEvents);
|
|
10602
10736
|
return {
|
|
@@ -10608,12 +10742,26 @@ Attempt: ${attempt}, Events: ${accumulatedEvents.length}`
|
|
|
10608
10742
|
outputTokens,
|
|
10609
10743
|
totalTokens: inputTokens + outputTokens
|
|
10610
10744
|
},
|
|
10611
|
-
|
|
10745
|
+
// Single source of truth: gateway-derived cost aggregated in the trace.
|
|
10746
|
+
costUsd: llmTrace.summary.totalCostUsd
|
|
10612
10747
|
},
|
|
10613
10748
|
llmTrace,
|
|
10614
10749
|
conversation
|
|
10615
10750
|
};
|
|
10616
10751
|
}
|
|
10752
|
+
/**
 * Runs a scenario with OpenCode, routing gateway traffic through a local
 * cost interceptor when a gateway URL is configured.
 *
 * The interceptor is auxiliary: when it cannot be started the run proceeds
 * against the gateway directly, and a failure while closing it never replaces
 * the run's own result or error.
 *
 * @param {Array} skills - Skills forwarded to the run.
 * @param {object} scenario - Scenario forwarded to the run.
 * @param {object} options - Run options; `options.aiGatewayUrl` enables the interceptor.
 * @returns {Promise<object>} Whatever `executeWithOpenCodeInner` resolves to.
 */
async function executeWithOpenCode(skills, scenario, options) {
  let interceptor;
  if (options.aiGatewayUrl) {
    try {
      interceptor = await startGatewayCostInterceptor(options.aiGatewayUrl);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(
        `[executeWithOpenCode] Gateway cost interceptor failed to start; using the gateway directly: ${reason}`
      );
    }
  }
  try {
    return await executeWithOpenCodeInner(
      skills,
      scenario,
      options,
      interceptor
    );
  } finally {
    try {
      await interceptor?.close();
    } catch (err) {
      // A throw from `finally` would override the result / error of the run.
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(
        `[executeWithOpenCode] Failed to close gateway cost interceptor: ${reason}`
      );
    }
  }
}
|
|
10617
10765
|
|
|
10618
10766
|
// src/run-scenario/agents/opencode/opencode-adapter.ts
|
|
10619
10767
|
var OpenCodeAdapter = class {
|