@wix/evalforge-evaluator 0.118.0 → 0.120.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +538 -537
- package/build/index.js.map +4 -4
- package/build/index.mjs +537 -537
- package/build/index.mjs.map +4 -4
- package/build/types/run-scenario/agents/opencode/build-conversation.d.ts +7 -4
- package/build/types/run-scenario/agents/opencode/build-trace.d.ts +6 -9
- package/build/types/run-scenario/agents/opencode/config.d.ts +5 -11
- package/build/types/run-scenario/agents/opencode/execute.d.ts +3 -4
- package/build/types/run-scenario/agents/opencode/index.d.ts +1 -1
- package/build/types/run-scenario/agents/opencode/opencode-adapter.d.ts +2 -3
- package/build/types/run-scenario/agents/opencode/types.d.ts +51 -6
- package/build/types/run-scenario/agents/simple-agent/build-conversation.d.ts +1 -1
- package/build/types/run-scenario/agents/simple-agent/execute.d.ts +1 -1
- package/package.json +4 -5
package/build/index.js
CHANGED
|
@@ -55,6 +55,7 @@ function loadConfig() {
|
|
|
55
55
|
aiGatewayHeaders[key] = value;
|
|
56
56
|
}
|
|
57
57
|
}
|
|
58
|
+
aiGatewayHeaders["x-wix-ai-gateway-disable-cache"] = "true";
|
|
58
59
|
const tracePushUrl = process.env.TRACE_PUSH_URL;
|
|
59
60
|
const routeHeader = process.env.EVAL_ROUTE_HEADER;
|
|
60
61
|
const authToken = process.env.EVAL_AUTH_TOKEN;
|
|
@@ -1200,10 +1201,10 @@ function createTraceEventFromAnyMessage(message, context, stepNumber, isComplete
|
|
|
1200
1201
|
};
|
|
1201
1202
|
}
|
|
1202
1203
|
async function prepareClaudeCodeEnvironment(cwd, skills, options) {
|
|
1203
|
-
const { mkdir: mkdirAsync, writeFile:
|
|
1204
|
+
const { mkdir: mkdirAsync, writeFile: writeFile7 } = await import("fs/promises");
|
|
1204
1205
|
const claudeDir = `${cwd}/.claude`;
|
|
1205
1206
|
await mkdirAsync(claudeDir, { recursive: true });
|
|
1206
|
-
await
|
|
1207
|
+
await writeFile7(`${claudeDir}/settings.json`, "{}", {
|
|
1207
1208
|
flag: "wx"
|
|
1208
1209
|
}).catch(() => {
|
|
1209
1210
|
});
|
|
@@ -2162,9 +2163,18 @@ defaultRegistry.register(claudeCodeAdapter);
|
|
|
2162
2163
|
var import_evalforge_types9 = require("@wix/evalforge-types");
|
|
2163
2164
|
|
|
2164
2165
|
// src/run-scenario/agents/opencode/execute.ts
|
|
2165
|
-
var
|
|
2166
|
+
var import_child_process = require("child_process");
|
|
2166
2167
|
var import_evalforge_types8 = require("@wix/evalforge-types");
|
|
2167
2168
|
|
|
2169
|
+
// src/run-scenario/agents/opencode/types.ts
|
|
2170
|
+
function tryParseJson(text) {
|
|
2171
|
+
try {
|
|
2172
|
+
return JSON.parse(text);
|
|
2173
|
+
} catch {
|
|
2174
|
+
return null;
|
|
2175
|
+
}
|
|
2176
|
+
}
|
|
2177
|
+
|
|
2168
2178
|
// src/run-scenario/agents/opencode/write-skills.ts
|
|
2169
2179
|
var import_promises7 = require("fs/promises");
|
|
2170
2180
|
var import_path8 = require("path");
|
|
@@ -2260,6 +2270,7 @@ async function writeSubAgentsToFilesystem2(cwd, subAgents, fetchFn = import_eval
|
|
|
2260
2270
|
}
|
|
2261
2271
|
|
|
2262
2272
|
// src/run-scenario/agents/opencode/config.ts
|
|
2273
|
+
var import_os3 = require("os");
|
|
2263
2274
|
var import_evalforge_types6 = require("@wix/evalforge-types");
|
|
2264
2275
|
var DEFAULT_MODEL2 = `${import_evalforge_types6.ClaudeModel.CLAUDE_4_5_SONNET_1_0}`;
|
|
2265
2276
|
function parseModel(model) {
|
|
@@ -2312,7 +2323,14 @@ function toOpenCodeMcpConfig(servers) {
|
|
|
2312
2323
|
}
|
|
2313
2324
|
return result;
|
|
2314
2325
|
}
|
|
2315
|
-
|
|
2326
|
+
function ensureOpenCodeInPath(currentPath) {
|
|
2327
|
+
const opencodeBin = `${(0, import_os3.homedir)()}/.opencode/bin`;
|
|
2328
|
+
if (currentPath.includes(opencodeBin)) {
|
|
2329
|
+
return currentPath;
|
|
2330
|
+
}
|
|
2331
|
+
return `${opencodeBin}:${currentPath}`;
|
|
2332
|
+
}
|
|
2333
|
+
async function buildOpenCodeEnv(options) {
|
|
2316
2334
|
const modelStr = options.model || DEFAULT_MODEL2;
|
|
2317
2335
|
const { providerID, modelID } = parseModel(modelStr);
|
|
2318
2336
|
const provider = {};
|
|
@@ -2324,9 +2342,7 @@ async function buildOpenCodeConfig(options) {
|
|
|
2324
2342
|
if (options.aiGatewayHeaders) {
|
|
2325
2343
|
providerOptions.headers = { ...options.aiGatewayHeaders };
|
|
2326
2344
|
}
|
|
2327
|
-
provider[providerID] = {
|
|
2328
|
-
options: providerOptions
|
|
2329
|
-
};
|
|
2345
|
+
provider[providerID] = { options: providerOptions };
|
|
2330
2346
|
}
|
|
2331
2347
|
let mcp;
|
|
2332
2348
|
if (options.mcps && options.mcps.length > 0) {
|
|
@@ -2367,70 +2383,81 @@ async function buildOpenCodeConfig(options) {
|
|
|
2367
2383
|
},
|
|
2368
2384
|
...mcp ? { mcp } : {}
|
|
2369
2385
|
};
|
|
2370
|
-
|
|
2386
|
+
const env = {
|
|
2387
|
+
...process.env,
|
|
2388
|
+
PATH: ensureOpenCodeInPath(process.env.PATH || ""),
|
|
2389
|
+
OPENCODE_CONFIG_CONTENT: JSON.stringify(config),
|
|
2390
|
+
OPENCODE_DISABLE_LSP_DOWNLOAD: "true"
|
|
2391
|
+
};
|
|
2392
|
+
return { env, providerID, modelID };
|
|
2371
2393
|
}
|
|
2372
2394
|
|
|
2373
2395
|
// src/run-scenario/agents/opencode/build-trace.ts
|
|
2374
2396
|
var import_evalforge_types7 = require("@wix/evalforge-types");
|
|
2375
2397
|
var import_crypto2 = require("crypto");
|
|
2376
|
-
function buildLLMTrace(
|
|
2377
|
-
const
|
|
2378
|
-
|
|
2379
|
-
|
|
2380
|
-
|
|
2381
|
-
|
|
2382
|
-
|
|
2383
|
-
|
|
2384
|
-
|
|
2385
|
-
|
|
2386
|
-
|
|
2387
|
-
|
|
2388
|
-
|
|
2389
|
-
|
|
2390
|
-
|
|
2391
|
-
|
|
2392
|
-
|
|
2393
|
-
|
|
2394
|
-
|
|
2395
|
-
|
|
2396
|
-
|
|
2397
|
-
|
|
2398
|
-
|
|
2399
|
-
|
|
2400
|
-
|
|
2401
|
-
|
|
2402
|
-
|
|
2403
|
-
|
|
2404
|
-
|
|
2405
|
-
|
|
2406
|
-
|
|
2407
|
-
|
|
2408
|
-
}
|
|
2409
|
-
|
|
2410
|
-
const sf = part;
|
|
2411
|
-
stepInputTokens += sf.tokens.input;
|
|
2412
|
-
stepOutputTokens += sf.tokens.output;
|
|
2413
|
-
stepCost += sf.cost;
|
|
2414
|
-
finishReason = sf.reason;
|
|
2415
|
-
break;
|
|
2416
|
-
}
|
|
2398
|
+
function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, executionStartTime) {
|
|
2399
|
+
const turns = [];
|
|
2400
|
+
let current = {
|
|
2401
|
+
textParts: [],
|
|
2402
|
+
reasoningParts: [],
|
|
2403
|
+
toolCalls: []
|
|
2404
|
+
};
|
|
2405
|
+
for (const { event: evt, receivedAt } of timestampedEvents) {
|
|
2406
|
+
switch (evt.type) {
|
|
2407
|
+
case "text":
|
|
2408
|
+
current.textParts.push(evt.part.text);
|
|
2409
|
+
break;
|
|
2410
|
+
case "reasoning":
|
|
2411
|
+
current.reasoningParts.push(evt.part.text);
|
|
2412
|
+
break;
|
|
2413
|
+
case "tool_use": {
|
|
2414
|
+
const tu = evt;
|
|
2415
|
+
current.toolCalls.push({
|
|
2416
|
+
toolName: tu.part.tool,
|
|
2417
|
+
args: tu.part.state.input
|
|
2418
|
+
});
|
|
2419
|
+
break;
|
|
2420
|
+
}
|
|
2421
|
+
case "step_finish": {
|
|
2422
|
+
const sf = evt;
|
|
2423
|
+
current.stepFinish = sf.part;
|
|
2424
|
+
current.receivedAt = receivedAt;
|
|
2425
|
+
turns.push(current);
|
|
2426
|
+
current = {
|
|
2427
|
+
textParts: [],
|
|
2428
|
+
reasoningParts: [],
|
|
2429
|
+
toolCalls: []
|
|
2430
|
+
};
|
|
2431
|
+
break;
|
|
2417
2432
|
}
|
|
2418
2433
|
}
|
|
2419
|
-
|
|
2420
|
-
|
|
2421
|
-
|
|
2422
|
-
|
|
2423
|
-
}
|
|
2424
|
-
|
|
2425
|
-
|
|
2426
|
-
|
|
2427
|
-
|
|
2428
|
-
const
|
|
2429
|
-
const
|
|
2430
|
-
const
|
|
2431
|
-
const
|
|
2434
|
+
}
|
|
2435
|
+
if (current.textParts.length > 0 || current.reasoningParts.length > 0 || current.toolCalls.length > 0) {
|
|
2436
|
+
if (timestampedEvents.length > 0) {
|
|
2437
|
+
current.receivedAt = timestampedEvents[timestampedEvents.length - 1].receivedAt;
|
|
2438
|
+
}
|
|
2439
|
+
turns.push(current);
|
|
2440
|
+
}
|
|
2441
|
+
const executionStartMs = executionStartTime.getTime();
|
|
2442
|
+
const allSteps = turns.flatMap((turn, turnIndex) => {
|
|
2443
|
+
const sf = turn.stepFinish;
|
|
2444
|
+
const stepInputTokens = sf?.tokens.input ?? 0;
|
|
2445
|
+
const stepOutputTokens = sf?.tokens.output ?? 0;
|
|
2446
|
+
const stepCost = sf?.cost ?? 0;
|
|
2447
|
+
const finishReason = sf?.reason ?? "unknown";
|
|
2448
|
+
const stepModel = sf?.modelID || model;
|
|
2449
|
+
const stepProvider = sf?.providerID || provider;
|
|
2450
|
+
const turnEndMs = turn.receivedAt ?? executionStartMs + totalDurationMs;
|
|
2451
|
+
const prevEndMs = turnIndex > 0 ? turns[turnIndex - 1].receivedAt ?? executionStartMs : executionStartMs;
|
|
2452
|
+
const durationMs = Math.max(0, turnEndMs - prevEndMs);
|
|
2453
|
+
const startedAt = new Date(prevEndMs).toISOString();
|
|
2454
|
+
const text = turn.textParts.join("");
|
|
2455
|
+
const thinking = turn.reasoningParts.join("");
|
|
2456
|
+
const toolCallCount = turn.toolCalls.length;
|
|
2432
2457
|
const hasThinking = !!thinking;
|
|
2433
2458
|
const hasText = !!text;
|
|
2459
|
+
const isSuccess = finishReason !== "error";
|
|
2460
|
+
const errorMsg = finishReason === "error" ? "Generation failed" : void 0;
|
|
2434
2461
|
const subSteps = [];
|
|
2435
2462
|
const thinkingSubSteps = hasThinking && (hasText || toolCallCount > 0) ? 1 : 0;
|
|
2436
2463
|
const toolSubSteps = toolCallCount;
|
|
@@ -2440,7 +2467,6 @@ function buildLLMTrace(messages, totalDurationMs, model, provider) {
|
|
|
2440
2467
|
subSteps.push({
|
|
2441
2468
|
id: (0, import_crypto2.randomUUID)(),
|
|
2442
2469
|
stepNumber: 0,
|
|
2443
|
-
// renumbered below
|
|
2444
2470
|
turnIndex,
|
|
2445
2471
|
type: import_evalforge_types7.LLMStepType.THINKING,
|
|
2446
2472
|
model: stepModel,
|
|
@@ -2462,7 +2488,7 @@ function buildLLMTrace(messages, totalDurationMs, model, provider) {
|
|
|
2462
2488
|
}
|
|
2463
2489
|
if (toolCallCount > 0) {
|
|
2464
2490
|
for (let tcIdx = 0; tcIdx < toolCallCount; tcIdx++) {
|
|
2465
|
-
const tc = toolCalls[tcIdx];
|
|
2491
|
+
const tc = turn.toolCalls[tcIdx];
|
|
2466
2492
|
const isLast = tcIdx === toolCallCount - 1 && textSubSteps === 0;
|
|
2467
2493
|
const toolBudgetSteps = toolSubSteps + textSubSteps;
|
|
2468
2494
|
const toolFraction = toolBudgetSteps > 0 ? 1 / toolBudgetSteps : 1;
|
|
@@ -2541,11 +2567,21 @@ function buildLLMTrace(messages, totalDurationMs, model, provider) {
|
|
|
2541
2567
|
}
|
|
2542
2568
|
return subSteps;
|
|
2543
2569
|
}).map((s, i) => ({ ...s, stepNumber: i + 1 }));
|
|
2544
|
-
|
|
2545
|
-
|
|
2546
|
-
|
|
2547
|
-
|
|
2548
|
-
|
|
2570
|
+
let totalPrompt = 0;
|
|
2571
|
+
let totalCompletion = 0;
|
|
2572
|
+
let totalCost = 0;
|
|
2573
|
+
for (const turn of turns) {
|
|
2574
|
+
if (turn.stepFinish) {
|
|
2575
|
+
totalPrompt += turn.stepFinish.tokens.input;
|
|
2576
|
+
totalCompletion += turn.stepFinish.tokens.output;
|
|
2577
|
+
totalCost += turn.stepFinish.cost;
|
|
2578
|
+
}
|
|
2579
|
+
}
|
|
2580
|
+
const totalTokens = {
|
|
2581
|
+
prompt: totalPrompt,
|
|
2582
|
+
completion: totalCompletion,
|
|
2583
|
+
total: totalPrompt + totalCompletion
|
|
2584
|
+
};
|
|
2549
2585
|
const stepTypeBreakdown = {};
|
|
2550
2586
|
for (const step of allSteps) {
|
|
2551
2587
|
const entry = stepTypeBreakdown[step.type] ?? {
|
|
@@ -2563,7 +2599,7 @@ function buildLLMTrace(messages, totalDurationMs, model, provider) {
|
|
|
2563
2599
|
const modelUsed = allSteps[0]?.model || model;
|
|
2564
2600
|
const summary = {
|
|
2565
2601
|
totalSteps: allSteps.length,
|
|
2566
|
-
totalTurns:
|
|
2602
|
+
totalTurns: turns.length,
|
|
2567
2603
|
totalDurationMs,
|
|
2568
2604
|
totalTokens,
|
|
2569
2605
|
totalCostUsd: totalCost,
|
|
@@ -2584,116 +2620,100 @@ function buildLLMTrace(messages, totalDurationMs, model, provider) {
|
|
|
2584
2620
|
summary
|
|
2585
2621
|
};
|
|
2586
2622
|
}
|
|
2587
|
-
function buildTotalTokens(assistantMessages) {
|
|
2588
|
-
let prompt = 0;
|
|
2589
|
-
let completion = 0;
|
|
2590
|
-
for (const { info } of assistantMessages) {
|
|
2591
|
-
prompt += info.tokens.input;
|
|
2592
|
-
completion += info.tokens.output;
|
|
2593
|
-
}
|
|
2594
|
-
return { prompt, completion, total: prompt + completion };
|
|
2595
|
-
}
|
|
2596
2623
|
|
|
2597
2624
|
// src/run-scenario/agents/opencode/build-conversation.ts
|
|
2598
|
-
function buildConversation2(
|
|
2625
|
+
function buildConversation2(timestampedEvents) {
|
|
2599
2626
|
const result = [];
|
|
2600
|
-
|
|
2601
|
-
|
|
2602
|
-
|
|
2603
|
-
|
|
2604
|
-
|
|
2605
|
-
|
|
2606
|
-
|
|
2607
|
-
|
|
2608
|
-
|
|
2609
|
-
|
|
2610
|
-
|
|
2611
|
-
|
|
2612
|
-
|
|
2613
|
-
|
|
2614
|
-
|
|
2615
|
-
|
|
2616
|
-
|
|
2617
|
-
|
|
2618
|
-
|
|
2619
|
-
|
|
2620
|
-
|
|
2621
|
-
|
|
2622
|
-
|
|
2623
|
-
|
|
2624
|
-
break;
|
|
2625
|
-
}
|
|
2626
|
-
}
|
|
2627
|
+
let assistantContent = [];
|
|
2628
|
+
let userContent = [];
|
|
2629
|
+
let latestReceivedAt = 0;
|
|
2630
|
+
const flushAssistant = () => {
|
|
2631
|
+
if (assistantContent.length > 0) {
|
|
2632
|
+
const timestamp = latestReceivedAt > 0 ? new Date(latestReceivedAt).toISOString() : (/* @__PURE__ */ new Date()).toISOString();
|
|
2633
|
+
result.push({ role: "assistant", content: assistantContent, timestamp });
|
|
2634
|
+
assistantContent = [];
|
|
2635
|
+
}
|
|
2636
|
+
};
|
|
2637
|
+
const flushUser = () => {
|
|
2638
|
+
if (userContent.length > 0) {
|
|
2639
|
+
const timestamp = latestReceivedAt > 0 ? new Date(latestReceivedAt).toISOString() : (/* @__PURE__ */ new Date()).toISOString();
|
|
2640
|
+
result.push({ role: "user", content: userContent, timestamp });
|
|
2641
|
+
userContent = [];
|
|
2642
|
+
}
|
|
2643
|
+
};
|
|
2644
|
+
for (const { event: evt, receivedAt } of timestampedEvents) {
|
|
2645
|
+
latestReceivedAt = receivedAt;
|
|
2646
|
+
switch (evt.type) {
|
|
2647
|
+
case "text": {
|
|
2648
|
+
const te = evt;
|
|
2649
|
+
assistantContent.push({ type: "text", text: te.part.text });
|
|
2650
|
+
break;
|
|
2627
2651
|
}
|
|
2628
|
-
|
|
2629
|
-
|
|
2652
|
+
case "reasoning": {
|
|
2653
|
+
const re = evt;
|
|
2654
|
+
assistantContent.push({ type: "thinking", thinking: re.part.text });
|
|
2655
|
+
break;
|
|
2630
2656
|
}
|
|
2631
|
-
|
|
2632
|
-
|
|
2633
|
-
|
|
2634
|
-
|
|
2635
|
-
|
|
2636
|
-
|
|
2637
|
-
|
|
2638
|
-
|
|
2639
|
-
|
|
2640
|
-
|
|
2641
|
-
|
|
2642
|
-
|
|
2643
|
-
|
|
2644
|
-
|
|
2645
|
-
|
|
2646
|
-
|
|
2647
|
-
|
|
2648
|
-
|
|
2649
|
-
|
|
2650
|
-
type: "tool_result",
|
|
2651
|
-
toolUseId: toolPart.callID,
|
|
2652
|
-
content: errState.error,
|
|
2653
|
-
isError: true
|
|
2654
|
-
});
|
|
2655
|
-
}
|
|
2657
|
+
case "tool_use": {
|
|
2658
|
+
const tu = evt;
|
|
2659
|
+
assistantContent.push({
|
|
2660
|
+
type: "tool_use",
|
|
2661
|
+
toolName: tu.part.tool,
|
|
2662
|
+
toolId: tu.part.callID,
|
|
2663
|
+
input: tu.part.state.input
|
|
2664
|
+
});
|
|
2665
|
+
if (tu.part.state.status === "completed" || tu.part.state.status === "error") {
|
|
2666
|
+
flushAssistant();
|
|
2667
|
+
const isError = tu.part.state.status === "error";
|
|
2668
|
+
const content = isError ? tu.part.state.error || "Tool execution failed" : tu.part.state.output || "";
|
|
2669
|
+
userContent.push({
|
|
2670
|
+
type: "tool_result",
|
|
2671
|
+
toolUseId: tu.part.callID,
|
|
2672
|
+
content,
|
|
2673
|
+
...isError ? { isError: true } : {}
|
|
2674
|
+
});
|
|
2675
|
+
flushUser();
|
|
2656
2676
|
}
|
|
2677
|
+
break;
|
|
2657
2678
|
}
|
|
2658
|
-
|
|
2659
|
-
|
|
2679
|
+
case "step_finish": {
|
|
2680
|
+
flushAssistant();
|
|
2681
|
+
flushUser();
|
|
2682
|
+
break;
|
|
2660
2683
|
}
|
|
2661
2684
|
}
|
|
2662
2685
|
}
|
|
2686
|
+
flushAssistant();
|
|
2687
|
+
flushUser();
|
|
2663
2688
|
return result;
|
|
2664
2689
|
}
|
|
2665
2690
|
|
|
2666
2691
|
// src/run-scenario/agents/opencode/execute.ts
|
|
2667
|
-
var
|
|
2668
|
-
|
|
2669
|
-
|
|
2670
|
-
|
|
2671
|
-
|
|
2672
|
-
process.env.PATH = `${opencodeBin}:${currentPath}`;
|
|
2673
|
-
}
|
|
2674
|
-
}
|
|
2692
|
+
var import_promises9 = require("fs/promises");
|
|
2693
|
+
var import_path10 = require("path");
|
|
2694
|
+
var KILL_GRACE_PERIOD_MS = 5e3;
|
|
2695
|
+
var IDLE_TIMEOUT_MS = 12e4;
|
|
2696
|
+
var IDLE_CHECK_INTERVAL_MS = 15e3;
|
|
2675
2697
|
function extractToolAction(toolName, args) {
|
|
2676
2698
|
if (!toolName) return "Using tool...";
|
|
2677
|
-
|
|
2678
|
-
|
|
2679
|
-
|
|
2680
|
-
|
|
2681
|
-
|
|
2682
|
-
|
|
2683
|
-
|
|
2684
|
-
|
|
2685
|
-
|
|
2686
|
-
|
|
2687
|
-
|
|
2688
|
-
|
|
2689
|
-
50
|
|
2690
|
-
);
|
|
2699
|
+
if ((toolName === "Task" || toolName === "dispatch_agent") && args?.description) {
|
|
2700
|
+
const desc = String(args.description).slice(0, 55);
|
|
2701
|
+
return `Task: ${desc}${String(args.description).length > 55 ? "..." : ""}`;
|
|
2702
|
+
}
|
|
2703
|
+
if ((toolName === "Bash" || toolName === "bash" || toolName === "execute") && args?.command) {
|
|
2704
|
+
const cmd = String(args.command).slice(0, 50);
|
|
2705
|
+
return `Running: ${cmd}${String(args.command).length > 50 ? "..." : ""}`;
|
|
2706
|
+
}
|
|
2707
|
+
if (args?.file_path || args?.path || args?.target_file) {
|
|
2708
|
+
const filePath = String(
|
|
2709
|
+
args.file_path || args.path || args.target_file
|
|
2710
|
+
).slice(0, 50);
|
|
2691
2711
|
if (/write|edit/i.test(toolName)) return `Writing: ${filePath}`;
|
|
2692
2712
|
if (/read|view/i.test(toolName)) return `Reading: ${filePath}`;
|
|
2693
2713
|
}
|
|
2694
2714
|
return `Using ${toolName}...`;
|
|
2695
2715
|
}
|
|
2696
|
-
function
|
|
2716
|
+
function createTraceEventFromNdjson(evt, context, stepNumber, isComplete) {
|
|
2697
2717
|
const base = {
|
|
2698
2718
|
evalRunId: context.evalRunId,
|
|
2699
2719
|
scenarioId: context.scenarioId,
|
|
@@ -2704,42 +2724,41 @@ function createTraceEventFromPart(part, context, stepNumber, isComplete) {
|
|
|
2704
2724
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2705
2725
|
isComplete
|
|
2706
2726
|
};
|
|
2707
|
-
switch (
|
|
2727
|
+
switch (evt.type) {
|
|
2708
2728
|
case "text": {
|
|
2709
|
-
const
|
|
2729
|
+
const te = evt;
|
|
2710
2730
|
return {
|
|
2711
2731
|
...base,
|
|
2712
2732
|
type: import_evalforge_types8.LiveTraceEventType.COMPLETION,
|
|
2713
|
-
outputPreview:
|
|
2733
|
+
outputPreview: te.part.text.slice(0, 500)
|
|
2714
2734
|
};
|
|
2715
2735
|
}
|
|
2716
|
-
case "reasoning":
|
|
2717
|
-
const reasoningPart = part;
|
|
2736
|
+
case "reasoning":
|
|
2718
2737
|
return {
|
|
2719
2738
|
...base,
|
|
2720
2739
|
type: import_evalforge_types8.LiveTraceEventType.THINKING,
|
|
2721
|
-
thinking:
|
|
2740
|
+
thinking: evt.part.text.slice(0, 500)
|
|
2722
2741
|
};
|
|
2723
|
-
|
|
2724
|
-
|
|
2725
|
-
const
|
|
2726
|
-
const
|
|
2727
|
-
const args = toolPart.state.input;
|
|
2742
|
+
case "tool_use": {
|
|
2743
|
+
const tu = evt;
|
|
2744
|
+
const toolName = tu.part.tool;
|
|
2745
|
+
const args = tu.part.state.input;
|
|
2728
2746
|
const toolArgs = JSON.stringify(args).slice(0, 500);
|
|
2729
2747
|
let type = import_evalforge_types8.LiveTraceEventType.TOOL_USE;
|
|
2730
2748
|
let filePath;
|
|
2731
|
-
|
|
2732
|
-
|
|
2733
|
-
|
|
2734
|
-
|
|
2735
|
-
|
|
2736
|
-
|
|
2737
|
-
|
|
2749
|
+
if (args) {
|
|
2750
|
+
if (args.file_path || args.path || args.target_file) {
|
|
2751
|
+
filePath = String(args.file_path || args.path || args.target_file);
|
|
2752
|
+
if (/write|edit/i.test(toolName)) {
|
|
2753
|
+
type = import_evalforge_types8.LiveTraceEventType.FILE_WRITE;
|
|
2754
|
+
} else if (/read|view/i.test(toolName)) {
|
|
2755
|
+
type = import_evalforge_types8.LiveTraceEventType.FILE_READ;
|
|
2756
|
+
}
|
|
2738
2757
|
}
|
|
2739
2758
|
}
|
|
2740
2759
|
return { ...base, type, toolName, toolArgs, filePath };
|
|
2741
2760
|
}
|
|
2742
|
-
case "
|
|
2761
|
+
case "step_finish":
|
|
2743
2762
|
return {
|
|
2744
2763
|
...base,
|
|
2745
2764
|
type: import_evalforge_types8.LiveTraceEventType.PROGRESS,
|
|
@@ -2769,6 +2788,37 @@ async function prepareOpenCodeEnvironment(cwd, skills, options) {
|
|
|
2769
2788
|
);
|
|
2770
2789
|
}
|
|
2771
2790
|
}
|
|
2791
|
+
async function writeSystemPromptRule(cwd, systemPrompt) {
|
|
2792
|
+
const rulesDir = (0, import_path10.join)(cwd, ".opencode", "rules");
|
|
2793
|
+
await (0, import_promises9.mkdir)(rulesDir, { recursive: true });
|
|
2794
|
+
await (0, import_promises9.writeFile)(
|
|
2795
|
+
(0, import_path10.join)(rulesDir, "evalforge-system-prompt.md"),
|
|
2796
|
+
systemPrompt,
|
|
2797
|
+
"utf-8"
|
|
2798
|
+
);
|
|
2799
|
+
}
|
|
2800
|
+
function killProcess(child, resolved) {
|
|
2801
|
+
if (!child) return;
|
|
2802
|
+
const killSignal = (signal) => {
|
|
2803
|
+
if (child.pid) {
|
|
2804
|
+
try {
|
|
2805
|
+
process.kill(-child.pid, signal);
|
|
2806
|
+
console.log(
|
|
2807
|
+
`[OpenCode] Sent ${signal} to process group (pid: -${child.pid})`
|
|
2808
|
+
);
|
|
2809
|
+
return;
|
|
2810
|
+
} catch {
|
|
2811
|
+
}
|
|
2812
|
+
}
|
|
2813
|
+
child.kill(signal);
|
|
2814
|
+
};
|
|
2815
|
+
killSignal("SIGTERM");
|
|
2816
|
+
setTimeout(() => {
|
|
2817
|
+
if (child && !resolved) {
|
|
2818
|
+
killSignal("SIGKILL");
|
|
2819
|
+
}
|
|
2820
|
+
}, KILL_GRACE_PERIOD_MS);
|
|
2821
|
+
}
|
|
2772
2822
|
async function executeWithOpenCode(skills, scenario, options) {
|
|
2773
2823
|
const skillNames = skills.map((s) => s.name).join(", ");
|
|
2774
2824
|
console.log("[executeWithOpenCode] Starting execution", {
|
|
@@ -2783,7 +2833,8 @@ async function executeWithOpenCode(skills, scenario, options) {
|
|
|
2783
2833
|
});
|
|
2784
2834
|
const startTime = /* @__PURE__ */ new Date();
|
|
2785
2835
|
const maxTurns = options.maxTurns ?? 10;
|
|
2786
|
-
const
|
|
2836
|
+
const SDK_TIMEOUT_MS = Math.max(3e5, maxTurns * 6e4);
|
|
2837
|
+
const { env, providerID, modelID } = await buildOpenCodeEnv({
|
|
2787
2838
|
model: options.model,
|
|
2788
2839
|
temperature: options.temperature,
|
|
2789
2840
|
maxTurns,
|
|
@@ -2792,12 +2843,6 @@ async function executeWithOpenCode(skills, scenario, options) {
|
|
|
2792
2843
|
mcps: options.mcps,
|
|
2793
2844
|
cwd: options.cwd
|
|
2794
2845
|
});
|
|
2795
|
-
const { createOpencodeServer, createOpencodeClient } = await import("@opencode-ai/sdk");
|
|
2796
|
-
const SDK_TIMEOUT_MS = Math.max(3e5, maxTurns * 6e4);
|
|
2797
|
-
const abortController = new AbortController();
|
|
2798
|
-
let timeoutHandle;
|
|
2799
|
-
let heartbeatHandle;
|
|
2800
|
-
let timedOut = false;
|
|
2801
2846
|
const traceContext = options.traceContext;
|
|
2802
2847
|
let traceStepNumber = 0;
|
|
2803
2848
|
let lastAction = "Starting...";
|
|
@@ -2814,7 +2859,7 @@ async function executeWithOpenCode(skills, scenario, options) {
|
|
|
2814
2859
|
stepNumber: 0,
|
|
2815
2860
|
type: import_evalforge_types8.LiveTraceEventType.DIAGNOSTIC,
|
|
2816
2861
|
outputPreview: JSON.stringify({
|
|
2817
|
-
event: "pre-
|
|
2862
|
+
event: "pre-cli-execution",
|
|
2818
2863
|
model: `${providerID}/${modelID}`,
|
|
2819
2864
|
maxTurns,
|
|
2820
2865
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
@@ -2827,105 +2872,200 @@ async function executeWithOpenCode(skills, scenario, options) {
|
|
|
2827
2872
|
traceContext.authToken
|
|
2828
2873
|
);
|
|
2829
2874
|
}
|
|
2830
|
-
let
|
|
2831
|
-
|
|
2832
|
-
|
|
2833
|
-
|
|
2834
|
-
|
|
2835
|
-
|
|
2836
|
-
|
|
2837
|
-
|
|
2838
|
-
|
|
2839
|
-
|
|
2840
|
-
|
|
2841
|
-
|
|
2842
|
-
|
|
2843
|
-
|
|
2844
|
-
|
|
2845
|
-
|
|
2846
|
-
|
|
2847
|
-
|
|
2848
|
-
|
|
2849
|
-
|
|
2850
|
-
|
|
2875
|
+
let systemPrompt;
|
|
2876
|
+
if (options.systemPrompt === null || options.systemPrompt === "") {
|
|
2877
|
+
} else if (options.systemPrompt != null) {
|
|
2878
|
+
systemPrompt = options.systemPrompt;
|
|
2879
|
+
} else {
|
|
2880
|
+
systemPrompt = import_evalforge_types8.DEFAULT_EVALUATOR_SYSTEM_PROMPT;
|
|
2881
|
+
}
|
|
2882
|
+
if (systemPrompt) {
|
|
2883
|
+
await writeSystemPromptRule(options.cwd, systemPrompt);
|
|
2884
|
+
}
|
|
2885
|
+
const args = [
|
|
2886
|
+
"run",
|
|
2887
|
+
"--format",
|
|
2888
|
+
"json",
|
|
2889
|
+
"--thinking",
|
|
2890
|
+
"--variant",
|
|
2891
|
+
"high",
|
|
2892
|
+
"--model",
|
|
2893
|
+
`${providerID}/${modelID}`,
|
|
2894
|
+
"--dir",
|
|
2895
|
+
options.cwd,
|
|
2896
|
+
// NOTE: Trigger prompt is passed as a positional CLI arg. On Linux a single
|
|
2897
|
+
// arg is capped at 128 KB (MAX_ARG_STRLEN); on macOS the combined args+env
|
|
2898
|
+
// share a ~1 MB limit. Prompts exceeding this would fail with E2BIG.
|
|
2899
|
+
// In practice eval prompts are well under this limit.
|
|
2900
|
+
scenario.triggerPrompt
|
|
2901
|
+
];
|
|
2902
|
+
console.log("[executeWithOpenCode] Spawning: opencode", args.slice(0, 5));
|
|
2903
|
+
return new Promise((resolve2, reject) => {
|
|
2904
|
+
let resolved = false;
|
|
2905
|
+
let stderr = "";
|
|
2906
|
+
let lineBuffer = "";
|
|
2907
|
+
let lastOutputTime = Date.now();
|
|
2908
|
+
const allEvents = [];
|
|
2909
|
+
const timers = {};
|
|
2910
|
+
const cleanup = () => {
|
|
2911
|
+
if (timers.timeout) clearTimeout(timers.timeout);
|
|
2912
|
+
if (timers.idleCheck) clearInterval(timers.idleCheck);
|
|
2913
|
+
if (timers.heartbeat) clearInterval(timers.heartbeat);
|
|
2914
|
+
};
|
|
2915
|
+
const finalize = (success, error) => {
|
|
2916
|
+
if (resolved) return;
|
|
2917
|
+
resolved = true;
|
|
2918
|
+
cleanup();
|
|
2919
|
+
if (!success) {
|
|
2920
|
+
if (traceContext) {
|
|
2921
|
+
emitTraceEvent(
|
|
2922
|
+
{
|
|
2923
|
+
evalRunId: traceContext.evalRunId,
|
|
2924
|
+
scenarioId: traceContext.scenarioId,
|
|
2925
|
+
scenarioName: traceContext.scenarioName,
|
|
2926
|
+
targetId: traceContext.targetId,
|
|
2927
|
+
targetName: traceContext.targetName,
|
|
2928
|
+
stepNumber: traceStepNumber + 1,
|
|
2929
|
+
type: import_evalforge_types8.LiveTraceEventType.DIAGNOSTIC,
|
|
2930
|
+
outputPreview: JSON.stringify({
|
|
2931
|
+
event: "cli-execution-failed",
|
|
2932
|
+
error: error?.message ?? "Unknown error"
|
|
2933
|
+
}).slice(0, 2e3),
|
|
2934
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2935
|
+
isComplete: true
|
|
2936
|
+
},
|
|
2937
|
+
traceContext.tracePushUrl,
|
|
2938
|
+
traceContext.routeHeader,
|
|
2939
|
+
traceContext.authToken
|
|
2940
|
+
);
|
|
2941
|
+
}
|
|
2942
|
+
reject(
|
|
2943
|
+
error ?? new Error(
|
|
2944
|
+
`OpenCode CLI execution failed (exit code unknown).
|
|
2945
|
+
Stderr: ${stderr.slice(0, 1e3)}`
|
|
2946
|
+
)
|
|
2947
|
+
);
|
|
2948
|
+
return;
|
|
2949
|
+
}
|
|
2950
|
+
const endTime = /* @__PURE__ */ new Date();
|
|
2951
|
+
const totalDurationMs = endTime.getTime() - startTime.getTime();
|
|
2952
|
+
let outputText = "";
|
|
2953
|
+
for (const { event: evt } of allEvents) {
|
|
2954
|
+
if (evt.type === "text") {
|
|
2955
|
+
outputText += evt.part.text;
|
|
2956
|
+
}
|
|
2957
|
+
}
|
|
2958
|
+
if (!outputText) {
|
|
2959
|
+
reject(
|
|
2960
|
+
new Error(
|
|
2961
|
+
`Agent produced no text output. Model: ${providerID}/${modelID}, Events: ${allEvents.length}`
|
|
2962
|
+
)
|
|
2963
|
+
);
|
|
2964
|
+
return;
|
|
2965
|
+
}
|
|
2966
|
+
let inputTokens = 0;
|
|
2967
|
+
let outputTokens = 0;
|
|
2968
|
+
let costUsd = 0;
|
|
2969
|
+
for (const { event: evt } of allEvents) {
|
|
2970
|
+
if (evt.type === "step_finish") {
|
|
2971
|
+
const sf = evt;
|
|
2972
|
+
inputTokens += sf.part.tokens.input;
|
|
2973
|
+
outputTokens += sf.part.tokens.output;
|
|
2974
|
+
costUsd += sf.part.cost;
|
|
2975
|
+
}
|
|
2976
|
+
}
|
|
2977
|
+
if (traceContext) {
|
|
2978
|
+
emitTraceEvent(
|
|
2979
|
+
{
|
|
2980
|
+
evalRunId: traceContext.evalRunId,
|
|
2981
|
+
scenarioId: traceContext.scenarioId,
|
|
2982
|
+
scenarioName: traceContext.scenarioName,
|
|
2983
|
+
targetId: traceContext.targetId,
|
|
2984
|
+
targetName: traceContext.targetName,
|
|
2985
|
+
stepNumber: traceStepNumber + 1,
|
|
2986
|
+
type: import_evalforge_types8.LiveTraceEventType.COMPLETION,
|
|
2987
|
+
outputPreview: "Scenario execution completed",
|
|
2988
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2989
|
+
isComplete: true
|
|
2990
|
+
},
|
|
2991
|
+
traceContext.tracePushUrl,
|
|
2992
|
+
traceContext.routeHeader,
|
|
2993
|
+
traceContext.authToken
|
|
2994
|
+
);
|
|
2995
|
+
}
|
|
2996
|
+
const modelStr = options.model || `${providerID}/${modelID}`;
|
|
2997
|
+
const llmTrace = buildLLMTrace(
|
|
2998
|
+
allEvents,
|
|
2999
|
+
totalDurationMs,
|
|
3000
|
+
modelStr,
|
|
3001
|
+
providerID,
|
|
3002
|
+
startTime
|
|
2851
3003
|
);
|
|
2852
|
-
|
|
2853
|
-
|
|
2854
|
-
|
|
2855
|
-
|
|
3004
|
+
const conversation = buildConversation2(allEvents);
|
|
3005
|
+
resolve2({
|
|
3006
|
+
result: {
|
|
3007
|
+
outputText,
|
|
3008
|
+
durationMs: totalDurationMs,
|
|
3009
|
+
usage: {
|
|
3010
|
+
inputTokens,
|
|
3011
|
+
outputTokens,
|
|
3012
|
+
totalTokens: inputTokens + outputTokens
|
|
3013
|
+
},
|
|
3014
|
+
costUsd
|
|
3015
|
+
},
|
|
3016
|
+
llmTrace,
|
|
3017
|
+
conversation
|
|
3018
|
+
});
|
|
3019
|
+
};
|
|
3020
|
+
let child;
|
|
3021
|
+
try {
|
|
3022
|
+
child = (0, import_child_process.spawn)("opencode", args, {
|
|
3023
|
+
cwd: options.cwd,
|
|
3024
|
+
env,
|
|
3025
|
+
stdio: ["ignore", "pipe", "pipe"],
|
|
3026
|
+
detached: true
|
|
3027
|
+
});
|
|
3028
|
+
} catch (spawnError) {
|
|
3029
|
+
reject(
|
|
3030
|
+
new Error(
|
|
3031
|
+
`Failed to spawn opencode: ${spawnError instanceof Error ? spawnError.message : String(spawnError)}`
|
|
3032
|
+
)
|
|
3033
|
+
);
|
|
3034
|
+
return;
|
|
3035
|
+
}
|
|
3036
|
+
timers.timeout = setTimeout(() => {
|
|
3037
|
+
if (!resolved) {
|
|
3038
|
+
console.error(`[OpenCode] Process timed out after ${SDK_TIMEOUT_MS}ms`);
|
|
3039
|
+
killProcess(child, resolved);
|
|
3040
|
+
finalize(
|
|
3041
|
+
false,
|
|
3042
|
+
new Error(
|
|
3043
|
+
`OpenCode execution timed out after ${SDK_TIMEOUT_MS}ms. Skills: ${skillNames}, Scenario: ${scenario.name}, MaxTurns: ${maxTurns}`
|
|
3044
|
+
)
|
|
3045
|
+
);
|
|
3046
|
+
}
|
|
3047
|
+
}, SDK_TIMEOUT_MS);
|
|
3048
|
+
timers.idleCheck = setInterval(() => {
|
|
3049
|
+
if (resolved) return;
|
|
3050
|
+
const idleTime = Date.now() - lastOutputTime;
|
|
3051
|
+
if (idleTime >= IDLE_TIMEOUT_MS) {
|
|
3052
|
+
console.warn(
|
|
3053
|
+
`[OpenCode] Process appears stuck - no output for ${Math.round(idleTime / 1e3)}s. Killing process.`
|
|
3054
|
+
);
|
|
3055
|
+
killProcess(child, resolved);
|
|
3056
|
+
finalize(
|
|
3057
|
+
false,
|
|
3058
|
+
new Error(
|
|
3059
|
+
`OpenCode process stuck - no output for ${Math.round(idleTime / 1e3)} seconds (idle timeout). Skills: ${skillNames}, Scenario: ${scenario.name}`
|
|
3060
|
+
)
|
|
3061
|
+
);
|
|
3062
|
+
}
|
|
3063
|
+
}, IDLE_CHECK_INTERVAL_MS);
|
|
2856
3064
|
if (traceContext) {
|
|
2857
|
-
eventStreamAbort = new AbortController();
|
|
2858
3065
|
const executionStartTime = Date.now();
|
|
2859
|
-
(async () => {
|
|
2860
|
-
try {
|
|
2861
|
-
const events = await client.event.subscribe();
|
|
2862
|
-
for await (const event of events.stream) {
|
|
2863
|
-
if (eventStreamAbort.signal.aborted) break;
|
|
2864
|
-
const evt = event;
|
|
2865
|
-
if (evt.type === "message.part.updated") {
|
|
2866
|
-
const { part } = evt.properties;
|
|
2867
|
-
traceStepNumber++;
|
|
2868
|
-
const traceEvent = createTraceEventFromPart(
|
|
2869
|
-
part,
|
|
2870
|
-
traceContext,
|
|
2871
|
-
traceStepNumber,
|
|
2872
|
-
false
|
|
2873
|
-
);
|
|
2874
|
-
if (traceEvent) {
|
|
2875
|
-
lastToolName = traceEvent.toolName;
|
|
2876
|
-
lastFilePath = traceEvent.filePath;
|
|
2877
|
-
if (traceEvent.type === import_evalforge_types8.LiveTraceEventType.THINKING) {
|
|
2878
|
-
lastAction = "Thinking...";
|
|
2879
|
-
} else if (traceEvent.type === import_evalforge_types8.LiveTraceEventType.TOOL_USE) {
|
|
2880
|
-
lastAction = extractToolAction(
|
|
2881
|
-
traceEvent.toolName ?? "",
|
|
2882
|
-
void 0
|
|
2883
|
-
);
|
|
2884
|
-
} else if (traceEvent.type === import_evalforge_types8.LiveTraceEventType.FILE_WRITE) {
|
|
2885
|
-
lastAction = `Writing: ${traceEvent.filePath || "file"}`;
|
|
2886
|
-
} else if (traceEvent.type === import_evalforge_types8.LiveTraceEventType.FILE_READ) {
|
|
2887
|
-
lastAction = `Reading: ${traceEvent.filePath || "file"}`;
|
|
2888
|
-
} else if (traceEvent.type === import_evalforge_types8.LiveTraceEventType.COMPLETION) {
|
|
2889
|
-
lastAction = "Processing response...";
|
|
2890
|
-
}
|
|
2891
|
-
emitTraceEvent(
|
|
2892
|
-
traceEvent,
|
|
2893
|
-
traceContext.tracePushUrl,
|
|
2894
|
-
traceContext.routeHeader,
|
|
2895
|
-
traceContext.authToken
|
|
2896
|
-
);
|
|
2897
|
-
}
|
|
2898
|
-
} else if (evt.type === "session.error") {
|
|
2899
|
-
const props = evt.properties;
|
|
2900
|
-
traceStepNumber++;
|
|
2901
|
-
emitTraceEvent(
|
|
2902
|
-
{
|
|
2903
|
-
evalRunId: traceContext.evalRunId,
|
|
2904
|
-
scenarioId: traceContext.scenarioId,
|
|
2905
|
-
scenarioName: traceContext.scenarioName,
|
|
2906
|
-
targetId: traceContext.targetId,
|
|
2907
|
-
targetName: traceContext.targetName,
|
|
2908
|
-
stepNumber: traceStepNumber,
|
|
2909
|
-
type: import_evalforge_types8.LiveTraceEventType.DIAGNOSTIC,
|
|
2910
|
-
outputPreview: `Session error: ${JSON.stringify(props.error)}`.slice(
|
|
2911
|
-
0,
|
|
2912
|
-
500
|
|
2913
|
-
),
|
|
2914
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2915
|
-
isComplete: false
|
|
2916
|
-
},
|
|
2917
|
-
traceContext.tracePushUrl,
|
|
2918
|
-
traceContext.routeHeader,
|
|
2919
|
-
traceContext.authToken
|
|
2920
|
-
);
|
|
2921
|
-
}
|
|
2922
|
-
}
|
|
2923
|
-
} catch {
|
|
2924
|
-
}
|
|
2925
|
-
})();
|
|
2926
3066
|
let lastReportedAction = "";
|
|
2927
3067
|
let sameActionCount = 0;
|
|
2928
|
-
|
|
3068
|
+
timers.heartbeat = setInterval(() => {
|
|
2929
3069
|
const elapsedMs = Date.now() - executionStartTime;
|
|
2930
3070
|
let progressMessage = lastAction;
|
|
2931
3071
|
if (lastAction === lastReportedAction) {
|
|
@@ -2966,212 +3106,83 @@ async function executeWithOpenCode(skills, scenario, options) {
|
|
|
2966
3106
|
);
|
|
2967
3107
|
}, 1e4);
|
|
2968
3108
|
}
|
|
2969
|
-
|
|
2970
|
-
|
|
2971
|
-
|
|
2972
|
-
|
|
2973
|
-
|
|
2974
|
-
|
|
2975
|
-
|
|
2976
|
-
|
|
2977
|
-
|
|
2978
|
-
|
|
2979
|
-
|
|
2980
|
-
|
|
2981
|
-
|
|
2982
|
-
|
|
2983
|
-
|
|
2984
|
-
|
|
2985
|
-
|
|
2986
|
-
|
|
2987
|
-
|
|
2988
|
-
|
|
2989
|
-
|
|
2990
|
-
|
|
2991
|
-
|
|
2992
|
-
|
|
2993
|
-
|
|
2994
|
-
|
|
2995
|
-
|
|
2996
|
-
|
|
2997
|
-
|
|
2998
|
-
|
|
2999
|
-
|
|
3000
|
-
|
|
3001
|
-
|
|
3002
|
-
|
|
3003
|
-
|
|
3004
|
-
if ("error" in promptResult && promptResult.error) {
|
|
3005
|
-
const errPayload = promptResult.error;
|
|
3006
|
-
throw new Error(
|
|
3007
|
-
`Agent prompt failed: ${errPayload.name ?? "UnknownError"} - ${JSON.stringify(errPayload.data ?? errPayload)}`
|
|
3008
|
-
);
|
|
3009
|
-
}
|
|
3010
|
-
console.log("[executeWithOpenCode] Prompt completed, fetching messages...");
|
|
3011
|
-
const messagesResponse = await client.session.messages({
|
|
3012
|
-
path: { id: sessionId }
|
|
3013
|
-
});
|
|
3014
|
-
const allMessages = messagesResponse.data ?? [];
|
|
3015
|
-
console.log(
|
|
3016
|
-
`[executeWithOpenCode] Got ${allMessages.length} message(s) from history`
|
|
3017
|
-
);
|
|
3018
|
-
if (traceContext) {
|
|
3019
|
-
emitTraceEvent(
|
|
3020
|
-
{
|
|
3021
|
-
evalRunId: traceContext.evalRunId,
|
|
3022
|
-
scenarioId: traceContext.scenarioId,
|
|
3023
|
-
scenarioName: traceContext.scenarioName,
|
|
3024
|
-
targetId: traceContext.targetId,
|
|
3025
|
-
targetName: traceContext.targetName,
|
|
3026
|
-
stepNumber: traceStepNumber + 1,
|
|
3027
|
-
type: import_evalforge_types8.LiveTraceEventType.COMPLETION,
|
|
3028
|
-
outputPreview: "Scenario execution completed",
|
|
3029
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
3030
|
-
isComplete: true
|
|
3031
|
-
},
|
|
3032
|
-
traceContext.tracePushUrl,
|
|
3033
|
-
traceContext.routeHeader,
|
|
3034
|
-
traceContext.authToken
|
|
3035
|
-
);
|
|
3036
|
-
}
|
|
3037
|
-
const endTime = /* @__PURE__ */ new Date();
|
|
3038
|
-
const totalDurationMs = endTime.getTime() - startTime.getTime();
|
|
3039
|
-
const resultData = promptResult.data;
|
|
3040
|
-
const lastAssistantInfo = resultData?.info;
|
|
3041
|
-
if (lastAssistantInfo?.error) {
|
|
3042
|
-
const err = lastAssistantInfo.error;
|
|
3043
|
-
throw new Error(
|
|
3044
|
-
`Agent execution failed: ${err.name} - ${JSON.stringify(err.data)}`
|
|
3045
|
-
);
|
|
3046
|
-
}
|
|
3047
|
-
let outputText = "";
|
|
3048
|
-
if (resultData?.parts) {
|
|
3049
|
-
for (const part of resultData.parts) {
|
|
3050
|
-
if (part.type === "text") {
|
|
3051
|
-
outputText += part.text;
|
|
3052
|
-
}
|
|
3053
|
-
}
|
|
3054
|
-
}
|
|
3055
|
-
if (!outputText && allMessages.length > 0) {
|
|
3056
|
-
for (let i = allMessages.length - 1; i >= 0; i--) {
|
|
3057
|
-
const msg = allMessages[i];
|
|
3058
|
-
if (msg.info.role === "assistant") {
|
|
3059
|
-
const assistantInfo = msg.info;
|
|
3060
|
-
if (assistantInfo.error) {
|
|
3061
|
-
throw new Error(
|
|
3062
|
-
`Agent execution failed: ${assistantInfo.error.name} - ${JSON.stringify(assistantInfo.error.data)}`
|
|
3063
|
-
);
|
|
3064
|
-
}
|
|
3065
|
-
for (const part of msg.parts) {
|
|
3066
|
-
if (part.type === "text") {
|
|
3067
|
-
outputText += part.text;
|
|
3109
|
+
child.stdout?.on("data", (data) => {
|
|
3110
|
+
const text = data.toString();
|
|
3111
|
+
lastOutputTime = Date.now();
|
|
3112
|
+
lineBuffer += text;
|
|
3113
|
+
const lines = lineBuffer.split("\n");
|
|
3114
|
+
lineBuffer = lines.pop() || "";
|
|
3115
|
+
for (const line of lines) {
|
|
3116
|
+
if (!line.trim()) continue;
|
|
3117
|
+
const evt = tryParseJson(line);
|
|
3118
|
+
if (!evt || !evt.type) continue;
|
|
3119
|
+
allEvents.push({ event: evt, receivedAt: Date.now() });
|
|
3120
|
+
if (traceContext) {
|
|
3121
|
+
traceStepNumber++;
|
|
3122
|
+
const traceEvt = createTraceEventFromNdjson(
|
|
3123
|
+
evt,
|
|
3124
|
+
traceContext,
|
|
3125
|
+
traceStepNumber,
|
|
3126
|
+
false
|
|
3127
|
+
);
|
|
3128
|
+
if (traceEvt) {
|
|
3129
|
+
lastToolName = traceEvt.toolName;
|
|
3130
|
+
lastFilePath = traceEvt.filePath;
|
|
3131
|
+
if (traceEvt.type === import_evalforge_types8.LiveTraceEventType.THINKING) {
|
|
3132
|
+
lastAction = "Thinking...";
|
|
3133
|
+
} else if (traceEvt.type === import_evalforge_types8.LiveTraceEventType.TOOL_USE) {
|
|
3134
|
+
lastAction = extractToolAction(
|
|
3135
|
+
traceEvt.toolName ?? "",
|
|
3136
|
+
void 0
|
|
3137
|
+
);
|
|
3138
|
+
} else if (traceEvt.type === import_evalforge_types8.LiveTraceEventType.FILE_WRITE) {
|
|
3139
|
+
lastAction = `Writing: ${traceEvt.filePath || "file"}`;
|
|
3140
|
+
} else if (traceEvt.type === import_evalforge_types8.LiveTraceEventType.FILE_READ) {
|
|
3141
|
+
lastAction = `Reading: ${traceEvt.filePath || "file"}`;
|
|
3142
|
+
} else if (traceEvt.type === import_evalforge_types8.LiveTraceEventType.COMPLETION) {
|
|
3143
|
+
lastAction = "Processing response...";
|
|
3068
3144
|
}
|
|
3145
|
+
emitTraceEvent(
|
|
3146
|
+
traceEvt,
|
|
3147
|
+
traceContext.tracePushUrl,
|
|
3148
|
+
traceContext.routeHeader,
|
|
3149
|
+
traceContext.authToken
|
|
3150
|
+
);
|
|
3069
3151
|
}
|
|
3070
|
-
if (outputText) break;
|
|
3071
3152
|
}
|
|
3072
3153
|
}
|
|
3073
|
-
}
|
|
3074
|
-
|
|
3075
|
-
const
|
|
3076
|
-
|
|
3077
|
-
|
|
3078
|
-
|
|
3079
|
-
|
|
3080
|
-
|
|
3081
|
-
|
|
3082
|
-
|
|
3083
|
-
|
|
3084
|
-
|
|
3085
|
-
totalTokens: lastAssistantInfo.tokens.input + lastAssistantInfo.tokens.output
|
|
3086
|
-
} : { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
|
|
3087
|
-
const costUsd = lastAssistantInfo?.cost;
|
|
3088
|
-
const modelStr = options.model || DEFAULT_MODEL3;
|
|
3089
|
-
const llmTrace = buildLLMTrace(
|
|
3090
|
-
allMessages,
|
|
3091
|
-
totalDurationMs,
|
|
3092
|
-
modelStr,
|
|
3093
|
-
providerID
|
|
3094
|
-
);
|
|
3095
|
-
const conversation = buildConversation2(allMessages);
|
|
3096
|
-
return {
|
|
3097
|
-
result: {
|
|
3098
|
-
outputText,
|
|
3099
|
-
durationMs: totalDurationMs,
|
|
3100
|
-
usage,
|
|
3101
|
-
costUsd
|
|
3102
|
-
},
|
|
3103
|
-
llmTrace,
|
|
3104
|
-
conversation
|
|
3105
|
-
};
|
|
3106
|
-
} catch (sdkError) {
|
|
3107
|
-
if (timeoutHandle) clearTimeout(timeoutHandle);
|
|
3108
|
-
if (heartbeatHandle) clearInterval(heartbeatHandle);
|
|
3109
|
-
if (timedOut) {
|
|
3110
|
-
console.error("[SDK-TIMEOUT] Execution timed out:", sdkError);
|
|
3111
|
-
}
|
|
3112
|
-
const errorMessage = sdkError instanceof Error ? sdkError.message : String(sdkError);
|
|
3113
|
-
const errorStack = sdkError instanceof Error ? sdkError.stack : void 0;
|
|
3114
|
-
const errorName = sdkError instanceof Error ? sdkError.name : "Unknown";
|
|
3115
|
-
const causeDetails = [];
|
|
3116
|
-
let current = sdkError;
|
|
3117
|
-
while (current instanceof Error && current.cause) {
|
|
3118
|
-
current = current.cause;
|
|
3119
|
-
if (current instanceof Error) {
|
|
3120
|
-
causeDetails.push(`${current.name}: ${current.message}`);
|
|
3121
|
-
} else {
|
|
3122
|
-
causeDetails.push(String(current));
|
|
3154
|
+
});
|
|
3155
|
+
child.stderr?.on("data", (data) => {
|
|
3156
|
+
const text = data.toString();
|
|
3157
|
+
stderr += text;
|
|
3158
|
+
lastOutputTime = Date.now();
|
|
3159
|
+
});
|
|
3160
|
+
child.on("close", (code) => {
|
|
3161
|
+
if (lineBuffer.trim()) {
|
|
3162
|
+
const evt = tryParseJson(lineBuffer);
|
|
3163
|
+
if (evt && evt.type) {
|
|
3164
|
+
allEvents.push({ event: evt, receivedAt: Date.now() });
|
|
3165
|
+
}
|
|
3123
3166
|
}
|
|
3124
|
-
|
|
3125
|
-
|
|
3126
|
-
Cause chain: ${causeDetails.join(" -> ")}` : "";
|
|
3127
|
-
console.error("[SDK-ERROR] ====== OPENCODE SDK EXECUTION FAILED ======");
|
|
3128
|
-
console.error("[SDK-ERROR] Error name:", errorName);
|
|
3129
|
-
console.error("[SDK-ERROR] Error message:", errorMessage);
|
|
3130
|
-
if (causeDetails.length > 0) {
|
|
3131
|
-
console.error("[SDK-ERROR] Cause chain:", causeDetails.join(" -> "));
|
|
3132
|
-
}
|
|
3133
|
-
if (errorStack) {
|
|
3134
|
-
console.error("[SDK-ERROR] Stack:", errorStack);
|
|
3135
|
-
}
|
|
3136
|
-
if (traceContext) {
|
|
3137
|
-
emitTraceEvent(
|
|
3138
|
-
{
|
|
3139
|
-
evalRunId: traceContext.evalRunId,
|
|
3140
|
-
scenarioId: traceContext.scenarioId,
|
|
3141
|
-
scenarioName: traceContext.scenarioName,
|
|
3142
|
-
targetId: traceContext.targetId,
|
|
3143
|
-
targetName: traceContext.targetName,
|
|
3144
|
-
stepNumber: traceStepNumber + 1,
|
|
3145
|
-
type: import_evalforge_types8.LiveTraceEventType.DIAGNOSTIC,
|
|
3146
|
-
outputPreview: JSON.stringify({
|
|
3147
|
-
event: "sdk-execution-failed",
|
|
3148
|
-
error: errorMessage,
|
|
3149
|
-
errorName,
|
|
3150
|
-
...causeDetails.length > 0 && {
|
|
3151
|
-
causeChain: causeDetails.join(" -> ")
|
|
3152
|
-
}
|
|
3153
|
-
}).slice(0, 2e3),
|
|
3154
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
3155
|
-
isComplete: true
|
|
3156
|
-
},
|
|
3157
|
-
traceContext.tracePushUrl,
|
|
3158
|
-
traceContext.routeHeader,
|
|
3159
|
-
traceContext.authToken
|
|
3167
|
+
console.log(
|
|
3168
|
+
`[executeWithOpenCode] Process exited with code ${code}, ${allEvents.length} events collected`
|
|
3160
3169
|
);
|
|
3161
|
-
|
|
3162
|
-
|
|
3163
|
-
|
|
3164
|
-
|
|
3165
|
-
|
|
3166
|
-
|
|
3167
|
-
|
|
3168
|
-
|
|
3169
|
-
|
|
3170
|
-
|
|
3171
|
-
} catch {
|
|
3170
|
+
if (code === 0) {
|
|
3171
|
+
finalize(true);
|
|
3172
|
+
} else {
|
|
3173
|
+
finalize(
|
|
3174
|
+
false,
|
|
3175
|
+
new Error(
|
|
3176
|
+
`OpenCode CLI exited with code ${code}.
|
|
3177
|
+
Stderr: ${stderr.slice(0, 1e3)}`
|
|
3178
|
+
)
|
|
3179
|
+
);
|
|
3172
3180
|
}
|
|
3173
|
-
}
|
|
3174
|
-
|
|
3181
|
+
});
|
|
3182
|
+
child.on("error", (error) => {
|
|
3183
|
+
finalize(false, new Error(`OpenCode CLI spawn error: ${error.message}`));
|
|
3184
|
+
});
|
|
3185
|
+
});
|
|
3175
3186
|
}
|
|
3176
3187
|
|
|
3177
3188
|
// src/run-scenario/agents/opencode/opencode-adapter.ts
|
|
@@ -3394,7 +3405,7 @@ function calculateStepCost(step, modelId, provider, tokenUsage) {
|
|
|
3394
3405
|
}
|
|
3395
3406
|
|
|
3396
3407
|
// src/run-scenario/agents/simple-agent/build-conversation.ts
|
|
3397
|
-
function buildConversation3(triggerPrompt, steps, executionStartMs) {
|
|
3408
|
+
function buildConversation3(triggerPrompt, steps, executionStartMs, stepTimestamps) {
|
|
3398
3409
|
const messages = [];
|
|
3399
3410
|
messages.push({
|
|
3400
3411
|
role: "user",
|
|
@@ -3403,11 +3414,9 @@ function buildConversation3(triggerPrompt, steps, executionStartMs) {
|
|
|
3403
3414
|
});
|
|
3404
3415
|
for (let i = 0; i < steps.length; i++) {
|
|
3405
3416
|
const step = steps[i];
|
|
3406
|
-
const stepTimestamp =
|
|
3407
|
-
executionStartMs
|
|
3408
|
-
|
|
3409
|
-
steps.length
|
|
3410
|
-
);
|
|
3417
|
+
const stepTimestamp = new Date(
|
|
3418
|
+
stepTimestamps[i] ?? executionStartMs
|
|
3419
|
+
).toISOString();
|
|
3411
3420
|
const assistantContent = [];
|
|
3412
3421
|
if (step.reasoningText) {
|
|
3413
3422
|
assistantContent.push({ type: "thinking", thinking: step.reasoningText });
|
|
@@ -3450,10 +3459,6 @@ function buildConversation3(triggerPrompt, steps, executionStartMs) {
|
|
|
3450
3459
|
}
|
|
3451
3460
|
return messages;
|
|
3452
3461
|
}
|
|
3453
|
-
function estimateStepTimestamp(startMs, stepIndex, totalSteps) {
|
|
3454
|
-
const offset = totalSteps > 1 ? (stepIndex + 1) / totalSteps : 1;
|
|
3455
|
-
return new Date(startMs + Math.round(offset * 1e3)).toISOString();
|
|
3456
|
-
}
|
|
3457
3462
|
|
|
3458
3463
|
// src/run-scenario/agents/simple-agent/execute.ts
|
|
3459
3464
|
var PROVIDER_ANTHROPIC2 = "anthropic";
|
|
@@ -3538,6 +3543,7 @@ async function executeWithAiSdk(context) {
|
|
|
3538
3543
|
}
|
|
3539
3544
|
}
|
|
3540
3545
|
};
|
|
3546
|
+
const stepTimestamps = [];
|
|
3541
3547
|
const result = await (0, import_ai.generateText)({
|
|
3542
3548
|
model,
|
|
3543
3549
|
system: systemPrompt,
|
|
@@ -3546,7 +3552,34 @@ async function executeWithAiSdk(context) {
|
|
|
3546
3552
|
maxOutputTokens: modelConfig.maxTokens,
|
|
3547
3553
|
tools: mcpTools,
|
|
3548
3554
|
stopWhen: mcpTools ? (0, import_ai.stepCountIs)(modelConfig.maxTurns ?? DEFAULT_MAX_TOOL_STEPS) : (0, import_ai.stepCountIs)(1),
|
|
3549
|
-
providerOptions: providerOpts
|
|
3555
|
+
providerOptions: providerOpts,
|
|
3556
|
+
onStepFinish: (step) => {
|
|
3557
|
+
stepTimestamps.push(Date.now());
|
|
3558
|
+
if (traceContext) {
|
|
3559
|
+
const isToolStep = step.toolCalls.length > 0;
|
|
3560
|
+
const firstToolCall = step.toolCalls[0];
|
|
3561
|
+
emitTraceEvent(
|
|
3562
|
+
{
|
|
3563
|
+
evalRunId: traceContext.evalRunId,
|
|
3564
|
+
scenarioId: traceContext.scenarioId,
|
|
3565
|
+
scenarioName: traceContext.scenarioName,
|
|
3566
|
+
targetId: traceContext.targetId,
|
|
3567
|
+
targetName: traceContext.targetName,
|
|
3568
|
+
stepNumber: stepTimestamps.length,
|
|
3569
|
+
type: isToolStep ? import_evalforge_types11.LiveTraceEventType.TOOL_USE : import_evalforge_types11.LiveTraceEventType.COMPLETION,
|
|
3570
|
+
toolName: firstToolCall?.toolName,
|
|
3571
|
+
toolArgs: firstToolCall ? (JSON.stringify(firstToolCall.input) ?? "").slice(0, 500) : void 0,
|
|
3572
|
+
outputPreview: step.text?.slice(0, 500),
|
|
3573
|
+
elapsedMs: Date.now() - startTime,
|
|
3574
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
3575
|
+
isComplete: false
|
|
3576
|
+
},
|
|
3577
|
+
traceContext.tracePushUrl,
|
|
3578
|
+
traceContext.routeHeader,
|
|
3579
|
+
traceContext.authToken
|
|
3580
|
+
);
|
|
3581
|
+
}
|
|
3582
|
+
}
|
|
3550
3583
|
});
|
|
3551
3584
|
const durationMs = Date.now() - startTime;
|
|
3552
3585
|
const usage = {
|
|
@@ -3560,16 +3593,17 @@ async function executeWithAiSdk(context) {
|
|
|
3560
3593
|
usage,
|
|
3561
3594
|
modelConfig.model,
|
|
3562
3595
|
provider,
|
|
3563
|
-
startTime
|
|
3596
|
+
startTime,
|
|
3597
|
+
stepTimestamps
|
|
3564
3598
|
);
|
|
3565
3599
|
if (traceContext) {
|
|
3566
|
-
|
|
3567
|
-
emitCompletionEvent(traceContext, result.steps.length + 1);
|
|
3600
|
+
emitCompletionEvent(traceContext, stepTimestamps.length + 1);
|
|
3568
3601
|
}
|
|
3569
3602
|
const conversation = buildConversation3(
|
|
3570
3603
|
scenario.triggerPrompt,
|
|
3571
3604
|
result.steps,
|
|
3572
|
-
startTime
|
|
3605
|
+
startTime,
|
|
3606
|
+
stepTimestamps
|
|
3573
3607
|
);
|
|
3574
3608
|
return {
|
|
3575
3609
|
outputText: result.text,
|
|
@@ -3610,20 +3644,16 @@ function findToolResultError(step) {
|
|
|
3610
3644
|
}
|
|
3611
3645
|
return null;
|
|
3612
3646
|
}
|
|
3613
|
-
function buildLLMTrace2(steps, totalDurationMs, totalUsage, modelId, provider, executionStartMs) {
|
|
3614
|
-
const totalStepTokens = steps.reduce(
|
|
3615
|
-
(sum, s) => sum + (s.usage.totalTokens ?? 0),
|
|
3616
|
-
0
|
|
3617
|
-
);
|
|
3647
|
+
function buildLLMTrace2(steps, totalDurationMs, totalUsage, modelId, provider, executionStartMs, stepTimestamps) {
|
|
3618
3648
|
const traceSteps = steps.map((step, i) => {
|
|
3619
|
-
const
|
|
3620
|
-
const
|
|
3621
|
-
const stepDurationMs =
|
|
3649
|
+
const stepFinishedAt = stepTimestamps[i] ?? executionStartMs;
|
|
3650
|
+
const stepStartedAt = i === 0 ? executionStartMs : stepTimestamps[i - 1] ?? executionStartMs;
|
|
3651
|
+
const stepDurationMs = stepFinishedAt - stepStartedAt;
|
|
3622
3652
|
const firstToolCall = step.toolCalls[0];
|
|
3623
3653
|
const tokenUsage = {
|
|
3624
3654
|
prompt: step.usage.inputTokens ?? 0,
|
|
3625
3655
|
completion: step.usage.outputTokens ?? 0,
|
|
3626
|
-
total:
|
|
3656
|
+
total: step.usage.totalTokens ?? 0
|
|
3627
3657
|
};
|
|
3628
3658
|
const costUsd = calculateStepCost(step, modelId, provider, tokenUsage);
|
|
3629
3659
|
const toolResultError = findToolResultError(step);
|
|
@@ -3634,9 +3664,7 @@ function buildLLMTrace2(steps, totalDurationMs, totalUsage, modelId, provider, e
|
|
|
3634
3664
|
type: step.toolCalls.length > 0 ? import_evalforge_types11.LLMStepType.TOOL_USE : import_evalforge_types11.LLMStepType.COMPLETION,
|
|
3635
3665
|
model: modelId,
|
|
3636
3666
|
provider,
|
|
3637
|
-
startedAt: new Date(
|
|
3638
|
-
executionStartMs + Math.round(totalDurationMs * (i / Math.max(steps.length, 1)))
|
|
3639
|
-
).toISOString(),
|
|
3667
|
+
startedAt: new Date(stepStartedAt).toISOString(),
|
|
3640
3668
|
durationMs: stepDurationMs,
|
|
3641
3669
|
tokenUsage,
|
|
3642
3670
|
costUsd,
|
|
@@ -3694,33 +3722,6 @@ function emitStartEvent(traceContext, startTime) {
|
|
|
3694
3722
|
traceContext.authToken
|
|
3695
3723
|
);
|
|
3696
3724
|
}
|
|
3697
|
-
function emitStepEvents(traceContext, steps, startTime) {
|
|
3698
|
-
for (let i = 0; i < steps.length; i++) {
|
|
3699
|
-
const step = steps[i];
|
|
3700
|
-
const isToolStep = step.toolCalls.length > 0;
|
|
3701
|
-
const firstToolCall = step.toolCalls[0];
|
|
3702
|
-
emitTraceEvent(
|
|
3703
|
-
{
|
|
3704
|
-
evalRunId: traceContext.evalRunId,
|
|
3705
|
-
scenarioId: traceContext.scenarioId,
|
|
3706
|
-
scenarioName: traceContext.scenarioName,
|
|
3707
|
-
targetId: traceContext.targetId,
|
|
3708
|
-
targetName: traceContext.targetName,
|
|
3709
|
-
stepNumber: i + 1,
|
|
3710
|
-
type: isToolStep ? import_evalforge_types11.LiveTraceEventType.TOOL_USE : import_evalforge_types11.LiveTraceEventType.COMPLETION,
|
|
3711
|
-
toolName: firstToolCall?.toolName,
|
|
3712
|
-
toolArgs: firstToolCall ? (JSON.stringify(firstToolCall.input) ?? "").slice(0, 500) : void 0,
|
|
3713
|
-
outputPreview: step.text?.slice(0, 500),
|
|
3714
|
-
elapsedMs: Date.now() - startTime,
|
|
3715
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
3716
|
-
isComplete: false
|
|
3717
|
-
},
|
|
3718
|
-
traceContext.tracePushUrl,
|
|
3719
|
-
traceContext.routeHeader,
|
|
3720
|
-
traceContext.authToken
|
|
3721
|
-
);
|
|
3722
|
-
}
|
|
3723
|
-
}
|
|
3724
3725
|
function emitCompletionEvent(traceContext, stepNumber) {
|
|
3725
3726
|
emitTraceEvent(
|
|
3726
3727
|
{
|
|
@@ -3757,7 +3758,7 @@ defaultRegistry.register(simpleAgentAdapter);
|
|
|
3757
3758
|
|
|
3758
3759
|
// src/run-scenario/file-diff.ts
|
|
3759
3760
|
var import_fs2 = require("fs");
|
|
3760
|
-
var
|
|
3761
|
+
var import_path11 = require("path");
|
|
3761
3762
|
|
|
3762
3763
|
// ../../node_modules/diff/lib/index.mjs
|
|
3763
3764
|
function Diff() {
|
|
@@ -3933,7 +3934,7 @@ Diff.prototype = {
|
|
|
3933
3934
|
tokenize: function tokenize(value) {
|
|
3934
3935
|
return Array.from(value);
|
|
3935
3936
|
},
|
|
3936
|
-
join: function
|
|
3937
|
+
join: function join9(chars) {
|
|
3937
3938
|
return chars.join("");
|
|
3938
3939
|
},
|
|
3939
3940
|
postProcess: function postProcess(changeObjects) {
|
|
@@ -4382,8 +4383,8 @@ function snapshotDirectory(dir, baseDir) {
|
|
|
4382
4383
|
}
|
|
4383
4384
|
const entries = (0, import_fs2.readdirSync)(dir, { withFileTypes: true });
|
|
4384
4385
|
for (const entry of entries) {
|
|
4385
|
-
const fullPath = (0,
|
|
4386
|
-
const relativePath = (0,
|
|
4386
|
+
const fullPath = (0, import_path11.join)(dir, entry.name);
|
|
4387
|
+
const relativePath = (0, import_path11.relative)(base, fullPath);
|
|
4387
4388
|
if (shouldIgnore(entry.name)) {
|
|
4388
4389
|
continue;
|
|
4389
4390
|
}
|