@wix/evalforge-evaluator 0.129.0 → 0.130.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js
CHANGED
|
@@ -2327,7 +2327,12 @@ async function buildOpenCodeEnv(options) {
|
|
|
2327
2327
|
// src/run-scenario/agents/opencode/build-trace.ts
|
|
2328
2328
|
var import_evalforge_types7 = require("@wix/evalforge-types");
|
|
2329
2329
|
var import_crypto2 = require("crypto");
|
|
2330
|
+
/**
 * Removes the first "/"-delimited segment from a model identifier.
 *
 * Everything up to and including the first "/" is dropped, so "a/b" becomes
 * "b" and "a/b/c" becomes "b/c". Identifiers that contain no slash, or whose
 * first slash is the very first character, are returned unchanged.
 * NOTE(review): the leading segment is presumably a provider prefix
 * (e.g. "<provider>/<model>") — confirm against the callers' inputs.
 *
 * @param {string} modelId - Model identifier, optionally prefixed with a
 *   "<segment>/" part.
 * @returns {string} The identifier without its leading segment. Non-string
 *   input is returned as-is instead of throwing.
 */
function toCanonicalModelId(modelId) {
  // Callers pass expressions such as `sf?.modelID || model`, which can be
  // undefined; tolerate that here rather than crashing on `.indexOf`.
  if (typeof modelId !== "string") {
    return modelId;
  }
  const slashIndex = modelId.indexOf("/");
  // `> 0` (not `>= 0`) keeps ids that start with "/" untouched.
  return slashIndex > 0 ? modelId.slice(slashIndex + 1) : modelId;
}
|
|
2330
2334
|
function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, executionStartTime) {
|
|
2335
|
+
const canonicalModel = toCanonicalModelId(model);
|
|
2331
2336
|
const turns = [];
|
|
2332
2337
|
let current = {
|
|
2333
2338
|
textParts: [],
|
|
@@ -2377,7 +2382,7 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
|
|
|
2377
2382
|
const stepOutputTokens = sf?.tokens.output ?? 0;
|
|
2378
2383
|
const stepCost = sf?.cost ?? 0;
|
|
2379
2384
|
const finishReason = sf?.reason ?? "unknown";
|
|
2380
|
-
const stepModel = sf?.modelID || model;
|
|
2385
|
+
const stepModel = toCanonicalModelId(sf?.modelID || model);
|
|
2381
2386
|
const stepProvider = sf?.providerID || provider;
|
|
2382
2387
|
const turnEndMs = turn.receivedAt ?? executionStartMs + totalDurationMs;
|
|
2383
2388
|
const prevEndMs = turnIndex > 0 ? turns[turnIndex - 1].receivedAt ?? executionStartMs : executionStartMs;
|
|
@@ -2528,7 +2533,7 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
|
|
|
2528
2533
|
entry.costUsd += step.costUsd;
|
|
2529
2534
|
stepTypeBreakdown[step.type] = entry;
|
|
2530
2535
|
}
|
|
2531
|
-
const modelUsed = allSteps[0]?.model ||
|
|
2536
|
+
const modelUsed = allSteps[0]?.model || canonicalModel;
|
|
2532
2537
|
const summary = {
|
|
2533
2538
|
totalSteps: allSteps.length,
|
|
2534
2539
|
totalTurns: turns.length,
|
|
@@ -2720,6 +2725,16 @@ async function prepareOpenCodeEnvironment(cwd, skills, options) {
|
|
|
2720
2725
|
`Failed to write skills to filesystem: ${writeError instanceof Error ? writeError.message : String(writeError)}`
|
|
2721
2726
|
);
|
|
2722
2727
|
}
|
|
2728
|
+
let systemPrompt;
|
|
2729
|
+
if (options.systemPrompt === null || options.systemPrompt === "") {
|
|
2730
|
+
} else if (options.systemPrompt != null) {
|
|
2731
|
+
systemPrompt = options.systemPrompt;
|
|
2732
|
+
} else {
|
|
2733
|
+
systemPrompt = import_evalforge_types8.DEFAULT_EVALUATOR_SYSTEM_PROMPT;
|
|
2734
|
+
}
|
|
2735
|
+
if (systemPrompt) {
|
|
2736
|
+
await writeSystemPromptRule(cwd, systemPrompt);
|
|
2737
|
+
}
|
|
2723
2738
|
}
|
|
2724
2739
|
async function writeSystemPromptRule(cwd, systemPrompt) {
|
|
2725
2740
|
const rulesDir = (0, import_path10.join)(cwd, ".opencode", "rules");
|
|
@@ -3053,16 +3068,6 @@ async function executeWithOpenCode(skills, scenario, options) {
|
|
|
3053
3068
|
traceContext.authToken
|
|
3054
3069
|
);
|
|
3055
3070
|
}
|
|
3056
|
-
let systemPrompt;
|
|
3057
|
-
if (options.systemPrompt === null || options.systemPrompt === "") {
|
|
3058
|
-
} else if (options.systemPrompt != null) {
|
|
3059
|
-
systemPrompt = options.systemPrompt;
|
|
3060
|
-
} else {
|
|
3061
|
-
systemPrompt = import_evalforge_types8.DEFAULT_EVALUATOR_SYSTEM_PROMPT;
|
|
3062
|
-
}
|
|
3063
|
-
if (systemPrompt) {
|
|
3064
|
-
await writeSystemPromptRule(options.cwd, systemPrompt);
|
|
3065
|
-
}
|
|
3066
3071
|
const baseArgs = [
|
|
3067
3072
|
"run",
|
|
3068
3073
|
"--format",
|
|
@@ -3240,7 +3245,8 @@ var OpenCodeAdapter = class {
|
|
|
3240
3245
|
await prepareOpenCodeEnvironment(context.cwd, context.skills, {
|
|
3241
3246
|
mcps: context.mcps,
|
|
3242
3247
|
subAgents: context.subAgents,
|
|
3243
|
-
rules: context.rules
|
|
3248
|
+
rules: context.rules,
|
|
3249
|
+
systemPrompt: context.systemPrompt
|
|
3244
3250
|
});
|
|
3245
3251
|
}
|
|
3246
3252
|
async execute(context) {
|