@wix/evalforge-evaluator 0.129.0 → 0.130.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -2327,7 +2327,12 @@ async function buildOpenCodeEnv(options) {
2327
2327
  // src/run-scenario/agents/opencode/build-trace.ts
2328
2328
  var import_evalforge_types7 = require("@wix/evalforge-types");
2329
2329
  var import_crypto2 = require("crypto");
2330
/**
 * Strips a leading "<prefix>/" segment from a model identifier.
 *
 * Everything up to and including the first "/" is removed, so
 * "anthropic/claude-sonnet-4" becomes "claude-sonnet-4" and "a/b/c" becomes
 * "b/c". An identifier that contains no "/", or whose first "/" is at
 * index 0, is returned unchanged.
 *
 * Non-string input (for example `undefined` when no model was supplied) is
 * returned as-is rather than throwing, because this helper is called
 * unconditionally on the `model` argument of buildLLMTrace.
 *
 * @param {string} modelId - Model identifier, optionally prefixed with "<prefix>/".
 * @returns {string} The identifier without its leading prefix segment; the
 *   input itself when it is not a string.
 */
function toCanonicalModelId(modelId) {
  // Guard: calling .indexOf on null/undefined would throw a TypeError.
  if (typeof modelId !== "string") {
    return modelId;
  }
  const slashIndex = modelId.indexOf("/");
  // `> 0` (not `>= 0`) deliberately leaves ids that start with "/" untouched.
  return slashIndex > 0 ? modelId.slice(slashIndex + 1) : modelId;
}
2330
2334
  function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, executionStartTime) {
2335
+ const canonicalModel = toCanonicalModelId(model);
2331
2336
  const turns = [];
2332
2337
  let current = {
2333
2338
  textParts: [],
@@ -2377,7 +2382,7 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
2377
2382
  const stepOutputTokens = sf?.tokens.output ?? 0;
2378
2383
  const stepCost = sf?.cost ?? 0;
2379
2384
  const finishReason = sf?.reason ?? "unknown";
2380
- const stepModel = sf?.modelID || model;
2385
+ const stepModel = toCanonicalModelId(sf?.modelID || model);
2381
2386
  const stepProvider = sf?.providerID || provider;
2382
2387
  const turnEndMs = turn.receivedAt ?? executionStartMs + totalDurationMs;
2383
2388
  const prevEndMs = turnIndex > 0 ? turns[turnIndex - 1].receivedAt ?? executionStartMs : executionStartMs;
@@ -2528,7 +2533,7 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
2528
2533
  entry.costUsd += step.costUsd;
2529
2534
  stepTypeBreakdown[step.type] = entry;
2530
2535
  }
2531
- const modelUsed = allSteps[0]?.model || model;
2536
+ const modelUsed = allSteps[0]?.model || canonicalModel;
2532
2537
  const summary = {
2533
2538
  totalSteps: allSteps.length,
2534
2539
  totalTurns: turns.length,
@@ -2720,6 +2725,16 @@ async function prepareOpenCodeEnvironment(cwd, skills, options) {
2720
2725
  `Failed to write skills to filesystem: ${writeError instanceof Error ? writeError.message : String(writeError)}`
2721
2726
  );
2722
2727
  }
2728
+ let systemPrompt;
2729
+ if (options.systemPrompt === null || options.systemPrompt === "") {
2730
+ } else if (options.systemPrompt != null) {
2731
+ systemPrompt = options.systemPrompt;
2732
+ } else {
2733
+ systemPrompt = import_evalforge_types8.DEFAULT_EVALUATOR_SYSTEM_PROMPT;
2734
+ }
2735
+ if (systemPrompt) {
2736
+ await writeSystemPromptRule(cwd, systemPrompt);
2737
+ }
2723
2738
  }
2724
2739
  async function writeSystemPromptRule(cwd, systemPrompt) {
2725
2740
  const rulesDir = (0, import_path10.join)(cwd, ".opencode", "rules");
@@ -3053,16 +3068,6 @@ async function executeWithOpenCode(skills, scenario, options) {
3053
3068
  traceContext.authToken
3054
3069
  );
3055
3070
  }
3056
- let systemPrompt;
3057
- if (options.systemPrompt === null || options.systemPrompt === "") {
3058
- } else if (options.systemPrompt != null) {
3059
- systemPrompt = options.systemPrompt;
3060
- } else {
3061
- systemPrompt = import_evalforge_types8.DEFAULT_EVALUATOR_SYSTEM_PROMPT;
3062
- }
3063
- if (systemPrompt) {
3064
- await writeSystemPromptRule(options.cwd, systemPrompt);
3065
- }
3066
3071
  const baseArgs = [
3067
3072
  "run",
3068
3073
  "--format",
@@ -3240,7 +3245,8 @@ var OpenCodeAdapter = class {
3240
3245
  await prepareOpenCodeEnvironment(context.cwd, context.skills, {
3241
3246
  mcps: context.mcps,
3242
3247
  subAgents: context.subAgents,
3243
- rules: context.rules
3248
+ rules: context.rules,
3249
+ systemPrompt: context.systemPrompt
3244
3250
  });
3245
3251
  }
3246
3252
  async execute(context) {