npm - @wix/evalforge-evaluator - Versions diffs - 0.129.0 → 0.130.0 - Mend

@wix/evalforge-evaluator 0.129.0 → 0.130.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (7) hide show

package/build/index.js +19 -13
package/build/index.js.map +2 -2
package/build/index.mjs +19 -13
package/build/index.mjs.map +3 -3
package/build/types/run-scenario/agents/opencode/build-trace.d.ts +0 -7
package/build/types/run-scenario/agents/opencode/execute.d.ts +1 -1
package/package.json +2 -2

package/build/index.js CHANGED Viewed

@@ -2327,7 +2327,12 @@ async function buildOpenCodeEnv(options) {
 // src/run-scenario/agents/opencode/build-trace.ts
 var import_evalforge_types7 = require("@wix/evalforge-types");
 var import_crypto2 = require("crypto");
+function toCanonicalModelId(modelId) {
+  const slashIndex = modelId.indexOf("/");
+  return slashIndex > 0 ? modelId.slice(slashIndex + 1) : modelId;
+}
 function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, executionStartTime) {
+  const canonicalModel = toCanonicalModelId(model);
   const turns = [];
   let current = {
     textParts: [],
@@ -2377,7 +2382,7 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
     const stepOutputTokens = sf?.tokens.output ?? 0;
     const stepCost = sf?.cost ?? 0;
     const finishReason = sf?.reason ?? "unknown";
-    const stepModel = sf?.modelID || model;
+    const stepModel = toCanonicalModelId(sf?.modelID || model);
     const stepProvider = sf?.providerID || provider;
     const turnEndMs = turn.receivedAt ?? executionStartMs + totalDurationMs;
     const prevEndMs = turnIndex > 0 ? turns[turnIndex - 1].receivedAt ?? executionStartMs : executionStartMs;
@@ -2528,7 +2533,7 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
     entry.costUsd += step.costUsd;
     stepTypeBreakdown[step.type] = entry;
   }
-  const modelUsed = allSteps[0]?.model || model;
+  const modelUsed = allSteps[0]?.model || canonicalModel;
   const summary = {
     totalSteps: allSteps.length,
     totalTurns: turns.length,
@@ -2720,6 +2725,16 @@ async function prepareOpenCodeEnvironment(cwd, skills, options) {
       `Failed to write skills to filesystem: ${writeError instanceof Error ? writeError.message : String(writeError)}`
     );
   }
+  let systemPrompt;
+  if (options.systemPrompt === null || options.systemPrompt === "") {
+  } else if (options.systemPrompt != null) {
+    systemPrompt = options.systemPrompt;
+  } else {
+    systemPrompt = import_evalforge_types8.DEFAULT_EVALUATOR_SYSTEM_PROMPT;
+  }
+  if (systemPrompt) {
+    await writeSystemPromptRule(cwd, systemPrompt);
+  }
 }
 async function writeSystemPromptRule(cwd, systemPrompt) {
   const rulesDir = (0, import_path10.join)(cwd, ".opencode", "rules");
@@ -3053,16 +3068,6 @@ async function executeWithOpenCode(skills, scenario, options) {
       traceContext.authToken
     );
   }
-  let systemPrompt;
-  if (options.systemPrompt === null || options.systemPrompt === "") {
-  } else if (options.systemPrompt != null) {
-    systemPrompt = options.systemPrompt;
-  } else {
-    systemPrompt = import_evalforge_types8.DEFAULT_EVALUATOR_SYSTEM_PROMPT;
-  }
-  if (systemPrompt) {
-    await writeSystemPromptRule(options.cwd, systemPrompt);
-  }
   const baseArgs = [
     "run",
     "--format",
@@ -3240,7 +3245,8 @@ var OpenCodeAdapter = class {
     await prepareOpenCodeEnvironment(context.cwd, context.skills, {
       mcps: context.mcps,
       subAgents: context.subAgents,
-      rules: context.rules
+      rules: context.rules,
+      systemPrompt: context.systemPrompt
     });
   }
   async execute(context) {