npm - @wix/evalforge-evaluator - Versions diffs - 0.99.0 → 0.100.0 - Mend

@wix/evalforge-evaluator 0.99.0 → 0.100.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Files changed (11)

package/build/index.js +147 -148
package/build/index.js.map +4 -4
package/build/index.mjs +127 -128
package/build/index.mjs.map +4 -4
package/build/types/fetch-evaluation-data.d.ts +2 -2
package/build/types/run-scenario/agents/claude-code/write-skills.d.ts +3 -1
package/build/types/run-scenario/agents/registry.d.ts +32 -63
package/build/types/run-scenario/index.d.ts +1 -1
package/build/types/run-scenario/run-agent-with-context.d.ts +3 -3
package/build/types/run-scenario/utils/write-files.d.ts +6 -0
package/package.json +4 -4

package/build/index.js CHANGED Viewed

@@ -382,9 +382,9 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
   const scenarios = await Promise.all(
     evalRun.scenarioIds.map((id) => api.getScenario(projectId2, id))
   );
-  let codeAgent = null;
+  let agent = null;
   if (evalRun.agentId) {
-    codeAgent = await api.getAgent(projectId2, evalRun.agentId);
+    agent = await api.getAgent(projectId2, evalRun.agentId);
   }
   let skills = [];
   let skillsGroup = null;
@@ -485,7 +485,7 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
   const skillsGroupName = skillsGroup?.name ?? "";
   return {
     evalRun,
-    codeAgent,
+    agent,
     skills,
     skillsGroup,
     skillsGroupName,
@@ -504,56 +504,18 @@ var import_eval_assertions = require("@wix/eval-assertions");
 var import_fs = require("fs");
 var import_os = require("os");
 var import_path2 = __toESM(require("path"));
-var import_evalforge_github_client2 = require("@wix/evalforge-github-client");
+var import_evalforge_github_client = require("@wix/evalforge-github-client");
-// src/run-scenario/agents/claude-code/write-skills.ts
+// src/run-scenario/utils/write-files.ts
 var import_promises = require("fs/promises");
 var import_path = require("path");
-var import_evalforge_github_client = require("@wix/evalforge-github-client");
-async function writeSkillsToFilesystem(cwd, skills, fetchFn = import_evalforge_github_client.fetchGitHubFolder) {
-  await Promise.all(
-    skills.map((skill) => writeSkillToFilesystem(cwd, skill, fetchFn))
-  );
-}
-async function writeSkillToFilesystem(cwd, skill, fetchFn = import_evalforge_github_client.fetchGitHubFolder) {
-  const skillName = skill.name;
-  const skillDir = (0, import_path.join)(cwd, ".claude", "skills", skillName);
-  await (0, import_promises.mkdir)(skillDir, { recursive: true });
-  const version = skill.latestVersion;
-  if (version?.files && version.files.length > 0) {
-    await writeSkillFiles(skillDir, version.files);
-    console.log(
-      `[Skill] ${skillName}: wrote ${version.files.length} file(s) from snapshot`
-    );
-  } else if (skill.source) {
-    try {
-      const files = await fetchFn(skill.source, {
-        userAgent: "EvalForge-Evaluator"
-      });
-      await writeSkillFiles(skillDir, files);
-      console.log(
-        `[Skill] ${skillName}: wrote ${files.length} file(s) from GitHub (live)`
-      );
-    } catch (error) {
-      const message = error instanceof Error ? error.message : "Unknown error";
-      console.error(
-        `[Skill] ${skillName}: GitHub fetch failed: ${message}, no files to fall back to`
-      );
-      throw new Error(
-        `Failed to write skill ${skillName} to filesystem: ${message}`
-      );
-    }
-  } else {
-    throw new Error(`Skill ${skillName} has no files and no source configured`);
-  }
-}
-async function writeSkillFiles(skillDir, files) {
-  const resolvedBase = (0, import_path.resolve)(skillDir);
+async function writeFilesToDirectory(targetDir, files) {
+  const resolvedBase = (0, import_path.resolve)(targetDir);
   for (const file of files) {
-    const filePath = (0, import_path.resolve)(skillDir, file.path);
+    const filePath = (0, import_path.resolve)(targetDir, file.path);
     if (!filePath.startsWith(resolvedBase + import_path.sep) && filePath !== resolvedBase) {
       throw new Error(
-        `Path traversal detected in skill file: "${file.path}" resolves outside skill directory`
+        `Path traversal detected: "${file.path}" resolves outside target directory`
       );
     }
     await (0, import_promises.mkdir)((0, import_path.dirname)(filePath), { recursive: true });
@@ -569,10 +531,10 @@ async function fetchAndWriteTemplateFiles(template, workDir) {
     );
     return;
   }
-  const files = await (0, import_evalforge_github_client2.fetchGitHubFolder)(template.source, {
+  const files = await (0, import_evalforge_github_client.fetchGitHubFolder)(template.source, {
     userAgent: "EvalForge-Evaluator"
   });
-  await writeSkillFiles(workDir, files);
+  await writeFilesToDirectory(workDir, files);
 }
 function writeWixEnvFile(workDir) {
   const configPath = import_path2.default.join(workDir, "wix.config.json");
@@ -626,86 +588,76 @@ var import_crypto2 = require("crypto");
 // src/run-scenario/agents/registry.ts
 var AgentAdapterRegistry = class {
-  /**
-   * Map of run commands to their registered adapters.
-   * Multiple commands can map to the same adapter.
-   */
-  adapters = /* @__PURE__ */ new Map();
-  /**
-   * Set of all registered adapter instances (for getAll).
-   */
+  /** Map of CLI commands to their registered adapters. */
+  commandMap = /* @__PURE__ */ new Map();
+  /** Map of adapter IDs to their registered adapters. */
+  idMap = /* @__PURE__ */ new Map();
+  /** Set of all registered adapter instances (for getAll). */
   registeredAdapters = /* @__PURE__ */ new Set();
   /**
    * Register an agent adapter.
    *
-   * The adapter will be registered for all commands in its supportedCommands array.
-   * If a command is already registered, it will be overwritten with a warning.
-   *
-   * @param adapter - The adapter to register
+   * The adapter is registered by its ID and for all commands in its supportedCommands array.
+   * If a command or ID is already registered, it will be overwritten with a warning.
    */
   register(adapter) {
     this.registeredAdapters.add(adapter);
+    this.idMap.set(adapter.id, adapter);
     for (const command of adapter.supportedCommands) {
-      if (this.adapters.has(command)) {
-        const existing = this.adapters.get(command);
+      if (this.commandMap.has(command)) {
+        const existing = this.commandMap.get(command);
         console.warn(
           `[AgentAdapterRegistry] Command "${command}" already registered by adapter "${existing.id}". Overwriting with adapter "${adapter.id}".`
         );
       }
-      this.adapters.set(command, adapter);
+      this.commandMap.set(command, adapter);
     }
   }
-  /**
-   * Get an adapter by run command.
-   *
-   * @param runCommand - The run command to look up
-   * @returns The registered adapter, or undefined if not found
-   */
-  get(runCommand) {
-    return this.adapters.get(runCommand);
+  /** Get an adapter by CLI command. */
+  getByCommand(command) {
+    return this.commandMap.get(command);
   }
-  /**
-   * Check if a command has a registered adapter.
-   *
-   * @param runCommand - The run command to check
-   * @returns True if an adapter is registered for this command
-   */
-  has(runCommand) {
-    return this.adapters.has(runCommand);
+  /** Get an adapter by adapter ID. */
+  getById(adapterId) {
+    return this.idMap.get(adapterId);
   }
   /**
-   * Get all registered adapters.
-   *
-   * @returns Array of all unique registered adapters
+   * Unified lookup: tries CLI command first, then adapter ID.
+   * Use this when the identifier could be either a command or an adapter ID.
    */
+  resolve(identifier) {
+    return this.commandMap.get(identifier) ?? this.idMap.get(identifier);
+  }
+  /** Check if a command or adapter ID has a registered adapter. */
+  has(identifier) {
+    return this.commandMap.has(identifier) || this.idMap.has(identifier);
+  }
+  /** Get all registered adapters. */
   getAll() {
     return Array.from(this.registeredAdapters);
   }
-  /**
-   * Get all supported commands.
-   *
-   * @returns Array of all registered run commands
-   */
+  /** Get all supported CLI commands. */
   getSupportedCommands() {
-    return Array.from(this.adapters.keys());
+    return Array.from(this.commandMap.keys());
+  }
+  /** Get all registered adapter IDs. */
+  getAdapterIds() {
+    return Array.from(this.idMap.keys());
   }
   /**
    * Unregister an adapter by its ID.
-   *
    * Removes the adapter and all its command mappings.
-   *
-   * @param adapterId - The ID of the adapter to remove
-   * @returns True if the adapter was found and removed
    */
   unregister(adapterId) {
     let found = false;
     for (const adapter of this.registeredAdapters) {
       if (adapter.id === adapterId) {
         this.registeredAdapters.delete(adapter);
+        this.idMap.delete(adapterId);
         found = true;
         for (const command of adapter.supportedCommands) {
-          if (this.adapters.get(command) === adapter) {
-            this.adapters.delete(command);
+          if (this.commandMap.get(command) === adapter) {
+            this.commandMap.delete(command);
           }
         }
         break;
@@ -713,22 +665,21 @@ var AgentAdapterRegistry = class {
     }
     return found;
   }
-  /**
-   * Clear all registered adapters.
-   * Primarily useful for testing.
-   */
+  /** Clear all registered adapters. Primarily useful for testing. */
   clear() {
-    this.adapters.clear();
+    this.commandMap.clear();
+    this.idMap.clear();
     this.registeredAdapters.clear();
   }
 };
 var defaultRegistry = new AgentAdapterRegistry();
-function getAdapter(runCommand) {
-  const adapter = defaultRegistry.get(runCommand);
+function getAdapter(identifier) {
+  const adapter = defaultRegistry.resolve(identifier);
   if (!adapter) {
-    const supported = defaultRegistry.getSupportedCommands();
+    const commands = defaultRegistry.getSupportedCommands();
+    const ids = defaultRegistry.getAdapterIds();
     throw new Error(
-      `No agent adapter registered for command "${runCommand}". Supported commands: ${supported.length > 0 ? supported.join(", ") : "(none registered)"}`
+      `No agent adapter registered for "${identifier}". Supported commands: ${commands.length > 0 ? commands.join(", ") : "(none)"}. Registered adapters: ${ids.length > 0 ? ids.join(", ") : "(none)"}`
     );
   }
   return adapter;
@@ -739,21 +690,65 @@ var import_evalforge_types4 = require("@wix/evalforge-types");
 // src/run-scenario/agents/claude-code/execute.ts
 var import_evalforge_types3 = require("@wix/evalforge-types");
+// src/run-scenario/agents/claude-code/write-skills.ts
+var import_promises2 = require("fs/promises");
+var import_path3 = require("path");
+var import_evalforge_github_client2 = require("@wix/evalforge-github-client");
+async function writeSkillsToFilesystem(cwd, skills, fetchFn = import_evalforge_github_client2.fetchGitHubFolder) {
+  await Promise.all(
+    skills.map((skill) => writeSkillToFilesystem(cwd, skill, fetchFn))
+  );
+}
+async function writeSkillToFilesystem(cwd, skill, fetchFn = import_evalforge_github_client2.fetchGitHubFolder) {
+  const skillName = skill.name;
+  const skillDir = (0, import_path3.join)(cwd, ".claude", "skills", skillName);
+  await (0, import_promises2.mkdir)(skillDir, { recursive: true });
+  const version = skill.latestVersion;
+  if (version?.files && version.files.length > 0) {
+    await writeFilesToDirectory(skillDir, version.files);
+    console.log(
+      `[Skill] ${skillName}: wrote ${version.files.length} file(s) from snapshot`
+    );
+  } else if (skill.source) {
+    try {
+      const files = await fetchFn(skill.source, {
+        userAgent: "EvalForge-Evaluator"
+      });
+      await writeFilesToDirectory(skillDir, files);
+      console.log(
+        `[Skill] ${skillName}: wrote ${files.length} file(s) from GitHub (live)`
+      );
+    } catch (error) {
+      const message = error instanceof Error ? error.message : "Unknown error";
+      console.error(
+        `[Skill] ${skillName}: GitHub fetch failed: ${message}, no files to fall back to`
+      );
+      throw new Error(
+        `Failed to write skill ${skillName} to filesystem: ${message}`
+      );
+    }
+  } else {
+    throw new Error(`Skill ${skillName} has no files and no source configured`);
+  }
+}
+// src/run-scenario/agents/claude-code/execute.ts
 var import_crypto = require("crypto");
 // src/run-scenario/agents/claude-code/write-mcp.ts
-var import_promises3 = require("fs/promises");
-var import_path4 = require("path");
+var import_promises4 = require("fs/promises");
+var import_path5 = require("path");
 var import_evalforge_types2 = require("@wix/evalforge-types");
 // src/run-scenario/agents/claude-code/resolve-mcp-placeholders.ts
-var import_promises2 = require("fs/promises");
-var import_path3 = require("path");
+var import_promises3 = require("fs/promises");
+var import_path4 = require("path");
 var import_os2 = require("os");
-var WIX_AUTH_FILE = (0, import_path3.join)((0, import_os2.homedir)(), ".wix", "auth", "api-key.json");
+var WIX_AUTH_FILE = (0, import_path4.join)((0, import_os2.homedir)(), ".wix", "auth", "api-key.json");
 async function loadWixAuthPlaceholders(authFilePath = WIX_AUTH_FILE) {
   try {
-    const content = await (0, import_promises2.readFile)(authFilePath, "utf-8");
+    const content = await (0, import_promises3.readFile)(authFilePath, "utf-8");
     const auth = JSON.parse(content);
     if (!auth.token || !auth.userInfo?.userId) {
       return {};
@@ -806,14 +801,14 @@ async function writeMcpToFilesystem(cwd, mcps) {
     null,
     2
   );
-  const filePath = (0, import_path4.join)(cwd, ".mcp.json");
-  await (0, import_promises3.writeFile)(filePath, content, "utf8");
+  const filePath = (0, import_path5.join)(cwd, ".mcp.json");
+  await (0, import_promises4.writeFile)(filePath, content, "utf8");
   console.log(`[MCP] Written to ${filePath}`);
 }
 // src/run-scenario/agents/claude-code/write-sub-agents.ts
-var import_promises4 = require("fs/promises");
-var import_path5 = require("path");
+var import_promises5 = require("fs/promises");
+var import_path6 = require("path");
 var AGENTS_DIR = ".claude/agents";
 function toAgentFilename(name, index, nameCount) {
   const base = (name || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
@@ -823,20 +818,20 @@ function toAgentFilename(name, index, nameCount) {
 }
 async function writeSubAgentsToFilesystem(cwd, subAgents) {
   if (subAgents.length === 0) return;
-  const agentsDir = (0, import_path5.join)(cwd, AGENTS_DIR);
-  await (0, import_promises4.mkdir)(agentsDir, { recursive: true });
+  const agentsDir = (0, import_path6.join)(cwd, AGENTS_DIR);
+  await (0, import_promises5.mkdir)(agentsDir, { recursive: true });
   const nameCount = /* @__PURE__ */ new Map();
   for (const [i, agent] of subAgents.entries()) {
     const filename = toAgentFilename(agent.name, i, nameCount);
-    const filePath = (0, import_path5.join)(agentsDir, `${filename}.md`);
-    await (0, import_promises4.writeFile)(filePath, agent.subAgentMd, "utf8");
+    const filePath = (0, import_path6.join)(agentsDir, `${filename}.md`);
+    await (0, import_promises5.writeFile)(filePath, agent.subAgentMd, "utf8");
   }
   console.log(`[SubAgents] Written to ${agentsDir}`);
 }
 // src/run-scenario/agents/claude-code/write-rules.ts
-var import_promises5 = require("fs/promises");
-var import_path6 = require("path");
+var import_promises6 = require("fs/promises");
+var import_path7 = require("path");
 var CURSOR_RULES_DIR = ".cursor/rules";
 function toRuleFilename(name, index, nameCount) {
   const base = (name || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `rule-${index}`;
@@ -847,13 +842,13 @@ function toRuleFilename(name, index, nameCount) {
 async function appendToFile(filePath, content) {
   let existing = "";
   try {
-    existing = await (0, import_promises5.readFile)(filePath, "utf8");
+    existing = await (0, import_promises6.readFile)(filePath, "utf8");
   } catch {
   }
   const merged = existing ? `${existing.trimEnd()}
 ${content}` : content;
-  await (0, import_promises5.writeFile)(filePath, merged, "utf8");
+  await (0, import_promises6.writeFile)(filePath, merged, "utf8");
 }
 async function writeRulesToFilesystem(cwd, rules) {
   if (rules.length === 0) return;
@@ -862,21 +857,21 @@ async function writeRulesToFilesystem(cwd, rules) {
   for (const [i, rule] of rules.entries()) {
     switch (rule.ruleType) {
       case "claude-md": {
-        await appendToFile((0, import_path6.join)(cwd, "CLAUDE.md"), rule.content);
+        await appendToFile((0, import_path7.join)(cwd, "CLAUDE.md"), rule.content);
         break;
       }
       case "agents-md": {
-        await appendToFile((0, import_path6.join)(cwd, "AGENTS.md"), rule.content);
+        await appendToFile((0, import_path7.join)(cwd, "AGENTS.md"), rule.content);
         break;
       }
       case "cursor-rule": {
         if (!hasCursorRules) {
-          await (0, import_promises5.mkdir)((0, import_path6.join)(cwd, CURSOR_RULES_DIR), { recursive: true });
+          await (0, import_promises6.mkdir)((0, import_path7.join)(cwd, CURSOR_RULES_DIR), { recursive: true });
           hasCursorRules = true;
         }
         const filename = toRuleFilename(rule.name, i, nameCount);
-        const filePath = (0, import_path6.join)(cwd, CURSOR_RULES_DIR, `${filename}.md`);
-        await (0, import_promises5.writeFile)(filePath, rule.content, "utf8");
+        const filePath = (0, import_path7.join)(cwd, CURSOR_RULES_DIR, `${filename}.md`);
+        await (0, import_promises6.writeFile)(filePath, rule.content, "utf8");
         break;
       }
     }
@@ -1911,7 +1906,7 @@ defaultRegistry.register(claudeCodeAdapter);
 // src/run-scenario/file-diff.ts
 var import_fs2 = require("fs");
-var import_path7 = require("path");
+var import_path8 = require("path");
 // ../../node_modules/diff/lib/index.mjs
 function Diff() {
@@ -2527,8 +2522,8 @@ function snapshotDirectory(dir, baseDir) {
   }
   const entries = (0, import_fs2.readdirSync)(dir, { withFileTypes: true });
   for (const entry of entries) {
-    const fullPath = (0, import_path7.join)(dir, entry.name);
-    const relativePath = (0, import_path7.relative)(base, fullPath);
+    const fullPath = (0, import_path8.join)(dir, entry.name);
+    const relativePath = (0, import_path8.relative)(base, fullPath);
     if (shouldIgnore(entry.name)) {
       continue;
     }
@@ -2640,14 +2635,17 @@ var import_evalforge_types5 = require("@wix/evalforge-types");
 var DEFAULT_AGENT_COMMAND = import_evalforge_types5.AgentRunCommand.CLAUDE;
 async function runAgentWithContext(config, evalRunId2, scenario, evalData, workDir) {
   const skillsGroupId = evalData.evalRun.skillsGroupId;
+  const agent = evalData.agent ?? void 0;
+  const isSDK = agent?.agentType === import_evalforge_types5.AgentType.SDK;
   if (!skillsGroupId) {
     throw new Error(`Eval run ${evalData.evalRun.id} has no skillsGroupId`);
   }
-  const agent = evalData.codeAgent ?? void 0;
-  const runCommand = agent?.runCommand ?? DEFAULT_AGENT_COMMAND;
-  const adapter = getAdapter(runCommand);
+  const identifier = isSDK ? agent.id : agent?.runCommand ?? DEFAULT_AGENT_COMMAND;
+  const adapter = getAdapter(identifier);
   const startedAt = (/* @__PURE__ */ new Date()).toISOString();
   const beforeSnapshot = workDir ? snapshotDirectory(workDir) : {};
+  const targetId = skillsGroupId ?? agent?.id ?? evalData.evalRun.id;
+  const targetName = evalData.skillsGroupName || agent?.name || "";
   const executionContext = {
     skills: evalData.skills,
     scenario,
@@ -2659,8 +2657,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
       evalRunId: evalRunId2,
       scenarioId: scenario.id,
       scenarioName: scenario.name,
-      targetId: skillsGroupId,
-      targetName: evalData.skillsGroupName,
+      targetId,
+      targetName,
       tracePushUrl: config.tracePushUrl,
       routeHeader: config.routeHeader,
       authToken: config.authToken
@@ -2677,8 +2675,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
   const templateFiles = workDir ? extractTemplateFiles(beforeSnapshot, afterSnapshot) : void 0;
   return {
     id: (0, import_crypto2.randomUUID)(),
-    targetId: skillsGroupId,
-    targetName: evalData.skillsGroupName,
+    targetId,
+    targetName,
     scenarioId: scenario.id,
     scenarioName: scenario.name,
     modelConfig: agent?.modelConfig,
@@ -2694,11 +2692,11 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
 // src/run-scenario/index.ts
 async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions) {
-  const skillsGroupId = evalData.evalRun.skillsGroupId;
+  const targetId = evalData.evalRun.skillsGroupId ?? evalData.agent?.id ?? evalData.evalRun.id;
   const workDir = await prepareWorkingDirectory(
     config,
     evalRunId2,
-    skillsGroupId,
+    targetId,
     scenario.id,
     template
   );
@@ -2726,7 +2724,8 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
     })),
     durationMs: partialResult.duration
   };
-  const { "x-wix-ai-gateway-stream": _stream, ...judgeHeaders } = config.aiGatewayHeaders;
+  const { "x-wix-ai-gateway-stream": _ignored, ...judgeHeaders } = config.aiGatewayHeaders;
+  void _ignored;
   const defaultJudgeModel = import_evalforge_types6.DEFAULT_JUDGE_MODEL;
   const assertionContext = {
     workDir,
@@ -2903,7 +2902,7 @@ async function runEvaluation(projectId2, evalRunId2) {
         scenarioItemCount: evalData.scenarioItems.length,
         scenarios: evalData.scenarioItems.map((s) => s.scenario.name),
         skillsCount: evalData.skills.length,
-        hasCodeAgent: !!evalData.codeAgent,
+        hasAgent: !!evalData.agent,
         timestamp: Date.now()
       })
     );
@@ -2921,14 +2920,14 @@ async function runEvaluation(projectId2, evalRunId2) {
       `[${ExecutionPhase.FETCH_EVAL_RUN}] Failed to fetch evaluation data: ${errorMsg}`
     );
   }
-  const { codeAgent, skills, scenarioItems } = evalData;
+  const { agent, skills, scenarioItems } = evalData;
   state.currentPhase = ExecutionPhase.VALIDATION;
   state.currentContext = {
     projectId: projectId2,
     evalRunId: evalRunId2,
     scenarioCount: scenarioItems.length,
     skillCount: skills.length,
-    hasAgent: !!codeAgent,
+    hasAgent: !!agent,
     agentId: evalData.evalRun.agentId,
     skillsGroupId: evalData.evalRun.skillsGroupId
   };
@@ -2937,9 +2936,9 @@ async function runEvaluation(projectId2, evalRunId2) {
       `[${ExecutionPhase.VALIDATION}] Eval run has no skills: set skillsGroupId and ensure the group has skills. (skillsGroupId: ${evalData.evalRun.skillsGroupId || "not set"})`
     );
   }
-  if (scenarioItems.length > 0 && skills.length > 0 && !codeAgent) {
+  if (scenarioItems.length > 0 && skills.length > 0 && !agent) {
     throw new Error(
-      `[${ExecutionPhase.VALIDATION}] Eval run has no code agent: set agentId for skill-based runs. (agentId: ${evalData.evalRun.agentId || "not set"})`
+      `[${ExecutionPhase.VALIDATION}] Eval run has no agent: set agentId for skill-based runs. (agentId: ${evalData.evalRun.agentId || "not set"})`
     );
   }
   let completedScenarios = 0;
@@ -2953,8 +2952,8 @@ async function runEvaluation(projectId2, evalRunId2) {
       scenarioName: scenario.name,
       skillsGroupId: evalData.evalRun.skillsGroupId,
       skillsGroupName: evalData.skillsGroupName,
-      agentId: codeAgent?.id,
-      agentName: codeAgent?.name,
+      agentId: agent?.id,
+      agentName: agent?.name,
       progress: `${completedScenarios + 1}/${totalScenarios}`
     };
     const skillNames = evalData.skills.map((s) => s.name).join(", ");
@@ -2962,7 +2961,7 @@ async function runEvaluation(projectId2, evalRunId2) {
       "[Evaluator] Running scenario with skills group:",
       evalData.skillsGroupName,
       skillNames ? `(${skillNames})` : "",
-      codeAgent ? `with agent: ${codeAgent.name}` : "",
+      agent ? `with agent: ${agent.name}` : "",
       `(${completedScenarios + 1}/${totalScenarios})`
     );
     try {