npm - @wix/evalforge-evaluator - Versions diffs - 0.98.0 → 0.100.0 - Mend

@wix/evalforge-evaluator 0.98.0 → 0.100.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/build/index.js +237 -151
package/build/index.js.map +4 -4
package/build/index.mjs +233 -147
package/build/index.mjs.map +4 -4
package/build/types/fetch-evaluation-data.d.ts +2 -2
package/build/types/resolve-placeholders.d.ts +26 -0
package/build/types/run-scenario/agents/claude-code/resolve-mcp-placeholders.d.ts +31 -0
package/build/types/run-scenario/agents/claude-code/write-mcp.d.ts +3 -0
package/build/types/run-scenario/agents/claude-code/write-skills.d.ts +3 -1
package/build/types/run-scenario/agents/registry.d.ts +32 -63
package/build/types/run-scenario/index.d.ts +1 -1
package/build/types/run-scenario/run-agent-with-context.d.ts +3 -3
package/build/types/run-scenario/utils/write-files.d.ts +6 -0
package/package.json +4 -4

package/build/index.mjs CHANGED

@@ -187,6 +187,53 @@ import {
   isSystemAssertionId,
   SYSTEM_ASSERTIONS
 } from "@wix/evalforge-types";
+// src/resolve-placeholders.ts
+var PLACEHOLDER_PATTERN = /\{\{([^}]+)\}\}/g;
+function findPlaceholders(value) {
+  const keys = /* @__PURE__ */ new Set();
+  collectPlaceholders(value, keys);
+  return [...keys];
+}
+function collectPlaceholders(value, keys) {
+  if (typeof value === "string") {
+    for (const match of value.matchAll(PLACEHOLDER_PATTERN)) {
+      keys.add(match[1].trim());
+    }
+  } else if (Array.isArray(value)) {
+    for (const item of value) {
+      collectPlaceholders(item, keys);
+    }
+  } else if (typeof value === "object" && value !== null) {
+    for (const val of Object.values(value)) {
+      collectPlaceholders(val, keys);
+    }
+  }
+}
+function resolveValue(value, placeholders) {
+  if (typeof value === "string") {
+    return value.replace(PLACEHOLDER_PATTERN, (match, key) => {
+      const trimmed = key.trim();
+      return trimmed in placeholders ? placeholders[trimmed] : match;
+    });
+  }
+  if (Array.isArray(value)) {
+    return value.map((item) => resolveValue(item, placeholders));
+  }
+  if (typeof value === "object" && value !== null) {
+    const result = {};
+    for (const [k, v] of Object.entries(value)) {
+      result[k] = resolveValue(v, placeholders);
+    }
+    return result;
+  }
+  return value;
+}
+function resolvePlaceholdersInString(text, placeholders) {
+  return resolveValue(text, placeholders);
+}
+// src/fetch-evaluation-data.ts
 function parseSkillNamesFromParams(value) {
   if (typeof value !== "string") {
     return [];
@@ -202,13 +249,11 @@ function applyParamsToAssertion(assertion, params) {
     return assertion;
   }
   if (assertion.type === "llm_judge") {
-    let prompt = assertion.prompt;
+    const stringParams = {};
     for (const [key, value] of Object.entries(params)) {
-      const placeholder = `{{${key}}}`;
-      const escapedPlaceholder = placeholder.replace(/[{}]/g, "\\$&");
-      const replacement = String(value ?? "");
-      prompt = prompt.replace(new RegExp(escapedPlaceholder, "g"), replacement);
+      stringParams[key] = String(value ?? "");
     }
+    const prompt = resolvePlaceholdersInString(assertion.prompt, stringParams);
     return {
       ...assertion,
       prompt,
@@ -317,9 +362,9 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
   const scenarios = await Promise.all(
     evalRun.scenarioIds.map((id) => api.getScenario(projectId2, id))
   );
-  let codeAgent = null;
+  let agent = null;
   if (evalRun.agentId) {
-    codeAgent = await api.getAgent(projectId2, evalRun.agentId);
+    agent = await api.getAgent(projectId2, evalRun.agentId);
   }
   let skills = [];
   let skillsGroup = null;
@@ -420,7 +465,7 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
   const skillsGroupName = skillsGroup?.name ?? "";
   return {
     evalRun,
-    codeAgent,
+    agent,
     skills,
     skillsGroup,
     skillsGroupName,
@@ -444,56 +489,18 @@ import {
 import { mkdirSync, existsSync, rmSync, readFileSync, writeFileSync } from "fs";
 import { tmpdir } from "os";
 import path from "path";
-import { fetchGitHubFolder as fetchGitHubFolder2 } from "@wix/evalforge-github-client";
+import { fetchGitHubFolder } from "@wix/evalforge-github-client";
-// src/run-scenario/agents/claude-code/write-skills.ts
+// src/run-scenario/utils/write-files.ts
 import { mkdir, writeFile } from "fs/promises";
-import { dirname, join, resolve, sep } from "path";
-import { fetchGitHubFolder } from "@wix/evalforge-github-client";
-async function writeSkillsToFilesystem(cwd, skills, fetchFn = fetchGitHubFolder) {
-  await Promise.all(
-    skills.map((skill) => writeSkillToFilesystem(cwd, skill, fetchFn))
-  );
-}
-async function writeSkillToFilesystem(cwd, skill, fetchFn = fetchGitHubFolder) {
-  const skillName = skill.name;
-  const skillDir = join(cwd, ".claude", "skills", skillName);
-  await mkdir(skillDir, { recursive: true });
-  const version = skill.latestVersion;
-  if (version?.files && version.files.length > 0) {
-    await writeSkillFiles(skillDir, version.files);
-    console.log(
-      `[Skill] ${skillName}: wrote ${version.files.length} file(s) from snapshot`
-    );
-  } else if (skill.source) {
-    try {
-      const files = await fetchFn(skill.source, {
-        userAgent: "EvalForge-Evaluator"
-      });
-      await writeSkillFiles(skillDir, files);
-      console.log(
-        `[Skill] ${skillName}: wrote ${files.length} file(s) from GitHub (live)`
-      );
-    } catch (error) {
-      const message = error instanceof Error ? error.message : "Unknown error";
-      console.error(
-        `[Skill] ${skillName}: GitHub fetch failed: ${message}, no files to fall back to`
-      );
-      throw new Error(
-        `Failed to write skill ${skillName} to filesystem: ${message}`
-      );
-    }
-  } else {
-    throw new Error(`Skill ${skillName} has no files and no source configured`);
-  }
-}
-async function writeSkillFiles(skillDir, files) {
-  const resolvedBase = resolve(skillDir);
+import { dirname, resolve, sep } from "path";
+async function writeFilesToDirectory(targetDir, files) {
+  const resolvedBase = resolve(targetDir);
   for (const file of files) {
-    const filePath = resolve(skillDir, file.path);
+    const filePath = resolve(targetDir, file.path);
     if (!filePath.startsWith(resolvedBase + sep) && filePath !== resolvedBase) {
       throw new Error(
-        `Path traversal detected in skill file: "${file.path}" resolves outside skill directory`
+        `Path traversal detected: "${file.path}" resolves outside target directory`
       );
     }
     await mkdir(dirname(filePath), { recursive: true });
@@ -509,10 +516,10 @@ async function fetchAndWriteTemplateFiles(template, workDir) {
     );
     return;
   }
-  const files = await fetchGitHubFolder2(template.source, {
+  const files = await fetchGitHubFolder(template.source, {
     userAgent: "EvalForge-Evaluator"
   });
-  await writeSkillFiles(workDir, files);
+  await writeFilesToDirectory(workDir, files);
 }
 function writeWixEnvFile(workDir) {
   const configPath = path.join(workDir, "wix.config.json");
@@ -566,86 +573,76 @@ import { randomUUID as randomUUID2 } from "crypto";
 // src/run-scenario/agents/registry.ts
 var AgentAdapterRegistry = class {
-  /**
-   * Map of run commands to their registered adapters.
-   * Multiple commands can map to the same adapter.
-   */
-  adapters = /* @__PURE__ */ new Map();
-  /**
-   * Set of all registered adapter instances (for getAll).
-   */
+  /** Map of CLI commands to their registered adapters. */
+  commandMap = /* @__PURE__ */ new Map();
+  /** Map of adapter IDs to their registered adapters. */
+  idMap = /* @__PURE__ */ new Map();
+  /** Set of all registered adapter instances (for getAll). */
   registeredAdapters = /* @__PURE__ */ new Set();
   /**
    * Register an agent adapter.
    *
-   * The adapter will be registered for all commands in its supportedCommands array.
-   * If a command is already registered, it will be overwritten with a warning.
-   *
-   * @param adapter - The adapter to register
+   * The adapter is registered by its ID and for all commands in its supportedCommands array.
+   * If a command or ID is already registered, it will be overwritten with a warning.
    */
   register(adapter) {
     this.registeredAdapters.add(adapter);
+    this.idMap.set(adapter.id, adapter);
     for (const command of adapter.supportedCommands) {
-      if (this.adapters.has(command)) {
-        const existing = this.adapters.get(command);
+      if (this.commandMap.has(command)) {
+        const existing = this.commandMap.get(command);
         console.warn(
           `[AgentAdapterRegistry] Command "${command}" already registered by adapter "${existing.id}". Overwriting with adapter "${adapter.id}".`
         );
       }
-      this.adapters.set(command, adapter);
+      this.commandMap.set(command, adapter);
     }
   }
-  /**
-   * Get an adapter by run command.
-   *
-   * @param runCommand - The run command to look up
-   * @returns The registered adapter, or undefined if not found
-   */
-  get(runCommand) {
-    return this.adapters.get(runCommand);
+  /** Get an adapter by CLI command. */
+  getByCommand(command) {
+    return this.commandMap.get(command);
   }
-  /**
-   * Check if a command has a registered adapter.
-   *
-   * @param runCommand - The run command to check
-   * @returns True if an adapter is registered for this command
-   */
-  has(runCommand) {
-    return this.adapters.has(runCommand);
+  /** Get an adapter by adapter ID. */
+  getById(adapterId) {
+    return this.idMap.get(adapterId);
   }
   /**
-   * Get all registered adapters.
-   *
-   * @returns Array of all unique registered adapters
+   * Unified lookup: tries CLI command first, then adapter ID.
+   * Use this when the identifier could be either a command or an adapter ID.
    */
+  resolve(identifier) {
+    return this.commandMap.get(identifier) ?? this.idMap.get(identifier);
+  }
+  /** Check if a command or adapter ID has a registered adapter. */
+  has(identifier) {
+    return this.commandMap.has(identifier) || this.idMap.has(identifier);
+  }
+  /** Get all registered adapters. */
   getAll() {
     return Array.from(this.registeredAdapters);
   }
-  /**
-   * Get all supported commands.
-   *
-   * @returns Array of all registered run commands
-   */
+  /** Get all supported CLI commands. */
   getSupportedCommands() {
-    return Array.from(this.adapters.keys());
+    return Array.from(this.commandMap.keys());
+  }
+  /** Get all registered adapter IDs. */
+  getAdapterIds() {
+    return Array.from(this.idMap.keys());
   }
   /**
    * Unregister an adapter by its ID.
-   *
    * Removes the adapter and all its command mappings.
-   *
-   * @param adapterId - The ID of the adapter to remove
-   * @returns True if the adapter was found and removed
    */
   unregister(adapterId) {
     let found = false;
     for (const adapter of this.registeredAdapters) {
       if (adapter.id === adapterId) {
         this.registeredAdapters.delete(adapter);
+        this.idMap.delete(adapterId);
         found = true;
         for (const command of adapter.supportedCommands) {
-          if (this.adapters.get(command) === adapter) {
-            this.adapters.delete(command);
+          if (this.commandMap.get(command) === adapter) {
+            this.commandMap.delete(command);
           }
         }
         break;
@@ -653,22 +650,21 @@ var AgentAdapterRegistry = class {
     }
     return found;
   }
-  /**
-   * Clear all registered adapters.
-   * Primarily useful for testing.
-   */
+  /** Clear all registered adapters. Primarily useful for testing. */
   clear() {
-    this.adapters.clear();
+    this.commandMap.clear();
+    this.idMap.clear();
     this.registeredAdapters.clear();
   }
 };
 var defaultRegistry = new AgentAdapterRegistry();
-function getAdapter(runCommand) {
-  const adapter = defaultRegistry.get(runCommand);
+function getAdapter(identifier) {
+  const adapter = defaultRegistry.resolve(identifier);
   if (!adapter) {
-    const supported = defaultRegistry.getSupportedCommands();
+    const commands = defaultRegistry.getSupportedCommands();
+    const ids = defaultRegistry.getAdapterIds();
     throw new Error(
-      `No agent adapter registered for command "${runCommand}". Supported commands: ${supported.length > 0 ? supported.join(", ") : "(none registered)"}`
+      `No agent adapter registered for "${identifier}". Supported commands: ${commands.length > 0 ? commands.join(", ") : "(none)"}. Registered adapters: ${ids.length > 0 ? ids.join(", ") : "(none)"}`
     );
   }
   return adapter;
@@ -685,12 +681,97 @@ import {
   LiveTraceEventType,
   TRACE_EVENT_PREFIX
 } from "@wix/evalforge-types";
+// src/run-scenario/agents/claude-code/write-skills.ts
+import { mkdir as mkdir2 } from "fs/promises";
+import { join } from "path";
+import { fetchGitHubFolder as fetchGitHubFolder2 } from "@wix/evalforge-github-client";
+async function writeSkillsToFilesystem(cwd, skills, fetchFn = fetchGitHubFolder2) {
+  await Promise.all(
+    skills.map((skill) => writeSkillToFilesystem(cwd, skill, fetchFn))
+  );
+}
+async function writeSkillToFilesystem(cwd, skill, fetchFn = fetchGitHubFolder2) {
+  const skillName = skill.name;
+  const skillDir = join(cwd, ".claude", "skills", skillName);
+  await mkdir2(skillDir, { recursive: true });
+  const version = skill.latestVersion;
+  if (version?.files && version.files.length > 0) {
+    await writeFilesToDirectory(skillDir, version.files);
+    console.log(
+      `[Skill] ${skillName}: wrote ${version.files.length} file(s) from snapshot`
+    );
+  } else if (skill.source) {
+    try {
+      const files = await fetchFn(skill.source, {
+        userAgent: "EvalForge-Evaluator"
+      });
+      await writeFilesToDirectory(skillDir, files);
+      console.log(
+        `[Skill] ${skillName}: wrote ${files.length} file(s) from GitHub (live)`
+      );
+    } catch (error) {
+      const message = error instanceof Error ? error.message : "Unknown error";
+      console.error(
+        `[Skill] ${skillName}: GitHub fetch failed: ${message}, no files to fall back to`
+      );
+      throw new Error(
+        `Failed to write skill ${skillName} to filesystem: ${message}`
+      );
+    }
+  } else {
+    throw new Error(`Skill ${skillName} has no files and no source configured`);
+  }
+}
+// src/run-scenario/agents/claude-code/execute.ts
 import { randomUUID } from "crypto";
 // src/run-scenario/agents/claude-code/write-mcp.ts
 import { writeFile as writeFile2 } from "fs/promises";
-import { join as join2 } from "path";
+import { join as join3 } from "path";
 import { MCP_SERVERS_JSON_KEY } from "@wix/evalforge-types";
+// src/run-scenario/agents/claude-code/resolve-mcp-placeholders.ts
+import { readFile } from "fs/promises";
+import { join as join2 } from "path";
+import { homedir } from "os";
+var WIX_AUTH_FILE = join2(homedir(), ".wix", "auth", "api-key.json");
+async function loadWixAuthPlaceholders(authFilePath = WIX_AUTH_FILE) {
+  try {
+    const content = await readFile(authFilePath, "utf-8");
+    const auth = JSON.parse(content);
+    if (!auth.token || !auth.userInfo?.userId) {
+      return {};
+    }
+    return {
+      "wix-auth-token": auth.token,
+      "wix-auth-user-id": auth.userInfo.userId
+    };
+  } catch (err) {
+    console.warn(
+      `[MCP] Could not load Wix auth file: ${err.message}`
+    );
+    return {};
+  }
+}
+async function resolveMcpPlaceholders(mcpServers, authFilePath) {
+  const needed = findPlaceholders(mcpServers);
+  if (needed.length === 0) {
+    return mcpServers;
+  }
+  const placeholders = await loadWixAuthPlaceholders(authFilePath);
+  const unresolved = needed.filter((key) => !(key in placeholders));
+  if (unresolved.length > 0) {
+    throw new Error(
+      `MCP config contains unresolvable placeholders: ${unresolved.map((k) => `{{${k}}}`).join(", ")}. Ensure ~/.wix/auth/api-key.json exists (run \`npx @wix/cli login\`).`
+    );
+  }
+  console.log(`[MCP] Resolved ${needed.length} placeholder(s)`);
+  return resolveValue(mcpServers, placeholders);
+}
+// src/run-scenario/agents/claude-code/write-mcp.ts
 async function writeMcpToFilesystem(cwd, mcps) {
   if (mcps.length === 0) return;
   const mcpServers = {};
@@ -705,19 +786,20 @@ async function writeMcpToFilesystem(cwd, mcps) {
       mcpServers[key] = value;
     }
   }
+  const resolvedServers = await resolveMcpPlaceholders(mcpServers);
   const content = JSON.stringify(
-    { [MCP_SERVERS_JSON_KEY]: mcpServers },
+    { [MCP_SERVERS_JSON_KEY]: resolvedServers },
     null,
     2
   );
-  const filePath = join2(cwd, ".mcp.json");
+  const filePath = join3(cwd, ".mcp.json");
   await writeFile2(filePath, content, "utf8");
   console.log(`[MCP] Written to ${filePath}`);
 }
 // src/run-scenario/agents/claude-code/write-sub-agents.ts
-import { mkdir as mkdir2, writeFile as writeFile3 } from "fs/promises";
-import { join as join3 } from "path";
+import { mkdir as mkdir3, writeFile as writeFile3 } from "fs/promises";
+import { join as join4 } from "path";
 var AGENTS_DIR = ".claude/agents";
 function toAgentFilename(name, index, nameCount) {
   const base = (name || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
@@ -727,20 +809,20 @@ function toAgentFilename(name, index, nameCount) {
 }
 async function writeSubAgentsToFilesystem(cwd, subAgents) {
   if (subAgents.length === 0) return;
-  const agentsDir = join3(cwd, AGENTS_DIR);
-  await mkdir2(agentsDir, { recursive: true });
+  const agentsDir = join4(cwd, AGENTS_DIR);
+  await mkdir3(agentsDir, { recursive: true });
   const nameCount = /* @__PURE__ */ new Map();
   for (const [i, agent] of subAgents.entries()) {
     const filename = toAgentFilename(agent.name, i, nameCount);
-    const filePath = join3(agentsDir, `${filename}.md`);
+    const filePath = join4(agentsDir, `${filename}.md`);
     await writeFile3(filePath, agent.subAgentMd, "utf8");
   }
   console.log(`[SubAgents] Written to ${agentsDir}`);
 }
 // src/run-scenario/agents/claude-code/write-rules.ts
-import { mkdir as mkdir3, writeFile as writeFile4, readFile } from "fs/promises";
-import { join as join4 } from "path";
+import { mkdir as mkdir4, writeFile as writeFile4, readFile as readFile2 } from "fs/promises";
+import { join as join5 } from "path";
 var CURSOR_RULES_DIR = ".cursor/rules";
 function toRuleFilename(name, index, nameCount) {
   const base = (name || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `rule-${index}`;
@@ -751,7 +833,7 @@ function toRuleFilename(name, index, nameCount) {
 async function appendToFile(filePath, content) {
   let existing = "";
   try {
-    existing = await readFile(filePath, "utf8");
+    existing = await readFile2(filePath, "utf8");
   } catch {
   }
   const merged = existing ? `${existing.trimEnd()}
@@ -766,20 +848,20 @@ async function writeRulesToFilesystem(cwd, rules) {
   for (const [i, rule] of rules.entries()) {
     switch (rule.ruleType) {
       case "claude-md": {
-        await appendToFile(join4(cwd, "CLAUDE.md"), rule.content);
+        await appendToFile(join5(cwd, "CLAUDE.md"), rule.content);
         break;
       }
       case "agents-md": {
-        await appendToFile(join4(cwd, "AGENTS.md"), rule.content);
+        await appendToFile(join5(cwd, "AGENTS.md"), rule.content);
         break;
       }
       case "cursor-rule": {
         if (!hasCursorRules) {
-          await mkdir3(join4(cwd, CURSOR_RULES_DIR), { recursive: true });
+          await mkdir4(join5(cwd, CURSOR_RULES_DIR), { recursive: true });
           hasCursorRules = true;
         }
         const filename = toRuleFilename(rule.name, i, nameCount);
-        const filePath = join4(cwd, CURSOR_RULES_DIR, `${filename}.md`);
+        const filePath = join5(cwd, CURSOR_RULES_DIR, `${filename}.md`);
         await writeFile4(filePath, rule.content, "utf8");
         break;
       }
@@ -1815,7 +1897,7 @@ defaultRegistry.register(claudeCodeAdapter);
 // src/run-scenario/file-diff.ts
 import { readdirSync, readFileSync as readFileSync2, statSync, existsSync as existsSync2 } from "fs";
-import { join as join6, relative } from "path";
+import { join as join7, relative } from "path";
 // ../../node_modules/diff/lib/index.mjs
 function Diff() {
@@ -1991,7 +2073,7 @@ Diff.prototype = {
   tokenize: function tokenize(value) {
     return Array.from(value);
   },
-  join: function join5(chars) {
+  join: function join6(chars) {
     return chars.join("");
   },
   postProcess: function postProcess(changeObjects) {
@@ -2431,7 +2513,7 @@ function snapshotDirectory(dir, baseDir) {
   }
   const entries = readdirSync(dir, { withFileTypes: true });
   for (const entry of entries) {
-    const fullPath = join6(dir, entry.name);
+    const fullPath = join7(dir, entry.name);
     const relativePath = relative(base, fullPath);
     if (shouldIgnore(entry.name)) {
       continue;
@@ -2540,18 +2622,21 @@ function extractTemplateFiles(before, after) {
 }
 // src/run-scenario/run-agent-with-context.ts
-import { AgentRunCommand as AgentRunCommand2 } from "@wix/evalforge-types";
+import { AgentRunCommand as AgentRunCommand2, AgentType } from "@wix/evalforge-types";
 var DEFAULT_AGENT_COMMAND = AgentRunCommand2.CLAUDE;
 async function runAgentWithContext(config, evalRunId2, scenario, evalData, workDir) {
   const skillsGroupId = evalData.evalRun.skillsGroupId;
+  const agent = evalData.agent ?? void 0;
+  const isSDK = agent?.agentType === AgentType.SDK;
   if (!skillsGroupId) {
     throw new Error(`Eval run ${evalData.evalRun.id} has no skillsGroupId`);
   }
-  const agent = evalData.codeAgent ?? void 0;
-  const runCommand = agent?.runCommand ?? DEFAULT_AGENT_COMMAND;
-  const adapter = getAdapter(runCommand);
+  const identifier = isSDK ? agent.id : agent?.runCommand ?? DEFAULT_AGENT_COMMAND;
+  const adapter = getAdapter(identifier);
   const startedAt = (/* @__PURE__ */ new Date()).toISOString();
   const beforeSnapshot = workDir ? snapshotDirectory(workDir) : {};
+  const targetId = skillsGroupId ?? agent?.id ?? evalData.evalRun.id;
+  const targetName = evalData.skillsGroupName || agent?.name || "";
   const executionContext = {
     skills: evalData.skills,
     scenario,
@@ -2563,8 +2648,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
       evalRunId: evalRunId2,
       scenarioId: scenario.id,
       scenarioName: scenario.name,
-      targetId: skillsGroupId,
-      targetName: evalData.skillsGroupName,
+      targetId,
+      targetName,
       tracePushUrl: config.tracePushUrl,
       routeHeader: config.routeHeader,
       authToken: config.authToken
@@ -2581,8 +2666,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
   const templateFiles = workDir ? extractTemplateFiles(beforeSnapshot, afterSnapshot) : void 0;
   return {
     id: randomUUID2(),
-    targetId: skillsGroupId,
-    targetName: evalData.skillsGroupName,
+    targetId,
+    targetName,
     scenarioId: scenario.id,
     scenarioName: scenario.name,
     modelConfig: agent?.modelConfig,
@@ -2598,11 +2683,11 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
 // src/run-scenario/index.ts
 async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions) {
-  const skillsGroupId = evalData.evalRun.skillsGroupId;
+  const targetId = evalData.evalRun.skillsGroupId ?? evalData.agent?.id ?? evalData.evalRun.id;
   const workDir = await prepareWorkingDirectory(
     config,
     evalRunId2,
-    skillsGroupId,
+    targetId,
     scenario.id,
     template
   );
@@ -2630,7 +2715,8 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
     })),
     durationMs: partialResult.duration
   };
-  const { "x-wix-ai-gateway-stream": _stream, ...judgeHeaders } = config.aiGatewayHeaders;
+  const { "x-wix-ai-gateway-stream": _ignored, ...judgeHeaders } = config.aiGatewayHeaders;
+  void _ignored;
   const defaultJudgeModel = DEFAULT_JUDGE_MODEL;
   const assertionContext = {
     workDir,
@@ -2807,7 +2893,7 @@ async function runEvaluation(projectId2, evalRunId2) {
         scenarioItemCount: evalData.scenarioItems.length,
         scenarios: evalData.scenarioItems.map((s) => s.scenario.name),
         skillsCount: evalData.skills.length,
-        hasCodeAgent: !!evalData.codeAgent,
+        hasAgent: !!evalData.agent,
         timestamp: Date.now()
       })
     );
@@ -2825,14 +2911,14 @@ async function runEvaluation(projectId2, evalRunId2) {
       `[${ExecutionPhase.FETCH_EVAL_RUN}] Failed to fetch evaluation data: ${errorMsg}`
     );
   }
-  const { codeAgent, skills, scenarioItems } = evalData;
+  const { agent, skills, scenarioItems } = evalData;
   state.currentPhase = ExecutionPhase.VALIDATION;
   state.currentContext = {
     projectId: projectId2,
     evalRunId: evalRunId2,
     scenarioCount: scenarioItems.length,
     skillCount: skills.length,
-    hasAgent: !!codeAgent,
+    hasAgent: !!agent,
     agentId: evalData.evalRun.agentId,
     skillsGroupId: evalData.evalRun.skillsGroupId
   };
@@ -2841,9 +2927,9 @@ async function runEvaluation(projectId2, evalRunId2) {
       `[${ExecutionPhase.VALIDATION}] Eval run has no skills: set skillsGroupId and ensure the group has skills. (skillsGroupId: ${evalData.evalRun.skillsGroupId || "not set"})`
     );
   }
-  if (scenarioItems.length > 0 && skills.length > 0 && !codeAgent) {
+  if (scenarioItems.length > 0 && skills.length > 0 && !agent) {
     throw new Error(
-      `[${ExecutionPhase.VALIDATION}] Eval run has no code agent: set agentId for skill-based runs. (agentId: ${evalData.evalRun.agentId || "not set"})`
+      `[${ExecutionPhase.VALIDATION}] Eval run has no agent: set agentId for skill-based runs. (agentId: ${evalData.evalRun.agentId || "not set"})`
     );
   }
   let completedScenarios = 0;
@@ -2857,8 +2943,8 @@ async function runEvaluation(projectId2, evalRunId2) {
       scenarioName: scenario.name,
       skillsGroupId: evalData.evalRun.skillsGroupId,
       skillsGroupName: evalData.skillsGroupName,
-      agentId: codeAgent?.id,
-      agentName: codeAgent?.name,
+      agentId: agent?.id,
+      agentName: agent?.name,
       progress: `${completedScenarios + 1}/${totalScenarios}`
     };
     const skillNames = evalData.skills.map((s) => s.name).join(", ");
@@ -2866,7 +2952,7 @@ async function runEvaluation(projectId2, evalRunId2) {
       "[Evaluator] Running scenario with skills group:",
       evalData.skillsGroupName,
       skillNames ? `(${skillNames})` : "",
-      codeAgent ? `with agent: ${codeAgent.name}` : "",
+      agent ? `with agent: ${agent.name}` : "",
       `(${completedScenarios + 1}/${totalScenarios})`
     );
     try {