npm - @wix/evalforge-evaluator - Versions diffs - 0.170.0 → 0.172.0 - Mend

@wix/evalforge-evaluator 0.170.0 → 0.172.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/build/index.js +203 -105
package/build/index.js.map +4 -4
package/build/index.mjs +153 -47
package/build/index.mjs.map +4 -4
package/build/types/run-scenario/install-dependencies.d.ts +11 -0
package/package.json +2 -2

package/build/index.js CHANGED Viewed

@@ -90,8 +90,8 @@ function createApiClient(serverUrl, options = "") {
     }
     return headers;
   }
-  async function fetchJson(path2) {
-    const url = `${serverUrl}${apiPrefix}${pathPrefix}${path2}`;
+  async function fetchJson(path3) {
+    const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
     console.error(`[API] GET ${url}`);
     const headers = buildHeaders();
     const response = await fetch(url, {
@@ -105,8 +105,8 @@ function createApiClient(serverUrl, options = "") {
     }
     return response.json();
   }
-  async function postJson(path2, body) {
-    const url = `${serverUrl}${apiPrefix}${pathPrefix}${path2}`;
+  async function postJson(path3, body) {
+    const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
     console.error(`[API] POST ${url}`);
     const response = await fetch(url, {
       method: "POST",
@@ -120,8 +120,8 @@ function createApiClient(serverUrl, options = "") {
       );
     }
   }
-  async function deleteRequest(path2) {
-    const url = `${serverUrl}${apiPrefix}${pathPrefix}${path2}`;
+  async function deleteRequest(path3) {
+    const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
     console.error(`[API] DELETE ${url}`);
     const headers = buildHeaders();
     const response = await fetch(url, {
@@ -135,8 +135,8 @@ function createApiClient(serverUrl, options = "") {
       );
     }
   }
-  async function putJson(path2, body) {
-    const url = `${serverUrl}${apiPrefix}${pathPrefix}${path2}`;
+  async function putJson(path3, body) {
+    const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
     console.error(`[API] PUT ${url}`);
     const response = await fetch(url, {
       method: "PUT",
@@ -432,10 +432,10 @@ var import_evalforge_types13 = require("@wix/evalforge-types");
 var import_eval_assertions = require("@wix/eval-assertions");
 // src/run-scenario/environment.ts
-var import_fs = require("fs");
+var import_fs2 = require("fs");
 var import_promises2 = require("fs/promises");
 var import_os = require("os");
-var import_path2 = __toESM(require("path"));
+var import_path3 = __toESM(require("path"));
 var import_evalforge_github_client = require("@wix/evalforge-github-client");
 // src/run-scenario/utils/write-files.ts
@@ -455,6 +455,84 @@ async function writeFilesToDirectory(targetDir, files) {
   }
 }
+// src/run-scenario/install-dependencies.ts
+var import_fs = require("fs");
+var import_crypto = require("crypto");
+var import_path2 = __toESM(require("path"));
+var import_child_process = require("child_process");
+function detectPackageManager(workDir) { // Chooses the install command for workDir from whichever lockfile exists; returns { cmd, args, cacheSourceFile }.
+  if ((0, import_fs.existsSync)(import_path2.default.join(workDir, "pnpm-lock.yaml"))) { // checked first, so pnpm wins when several lockfiles coexist
+    return { cmd: "pnpm", args: ["install", "--frozen-lockfile"], cacheSourceFile: "pnpm-lock.yaml" };
+  }
+  if ((0, import_fs.existsSync)(import_path2.default.join(workDir, "package-lock.json"))) { // second priority: npm lockfile
+    return { cmd: "npm", args: ["ci"], cacheSourceFile: "package-lock.json" };
+  }
+  if ((0, import_fs.existsSync)(import_path2.default.join(workDir, "yarn.lock"))) { // third priority: yarn lockfile
+    return { cmd: "yarn", args: ["install", "--frozen-lockfile"], cacheSourceFile: "yarn.lock" };
+  }
+  return { cmd: "npm", args: ["install", "--legacy-peer-deps", "--prefer-offline", "--no-fund", "--no-audit"], cacheSourceFile: "package.json" }; // no lockfile: plain npm install, with package.json named as the cache source file
+}
+function cloneDirectory(src, dest) { // Synchronously copies the directory tree at src to dest, using a platform-specific strategy.
+  if (process.platform === "darwin") {
+    (0, import_child_process.execFileSync)("cp", ["-rc", src, dest]); // macOS: delegates to the cp binary; NOTE(review): -c presumably requests clonefile (copy-on-write) copies — confirm against cp(1)
+  } else {
+    (0, import_fs.cpSync)(src, dest, { recursive: true }); // every other platform: Node's recursive fs copy
+  }
+}
+function installWithCache(workDir, exec, cacheBase, pm) { // Installs dependencies in workDir, reusing or populating a node_modules copy under cacheBase; exec is invoked as exec(cmd, args, options); pm is the object returned by detectPackageManager.
+  const sourceContent = (0, import_fs.readFileSync)(import_path2.default.join(workDir, pm.cacheSourceFile), "utf-8"); // not inside a try: a read failure here throws to the caller
+  const cacheKey = (0, import_crypto.createHash)("sha256").update(sourceContent).digest("hex").slice(0, 16); // key = first 16 hex chars of the SHA-256 of the source file's content
+  const cachedNodeModules = import_path2.default.join(cacheBase, cacheKey, "node_modules");
+  const targetNodeModules = import_path2.default.join(workDir, "node_modules");
+  const cacheDir = import_path2.default.dirname(cachedNodeModules); // i.e. <cacheBase>/<cacheKey>
+  const cachedYarnLock = import_path2.default.join(cacheDir, "yarn.lock");
+  if ((0, import_fs.existsSync)(cachedNodeModules)) { // cache hit: restore instead of running the package manager
+    console.log(`[environment] Restoring node_modules from cache (key: ${cacheKey})`);
+    if (!(0, import_fs.existsSync)(targetNodeModules)) { // an existing node_modules in workDir is left untouched
+      cloneDirectory(cachedNodeModules, targetNodeModules);
+    }
+    if ((0, import_fs.existsSync)(cachedYarnLock)) { // put back the yarn.lock that was saved next to the cached node_modules, if any
+      (0, import_fs.copyFileSync)(cachedYarnLock, import_path2.default.join(workDir, "yarn.lock"));
+    }
+    return;
+  }
+  console.log(`[environment] Running ${pm.cmd} ${pm.args.join(" ")} in ${workDir} (cache key: ${cacheKey})`);
+  try {
+    exec(pm.cmd, pm.args, { cwd: workDir, stdio: "inherit", timeout: 18e4, env: { ...process.env, NODE_ENV: "development" } }); // timeout 18e4 = 180000; NODE_ENV is overridden to "development" (presumably so devDependencies get installed — confirm)
+  } catch (err) {
+    console.error("[environment] Dependency installation failed:", err instanceof Error ? err.message : String(err)); // failure is logged, not rethrown
+    return; // nothing is written to the cache after a failed install
+  }
+  console.log("[environment] Dependency installation complete \u2014 saving to cache");
+  try {
+    (0, import_fs.mkdirSync)(cacheDir, { recursive: true });
+    const yarnLockPath = import_path2.default.join(workDir, "yarn.lock");
+    if ((0, import_fs.existsSync)(yarnLockPath)) { // keep any yarn.lock present in workDir alongside the cache entry
+      (0, import_fs.copyFileSync)(yarnLockPath, cachedYarnLock);
+    }
+    cloneDirectory(targetNodeModules, cachedNodeModules); // NOTE(review): the copy goes straight to the final path; an interrupted copy would leave a partial directory that the existsSync check above treats as a valid cache hit
+  } catch (err) {
+    console.error("[environment] Failed to save to cache (installation still succeeded):", err instanceof Error ? err.message : String(err)); // cache-save errors are logged only; the install result stands
+  }
+}
+async function installDependencies(workDir, exec = import_child_process.execFileSync, cacheBase) { // Installs dependencies for workDir when it contains a package.json; exec defaults to child_process.execFileSync; a truthy cacheBase enables the node_modules cache. Declared async, but the body contains no await.
+  if (!(0, import_fs.existsSync)(import_path2.default.join(workDir, "package.json"))) { // no package.json: nothing to install
+    return;
+  }
+  const pm = detectPackageManager(workDir);
+  if (cacheBase) { // cached path: installWithCache does the install and the cache bookkeeping
+    installWithCache(workDir, exec, cacheBase, pm); // called outside any try, so anything it throws rejects this function's promise
+    return;
+  }
+  console.log(`[environment] Running ${pm.cmd} ${pm.args.join(" ")} in ${workDir}`);
+  try {
+    exec(pm.cmd, pm.args, { cwd: workDir, stdio: "inherit", timeout: 18e4, env: { ...process.env, NODE_ENV: "development" } }); // same options as the cached path: timeout 18e4 = 180000, NODE_ENV overridden to "development"
+    console.log("[environment] Dependency installation complete");
+  } catch (err) {
+    console.error("[environment] Dependency installation failed:", err instanceof Error ? err.message : String(err)); // install failure is logged and swallowed; the function still resolves
+  }
+}
 // src/run-scenario/environment.ts
 async function fetchAndWriteTemplateFiles(template, workDir) {
   let sourceFiles = [];
@@ -475,27 +553,27 @@ async function fetchAndWriteTemplateFiles(template, workDir) {
       const content = ef.gitSource ? await (0, import_evalforge_github_client.fetchGitHubFile)(ef.gitSource, {
         userAgent: "EvalForge-Evaluator"
       }) : ef.content ?? "";
-      const dest = import_path2.default.resolve(workDir, ef.path);
-      if (!dest.startsWith(workDir + import_path2.sep)) {
+      const dest = import_path3.default.resolve(workDir, ef.path);
+      if (!dest.startsWith(workDir + import_path3.sep)) {
         throw new Error(
           `Extra file path escapes working directory: "${ef.path}"`
         );
       }
-      await (0, import_promises2.mkdir)(import_path2.default.dirname(dest), { recursive: true });
+      await (0, import_promises2.mkdir)(import_path3.default.dirname(dest), { recursive: true });
       await (0, import_promises2.writeFile)(dest, content, "utf8");
     })
   );
 }
 function writeWixEnvFile(workDir) {
-  const configPath = import_path2.default.join(workDir, "wix.config.json");
-  if (!(0, import_fs.existsSync)(configPath)) {
+  const configPath = import_path3.default.join(workDir, "wix.config.json");
+  if (!(0, import_fs2.existsSync)(configPath)) {
     return;
   }
   try {
-    const config = JSON.parse((0, import_fs.readFileSync)(configPath, "utf-8"));
+    const config = JSON.parse((0, import_fs2.readFileSync)(configPath, "utf-8"));
     if (config.appId) {
-      (0, import_fs.writeFileSync)(
-        import_path2.default.join(workDir, ".env"),
+      (0, import_fs2.writeFileSync)(
+        import_path3.default.join(workDir, ".env"),
         `WIX_CLIENT_ID=${config.appId}
 `,
         "utf-8"
@@ -507,34 +585,36 @@ function writeWixEnvFile(workDir) {
   }
 }
 async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId, template) { // Creates a fresh working directory under the evaluations base dir and returns its path; when template is truthy, template files, the Wix .env file and dependencies are set up as well.
-  const baseDir = config.evaluationsDir ?? import_path2.default.join((0, import_os.tmpdir)(), "evalforge-evaluations");
+  const baseDir = config.evaluationsDir ?? import_path3.default.join((0, import_os.tmpdir)(), "evalforge-evaluations"); // falls back to <os tmpdir>/evalforge-evaluations when evaluationsDir is null/undefined
+  const nodeModulesCacheDir = import_path3.default.join(baseDir, "_node_modules_cache"); // cache location passed to installDependencies below; lives inside baseDir
   if (template) {
     if (!config.evaluationsDir) {
       console.warn(
         "Template specified but EVALUATIONS_DIR not set, using temp directory"
       );
     }
-    const workDir2 = import_path2.default.join(baseDir, `${evalRunId2}_${targetId}`);
-    if ((0, import_fs.existsSync)(workDir2)) {
-      (0, import_fs.rmSync)(workDir2, { recursive: true });
+    const workDir2 = import_path3.default.join(baseDir, `${evalRunId2}_${targetId}`); // template directory name does not include scenarioId
+    if ((0, import_fs2.existsSync)(workDir2)) { // wipe any leftover directory of the same name before recreating it
+      (0, import_fs2.rmSync)(workDir2, { recursive: true });
     }
-    (0, import_fs.mkdirSync)(workDir2, { recursive: true });
+    (0, import_fs2.mkdirSync)(workDir2, { recursive: true });
     await fetchAndWriteTemplateFiles(template, workDir2);
     console.log(`Template files written to ${workDir2}`);
     writeWixEnvFile(workDir2);
+    await installDependencies(workDir2, void 0, nodeModulesCacheDir); // void 0 keeps the default exec; install runs after template files and .env are written
     return workDir2;
   }
-  const workDir = import_path2.default.join(baseDir, `${evalRunId2}_${targetId}_${scenarioId}`);
-  if ((0, import_fs.existsSync)(workDir)) {
-    (0, import_fs.rmSync)(workDir, { recursive: true });
+  const workDir = import_path3.default.join(baseDir, `${evalRunId2}_${targetId}_${scenarioId}`); // no template: directory name also includes scenarioId
+  if ((0, import_fs2.existsSync)(workDir)) {
+    (0, import_fs2.rmSync)(workDir, { recursive: true });
   }
-  (0, import_fs.mkdirSync)(workDir, { recursive: true });
+  (0, import_fs2.mkdirSync)(workDir, { recursive: true }); // no files are written and no dependencies installed on this path
   console.log(`Empty working directory created at ${workDir}`);
   return workDir;
 }
 // src/run-scenario/run-agent-with-context.ts
-var import_crypto4 = require("crypto");
+var import_crypto5 = require("crypto");
 // src/run-scenario/agents/registry.ts
 var AgentAdapterRegistry = class {
@@ -643,7 +723,7 @@ var import_evalforge_types4 = require("@wix/evalforge-types");
 // src/run-scenario/agents/claude-code/write-skills.ts
 var import_promises3 = require("fs/promises");
-var import_path3 = require("path");
+var import_path4 = require("path");
 var import_evalforge_github_client2 = require("@wix/evalforge-github-client");
 async function writeSkillsToFilesystem(cwd, skills, fetchFn = import_evalforge_github_client2.fetchGitHubFolder) {
   await Promise.all(
@@ -652,7 +732,7 @@ async function writeSkillsToFilesystem(cwd, skills, fetchFn = import_evalforge_g
 }
 async function writeSkillToFilesystem(cwd, skill, fetchFn = import_evalforge_github_client2.fetchGitHubFolder) {
   const skillName = skill.name;
-  const skillDir = (0, import_path3.join)(cwd, ".claude", "skills", skillName);
+  const skillDir = (0, import_path4.join)(cwd, ".claude", "skills", skillName);
   await (0, import_promises3.mkdir)(skillDir, { recursive: true });
   const version = skill.latestVersion;
   if (version?.files && version.files.length > 0) {
@@ -692,18 +772,18 @@ function resolveTimeoutMs(maxTurns, maxDurationMs) {
 }
 // src/run-scenario/agents/claude-code/execute.ts
-var import_crypto = require("crypto");
+var import_crypto2 = require("crypto");
 // src/run-scenario/agents/claude-code/write-mcp.ts
 var import_promises5 = require("fs/promises");
-var import_path5 = require("path");
+var import_path6 = require("path");
 var import_evalforge_types2 = require("@wix/evalforge-types");
 // src/run-scenario/agents/shared/resolve-mcp-placeholders.ts
 var import_promises4 = require("fs/promises");
-var import_path4 = require("path");
+var import_path5 = require("path");
 var import_os2 = require("os");
-var WIX_AUTH_FILE = (0, import_path4.join)((0, import_os2.homedir)(), ".wix", "auth", "api-key.json");
+var WIX_AUTH_FILE = (0, import_path5.join)((0, import_os2.homedir)(), ".wix", "auth", "api-key.json");
 async function loadWixAuthPlaceholders(authFilePath = WIX_AUTH_FILE) {
   try {
     const content = await (0, import_promises4.readFile)(authFilePath, "utf-8");
@@ -762,14 +842,14 @@ async function writeMcpToFilesystem(cwd, mcps) {
     null,
     2
   );
-  const filePath = (0, import_path5.join)(cwd, ".mcp.json");
+  const filePath = (0, import_path6.join)(cwd, ".mcp.json");
   await (0, import_promises5.writeFile)(filePath, content, "utf8");
   console.log(`[MCP] Written to ${filePath}`);
 }
 // src/run-scenario/agents/claude-code/write-sub-agents.ts
 var import_promises6 = require("fs/promises");
-var import_path6 = require("path");
+var import_path7 = require("path");
 var import_evalforge_github_client3 = require("@wix/evalforge-github-client");
 var AGENTS_DIR = ".claude/agents";
 function toAgentFilename(name, index, nameCount) {
@@ -807,12 +887,12 @@ async function resolveSubAgentContent(agent, fetchFn) {
 }
 async function writeSubAgentsToFilesystem(cwd, subAgents, fetchFn = import_evalforge_github_client3.fetchGitHubFile) {
   if (subAgents.length === 0) return;
-  const agentsDir = (0, import_path6.join)(cwd, AGENTS_DIR);
+  const agentsDir = (0, import_path7.join)(cwd, AGENTS_DIR);
   await (0, import_promises6.mkdir)(agentsDir, { recursive: true });
   const nameCount = /* @__PURE__ */ new Map();
   for (const [i, agent] of subAgents.entries()) {
     const filename = toAgentFilename(agent.name, i, nameCount);
-    const filePath = (0, import_path6.join)(agentsDir, `${filename}.md`);
+    const filePath = (0, import_path7.join)(agentsDir, `${filename}.md`);
     const content = await resolveSubAgentContent(agent, fetchFn);
     await (0, import_promises6.writeFile)(filePath, content, "utf8");
   }
@@ -821,7 +901,7 @@ async function writeSubAgentsToFilesystem(cwd, subAgents, fetchFn = import_evalf
 // src/run-scenario/agents/claude-code/write-rules.ts
 var import_promises7 = require("fs/promises");
-var import_path7 = require("path");
+var import_path8 = require("path");
 var CURSOR_RULES_DIR = ".cursor/rules";
 function toRuleFilename(name, index, nameCount) {
   const base = (name || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `rule-${index}`;
@@ -854,9 +934,9 @@ function validateGenericDirectory(dir, cwd) {
       `Generic rule directory may not contain "..", got: "${dir}"`
     );
   }
-  const normalizedCwd = cwd.endsWith(import_path7.sep) ? cwd.slice(0, -1) : cwd;
-  const resolved = (0, import_path7.resolve)(normalizedCwd, trimmed);
-  if (!resolved.startsWith(normalizedCwd + import_path7.sep)) {
+  const normalizedCwd = cwd.endsWith(import_path8.sep) ? cwd.slice(0, -1) : cwd;
+  const resolved = (0, import_path8.resolve)(normalizedCwd, trimmed);
+  if (!resolved.startsWith(normalizedCwd + import_path8.sep)) {
     throw new Error(
       `Generic rule directory escapes the working directory: "${dir}"`
     );
@@ -870,20 +950,20 @@ async function writeRulesToFilesystem(cwd, rules) {
   for (const [i, rule] of rules.entries()) {
     switch (rule.ruleType) {
       case "claude-md": {
-        await appendToFile((0, import_path7.join)(cwd, "CLAUDE.md"), rule.content);
+        await appendToFile((0, import_path8.join)(cwd, "CLAUDE.md"), rule.content);
         break;
       }
       case "agents-md": {
-        await appendToFile((0, import_path7.join)(cwd, "AGENTS.md"), rule.content);
+        await appendToFile((0, import_path8.join)(cwd, "AGENTS.md"), rule.content);
         break;
       }
       case "cursor-rule": {
         if (!hasCursorRules) {
-          await (0, import_promises7.mkdir)((0, import_path7.join)(cwd, CURSOR_RULES_DIR), { recursive: true });
+          await (0, import_promises7.mkdir)((0, import_path8.join)(cwd, CURSOR_RULES_DIR), { recursive: true });
           hasCursorRules = true;
         }
         const filename = toRuleFilename(rule.name, i, nameCount);
-        const filePath = (0, import_path7.join)(cwd, CURSOR_RULES_DIR, `${filename}.md`);
+        const filePath = (0, import_path8.join)(cwd, CURSOR_RULES_DIR, `${filename}.md`);
         await (0, import_promises7.writeFile)(filePath, rule.content, "utf8");
         break;
       }
@@ -892,10 +972,10 @@ async function writeRulesToFilesystem(cwd, rules) {
           rule.directory ?? ".opencode/rules",
           cwd
         );
-        const dirPath = (0, import_path7.join)(cwd, directory);
+        const dirPath = (0, import_path8.join)(cwd, directory);
         await (0, import_promises7.mkdir)(dirPath, { recursive: true });
         const filename = toRuleFilename(rule.name, i, nameCount);
-        await (0, import_promises7.writeFile)((0, import_path7.join)(dirPath, `${filename}.md`), rule.content, "utf8");
+        await (0, import_promises7.writeFile)((0, import_path8.join)(dirPath, `${filename}.md`), rule.content, "utf8");
         break;
       }
       default: {
@@ -1066,8 +1146,8 @@ function extractToolActionDescription(toolName, toolArgs) {
         }
       }
       if (toolName === "LS" || toolName === "ls" || toolName === "ListFiles") {
-        const path2 = args.path || args.directory || ".";
-        return `Listing: ${String(path2).slice(0, 50)}`;
+        const path3 = args.path || args.directory || ".";
+        return `Listing: ${String(path3).slice(0, 50)}`;
       }
       if ((toolName === "Read" || toolName === "read" || toolName === "View") && (args.file_path || args.path || args.target_file)) {
         const filePath = String(
@@ -1950,7 +2030,7 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
     const totalSubSteps = thinkingSubSteps + toolSubSteps + textSubSteps || 1;
     if (hasThinking && (hasText || toolCallCount > 0)) {
       subSteps.push({
-        id: (0, import_crypto.randomUUID)(),
+        id: (0, import_crypto2.randomUUID)(),
         stepNumber: 0,
         // renumbered below
         turnIndex,
@@ -1980,7 +2060,7 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
         const toolFraction = toolBudgetSteps > 0 ? 1 / toolBudgetSteps : 1;
         const remainingFraction = (totalSubSteps - thinkingSubSteps) / totalSubSteps;
         subSteps.push({
-          id: (0, import_crypto.randomUUID)(),
+          id: (0, import_crypto2.randomUUID)(),
           stepNumber: 0,
           turnIndex,
           type: import_evalforge_types4.LLMStepType.TOOL_USE,
@@ -2010,7 +2090,7 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
     }
     if (hasText && toolCallCount > 0) {
       subSteps.push({
-        id: (0, import_crypto.randomUUID)(),
+        id: (0, import_crypto2.randomUUID)(),
         stepNumber: 0,
         turnIndex,
         type: import_evalforge_types4.LLMStepType.COMPLETION,
@@ -2032,7 +2112,7 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
     if (subSteps.length === 0) {
       const stepType = hasThinking && !hasText ? import_evalforge_types4.LLMStepType.THINKING : import_evalforge_types4.LLMStepType.COMPLETION;
       subSteps.push({
-        id: (0, import_crypto.randomUUID)(),
+        id: (0, import_crypto2.randomUUID)(),
         stepNumber: 0,
         turnIndex,
         type: stepType,
@@ -2090,7 +2170,7 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
     stepTypeBreakdown
   };
   return {
-    id: (0, import_crypto.randomUUID)(),
+    id: (0, import_crypto2.randomUUID)(),
     steps: traceSteps,
     summary
   };
@@ -2192,7 +2272,7 @@ defaultRegistry.register(claudeCodeAdapter);
 var import_evalforge_types9 = require("@wix/evalforge-types");
 // src/run-scenario/agents/opencode/execute.ts
-var import_child_process = require("child_process");
+var import_child_process2 = require("child_process");
 var import_evalforge_types8 = require("@wix/evalforge-types");
 // src/run-scenario/agents/opencode/types.ts
@@ -2206,7 +2286,7 @@ function tryParseJson(text) {
 // src/run-scenario/agents/opencode/write-skills.ts
 var import_promises8 = require("fs/promises");
-var import_path8 = require("path");
+var import_path9 = require("path");
 var import_evalforge_github_client4 = require("@wix/evalforge-github-client");
 async function writeSkillsToFilesystem2(cwd, skills, fetchFn = import_evalforge_github_client4.fetchGitHubFolder) {
   await Promise.all(
@@ -2215,7 +2295,7 @@ async function writeSkillsToFilesystem2(cwd, skills, fetchFn = import_evalforge_
 }
 async function writeSkillToFilesystem2(cwd, skill, fetchFn) {
   const skillName = skill.name;
-  const skillDir = (0, import_path8.join)(cwd, ".opencode", "skills", skillName);
+  const skillDir = (0, import_path9.join)(cwd, ".opencode", "skills", skillName);
   await (0, import_promises8.mkdir)(skillDir, { recursive: true });
   const version = skill.latestVersion;
   if (version?.files && version.files.length > 0) {
@@ -2248,7 +2328,7 @@ async function writeSkillToFilesystem2(cwd, skill, fetchFn) {
 // src/run-scenario/agents/opencode/write-sub-agents.ts
 var import_promises9 = require("fs/promises");
-var import_path9 = require("path");
+var import_path10 = require("path");
 var import_evalforge_github_client5 = require("@wix/evalforge-github-client");
 var AGENTS_DIR2 = ".opencode/agents";
 function toAgentFilename2(name, index, nameCount) {
@@ -2286,12 +2366,12 @@ async function resolveSubAgentContent2(agent, fetchFn) {
 }
 async function writeSubAgentsToFilesystem2(cwd, subAgents, fetchFn = import_evalforge_github_client5.fetchGitHubFile) {
   if (subAgents.length === 0) return;
-  const agentsDir = (0, import_path9.join)(cwd, AGENTS_DIR2);
+  const agentsDir = (0, import_path10.join)(cwd, AGENTS_DIR2);
   await (0, import_promises9.mkdir)(agentsDir, { recursive: true });
   const nameCount = /* @__PURE__ */ new Map();
   for (const [i, agent] of subAgents.entries()) {
     const filename = toAgentFilename2(agent.name, i, nameCount);
-    const filePath = (0, import_path9.join)(agentsDir, `${filename}.md`);
+    const filePath = (0, import_path10.join)(agentsDir, `${filename}.md`);
     const content = await resolveSubAgentContent2(agent, fetchFn);
     await (0, import_promises9.writeFile)(filePath, content, "utf8");
   }
@@ -2454,7 +2534,7 @@ async function buildOpenCodeEnv(options) {
 // src/run-scenario/agents/opencode/build-trace.ts
 var import_evalforge_types7 = require("@wix/evalforge-types");
-var import_crypto2 = require("crypto");
+var import_crypto3 = require("crypto");
 function toCanonicalModelId(modelId) {
   const slashIndex = modelId.indexOf("/");
   return slashIndex > 0 ? modelId.slice(slashIndex + 1) : modelId;
@@ -2530,7 +2610,7 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
     const totalSubSteps = thinkingSubSteps + toolSubSteps + textSubSteps || 1;
     if (hasThinking && (hasText || toolCallCount > 0)) {
       subSteps.push({
-        id: (0, import_crypto2.randomUUID)(),
+        id: (0, import_crypto3.randomUUID)(),
         stepNumber: 0,
         turnIndex,
         type: import_evalforge_types7.LLMStepType.THINKING,
@@ -2559,7 +2639,7 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
         const toolFraction = toolBudgetSteps > 0 ? 1 / toolBudgetSteps : 1;
         const remainingFraction = (totalSubSteps - thinkingSubSteps) / totalSubSteps;
         subSteps.push({
-          id: (0, import_crypto2.randomUUID)(),
+          id: (0, import_crypto3.randomUUID)(),
           stepNumber: 0,
           turnIndex,
           type: import_evalforge_types7.LLMStepType.TOOL_USE,
@@ -2589,7 +2669,7 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
     }
     if (hasText && toolCallCount > 0) {
       subSteps.push({
-        id: (0, import_crypto2.randomUUID)(),
+        id: (0, import_crypto3.randomUUID)(),
         stepNumber: 0,
         turnIndex,
         type: import_evalforge_types7.LLMStepType.COMPLETION,
@@ -2611,7 +2691,7 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
     if (subSteps.length === 0) {
       const stepType = hasThinking && !hasText ? import_evalforge_types7.LLMStepType.THINKING : import_evalforge_types7.LLMStepType.COMPLETION;
       subSteps.push({
-        id: (0, import_crypto2.randomUUID)(),
+        id: (0, import_crypto3.randomUUID)(),
         stepNumber: 0,
         turnIndex,
         type: stepType,
@@ -2680,7 +2760,7 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
     stepTypeBreakdown
   };
   return {
-    id: (0, import_crypto2.randomUUID)(),
+    id: (0, import_crypto3.randomUUID)(),
     steps: allSteps,
     summary
   };
@@ -2755,7 +2835,7 @@ function buildConversation2(timestampedEvents) {
 // src/run-scenario/agents/opencode/execute.ts
 var import_promises10 = require("fs/promises");
-var import_path10 = require("path");
+var import_path11 = require("path");
 var KILL_GRACE_PERIOD_MS = 5e3;
 var IDLE_TIMEOUT_MS = 12e4;
 var IDLE_CHECK_INTERVAL_MS = 15e3;
@@ -2780,14 +2860,14 @@ function extractToolAction(toolName, args) {
   return `Using ${toolName}...`;
 }
 async function writePromptImages(cwd, images) {
-  const imagesDir = (0, import_path10.join)(cwd, "prompt-images");
+  const imagesDir = (0, import_path11.join)(cwd, "prompt-images");
   await (0, import_promises10.mkdir)(imagesDir, { recursive: true });
   const filePaths = [];
   for (let i = 0; i < images.length; i++) {
     const img = images[i];
     const ext = img.mediaType.split("/")[1] || "png";
     const filename = `image-${i}.${ext}`;
-    const filepath = (0, import_path10.join)(imagesDir, filename);
+    const filepath = (0, import_path11.join)(imagesDir, filename);
     const buffer = Buffer.from(img.base64, "base64");
     await (0, import_promises10.writeFile)(filepath, buffer);
     filePaths.push(filepath);
@@ -2880,10 +2960,10 @@ async function prepareOpenCodeEnvironment(cwd, skills, options) {
   }
 }
 async function writeSystemPromptRule(cwd, systemPrompt) {
-  const rulesDir = (0, import_path10.join)(cwd, ".opencode", "rules");
+  const rulesDir = (0, import_path11.join)(cwd, ".opencode", "rules");
   await (0, import_promises10.mkdir)(rulesDir, { recursive: true });
   await (0, import_promises10.writeFile)(
-    (0, import_path10.join)(rulesDir, "evalforge-system-prompt.md"),
+    (0, import_path11.join)(rulesDir, "evalforge-system-prompt.md"),
     systemPrompt,
     "utf-8"
   );
@@ -2986,7 +3066,7 @@ function spawnOpenCodeProcess(opts) {
     };
     let child;
     try {
-      child = (0, import_child_process.spawn)("opencode", args, {
+      child = (0, import_child_process2.spawn)("opencode", args, {
         cwd,
         env,
         stdio: ["ignore", "pipe", "pipe"],
@@ -3471,7 +3551,7 @@ var import_anthropic = require("@ai-sdk/anthropic");
 var import_google = require("@ai-sdk/google");
 var import_openai = require("@ai-sdk/openai");
 var import_evalforge_types11 = require("@wix/evalforge-types");
-var import_crypto3 = require("crypto");
+var import_crypto4 = require("crypto");
 // src/run-scenario/agents/simple-agent/mcp-tools.ts
 var import_mcp = require("@ai-sdk/mcp");
@@ -4087,7 +4167,7 @@ function buildLLMTrace2(steps, totalDurationMs, totalUsage, modelId, provider, e
     const costUsd = calculateStepCost(step, modelId, provider, tokenUsage);
     const toolResultError = findToolResultError(step);
     return {
-      id: (0, import_crypto3.randomUUID)(),
+      id: (0, import_crypto4.randomUUID)(),
       stepNumber: i + 1,
       turnIndex: i,
       type: step.toolCalls.length > 0 ? import_evalforge_types11.LLMStepType.TOOL_USE : import_evalforge_types11.LLMStepType.COMPLETION,
@@ -4111,7 +4191,7 @@ function buildLLMTrace2(steps, totalDurationMs, totalUsage, modelId, provider, e
     total: totalUsage.totalTokens
   };
   return {
-    id: (0, import_crypto3.randomUUID)(),
+    id: (0, import_crypto4.randomUUID)(),
     steps: traceSteps,
     summary: {
       totalSteps: traceSteps.length,
@@ -4186,8 +4266,8 @@ var simpleAgentAdapter = new SimpleAgentAdapter();
 defaultRegistry.register(simpleAgentAdapter);
 // src/run-scenario/file-diff.ts
-var import_fs2 = require("fs");
-var import_path11 = require("path");
+var import_fs3 = require("fs");
+var import_path12 = require("path");
 // ../../node_modules/diff/lib/index.mjs
 function Diff() {
@@ -4291,11 +4371,11 @@ Diff.prototype = {
       }
     }
   },
-  addToPath: function addToPath(path2, added, removed, oldPosInc, options) {
-    var last = path2.lastComponent;
+  addToPath: function addToPath(path3, added, removed, oldPosInc, options) {
+    var last = path3.lastComponent;
     if (last && !options.oneChangePerToken && last.added === added && last.removed === removed) {
       return {
-        oldPos: path2.oldPos + oldPosInc,
+        oldPos: path3.oldPos + oldPosInc,
         lastComponent: {
           count: last.count + 1,
           added,
@@ -4305,7 +4385,7 @@ Diff.prototype = {
       };
     } else {
       return {
-        oldPos: path2.oldPos + oldPosInc,
+        oldPos: path3.oldPos + oldPosInc,
         lastComponent: {
           count: 1,
           added,
@@ -4745,9 +4825,9 @@ arrayDiff.join = arrayDiff.removeEmpty = function(value) {
 // src/run-scenario/file-diff.ts
 function deriveInfrastructurePaths(prePrep, postPrep) {
   const infraPaths = /* @__PURE__ */ new Set();
-  for (const path2 of Object.keys(postPrep)) {
-    if (prePrep[path2] === void 0 || prePrep[path2] !== postPrep[path2]) {
-      infraPaths.add(path2);
+  for (const path3 of Object.keys(postPrep)) {
+    if (prePrep[path3] === void 0 || prePrep[path3] !== postPrep[path3]) {
+      infraPaths.add(path3);
     }
   }
   return infraPaths;
@@ -4807,13 +4887,13 @@ function isBinaryFile(filename) {
 function snapshotDirectory(dir, baseDir) {
   const snapshot = {};
   const base = baseDir || dir;
-  if (!(0, import_fs2.existsSync)(dir)) {
+  if (!(0, import_fs3.existsSync)(dir)) {
     return snapshot;
   }
-  const entries = (0, import_fs2.readdirSync)(dir, { withFileTypes: true });
+  const entries = (0, import_fs3.readdirSync)(dir, { withFileTypes: true });
   for (const entry of entries) {
-    const fullPath = (0, import_path11.join)(dir, entry.name);
-    const relativePath = (0, import_path11.relative)(base, fullPath);
+    const fullPath = (0, import_path12.join)(dir, entry.name);
+    const relativePath = (0, import_path12.relative)(base, fullPath);
     if (shouldIgnore(entry.name)) {
       continue;
     }
@@ -4825,11 +4905,11 @@ function snapshotDirectory(dir, baseDir) {
         continue;
       }
       try {
-        const stats = (0, import_fs2.statSync)(fullPath);
+        const stats = (0, import_fs3.statSync)(fullPath);
         if (stats.size > MAX_FILE_SIZE) {
           continue;
         }
-        const content = (0, import_fs2.readFileSync)(fullPath, "utf-8");
+        const content = (0, import_fs3.readFileSync)(fullPath, "utf-8");
         snapshot[relativePath] = content;
       } catch {
         continue;
@@ -4858,19 +4938,19 @@ function generateDiffLines(before, after) {
 function diffSnapshots(before, after, infrastructurePaths) {
   const diffs = [];
   const allPaths = /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)]);
-  for (const path2 of allPaths) {
-    const beforeContent = before[path2] ?? "";
-    const afterContent = after[path2] ?? "";
-    if (before[path2] !== void 0 && beforeContent === afterContent) {
+  for (const path3 of allPaths) {
+    const beforeContent = before[path3] ?? "";
+    const afterContent = after[path3] ?? "";
+    if (before[path3] !== void 0 && beforeContent === afterContent) {
       continue;
     }
     const diffLines2 = generateDiffLines(beforeContent, afterContent);
     diffs.push({
-      path: path2,
+      path: path3,
       expected: beforeContent,
       actual: afterContent,
       diffLines: diffLines2,
-      ...infrastructurePaths?.has(path2) && { isInfrastructure: true }
+      ...infrastructurePaths?.has(path3) && { isInfrastructure: true }
     });
   }
   const deletedPaths = [...allPaths].filter((p) => after[p] === void 0);
@@ -4897,9 +4977,9 @@ function diffSnapshots(before, after, infrastructurePaths) {
 function extractTemplateFiles(before, after, infrastructurePaths) {
   const files = [];
   const allPaths = /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)]);
-  for (const path2 of allPaths) {
-    const beforeContent = before[path2];
-    const afterContent = after[path2];
+  for (const path3 of allPaths) {
+    const beforeContent = before[path3];
+    const afterContent = after[path3];
     if (afterContent === void 0) {
       continue;
     }
@@ -4912,10 +4992,10 @@ function extractTemplateFiles(before, after, infrastructurePaths) {
       status = "unchanged";
     }
     files.push({
-      path: path2,
+      path: path3,
       content: afterContent,
       status,
-      ...infrastructurePaths?.has(path2) && { isInfrastructure: true }
+      ...infrastructurePaths?.has(path3) && { isInfrastructure: true }
     });
   }
   files.sort((a, b) => a.path.localeCompare(b.path));
@@ -4999,7 +5079,7 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
     }
   } : agent?.modelConfig ?? (llmTrace?.summary.modelsUsed?.[0] ? { model: llmTrace.summary.modelsUsed[0] } : void 0);
   return {
-    id: (0, import_crypto4.randomUUID)(),
+    id: (0, import_crypto5.randomUUID)(),
     targetId,
     targetName,
     scenarioId: scenario.id,
@@ -5020,6 +5100,24 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
 // src/run-scenario/index.ts
 async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions) {
   const targetId = evalData.evalRun.presetId ?? evalData.agent?.id ?? evalData.evalRun.id;
+  const targetName = evalData.presetName ?? evalData.agent?.name ?? "";
+  if (template) {
+    console.log(
+      (0, import_evalforge_types13.formatTraceEventLine)({
+        evalRunId: evalRunId2,
+        scenarioId: scenario.id,
+        scenarioName: scenario.name,
+        targetId,
+        targetName,
+        stepNumber: 0,
+        type: import_evalforge_types13.LiveTraceEventType.PROGRESS,
+        outputPreview: "Setting up environment (installing dependencies)...",
+        elapsedMs: 0,
+        timestamp: (/* @__PURE__ */ new Date()).toISOString(),
+        isComplete: false
+      })
+    );
+  }
   const workDir = await prepareWorkingDirectory(
     config,
     evalRunId2,
@@ -5083,7 +5181,7 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
 }
 // src/evaluation-loop.ts
-var import_crypto5 = require("crypto");
+var import_crypto6 = require("crypto");
 async function runEvaluationLoop(scenarioItems, evalData, callbacks) {
   const runsPerScenario = evalData.evalRun.runsPerScenario ?? 1;
   let completedExecutions = 0;
@@ -5109,7 +5207,7 @@ async function runEvaluationLoop(scenarioItems, evalData, callbacks) {
           `[Evaluator] Scenario iteration failed, recording as error result: "${scenario.name}"${iterLabel} \u2014 ${errorMsg}`
         );
         const errorResult = {
-          id: (0, import_crypto5.randomUUID)(),
+          id: (0, import_crypto6.randomUUID)(),
           targetId,
           targetName,
           scenarioId: scenario.id,