npm - @wix/evalforge-evaluator - Versions diffs - 0.182.0 → 0.184.0 - Mend

@wix/evalforge-evaluator 0.182.0 → 0.184.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/build/index.js CHANGED Viewed

@@ -5226,7 +5226,7 @@ var require_index_node = __commonJS({
 });
 // src/index.ts
-var import_evalforge_types15 = require("@wix/evalforge-types");
+var import_evalforge_types16 = require("@wix/evalforge-types");
 // src/config.ts
 function loadConfig() {
@@ -7115,7 +7115,7 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
 }
 // src/run-scenario/index.ts
-var import_evalforge_types13 = require("@wix/evalforge-types");
+var import_evalforge_types14 = require("@wix/evalforge-types");
 var import_eval_assertions = require("@wix/eval-assertions");
 // src/run-scenario/environment.ts
@@ -7451,50 +7451,122 @@ function getAdapter(identifier) {
 }
 // src/run-scenario/agents/claude-code/claude-code-adapter.ts
-var import_evalforge_types5 = require("@wix/evalforge-types");
+var import_evalforge_types6 = require("@wix/evalforge-types");
 // src/run-scenario/agents/claude-code/execute.ts
-var import_evalforge_types4 = require("@wix/evalforge-types");
+var import_evalforge_types5 = require("@wix/evalforge-types");
 // src/run-scenario/agents/claude-code/write-skills.ts
 var import_promises3 = require("fs/promises");
 var import_path4 = require("path");
+// src/run-scenario/agents/shared/resolve-capability-content.ts
 var import_evalforge_github_client2 = require("@wix/evalforge-github-client");
-async function writeSkillsToFilesystem(cwd, skills, fetchFn = import_evalforge_github_client2.fetchGitHubFolder) {
+var import_evalforge_types2 = require("@wix/evalforge-types");
+var USER_AGENT = "EvalForge-Evaluator";
+async function resolveSkillFiles(skill, fetchFn = import_evalforge_github_client2.fetchGitHubFolder) {
+  const version = skill.latestVersion;
+  if (version?.files && version.files.length > 0) {
+    console.log(
+      `[Skill] ${skill.name}: using ${version.files.length} file(s) from snapshot`
+    );
+    return version.files;
+  }
+  if (skill.source) {
+    const files = await fetchFn(skill.source, { userAgent: USER_AGENT });
+    console.log(
+      `[Skill] ${skill.name}: fetched ${files.length} file(s) from GitHub (live)`
+    );
+    return files;
+  }
+  throw new Error(`Skill ${skill.name} has no files and no source configured`);
+}
+async function fetchSourceFile(label, noun, name, source, fetchFn) {
+  try {
+    const content = await fetchFn(source, { userAgent: USER_AGENT });
+    console.log(
+      `[${label}] Fetched "${name}" from ${source.owner}/${source.repo}/${source.path}@${source.ref}`
+    );
+    return content;
+  } catch (error) {
+    const message = error instanceof Error ? error.message : "Unknown error";
+    console.error(`[${label}] "${name}": GitHub fetch failed: ${message}`);
+    throw new Error(
+      `Failed to fetch ${noun} "${name}" from GitHub: ${message}`
+    );
+  }
+}
+async function resolveSubAgentMd(agent, fetchFn = import_evalforge_github_client2.fetchGitHubFile) {
+  if (agent.source) {
+    return fetchSourceFile(
+      "SubAgents",
+      "sub-agent",
+      agent.name,
+      agent.source,
+      fetchFn
+    );
+  }
+  if (!agent.subAgentMd) {
+    console.warn(
+      `[SubAgents] "${agent.name}" has empty inline content \u2013 the agent file will be blank`
+    );
+  }
+  return agent.subAgentMd;
+}
+async function resolveRuleText(rule, fetchFn = import_evalforge_github_client2.fetchGitHubFile) {
+  if (!rule.source) {
+    return rule.content;
+  }
+  return fetchSourceFile("Rules", "rule", rule.name, rule.source, fetchFn);
+}
+async function resolveMcpConfig(mcp, fetchFn = import_evalforge_github_client2.fetchGitHubFile) {
+  if (!mcp.source) {
+    return mcp.config;
+  }
+  const raw = await fetchSourceFile(
+    "MCP",
+    "MCP",
+    mcp.name,
+    mcp.source,
+    fetchFn
+  );
+  let parsed;
+  try {
+    parsed = JSON.parse(raw);
+  } catch (error) {
+    const message = error instanceof Error ? error.message : "Unknown error";
+    throw new Error(
+      `MCP "${mcp.name}" GitHub source is not valid JSON: ${message}`
+    );
+  }
+  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
+    throw new Error(`MCP "${mcp.name}" GitHub source must be a JSON object`);
+  }
+  const obj = parsed;
+  const servers = obj[import_evalforge_types2.MCP_SERVERS_JSON_KEY];
+  if (typeof servers === "object" && servers !== null && !Array.isArray(servers)) {
+    return servers;
+  }
+  return obj;
+}
+// src/run-scenario/agents/claude-code/write-skills.ts
+async function writeSkillsToFilesystem(cwd, skills, fetchFn) {
   await Promise.all(
     skills.map((skill) => writeSkillToFilesystem(cwd, skill, fetchFn))
   );
 }
-async function writeSkillToFilesystem(cwd, skill, fetchFn = import_evalforge_github_client2.fetchGitHubFolder) {
-  const skillName = skill.name;
-  const skillDir = (0, import_path4.join)(cwd, ".claude", "skills", skillName);
+async function writeSkillToFilesystem(cwd, skill, fetchFn) {
+  const skillDir = (0, import_path4.join)(cwd, ".claude", "skills", skill.name);
   await (0, import_promises3.mkdir)(skillDir, { recursive: true });
-  const version = skill.latestVersion;
-  if (version?.files && version.files.length > 0) {
-    await writeFilesToDirectory(skillDir, version.files);
-    console.log(
-      `[Skill] ${skillName}: wrote ${version.files.length} file(s) from snapshot`
+  try {
+    const files = await resolveSkillFiles(skill, fetchFn);
+    await writeFilesToDirectory(skillDir, files);
+  } catch (error) {
+    const message = error instanceof Error ? error.message : "Unknown error";
+    throw new Error(
+      `Failed to write skill ${skill.name} to filesystem: ${message}`
     );
-  } else if (skill.source) {
-    try {
-      const files = await fetchFn(skill.source, {
-        userAgent: "EvalForge-Evaluator"
-      });
-      await writeFilesToDirectory(skillDir, files);
-      console.log(
-        `[Skill] ${skillName}: wrote ${files.length} file(s) from GitHub (live)`
-      );
-    } catch (error) {
-      const message = error instanceof Error ? error.message : "Unknown error";
-      console.error(
-        `[Skill] ${skillName}: GitHub fetch failed: ${message}, no files to fall back to`
-      );
-      throw new Error(
-        `Failed to write skill ${skillName} to filesystem: ${message}`
-      );
-    }
-  } else {
-    throw new Error(`Skill ${skillName} has no files and no source configured`);
   }
 }
@@ -7512,7 +7584,7 @@ var import_crypto2 = require("crypto");
 // src/run-scenario/agents/claude-code/write-mcp.ts
 var import_promises5 = require("fs/promises");
 var import_path6 = require("path");
-var import_evalforge_types2 = require("@wix/evalforge-types");
+var import_evalforge_types3 = require("@wix/evalforge-types");
 // src/run-scenario/agents/shared/resolve-mcp-placeholders.ts
 var import_promises4 = require("fs/promises");
@@ -7557,11 +7629,11 @@ async function resolveMcpPlaceholders(mcpServers, options = {}) {
 }
 // src/run-scenario/agents/claude-code/write-mcp.ts
-async function writeMcpToFilesystem(cwd, mcps) {
+async function writeMcpToFilesystem(cwd, mcps, fetchFn) {
   if (mcps.length === 0) return;
   const mcpServers = {};
   for (const mcp of mcps) {
-    const config = mcp.config;
+    const config = await resolveMcpConfig(mcp, fetchFn);
     for (const [key, value] of Object.entries(config)) {
       if (typeof value !== "object" || value === null || Array.isArray(value)) {
         throw new Error(
@@ -7573,7 +7645,7 @@ async function writeMcpToFilesystem(cwd, mcps) {
   }
   const resolvedServers = await resolveMcpPlaceholders(mcpServers, { cwd });
   const content = JSON.stringify(
-    { [import_evalforge_types2.MCP_SERVERS_JSON_KEY]: resolvedServers },
+    { [import_evalforge_types3.MCP_SERVERS_JSON_KEY]: resolvedServers },
     null,
     2
   );
@@ -7585,7 +7657,6 @@ async function writeMcpToFilesystem(cwd, mcps) {
 // src/run-scenario/agents/claude-code/write-sub-agents.ts
 var import_promises6 = require("fs/promises");
 var import_path7 = require("path");
-var import_evalforge_github_client3 = require("@wix/evalforge-github-client");
 var AGENTS_DIR = ".claude/agents";
 function toAgentFilename(name, index, nameCount) {
   const base = (name || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
@@ -7593,34 +7664,7 @@ function toAgentFilename(name, index, nameCount) {
   nameCount.set(base, count + 1);
   return count === 0 ? base : `${base}-${count + 1}`;
 }
-async function resolveSubAgentContent(agent, fetchFn) {
-  if (agent.source) {
-    try {
-      const content = await fetchFn(agent.source, {
-        userAgent: "EvalForge-Evaluator"
-      });
-      console.log(
-        `[SubAgents] Fetched "${agent.name}" from ${agent.source.owner}/${agent.source.repo}/${agent.source.path}@${agent.source.ref}`
-      );
-      return content;
-    } catch (error) {
-      const message = error instanceof Error ? error.message : "Unknown error";
-      console.error(
-        `[SubAgents] "${agent.name}": GitHub fetch failed: ${message}`
-      );
-      throw new Error(
-        `Failed to fetch sub-agent "${agent.name}" from GitHub: ${message}`
-      );
-    }
-  }
-  if (!agent.subAgentMd) {
-    console.warn(
-      `[SubAgents] "${agent.name}" has empty inline content \u2013 the agent file will be blank`
-    );
-  }
-  return agent.subAgentMd;
-}
-async function writeSubAgentsToFilesystem(cwd, subAgents, fetchFn = import_evalforge_github_client3.fetchGitHubFile) {
+async function writeSubAgentsToFilesystem(cwd, subAgents, fetchFn) {
   if (subAgents.length === 0) return;
   const agentsDir = (0, import_path7.join)(cwd, AGENTS_DIR);
   await (0, import_promises6.mkdir)(agentsDir, { recursive: true });
@@ -7628,7 +7672,7 @@ async function writeSubAgentsToFilesystem(cwd, subAgents, fetchFn = import_evalf
   for (const [i, agent] of subAgents.entries()) {
     const filename = toAgentFilename(agent.name, i, nameCount);
     const filePath = (0, import_path7.join)(agentsDir, `${filename}.md`);
-    const content = await resolveSubAgentContent(agent, fetchFn);
+    const content = await resolveSubAgentMd(agent, fetchFn);
     await (0, import_promises6.writeFile)(filePath, content, "utf8");
   }
   console.log(`[SubAgents] Written to ${agentsDir}`);
@@ -7678,18 +7722,19 @@ function validateGenericDirectory(dir, cwd) {
   }
   return trimmed;
 }
-async function writeRulesToFilesystem(cwd, rules) {
+async function writeRulesToFilesystem(cwd, rules, fetchFn) {
   if (rules.length === 0) return;
   const nameCount = /* @__PURE__ */ new Map();
   let hasCursorRules = false;
   for (const [i, rule] of rules.entries()) {
+    const content = await resolveRuleText(rule, fetchFn);
     switch (rule.ruleType) {
       case "claude-md": {
-        await appendToFile((0, import_path8.join)(cwd, "CLAUDE.md"), rule.content);
+        await appendToFile((0, import_path8.join)(cwd, "CLAUDE.md"), content);
         break;
       }
       case "agents-md": {
-        await appendToFile((0, import_path8.join)(cwd, "AGENTS.md"), rule.content);
+        await appendToFile((0, import_path8.join)(cwd, "AGENTS.md"), content);
         break;
       }
       case "cursor-rule": {
@@ -7699,7 +7744,7 @@ async function writeRulesToFilesystem(cwd, rules) {
         }
         const filename = toRuleFilename(rule.name, i, nameCount);
         const filePath = (0, import_path8.join)(cwd, CURSOR_RULES_DIR, `${filename}.md`);
-        await (0, import_promises7.writeFile)(filePath, rule.content, "utf8");
+        await (0, import_promises7.writeFile)(filePath, content, "utf8");
         break;
       }
       case "generic": {
@@ -7710,7 +7755,7 @@ async function writeRulesToFilesystem(cwd, rules) {
         const dirPath = (0, import_path8.join)(cwd, directory);
         await (0, import_promises7.mkdir)(dirPath, { recursive: true });
         const filename = toRuleFilename(rule.name, i, nameCount);
-        await (0, import_promises7.writeFile)((0, import_path8.join)(dirPath, `${filename}.md`), rule.content, "utf8");
+        await (0, import_promises7.writeFile)((0, import_path8.join)(dirPath, `${filename}.md`), content, "utf8");
         break;
       }
       default: {
@@ -7800,14 +7845,14 @@ function buildConversation(timestampedMessages) {
 }
 // src/run-scenario/agents/shared/trace-emit.ts
-var import_evalforge_types3 = require("@wix/evalforge-types");
+var import_evalforge_types4 = require("@wix/evalforge-types");
 function emitTraceEvent(event, pushEvent) {
-  console.log(`${import_evalforge_types3.TRACE_EVENT_PREFIX}${JSON.stringify(event)}`);
+  console.log(`${import_evalforge_types4.TRACE_EVENT_PREFIX}${JSON.stringify(event)}`);
   pushEvent?.(event);
 }
 // src/run-scenario/agents/claude-code/execute.ts
-var DEFAULT_MODEL = import_evalforge_types4.ClaudeModel.CLAUDE_4_5_SONNET_1_0;
+var DEFAULT_MODEL = import_evalforge_types5.ClaudeModel.CLAUDE_4_5_SONNET_1_0;
 async function* buildPromptStream(triggerPrompt, images) {
   yield {
     type: "user",
@@ -7872,7 +7917,7 @@ function extractToolActionDescription(toolName, toolArgs) {
   return `Using ${toolName}...`;
 }
 function createTraceEventFromMessage(message, context, stepNumber, isComplete) {
-  let type = import_evalforge_types4.LiveTraceEventType.COMPLETION;
+  let type = import_evalforge_types5.LiveTraceEventType.COMPLETION;
   let toolName;
   let toolArgs;
   let outputPreview;
@@ -7880,28 +7925,28 @@ function createTraceEventFromMessage(message, context, stepNumber, isComplete) {
   let thinking;
   for (const block of message.message.content) {
     if (block.type === "tool_use") {
-      type = import_evalforge_types4.LiveTraceEventType.TOOL_USE;
+      type = import_evalforge_types5.LiveTraceEventType.TOOL_USE;
       toolName = block.name;
       toolArgs = JSON.stringify(block.input).slice(0, 500);
       const input = block.input;
       if (input.file_path || input.path || input.target_file) {
         filePath = String(input.file_path || input.path || input.target_file);
         if (block.name === "Write" || block.name === "Edit" || block.name === "write" || block.name === "edit") {
-          type = import_evalforge_types4.LiveTraceEventType.FILE_WRITE;
+          type = import_evalforge_types5.LiveTraceEventType.FILE_WRITE;
         } else if (block.name === "Read" || block.name === "read" || block.name === "View") {
-          type = import_evalforge_types4.LiveTraceEventType.FILE_READ;
+          type = import_evalforge_types5.LiveTraceEventType.FILE_READ;
         }
       }
     } else if (block.type === "text") {
       outputPreview = block.text.slice(0, 500);
       if (!toolName) {
-        type = import_evalforge_types4.LiveTraceEventType.COMPLETION;
+        type = import_evalforge_types5.LiveTraceEventType.COMPLETION;
       }
     } else if (block.type === "thinking") {
       const thinkingBlock = block;
       thinking = thinkingBlock.thinking.slice(0, 500);
       if (!outputPreview && !toolName) {
-        type = import_evalforge_types4.LiveTraceEventType.THINKING;
+        type = import_evalforge_types5.LiveTraceEventType.THINKING;
       }
     }
   }
@@ -7967,7 +8012,7 @@ function createTraceEventFromAnyMessage(message, context, stepNumber, isComplete
     }
     return {
       ...baseEvent,
-      type: import_evalforge_types4.LiveTraceEventType.TOOL_RESULT,
+      type: import_evalforge_types5.LiveTraceEventType.TOOL_RESULT,
       outputPreview: outputPreview || "(tool result)"
     };
   }
@@ -7975,7 +8020,7 @@ function createTraceEventFromAnyMessage(message, context, stepNumber, isComplete
     const sysMsg = message;
     return {
       ...baseEvent,
-      type: import_evalforge_types4.LiveTraceEventType.SYSTEM,
+      type: import_evalforge_types5.LiveTraceEventType.SYSTEM,
       outputPreview: sysMsg.subtype || "system"
     };
   }
@@ -7984,7 +8029,7 @@ function createTraceEventFromAnyMessage(message, context, stepNumber, isComplete
   }
   return {
     ...baseEvent,
-    type: import_evalforge_types4.LiveTraceEventType.PROGRESS,
+    type: import_evalforge_types5.LiveTraceEventType.PROGRESS,
     outputPreview: `Message type: ${message.type}`
   };
 }
@@ -8086,7 +8131,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
     queryOptions.systemPrompt = {
       type: "preset",
       preset: "claude_code",
-      append: import_evalforge_types4.DEFAULT_EVALUATOR_SYSTEM_PROMPT
+      append: import_evalforge_types5.DEFAULT_EVALUATOR_SYSTEM_PROMPT
     };
   }
   if (options.temperature !== void 0) {
@@ -8121,7 +8166,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
       targetId: traceContext.targetId,
       targetName: traceContext.targetName,
       stepNumber: 0,
-      type: import_evalforge_types4.LiveTraceEventType.DIAGNOSTIC,
+      type: import_evalforge_types5.LiveTraceEventType.DIAGNOSTIC,
       outputPreview: JSON.stringify({
         event: "pre-sdk-execution",
         model: queryOptions.model,
@@ -8185,7 +8230,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
           targetId: traceContext.targetId,
           targetName: traceContext.targetName,
           stepNumber: traceStepNumber,
-          type: import_evalforge_types4.LiveTraceEventType.PROGRESS,
+          type: import_evalforge_types5.LiveTraceEventType.PROGRESS,
           outputPreview: progressMessage,
           toolName: lastToolName,
           filePath: lastFilePath,
@@ -8222,18 +8267,18 @@ async function executeWithClaudeCode(skills, scenario, options) {
           if (traceEvent) {
             lastToolName = traceEvent.toolName;
             lastFilePath = traceEvent.filePath;
-            if (traceEvent.type === import_evalforge_types4.LiveTraceEventType.THINKING) {
+            if (traceEvent.type === import_evalforge_types5.LiveTraceEventType.THINKING) {
               lastAction = "Thinking...";
-            } else if (traceEvent.type === import_evalforge_types4.LiveTraceEventType.TOOL_USE) {
+            } else if (traceEvent.type === import_evalforge_types5.LiveTraceEventType.TOOL_USE) {
               lastAction = extractToolActionDescription(
                 traceEvent.toolName,
                 traceEvent.toolArgs
               );
-            } else if (traceEvent.type === import_evalforge_types4.LiveTraceEventType.FILE_WRITE) {
+            } else if (traceEvent.type === import_evalforge_types5.LiveTraceEventType.FILE_WRITE) {
               lastAction = `Writing: ${traceEvent.filePath || "file"}`;
-            } else if (traceEvent.type === import_evalforge_types4.LiveTraceEventType.FILE_READ) {
+            } else if (traceEvent.type === import_evalforge_types5.LiveTraceEventType.FILE_READ) {
               lastAction = `Reading: ${traceEvent.filePath || "file"}`;
-            } else if (traceEvent.type === import_evalforge_types4.LiveTraceEventType.COMPLETION) {
+            } else if (traceEvent.type === import_evalforge_types5.LiveTraceEventType.COMPLETION) {
               lastAction = "Processing response...";
             }
             emitTraceEvent(traceEvent, traceContext.pushEvent);
@@ -8411,7 +8456,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
         targetId: traceContext.targetId,
         targetName: traceContext.targetName,
         stepNumber: traceStepNumber + 1,
-        type: import_evalforge_types4.LiveTraceEventType.DIAGNOSTIC,
+        type: import_evalforge_types5.LiveTraceEventType.DIAGNOSTIC,
         outputPreview: JSON.stringify(
           {
             event: "sdk-execution-failed",
@@ -8445,7 +8490,7 @@ Stack: ${errorStack.split("\n").slice(0, 5).join("\n")}` : "")
         targetId: traceContext.targetId,
         targetName: traceContext.targetName,
         stepNumber: traceStepNumber + 1,
-        type: import_evalforge_types4.LiveTraceEventType.COMPLETION,
+        type: import_evalforge_types5.LiveTraceEventType.COMPLETION,
         outputPreview: "Scenario execution completed",
         timestamp: (/* @__PURE__ */ new Date()).toISOString(),
         isComplete: true
@@ -8625,9 +8670,12 @@ function processMessages(timestampedMessages, startTime, endTime) {
       if (!step.toolCalls) continue;
       for (const tc of step.toolCalls) {
         if (tc.toolUseId && toolResultErrors.has(tc.toolUseId)) {
-          step.hasToolError = true;
-          step.toolErrorContent = toolResultErrors.get(tc.toolUseId);
-          break;
+          tc.isError = true;
+          tc.errorContent = toolResultErrors.get(tc.toolUseId);
+          if (!step.hasToolError) {
+            step.hasToolError = true;
+            step.toolErrorContent = tc.errorContent;
+          }
         }
       }
     }
@@ -8717,7 +8765,7 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
         stepNumber: 0,
         // renumbered below
         turnIndex,
-        type: import_evalforge_types4.LLMStepType.THINKING,
+        type: import_evalforge_types5.LLMStepType.THINKING,
         model,
         provider: "anthropic",
         startedAt: step.startedAt.toISOString(),
@@ -8731,8 +8779,8 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
         },
         costUsd: stepCost / totalSubSteps,
         outputPreview: step.thinking?.slice(0, 200),
-        success: isSuccess,
-        error: errorMsg
+        success: true,
+        error: void 0
       });
     }
     if (toolCallCount > 0) {
@@ -8742,11 +8790,13 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
         const toolBudgetSteps = toolSubSteps + textSubSteps;
         const toolFraction = toolBudgetSteps > 0 ? 1 / toolBudgetSteps : 1;
         const remainingFraction = (totalSubSteps - thinkingSubSteps) / totalSubSteps;
+        const toolSuccess = !tc.isError;
+        const toolError = tc.isError ? tc.errorContent ?? "Tool call failed" : void 0;
         subSteps.push({
           id: (0, import_crypto2.randomUUID)(),
           stepNumber: 0,
           turnIndex,
-          type: import_evalforge_types4.LLMStepType.TOOL_USE,
+          type: import_evalforge_types5.LLMStepType.TOOL_USE,
           model,
           provider: "anthropic",
           startedAt: step.startedAt.toISOString(),
@@ -8766,8 +8816,8 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
           toolName: tc.toolName,
           toolArguments: JSON.stringify(tc.args),
           outputPreview: tcIdx === 0 && !hasText ? (step.text || step.thinking)?.slice(0, 200) : void 0,
-          success: isSuccess,
-          error: errorMsg
+          success: toolSuccess,
+          error: toolError
         });
       }
     }
@@ -8776,7 +8826,7 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
         id: (0, import_crypto2.randomUUID)(),
         stepNumber: 0,
         turnIndex,
-        type: import_evalforge_types4.LLMStepType.COMPLETION,
+        type: import_evalforge_types5.LLMStepType.COMPLETION,
         model,
         provider: "anthropic",
         startedAt: step.startedAt.toISOString(),
@@ -8788,12 +8838,12 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
         },
         costUsd: stepCost - subSteps.reduce((s, ss) => s + ss.costUsd, 0),
         outputPreview: step.text?.slice(0, 200),
-        success: isSuccess,
-        error: errorMsg
+        success: true,
+        error: void 0
       });
     }
     if (subSteps.length === 0) {
-      const stepType = hasThinking && !hasText ? import_evalforge_types4.LLMStepType.THINKING : import_evalforge_types4.LLMStepType.COMPLETION;
+      const stepType = hasThinking && !hasText ? import_evalforge_types5.LLMStepType.THINKING : import_evalforge_types5.LLMStepType.COMPLETION;
       subSteps.push({
         id: (0, import_crypto2.randomUUID)(),
         stepNumber: 0,
@@ -8863,7 +8913,7 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
 var ClaudeCodeAdapter = class {
   id = "claude-code";
   name = "Claude Code";
-  supportedCommands = [import_evalforge_types5.AgentRunCommand.CLAUDE];
+  supportedCommands = [import_evalforge_types6.AgentRunCommand.CLAUDE];
   /**
    * Write infrastructure files (settings, MCPs, sub-agents, rules, skills)
    * before the baseline snapshot is taken.
@@ -8895,9 +8945,9 @@ var ClaudeCodeAdapter = class {
       rules,
       systemPrompt
     } = context;
-    const typed = config ? import_evalforge_types5.ClaudeCodeConfigSchema.passthrough().safeParse(config) : void 0;
+    const typed = config ? import_evalforge_types6.ClaudeCodeConfigSchema.passthrough().safeParse(config) : void 0;
     const cfg = typed?.success ? typed.data : void 0;
-    const schemaKeys = new Set(Object.keys(import_evalforge_types5.ClaudeCodeConfigSchema.shape));
+    const schemaKeys = new Set(Object.keys(import_evalforge_types6.ClaudeCodeConfigSchema.shape));
     const extras = {};
     if (config) {
       for (const [key, value] of Object.entries(config)) {
@@ -8952,11 +9002,11 @@ var claudeCodeAdapter = new ClaudeCodeAdapter();
 defaultRegistry.register(claudeCodeAdapter);
 // src/run-scenario/agents/opencode/opencode-adapter.ts
-var import_evalforge_types9 = require("@wix/evalforge-types");
+var import_evalforge_types10 = require("@wix/evalforge-types");
 // src/run-scenario/agents/opencode/execute.ts
 var import_child_process2 = require("child_process");
-var import_evalforge_types8 = require("@wix/evalforge-types");
+var import_evalforge_types9 = require("@wix/evalforge-types");
 // src/run-scenario/agents/opencode/types.ts
 function tryParseJson(text) {
@@ -8970,49 +9020,28 @@ function tryParseJson(text) {
 // src/run-scenario/agents/opencode/write-skills.ts
 var import_promises8 = require("fs/promises");
 var import_path9 = require("path");
-var import_evalforge_github_client4 = require("@wix/evalforge-github-client");
-async function writeSkillsToFilesystem2(cwd, skills, fetchFn = import_evalforge_github_client4.fetchGitHubFolder) {
+async function writeSkillsToFilesystem2(cwd, skills, fetchFn) {
   await Promise.all(
     skills.map((skill) => writeSkillToFilesystem2(cwd, skill, fetchFn))
   );
 }
 async function writeSkillToFilesystem2(cwd, skill, fetchFn) {
-  const skillName = skill.name;
-  const skillDir = (0, import_path9.join)(cwd, ".opencode", "skills", skillName);
+  const skillDir = (0, import_path9.join)(cwd, ".opencode", "skills", skill.name);
   await (0, import_promises8.mkdir)(skillDir, { recursive: true });
-  const version = skill.latestVersion;
-  if (version?.files && version.files.length > 0) {
-    await writeFilesToDirectory(skillDir, version.files);
-    console.log(
-      `[Skill] ${skillName}: wrote ${version.files.length} file(s) from snapshot`
+  try {
+    const files = await resolveSkillFiles(skill, fetchFn);
+    await writeFilesToDirectory(skillDir, files);
+  } catch (error) {
+    const message = error instanceof Error ? error.message : "Unknown error";
+    throw new Error(
+      `Failed to write skill ${skill.name} to filesystem: ${message}`
     );
-  } else if (skill.source) {
-    try {
-      const files = await fetchFn(skill.source, {
-        userAgent: "EvalForge-Evaluator"
-      });
-      await writeFilesToDirectory(skillDir, files);
-      console.log(
-        `[Skill] ${skillName}: wrote ${files.length} file(s) from GitHub (live)`
-      );
-    } catch (error) {
-      const message = error instanceof Error ? error.message : "Unknown error";
-      console.error(
-        `[Skill] ${skillName}: GitHub fetch failed: ${message}, no files to fall back to`
-      );
-      throw new Error(
-        `Failed to write skill ${skillName} to filesystem: ${message}`
-      );
-    }
-  } else {
-    throw new Error(`Skill ${skillName} has no files and no source configured`);
   }
 }
 // src/run-scenario/agents/opencode/write-sub-agents.ts
 var import_promises9 = require("fs/promises");
 var import_path10 = require("path");
-var import_evalforge_github_client5 = require("@wix/evalforge-github-client");
 var AGENTS_DIR2 = ".opencode/agents";
 function toAgentFilename2(name, index, nameCount) {
   const base = (name || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
@@ -9020,34 +9049,7 @@ function toAgentFilename2(name, index, nameCount) {
   nameCount.set(base, count + 1);
   return count === 0 ? base : `${base}-${count + 1}`;
 }
-async function resolveSubAgentContent2(agent, fetchFn) {
-  if (agent.source) {
-    try {
-      const content = await fetchFn(agent.source, {
-        userAgent: "EvalForge-Evaluator"
-      });
-      console.log(
-        `[SubAgents] Fetched "${agent.name}" from ${agent.source.owner}/${agent.source.repo}/${agent.source.path}@${agent.source.ref}`
-      );
-      return content;
-    } catch (error) {
-      const message = error instanceof Error ? error.message : "Unknown error";
-      console.error(
-        `[SubAgents] "${agent.name}": GitHub fetch failed: ${message}`
-      );
-      throw new Error(
-        `Failed to fetch sub-agent "${agent.name}" from GitHub: ${message}`
-      );
-    }
-  }
-  if (!agent.subAgentMd) {
-    console.warn(
-      `[SubAgents] "${agent.name}" has empty inline content \u2013 the agent file will be blank`
-    );
-  }
-  return agent.subAgentMd;
-}
-async function writeSubAgentsToFilesystem2(cwd, subAgents, fetchFn = import_evalforge_github_client5.fetchGitHubFile) {
+async function writeSubAgentsToFilesystem2(cwd, subAgents, fetchFn) {
   if (subAgents.length === 0) return;
   const agentsDir = (0, import_path10.join)(cwd, AGENTS_DIR2);
   await (0, import_promises9.mkdir)(agentsDir, { recursive: true });
@@ -9055,7 +9057,7 @@ async function writeSubAgentsToFilesystem2(cwd, subAgents, fetchFn = import_eval
   for (const [i, agent] of subAgents.entries()) {
     const filename = toAgentFilename2(agent.name, i, nameCount);
     const filePath = (0, import_path10.join)(agentsDir, `${filename}.md`);
-    const content = await resolveSubAgentContent2(agent, fetchFn);
+    const content = await resolveSubAgentMd(agent, fetchFn);
     await (0, import_promises9.writeFile)(filePath, content, "utf8");
   }
   console.log(`[SubAgents] Written to ${agentsDir}`);
@@ -9063,8 +9065,8 @@ async function writeSubAgentsToFilesystem2(cwd, subAgents, fetchFn = import_eval
 // src/run-scenario/agents/opencode/config.ts
 var import_os3 = require("os");
-var import_evalforge_types6 = require("@wix/evalforge-types");
-var DEFAULT_MODEL2 = `${import_evalforge_types6.ClaudeModel.CLAUDE_4_5_SONNET_1_0}`;
+var import_evalforge_types7 = require("@wix/evalforge-types");
+var DEFAULT_MODEL2 = `${import_evalforge_types7.ClaudeModel.CLAUDE_4_5_SONNET_1_0}`;
 var OPENCODE_MODEL_ALIASES = {
   "claude-sonnet-4": "claude-sonnet-4-0",
   "claude-opus-4": "claude-opus-4-0"
@@ -9080,10 +9082,10 @@ function parseModel(model) {
     };
   }
   const modelID = OPENCODE_MODEL_ALIASES[model] ?? model;
-  const isOpenAI = import_evalforge_types6.AVAILABLE_OPENAI_MODEL_IDS.includes(
+  const isOpenAI = import_evalforge_types7.AVAILABLE_OPENAI_MODEL_IDS.includes(
     model
   );
-  const isGemini = import_evalforge_types6.AVAILABLE_GEMINI_MODEL_IDS.includes(
+  const isGemini = import_evalforge_types7.AVAILABLE_GEMINI_MODEL_IDS.includes(
     model
   );
   if (isGemini) return { providerID: "google", modelID };
@@ -9152,7 +9154,7 @@ async function buildOpenCodeEnv(options) {
   if (options.mcps && options.mcps.length > 0) {
     const mcpServers = {};
     for (const mcpEntity of options.mcps) {
-      const entityConfig = mcpEntity.config;
+      const entityConfig = await resolveMcpConfig(mcpEntity);
       for (const [key, value] of Object.entries(entityConfig)) {
         if (typeof value !== "object" || value === null || Array.isArray(value)) {
           throw new Error(
@@ -9177,7 +9179,7 @@ async function buildOpenCodeEnv(options) {
   if (options.maxTurns != null && options.maxTurns > 0) {
     agentOverrides.maxSteps = options.maxTurns;
   }
-  const parsed = options.config ? import_evalforge_types6.OpenCodeConfigSchema.passthrough().safeParse(options.config) : void 0;
+  const parsed = options.config ? import_evalforge_types7.OpenCodeConfigSchema.passthrough().safeParse(options.config) : void 0;
   const configPermission = parsed?.success ? parsed.data.permission : void 0;
   const defaultPermission = {
     "*": "allow"
@@ -9219,7 +9221,7 @@ async function buildOpenCodeEnv(options) {
 }
 // src/run-scenario/agents/opencode/build-trace.ts
-var import_evalforge_types7 = require("@wix/evalforge-types");
+var import_evalforge_types8 = require("@wix/evalforge-types");
 var import_crypto3 = require("crypto");
 function toCanonicalModelId(modelId) {
   const slashIndex = modelId.indexOf("/");
@@ -9299,7 +9301,7 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
         id: (0, import_crypto3.randomUUID)(),
         stepNumber: 0,
         turnIndex,
-        type: import_evalforge_types7.LLMStepType.THINKING,
+        type: import_evalforge_types8.LLMStepType.THINKING,
         model: stepModel,
         provider: stepProvider,
         startedAt,
@@ -9328,7 +9330,7 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
           id: (0, import_crypto3.randomUUID)(),
           stepNumber: 0,
           turnIndex,
-          type: import_evalforge_types7.LLMStepType.TOOL_USE,
+          type: import_evalforge_types8.LLMStepType.TOOL_USE,
           model: stepModel,
           provider: stepProvider,
           startedAt,
@@ -9358,7 +9360,7 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
         id: (0, import_crypto3.randomUUID)(),
         stepNumber: 0,
         turnIndex,
-        type: import_evalforge_types7.LLMStepType.COMPLETION,
+        type: import_evalforge_types8.LLMStepType.COMPLETION,
         model: stepModel,
         provider: stepProvider,
         startedAt,
@@ -9375,7 +9377,7 @@ function buildLLMTrace(timestampedEvents, totalDurationMs, model, provider, exec
       });
     }
     if (subSteps.length === 0) {
-      const stepType = hasThinking && !hasText ? import_evalforge_types7.LLMStepType.THINKING : import_evalforge_types7.LLMStepType.COMPLETION;
+      const stepType = hasThinking && !hasText ? import_evalforge_types8.LLMStepType.THINKING : import_evalforge_types8.LLMStepType.COMPLETION;
       subSteps.push({
         id: (0, import_crypto3.randomUUID)(),
         stepNumber: 0,
@@ -9576,14 +9578,14 @@ function createTraceEventFromNdjson(evt, context, stepNumber, isComplete) {
       const te = evt;
       return {
         ...base,
-        type: import_evalforge_types8.LiveTraceEventType.COMPLETION,
+        type: import_evalforge_types9.LiveTraceEventType.COMPLETION,
         outputPreview: te.part.text.slice(0, 500)
       };
     }
     case "reasoning":
       return {
         ...base,
-        type: import_evalforge_types8.LiveTraceEventType.THINKING,
+        type: import_evalforge_types9.LiveTraceEventType.THINKING,
         thinking: evt.part.text.slice(0, 500)
       };
     case "tool_use": {
@@ -9591,15 +9593,15 @@ function createTraceEventFromNdjson(evt, context, stepNumber, isComplete) {
       const toolName = tu.part.tool;
       const args = tu.part.state.input;
       const toolArgs = JSON.stringify(args).slice(0, 500);
-      let type = import_evalforge_types8.LiveTraceEventType.TOOL_USE;
+      let type = import_evalforge_types9.LiveTraceEventType.TOOL_USE;
       let filePath;
       if (args) {
         if (args.file_path || args.path || args.target_file) {
           filePath = String(args.file_path || args.path || args.target_file);
           if (/write|edit/i.test(toolName)) {
-            type = import_evalforge_types8.LiveTraceEventType.FILE_WRITE;
+            type = import_evalforge_types9.LiveTraceEventType.FILE_WRITE;
           } else if (/read|view/i.test(toolName)) {
-            type = import_evalforge_types8.LiveTraceEventType.FILE_READ;
+            type = import_evalforge_types9.LiveTraceEventType.FILE_READ;
           }
         }
       }
@@ -9608,7 +9610,7 @@ function createTraceEventFromNdjson(evt, context, stepNumber, isComplete) {
     case "step_finish":
       return {
         ...base,
-        type: import_evalforge_types8.LiveTraceEventType.PROGRESS,
+        type: import_evalforge_types9.LiveTraceEventType.PROGRESS,
         outputPreview: "Step completed"
       };
     default:
@@ -9639,7 +9641,7 @@ async function prepareOpenCodeEnvironment(cwd, skills, options) {
   } else if (options.systemPrompt != null) {
     systemPrompt = options.systemPrompt;
   } else {
-    systemPrompt = import_evalforge_types8.DEFAULT_EVALUATOR_SYSTEM_PROMPT;
+    systemPrompt = import_evalforge_types9.DEFAULT_EVALUATOR_SYSTEM_PROMPT;
   }
   if (systemPrompt) {
     await writeSystemPromptRule(cwd, systemPrompt);
@@ -9831,7 +9833,7 @@ function spawnOpenCodeProcess(opts) {
             targetId: traceContext.targetId,
             targetName: traceContext.targetName,
             stepNumber: traceStepNumber,
-            type: import_evalforge_types8.LiveTraceEventType.PROGRESS,
+            type: import_evalforge_types9.LiveTraceEventType.PROGRESS,
             outputPreview: progressMessage,
             toolName: lastToolName,
             filePath: lastFilePath,
@@ -9865,18 +9867,18 @@ function spawnOpenCodeProcess(opts) {
           if (traceEvt) {
             lastToolName = traceEvt.toolName;
             lastFilePath = traceEvt.filePath;
-            if (traceEvt.type === import_evalforge_types8.LiveTraceEventType.THINKING) {
+            if (traceEvt.type === import_evalforge_types9.LiveTraceEventType.THINKING) {
               lastAction = "Thinking...";
-            } else if (traceEvt.type === import_evalforge_types8.LiveTraceEventType.TOOL_USE) {
+            } else if (traceEvt.type === import_evalforge_types9.LiveTraceEventType.TOOL_USE) {
               lastAction = extractToolAction(
                 traceEvt.toolName ?? "",
                 void 0
               );
-            } else if (traceEvt.type === import_evalforge_types8.LiveTraceEventType.FILE_WRITE) {
+            } else if (traceEvt.type === import_evalforge_types9.LiveTraceEventType.FILE_WRITE) {
               lastAction = `Writing: ${traceEvt.filePath || "file"}`;
-            } else if (traceEvt.type === import_evalforge_types8.LiveTraceEventType.FILE_READ) {
+            } else if (traceEvt.type === import_evalforge_types9.LiveTraceEventType.FILE_READ) {
               lastAction = `Reading: ${traceEvt.filePath || "file"}`;
-            } else if (traceEvt.type === import_evalforge_types8.LiveTraceEventType.COMPLETION) {
+            } else if (traceEvt.type === import_evalforge_types9.LiveTraceEventType.COMPLETION) {
               lastAction = "Processing response...";
             }
             emitTraceEvent(traceEvt, traceContext.pushEvent);
@@ -9958,7 +9960,7 @@ async function executeWithOpenCode(skills, scenario, options) {
         targetId: traceContext.targetId,
         targetName: traceContext.targetName,
         stepNumber: 0,
-        type: import_evalforge_types8.LiveTraceEventType.DIAGNOSTIC,
+        type: import_evalforge_types9.LiveTraceEventType.DIAGNOSTIC,
         outputPreview: JSON.stringify({
           event: "pre-cli-execution",
           model: `${providerID}/${modelID}`,
@@ -10012,7 +10014,7 @@ async function executeWithOpenCode(skills, scenario, options) {
             targetId: traceContext.targetId,
             targetName: traceContext.targetName,
             stepNumber: traceStepNumber + 1,
-            type: import_evalforge_types8.LiveTraceEventType.DIAGNOSTIC,
+            type: import_evalforge_types9.LiveTraceEventType.DIAGNOSTIC,
             outputPreview: JSON.stringify({
               event: "idle-timeout-retry",
               attempt,
@@ -10056,7 +10058,7 @@ async function executeWithOpenCode(skills, scenario, options) {
             targetId: traceContext.targetId,
             targetName: traceContext.targetName,
             stepNumber: traceStepNumber + 1,
-            type: import_evalforge_types8.LiveTraceEventType.DIAGNOSTIC,
+            type: import_evalforge_types9.LiveTraceEventType.DIAGNOSTIC,
             outputPreview: JSON.stringify({
               event: "cli-execution-failed",
               error: lastAttemptResult.error?.message ?? "Unknown error",
@@ -10111,7 +10113,7 @@ Attempt: ${attempt}, Events: ${accumulatedEvents.length}`
         targetId: traceContext.targetId,
         targetName: traceContext.targetName,
         stepNumber: traceStepNumber + 1,
-        type: import_evalforge_types8.LiveTraceEventType.COMPLETION,
+        type: import_evalforge_types9.LiveTraceEventType.COMPLETION,
         outputPreview: "Scenario execution completed",
         timestamp: (/* @__PURE__ */ new Date()).toISOString(),
         isComplete: true
@@ -10148,7 +10150,7 @@ Attempt: ${attempt}, Events: ${accumulatedEvents.length}`
 var OpenCodeAdapter = class {
   id = "opencode";
   name = "OpenCode";
-  supportedCommands = [import_evalforge_types9.AgentRunCommand.OPENCODE];
+  supportedCommands = [import_evalforge_types10.AgentRunCommand.OPENCODE];
   async prepareEnvironment(context) {
     await prepareOpenCodeEnvironment(context.cwd, context.skills, {
       mcps: context.mcps,
@@ -10171,7 +10173,7 @@ var OpenCodeAdapter = class {
       rules,
       systemPrompt
     } = context;
-    const typed = config ? import_evalforge_types9.OpenCodeConfigSchema.passthrough().safeParse(config) : void 0;
+    const typed = config ? import_evalforge_types10.OpenCodeConfigSchema.passthrough().safeParse(config) : void 0;
     const cfg = typed?.success ? typed.data : void 0;
     const rawMaxTurns = cfg?.maxTurns;
     const maxTurns = rawMaxTurns === 0 ? void 0 : rawMaxTurns;
@@ -10221,7 +10223,7 @@ var import_ai = require("ai");
 var import_anthropic = require("@ai-sdk/anthropic");
 var import_google = require("@ai-sdk/google");
 var import_openai = require("@ai-sdk/openai");
-var import_evalforge_types11 = require("@wix/evalforge-types");
+var import_evalforge_types12 = require("@wix/evalforge-types");
 var import_crypto4 = require("crypto");
 // src/run-scenario/agents/simple-agent/mcp-tools.ts
@@ -10318,7 +10320,7 @@ function extractErrorText(content) {
 }
 // src/run-scenario/agents/simple-agent/cost-calculation.ts
-var import_evalforge_types10 = require("@wix/evalforge-types");
+var import_evalforge_types11 = require("@wix/evalforge-types");
 var PROVIDER_ANTHROPIC = "anthropic";
 var PROVIDER_GEMINI = "gemini";
 var MODEL_PRICING = {
@@ -10387,7 +10389,7 @@ function extractGatewayCost(step, provider) {
   }
 }
 function calculateFromPricing(modelId, tokenUsage) {
-  const normalized = (0, import_evalforge_types10.normalizeModelId)(modelId);
+  const normalized = (0, import_evalforge_types11.normalizeModelId)(modelId);
   const pricing = MODEL_PRICING[normalized] ?? MODEL_PRICING[Object.keys(MODEL_PRICING).find((key) => normalized.startsWith(key)) ?? ""];
   if (!pricing) return 0;
   return tokenUsage.prompt / 1e6 * pricing.input + tokenUsage.completion / 1e6 * pricing.output;
@@ -10480,7 +10482,7 @@ function createModel(modelId, baseUrl, headers) {
     apiKey: "proxy-auth",
     headers
   });
-  if ([...import_evalforge_types11.OPENAI_RESPONSES_MODEL_IDS].some(
+  if ([...import_evalforge_types12.OPENAI_RESPONSES_MODEL_IDS].some(
     (id) => modelId === id || modelId.startsWith(id)
   )) {
     return openai.responses(modelId);
@@ -10488,12 +10490,12 @@ function createModel(modelId, baseUrl, headers) {
   return openai.chat(modelId);
 }
 function isClaudeModelId(modelId) {
-  return import_evalforge_types11.AVAILABLE_CLAUDE_MODEL_IDS.some(
+  return import_evalforge_types12.AVAILABLE_CLAUDE_MODEL_IDS.some(
     (id) => modelId === id || modelId.startsWith(id)
   );
 }
 function isGeminiModelId(modelId) {
-  return import_evalforge_types11.AVAILABLE_GEMINI_MODEL_IDS.some(
+  return import_evalforge_types12.AVAILABLE_GEMINI_MODEL_IDS.some(
     (id) => modelId === id || modelId.startsWith(id)
   );
 }
@@ -10513,9 +10515,9 @@ async function executeWithAiSdk(context) {
     mcps,
     traceContext
   } = context;
-  const typed = config ? import_evalforge_types11.SimpleAgentConfigSchema.passthrough().safeParse(config) : void 0;
+  const typed = config ? import_evalforge_types12.SimpleAgentConfigSchema.passthrough().safeParse(config) : void 0;
   const cfg = typed?.success ? typed.data : void 0;
-  const schemaKeys = new Set(Object.keys(import_evalforge_types11.SimpleAgentConfigSchema.shape));
+  const schemaKeys = new Set(Object.keys(import_evalforge_types12.SimpleAgentConfigSchema.shape));
   const configExtras = {};
   if (config) {
     for (const [key, value] of Object.entries(config)) {
@@ -10552,11 +10554,11 @@ async function executeWithAiSdk(context) {
   }, SDK_TIMEOUT_MS);
   try {
     const isAnthropic = provider === PROVIDER_ANTHROPIC2;
-    const isResponsesAPI = [...import_evalforge_types11.OPENAI_RESPONSES_MODEL_IDS].some(
+    const isResponsesAPI = [...import_evalforge_types12.OPENAI_RESPONSES_MODEL_IDS].some(
       (id) => modelId === id || modelId.startsWith(id)
     );
     const isGemini = provider === PROVIDER_GEMINI2;
-    const isGeminiThinking = isGemini && import_evalforge_types11.GEMINI_THINKING_MODEL_IDS.has(modelId);
+    const isGeminiThinking = isGemini && import_evalforge_types12.GEMINI_THINKING_MODEL_IDS.has(modelId);
     const supportsThinking = isAnthropic || isResponsesAPI || isGeminiThinking;
     const thinkingBudgetTokens = cfg.thinkingBudgetTokens ?? 1e4;
     const reasoningEffort = cfg.reasoningEffort ?? "high";
@@ -10635,7 +10637,7 @@ async function executeWithAiSdk(context) {
               targetId: traceContext.targetId,
               targetName: traceContext.targetName,
               stepNumber: stepTimestamps.length,
-              type: isToolStep ? import_evalforge_types11.LiveTraceEventType.TOOL_USE : import_evalforge_types11.LiveTraceEventType.COMPLETION,
+              type: isToolStep ? import_evalforge_types12.LiveTraceEventType.TOOL_USE : import_evalforge_types12.LiveTraceEventType.COMPLETION,
               toolName: firstToolCall?.toolName,
               toolArgs: firstToolCall ? (JSON.stringify(firstToolCall.input) ?? "").slice(0, 500) : void 0,
               outputPreview: step.text?.slice(0, 500),
@@ -10840,7 +10842,7 @@ function buildLLMTrace2(steps, totalDurationMs, totalUsage, modelId, provider, e
       id: (0, import_crypto4.randomUUID)(),
       stepNumber: i + 1,
       turnIndex: i,
-      type: step.toolCalls.length > 0 ? import_evalforge_types11.LLMStepType.TOOL_USE : import_evalforge_types11.LLMStepType.COMPLETION,
+      type: step.toolCalls.length > 0 ? import_evalforge_types12.LLMStepType.TOOL_USE : import_evalforge_types12.LLMStepType.COMPLETION,
       model: modelId,
       provider,
       startedAt: new Date(stepStartedAt).toISOString(),
@@ -10890,7 +10892,7 @@ function emitStartEvent(traceContext, startTime) {
       targetId: traceContext.targetId,
       targetName: traceContext.targetName,
       stepNumber: 0,
-      type: import_evalforge_types11.LiveTraceEventType.PROGRESS,
+      type: import_evalforge_types12.LiveTraceEventType.PROGRESS,
       outputPreview: "Starting Simple Agent execution...",
       elapsedMs: Date.now() - startTime,
       timestamp: (/* @__PURE__ */ new Date()).toISOString(),
@@ -10908,7 +10910,7 @@ function emitCompletionEvent(traceContext, stepNumber) {
       targetId: traceContext.targetId,
       targetName: traceContext.targetName,
       stepNumber,
-      type: import_evalforge_types11.LiveTraceEventType.COMPLETION,
+      type: import_evalforge_types12.LiveTraceEventType.COMPLETION,
       outputPreview: "Scenario execution completed",
       timestamp: (/* @__PURE__ */ new Date()).toISOString(),
       isComplete: true
@@ -11678,11 +11680,11 @@ function substituteVariables(prompt, variables) {
 }
 // src/run-scenario/run-agent-with-context.ts
-var import_evalforge_types12 = require("@wix/evalforge-types");
-var DEFAULT_AGENT_COMMAND = import_evalforge_types12.AgentRunCommand.CLAUDE;
+var import_evalforge_types13 = require("@wix/evalforge-types");
+var DEFAULT_AGENT_COMMAND = import_evalforge_types13.AgentRunCommand.CLAUDE;
 async function runAgentWithContext(config, evalRunId2, scenario, evalData, workDir, pushEvent) {
   const agent = evalData.agent ?? void 0;
-  const isSDK = agent?.agentType === import_evalforge_types12.AgentType.SDK;
+  const isSDK = agent?.agentType === import_evalforge_types13.AgentType.SDK;
   const identifier = isSDK ? simpleAgentAdapter.id : agent?.runCommand ?? DEFAULT_AGENT_COMMAND;
   const adapter = getAdapter(identifier);
   const startedAt = (/* @__PURE__ */ new Date()).toISOString();
@@ -11767,14 +11769,14 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
   const targetName = evalData.presetName ?? evalData.agent?.name ?? "";
   if (template) {
     console.log(
-      (0, import_evalforge_types13.formatTraceEventLine)({
+      (0, import_evalforge_types14.formatTraceEventLine)({
         evalRunId: evalRunId2,
         scenarioId: scenario.id,
         scenarioName: scenario.name,
         targetId,
         targetName,
         stepNumber: 0,
-        type: import_evalforge_types13.LiveTraceEventType.PROGRESS,
+        type: import_evalforge_types14.LiveTraceEventType.PROGRESS,
         outputPreview: "Setting up environment (installing dependencies)...",
         elapsedMs: 0,
         timestamp: (/* @__PURE__ */ new Date()).toISOString(),
@@ -11814,7 +11816,7 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
     })),
     durationMs: partialResult.duration
   };
-  const defaultJudgeModel = import_evalforge_types13.DEFAULT_JUDGE_MODEL;
+  const defaultJudgeModel = import_evalforge_types14.DEFAULT_JUDGE_MODEL;
   const assertionContext = {
     workDir,
     defaultJudgeModel,
@@ -11829,10 +11831,10 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
     assertionContext
   ) : [];
   const passed = assertionResults.filter(
-    (r) => r.status === import_evalforge_types13.AssertionResultStatus.PASSED
+    (r) => r.status === import_evalforge_types14.AssertionResultStatus.PASSED
   ).length;
   const failed = assertionResults.filter(
-    (r) => r.status === import_evalforge_types13.AssertionResultStatus.FAILED
+    (r) => r.status === import_evalforge_types14.AssertionResultStatus.FAILED
   ).length;
   const total = assertionResults.length;
   const passRate = total > 0 ? Math.round(passed / total * 100) : 100;
@@ -11908,7 +11910,7 @@ async function runEvaluationLoop(scenarioItems, evalData, callbacks) {
 }
 // src/error-reporter.ts
-var import_evalforge_types14 = require("@wix/evalforge-types");
+var import_evalforge_types15 = require("@wix/evalforge-types");
 function formatError(error, phase, context) {
   const timestamp2 = (/* @__PURE__ */ new Date()).toISOString();
   if (error instanceof Error) {
@@ -12151,7 +12153,7 @@ async function runEvaluation(projectId2, evalRunId2) {
     totalExecutions
   };
   const allFailed = totalExecutions > 0 && erroredExecutions === totalExecutions;
-  const finalStatus = allFailed ? import_evalforge_types15.EvalStatus.FAILED : import_evalforge_types15.EvalStatus.COMPLETED;
+  const finalStatus = allFailed ? import_evalforge_types16.EvalStatus.FAILED : import_evalforge_types16.EvalStatus.COMPLETED;
   const jobErrorOnAllFailed = allFailed ? truncateForJobError(
     firstErrorMessage ?? `All ${totalExecutions} executions errored without an error message`
   ) : void 0;
@@ -12205,7 +12207,7 @@ runEvaluation(projectId, evalRunId).then(() => {
       grpcAuthToken: config.grpcAuthToken
     });
     await api.updateEvalRun(projectId, evalRunId, {
-      status: import_evalforge_types15.EvalStatus.FAILED,
+      status: import_evalforge_types16.EvalStatus.FAILED,
       completedAt: (/* @__PURE__ */ new Date()).toISOString(),
       jobError,
       jobStatus: "FAILED"
@@ -12230,7 +12232,7 @@ runEvaluation(projectId, evalRunId).then(() => {
           grpcAuthToken
         });
         await api.updateEvalRun(projectId, evalRunId, {
-          status: import_evalforge_types15.EvalStatus.FAILED,
+          status: import_evalforge_types16.EvalStatus.FAILED,
           completedAt: (/* @__PURE__ */ new Date()).toISOString(),
           jobError: `Config load failed, then: ${jobError}`,
           jobStatus: "FAILED"