npm - @wix/evalforge-evaluator - Versions diffs - 0.114.0 → 0.116.0 - Mend

@wix/evalforge-evaluator 0.114.0 → 0.116.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (10)

package/build/index.js +98 -46
package/build/index.js.map +3 -3
package/build/index.mjs +98 -46
package/build/index.mjs.map +3 -3
package/build/types/run-scenario/agents/claude-code/claude-code-adapter.d.ts +5 -0
package/build/types/run-scenario/agents/claude-code/execute.d.ts +6 -0
package/build/types/run-scenario/agents/opencode/execute.d.ts +8 -0
package/build/types/run-scenario/agents/opencode/opencode-adapter.d.ts +1 -0
package/build/types/run-scenario/file-diff.d.ts +10 -2
package/package.json +5 -5

package/build/index.js CHANGED Viewed

@@ -1199,6 +1199,31 @@ function createTraceEventFromAnyMessage(message, context, stepNumber, isComplete
     outputPreview: `Message type: ${message.type}`
   };
 }
+async function prepareClaudeCodeEnvironment(cwd, skills, options) {
+  const { mkdir: mkdirAsync, writeFile: writeFile6 } = await import("fs/promises");
+  const claudeDir = `${cwd}/.claude`;
+  await mkdirAsync(claudeDir, { recursive: true });
+  await writeFile6(`${claudeDir}/settings.json`, "{}", {
+    flag: "wx"
+  }).catch(() => {
+  });
+  if (options.mcps && options.mcps.length > 0) {
+    await writeMcpToFilesystem(cwd, options.mcps);
+  }
+  if (options.subAgents && options.subAgents.length > 0) {
+    await writeSubAgentsToFilesystem(cwd, options.subAgents);
+  }
+  if (options.rules && options.rules.length > 0) {
+    await writeRulesToFilesystem(cwd, options.rules);
+  }
+  try {
+    await writeSkillsToFilesystem(cwd, skills);
+  } catch (writeError) {
+    throw new Error(
+      `Failed to write skills to filesystem: ${writeError instanceof Error ? writeError.message : String(writeError)}`
+    );
+  }
+}
 async function executeWithClaudeCode(skills, scenario, options) {
   const skillNames = skills.map((s) => s.name).join(", ");
   console.log("[executeWithClaudeCode] Starting execution", {
@@ -1222,29 +1247,6 @@ async function executeWithClaudeCode(skills, scenario, options) {
   }
   const startTime = /* @__PURE__ */ new Date();
   const allMessages = [];
-  const { mkdir: mkdirAsync, writeFile: writeFile6 } = await import("fs/promises");
-  const claudeDir = `${options.cwd}/.claude`;
-  await mkdirAsync(claudeDir, { recursive: true });
-  await writeFile6(`${claudeDir}/settings.json`, "{}", {
-    flag: "wx"
-  }).catch(() => {
-  });
-  if (options.mcps && options.mcps.length > 0) {
-    await writeMcpToFilesystem(options.cwd, options.mcps);
-  }
-  if (options.subAgents && options.subAgents.length > 0) {
-    await writeSubAgentsToFilesystem(options.cwd, options.subAgents);
-  }
-  if (options.rules && options.rules.length > 0) {
-    await writeRulesToFilesystem(options.cwd, options.rules);
-  }
-  try {
-    await writeSkillsToFilesystem(options.cwd, skills);
-  } catch (writeError) {
-    throw new Error(
-      `Failed to write skills to filesystem: ${writeError instanceof Error ? writeError.message : String(writeError)}`
-    );
-  }
   const sdkEnv = buildSdkEnvironment(options);
   let traceStepNumber = 0;
   const traceContext = options.traceContext;
@@ -2086,6 +2088,17 @@ var ClaudeCodeAdapter = class {
   id = "claude-code";
   name = "Claude Code";
   supportedCommands = [import_evalforge_types5.AgentRunCommand.CLAUDE];
+  /**
+   * Write infrastructure files (settings, MCPs, sub-agents, rules, skills)
+   * before the baseline snapshot is taken.
+   */
+  async prepareEnvironment(context) {
+    await prepareClaudeCodeEnvironment(context.cwd, context.skills, {
+      mcps: context.mcps,
+      subAgents: context.subAgents,
+      rules: context.rules
+    });
+  }
   /**
    * Execute a skill using the Claude Code SDK.
    *
@@ -2149,6 +2162,7 @@ defaultRegistry.register(claudeCodeAdapter);
 var import_evalforge_types9 = require("@wix/evalforge-types");
 // src/run-scenario/agents/opencode/execute.ts
+var import_os3 = require("os");
 var import_evalforge_types8 = require("@wix/evalforge-types");
 // src/run-scenario/agents/opencode/write-skills.ts
@@ -2651,6 +2665,13 @@ function buildConversation2(messages) {
 // src/run-scenario/agents/opencode/execute.ts
 var DEFAULT_MODEL3 = `anthropic/${import_evalforge_types8.ClaudeModel.CLAUDE_4_5_SONNET_1_0}`;
+function ensureOpenCodeInPath() {
+  const opencodeBin = `${(0, import_os3.homedir)()}/.opencode/bin`;
+  const currentPath = process.env.PATH || "";
+  if (!currentPath.includes(opencodeBin)) {
+    process.env.PATH = `${opencodeBin}:${currentPath}`;
+  }
+}
 function extractToolAction(toolName, args) {
   if (!toolName) return "Using tool...";
   const a = args;
@@ -2728,37 +2749,39 @@ function createTraceEventFromPart(part, context, stepNumber, isComplete) {
       return null;
   }
 }
-async function executeWithOpenCode(skills, scenario, options) {
-  const skillNames = skills.map((s) => s.name).join(", ");
-  console.log("[executeWithOpenCode] Starting execution", {
-    skillCount: skills.length,
-    skillNames,
-    scenarioId: scenario.id,
-    scenarioName: scenario.name,
-    cwd: options.cwd,
-    aiGatewayUrl: options.aiGatewayUrl,
-    hasAiGatewayHeaders: !!options.aiGatewayHeaders,
-    model: options.model
-  });
-  const startTime = /* @__PURE__ */ new Date();
+async function prepareOpenCodeEnvironment(cwd, skills, options) {
   if (options.mcps && options.mcps.length > 0) {
     console.log(
       `[MCP] ${options.mcps.length} MCP(s) will be configured inline`
     );
   }
   if (options.subAgents && options.subAgents.length > 0) {
-    await writeSubAgentsToFilesystem2(options.cwd, options.subAgents);
+    await writeSubAgentsToFilesystem2(cwd, options.subAgents);
   }
   if (options.rules && options.rules.length > 0) {
-    await writeRulesToFilesystem(options.cwd, options.rules);
+    await writeRulesToFilesystem(cwd, options.rules);
   }
   try {
-    await writeSkillsToFilesystem2(options.cwd, skills);
+    await writeSkillsToFilesystem2(cwd, skills);
   } catch (writeError) {
     throw new Error(
       `Failed to write skills to filesystem: ${writeError instanceof Error ? writeError.message : String(writeError)}`
     );
   }
+}
+async function executeWithOpenCode(skills, scenario, options) {
+  const skillNames = skills.map((s) => s.name).join(", ");
+  console.log("[executeWithOpenCode] Starting execution", {
+    skillCount: skills.length,
+    skillNames,
+    scenarioId: scenario.id,
+    scenarioName: scenario.name,
+    cwd: options.cwd,
+    aiGatewayUrl: options.aiGatewayUrl,
+    hasAiGatewayHeaders: !!options.aiGatewayHeaders,
+    model: options.model
+  });
+  const startTime = /* @__PURE__ */ new Date();
   const maxTurns = options.maxTurns ?? 10;
   const { config, providerID, modelID } = await buildOpenCodeConfig({
     model: options.model,
@@ -2806,6 +2829,7 @@ async function executeWithOpenCode(skills, scenario, options) {
   }
   let server;
   try {
+    ensureOpenCodeInPath();
     console.log("[SDK-DEBUG] Starting OpenCode server...");
     server = await createOpencodeServer({
       config,
@@ -3137,6 +3161,13 @@ var OpenCodeAdapter = class {
   id = "opencode";
   name = "OpenCode";
   supportedCommands = [import_evalforge_types9.AgentRunCommand.OPENCODE];
+  async prepareEnvironment(context) {
+    await prepareOpenCodeEnvironment(context.cwd, context.skills, {
+      mcps: context.mcps,
+      subAgents: context.subAgents,
+      rules: context.rules
+    });
+  }
   async execute(context) {
     const {
       skills,
@@ -4264,6 +4295,15 @@ arrayDiff.join = arrayDiff.removeEmpty = function(value) {
 };
 // src/run-scenario/file-diff.ts
+function deriveInfrastructurePaths(prePrep, postPrep) {
+  const infraPaths = /* @__PURE__ */ new Set();
+  for (const path2 of Object.keys(postPrep)) {
+    if (prePrep[path2] === void 0 || prePrep[path2] !== postPrep[path2]) {
+      infraPaths.add(path2);
+    }
+  }
+  return infraPaths;
+}
 var IGNORED_PATTERNS = [
   "node_modules",
   ".git",
@@ -4367,7 +4407,7 @@ function generateDiffLines(before, after) {
   }
   return result;
 }
-function diffSnapshots(before, after) {
+function diffSnapshots(before, after, infrastructurePaths) {
   const diffs = [];
   const allPaths = /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)]);
   for (const path2 of allPaths) {
@@ -4381,7 +4421,8 @@ function diffSnapshots(before, after) {
       path: path2,
       expected: beforeContent,
       actual: afterContent,
-      diffLines: diffLines2
+      diffLines: diffLines2,
+      ...infrastructurePaths?.has(path2) && { isInfrastructure: true }
     });
   }
   const deletedPaths = [...allPaths].filter((p) => after[p] === void 0);
@@ -4405,7 +4446,7 @@ function diffSnapshots(before, after) {
   result.sort((a, b) => a.path.localeCompare(b.path));
   return result;
 }
-function extractTemplateFiles(before, after) {
+function extractTemplateFiles(before, after, infrastructurePaths) {
   const files = [];
   const allPaths = /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)]);
   for (const path2 of allPaths) {
@@ -4425,7 +4466,8 @@ function extractTemplateFiles(before, after) {
     files.push({
       path: path2,
       content: afterContent,
-      status
+      status,
+      ...infrastructurePaths?.has(path2) && { isInfrastructure: true }
     });
   }
   files.sort((a, b) => a.path.localeCompare(b.path));
@@ -4441,7 +4483,6 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
   const identifier = isSDK ? simpleAgentAdapter.id : agent?.runCommand ?? DEFAULT_AGENT_COMMAND;
   const adapter = getAdapter(identifier);
   const startedAt = (/* @__PURE__ */ new Date()).toISOString();
-  const beforeSnapshot = workDir ? snapshotDirectory(workDir) : {};
   const targetId = evalData.evalRun.presetId ?? agent?.id ?? evalData.evalRun.id;
   const targetName = evalData.presetName || agent?.name || "";
   const executionContext = {
@@ -4466,11 +4507,22 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
     rules: evalData.rules?.length > 0 ? evalData.rules : void 0,
     systemPrompt: agent?.systemPrompt
   };
+  const hasPrepare = !!adapter.prepareEnvironment;
+  const prePrepSnapshot = hasPrepare && workDir ? snapshotDirectory(workDir) : {};
+  if (hasPrepare) {
+    await adapter.prepareEnvironment(executionContext);
+  }
+  const beforeSnapshot = workDir ? snapshotDirectory(workDir) : {};
+  const infrastructurePaths = hasPrepare ? deriveInfrastructurePaths(prePrepSnapshot, beforeSnapshot) : /* @__PURE__ */ new Set();
   const { outputText, durationMs, llmTrace, conversation } = await adapter.execute(executionContext);
   const completedAt = (/* @__PURE__ */ new Date()).toISOString();
   const afterSnapshot = workDir ? snapshotDirectory(workDir) : {};
-  const fileDiffs = diffSnapshots(beforeSnapshot, afterSnapshot);
-  const templateFiles = workDir ? extractTemplateFiles(beforeSnapshot, afterSnapshot) : void 0;
+  const fileDiffs = diffSnapshots(
+    beforeSnapshot,
+    afterSnapshot,
+    infrastructurePaths
+  );
+  const templateFiles = workDir ? extractTemplateFiles(beforeSnapshot, afterSnapshot, infrastructurePaths) : void 0;
   return {
     id: (0, import_crypto4.randomUUID)(),
     targetId,