@wix/evalforge-evaluator 0.115.0 → 0.116.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -1191,6 +1191,31 @@ function createTraceEventFromAnyMessage(message, context, stepNumber, isComplete
1191
1191
  outputPreview: `Message type: ${message.type}`
1192
1192
  };
1193
1193
  }
1194
+ async function prepareClaudeCodeEnvironment(cwd, skills, options) {
1195
+ const { mkdir: mkdirAsync, writeFile: writeFile6 } = await import("fs/promises");
1196
+ const claudeDir = `${cwd}/.claude`;
1197
+ await mkdirAsync(claudeDir, { recursive: true });
1198
+ await writeFile6(`${claudeDir}/settings.json`, "{}", {
1199
+ flag: "wx"
1200
+ }).catch(() => {
1201
+ });
1202
+ if (options.mcps && options.mcps.length > 0) {
1203
+ await writeMcpToFilesystem(cwd, options.mcps);
1204
+ }
1205
+ if (options.subAgents && options.subAgents.length > 0) {
1206
+ await writeSubAgentsToFilesystem(cwd, options.subAgents);
1207
+ }
1208
+ if (options.rules && options.rules.length > 0) {
1209
+ await writeRulesToFilesystem(cwd, options.rules);
1210
+ }
1211
+ try {
1212
+ await writeSkillsToFilesystem(cwd, skills);
1213
+ } catch (writeError) {
1214
+ throw new Error(
1215
+ `Failed to write skills to filesystem: ${writeError instanceof Error ? writeError.message : String(writeError)}`
1216
+ );
1217
+ }
1218
+ }
1194
1219
  async function executeWithClaudeCode(skills, scenario, options) {
1195
1220
  const skillNames = skills.map((s) => s.name).join(", ");
1196
1221
  console.log("[executeWithClaudeCode] Starting execution", {
@@ -1214,29 +1239,6 @@ async function executeWithClaudeCode(skills, scenario, options) {
1214
1239
  }
1215
1240
  const startTime = /* @__PURE__ */ new Date();
1216
1241
  const allMessages = [];
1217
- const { mkdir: mkdirAsync, writeFile: writeFile6 } = await import("fs/promises");
1218
- const claudeDir = `${options.cwd}/.claude`;
1219
- await mkdirAsync(claudeDir, { recursive: true });
1220
- await writeFile6(`${claudeDir}/settings.json`, "{}", {
1221
- flag: "wx"
1222
- }).catch(() => {
1223
- });
1224
- if (options.mcps && options.mcps.length > 0) {
1225
- await writeMcpToFilesystem(options.cwd, options.mcps);
1226
- }
1227
- if (options.subAgents && options.subAgents.length > 0) {
1228
- await writeSubAgentsToFilesystem(options.cwd, options.subAgents);
1229
- }
1230
- if (options.rules && options.rules.length > 0) {
1231
- await writeRulesToFilesystem(options.cwd, options.rules);
1232
- }
1233
- try {
1234
- await writeSkillsToFilesystem(options.cwd, skills);
1235
- } catch (writeError) {
1236
- throw new Error(
1237
- `Failed to write skills to filesystem: ${writeError instanceof Error ? writeError.message : String(writeError)}`
1238
- );
1239
- }
1240
1242
  const sdkEnv = buildSdkEnvironment(options);
1241
1243
  let traceStepNumber = 0;
1242
1244
  const traceContext = options.traceContext;
@@ -2078,6 +2080,17 @@ var ClaudeCodeAdapter = class {
2078
2080
  id = "claude-code";
2079
2081
  name = "Claude Code";
2080
2082
  supportedCommands = [AgentRunCommand.CLAUDE];
2083
+ /**
2084
+ * Write infrastructure files (settings, MCPs, sub-agents, rules, skills)
2085
+ * before the baseline snapshot is taken.
2086
+ */
2087
+ async prepareEnvironment(context) {
2088
+ await prepareClaudeCodeEnvironment(context.cwd, context.skills, {
2089
+ mcps: context.mcps,
2090
+ subAgents: context.subAgents,
2091
+ rules: context.rules
2092
+ });
2093
+ }
2081
2094
  /**
2082
2095
  * Execute a skill using the Claude Code SDK.
2083
2096
  *
@@ -2737,37 +2750,39 @@ function createTraceEventFromPart(part, context, stepNumber, isComplete) {
2737
2750
  return null;
2738
2751
  }
2739
2752
  }
2740
- async function executeWithOpenCode(skills, scenario, options) {
2741
- const skillNames = skills.map((s) => s.name).join(", ");
2742
- console.log("[executeWithOpenCode] Starting execution", {
2743
- skillCount: skills.length,
2744
- skillNames,
2745
- scenarioId: scenario.id,
2746
- scenarioName: scenario.name,
2747
- cwd: options.cwd,
2748
- aiGatewayUrl: options.aiGatewayUrl,
2749
- hasAiGatewayHeaders: !!options.aiGatewayHeaders,
2750
- model: options.model
2751
- });
2752
- const startTime = /* @__PURE__ */ new Date();
2753
+ async function prepareOpenCodeEnvironment(cwd, skills, options) {
2753
2754
  if (options.mcps && options.mcps.length > 0) {
2754
2755
  console.log(
2755
2756
  `[MCP] ${options.mcps.length} MCP(s) will be configured inline`
2756
2757
  );
2757
2758
  }
2758
2759
  if (options.subAgents && options.subAgents.length > 0) {
2759
- await writeSubAgentsToFilesystem2(options.cwd, options.subAgents);
2760
+ await writeSubAgentsToFilesystem2(cwd, options.subAgents);
2760
2761
  }
2761
2762
  if (options.rules && options.rules.length > 0) {
2762
- await writeRulesToFilesystem(options.cwd, options.rules);
2763
+ await writeRulesToFilesystem(cwd, options.rules);
2763
2764
  }
2764
2765
  try {
2765
- await writeSkillsToFilesystem2(options.cwd, skills);
2766
+ await writeSkillsToFilesystem2(cwd, skills);
2766
2767
  } catch (writeError) {
2767
2768
  throw new Error(
2768
2769
  `Failed to write skills to filesystem: ${writeError instanceof Error ? writeError.message : String(writeError)}`
2769
2770
  );
2770
2771
  }
2772
+ }
2773
+ async function executeWithOpenCode(skills, scenario, options) {
2774
+ const skillNames = skills.map((s) => s.name).join(", ");
2775
+ console.log("[executeWithOpenCode] Starting execution", {
2776
+ skillCount: skills.length,
2777
+ skillNames,
2778
+ scenarioId: scenario.id,
2779
+ scenarioName: scenario.name,
2780
+ cwd: options.cwd,
2781
+ aiGatewayUrl: options.aiGatewayUrl,
2782
+ hasAiGatewayHeaders: !!options.aiGatewayHeaders,
2783
+ model: options.model
2784
+ });
2785
+ const startTime = /* @__PURE__ */ new Date();
2771
2786
  const maxTurns = options.maxTurns ?? 10;
2772
2787
  const { config, providerID, modelID } = await buildOpenCodeConfig({
2773
2788
  model: options.model,
@@ -3147,6 +3162,13 @@ var OpenCodeAdapter = class {
3147
3162
  id = "opencode";
3148
3163
  name = "OpenCode";
3149
3164
  supportedCommands = [AgentRunCommand2.OPENCODE];
3165
+ async prepareEnvironment(context) {
3166
+ await prepareOpenCodeEnvironment(context.cwd, context.skills, {
3167
+ mcps: context.mcps,
3168
+ subAgents: context.subAgents,
3169
+ rules: context.rules
3170
+ });
3171
+ }
3150
3172
  async execute(context) {
3151
3173
  const {
3152
3174
  skills,
@@ -4282,6 +4304,15 @@ arrayDiff.join = arrayDiff.removeEmpty = function(value) {
4282
4304
  };
4283
4305
 
4284
4306
  // src/run-scenario/file-diff.ts
4307
+ function deriveInfrastructurePaths(prePrep, postPrep) {
4308
+ const infraPaths = /* @__PURE__ */ new Set();
4309
+ for (const path2 of Object.keys(postPrep)) {
4310
+ if (prePrep[path2] === void 0 || prePrep[path2] !== postPrep[path2]) {
4311
+ infraPaths.add(path2);
4312
+ }
4313
+ }
4314
+ return infraPaths;
4315
+ }
4285
4316
  var IGNORED_PATTERNS = [
4286
4317
  "node_modules",
4287
4318
  ".git",
@@ -4385,7 +4416,7 @@ function generateDiffLines(before, after) {
4385
4416
  }
4386
4417
  return result;
4387
4418
  }
4388
- function diffSnapshots(before, after) {
4419
+ function diffSnapshots(before, after, infrastructurePaths) {
4389
4420
  const diffs = [];
4390
4421
  const allPaths = /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)]);
4391
4422
  for (const path2 of allPaths) {
@@ -4399,7 +4430,8 @@ function diffSnapshots(before, after) {
4399
4430
  path: path2,
4400
4431
  expected: beforeContent,
4401
4432
  actual: afterContent,
4402
- diffLines: diffLines2
4433
+ diffLines: diffLines2,
4434
+ ...infrastructurePaths?.has(path2) && { isInfrastructure: true }
4403
4435
  });
4404
4436
  }
4405
4437
  const deletedPaths = [...allPaths].filter((p) => after[p] === void 0);
@@ -4423,7 +4455,7 @@ function diffSnapshots(before, after) {
4423
4455
  result.sort((a, b) => a.path.localeCompare(b.path));
4424
4456
  return result;
4425
4457
  }
4426
- function extractTemplateFiles(before, after) {
4458
+ function extractTemplateFiles(before, after, infrastructurePaths) {
4427
4459
  const files = [];
4428
4460
  const allPaths = /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)]);
4429
4461
  for (const path2 of allPaths) {
@@ -4443,7 +4475,8 @@ function extractTemplateFiles(before, after) {
4443
4475
  files.push({
4444
4476
  path: path2,
4445
4477
  content: afterContent,
4446
- status
4478
+ status,
4479
+ ...infrastructurePaths?.has(path2) && { isInfrastructure: true }
4447
4480
  });
4448
4481
  }
4449
4482
  files.sort((a, b) => a.path.localeCompare(b.path));
@@ -4459,7 +4492,6 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
4459
4492
  const identifier = isSDK ? simpleAgentAdapter.id : agent?.runCommand ?? DEFAULT_AGENT_COMMAND;
4460
4493
  const adapter = getAdapter(identifier);
4461
4494
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
4462
- const beforeSnapshot = workDir ? snapshotDirectory(workDir) : {};
4463
4495
  const targetId = evalData.evalRun.presetId ?? agent?.id ?? evalData.evalRun.id;
4464
4496
  const targetName = evalData.presetName || agent?.name || "";
4465
4497
  const executionContext = {
@@ -4484,11 +4516,22 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
4484
4516
  rules: evalData.rules?.length > 0 ? evalData.rules : void 0,
4485
4517
  systemPrompt: agent?.systemPrompt
4486
4518
  };
4519
+ const hasPrepare = !!adapter.prepareEnvironment;
4520
+ const prePrepSnapshot = hasPrepare && workDir ? snapshotDirectory(workDir) : {};
4521
+ if (hasPrepare) {
4522
+ await adapter.prepareEnvironment(executionContext);
4523
+ }
4524
+ const beforeSnapshot = workDir ? snapshotDirectory(workDir) : {};
4525
+ const infrastructurePaths = hasPrepare ? deriveInfrastructurePaths(prePrepSnapshot, beforeSnapshot) : /* @__PURE__ */ new Set();
4487
4526
  const { outputText, durationMs, llmTrace, conversation } = await adapter.execute(executionContext);
4488
4527
  const completedAt = (/* @__PURE__ */ new Date()).toISOString();
4489
4528
  const afterSnapshot = workDir ? snapshotDirectory(workDir) : {};
4490
- const fileDiffs = diffSnapshots(beforeSnapshot, afterSnapshot);
4491
- const templateFiles = workDir ? extractTemplateFiles(beforeSnapshot, afterSnapshot) : void 0;
4529
+ const fileDiffs = diffSnapshots(
4530
+ beforeSnapshot,
4531
+ afterSnapshot,
4532
+ infrastructurePaths
4533
+ );
4534
+ const templateFiles = workDir ? extractTemplateFiles(beforeSnapshot, afterSnapshot, infrastructurePaths) : void 0;
4492
4535
  return {
4493
4536
  id: randomUUID4(),
4494
4537
  targetId,