@wix/evalforge-evaluator 0.99.0 → 0.100.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -382,9 +382,9 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
382
382
  const scenarios = await Promise.all(
383
383
  evalRun.scenarioIds.map((id) => api.getScenario(projectId2, id))
384
384
  );
385
- let codeAgent = null;
385
+ let agent = null;
386
386
  if (evalRun.agentId) {
387
- codeAgent = await api.getAgent(projectId2, evalRun.agentId);
387
+ agent = await api.getAgent(projectId2, evalRun.agentId);
388
388
  }
389
389
  let skills = [];
390
390
  let skillsGroup = null;
@@ -485,7 +485,7 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
485
485
  const skillsGroupName = skillsGroup?.name ?? "";
486
486
  return {
487
487
  evalRun,
488
- codeAgent,
488
+ agent,
489
489
  skills,
490
490
  skillsGroup,
491
491
  skillsGroupName,
@@ -504,56 +504,18 @@ var import_eval_assertions = require("@wix/eval-assertions");
504
504
  var import_fs = require("fs");
505
505
  var import_os = require("os");
506
506
  var import_path2 = __toESM(require("path"));
507
- var import_evalforge_github_client2 = require("@wix/evalforge-github-client");
507
+ var import_evalforge_github_client = require("@wix/evalforge-github-client");
508
508
 
509
- // src/run-scenario/agents/claude-code/write-skills.ts
509
+ // src/run-scenario/utils/write-files.ts
510
510
  var import_promises = require("fs/promises");
511
511
  var import_path = require("path");
512
- var import_evalforge_github_client = require("@wix/evalforge-github-client");
513
- async function writeSkillsToFilesystem(cwd, skills, fetchFn = import_evalforge_github_client.fetchGitHubFolder) {
514
- await Promise.all(
515
- skills.map((skill) => writeSkillToFilesystem(cwd, skill, fetchFn))
516
- );
517
- }
518
- async function writeSkillToFilesystem(cwd, skill, fetchFn = import_evalforge_github_client.fetchGitHubFolder) {
519
- const skillName = skill.name;
520
- const skillDir = (0, import_path.join)(cwd, ".claude", "skills", skillName);
521
- await (0, import_promises.mkdir)(skillDir, { recursive: true });
522
- const version = skill.latestVersion;
523
- if (version?.files && version.files.length > 0) {
524
- await writeSkillFiles(skillDir, version.files);
525
- console.log(
526
- `[Skill] ${skillName}: wrote ${version.files.length} file(s) from snapshot`
527
- );
528
- } else if (skill.source) {
529
- try {
530
- const files = await fetchFn(skill.source, {
531
- userAgent: "EvalForge-Evaluator"
532
- });
533
- await writeSkillFiles(skillDir, files);
534
- console.log(
535
- `[Skill] ${skillName}: wrote ${files.length} file(s) from GitHub (live)`
536
- );
537
- } catch (error) {
538
- const message = error instanceof Error ? error.message : "Unknown error";
539
- console.error(
540
- `[Skill] ${skillName}: GitHub fetch failed: ${message}, no files to fall back to`
541
- );
542
- throw new Error(
543
- `Failed to write skill ${skillName} to filesystem: ${message}`
544
- );
545
- }
546
- } else {
547
- throw new Error(`Skill ${skillName} has no files and no source configured`);
548
- }
549
- }
550
- async function writeSkillFiles(skillDir, files) {
551
- const resolvedBase = (0, import_path.resolve)(skillDir);
512
+ async function writeFilesToDirectory(targetDir, files) {
513
+ const resolvedBase = (0, import_path.resolve)(targetDir);
552
514
  for (const file of files) {
553
- const filePath = (0, import_path.resolve)(skillDir, file.path);
515
+ const filePath = (0, import_path.resolve)(targetDir, file.path);
554
516
  if (!filePath.startsWith(resolvedBase + import_path.sep) && filePath !== resolvedBase) {
555
517
  throw new Error(
556
- `Path traversal detected in skill file: "${file.path}" resolves outside skill directory`
518
+ `Path traversal detected: "${file.path}" resolves outside target directory`
557
519
  );
558
520
  }
559
521
  await (0, import_promises.mkdir)((0, import_path.dirname)(filePath), { recursive: true });
@@ -569,10 +531,10 @@ async function fetchAndWriteTemplateFiles(template, workDir) {
569
531
  );
570
532
  return;
571
533
  }
572
- const files = await (0, import_evalforge_github_client2.fetchGitHubFolder)(template.source, {
534
+ const files = await (0, import_evalforge_github_client.fetchGitHubFolder)(template.source, {
573
535
  userAgent: "EvalForge-Evaluator"
574
536
  });
575
- await writeSkillFiles(workDir, files);
537
+ await writeFilesToDirectory(workDir, files);
576
538
  }
577
539
  function writeWixEnvFile(workDir) {
578
540
  const configPath = import_path2.default.join(workDir, "wix.config.json");
@@ -626,86 +588,76 @@ var import_crypto2 = require("crypto");
626
588
 
627
589
  // src/run-scenario/agents/registry.ts
628
590
  var AgentAdapterRegistry = class {
629
- /**
630
- * Map of run commands to their registered adapters.
631
- * Multiple commands can map to the same adapter.
632
- */
633
- adapters = /* @__PURE__ */ new Map();
634
- /**
635
- * Set of all registered adapter instances (for getAll).
636
- */
591
+ /** Map of CLI commands to their registered adapters. */
592
+ commandMap = /* @__PURE__ */ new Map();
593
+ /** Map of adapter IDs to their registered adapters. */
594
+ idMap = /* @__PURE__ */ new Map();
595
+ /** Set of all registered adapter instances (for getAll). */
637
596
  registeredAdapters = /* @__PURE__ */ new Set();
638
597
  /**
639
598
  * Register an agent adapter.
640
599
  *
641
- * The adapter will be registered for all commands in its supportedCommands array.
642
- * If a command is already registered, it will be overwritten with a warning.
643
- *
644
- * @param adapter - The adapter to register
600
+ * The adapter is registered by its ID and for all commands in its supportedCommands array.
601
+ * If a command or ID is already registered, it will be overwritten with a warning.
645
602
  */
646
603
  register(adapter) {
647
604
  this.registeredAdapters.add(adapter);
605
+ this.idMap.set(adapter.id, adapter);
648
606
  for (const command of adapter.supportedCommands) {
649
- if (this.adapters.has(command)) {
650
- const existing = this.adapters.get(command);
607
+ if (this.commandMap.has(command)) {
608
+ const existing = this.commandMap.get(command);
651
609
  console.warn(
652
610
  `[AgentAdapterRegistry] Command "${command}" already registered by adapter "${existing.id}". Overwriting with adapter "${adapter.id}".`
653
611
  );
654
612
  }
655
- this.adapters.set(command, adapter);
613
+ this.commandMap.set(command, adapter);
656
614
  }
657
615
  }
658
- /**
659
- * Get an adapter by run command.
660
- *
661
- * @param runCommand - The run command to look up
662
- * @returns The registered adapter, or undefined if not found
663
- */
664
- get(runCommand) {
665
- return this.adapters.get(runCommand);
616
+ /** Get an adapter by CLI command. */
617
+ getByCommand(command) {
618
+ return this.commandMap.get(command);
666
619
  }
667
- /**
668
- * Check if a command has a registered adapter.
669
- *
670
- * @param runCommand - The run command to check
671
- * @returns True if an adapter is registered for this command
672
- */
673
- has(runCommand) {
674
- return this.adapters.has(runCommand);
620
+ /** Get an adapter by adapter ID. */
621
+ getById(adapterId) {
622
+ return this.idMap.get(adapterId);
675
623
  }
676
624
  /**
677
- * Get all registered adapters.
678
- *
679
- * @returns Array of all unique registered adapters
625
+ * Unified lookup: tries CLI command first, then adapter ID.
626
+ * Use this when the identifier could be either a command or an adapter ID.
680
627
  */
628
+ resolve(identifier) {
629
+ return this.commandMap.get(identifier) ?? this.idMap.get(identifier);
630
+ }
631
+ /** Check if a command or adapter ID has a registered adapter. */
632
+ has(identifier) {
633
+ return this.commandMap.has(identifier) || this.idMap.has(identifier);
634
+ }
635
+ /** Get all registered adapters. */
681
636
  getAll() {
682
637
  return Array.from(this.registeredAdapters);
683
638
  }
684
- /**
685
- * Get all supported commands.
686
- *
687
- * @returns Array of all registered run commands
688
- */
639
+ /** Get all supported CLI commands. */
689
640
  getSupportedCommands() {
690
- return Array.from(this.adapters.keys());
641
+ return Array.from(this.commandMap.keys());
642
+ }
643
+ /** Get all registered adapter IDs. */
644
+ getAdapterIds() {
645
+ return Array.from(this.idMap.keys());
691
646
  }
692
647
  /**
693
648
  * Unregister an adapter by its ID.
694
- *
695
649
  * Removes the adapter and all its command mappings.
696
- *
697
- * @param adapterId - The ID of the adapter to remove
698
- * @returns True if the adapter was found and removed
699
650
  */
700
651
  unregister(adapterId) {
701
652
  let found = false;
702
653
  for (const adapter of this.registeredAdapters) {
703
654
  if (adapter.id === adapterId) {
704
655
  this.registeredAdapters.delete(adapter);
656
+ this.idMap.delete(adapterId);
705
657
  found = true;
706
658
  for (const command of adapter.supportedCommands) {
707
- if (this.adapters.get(command) === adapter) {
708
- this.adapters.delete(command);
659
+ if (this.commandMap.get(command) === adapter) {
660
+ this.commandMap.delete(command);
709
661
  }
710
662
  }
711
663
  break;
@@ -713,22 +665,21 @@ var AgentAdapterRegistry = class {
713
665
  }
714
666
  return found;
715
667
  }
716
- /**
717
- * Clear all registered adapters.
718
- * Primarily useful for testing.
719
- */
668
+ /** Clear all registered adapters. Primarily useful for testing. */
720
669
  clear() {
721
- this.adapters.clear();
670
+ this.commandMap.clear();
671
+ this.idMap.clear();
722
672
  this.registeredAdapters.clear();
723
673
  }
724
674
  };
725
675
  var defaultRegistry = new AgentAdapterRegistry();
726
- function getAdapter(runCommand) {
727
- const adapter = defaultRegistry.get(runCommand);
676
+ function getAdapter(identifier) {
677
+ const adapter = defaultRegistry.resolve(identifier);
728
678
  if (!adapter) {
729
- const supported = defaultRegistry.getSupportedCommands();
679
+ const commands = defaultRegistry.getSupportedCommands();
680
+ const ids = defaultRegistry.getAdapterIds();
730
681
  throw new Error(
731
- `No agent adapter registered for command "${runCommand}". Supported commands: ${supported.length > 0 ? supported.join(", ") : "(none registered)"}`
682
+ `No agent adapter registered for "${identifier}". Supported commands: ${commands.length > 0 ? commands.join(", ") : "(none)"}. Registered adapters: ${ids.length > 0 ? ids.join(", ") : "(none)"}`
732
683
  );
733
684
  }
734
685
  return adapter;
@@ -739,21 +690,65 @@ var import_evalforge_types4 = require("@wix/evalforge-types");
739
690
 
740
691
  // src/run-scenario/agents/claude-code/execute.ts
741
692
  var import_evalforge_types3 = require("@wix/evalforge-types");
693
+
694
+ // src/run-scenario/agents/claude-code/write-skills.ts
695
+ var import_promises2 = require("fs/promises");
696
+ var import_path3 = require("path");
697
+ var import_evalforge_github_client2 = require("@wix/evalforge-github-client");
698
+ async function writeSkillsToFilesystem(cwd, skills, fetchFn = import_evalforge_github_client2.fetchGitHubFolder) {
699
+ await Promise.all(
700
+ skills.map((skill) => writeSkillToFilesystem(cwd, skill, fetchFn))
701
+ );
702
+ }
703
+ async function writeSkillToFilesystem(cwd, skill, fetchFn = import_evalforge_github_client2.fetchGitHubFolder) {
704
+ const skillName = skill.name;
705
+ const skillDir = (0, import_path3.join)(cwd, ".claude", "skills", skillName);
706
+ await (0, import_promises2.mkdir)(skillDir, { recursive: true });
707
+ const version = skill.latestVersion;
708
+ if (version?.files && version.files.length > 0) {
709
+ await writeFilesToDirectory(skillDir, version.files);
710
+ console.log(
711
+ `[Skill] ${skillName}: wrote ${version.files.length} file(s) from snapshot`
712
+ );
713
+ } else if (skill.source) {
714
+ try {
715
+ const files = await fetchFn(skill.source, {
716
+ userAgent: "EvalForge-Evaluator"
717
+ });
718
+ await writeFilesToDirectory(skillDir, files);
719
+ console.log(
720
+ `[Skill] ${skillName}: wrote ${files.length} file(s) from GitHub (live)`
721
+ );
722
+ } catch (error) {
723
+ const message = error instanceof Error ? error.message : "Unknown error";
724
+ console.error(
725
+ `[Skill] ${skillName}: GitHub fetch failed: ${message}, no files to fall back to`
726
+ );
727
+ throw new Error(
728
+ `Failed to write skill ${skillName} to filesystem: ${message}`
729
+ );
730
+ }
731
+ } else {
732
+ throw new Error(`Skill ${skillName} has no files and no source configured`);
733
+ }
734
+ }
735
+
736
+ // src/run-scenario/agents/claude-code/execute.ts
742
737
  var import_crypto = require("crypto");
743
738
 
744
739
  // src/run-scenario/agents/claude-code/write-mcp.ts
745
- var import_promises3 = require("fs/promises");
746
- var import_path4 = require("path");
740
+ var import_promises4 = require("fs/promises");
741
+ var import_path5 = require("path");
747
742
  var import_evalforge_types2 = require("@wix/evalforge-types");
748
743
 
749
744
  // src/run-scenario/agents/claude-code/resolve-mcp-placeholders.ts
750
- var import_promises2 = require("fs/promises");
751
- var import_path3 = require("path");
745
+ var import_promises3 = require("fs/promises");
746
+ var import_path4 = require("path");
752
747
  var import_os2 = require("os");
753
- var WIX_AUTH_FILE = (0, import_path3.join)((0, import_os2.homedir)(), ".wix", "auth", "api-key.json");
748
+ var WIX_AUTH_FILE = (0, import_path4.join)((0, import_os2.homedir)(), ".wix", "auth", "api-key.json");
754
749
  async function loadWixAuthPlaceholders(authFilePath = WIX_AUTH_FILE) {
755
750
  try {
756
- const content = await (0, import_promises2.readFile)(authFilePath, "utf-8");
751
+ const content = await (0, import_promises3.readFile)(authFilePath, "utf-8");
757
752
  const auth = JSON.parse(content);
758
753
  if (!auth.token || !auth.userInfo?.userId) {
759
754
  return {};
@@ -806,14 +801,14 @@ async function writeMcpToFilesystem(cwd, mcps) {
806
801
  null,
807
802
  2
808
803
  );
809
- const filePath = (0, import_path4.join)(cwd, ".mcp.json");
810
- await (0, import_promises3.writeFile)(filePath, content, "utf8");
804
+ const filePath = (0, import_path5.join)(cwd, ".mcp.json");
805
+ await (0, import_promises4.writeFile)(filePath, content, "utf8");
811
806
  console.log(`[MCP] Written to ${filePath}`);
812
807
  }
813
808
 
814
809
  // src/run-scenario/agents/claude-code/write-sub-agents.ts
815
- var import_promises4 = require("fs/promises");
816
- var import_path5 = require("path");
810
+ var import_promises5 = require("fs/promises");
811
+ var import_path6 = require("path");
817
812
  var AGENTS_DIR = ".claude/agents";
818
813
  function toAgentFilename(name, index, nameCount) {
819
814
  const base = (name || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
@@ -823,20 +818,20 @@ function toAgentFilename(name, index, nameCount) {
823
818
  }
824
819
  async function writeSubAgentsToFilesystem(cwd, subAgents) {
825
820
  if (subAgents.length === 0) return;
826
- const agentsDir = (0, import_path5.join)(cwd, AGENTS_DIR);
827
- await (0, import_promises4.mkdir)(agentsDir, { recursive: true });
821
+ const agentsDir = (0, import_path6.join)(cwd, AGENTS_DIR);
822
+ await (0, import_promises5.mkdir)(agentsDir, { recursive: true });
828
823
  const nameCount = /* @__PURE__ */ new Map();
829
824
  for (const [i, agent] of subAgents.entries()) {
830
825
  const filename = toAgentFilename(agent.name, i, nameCount);
831
- const filePath = (0, import_path5.join)(agentsDir, `${filename}.md`);
832
- await (0, import_promises4.writeFile)(filePath, agent.subAgentMd, "utf8");
826
+ const filePath = (0, import_path6.join)(agentsDir, `${filename}.md`);
827
+ await (0, import_promises5.writeFile)(filePath, agent.subAgentMd, "utf8");
833
828
  }
834
829
  console.log(`[SubAgents] Written to ${agentsDir}`);
835
830
  }
836
831
 
837
832
  // src/run-scenario/agents/claude-code/write-rules.ts
838
- var import_promises5 = require("fs/promises");
839
- var import_path6 = require("path");
833
+ var import_promises6 = require("fs/promises");
834
+ var import_path7 = require("path");
840
835
  var CURSOR_RULES_DIR = ".cursor/rules";
841
836
  function toRuleFilename(name, index, nameCount) {
842
837
  const base = (name || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `rule-${index}`;
@@ -847,13 +842,13 @@ function toRuleFilename(name, index, nameCount) {
847
842
  async function appendToFile(filePath, content) {
848
843
  let existing = "";
849
844
  try {
850
- existing = await (0, import_promises5.readFile)(filePath, "utf8");
845
+ existing = await (0, import_promises6.readFile)(filePath, "utf8");
851
846
  } catch {
852
847
  }
853
848
  const merged = existing ? `${existing.trimEnd()}
854
849
 
855
850
  ${content}` : content;
856
- await (0, import_promises5.writeFile)(filePath, merged, "utf8");
851
+ await (0, import_promises6.writeFile)(filePath, merged, "utf8");
857
852
  }
858
853
  async function writeRulesToFilesystem(cwd, rules) {
859
854
  if (rules.length === 0) return;
@@ -862,21 +857,21 @@ async function writeRulesToFilesystem(cwd, rules) {
862
857
  for (const [i, rule] of rules.entries()) {
863
858
  switch (rule.ruleType) {
864
859
  case "claude-md": {
865
- await appendToFile((0, import_path6.join)(cwd, "CLAUDE.md"), rule.content);
860
+ await appendToFile((0, import_path7.join)(cwd, "CLAUDE.md"), rule.content);
866
861
  break;
867
862
  }
868
863
  case "agents-md": {
869
- await appendToFile((0, import_path6.join)(cwd, "AGENTS.md"), rule.content);
864
+ await appendToFile((0, import_path7.join)(cwd, "AGENTS.md"), rule.content);
870
865
  break;
871
866
  }
872
867
  case "cursor-rule": {
873
868
  if (!hasCursorRules) {
874
- await (0, import_promises5.mkdir)((0, import_path6.join)(cwd, CURSOR_RULES_DIR), { recursive: true });
869
+ await (0, import_promises6.mkdir)((0, import_path7.join)(cwd, CURSOR_RULES_DIR), { recursive: true });
875
870
  hasCursorRules = true;
876
871
  }
877
872
  const filename = toRuleFilename(rule.name, i, nameCount);
878
- const filePath = (0, import_path6.join)(cwd, CURSOR_RULES_DIR, `${filename}.md`);
879
- await (0, import_promises5.writeFile)(filePath, rule.content, "utf8");
873
+ const filePath = (0, import_path7.join)(cwd, CURSOR_RULES_DIR, `${filename}.md`);
874
+ await (0, import_promises6.writeFile)(filePath, rule.content, "utf8");
880
875
  break;
881
876
  }
882
877
  }
@@ -1911,7 +1906,7 @@ defaultRegistry.register(claudeCodeAdapter);
1911
1906
 
1912
1907
  // src/run-scenario/file-diff.ts
1913
1908
  var import_fs2 = require("fs");
1914
- var import_path7 = require("path");
1909
+ var import_path8 = require("path");
1915
1910
 
1916
1911
  // ../../node_modules/diff/lib/index.mjs
1917
1912
  function Diff() {
@@ -2527,8 +2522,8 @@ function snapshotDirectory(dir, baseDir) {
2527
2522
  }
2528
2523
  const entries = (0, import_fs2.readdirSync)(dir, { withFileTypes: true });
2529
2524
  for (const entry of entries) {
2530
- const fullPath = (0, import_path7.join)(dir, entry.name);
2531
- const relativePath = (0, import_path7.relative)(base, fullPath);
2525
+ const fullPath = (0, import_path8.join)(dir, entry.name);
2526
+ const relativePath = (0, import_path8.relative)(base, fullPath);
2532
2527
  if (shouldIgnore(entry.name)) {
2533
2528
  continue;
2534
2529
  }
@@ -2640,14 +2635,17 @@ var import_evalforge_types5 = require("@wix/evalforge-types");
2640
2635
  var DEFAULT_AGENT_COMMAND = import_evalforge_types5.AgentRunCommand.CLAUDE;
2641
2636
  async function runAgentWithContext(config, evalRunId2, scenario, evalData, workDir) {
2642
2637
  const skillsGroupId = evalData.evalRun.skillsGroupId;
2638
+ const agent = evalData.agent ?? void 0;
2639
+ const isSDK = agent?.agentType === import_evalforge_types5.AgentType.SDK;
2643
2640
  if (!skillsGroupId) {
2644
2641
  throw new Error(`Eval run ${evalData.evalRun.id} has no skillsGroupId`);
2645
2642
  }
2646
- const agent = evalData.codeAgent ?? void 0;
2647
- const runCommand = agent?.runCommand ?? DEFAULT_AGENT_COMMAND;
2648
- const adapter = getAdapter(runCommand);
2643
+ const identifier = isSDK ? agent.id : agent?.runCommand ?? DEFAULT_AGENT_COMMAND;
2644
+ const adapter = getAdapter(identifier);
2649
2645
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
2650
2646
  const beforeSnapshot = workDir ? snapshotDirectory(workDir) : {};
2647
+ const targetId = skillsGroupId ?? agent?.id ?? evalData.evalRun.id;
2648
+ const targetName = evalData.skillsGroupName || agent?.name || "";
2651
2649
  const executionContext = {
2652
2650
  skills: evalData.skills,
2653
2651
  scenario,
@@ -2659,8 +2657,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
2659
2657
  evalRunId: evalRunId2,
2660
2658
  scenarioId: scenario.id,
2661
2659
  scenarioName: scenario.name,
2662
- targetId: skillsGroupId,
2663
- targetName: evalData.skillsGroupName,
2660
+ targetId,
2661
+ targetName,
2664
2662
  tracePushUrl: config.tracePushUrl,
2665
2663
  routeHeader: config.routeHeader,
2666
2664
  authToken: config.authToken
@@ -2677,8 +2675,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
2677
2675
  const templateFiles = workDir ? extractTemplateFiles(beforeSnapshot, afterSnapshot) : void 0;
2678
2676
  return {
2679
2677
  id: (0, import_crypto2.randomUUID)(),
2680
- targetId: skillsGroupId,
2681
- targetName: evalData.skillsGroupName,
2678
+ targetId,
2679
+ targetName,
2682
2680
  scenarioId: scenario.id,
2683
2681
  scenarioName: scenario.name,
2684
2682
  modelConfig: agent?.modelConfig,
@@ -2694,11 +2692,11 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
2694
2692
 
2695
2693
  // src/run-scenario/index.ts
2696
2694
  async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions) {
2697
- const skillsGroupId = evalData.evalRun.skillsGroupId;
2695
+ const targetId = evalData.evalRun.skillsGroupId ?? evalData.agent?.id ?? evalData.evalRun.id;
2698
2696
  const workDir = await prepareWorkingDirectory(
2699
2697
  config,
2700
2698
  evalRunId2,
2701
- skillsGroupId,
2699
+ targetId,
2702
2700
  scenario.id,
2703
2701
  template
2704
2702
  );
@@ -2726,7 +2724,8 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
2726
2724
  })),
2727
2725
  durationMs: partialResult.duration
2728
2726
  };
2729
- const { "x-wix-ai-gateway-stream": _stream, ...judgeHeaders } = config.aiGatewayHeaders;
2727
+ const { "x-wix-ai-gateway-stream": _ignored, ...judgeHeaders } = config.aiGatewayHeaders;
2728
+ void _ignored;
2730
2729
  const defaultJudgeModel = import_evalforge_types6.DEFAULT_JUDGE_MODEL;
2731
2730
  const assertionContext = {
2732
2731
  workDir,
@@ -2903,7 +2902,7 @@ async function runEvaluation(projectId2, evalRunId2) {
2903
2902
  scenarioItemCount: evalData.scenarioItems.length,
2904
2903
  scenarios: evalData.scenarioItems.map((s) => s.scenario.name),
2905
2904
  skillsCount: evalData.skills.length,
2906
- hasCodeAgent: !!evalData.codeAgent,
2905
+ hasAgent: !!evalData.agent,
2907
2906
  timestamp: Date.now()
2908
2907
  })
2909
2908
  );
@@ -2921,14 +2920,14 @@ async function runEvaluation(projectId2, evalRunId2) {
2921
2920
  `[${ExecutionPhase.FETCH_EVAL_RUN}] Failed to fetch evaluation data: ${errorMsg}`
2922
2921
  );
2923
2922
  }
2924
- const { codeAgent, skills, scenarioItems } = evalData;
2923
+ const { agent, skills, scenarioItems } = evalData;
2925
2924
  state.currentPhase = ExecutionPhase.VALIDATION;
2926
2925
  state.currentContext = {
2927
2926
  projectId: projectId2,
2928
2927
  evalRunId: evalRunId2,
2929
2928
  scenarioCount: scenarioItems.length,
2930
2929
  skillCount: skills.length,
2931
- hasAgent: !!codeAgent,
2930
+ hasAgent: !!agent,
2932
2931
  agentId: evalData.evalRun.agentId,
2933
2932
  skillsGroupId: evalData.evalRun.skillsGroupId
2934
2933
  };
@@ -2937,9 +2936,9 @@ async function runEvaluation(projectId2, evalRunId2) {
2937
2936
  `[${ExecutionPhase.VALIDATION}] Eval run has no skills: set skillsGroupId and ensure the group has skills. (skillsGroupId: ${evalData.evalRun.skillsGroupId || "not set"})`
2938
2937
  );
2939
2938
  }
2940
- if (scenarioItems.length > 0 && skills.length > 0 && !codeAgent) {
2939
+ if (scenarioItems.length > 0 && skills.length > 0 && !agent) {
2941
2940
  throw new Error(
2942
- `[${ExecutionPhase.VALIDATION}] Eval run has no code agent: set agentId for skill-based runs. (agentId: ${evalData.evalRun.agentId || "not set"})`
2941
+ `[${ExecutionPhase.VALIDATION}] Eval run has no agent: set agentId for skill-based runs. (agentId: ${evalData.evalRun.agentId || "not set"})`
2943
2942
  );
2944
2943
  }
2945
2944
  let completedScenarios = 0;
@@ -2953,8 +2952,8 @@ async function runEvaluation(projectId2, evalRunId2) {
2953
2952
  scenarioName: scenario.name,
2954
2953
  skillsGroupId: evalData.evalRun.skillsGroupId,
2955
2954
  skillsGroupName: evalData.skillsGroupName,
2956
- agentId: codeAgent?.id,
2957
- agentName: codeAgent?.name,
2955
+ agentId: agent?.id,
2956
+ agentName: agent?.name,
2958
2957
  progress: `${completedScenarios + 1}/${totalScenarios}`
2959
2958
  };
2960
2959
  const skillNames = evalData.skills.map((s) => s.name).join(", ");
@@ -2962,7 +2961,7 @@ async function runEvaluation(projectId2, evalRunId2) {
2962
2961
  "[Evaluator] Running scenario with skills group:",
2963
2962
  evalData.skillsGroupName,
2964
2963
  skillNames ? `(${skillNames})` : "",
2965
- codeAgent ? `with agent: ${codeAgent.name}` : "",
2964
+ agent ? `with agent: ${agent.name}` : "",
2966
2965
  `(${completedScenarios + 1}/${totalScenarios})`
2967
2966
  );
2968
2967
  try {