@wix/evalforge-evaluator 0.98.0 → 0.100.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -187,6 +187,53 @@ import {
187
187
  isSystemAssertionId,
188
188
  SYSTEM_ASSERTIONS
189
189
  } from "@wix/evalforge-types";
190
+
191
+ // src/resolve-placeholders.ts
192
/** Matches `{{ key }}` placeholders; capture group 1 is the raw (untrimmed) key. */
var PLACEHOLDER_PATTERN = /\{\{([^}]+)\}\}/g;

/**
 * Collect the distinct placeholder keys referenced anywhere inside `value`.
 *
 * @param value - A string, array, or plain object (nested arbitrarily); other values are ignored.
 * @returns The trimmed placeholder keys, de-duplicated, in first-seen order.
 */
function findPlaceholders(value) {
  const keys = /* @__PURE__ */ new Set();
  collectPlaceholders(value, keys);
  return [...keys];
}

/**
 * Recursive worker for findPlaceholders: adds every trimmed `{{key}}` found in
 * strings reachable from `value` (through arrays and object values) to `keys`.
 */
function collectPlaceholders(value, keys) {
  if (typeof value === "string") {
    for (const match of value.matchAll(PLACEHOLDER_PATTERN)) {
      keys.add(match[1].trim());
    }
  } else if (Array.isArray(value)) {
    for (const item of value) {
      collectPlaceholders(item, keys);
    }
  } else if (typeof value === "object" && value !== null) {
    for (const val of Object.values(value)) {
      collectPlaceholders(val, keys);
    }
  }
}

/**
 * Return a copy of `value` in which every `{{key}}` inside a string is replaced
 * by `placeholders[key]`. Arrays and objects are rebuilt recursively; any other
 * value is returned unchanged.
 *
 * @param value - String, array, object, or primitive to process.
 * @param placeholders - Map of placeholder key to replacement text.
 * @returns The resolved copy. Placeholders with no matching key are left verbatim.
 */
function resolveValue(value, placeholders) {
  if (typeof value === "string") {
    return value.replace(PLACEHOLDER_PATTERN, (match, key) => {
      const trimmed = key.trim();
      // Own-property check: the `in` operator would also match inherited members
      // such as "toString" or "constructor" and splice a function's source text
      // into the output.
      return Object.prototype.hasOwnProperty.call(placeholders, trimmed) ? placeholders[trimmed] : match;
    });
  }
  if (Array.isArray(value)) {
    return value.map((item) => resolveValue(item, placeholders));
  }
  if (typeof value === "object" && value !== null) {
    // Object.fromEntries defines own data properties, so a "__proto__" key in the
    // input stays an ordinary key instead of changing the result's prototype.
    return Object.fromEntries(
      Object.entries(value).map(([k, v]) => [k, resolveValue(v, placeholders)])
    );
  }
  return value;
}

/**
 * Resolve `{{key}}` placeholders in a single string.
 *
 * @param text - Text that may contain placeholders.
 * @param placeholders - Map of placeholder key to replacement text.
 * @returns `text` with every known placeholder substituted.
 */
function resolvePlaceholdersInString(text, placeholders) {
  return resolveValue(text, placeholders);
}
235
+
236
+ // src/fetch-evaluation-data.ts
190
237
  function parseSkillNamesFromParams(value) {
191
238
  if (typeof value !== "string") {
192
239
  return [];
@@ -202,13 +249,11 @@ function applyParamsToAssertion(assertion, params) {
202
249
  return assertion;
203
250
  }
204
251
  if (assertion.type === "llm_judge") {
205
- let prompt = assertion.prompt;
252
+ const stringParams = {};
206
253
  for (const [key, value] of Object.entries(params)) {
207
- const placeholder = `{{${key}}}`;
208
- const escapedPlaceholder = placeholder.replace(/[{}]/g, "\\$&");
209
- const replacement = String(value ?? "");
210
- prompt = prompt.replace(new RegExp(escapedPlaceholder, "g"), replacement);
254
+ stringParams[key] = String(value ?? "");
211
255
  }
256
+ const prompt = resolvePlaceholdersInString(assertion.prompt, stringParams);
212
257
  return {
213
258
  ...assertion,
214
259
  prompt,
@@ -317,9 +362,9 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
317
362
  const scenarios = await Promise.all(
318
363
  evalRun.scenarioIds.map((id) => api.getScenario(projectId2, id))
319
364
  );
320
- let codeAgent = null;
365
+ let agent = null;
321
366
  if (evalRun.agentId) {
322
- codeAgent = await api.getAgent(projectId2, evalRun.agentId);
367
+ agent = await api.getAgent(projectId2, evalRun.agentId);
323
368
  }
324
369
  let skills = [];
325
370
  let skillsGroup = null;
@@ -420,7 +465,7 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
420
465
  const skillsGroupName = skillsGroup?.name ?? "";
421
466
  return {
422
467
  evalRun,
423
- codeAgent,
468
+ agent,
424
469
  skills,
425
470
  skillsGroup,
426
471
  skillsGroupName,
@@ -444,56 +489,18 @@ import {
444
489
  import { mkdirSync, existsSync, rmSync, readFileSync, writeFileSync } from "fs";
445
490
  import { tmpdir } from "os";
446
491
  import path from "path";
447
- import { fetchGitHubFolder as fetchGitHubFolder2 } from "@wix/evalforge-github-client";
492
+ import { fetchGitHubFolder } from "@wix/evalforge-github-client";
448
493
 
449
- // src/run-scenario/agents/claude-code/write-skills.ts
494
+ // src/run-scenario/utils/write-files.ts
450
495
  import { mkdir, writeFile } from "fs/promises";
451
- import { dirname, join, resolve, sep } from "path";
452
- import { fetchGitHubFolder } from "@wix/evalforge-github-client";
453
- async function writeSkillsToFilesystem(cwd, skills, fetchFn = fetchGitHubFolder) {
454
- await Promise.all(
455
- skills.map((skill) => writeSkillToFilesystem(cwd, skill, fetchFn))
456
- );
457
- }
458
- async function writeSkillToFilesystem(cwd, skill, fetchFn = fetchGitHubFolder) {
459
- const skillName = skill.name;
460
- const skillDir = join(cwd, ".claude", "skills", skillName);
461
- await mkdir(skillDir, { recursive: true });
462
- const version = skill.latestVersion;
463
- if (version?.files && version.files.length > 0) {
464
- await writeSkillFiles(skillDir, version.files);
465
- console.log(
466
- `[Skill] ${skillName}: wrote ${version.files.length} file(s) from snapshot`
467
- );
468
- } else if (skill.source) {
469
- try {
470
- const files = await fetchFn(skill.source, {
471
- userAgent: "EvalForge-Evaluator"
472
- });
473
- await writeSkillFiles(skillDir, files);
474
- console.log(
475
- `[Skill] ${skillName}: wrote ${files.length} file(s) from GitHub (live)`
476
- );
477
- } catch (error) {
478
- const message = error instanceof Error ? error.message : "Unknown error";
479
- console.error(
480
- `[Skill] ${skillName}: GitHub fetch failed: ${message}, no files to fall back to`
481
- );
482
- throw new Error(
483
- `Failed to write skill ${skillName} to filesystem: ${message}`
484
- );
485
- }
486
- } else {
487
- throw new Error(`Skill ${skillName} has no files and no source configured`);
488
- }
489
- }
490
- async function writeSkillFiles(skillDir, files) {
491
- const resolvedBase = resolve(skillDir);
496
+ import { dirname, resolve, sep } from "path";
497
+ async function writeFilesToDirectory(targetDir, files) {
498
+ const resolvedBase = resolve(targetDir);
492
499
  for (const file of files) {
493
- const filePath = resolve(skillDir, file.path);
500
+ const filePath = resolve(targetDir, file.path);
494
501
  if (!filePath.startsWith(resolvedBase + sep) && filePath !== resolvedBase) {
495
502
  throw new Error(
496
- `Path traversal detected in skill file: "${file.path}" resolves outside skill directory`
503
+ `Path traversal detected: "${file.path}" resolves outside target directory`
497
504
  );
498
505
  }
499
506
  await mkdir(dirname(filePath), { recursive: true });
@@ -509,10 +516,10 @@ async function fetchAndWriteTemplateFiles(template, workDir) {
509
516
  );
510
517
  return;
511
518
  }
512
- const files = await fetchGitHubFolder2(template.source, {
519
+ const files = await fetchGitHubFolder(template.source, {
513
520
  userAgent: "EvalForge-Evaluator"
514
521
  });
515
- await writeSkillFiles(workDir, files);
522
+ await writeFilesToDirectory(workDir, files);
516
523
  }
517
524
  function writeWixEnvFile(workDir) {
518
525
  const configPath = path.join(workDir, "wix.config.json");
@@ -566,86 +573,76 @@ import { randomUUID as randomUUID2 } from "crypto";
566
573
 
567
574
  // src/run-scenario/agents/registry.ts
568
575
  var AgentAdapterRegistry = class {
569
- /**
570
- * Map of run commands to their registered adapters.
571
- * Multiple commands can map to the same adapter.
572
- */
573
- adapters = /* @__PURE__ */ new Map();
574
- /**
575
- * Set of all registered adapter instances (for getAll).
576
- */
576
+ /** Map of CLI commands to their registered adapters. */
577
+ commandMap = /* @__PURE__ */ new Map();
578
+ /** Map of adapter IDs to their registered adapters. */
579
+ idMap = /* @__PURE__ */ new Map();
580
+ /** Set of all registered adapter instances (for getAll). */
577
581
  registeredAdapters = /* @__PURE__ */ new Set();
578
582
  /**
579
583
  * Register an agent adapter.
580
584
  *
581
- * The adapter will be registered for all commands in its supportedCommands array.
582
- * If a command is already registered, it will be overwritten with a warning.
583
- *
584
- * @param adapter - The adapter to register
585
+ * The adapter is registered by its ID and for all commands in its supportedCommands array.
586
+ * If a command or ID is already registered, it will be overwritten with a warning.
585
587
  */
586
588
  register(adapter) {
587
589
  this.registeredAdapters.add(adapter);
590
+ this.idMap.set(adapter.id, adapter);
588
591
  for (const command of adapter.supportedCommands) {
589
- if (this.adapters.has(command)) {
590
- const existing = this.adapters.get(command);
592
+ if (this.commandMap.has(command)) {
593
+ const existing = this.commandMap.get(command);
591
594
  console.warn(
592
595
  `[AgentAdapterRegistry] Command "${command}" already registered by adapter "${existing.id}". Overwriting with adapter "${adapter.id}".`
593
596
  );
594
597
  }
595
- this.adapters.set(command, adapter);
598
+ this.commandMap.set(command, adapter);
596
599
  }
597
600
  }
598
- /**
599
- * Get an adapter by run command.
600
- *
601
- * @param runCommand - The run command to look up
602
- * @returns The registered adapter, or undefined if not found
603
- */
604
- get(runCommand) {
605
- return this.adapters.get(runCommand);
601
+ /** Get an adapter by CLI command. */
602
+ getByCommand(command) {
603
+ return this.commandMap.get(command);
606
604
  }
607
- /**
608
- * Check if a command has a registered adapter.
609
- *
610
- * @param runCommand - The run command to check
611
- * @returns True if an adapter is registered for this command
612
- */
613
- has(runCommand) {
614
- return this.adapters.has(runCommand);
605
+ /** Get an adapter by adapter ID. */
606
+ getById(adapterId) {
607
+ return this.idMap.get(adapterId);
615
608
  }
616
609
  /**
617
- * Get all registered adapters.
618
- *
619
- * @returns Array of all unique registered adapters
610
+ * Unified lookup: tries CLI command first, then adapter ID.
611
+ * Use this when the identifier could be either a command or an adapter ID.
620
612
  */
613
+ resolve(identifier) {
614
+ return this.commandMap.get(identifier) ?? this.idMap.get(identifier);
615
+ }
616
+ /** Check if a command or adapter ID has a registered adapter. */
617
+ has(identifier) {
618
+ return this.commandMap.has(identifier) || this.idMap.has(identifier);
619
+ }
620
+ /** Get all registered adapters. */
621
621
  getAll() {
622
622
  return Array.from(this.registeredAdapters);
623
623
  }
624
- /**
625
- * Get all supported commands.
626
- *
627
- * @returns Array of all registered run commands
628
- */
624
+ /** Get all supported CLI commands. */
629
625
  getSupportedCommands() {
630
- return Array.from(this.adapters.keys());
626
+ return Array.from(this.commandMap.keys());
627
+ }
628
+ /** Get all registered adapter IDs. */
629
+ getAdapterIds() {
630
+ return Array.from(this.idMap.keys());
631
631
  }
632
632
  /**
633
633
  * Unregister an adapter by its ID.
634
- *
635
634
  * Removes the adapter and all its command mappings.
636
- *
637
- * @param adapterId - The ID of the adapter to remove
638
- * @returns True if the adapter was found and removed
639
635
  */
640
636
  unregister(adapterId) {
641
637
  let found = false;
642
638
  for (const adapter of this.registeredAdapters) {
643
639
  if (adapter.id === adapterId) {
644
640
  this.registeredAdapters.delete(adapter);
641
+ this.idMap.delete(adapterId);
645
642
  found = true;
646
643
  for (const command of adapter.supportedCommands) {
647
- if (this.adapters.get(command) === adapter) {
648
- this.adapters.delete(command);
644
+ if (this.commandMap.get(command) === adapter) {
645
+ this.commandMap.delete(command);
649
646
  }
650
647
  }
651
648
  break;
@@ -653,22 +650,21 @@ var AgentAdapterRegistry = class {
653
650
  }
654
651
  return found;
655
652
  }
656
- /**
657
- * Clear all registered adapters.
658
- * Primarily useful for testing.
659
- */
653
+ /** Clear all registered adapters. Primarily useful for testing. */
660
654
  clear() {
661
- this.adapters.clear();
655
+ this.commandMap.clear();
656
+ this.idMap.clear();
662
657
  this.registeredAdapters.clear();
663
658
  }
664
659
  };
665
660
  var defaultRegistry = new AgentAdapterRegistry();
666
- function getAdapter(runCommand) {
667
- const adapter = defaultRegistry.get(runCommand);
661
+ function getAdapter(identifier) {
662
+ const adapter = defaultRegistry.resolve(identifier);
668
663
  if (!adapter) {
669
- const supported = defaultRegistry.getSupportedCommands();
664
+ const commands = defaultRegistry.getSupportedCommands();
665
+ const ids = defaultRegistry.getAdapterIds();
670
666
  throw new Error(
671
- `No agent adapter registered for command "${runCommand}". Supported commands: ${supported.length > 0 ? supported.join(", ") : "(none registered)"}`
667
+ `No agent adapter registered for "${identifier}". Supported commands: ${commands.length > 0 ? commands.join(", ") : "(none)"}. Registered adapters: ${ids.length > 0 ? ids.join(", ") : "(none)"}`
672
668
  );
673
669
  }
674
670
  return adapter;
@@ -685,12 +681,97 @@ import {
685
681
  LiveTraceEventType,
686
682
  TRACE_EVENT_PREFIX
687
683
  } from "@wix/evalforge-types";
684
+
685
+ // src/run-scenario/agents/claude-code/write-skills.ts
686
+ import { mkdir as mkdir2 } from "fs/promises";
687
+ import { join } from "path";
688
+ import { fetchGitHubFolder as fetchGitHubFolder2 } from "@wix/evalforge-github-client";
689
/**
 * Write every skill in `skills` beneath `cwd`, running the per-skill writes concurrently.
 *
 * @param cwd - Working directory that receives the skills.
 * @param skills - Skills to write.
 * @param fetchFn - Fetcher forwarded to each writeSkillToFilesystem call.
 * @returns Resolves once all skills are written; rejects if any single write rejects.
 */
async function writeSkillsToFilesystem(cwd, skills, fetchFn = fetchGitHubFolder2) {
  const pendingWrites = skills.map((entry) => writeSkillToFilesystem(cwd, entry, fetchFn));
  await Promise.all(pendingWrites);
}
694
/**
 * Write one skill into `<cwd>/.claude/skills/<skill.name>`.
 *
 * Files recorded on `skill.latestVersion` are used when present and non-empty;
 * otherwise the files are fetched from `skill.source` through `fetchFn`.
 *
 * @param cwd - Working directory that receives the skill.
 * @param skill - Skill with `name` and either `latestVersion.files` or `source`.
 * @param fetchFn - Called as `fetchFn(source, { userAgent })` to obtain the files.
 * @throws Error when the fetch/write fails, or when the skill has neither files nor a source.
 */
async function writeSkillToFilesystem(cwd, skill, fetchFn = fetchGitHubFolder2) {
  const skillName = skill.name;
  const skillDir = join(cwd, ".claude", "skills", skillName);
  await mkdir2(skillDir, { recursive: true });

  // Preferred path: files captured on the skill's latest version.
  const version = skill.latestVersion;
  const hasSnapshot = Boolean(version?.files && version.files.length > 0);
  if (hasSnapshot) {
    await writeFilesToDirectory(skillDir, version.files);
    console.log(
      `[Skill] ${skillName}: wrote ${version.files.length} file(s) from snapshot`
    );
    return;
  }

  // Nothing stored and nowhere to fetch from: cannot produce the skill.
  if (!skill.source) {
    throw new Error(`Skill ${skillName} has no files and no source configured`);
  }

  // Fallback path: fetch the files from the configured source.
  try {
    const fetched = await fetchFn(skill.source, {
      userAgent: "EvalForge-Evaluator"
    });
    await writeFilesToDirectory(skillDir, fetched);
    console.log(
      `[Skill] ${skillName}: wrote ${fetched.length} file(s) from GitHub (live)`
    );
  } catch (error) {
    const message = error instanceof Error ? error.message : "Unknown error";
    console.error(
      `[Skill] ${skillName}: GitHub fetch failed: ${message}, no files to fall back to`
    );
    throw new Error(
      `Failed to write skill ${skillName} to filesystem: ${message}`
    );
  }
}
726
+
727
+ // src/run-scenario/agents/claude-code/execute.ts
688
728
  import { randomUUID } from "crypto";
689
729
 
690
730
  // src/run-scenario/agents/claude-code/write-mcp.ts
691
731
  import { writeFile as writeFile2 } from "fs/promises";
692
- import { join as join2 } from "path";
732
+ import { join as join3 } from "path";
693
733
  import { MCP_SERVERS_JSON_KEY } from "@wix/evalforge-types";
734
+
735
+ // src/run-scenario/agents/claude-code/resolve-mcp-placeholders.ts
736
+ import { readFile } from "fs/promises";
737
+ import { join as join2 } from "path";
738
+ import { homedir } from "os";
739
/** Default auth file location: `<home>/.wix/auth/api-key.json`. */
var WIX_AUTH_FILE = join2(homedir(), ".wix", "auth", "api-key.json");

/**
 * Read the Wix auth file and expose its credentials as placeholder values.
 *
 * @param authFilePath - JSON file to read; defaults to WIX_AUTH_FILE.
 * @returns `{ "wix-auth-token", "wix-auth-user-id" }` when the file holds a
 *   non-empty string `token` and `userInfo.userId`; otherwise `{}`. Never rejects:
 *   read and parse failures are logged as a warning and yield `{}`.
 */
async function loadWixAuthPlaceholders(authFilePath = WIX_AUTH_FILE) {
  try {
    const content = await readFile(authFilePath, "utf-8");
    const auth = JSON.parse(content);
    // JSON.parse output is untrusted: require real, non-empty strings so that an
    // object or number is never stringified into a credential value.
    const token = auth?.token;
    const userId = auth?.userInfo?.userId;
    const isUsable = (v) => typeof v === "string" && v.length > 0;
    if (!isUsable(token) || !isUsable(userId)) {
      return {};
    }
    return {
      "wix-auth-token": token,
      "wix-auth-user-id": userId
    };
  } catch (err) {
    // A thrown value is not guaranteed to be an Error instance.
    const message = err instanceof Error ? err.message : String(err);
    console.warn(
      `[MCP] Could not load Wix auth file: ${message}`
    );
    return {};
  }
}
758
/**
 * Substitute `{{placeholder}}` tokens in an MCP servers config with values
 * loaded from the Wix auth file.
 *
 * @param mcpServers - MCP servers config (nested objects/arrays/strings).
 * @param authFilePath - Optional auth file path forwarded to loadWixAuthPlaceholders.
 * @returns `mcpServers` itself when it contains no placeholders, otherwise a resolved copy.
 * @throws Error listing every placeholder for which no value was loaded.
 */
async function resolveMcpPlaceholders(mcpServers, authFilePath) {
  const needed = findPlaceholders(mcpServers);
  if (needed.length === 0) {
    return mcpServers;
  }
  const placeholders = await loadWixAuthPlaceholders(authFilePath);
  // Own-property check: `key in placeholders` is also true for inherited names
  // such as "toString", which would let an unknown placeholder slip through.
  const unresolved = needed.filter(
    (key) => !Object.prototype.hasOwnProperty.call(placeholders, key)
  );
  if (unresolved.length > 0) {
    throw new Error(
      `MCP config contains unresolvable placeholders: ${unresolved.map((k) => `{{${k}}}`).join(", ")}. Ensure ~/.wix/auth/api-key.json exists (run \`npx @wix/cli login\`).`
    );
  }
  console.log(`[MCP] Resolved ${needed.length} placeholder(s)`);
  return resolveValue(mcpServers, placeholders);
}
773
+
774
+ // src/run-scenario/agents/claude-code/write-mcp.ts
694
775
  async function writeMcpToFilesystem(cwd, mcps) {
695
776
  if (mcps.length === 0) return;
696
777
  const mcpServers = {};
@@ -705,19 +786,20 @@ async function writeMcpToFilesystem(cwd, mcps) {
705
786
  mcpServers[key] = value;
706
787
  }
707
788
  }
789
+ const resolvedServers = await resolveMcpPlaceholders(mcpServers);
708
790
  const content = JSON.stringify(
709
- { [MCP_SERVERS_JSON_KEY]: mcpServers },
791
+ { [MCP_SERVERS_JSON_KEY]: resolvedServers },
710
792
  null,
711
793
  2
712
794
  );
713
- const filePath = join2(cwd, ".mcp.json");
795
+ const filePath = join3(cwd, ".mcp.json");
714
796
  await writeFile2(filePath, content, "utf8");
715
797
  console.log(`[MCP] Written to ${filePath}`);
716
798
  }
717
799
 
718
800
  // src/run-scenario/agents/claude-code/write-sub-agents.ts
719
- import { mkdir as mkdir2, writeFile as writeFile3 } from "fs/promises";
720
- import { join as join3 } from "path";
801
+ import { mkdir as mkdir3, writeFile as writeFile3 } from "fs/promises";
802
+ import { join as join4 } from "path";
721
803
  var AGENTS_DIR = ".claude/agents";
722
804
  function toAgentFilename(name, index, nameCount) {
723
805
  const base = (name || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
@@ -727,20 +809,20 @@ function toAgentFilename(name, index, nameCount) {
727
809
  }
728
810
  async function writeSubAgentsToFilesystem(cwd, subAgents) {
729
811
  if (subAgents.length === 0) return;
730
- const agentsDir = join3(cwd, AGENTS_DIR);
731
- await mkdir2(agentsDir, { recursive: true });
812
+ const agentsDir = join4(cwd, AGENTS_DIR);
813
+ await mkdir3(agentsDir, { recursive: true });
732
814
  const nameCount = /* @__PURE__ */ new Map();
733
815
  for (const [i, agent] of subAgents.entries()) {
734
816
  const filename = toAgentFilename(agent.name, i, nameCount);
735
- const filePath = join3(agentsDir, `${filename}.md`);
817
+ const filePath = join4(agentsDir, `${filename}.md`);
736
818
  await writeFile3(filePath, agent.subAgentMd, "utf8");
737
819
  }
738
820
  console.log(`[SubAgents] Written to ${agentsDir}`);
739
821
  }
740
822
 
741
823
  // src/run-scenario/agents/claude-code/write-rules.ts
742
- import { mkdir as mkdir3, writeFile as writeFile4, readFile } from "fs/promises";
743
- import { join as join4 } from "path";
824
+ import { mkdir as mkdir4, writeFile as writeFile4, readFile as readFile2 } from "fs/promises";
825
+ import { join as join5 } from "path";
744
826
  var CURSOR_RULES_DIR = ".cursor/rules";
745
827
  function toRuleFilename(name, index, nameCount) {
746
828
  const base = (name || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `rule-${index}`;
@@ -751,7 +833,7 @@ function toRuleFilename(name, index, nameCount) {
751
833
  async function appendToFile(filePath, content) {
752
834
  let existing = "";
753
835
  try {
754
- existing = await readFile(filePath, "utf8");
836
+ existing = await readFile2(filePath, "utf8");
755
837
  } catch {
756
838
  }
757
839
  const merged = existing ? `${existing.trimEnd()}
@@ -766,20 +848,20 @@ async function writeRulesToFilesystem(cwd, rules) {
766
848
  for (const [i, rule] of rules.entries()) {
767
849
  switch (rule.ruleType) {
768
850
  case "claude-md": {
769
- await appendToFile(join4(cwd, "CLAUDE.md"), rule.content);
851
+ await appendToFile(join5(cwd, "CLAUDE.md"), rule.content);
770
852
  break;
771
853
  }
772
854
  case "agents-md": {
773
- await appendToFile(join4(cwd, "AGENTS.md"), rule.content);
855
+ await appendToFile(join5(cwd, "AGENTS.md"), rule.content);
774
856
  break;
775
857
  }
776
858
  case "cursor-rule": {
777
859
  if (!hasCursorRules) {
778
- await mkdir3(join4(cwd, CURSOR_RULES_DIR), { recursive: true });
860
+ await mkdir4(join5(cwd, CURSOR_RULES_DIR), { recursive: true });
779
861
  hasCursorRules = true;
780
862
  }
781
863
  const filename = toRuleFilename(rule.name, i, nameCount);
782
- const filePath = join4(cwd, CURSOR_RULES_DIR, `${filename}.md`);
864
+ const filePath = join5(cwd, CURSOR_RULES_DIR, `${filename}.md`);
783
865
  await writeFile4(filePath, rule.content, "utf8");
784
866
  break;
785
867
  }
@@ -1815,7 +1897,7 @@ defaultRegistry.register(claudeCodeAdapter);
1815
1897
 
1816
1898
  // src/run-scenario/file-diff.ts
1817
1899
  import { readdirSync, readFileSync as readFileSync2, statSync, existsSync as existsSync2 } from "fs";
1818
- import { join as join6, relative } from "path";
1900
+ import { join as join7, relative } from "path";
1819
1901
 
1820
1902
  // ../../node_modules/diff/lib/index.mjs
1821
1903
  function Diff() {
@@ -1991,7 +2073,7 @@ Diff.prototype = {
1991
2073
  tokenize: function tokenize(value) {
1992
2074
  return Array.from(value);
1993
2075
  },
1994
- join: function join5(chars) {
2076
+ join: function join6(chars) {
1995
2077
  return chars.join("");
1996
2078
  },
1997
2079
  postProcess: function postProcess(changeObjects) {
@@ -2431,7 +2513,7 @@ function snapshotDirectory(dir, baseDir) {
2431
2513
  }
2432
2514
  const entries = readdirSync(dir, { withFileTypes: true });
2433
2515
  for (const entry of entries) {
2434
- const fullPath = join6(dir, entry.name);
2516
+ const fullPath = join7(dir, entry.name);
2435
2517
  const relativePath = relative(base, fullPath);
2436
2518
  if (shouldIgnore(entry.name)) {
2437
2519
  continue;
@@ -2540,18 +2622,21 @@ function extractTemplateFiles(before, after) {
2540
2622
  }
2541
2623
 
2542
2624
  // src/run-scenario/run-agent-with-context.ts
2543
- import { AgentRunCommand as AgentRunCommand2 } from "@wix/evalforge-types";
2625
+ import { AgentRunCommand as AgentRunCommand2, AgentType } from "@wix/evalforge-types";
2544
2626
  var DEFAULT_AGENT_COMMAND = AgentRunCommand2.CLAUDE;
2545
2627
  async function runAgentWithContext(config, evalRunId2, scenario, evalData, workDir) {
2546
2628
  const skillsGroupId = evalData.evalRun.skillsGroupId;
2629
+ const agent = evalData.agent ?? void 0;
2630
+ const isSDK = agent?.agentType === AgentType.SDK;
2547
2631
  if (!skillsGroupId) {
2548
2632
  throw new Error(`Eval run ${evalData.evalRun.id} has no skillsGroupId`);
2549
2633
  }
2550
- const agent = evalData.codeAgent ?? void 0;
2551
- const runCommand = agent?.runCommand ?? DEFAULT_AGENT_COMMAND;
2552
- const adapter = getAdapter(runCommand);
2634
+ const identifier = isSDK ? agent.id : agent?.runCommand ?? DEFAULT_AGENT_COMMAND;
2635
+ const adapter = getAdapter(identifier);
2553
2636
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
2554
2637
  const beforeSnapshot = workDir ? snapshotDirectory(workDir) : {};
2638
+ const targetId = skillsGroupId ?? agent?.id ?? evalData.evalRun.id;
2639
+ const targetName = evalData.skillsGroupName || agent?.name || "";
2555
2640
  const executionContext = {
2556
2641
  skills: evalData.skills,
2557
2642
  scenario,
@@ -2563,8 +2648,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
2563
2648
  evalRunId: evalRunId2,
2564
2649
  scenarioId: scenario.id,
2565
2650
  scenarioName: scenario.name,
2566
- targetId: skillsGroupId,
2567
- targetName: evalData.skillsGroupName,
2651
+ targetId,
2652
+ targetName,
2568
2653
  tracePushUrl: config.tracePushUrl,
2569
2654
  routeHeader: config.routeHeader,
2570
2655
  authToken: config.authToken
@@ -2581,8 +2666,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
2581
2666
  const templateFiles = workDir ? extractTemplateFiles(beforeSnapshot, afterSnapshot) : void 0;
2582
2667
  return {
2583
2668
  id: randomUUID2(),
2584
- targetId: skillsGroupId,
2585
- targetName: evalData.skillsGroupName,
2669
+ targetId,
2670
+ targetName,
2586
2671
  scenarioId: scenario.id,
2587
2672
  scenarioName: scenario.name,
2588
2673
  modelConfig: agent?.modelConfig,
@@ -2598,11 +2683,11 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
2598
2683
 
2599
2684
  // src/run-scenario/index.ts
2600
2685
  async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions) {
2601
- const skillsGroupId = evalData.evalRun.skillsGroupId;
2686
+ const targetId = evalData.evalRun.skillsGroupId ?? evalData.agent?.id ?? evalData.evalRun.id;
2602
2687
  const workDir = await prepareWorkingDirectory(
2603
2688
  config,
2604
2689
  evalRunId2,
2605
- skillsGroupId,
2690
+ targetId,
2606
2691
  scenario.id,
2607
2692
  template
2608
2693
  );
@@ -2630,7 +2715,8 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
2630
2715
  })),
2631
2716
  durationMs: partialResult.duration
2632
2717
  };
2633
- const { "x-wix-ai-gateway-stream": _stream, ...judgeHeaders } = config.aiGatewayHeaders;
2718
+ const { "x-wix-ai-gateway-stream": _ignored, ...judgeHeaders } = config.aiGatewayHeaders;
2719
+ void _ignored;
2634
2720
  const defaultJudgeModel = DEFAULT_JUDGE_MODEL;
2635
2721
  const assertionContext = {
2636
2722
  workDir,
@@ -2807,7 +2893,7 @@ async function runEvaluation(projectId2, evalRunId2) {
2807
2893
  scenarioItemCount: evalData.scenarioItems.length,
2808
2894
  scenarios: evalData.scenarioItems.map((s) => s.scenario.name),
2809
2895
  skillsCount: evalData.skills.length,
2810
- hasCodeAgent: !!evalData.codeAgent,
2896
+ hasAgent: !!evalData.agent,
2811
2897
  timestamp: Date.now()
2812
2898
  })
2813
2899
  );
@@ -2825,14 +2911,14 @@ async function runEvaluation(projectId2, evalRunId2) {
2825
2911
  `[${ExecutionPhase.FETCH_EVAL_RUN}] Failed to fetch evaluation data: ${errorMsg}`
2826
2912
  );
2827
2913
  }
2828
- const { codeAgent, skills, scenarioItems } = evalData;
2914
+ const { agent, skills, scenarioItems } = evalData;
2829
2915
  state.currentPhase = ExecutionPhase.VALIDATION;
2830
2916
  state.currentContext = {
2831
2917
  projectId: projectId2,
2832
2918
  evalRunId: evalRunId2,
2833
2919
  scenarioCount: scenarioItems.length,
2834
2920
  skillCount: skills.length,
2835
- hasAgent: !!codeAgent,
2921
+ hasAgent: !!agent,
2836
2922
  agentId: evalData.evalRun.agentId,
2837
2923
  skillsGroupId: evalData.evalRun.skillsGroupId
2838
2924
  };
@@ -2841,9 +2927,9 @@ async function runEvaluation(projectId2, evalRunId2) {
2841
2927
  `[${ExecutionPhase.VALIDATION}] Eval run has no skills: set skillsGroupId and ensure the group has skills. (skillsGroupId: ${evalData.evalRun.skillsGroupId || "not set"})`
2842
2928
  );
2843
2929
  }
2844
- if (scenarioItems.length > 0 && skills.length > 0 && !codeAgent) {
2930
+ if (scenarioItems.length > 0 && skills.length > 0 && !agent) {
2845
2931
  throw new Error(
2846
- `[${ExecutionPhase.VALIDATION}] Eval run has no code agent: set agentId for skill-based runs. (agentId: ${evalData.evalRun.agentId || "not set"})`
2932
+ `[${ExecutionPhase.VALIDATION}] Eval run has no agent: set agentId for skill-based runs. (agentId: ${evalData.evalRun.agentId || "not set"})`
2847
2933
  );
2848
2934
  }
2849
2935
  let completedScenarios = 0;
@@ -2857,8 +2943,8 @@ async function runEvaluation(projectId2, evalRunId2) {
2857
2943
  scenarioName: scenario.name,
2858
2944
  skillsGroupId: evalData.evalRun.skillsGroupId,
2859
2945
  skillsGroupName: evalData.skillsGroupName,
2860
- agentId: codeAgent?.id,
2861
- agentName: codeAgent?.name,
2946
+ agentId: agent?.id,
2947
+ agentName: agent?.name,
2862
2948
  progress: `${completedScenarios + 1}/${totalScenarios}`
2863
2949
  };
2864
2950
  const skillNames = evalData.skills.map((s) => s.name).join(", ");
@@ -2866,7 +2952,7 @@ async function runEvaluation(projectId2, evalRunId2) {
2866
2952
  "[Evaluator] Running scenario with skills group:",
2867
2953
  evalData.skillsGroupName,
2868
2954
  skillNames ? `(${skillNames})` : "",
2869
- codeAgent ? `with agent: ${codeAgent.name}` : "",
2955
+ agent ? `with agent: ${agent.name}` : "",
2870
2956
  `(${completedScenarios + 1}/${totalScenarios})`
2871
2957
  );
2872
2958
  try {