@wix/evalforge-evaluator 0.98.0 → 0.100.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -207,6 +207,53 @@ function createApiClient(serverUrl, options = "") {
207
207
 
208
208
  // src/fetch-evaluation-data.ts
209
209
  var import_evalforge_types = require("@wix/evalforge-types");
210
+
211
+ // src/resolve-placeholders.ts
212
/**
 * Matches `{{ key }}` tokens. Capture group 1 is the raw text between the
 * braces; callers trim it before using it as a lookup key.
 */
const PLACEHOLDER_PATTERN = /\{\{([^}]+)\}\}/g;

/**
 * Collect the distinct placeholder keys referenced anywhere inside `value`.
 *
 * @param {unknown} value - A string, array, plain object, or any nesting of them.
 * @returns {string[]} Trimmed placeholder keys in first-seen order, without duplicates.
 */
function findPlaceholders(value) {
  const keys = new Set();
  collectPlaceholders(value, keys);
  return [...keys];
}

/**
 * Recursively walk `value` and add every placeholder key found in its strings to `keys`.
 * Values that are neither strings, arrays, nor non-null objects are ignored.
 *
 * @param {unknown} value - The value to scan.
 * @param {Set<string>} keys - Accumulator that receives the trimmed keys.
 */
function collectPlaceholders(value, keys) {
  if (typeof value === "string") {
    // matchAll works on a clone of the regex, so the shared global pattern's
    // lastIndex is never left in a dirty state.
    for (const match of value.matchAll(PLACEHOLDER_PATTERN)) {
      keys.add(match[1].trim());
    }
    return;
  }
  if (Array.isArray(value)) {
    for (const item of value) {
      collectPlaceholders(item, keys);
    }
    return;
  }
  if (typeof value === "object" && value !== null) {
    for (const nested of Object.values(value)) {
      collectPlaceholders(nested, keys);
    }
  }
}

/**
 * Return a copy of `value` in which every `{{ key }}` token whose key is an
 * OWN property of `placeholders` is replaced by that property's value.
 * Tokens with unknown keys are left untouched. Arrays and objects are
 * rebuilt recursively; other values are returned as-is.
 *
 * @param {unknown} value - A string, array, plain object, or any nesting of them.
 * @param {Record<string, string>} placeholders - Map of placeholder key to replacement text.
 * @returns {unknown} The resolved copy (the input itself for non-container, non-string values).
 */
function resolveValue(value, placeholders) {
  if (typeof value === "string") {
    return value.replace(PLACEHOLDER_PATTERN, (match, key) => {
      const trimmed = key.trim();
      // Own-property check only: `in` would also match inherited names such as
      // "constructor" or "toString" and splice a stringified function into the text.
      return Object.prototype.hasOwnProperty.call(placeholders, trimmed)
        ? placeholders[trimmed]
        : match;
    });
  }
  if (Array.isArray(value)) {
    return value.map((item) => resolveValue(item, placeholders));
  }
  if (typeof value === "object" && value !== null) {
    // Object.fromEntries defines data properties, so an own "__proto__" key
    // (possible after JSON.parse) is preserved instead of hitting the
    // prototype setter and silently disappearing from the result.
    return Object.fromEntries(
      Object.entries(value).map(([k, v]) => [k, resolveValue(v, placeholders)])
    );
  }
  return value;
}

/**
 * Resolve placeholders in a single string.
 *
 * @param {string} text - Text that may contain `{{ key }}` tokens.
 * @param {Record<string, string>} placeholders - Map of placeholder key to replacement text.
 * @returns {string} The text with known placeholders substituted.
 */
function resolvePlaceholdersInString(text, placeholders) {
  return resolveValue(text, placeholders);
}
255
+
256
+ // src/fetch-evaluation-data.ts
210
257
  function parseSkillNamesFromParams(value) {
211
258
  if (typeof value !== "string") {
212
259
  return [];
@@ -222,13 +269,11 @@ function applyParamsToAssertion(assertion, params) {
222
269
  return assertion;
223
270
  }
224
271
  if (assertion.type === "llm_judge") {
225
- let prompt = assertion.prompt;
272
+ const stringParams = {};
226
273
  for (const [key, value] of Object.entries(params)) {
227
- const placeholder = `{{${key}}}`;
228
- const escapedPlaceholder = placeholder.replace(/[{}]/g, "\\$&");
229
- const replacement = String(value ?? "");
230
- prompt = prompt.replace(new RegExp(escapedPlaceholder, "g"), replacement);
274
+ stringParams[key] = String(value ?? "");
231
275
  }
276
+ const prompt = resolvePlaceholdersInString(assertion.prompt, stringParams);
232
277
  return {
233
278
  ...assertion,
234
279
  prompt,
@@ -337,9 +382,9 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
337
382
  const scenarios = await Promise.all(
338
383
  evalRun.scenarioIds.map((id) => api.getScenario(projectId2, id))
339
384
  );
340
- let codeAgent = null;
385
+ let agent = null;
341
386
  if (evalRun.agentId) {
342
- codeAgent = await api.getAgent(projectId2, evalRun.agentId);
387
+ agent = await api.getAgent(projectId2, evalRun.agentId);
343
388
  }
344
389
  let skills = [];
345
390
  let skillsGroup = null;
@@ -440,7 +485,7 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
440
485
  const skillsGroupName = skillsGroup?.name ?? "";
441
486
  return {
442
487
  evalRun,
443
- codeAgent,
488
+ agent,
444
489
  skills,
445
490
  skillsGroup,
446
491
  skillsGroupName,
@@ -459,56 +504,18 @@ var import_eval_assertions = require("@wix/eval-assertions");
459
504
  var import_fs = require("fs");
460
505
  var import_os = require("os");
461
506
  var import_path2 = __toESM(require("path"));
462
- var import_evalforge_github_client2 = require("@wix/evalforge-github-client");
507
+ var import_evalforge_github_client = require("@wix/evalforge-github-client");
463
508
 
464
- // src/run-scenario/agents/claude-code/write-skills.ts
509
+ // src/run-scenario/utils/write-files.ts
465
510
  var import_promises = require("fs/promises");
466
511
  var import_path = require("path");
467
- var import_evalforge_github_client = require("@wix/evalforge-github-client");
468
- async function writeSkillsToFilesystem(cwd, skills, fetchFn = import_evalforge_github_client.fetchGitHubFolder) {
469
- await Promise.all(
470
- skills.map((skill) => writeSkillToFilesystem(cwd, skill, fetchFn))
471
- );
472
- }
473
- async function writeSkillToFilesystem(cwd, skill, fetchFn = import_evalforge_github_client.fetchGitHubFolder) {
474
- const skillName = skill.name;
475
- const skillDir = (0, import_path.join)(cwd, ".claude", "skills", skillName);
476
- await (0, import_promises.mkdir)(skillDir, { recursive: true });
477
- const version = skill.latestVersion;
478
- if (version?.files && version.files.length > 0) {
479
- await writeSkillFiles(skillDir, version.files);
480
- console.log(
481
- `[Skill] ${skillName}: wrote ${version.files.length} file(s) from snapshot`
482
- );
483
- } else if (skill.source) {
484
- try {
485
- const files = await fetchFn(skill.source, {
486
- userAgent: "EvalForge-Evaluator"
487
- });
488
- await writeSkillFiles(skillDir, files);
489
- console.log(
490
- `[Skill] ${skillName}: wrote ${files.length} file(s) from GitHub (live)`
491
- );
492
- } catch (error) {
493
- const message = error instanceof Error ? error.message : "Unknown error";
494
- console.error(
495
- `[Skill] ${skillName}: GitHub fetch failed: ${message}, no files to fall back to`
496
- );
497
- throw new Error(
498
- `Failed to write skill ${skillName} to filesystem: ${message}`
499
- );
500
- }
501
- } else {
502
- throw new Error(`Skill ${skillName} has no files and no source configured`);
503
- }
504
- }
505
- async function writeSkillFiles(skillDir, files) {
506
- const resolvedBase = (0, import_path.resolve)(skillDir);
512
+ async function writeFilesToDirectory(targetDir, files) {
513
+ const resolvedBase = (0, import_path.resolve)(targetDir);
507
514
  for (const file of files) {
508
- const filePath = (0, import_path.resolve)(skillDir, file.path);
515
+ const filePath = (0, import_path.resolve)(targetDir, file.path);
509
516
  if (!filePath.startsWith(resolvedBase + import_path.sep) && filePath !== resolvedBase) {
510
517
  throw new Error(
511
- `Path traversal detected in skill file: "${file.path}" resolves outside skill directory`
518
+ `Path traversal detected: "${file.path}" resolves outside target directory`
512
519
  );
513
520
  }
514
521
  await (0, import_promises.mkdir)((0, import_path.dirname)(filePath), { recursive: true });
@@ -524,10 +531,10 @@ async function fetchAndWriteTemplateFiles(template, workDir) {
524
531
  );
525
532
  return;
526
533
  }
527
- const files = await (0, import_evalforge_github_client2.fetchGitHubFolder)(template.source, {
534
+ const files = await (0, import_evalforge_github_client.fetchGitHubFolder)(template.source, {
528
535
  userAgent: "EvalForge-Evaluator"
529
536
  });
530
- await writeSkillFiles(workDir, files);
537
+ await writeFilesToDirectory(workDir, files);
531
538
  }
532
539
  function writeWixEnvFile(workDir) {
533
540
  const configPath = import_path2.default.join(workDir, "wix.config.json");
@@ -581,86 +588,76 @@ var import_crypto2 = require("crypto");
581
588
 
582
589
  // src/run-scenario/agents/registry.ts
583
590
  var AgentAdapterRegistry = class {
584
- /**
585
- * Map of run commands to their registered adapters.
586
- * Multiple commands can map to the same adapter.
587
- */
588
- adapters = /* @__PURE__ */ new Map();
589
- /**
590
- * Set of all registered adapter instances (for getAll).
591
- */
591
+ /** Map of CLI commands to their registered adapters. */
592
+ commandMap = /* @__PURE__ */ new Map();
593
+ /** Map of adapter IDs to their registered adapters. */
594
+ idMap = /* @__PURE__ */ new Map();
595
+ /** Set of all registered adapter instances (for getAll). */
592
596
  registeredAdapters = /* @__PURE__ */ new Set();
593
597
  /**
594
598
  * Register an agent adapter.
595
599
  *
596
- * The adapter will be registered for all commands in its supportedCommands array.
597
- * If a command is already registered, it will be overwritten with a warning.
598
- *
599
- * @param adapter - The adapter to register
600
+ * The adapter is registered by its ID and for all commands in its supportedCommands array.
601
+ * If a command or ID is already registered, it will be overwritten with a warning.
600
602
  */
601
603
  register(adapter) {
602
604
  this.registeredAdapters.add(adapter);
605
+ this.idMap.set(adapter.id, adapter);
603
606
  for (const command of adapter.supportedCommands) {
604
- if (this.adapters.has(command)) {
605
- const existing = this.adapters.get(command);
607
+ if (this.commandMap.has(command)) {
608
+ const existing = this.commandMap.get(command);
606
609
  console.warn(
607
610
  `[AgentAdapterRegistry] Command "${command}" already registered by adapter "${existing.id}". Overwriting with adapter "${adapter.id}".`
608
611
  );
609
612
  }
610
- this.adapters.set(command, adapter);
613
+ this.commandMap.set(command, adapter);
611
614
  }
612
615
  }
613
- /**
614
- * Get an adapter by run command.
615
- *
616
- * @param runCommand - The run command to look up
617
- * @returns The registered adapter, or undefined if not found
618
- */
619
- get(runCommand) {
620
- return this.adapters.get(runCommand);
616
+ /** Get an adapter by CLI command. */
617
+ getByCommand(command) {
618
+ return this.commandMap.get(command);
621
619
  }
622
- /**
623
- * Check if a command has a registered adapter.
624
- *
625
- * @param runCommand - The run command to check
626
- * @returns True if an adapter is registered for this command
627
- */
628
- has(runCommand) {
629
- return this.adapters.has(runCommand);
620
+ /** Get an adapter by adapter ID. */
621
+ getById(adapterId) {
622
+ return this.idMap.get(adapterId);
630
623
  }
631
624
  /**
632
- * Get all registered adapters.
633
- *
634
- * @returns Array of all unique registered adapters
625
+ * Unified lookup: tries CLI command first, then adapter ID.
626
+ * Use this when the identifier could be either a command or an adapter ID.
635
627
  */
628
+ resolve(identifier) {
629
+ return this.commandMap.get(identifier) ?? this.idMap.get(identifier);
630
+ }
631
+ /** Check if a command or adapter ID has a registered adapter. */
632
+ has(identifier) {
633
+ return this.commandMap.has(identifier) || this.idMap.has(identifier);
634
+ }
635
+ /** Get all registered adapters. */
636
636
  getAll() {
637
637
  return Array.from(this.registeredAdapters);
638
638
  }
639
- /**
640
- * Get all supported commands.
641
- *
642
- * @returns Array of all registered run commands
643
- */
639
+ /** Get all supported CLI commands. */
644
640
  getSupportedCommands() {
645
- return Array.from(this.adapters.keys());
641
+ return Array.from(this.commandMap.keys());
642
+ }
643
+ /** Get all registered adapter IDs. */
644
+ getAdapterIds() {
645
+ return Array.from(this.idMap.keys());
646
646
  }
647
647
  /**
648
648
  * Unregister an adapter by its ID.
649
- *
650
649
  * Removes the adapter and all its command mappings.
651
- *
652
- * @param adapterId - The ID of the adapter to remove
653
- * @returns True if the adapter was found and removed
654
650
  */
655
651
  unregister(adapterId) {
656
652
  let found = false;
657
653
  for (const adapter of this.registeredAdapters) {
658
654
  if (adapter.id === adapterId) {
659
655
  this.registeredAdapters.delete(adapter);
656
+ this.idMap.delete(adapterId);
660
657
  found = true;
661
658
  for (const command of adapter.supportedCommands) {
662
- if (this.adapters.get(command) === adapter) {
663
- this.adapters.delete(command);
659
+ if (this.commandMap.get(command) === adapter) {
660
+ this.commandMap.delete(command);
664
661
  }
665
662
  }
666
663
  break;
@@ -668,22 +665,21 @@ var AgentAdapterRegistry = class {
668
665
  }
669
666
  return found;
670
667
  }
671
- /**
672
- * Clear all registered adapters.
673
- * Primarily useful for testing.
674
- */
668
+ /** Clear all registered adapters. Primarily useful for testing. */
675
669
  clear() {
676
- this.adapters.clear();
670
+ this.commandMap.clear();
671
+ this.idMap.clear();
677
672
  this.registeredAdapters.clear();
678
673
  }
679
674
  };
680
675
  var defaultRegistry = new AgentAdapterRegistry();
681
- function getAdapter(runCommand) {
682
- const adapter = defaultRegistry.get(runCommand);
676
+ function getAdapter(identifier) {
677
+ const adapter = defaultRegistry.resolve(identifier);
683
678
  if (!adapter) {
684
- const supported = defaultRegistry.getSupportedCommands();
679
+ const commands = defaultRegistry.getSupportedCommands();
680
+ const ids = defaultRegistry.getAdapterIds();
685
681
  throw new Error(
686
- `No agent adapter registered for command "${runCommand}". Supported commands: ${supported.length > 0 ? supported.join(", ") : "(none registered)"}`
682
+ `No agent adapter registered for "${identifier}". Supported commands: ${commands.length > 0 ? commands.join(", ") : "(none)"}. Registered adapters: ${ids.length > 0 ? ids.join(", ") : "(none)"}`
687
683
  );
688
684
  }
689
685
  return adapter;
@@ -694,12 +690,97 @@ var import_evalforge_types4 = require("@wix/evalforge-types");
694
690
 
695
691
  // src/run-scenario/agents/claude-code/execute.ts
696
692
  var import_evalforge_types3 = require("@wix/evalforge-types");
697
- var import_crypto = require("crypto");
698
693
 
699
- // src/run-scenario/agents/claude-code/write-mcp.ts
694
+ // src/run-scenario/agents/claude-code/write-skills.ts
700
695
  var import_promises2 = require("fs/promises");
701
696
  var import_path3 = require("path");
697
+ var import_evalforge_github_client2 = require("@wix/evalforge-github-client");
698
/**
 * Write every skill in `skills` under `cwd`, running the per-skill writes concurrently.
 * Rejects as soon as any single skill write rejects (Promise.all semantics).
 *
 * @param {string} cwd - Working directory passed through to each per-skill write.
 * @param {Array<object>} skills - Skills to write.
 * @param {Function} [fetchFn] - Folder fetcher forwarded to each per-skill write;
 *   defaults to the GitHub client's fetchGitHubFolder.
 * @returns {Promise<void>}
 */
async function writeSkillsToFilesystem(cwd, skills, fetchFn = import_evalforge_github_client2.fetchGitHubFolder) {
  const pendingWrites = skills.map((skill) => {
    return writeSkillToFilesystem(cwd, skill, fetchFn);
  });
  await Promise.all(pendingWrites);
}
703
/**
 * Write one skill's files into `<cwd>/.claude/skills/<skill.name>`.
 *
 * Files come from `skill.latestVersion.files` when that list is non-empty;
 * otherwise they are fetched via `fetchFn(skill.source, ...)`.
 *
 * @param {string} cwd - Working directory that will contain `.claude/skills`.
 * @param {object} skill - Skill record; reads `name`, `latestVersion?.files` and `source`.
 * @param {Function} [fetchFn] - Fetcher called with `(skill.source, { userAgent })`;
 *   defaults to the GitHub client's fetchGitHubFolder.
 * @returns {Promise<void>}
 * @throws {Error} When fetching or writing the live files fails (original error is
 *   attached as `cause`), or when the skill has neither snapshot files nor a source.
 */
async function writeSkillToFilesystem(cwd, skill, fetchFn = import_evalforge_github_client2.fetchGitHubFolder) {
  const skillName = skill.name;
  // NOTE(review): skillName is joined into the path unchecked; presumably names
  // are validated upstream and cannot contain path separators — confirm.
  const skillDir = (0, import_path3.join)(cwd, ".claude", "skills", skillName);
  await (0, import_promises2.mkdir)(skillDir, { recursive: true });

  const snapshotFiles = skill.latestVersion?.files;
  if (snapshotFiles && snapshotFiles.length > 0) {
    await writeFilesToDirectory(skillDir, snapshotFiles);
    console.log(
      `[Skill] ${skillName}: wrote ${snapshotFiles.length} file(s) from snapshot`
    );
    return;
  }

  if (!skill.source) {
    throw new Error(`Skill ${skillName} has no files and no source configured`);
  }

  try {
    const files = await fetchFn(skill.source, {
      userAgent: "EvalForge-Evaluator"
    });
    await writeFilesToDirectory(skillDir, files);
    console.log(
      `[Skill] ${skillName}: wrote ${files.length} file(s) from GitHub (live)`
    );
  } catch (error) {
    const message = error instanceof Error ? error.message : "Unknown error";
    console.error(
      `[Skill] ${skillName}: GitHub fetch failed: ${message}, no files to fall back to`
    );
    // Keep the original error (and its stack) reachable for debugging.
    throw new Error(
      `Failed to write skill ${skillName} to filesystem: ${message}`,
      { cause: error }
    );
  }
}
735
+
736
+ // src/run-scenario/agents/claude-code/execute.ts
737
+ var import_crypto = require("crypto");
738
+
739
+ // src/run-scenario/agents/claude-code/write-mcp.ts
740
+ var import_promises4 = require("fs/promises");
741
+ var import_path5 = require("path");
702
742
  var import_evalforge_types2 = require("@wix/evalforge-types");
743
+
744
+ // src/run-scenario/agents/claude-code/resolve-mcp-placeholders.ts
745
+ var import_promises3 = require("fs/promises");
746
+ var import_path4 = require("path");
747
+ var import_os2 = require("os");
748
+ var WIX_AUTH_FILE = (0, import_path4.join)((0, import_os2.homedir)(), ".wix", "auth", "api-key.json");
749
/**
 * Read the Wix auth JSON file and expose its credentials as placeholder values.
 *
 * Best-effort: any failure to read or parse the file, or a file that lacks
 * `token` / `userInfo.userId`, is logged as a warning and yields an empty map.
 *
 * @param {string} [authFilePath] - Path of the auth JSON file; defaults to WIX_AUTH_FILE.
 * @returns {Promise<Record<string, string>>} `{ "wix-auth-token", "wix-auth-user-id" }`
 *   when both values are present, otherwise `{}`.
 */
async function loadWixAuthPlaceholders(authFilePath = WIX_AUTH_FILE) {
  try {
    const content = await (0, import_promises3.readFile)(authFilePath, "utf-8");
    const auth = JSON.parse(content);
    // Optional chaining on `auth` itself: the file may legally parse to null.
    const token = auth?.token;
    const userId = auth?.userInfo?.userId;
    if (!token || !userId) {
      // Log only the path, never the file contents, so no credential leaks into logs.
      console.warn(
        `[MCP] Wix auth file is missing "token" or "userInfo.userId": ${authFilePath}`
      );
      return {};
    }
    return {
      "wix-auth-token": token,
      "wix-auth-user-id": userId
    };
  } catch (err) {
    // A thrown value is not guaranteed to be an Error instance.
    const message = err instanceof Error ? err.message : String(err);
    console.warn(
      `[MCP] Could not load Wix auth file: ${message}`
    );
    return {};
  }
}
767
/**
 * Substitute `{{ key }}` placeholders inside an MCP servers config using the
 * values loaded from the Wix auth file.
 *
 * The auth file is read only when the config actually contains placeholders.
 *
 * @param {object} mcpServers - MCP servers config, possibly containing placeholders.
 * @param {string} [authFilePath] - Optional auth file path forwarded to loadWixAuthPlaceholders.
 * @returns {Promise<object>} The input itself when it has no placeholders,
 *   otherwise a resolved copy.
 * @throws {Error} When the config references a placeholder that the auth file does not supply.
 */
async function resolveMcpPlaceholders(mcpServers, authFilePath) {
  const needed = findPlaceholders(mcpServers);
  if (needed.length === 0) {
    return mcpServers;
  }
  const placeholders = await loadWixAuthPlaceholders(authFilePath);
  // Own-property check: with `in`, inherited names like "constructor" would be
  // treated as resolved and skip the error below.
  const unresolved = needed.filter(
    (key) => !Object.prototype.hasOwnProperty.call(placeholders, key)
  );
  if (unresolved.length > 0) {
    throw new Error(
      `MCP config contains unresolvable placeholders: ${unresolved.map((k) => `{{${k}}}`).join(", ")}. Ensure ~/.wix/auth/api-key.json exists (run \`npx @wix/cli login\`).`
    );
  }
  console.log(`[MCP] Resolved ${needed.length} placeholder(s)`);
  return resolveValue(mcpServers, placeholders);
}
782
+
783
+ // src/run-scenario/agents/claude-code/write-mcp.ts
703
784
  async function writeMcpToFilesystem(cwd, mcps) {
704
785
  if (mcps.length === 0) return;
705
786
  const mcpServers = {};
@@ -714,19 +795,20 @@ async function writeMcpToFilesystem(cwd, mcps) {
714
795
  mcpServers[key] = value;
715
796
  }
716
797
  }
798
+ const resolvedServers = await resolveMcpPlaceholders(mcpServers);
717
799
  const content = JSON.stringify(
718
- { [import_evalforge_types2.MCP_SERVERS_JSON_KEY]: mcpServers },
800
+ { [import_evalforge_types2.MCP_SERVERS_JSON_KEY]: resolvedServers },
719
801
  null,
720
802
  2
721
803
  );
722
- const filePath = (0, import_path3.join)(cwd, ".mcp.json");
723
- await (0, import_promises2.writeFile)(filePath, content, "utf8");
804
+ const filePath = (0, import_path5.join)(cwd, ".mcp.json");
805
+ await (0, import_promises4.writeFile)(filePath, content, "utf8");
724
806
  console.log(`[MCP] Written to ${filePath}`);
725
807
  }
726
808
 
727
809
  // src/run-scenario/agents/claude-code/write-sub-agents.ts
728
- var import_promises3 = require("fs/promises");
729
- var import_path4 = require("path");
810
+ var import_promises5 = require("fs/promises");
811
+ var import_path6 = require("path");
730
812
  var AGENTS_DIR = ".claude/agents";
731
813
  function toAgentFilename(name, index, nameCount) {
732
814
  const base = (name || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
@@ -736,20 +818,20 @@ function toAgentFilename(name, index, nameCount) {
736
818
  }
737
819
  async function writeSubAgentsToFilesystem(cwd, subAgents) {
738
820
  if (subAgents.length === 0) return;
739
- const agentsDir = (0, import_path4.join)(cwd, AGENTS_DIR);
740
- await (0, import_promises3.mkdir)(agentsDir, { recursive: true });
821
+ const agentsDir = (0, import_path6.join)(cwd, AGENTS_DIR);
822
+ await (0, import_promises5.mkdir)(agentsDir, { recursive: true });
741
823
  const nameCount = /* @__PURE__ */ new Map();
742
824
  for (const [i, agent] of subAgents.entries()) {
743
825
  const filename = toAgentFilename(agent.name, i, nameCount);
744
- const filePath = (0, import_path4.join)(agentsDir, `${filename}.md`);
745
- await (0, import_promises3.writeFile)(filePath, agent.subAgentMd, "utf8");
826
+ const filePath = (0, import_path6.join)(agentsDir, `${filename}.md`);
827
+ await (0, import_promises5.writeFile)(filePath, agent.subAgentMd, "utf8");
746
828
  }
747
829
  console.log(`[SubAgents] Written to ${agentsDir}`);
748
830
  }
749
831
 
750
832
  // src/run-scenario/agents/claude-code/write-rules.ts
751
- var import_promises4 = require("fs/promises");
752
- var import_path5 = require("path");
833
+ var import_promises6 = require("fs/promises");
834
+ var import_path7 = require("path");
753
835
  var CURSOR_RULES_DIR = ".cursor/rules";
754
836
  function toRuleFilename(name, index, nameCount) {
755
837
  const base = (name || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `rule-${index}`;
@@ -760,13 +842,13 @@ function toRuleFilename(name, index, nameCount) {
760
842
  async function appendToFile(filePath, content) {
761
843
  let existing = "";
762
844
  try {
763
- existing = await (0, import_promises4.readFile)(filePath, "utf8");
845
+ existing = await (0, import_promises6.readFile)(filePath, "utf8");
764
846
  } catch {
765
847
  }
766
848
  const merged = existing ? `${existing.trimEnd()}
767
849
 
768
850
  ${content}` : content;
769
- await (0, import_promises4.writeFile)(filePath, merged, "utf8");
851
+ await (0, import_promises6.writeFile)(filePath, merged, "utf8");
770
852
  }
771
853
  async function writeRulesToFilesystem(cwd, rules) {
772
854
  if (rules.length === 0) return;
@@ -775,21 +857,21 @@ async function writeRulesToFilesystem(cwd, rules) {
775
857
  for (const [i, rule] of rules.entries()) {
776
858
  switch (rule.ruleType) {
777
859
  case "claude-md": {
778
- await appendToFile((0, import_path5.join)(cwd, "CLAUDE.md"), rule.content);
860
+ await appendToFile((0, import_path7.join)(cwd, "CLAUDE.md"), rule.content);
779
861
  break;
780
862
  }
781
863
  case "agents-md": {
782
- await appendToFile((0, import_path5.join)(cwd, "AGENTS.md"), rule.content);
864
+ await appendToFile((0, import_path7.join)(cwd, "AGENTS.md"), rule.content);
783
865
  break;
784
866
  }
785
867
  case "cursor-rule": {
786
868
  if (!hasCursorRules) {
787
- await (0, import_promises4.mkdir)((0, import_path5.join)(cwd, CURSOR_RULES_DIR), { recursive: true });
869
+ await (0, import_promises6.mkdir)((0, import_path7.join)(cwd, CURSOR_RULES_DIR), { recursive: true });
788
870
  hasCursorRules = true;
789
871
  }
790
872
  const filename = toRuleFilename(rule.name, i, nameCount);
791
- const filePath = (0, import_path5.join)(cwd, CURSOR_RULES_DIR, `${filename}.md`);
792
- await (0, import_promises4.writeFile)(filePath, rule.content, "utf8");
873
+ const filePath = (0, import_path7.join)(cwd, CURSOR_RULES_DIR, `${filename}.md`);
874
+ await (0, import_promises6.writeFile)(filePath, rule.content, "utf8");
793
875
  break;
794
876
  }
795
877
  }
@@ -1824,7 +1906,7 @@ defaultRegistry.register(claudeCodeAdapter);
1824
1906
 
1825
1907
  // src/run-scenario/file-diff.ts
1826
1908
  var import_fs2 = require("fs");
1827
- var import_path6 = require("path");
1909
+ var import_path8 = require("path");
1828
1910
 
1829
1911
  // ../../node_modules/diff/lib/index.mjs
1830
1912
  function Diff() {
@@ -2000,7 +2082,7 @@ Diff.prototype = {
2000
2082
  tokenize: function tokenize(value) {
2001
2083
  return Array.from(value);
2002
2084
  },
2003
- join: function join5(chars) {
2085
+ join: function join6(chars) {
2004
2086
  return chars.join("");
2005
2087
  },
2006
2088
  postProcess: function postProcess(changeObjects) {
@@ -2440,8 +2522,8 @@ function snapshotDirectory(dir, baseDir) {
2440
2522
  }
2441
2523
  const entries = (0, import_fs2.readdirSync)(dir, { withFileTypes: true });
2442
2524
  for (const entry of entries) {
2443
- const fullPath = (0, import_path6.join)(dir, entry.name);
2444
- const relativePath = (0, import_path6.relative)(base, fullPath);
2525
+ const fullPath = (0, import_path8.join)(dir, entry.name);
2526
+ const relativePath = (0, import_path8.relative)(base, fullPath);
2445
2527
  if (shouldIgnore(entry.name)) {
2446
2528
  continue;
2447
2529
  }
@@ -2553,14 +2635,17 @@ var import_evalforge_types5 = require("@wix/evalforge-types");
2553
2635
  var DEFAULT_AGENT_COMMAND = import_evalforge_types5.AgentRunCommand.CLAUDE;
2554
2636
  async function runAgentWithContext(config, evalRunId2, scenario, evalData, workDir) {
2555
2637
  const skillsGroupId = evalData.evalRun.skillsGroupId;
2638
+ const agent = evalData.agent ?? void 0;
2639
+ const isSDK = agent?.agentType === import_evalforge_types5.AgentType.SDK;
2556
2640
  if (!skillsGroupId) {
2557
2641
  throw new Error(`Eval run ${evalData.evalRun.id} has no skillsGroupId`);
2558
2642
  }
2559
- const agent = evalData.codeAgent ?? void 0;
2560
- const runCommand = agent?.runCommand ?? DEFAULT_AGENT_COMMAND;
2561
- const adapter = getAdapter(runCommand);
2643
+ const identifier = isSDK ? agent.id : agent?.runCommand ?? DEFAULT_AGENT_COMMAND;
2644
+ const adapter = getAdapter(identifier);
2562
2645
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
2563
2646
  const beforeSnapshot = workDir ? snapshotDirectory(workDir) : {};
2647
+ const targetId = skillsGroupId ?? agent?.id ?? evalData.evalRun.id;
2648
+ const targetName = evalData.skillsGroupName || agent?.name || "";
2564
2649
  const executionContext = {
2565
2650
  skills: evalData.skills,
2566
2651
  scenario,
@@ -2572,8 +2657,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
2572
2657
  evalRunId: evalRunId2,
2573
2658
  scenarioId: scenario.id,
2574
2659
  scenarioName: scenario.name,
2575
- targetId: skillsGroupId,
2576
- targetName: evalData.skillsGroupName,
2660
+ targetId,
2661
+ targetName,
2577
2662
  tracePushUrl: config.tracePushUrl,
2578
2663
  routeHeader: config.routeHeader,
2579
2664
  authToken: config.authToken
@@ -2590,8 +2675,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
2590
2675
  const templateFiles = workDir ? extractTemplateFiles(beforeSnapshot, afterSnapshot) : void 0;
2591
2676
  return {
2592
2677
  id: (0, import_crypto2.randomUUID)(),
2593
- targetId: skillsGroupId,
2594
- targetName: evalData.skillsGroupName,
2678
+ targetId,
2679
+ targetName,
2595
2680
  scenarioId: scenario.id,
2596
2681
  scenarioName: scenario.name,
2597
2682
  modelConfig: agent?.modelConfig,
@@ -2607,11 +2692,11 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
2607
2692
 
2608
2693
  // src/run-scenario/index.ts
2609
2694
  async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions) {
2610
- const skillsGroupId = evalData.evalRun.skillsGroupId;
2695
+ const targetId = evalData.evalRun.skillsGroupId ?? evalData.agent?.id ?? evalData.evalRun.id;
2611
2696
  const workDir = await prepareWorkingDirectory(
2612
2697
  config,
2613
2698
  evalRunId2,
2614
- skillsGroupId,
2699
+ targetId,
2615
2700
  scenario.id,
2616
2701
  template
2617
2702
  );
@@ -2639,7 +2724,8 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
2639
2724
  })),
2640
2725
  durationMs: partialResult.duration
2641
2726
  };
2642
- const { "x-wix-ai-gateway-stream": _stream, ...judgeHeaders } = config.aiGatewayHeaders;
2727
+ const { "x-wix-ai-gateway-stream": _ignored, ...judgeHeaders } = config.aiGatewayHeaders;
2728
+ void _ignored;
2643
2729
  const defaultJudgeModel = import_evalforge_types6.DEFAULT_JUDGE_MODEL;
2644
2730
  const assertionContext = {
2645
2731
  workDir,
@@ -2816,7 +2902,7 @@ async function runEvaluation(projectId2, evalRunId2) {
2816
2902
  scenarioItemCount: evalData.scenarioItems.length,
2817
2903
  scenarios: evalData.scenarioItems.map((s) => s.scenario.name),
2818
2904
  skillsCount: evalData.skills.length,
2819
- hasCodeAgent: !!evalData.codeAgent,
2905
+ hasAgent: !!evalData.agent,
2820
2906
  timestamp: Date.now()
2821
2907
  })
2822
2908
  );
@@ -2834,14 +2920,14 @@ async function runEvaluation(projectId2, evalRunId2) {
2834
2920
  `[${ExecutionPhase.FETCH_EVAL_RUN}] Failed to fetch evaluation data: ${errorMsg}`
2835
2921
  );
2836
2922
  }
2837
- const { codeAgent, skills, scenarioItems } = evalData;
2923
+ const { agent, skills, scenarioItems } = evalData;
2838
2924
  state.currentPhase = ExecutionPhase.VALIDATION;
2839
2925
  state.currentContext = {
2840
2926
  projectId: projectId2,
2841
2927
  evalRunId: evalRunId2,
2842
2928
  scenarioCount: scenarioItems.length,
2843
2929
  skillCount: skills.length,
2844
- hasAgent: !!codeAgent,
2930
+ hasAgent: !!agent,
2845
2931
  agentId: evalData.evalRun.agentId,
2846
2932
  skillsGroupId: evalData.evalRun.skillsGroupId
2847
2933
  };
@@ -2850,9 +2936,9 @@ async function runEvaluation(projectId2, evalRunId2) {
2850
2936
  `[${ExecutionPhase.VALIDATION}] Eval run has no skills: set skillsGroupId and ensure the group has skills. (skillsGroupId: ${evalData.evalRun.skillsGroupId || "not set"})`
2851
2937
  );
2852
2938
  }
2853
- if (scenarioItems.length > 0 && skills.length > 0 && !codeAgent) {
2939
+ if (scenarioItems.length > 0 && skills.length > 0 && !agent) {
2854
2940
  throw new Error(
2855
- `[${ExecutionPhase.VALIDATION}] Eval run has no code agent: set agentId for skill-based runs. (agentId: ${evalData.evalRun.agentId || "not set"})`
2941
+ `[${ExecutionPhase.VALIDATION}] Eval run has no agent: set agentId for skill-based runs. (agentId: ${evalData.evalRun.agentId || "not set"})`
2856
2942
  );
2857
2943
  }
2858
2944
  let completedScenarios = 0;
@@ -2866,8 +2952,8 @@ async function runEvaluation(projectId2, evalRunId2) {
2866
2952
  scenarioName: scenario.name,
2867
2953
  skillsGroupId: evalData.evalRun.skillsGroupId,
2868
2954
  skillsGroupName: evalData.skillsGroupName,
2869
- agentId: codeAgent?.id,
2870
- agentName: codeAgent?.name,
2955
+ agentId: agent?.id,
2956
+ agentName: agent?.name,
2871
2957
  progress: `${completedScenarios + 1}/${totalScenarios}`
2872
2958
  };
2873
2959
  const skillNames = evalData.skills.map((s) => s.name).join(", ");
@@ -2875,7 +2961,7 @@ async function runEvaluation(projectId2, evalRunId2) {
2875
2961
  "[Evaluator] Running scenario with skills group:",
2876
2962
  evalData.skillsGroupName,
2877
2963
  skillNames ? `(${skillNames})` : "",
2878
- codeAgent ? `with agent: ${codeAgent.name}` : "",
2964
+ agent ? `with agent: ${agent.name}` : "",
2879
2965
  `(${completedScenarios + 1}/${totalScenarios})`
2880
2966
  );
2881
2967
  try {