@wix/evalforge-evaluator 0.182.0 → 0.183.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -7487,42 +7487,117 @@ import {
7487
7487
  // src/run-scenario/agents/claude-code/write-skills.ts
7488
7488
  import { mkdir as mkdir3 } from "fs/promises";
7489
7489
  import { join } from "path";
7490
- import { fetchGitHubFolder as fetchGitHubFolder2 } from "@wix/evalforge-github-client";
7491
- async function writeSkillsToFilesystem(cwd, skills, fetchFn = fetchGitHubFolder2) {
7490
+
7491
+ // src/run-scenario/agents/shared/resolve-capability-content.ts
7492
+ import {
7493
+ fetchGitHubFile as fetchGitHubFile2,
7494
+ fetchGitHubFolder as fetchGitHubFolder2
7495
+ } from "@wix/evalforge-github-client";
7496
+ import { MCP_SERVERS_JSON_KEY } from "@wix/evalforge-types";
7497
+ var USER_AGENT = "EvalForge-Evaluator";
7498
+ async function resolveSkillFiles(skill, fetchFn = fetchGitHubFolder2) {
7499
+ const version = skill.latestVersion;
7500
+ if (version?.files && version.files.length > 0) {
7501
+ console.log(
7502
+ `[Skill] ${skill.name}: using ${version.files.length} file(s) from snapshot`
7503
+ );
7504
+ return version.files;
7505
+ }
7506
+ if (skill.source) {
7507
+ const files = await fetchFn(skill.source, { userAgent: USER_AGENT });
7508
+ console.log(
7509
+ `[Skill] ${skill.name}: fetched ${files.length} file(s) from GitHub (live)`
7510
+ );
7511
+ return files;
7512
+ }
7513
+ throw new Error(`Skill ${skill.name} has no files and no source configured`);
7514
+ }
7515
+ async function fetchSourceFile(label, noun, name, source, fetchFn) {
7516
+ try {
7517
+ const content = await fetchFn(source, { userAgent: USER_AGENT });
7518
+ console.log(
7519
+ `[${label}] Fetched "${name}" from ${source.owner}/${source.repo}/${source.path}@${source.ref}`
7520
+ );
7521
+ return content;
7522
+ } catch (error) {
7523
+ const message = error instanceof Error ? error.message : "Unknown error";
7524
+ console.error(`[${label}] "${name}": GitHub fetch failed: ${message}`);
7525
+ throw new Error(
7526
+ `Failed to fetch ${noun} "${name}" from GitHub: ${message}`
7527
+ );
7528
+ }
7529
+ }
7530
+ async function resolveSubAgentMd(agent, fetchFn = fetchGitHubFile2) {
7531
+ if (agent.source) {
7532
+ return fetchSourceFile(
7533
+ "SubAgents",
7534
+ "sub-agent",
7535
+ agent.name,
7536
+ agent.source,
7537
+ fetchFn
7538
+ );
7539
+ }
7540
+ if (!agent.subAgentMd) {
7541
+ console.warn(
7542
+ `[SubAgents] "${agent.name}" has empty inline content \u2013 the agent file will be blank`
7543
+ );
7544
+ }
7545
+ return agent.subAgentMd;
7546
+ }
7547
+ async function resolveRuleText(rule, fetchFn = fetchGitHubFile2) {
7548
+ if (!rule.source) {
7549
+ return rule.content;
7550
+ }
7551
+ return fetchSourceFile("Rules", "rule", rule.name, rule.source, fetchFn);
7552
+ }
7553
+ async function resolveMcpConfig(mcp, fetchFn = fetchGitHubFile2) {
7554
+ if (!mcp.source) {
7555
+ return mcp.config;
7556
+ }
7557
+ const raw = await fetchSourceFile(
7558
+ "MCP",
7559
+ "MCP",
7560
+ mcp.name,
7561
+ mcp.source,
7562
+ fetchFn
7563
+ );
7564
+ let parsed;
7565
+ try {
7566
+ parsed = JSON.parse(raw);
7567
+ } catch (error) {
7568
+ const message = error instanceof Error ? error.message : "Unknown error";
7569
+ throw new Error(
7570
+ `MCP "${mcp.name}" GitHub source is not valid JSON: ${message}`
7571
+ );
7572
+ }
7573
+ if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
7574
+ throw new Error(`MCP "${mcp.name}" GitHub source must be a JSON object`);
7575
+ }
7576
+ const obj = parsed;
7577
+ const servers = obj[MCP_SERVERS_JSON_KEY];
7578
+ if (typeof servers === "object" && servers !== null && !Array.isArray(servers)) {
7579
+ return servers;
7580
+ }
7581
+ return obj;
7582
+ }
7583
+
7584
+ // src/run-scenario/agents/claude-code/write-skills.ts
7585
+ async function writeSkillsToFilesystem(cwd, skills, fetchFn) {
7492
7586
  await Promise.all(
7493
7587
  skills.map((skill) => writeSkillToFilesystem(cwd, skill, fetchFn))
7494
7588
  );
7495
7589
  }
7496
- async function writeSkillToFilesystem(cwd, skill, fetchFn = fetchGitHubFolder2) {
7497
- const skillName = skill.name;
7498
- const skillDir = join(cwd, ".claude", "skills", skillName);
7590
+ async function writeSkillToFilesystem(cwd, skill, fetchFn) {
7591
+ const skillDir = join(cwd, ".claude", "skills", skill.name);
7499
7592
  await mkdir3(skillDir, { recursive: true });
7500
- const version = skill.latestVersion;
7501
- if (version?.files && version.files.length > 0) {
7502
- await writeFilesToDirectory(skillDir, version.files);
7503
- console.log(
7504
- `[Skill] ${skillName}: wrote ${version.files.length} file(s) from snapshot`
7593
+ try {
7594
+ const files = await resolveSkillFiles(skill, fetchFn);
7595
+ await writeFilesToDirectory(skillDir, files);
7596
+ } catch (error) {
7597
+ const message = error instanceof Error ? error.message : "Unknown error";
7598
+ throw new Error(
7599
+ `Failed to write skill ${skill.name} to filesystem: ${message}`
7505
7600
  );
7506
- } else if (skill.source) {
7507
- try {
7508
- const files = await fetchFn(skill.source, {
7509
- userAgent: "EvalForge-Evaluator"
7510
- });
7511
- await writeFilesToDirectory(skillDir, files);
7512
- console.log(
7513
- `[Skill] ${skillName}: wrote ${files.length} file(s) from GitHub (live)`
7514
- );
7515
- } catch (error) {
7516
- const message = error instanceof Error ? error.message : "Unknown error";
7517
- console.error(
7518
- `[Skill] ${skillName}: GitHub fetch failed: ${message}, no files to fall back to`
7519
- );
7520
- throw new Error(
7521
- `Failed to write skill ${skillName} to filesystem: ${message}`
7522
- );
7523
- }
7524
- } else {
7525
- throw new Error(`Skill ${skillName} has no files and no source configured`);
7526
7601
  }
7527
7602
  }
7528
7603
 
@@ -7540,7 +7615,7 @@ import { randomUUID } from "crypto";
7540
7615
  // src/run-scenario/agents/claude-code/write-mcp.ts
7541
7616
  import { writeFile as writeFile3 } from "fs/promises";
7542
7617
  import { join as join3 } from "path";
7543
- import { MCP_SERVERS_JSON_KEY } from "@wix/evalforge-types";
7618
+ import { MCP_SERVERS_JSON_KEY as MCP_SERVERS_JSON_KEY2 } from "@wix/evalforge-types";
7544
7619
 
7545
7620
  // src/run-scenario/agents/shared/resolve-mcp-placeholders.ts
7546
7621
  import { readFile } from "fs/promises";
@@ -7585,11 +7660,11 @@ async function resolveMcpPlaceholders(mcpServers, options = {}) {
7585
7660
  }
7586
7661
 
7587
7662
  // src/run-scenario/agents/claude-code/write-mcp.ts
7588
- async function writeMcpToFilesystem(cwd, mcps) {
7663
+ async function writeMcpToFilesystem(cwd, mcps, fetchFn) {
7589
7664
  if (mcps.length === 0) return;
7590
7665
  const mcpServers = {};
7591
7666
  for (const mcp of mcps) {
7592
- const config = mcp.config;
7667
+ const config = await resolveMcpConfig(mcp, fetchFn);
7593
7668
  for (const [key, value] of Object.entries(config)) {
7594
7669
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
7595
7670
  throw new Error(
@@ -7601,7 +7676,7 @@ async function writeMcpToFilesystem(cwd, mcps) {
7601
7676
  }
7602
7677
  const resolvedServers = await resolveMcpPlaceholders(mcpServers, { cwd });
7603
7678
  const content = JSON.stringify(
7604
- { [MCP_SERVERS_JSON_KEY]: resolvedServers },
7679
+ { [MCP_SERVERS_JSON_KEY2]: resolvedServers },
7605
7680
  null,
7606
7681
  2
7607
7682
  );
@@ -7613,9 +7688,6 @@ async function writeMcpToFilesystem(cwd, mcps) {
7613
7688
  // src/run-scenario/agents/claude-code/write-sub-agents.ts
7614
7689
  import { mkdir as mkdir4, writeFile as writeFile4 } from "fs/promises";
7615
7690
  import { join as join4 } from "path";
7616
- import {
7617
- fetchGitHubFile as fetchGitHubFile2
7618
- } from "@wix/evalforge-github-client";
7619
7691
  var AGENTS_DIR = ".claude/agents";
7620
7692
  function toAgentFilename(name, index, nameCount) {
7621
7693
  const base = (name || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
@@ -7623,34 +7695,7 @@ function toAgentFilename(name, index, nameCount) {
7623
7695
  nameCount.set(base, count + 1);
7624
7696
  return count === 0 ? base : `${base}-${count + 1}`;
7625
7697
  }
7626
- async function resolveSubAgentContent(agent, fetchFn) {
7627
- if (agent.source) {
7628
- try {
7629
- const content = await fetchFn(agent.source, {
7630
- userAgent: "EvalForge-Evaluator"
7631
- });
7632
- console.log(
7633
- `[SubAgents] Fetched "${agent.name}" from ${agent.source.owner}/${agent.source.repo}/${agent.source.path}@${agent.source.ref}`
7634
- );
7635
- return content;
7636
- } catch (error) {
7637
- const message = error instanceof Error ? error.message : "Unknown error";
7638
- console.error(
7639
- `[SubAgents] "${agent.name}": GitHub fetch failed: ${message}`
7640
- );
7641
- throw new Error(
7642
- `Failed to fetch sub-agent "${agent.name}" from GitHub: ${message}`
7643
- );
7644
- }
7645
- }
7646
- if (!agent.subAgentMd) {
7647
- console.warn(
7648
- `[SubAgents] "${agent.name}" has empty inline content \u2013 the agent file will be blank`
7649
- );
7650
- }
7651
- return agent.subAgentMd;
7652
- }
7653
- async function writeSubAgentsToFilesystem(cwd, subAgents, fetchFn = fetchGitHubFile2) {
7698
+ async function writeSubAgentsToFilesystem(cwd, subAgents, fetchFn) {
7654
7699
  if (subAgents.length === 0) return;
7655
7700
  const agentsDir = join4(cwd, AGENTS_DIR);
7656
7701
  await mkdir4(agentsDir, { recursive: true });
@@ -7658,7 +7703,7 @@ async function writeSubAgentsToFilesystem(cwd, subAgents, fetchFn = fetchGitHubF
7658
7703
  for (const [i, agent] of subAgents.entries()) {
7659
7704
  const filename = toAgentFilename(agent.name, i, nameCount);
7660
7705
  const filePath = join4(agentsDir, `${filename}.md`);
7661
- const content = await resolveSubAgentContent(agent, fetchFn);
7706
+ const content = await resolveSubAgentMd(agent, fetchFn);
7662
7707
  await writeFile4(filePath, content, "utf8");
7663
7708
  }
7664
7709
  console.log(`[SubAgents] Written to ${agentsDir}`);
@@ -7708,18 +7753,19 @@ function validateGenericDirectory(dir, cwd) {
7708
7753
  }
7709
7754
  return trimmed;
7710
7755
  }
7711
- async function writeRulesToFilesystem(cwd, rules) {
7756
+ async function writeRulesToFilesystem(cwd, rules, fetchFn) {
7712
7757
  if (rules.length === 0) return;
7713
7758
  const nameCount = /* @__PURE__ */ new Map();
7714
7759
  let hasCursorRules = false;
7715
7760
  for (const [i, rule] of rules.entries()) {
7761
+ const content = await resolveRuleText(rule, fetchFn);
7716
7762
  switch (rule.ruleType) {
7717
7763
  case "claude-md": {
7718
- await appendToFile(join5(cwd, "CLAUDE.md"), rule.content);
7764
+ await appendToFile(join5(cwd, "CLAUDE.md"), content);
7719
7765
  break;
7720
7766
  }
7721
7767
  case "agents-md": {
7722
- await appendToFile(join5(cwd, "AGENTS.md"), rule.content);
7768
+ await appendToFile(join5(cwd, "AGENTS.md"), content);
7723
7769
  break;
7724
7770
  }
7725
7771
  case "cursor-rule": {
@@ -7729,7 +7775,7 @@ async function writeRulesToFilesystem(cwd, rules) {
7729
7775
  }
7730
7776
  const filename = toRuleFilename(rule.name, i, nameCount);
7731
7777
  const filePath = join5(cwd, CURSOR_RULES_DIR, `${filename}.md`);
7732
- await writeFile5(filePath, rule.content, "utf8");
7778
+ await writeFile5(filePath, content, "utf8");
7733
7779
  break;
7734
7780
  }
7735
7781
  case "generic": {
@@ -7740,7 +7786,7 @@ async function writeRulesToFilesystem(cwd, rules) {
7740
7786
  const dirPath = join5(cwd, directory);
7741
7787
  await mkdir5(dirPath, { recursive: true });
7742
7788
  const filename = toRuleFilename(rule.name, i, nameCount);
7743
- await writeFile5(join5(dirPath, `${filename}.md`), rule.content, "utf8");
7789
+ await writeFile5(join5(dirPath, `${filename}.md`), content, "utf8");
7744
7790
  break;
7745
7791
  }
7746
7792
  default: {
@@ -9003,51 +9049,28 @@ function tryParseJson(text) {
9003
9049
  // src/run-scenario/agents/opencode/write-skills.ts
9004
9050
  import { mkdir as mkdir6 } from "fs/promises";
9005
9051
  import { join as join6 } from "path";
9006
- import { fetchGitHubFolder as fetchGitHubFolder3 } from "@wix/evalforge-github-client";
9007
- async function writeSkillsToFilesystem2(cwd, skills, fetchFn = fetchGitHubFolder3) {
9052
+ async function writeSkillsToFilesystem2(cwd, skills, fetchFn) {
9008
9053
  await Promise.all(
9009
9054
  skills.map((skill) => writeSkillToFilesystem2(cwd, skill, fetchFn))
9010
9055
  );
9011
9056
  }
9012
9057
  async function writeSkillToFilesystem2(cwd, skill, fetchFn) {
9013
- const skillName = skill.name;
9014
- const skillDir = join6(cwd, ".opencode", "skills", skillName);
9058
+ const skillDir = join6(cwd, ".opencode", "skills", skill.name);
9015
9059
  await mkdir6(skillDir, { recursive: true });
9016
- const version = skill.latestVersion;
9017
- if (version?.files && version.files.length > 0) {
9018
- await writeFilesToDirectory(skillDir, version.files);
9019
- console.log(
9020
- `[Skill] ${skillName}: wrote ${version.files.length} file(s) from snapshot`
9060
+ try {
9061
+ const files = await resolveSkillFiles(skill, fetchFn);
9062
+ await writeFilesToDirectory(skillDir, files);
9063
+ } catch (error) {
9064
+ const message = error instanceof Error ? error.message : "Unknown error";
9065
+ throw new Error(
9066
+ `Failed to write skill ${skill.name} to filesystem: ${message}`
9021
9067
  );
9022
- } else if (skill.source) {
9023
- try {
9024
- const files = await fetchFn(skill.source, {
9025
- userAgent: "EvalForge-Evaluator"
9026
- });
9027
- await writeFilesToDirectory(skillDir, files);
9028
- console.log(
9029
- `[Skill] ${skillName}: wrote ${files.length} file(s) from GitHub (live)`
9030
- );
9031
- } catch (error) {
9032
- const message = error instanceof Error ? error.message : "Unknown error";
9033
- console.error(
9034
- `[Skill] ${skillName}: GitHub fetch failed: ${message}, no files to fall back to`
9035
- );
9036
- throw new Error(
9037
- `Failed to write skill ${skillName} to filesystem: ${message}`
9038
- );
9039
- }
9040
- } else {
9041
- throw new Error(`Skill ${skillName} has no files and no source configured`);
9042
9068
  }
9043
9069
  }
9044
9070
 
9045
9071
  // src/run-scenario/agents/opencode/write-sub-agents.ts
9046
9072
  import { mkdir as mkdir7, writeFile as writeFile6 } from "fs/promises";
9047
9073
  import { join as join7 } from "path";
9048
- import {
9049
- fetchGitHubFile as fetchGitHubFile3
9050
- } from "@wix/evalforge-github-client";
9051
9074
  var AGENTS_DIR2 = ".opencode/agents";
9052
9075
  function toAgentFilename2(name, index, nameCount) {
9053
9076
  const base = (name || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
@@ -9055,34 +9078,7 @@ function toAgentFilename2(name, index, nameCount) {
9055
9078
  nameCount.set(base, count + 1);
9056
9079
  return count === 0 ? base : `${base}-${count + 1}`;
9057
9080
  }
9058
- async function resolveSubAgentContent2(agent, fetchFn) {
9059
- if (agent.source) {
9060
- try {
9061
- const content = await fetchFn(agent.source, {
9062
- userAgent: "EvalForge-Evaluator"
9063
- });
9064
- console.log(
9065
- `[SubAgents] Fetched "${agent.name}" from ${agent.source.owner}/${agent.source.repo}/${agent.source.path}@${agent.source.ref}`
9066
- );
9067
- return content;
9068
- } catch (error) {
9069
- const message = error instanceof Error ? error.message : "Unknown error";
9070
- console.error(
9071
- `[SubAgents] "${agent.name}": GitHub fetch failed: ${message}`
9072
- );
9073
- throw new Error(
9074
- `Failed to fetch sub-agent "${agent.name}" from GitHub: ${message}`
9075
- );
9076
- }
9077
- }
9078
- if (!agent.subAgentMd) {
9079
- console.warn(
9080
- `[SubAgents] "${agent.name}" has empty inline content \u2013 the agent file will be blank`
9081
- );
9082
- }
9083
- return agent.subAgentMd;
9084
- }
9085
- async function writeSubAgentsToFilesystem2(cwd, subAgents, fetchFn = fetchGitHubFile3) {
9081
+ async function writeSubAgentsToFilesystem2(cwd, subAgents, fetchFn) {
9086
9082
  if (subAgents.length === 0) return;
9087
9083
  const agentsDir = join7(cwd, AGENTS_DIR2);
9088
9084
  await mkdir7(agentsDir, { recursive: true });
@@ -9090,7 +9086,7 @@ async function writeSubAgentsToFilesystem2(cwd, subAgents, fetchFn = fetchGitHub
9090
9086
  for (const [i, agent] of subAgents.entries()) {
9091
9087
  const filename = toAgentFilename2(agent.name, i, nameCount);
9092
9088
  const filePath = join7(agentsDir, `${filename}.md`);
9093
- const content = await resolveSubAgentContent2(agent, fetchFn);
9089
+ const content = await resolveSubAgentMd(agent, fetchFn);
9094
9090
  await writeFile6(filePath, content, "utf8");
9095
9091
  }
9096
9092
  console.log(`[SubAgents] Written to ${agentsDir}`);
@@ -9192,7 +9188,7 @@ async function buildOpenCodeEnv(options) {
9192
9188
  if (options.mcps && options.mcps.length > 0) {
9193
9189
  const mcpServers = {};
9194
9190
  for (const mcpEntity of options.mcps) {
9195
- const entityConfig = mcpEntity.config;
9191
+ const entityConfig = await resolveMcpConfig(mcpEntity);
9196
9192
  for (const [key, value] of Object.entries(entityConfig)) {
9197
9193
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
9198
9194
  throw new Error(