@wix/evalforge-evaluator 0.184.0 → 0.186.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -6821,21 +6821,37 @@ function createApiClient(serverUrl, options = "") {
6821
6821
  // The legacy REST endpoint enriched the capability with its latest version
6822
6822
  // server-side; ambassador's GetCapability returns the bare entity, so we
6823
6823
  // compose it with GetLatestCapabilityVersion in parallel here.
6824
+ //
6825
+ // The latest-version fetch is BEST-EFFORT: a failure must not drop the whole
6826
+ // capability. Otherwise one broken snapshot fetch makes the capability (e.g.
6827
+ // an MCP) silently vanish from the run. Runs that pin a version still resolve
6828
+ // their content via getCapabilityVersion downstream.
6824
6829
  async getCapability(projectId2, id) {
6825
- const [capRes, versionRes] = await Promise.all([
6830
+ const [capResult, versionResult] = await Promise.allSettled([
6826
6831
  httpClient.request(getCapability({ projectId: projectId2, capabilityId: id })),
6827
6832
  httpClient.request(
6828
6833
  getLatestCapabilityVersion({ projectId: projectId2, capabilityId: id })
6829
6834
  )
6830
6835
  ]);
6831
- const capability = capRes.data.capability;
6836
+ if (capResult.status === "rejected") {
6837
+ throw capResult.reason;
6838
+ }
6839
+ const capability = capResult.value.data.capability;
6832
6840
  if (!capability) {
6833
6841
  throw new Error(`Capability ${id} not found in project ${projectId2}`);
6834
6842
  }
6835
- const latestVersion = versionRes.data.capabilityVersion ? capabilityVersionFromProto(
6836
- versionRes.data.capabilityVersion,
6837
- projectId2
6838
- ) : void 0;
6843
+ let latestVersion;
6844
+ if (versionResult.status === "fulfilled" && versionResult.value.data.capabilityVersion) {
6845
+ latestVersion = capabilityVersionFromProto(
6846
+ versionResult.value.data.capabilityVersion,
6847
+ projectId2
6848
+ );
6849
+ } else if (versionResult.status === "rejected") {
6850
+ const reason = versionResult.reason instanceof Error ? versionResult.reason.message : String(versionResult.reason);
6851
+ console.warn(
6852
+ `[Capabilities] getLatestCapabilityVersion(${id}) failed; loading capability without a snapshot (pinned versions still resolve): ${reason}`
6853
+ );
6854
+ }
6839
6855
  return { ...capabilityFromProto(capability), latestVersion };
6840
6856
  },
6841
6857
  async getCapabilityVersion(projectId2, capabilityId, versionId) {
@@ -7487,117 +7503,42 @@ import {
7487
7503
  // src/run-scenario/agents/claude-code/write-skills.ts
7488
7504
  import { mkdir as mkdir3 } from "fs/promises";
7489
7505
  import { join } from "path";
7490
-
7491
- // src/run-scenario/agents/shared/resolve-capability-content.ts
7492
- import {
7493
- fetchGitHubFile as fetchGitHubFile2,
7494
- fetchGitHubFolder as fetchGitHubFolder2
7495
- } from "@wix/evalforge-github-client";
7496
- import { MCP_SERVERS_JSON_KEY } from "@wix/evalforge-types";
7497
- var USER_AGENT = "EvalForge-Evaluator";
7498
- async function resolveSkillFiles(skill, fetchFn = fetchGitHubFolder2) {
7499
- const version = skill.latestVersion;
7500
- if (version?.files && version.files.length > 0) {
7501
- console.log(
7502
- `[Skill] ${skill.name}: using ${version.files.length} file(s) from snapshot`
7503
- );
7504
- return version.files;
7505
- }
7506
- if (skill.source) {
7507
- const files = await fetchFn(skill.source, { userAgent: USER_AGENT });
7508
- console.log(
7509
- `[Skill] ${skill.name}: fetched ${files.length} file(s) from GitHub (live)`
7510
- );
7511
- return files;
7512
- }
7513
- throw new Error(`Skill ${skill.name} has no files and no source configured`);
7514
- }
7515
- async function fetchSourceFile(label, noun, name, source, fetchFn) {
7516
- try {
7517
- const content = await fetchFn(source, { userAgent: USER_AGENT });
7518
- console.log(
7519
- `[${label}] Fetched "${name}" from ${source.owner}/${source.repo}/${source.path}@${source.ref}`
7520
- );
7521
- return content;
7522
- } catch (error) {
7523
- const message = error instanceof Error ? error.message : "Unknown error";
7524
- console.error(`[${label}] "${name}": GitHub fetch failed: ${message}`);
7525
- throw new Error(
7526
- `Failed to fetch ${noun} "${name}" from GitHub: ${message}`
7527
- );
7528
- }
7529
- }
7530
- async function resolveSubAgentMd(agent, fetchFn = fetchGitHubFile2) {
7531
- if (agent.source) {
7532
- return fetchSourceFile(
7533
- "SubAgents",
7534
- "sub-agent",
7535
- agent.name,
7536
- agent.source,
7537
- fetchFn
7538
- );
7539
- }
7540
- if (!agent.subAgentMd) {
7541
- console.warn(
7542
- `[SubAgents] "${agent.name}" has empty inline content \u2013 the agent file will be blank`
7543
- );
7544
- }
7545
- return agent.subAgentMd;
7546
- }
7547
- async function resolveRuleText(rule, fetchFn = fetchGitHubFile2) {
7548
- if (!rule.source) {
7549
- return rule.content;
7550
- }
7551
- return fetchSourceFile("Rules", "rule", rule.name, rule.source, fetchFn);
7552
- }
7553
- async function resolveMcpConfig(mcp, fetchFn = fetchGitHubFile2) {
7554
- if (!mcp.source) {
7555
- return mcp.config;
7556
- }
7557
- const raw = await fetchSourceFile(
7558
- "MCP",
7559
- "MCP",
7560
- mcp.name,
7561
- mcp.source,
7562
- fetchFn
7563
- );
7564
- let parsed;
7565
- try {
7566
- parsed = JSON.parse(raw);
7567
- } catch (error) {
7568
- const message = error instanceof Error ? error.message : "Unknown error";
7569
- throw new Error(
7570
- `MCP "${mcp.name}" GitHub source is not valid JSON: ${message}`
7571
- );
7572
- }
7573
- if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
7574
- throw new Error(`MCP "${mcp.name}" GitHub source must be a JSON object`);
7575
- }
7576
- const obj = parsed;
7577
- const servers = obj[MCP_SERVERS_JSON_KEY];
7578
- if (typeof servers === "object" && servers !== null && !Array.isArray(servers)) {
7579
- return servers;
7580
- }
7581
- return obj;
7582
- }
7583
-
7584
- // src/run-scenario/agents/claude-code/write-skills.ts
7585
- async function writeSkillsToFilesystem(cwd, skills, fetchFn) {
7506
+ import { fetchGitHubFolder as fetchGitHubFolder2 } from "@wix/evalforge-github-client";
7507
+ async function writeSkillsToFilesystem(cwd, skills, fetchFn = fetchGitHubFolder2) {
7586
7508
  await Promise.all(
7587
7509
  skills.map((skill) => writeSkillToFilesystem(cwd, skill, fetchFn))
7588
7510
  );
7589
7511
  }
7590
- async function writeSkillToFilesystem(cwd, skill, fetchFn) {
7591
- const skillDir = join(cwd, ".claude", "skills", skill.name);
7512
+ async function writeSkillToFilesystem(cwd, skill, fetchFn = fetchGitHubFolder2) {
7513
+ const skillName = skill.name;
7514
+ const skillDir = join(cwd, ".claude", "skills", skillName);
7592
7515
  await mkdir3(skillDir, { recursive: true });
7593
- try {
7594
- const files = await resolveSkillFiles(skill, fetchFn);
7595
- await writeFilesToDirectory(skillDir, files);
7596
- } catch (error) {
7597
- const message = error instanceof Error ? error.message : "Unknown error";
7598
- throw new Error(
7599
- `Failed to write skill ${skill.name} to filesystem: ${message}`
7516
+ const version = skill.latestVersion;
7517
+ if (version?.files && version.files.length > 0) {
7518
+ await writeFilesToDirectory(skillDir, version.files);
7519
+ console.log(
7520
+ `[Skill] ${skillName}: wrote ${version.files.length} file(s) from snapshot`
7600
7521
  );
7522
+ } else if (skill.source) {
7523
+ try {
7524
+ const files = await fetchFn(skill.source, {
7525
+ userAgent: "EvalForge-Evaluator"
7526
+ });
7527
+ await writeFilesToDirectory(skillDir, files);
7528
+ console.log(
7529
+ `[Skill] ${skillName}: wrote ${files.length} file(s) from GitHub (live)`
7530
+ );
7531
+ } catch (error) {
7532
+ const message = error instanceof Error ? error.message : "Unknown error";
7533
+ console.error(
7534
+ `[Skill] ${skillName}: GitHub fetch failed: ${message}, no files to fall back to`
7535
+ );
7536
+ throw new Error(
7537
+ `Failed to write skill ${skillName} to filesystem: ${message}`
7538
+ );
7539
+ }
7540
+ } else {
7541
+ throw new Error(`Skill ${skillName} has no files and no source configured`);
7601
7542
  }
7602
7543
  }
7603
7544
 
@@ -7615,7 +7556,7 @@ import { randomUUID } from "crypto";
7615
7556
  // src/run-scenario/agents/claude-code/write-mcp.ts
7616
7557
  import { writeFile as writeFile3 } from "fs/promises";
7617
7558
  import { join as join3 } from "path";
7618
- import { MCP_SERVERS_JSON_KEY as MCP_SERVERS_JSON_KEY2 } from "@wix/evalforge-types";
7559
+ import { MCP_SERVERS_JSON_KEY } from "@wix/evalforge-types";
7619
7560
 
7620
7561
  // src/run-scenario/agents/shared/resolve-mcp-placeholders.ts
7621
7562
  import { readFile } from "fs/promises";
@@ -7660,11 +7601,11 @@ async function resolveMcpPlaceholders(mcpServers, options = {}) {
7660
7601
  }
7661
7602
 
7662
7603
  // src/run-scenario/agents/claude-code/write-mcp.ts
7663
- async function writeMcpToFilesystem(cwd, mcps, fetchFn) {
7604
+ async function writeMcpToFilesystem(cwd, mcps) {
7664
7605
  if (mcps.length === 0) return;
7665
7606
  const mcpServers = {};
7666
7607
  for (const mcp of mcps) {
7667
- const config = await resolveMcpConfig(mcp, fetchFn);
7608
+ const config = mcp.config;
7668
7609
  for (const [key, value] of Object.entries(config)) {
7669
7610
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
7670
7611
  throw new Error(
@@ -7676,7 +7617,7 @@ async function writeMcpToFilesystem(cwd, mcps, fetchFn) {
7676
7617
  }
7677
7618
  const resolvedServers = await resolveMcpPlaceholders(mcpServers, { cwd });
7678
7619
  const content = JSON.stringify(
7679
- { [MCP_SERVERS_JSON_KEY2]: resolvedServers },
7620
+ { [MCP_SERVERS_JSON_KEY]: resolvedServers },
7680
7621
  null,
7681
7622
  2
7682
7623
  );
@@ -7688,6 +7629,9 @@ async function writeMcpToFilesystem(cwd, mcps, fetchFn) {
7688
7629
  // src/run-scenario/agents/claude-code/write-sub-agents.ts
7689
7630
  import { mkdir as mkdir4, writeFile as writeFile4 } from "fs/promises";
7690
7631
  import { join as join4 } from "path";
7632
+ import {
7633
+ fetchGitHubFile as fetchGitHubFile2
7634
+ } from "@wix/evalforge-github-client";
7691
7635
  var AGENTS_DIR = ".claude/agents";
7692
7636
  function toAgentFilename(name, index, nameCount) {
7693
7637
  const base = (name || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
@@ -7695,7 +7639,34 @@ function toAgentFilename(name, index, nameCount) {
7695
7639
  nameCount.set(base, count + 1);
7696
7640
  return count === 0 ? base : `${base}-${count + 1}`;
7697
7641
  }
7698
- async function writeSubAgentsToFilesystem(cwd, subAgents, fetchFn) {
7642
+ async function resolveSubAgentContent(agent, fetchFn) {
7643
+ if (agent.source) {
7644
+ try {
7645
+ const content = await fetchFn(agent.source, {
7646
+ userAgent: "EvalForge-Evaluator"
7647
+ });
7648
+ console.log(
7649
+ `[SubAgents] Fetched "${agent.name}" from ${agent.source.owner}/${agent.source.repo}/${agent.source.path}@${agent.source.ref}`
7650
+ );
7651
+ return content;
7652
+ } catch (error) {
7653
+ const message = error instanceof Error ? error.message : "Unknown error";
7654
+ console.error(
7655
+ `[SubAgents] "${agent.name}": GitHub fetch failed: ${message}`
7656
+ );
7657
+ throw new Error(
7658
+ `Failed to fetch sub-agent "${agent.name}" from GitHub: ${message}`
7659
+ );
7660
+ }
7661
+ }
7662
+ if (!agent.subAgentMd) {
7663
+ console.warn(
7664
+ `[SubAgents] "${agent.name}" has empty inline content \u2013 the agent file will be blank`
7665
+ );
7666
+ }
7667
+ return agent.subAgentMd;
7668
+ }
7669
+ async function writeSubAgentsToFilesystem(cwd, subAgents, fetchFn = fetchGitHubFile2) {
7699
7670
  if (subAgents.length === 0) return;
7700
7671
  const agentsDir = join4(cwd, AGENTS_DIR);
7701
7672
  await mkdir4(agentsDir, { recursive: true });
@@ -7703,7 +7674,7 @@ async function writeSubAgentsToFilesystem(cwd, subAgents, fetchFn) {
7703
7674
  for (const [i, agent] of subAgents.entries()) {
7704
7675
  const filename = toAgentFilename(agent.name, i, nameCount);
7705
7676
  const filePath = join4(agentsDir, `${filename}.md`);
7706
- const content = await resolveSubAgentMd(agent, fetchFn);
7677
+ const content = await resolveSubAgentContent(agent, fetchFn);
7707
7678
  await writeFile4(filePath, content, "utf8");
7708
7679
  }
7709
7680
  console.log(`[SubAgents] Written to ${agentsDir}`);
@@ -7753,19 +7724,18 @@ function validateGenericDirectory(dir, cwd) {
7753
7724
  }
7754
7725
  return trimmed;
7755
7726
  }
7756
- async function writeRulesToFilesystem(cwd, rules, fetchFn) {
7727
+ async function writeRulesToFilesystem(cwd, rules) {
7757
7728
  if (rules.length === 0) return;
7758
7729
  const nameCount = /* @__PURE__ */ new Map();
7759
7730
  let hasCursorRules = false;
7760
7731
  for (const [i, rule] of rules.entries()) {
7761
- const content = await resolveRuleText(rule, fetchFn);
7762
7732
  switch (rule.ruleType) {
7763
7733
  case "claude-md": {
7764
- await appendToFile(join5(cwd, "CLAUDE.md"), content);
7734
+ await appendToFile(join5(cwd, "CLAUDE.md"), rule.content);
7765
7735
  break;
7766
7736
  }
7767
7737
  case "agents-md": {
7768
- await appendToFile(join5(cwd, "AGENTS.md"), content);
7738
+ await appendToFile(join5(cwd, "AGENTS.md"), rule.content);
7769
7739
  break;
7770
7740
  }
7771
7741
  case "cursor-rule": {
@@ -7775,7 +7745,7 @@ async function writeRulesToFilesystem(cwd, rules, fetchFn) {
7775
7745
  }
7776
7746
  const filename = toRuleFilename(rule.name, i, nameCount);
7777
7747
  const filePath = join5(cwd, CURSOR_RULES_DIR, `${filename}.md`);
7778
- await writeFile5(filePath, content, "utf8");
7748
+ await writeFile5(filePath, rule.content, "utf8");
7779
7749
  break;
7780
7750
  }
7781
7751
  case "generic": {
@@ -7786,7 +7756,7 @@ async function writeRulesToFilesystem(cwd, rules, fetchFn) {
7786
7756
  const dirPath = join5(cwd, directory);
7787
7757
  await mkdir5(dirPath, { recursive: true });
7788
7758
  const filename = toRuleFilename(rule.name, i, nameCount);
7789
- await writeFile5(join5(dirPath, `${filename}.md`), content, "utf8");
7759
+ await writeFile5(join5(dirPath, `${filename}.md`), rule.content, "utf8");
7790
7760
  break;
7791
7761
  }
7792
7762
  default: {
@@ -9054,28 +9024,51 @@ function tryParseJson(text) {
9054
9024
  // src/run-scenario/agents/opencode/write-skills.ts
9055
9025
  import { mkdir as mkdir6 } from "fs/promises";
9056
9026
  import { join as join6 } from "path";
9057
- async function writeSkillsToFilesystem2(cwd, skills, fetchFn) {
9027
+ import { fetchGitHubFolder as fetchGitHubFolder3 } from "@wix/evalforge-github-client";
9028
+ async function writeSkillsToFilesystem2(cwd, skills, fetchFn = fetchGitHubFolder3) {
9058
9029
  await Promise.all(
9059
9030
  skills.map((skill) => writeSkillToFilesystem2(cwd, skill, fetchFn))
9060
9031
  );
9061
9032
  }
9062
9033
  async function writeSkillToFilesystem2(cwd, skill, fetchFn) {
9063
- const skillDir = join6(cwd, ".opencode", "skills", skill.name);
9034
+ const skillName = skill.name;
9035
+ const skillDir = join6(cwd, ".opencode", "skills", skillName);
9064
9036
  await mkdir6(skillDir, { recursive: true });
9065
- try {
9066
- const files = await resolveSkillFiles(skill, fetchFn);
9067
- await writeFilesToDirectory(skillDir, files);
9068
- } catch (error) {
9069
- const message = error instanceof Error ? error.message : "Unknown error";
9070
- throw new Error(
9071
- `Failed to write skill ${skill.name} to filesystem: ${message}`
9037
+ const version = skill.latestVersion;
9038
+ if (version?.files && version.files.length > 0) {
9039
+ await writeFilesToDirectory(skillDir, version.files);
9040
+ console.log(
9041
+ `[Skill] ${skillName}: wrote ${version.files.length} file(s) from snapshot`
9072
9042
  );
9043
+ } else if (skill.source) {
9044
+ try {
9045
+ const files = await fetchFn(skill.source, {
9046
+ userAgent: "EvalForge-Evaluator"
9047
+ });
9048
+ await writeFilesToDirectory(skillDir, files);
9049
+ console.log(
9050
+ `[Skill] ${skillName}: wrote ${files.length} file(s) from GitHub (live)`
9051
+ );
9052
+ } catch (error) {
9053
+ const message = error instanceof Error ? error.message : "Unknown error";
9054
+ console.error(
9055
+ `[Skill] ${skillName}: GitHub fetch failed: ${message}, no files to fall back to`
9056
+ );
9057
+ throw new Error(
9058
+ `Failed to write skill ${skillName} to filesystem: ${message}`
9059
+ );
9060
+ }
9061
+ } else {
9062
+ throw new Error(`Skill ${skillName} has no files and no source configured`);
9073
9063
  }
9074
9064
  }
9075
9065
 
9076
9066
  // src/run-scenario/agents/opencode/write-sub-agents.ts
9077
9067
  import { mkdir as mkdir7, writeFile as writeFile6 } from "fs/promises";
9078
9068
  import { join as join7 } from "path";
9069
+ import {
9070
+ fetchGitHubFile as fetchGitHubFile3
9071
+ } from "@wix/evalforge-github-client";
9079
9072
  var AGENTS_DIR2 = ".opencode/agents";
9080
9073
  function toAgentFilename2(name, index, nameCount) {
9081
9074
  const base = (name || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
@@ -9083,7 +9076,34 @@ function toAgentFilename2(name, index, nameCount) {
9083
9076
  nameCount.set(base, count + 1);
9084
9077
  return count === 0 ? base : `${base}-${count + 1}`;
9085
9078
  }
9086
- async function writeSubAgentsToFilesystem2(cwd, subAgents, fetchFn) {
9079
+ async function resolveSubAgentContent2(agent, fetchFn) {
9080
+ if (agent.source) {
9081
+ try {
9082
+ const content = await fetchFn(agent.source, {
9083
+ userAgent: "EvalForge-Evaluator"
9084
+ });
9085
+ console.log(
9086
+ `[SubAgents] Fetched "${agent.name}" from ${agent.source.owner}/${agent.source.repo}/${agent.source.path}@${agent.source.ref}`
9087
+ );
9088
+ return content;
9089
+ } catch (error) {
9090
+ const message = error instanceof Error ? error.message : "Unknown error";
9091
+ console.error(
9092
+ `[SubAgents] "${agent.name}": GitHub fetch failed: ${message}`
9093
+ );
9094
+ throw new Error(
9095
+ `Failed to fetch sub-agent "${agent.name}" from GitHub: ${message}`
9096
+ );
9097
+ }
9098
+ }
9099
+ if (!agent.subAgentMd) {
9100
+ console.warn(
9101
+ `[SubAgents] "${agent.name}" has empty inline content \u2013 the agent file will be blank`
9102
+ );
9103
+ }
9104
+ return agent.subAgentMd;
9105
+ }
9106
+ async function writeSubAgentsToFilesystem2(cwd, subAgents, fetchFn = fetchGitHubFile3) {
9087
9107
  if (subAgents.length === 0) return;
9088
9108
  const agentsDir = join7(cwd, AGENTS_DIR2);
9089
9109
  await mkdir7(agentsDir, { recursive: true });
@@ -9091,7 +9111,7 @@ async function writeSubAgentsToFilesystem2(cwd, subAgents, fetchFn) {
9091
9111
  for (const [i, agent] of subAgents.entries()) {
9092
9112
  const filename = toAgentFilename2(agent.name, i, nameCount);
9093
9113
  const filePath = join7(agentsDir, `${filename}.md`);
9094
- const content = await resolveSubAgentMd(agent, fetchFn);
9114
+ const content = await resolveSubAgentContent2(agent, fetchFn);
9095
9115
  await writeFile6(filePath, content, "utf8");
9096
9116
  }
9097
9117
  console.log(`[SubAgents] Written to ${agentsDir}`);
@@ -9193,7 +9213,7 @@ async function buildOpenCodeEnv(options) {
9193
9213
  if (options.mcps && options.mcps.length > 0) {
9194
9214
  const mcpServers = {};
9195
9215
  for (const mcpEntity of options.mcps) {
9196
- const entityConfig = await resolveMcpConfig(mcpEntity);
9216
+ const entityConfig = mcpEntity.config;
9197
9217
  for (const [key, value] of Object.entries(entityConfig)) {
9198
9218
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
9199
9219
  throw new Error(