npm - @wix/evalforge-evaluator - Versions diffs - 0.105.0 → 0.107.0 - Mend

@wix/evalforge-evaluator 0.105.0 → 0.107.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/build/index.js +77 -38
package/build/index.js.map +3 -3
package/build/index.mjs +79 -38
package/build/index.mjs.map +3 -3
package/build/types/api-client.d.ts +2 -2
package/build/types/fetch-evaluation-data.d.ts +6 -7
package/build/types/run-scenario/agents/claude-code/write-sub-agents.d.ts +8 -4
package/build/types/run-scenario/index.d.ts +1 -1
package/build/types/run-scenario/run-agent-with-context.d.ts +1 -1
package/package.json +4 -4

package/build/index.js CHANGED

@@ -838,9 +838,6 @@ function createApiClient(serverUrl, options = "") {
     getEvalRun(projectId2, id) {
       return fetchJson(`/projects/${projectId2}/eval-runs/${id}`);
     },
-    getSkillsGroup(projectId2, id) {
-      return fetchJson(`/projects/${projectId2}/skills-groups/${id}`);
-    },
     getScenario(projectId2, id) {
       return fetchJson(`/projects/${projectId2}/test-scenarios/${id}`);
     },
@@ -872,6 +869,9 @@ function createApiClient(serverUrl, options = "") {
     getRule(projectId2, id) {
       return fetchJson(`/projects/${projectId2}/rules/${id}`);
     },
+    getPreset(projectId2, id) {
+      return fetchJson(`/projects/${projectId2}/presets/${id}`);
+    },
     getAssertion(projectId2, id) {
       return fetchJson(`/projects/${projectId2}/assertions/${id}`);
     },
@@ -1074,17 +1074,14 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
     agent = await api.getAgent(projectId2, evalRun.agentId);
   }
   let skills = [];
-  let skillsGroup = null;
-  if (evalRun.skillsGroupId) {
-    skillsGroup = await api.getSkillsGroup(projectId2, evalRun.skillsGroupId);
-    if (skillsGroup.skillIds.length > 0) {
-      const fetchResults = await Promise.allSettled(
-        skillsGroup.skillIds.map((id) => api.getSkill(projectId2, id))
-      );
-      skills = fetchResults.filter(
-        (r) => r.status === "fulfilled"
-      ).map((r) => r.value).filter((s) => !s.deleted);
-    }
+  const resolvedSkillIds = evalRun.skillIds ?? [];
+  if (resolvedSkillIds.length > 0) {
+    const fetchResults = await Promise.allSettled(
+      resolvedSkillIds.map((id) => api.getSkill(projectId2, id))
+    );
+    skills = fetchResults.filter(
+      (r) => r.status === "fulfilled"
+    ).map((r) => r.value).filter((s) => !s.deleted);
     if (evalRun.skillVersions && Object.keys(evalRun.skillVersions).length > 0) {
       skills = await Promise.all(
         skills.map(async (skill) => {
@@ -1169,13 +1166,22 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
       resolvedAssertions: resolvedAssertions.length > 0 ? resolvedAssertions : void 0
     };
   });
-  const skillsGroupName = skillsGroup?.name ?? "";
+  let presetName = "";
+  if (evalRun.presetId) {
+    try {
+      const preset = await api.getPreset(projectId2, evalRun.presetId);
+      presetName = preset.name;
+    } catch {
+      presetName = skills.length > 0 ? skills.map((s) => s.name).join(", ") : "";
+    }
+  } else if (skills.length > 0) {
+    presetName = skills.map((s) => s.name).join(", ");
+  }
   return {
     evalRun,
     agent,
     skills,
-    skillsGroup,
-    skillsGroupName,
+    presetName,
     mcps,
     subAgents,
     rules,
@@ -1499,6 +1505,7 @@ async function writeMcpToFilesystem(cwd, mcps) {
 // src/run-scenario/agents/claude-code/write-sub-agents.ts
 var import_promises5 = require("fs/promises");
 var import_path6 = require("path");
+var import_evalforge_github_client3 = require("@wix/evalforge-github-client");
 var AGENTS_DIR = ".claude/agents";
 function toAgentFilename(name26, index, nameCount) {
   const base = (name26 || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
@@ -1506,7 +1513,34 @@ function toAgentFilename(name26, index, nameCount) {
   nameCount.set(base, count + 1);
   return count === 0 ? base : `${base}-${count + 1}`;
 }
-async function writeSubAgentsToFilesystem(cwd, subAgents) {
+async function resolveSubAgentContent(agent, fetchFn) {
+  if (agent.source) {
+    try {
+      const content = await fetchFn(agent.source, {
+        userAgent: "EvalForge-Evaluator"
+      });
+      console.log(
+        `[SubAgents] Fetched "${agent.name}" from ${agent.source.owner}/${agent.source.repo}/${agent.source.path}@${agent.source.ref}`
+      );
+      return content;
+    } catch (error48) {
+      const message = error48 instanceof Error ? error48.message : "Unknown error";
+      console.error(
+        `[SubAgents] "${agent.name}": GitHub fetch failed: ${message}`
+      );
+      throw new Error(
+        `Failed to fetch sub-agent "${agent.name}" from GitHub: ${message}`
+      );
+    }
+  }
+  if (!agent.subAgentMd) {
+    console.warn(
+      `[SubAgents] "${agent.name}" has empty inline content \u2013 the agent file will be blank`
+    );
+  }
+  return agent.subAgentMd;
+}
+async function writeSubAgentsToFilesystem(cwd, subAgents, fetchFn = import_evalforge_github_client3.fetchGitHubFile) {
   if (subAgents.length === 0) return;
   const agentsDir = (0, import_path6.join)(cwd, AGENTS_DIR);
   await (0, import_promises5.mkdir)(agentsDir, { recursive: true });
@@ -1514,7 +1548,8 @@ async function writeSubAgentsToFilesystem(cwd, subAgents) {
   for (const [i, agent] of subAgents.entries()) {
     const filename = toAgentFilename(agent.name, i, nameCount);
     const filePath = (0, import_path6.join)(agentsDir, `${filename}.md`);
-    await (0, import_promises5.writeFile)(filePath, agent.subAgentMd, "utf8");
+    const content = await resolveSubAgentContent(agent, fetchFn);
+    await (0, import_promises5.writeFile)(filePath, content, "utf8");
   }
   console.log(`[SubAgents] Written to ${agentsDir}`);
 }
@@ -43710,18 +43745,20 @@ function extractTemplateFiles(before, after) {
 var import_evalforge_types7 = require("@wix/evalforge-types");
 var DEFAULT_AGENT_COMMAND = import_evalforge_types7.AgentRunCommand.CLAUDE;
 async function runAgentWithContext(config2, evalRunId2, scenario, evalData, workDir) {
-  const skillsGroupId = evalData.evalRun.skillsGroupId;
+  const hasEntities = evalData.skills.length > 0 || evalData.mcps.length > 0 || evalData.subAgents.length > 0 || (evalData.rules?.length ?? 0) > 0;
+  if (!hasEntities) {
+    throw new Error(
+      `Eval run ${evalRunId2} has no entities configured: at least one skill, MCP, sub-agent, or rule is required.`
+    );
+  }
   const agent = evalData.agent ?? void 0;
   const isSDK = agent?.agentType === import_evalforge_types7.AgentType.SDK;
-  if (!skillsGroupId) {
-    throw new Error(`Eval run ${evalData.evalRun.id} has no skillsGroupId`);
-  }
   const identifier = isSDK ? simpleAgentAdapter.id : agent?.runCommand ?? DEFAULT_AGENT_COMMAND;
   const adapter = getAdapter(identifier);
   const startedAt = (/* @__PURE__ */ new Date()).toISOString();
   const beforeSnapshot = workDir ? snapshotDirectory(workDir) : {};
-  const targetId = skillsGroupId ?? agent?.id ?? evalData.evalRun.id;
-  const targetName = evalData.skillsGroupName || agent?.name || "";
+  const targetId = evalData.evalRun.presetId ?? agent?.id ?? evalData.evalRun.id;
+  const targetName = evalData.presetName || agent?.name || "";
   const executionContext = {
     skills: evalData.skills,
     scenario,
@@ -43769,7 +43806,7 @@ async function runAgentWithContext(config2, evalRunId2, scenario, evalData, work
 // src/run-scenario/index.ts
 async function runScenario(config2, evalRunId2, scenario, evalData, template, resolvedAssertions) {
-  const targetId = evalData.evalRun.skillsGroupId ?? evalData.agent?.id ?? evalData.evalRun.id;
+  const targetId = evalData.evalRun.presetId ?? evalData.agent?.id ?? evalData.evalRun.id;
   const workDir = await prepareWorkingDirectory(
     config2,
     evalRunId2,
@@ -44004,16 +44041,18 @@ async function runEvaluation(projectId2, evalRunId2) {
     skillCount: skills.length,
     hasAgent: !!agent,
     agentId: evalData.evalRun.agentId,
-    skillsGroupId: evalData.evalRun.skillsGroupId
+    presetId: evalData.evalRun.presetId,
+    skillIds: evalData.evalRun.skillIds
   };
-  if (scenarioItems.length > 0 && skills.length === 0) {
+  const hasEntities = skills.length > 0 || evalData.mcps.length > 0 || evalData.subAgents.length > 0 || evalData.rules.length > 0;
+  if (scenarioItems.length > 0 && !hasEntities) {
     throw new Error(
-      `[${ExecutionPhase.VALIDATION}] Eval run has no skills: set skillsGroupId and ensure the group has skills. (skillsGroupId: ${evalData.evalRun.skillsGroupId || "not set"})`
+      `[${ExecutionPhase.VALIDATION}] Eval run has no entities configured: at least one skill, MCP, sub-agent, or rule is required.`
     );
   }
-  if (scenarioItems.length > 0 && skills.length > 0 && !agent) {
+  if (scenarioItems.length > 0 && hasEntities && !agent) {
     throw new Error(
-      `[${ExecutionPhase.VALIDATION}] Eval run has no agent: set agentId for skill-based runs. (agentId: ${evalData.evalRun.agentId || "not set"})`
+      `[${ExecutionPhase.VALIDATION}] Eval run has no agent: set agentId for evaluation runs. (agentId: ${evalData.evalRun.agentId || "not set"})`
     );
   }
   let completedScenarios = 0;
@@ -44025,16 +44064,16 @@ async function runEvaluation(projectId2, evalRunId2) {
       evalRunId: evalRunId2,
       scenarioId: scenario.id,
       scenarioName: scenario.name,
-      skillsGroupId: evalData.evalRun.skillsGroupId,
-      skillsGroupName: evalData.skillsGroupName,
+      presetId: evalData.evalRun.presetId,
+      presetName: evalData.presetName,
       agentId: agent?.id,
       agentName: agent?.name,
       progress: `${completedScenarios + 1}/${totalScenarios}`
     };
     const skillNames = evalData.skills.map((s) => s.name).join(", ");
     console.log(
-      "[Evaluator] Running scenario with skills group:",
-      evalData.skillsGroupName,
+      "[Evaluator] Running scenario with preset:",
+      evalData.presetName,
       skillNames ? `(${skillNames})` : "",
       agent ? `with agent: ${agent.name}` : "",
       `(${completedScenarios + 1}/${totalScenarios})`
@@ -44060,8 +44099,8 @@ async function runEvaluation(projectId2, evalRunId2) {
       const errorMsg = err instanceof Error ? err.message : String(err);
       const errorStack = err instanceof Error ? err.stack : void 0;
       console.error(
-        "[Evaluator] Failed to run scenario with skills group:",
-        evalData.skillsGroupName,
+        "[Evaluator] Failed to run scenario with preset:",
+        evalData.presetName,
         "Error:",
         errorMsg
       );
@@ -44069,7 +44108,7 @@ async function runEvaluation(projectId2, evalRunId2) {
         console.error("[Evaluator] Stack trace:", errorStack);
       }
       throw new Error(
-        `[${state.currentPhase}] Failed to execute skills group "${evalData.skillsGroupName}" on scenario "${scenario.name}": ${errorMsg}`
+        `[${state.currentPhase}] Failed to execute preset "${evalData.presetName}" on scenario "${scenario.name}": ${errorMsg}`
       );
     }
   }