npm - @wix/evalforge-evaluator - Versions diffs - 0.104.0 → 0.106.0 - Mend

@wix/evalforge-evaluator 0.104.0 → 0.106.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (9)

package/build/index.js +47 -37
package/build/index.js.map +2 -2
package/build/index.mjs +47 -37
package/build/index.mjs.map +2 -2
package/build/types/api-client.d.ts +2 -2
package/build/types/fetch-evaluation-data.d.ts +6 -7
package/build/types/run-scenario/index.d.ts +1 -1
package/build/types/run-scenario/run-agent-with-context.d.ts +1 -1
package/package.json +4 -4

package/build/index.js CHANGED

@@ -838,9 +838,6 @@ function createApiClient(serverUrl, options = "") {
     getEvalRun(projectId2, id) {
       return fetchJson(`/projects/${projectId2}/eval-runs/${id}`);
     },
-    getSkillsGroup(projectId2, id) {
-      return fetchJson(`/projects/${projectId2}/skills-groups/${id}`);
-    },
     getScenario(projectId2, id) {
       return fetchJson(`/projects/${projectId2}/test-scenarios/${id}`);
     },
@@ -872,6 +869,9 @@ function createApiClient(serverUrl, options = "") {
     getRule(projectId2, id) {
       return fetchJson(`/projects/${projectId2}/rules/${id}`);
     },
+    getPreset(projectId2, id) {
+      return fetchJson(`/projects/${projectId2}/presets/${id}`);
+    },
     getAssertion(projectId2, id) {
       return fetchJson(`/projects/${projectId2}/assertions/${id}`);
     },
@@ -1067,24 +1067,21 @@ function customAssertionToAssertion(ca, params) {
 async function fetchEvaluationData(api, projectId2, evalRunId2) {
   const evalRun = await api.getEvalRun(projectId2, evalRunId2);
   const scenarios = await Promise.all(
-    evalRun.scenarioIds.map((id) => api.getScenario(projectId2, id))
+    (evalRun.scenarioIds ?? []).map((id) => api.getScenario(projectId2, id))
   );
   let agent = null;
   if (evalRun.agentId) {
     agent = await api.getAgent(projectId2, evalRun.agentId);
   }
   let skills = [];
-  let skillsGroup = null;
-  if (evalRun.skillsGroupId) {
-    skillsGroup = await api.getSkillsGroup(projectId2, evalRun.skillsGroupId);
-    if (skillsGroup.skillIds.length > 0) {
-      const fetchResults = await Promise.allSettled(
-        skillsGroup.skillIds.map((id) => api.getSkill(projectId2, id))
-      );
-      skills = fetchResults.filter(
-        (r) => r.status === "fulfilled"
-      ).map((r) => r.value).filter((s) => !s.deleted);
-    }
+  const resolvedSkillIds = evalRun.skillIds ?? [];
+  if (resolvedSkillIds.length > 0) {
+    const fetchResults = await Promise.allSettled(
+      resolvedSkillIds.map((id) => api.getSkill(projectId2, id))
+    );
+    skills = fetchResults.filter(
+      (r) => r.status === "fulfilled"
+    ).map((r) => r.value).filter((s) => !s.deleted);
     if (evalRun.skillVersions && Object.keys(evalRun.skillVersions).length > 0) {
       skills = await Promise.all(
         skills.map(async (skill) => {
@@ -1169,13 +1166,22 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
       resolvedAssertions: resolvedAssertions.length > 0 ? resolvedAssertions : void 0
     };
   });
-  const skillsGroupName = skillsGroup?.name ?? "";
+  let presetName = "";
+  if (evalRun.presetId) {
+    try {
+      const preset = await api.getPreset(projectId2, evalRun.presetId);
+      presetName = preset.name;
+    } catch {
+      presetName = skills.length > 0 ? skills.map((s) => s.name).join(", ") : "";
+    }
+  } else if (skills.length > 0) {
+    presetName = skills.map((s) => s.name).join(", ");
+  }
   return {
     evalRun,
     agent,
     skills,
-    skillsGroup,
-    skillsGroupName,
+    presetName,
     mcps,
     subAgents,
     rules,
@@ -43710,18 +43716,20 @@ function extractTemplateFiles(before, after) {
 var import_evalforge_types7 = require("@wix/evalforge-types");
 var DEFAULT_AGENT_COMMAND = import_evalforge_types7.AgentRunCommand.CLAUDE;
 async function runAgentWithContext(config2, evalRunId2, scenario, evalData, workDir) {
-  const skillsGroupId = evalData.evalRun.skillsGroupId;
+  const hasEntities = evalData.skills.length > 0 || evalData.mcps.length > 0 || evalData.subAgents.length > 0 || (evalData.rules?.length ?? 0) > 0;
+  if (!hasEntities) {
+    throw new Error(
+      `Eval run ${evalRunId2} has no entities configured: at least one skill, MCP, sub-agent, or rule is required.`
+    );
+  }
   const agent = evalData.agent ?? void 0;
   const isSDK = agent?.agentType === import_evalforge_types7.AgentType.SDK;
-  if (!skillsGroupId) {
-    throw new Error(`Eval run ${evalData.evalRun.id} has no skillsGroupId`);
-  }
   const identifier = isSDK ? simpleAgentAdapter.id : agent?.runCommand ?? DEFAULT_AGENT_COMMAND;
   const adapter = getAdapter(identifier);
   const startedAt = (/* @__PURE__ */ new Date()).toISOString();
   const beforeSnapshot = workDir ? snapshotDirectory(workDir) : {};
-  const targetId = skillsGroupId ?? agent?.id ?? evalData.evalRun.id;
-  const targetName = evalData.skillsGroupName || agent?.name || "";
+  const targetId = evalData.evalRun.presetId ?? agent?.id ?? evalData.evalRun.id;
+  const targetName = evalData.presetName || agent?.name || "";
   const executionContext = {
     skills: evalData.skills,
     scenario,
@@ -43769,7 +43777,7 @@ async function runAgentWithContext(config2, evalRunId2, scenario, evalData, work
 // src/run-scenario/index.ts
 async function runScenario(config2, evalRunId2, scenario, evalData, template, resolvedAssertions) {
-  const targetId = evalData.evalRun.skillsGroupId ?? evalData.agent?.id ?? evalData.evalRun.id;
+  const targetId = evalData.evalRun.presetId ?? evalData.agent?.id ?? evalData.evalRun.id;
   const workDir = await prepareWorkingDirectory(
     config2,
     evalRunId2,
@@ -44004,16 +44012,18 @@ async function runEvaluation(projectId2, evalRunId2) {
     skillCount: skills.length,
     hasAgent: !!agent,
     agentId: evalData.evalRun.agentId,
-    skillsGroupId: evalData.evalRun.skillsGroupId
+    presetId: evalData.evalRun.presetId,
+    skillIds: evalData.evalRun.skillIds
   };
-  if (scenarioItems.length > 0 && skills.length === 0) {
+  const hasEntities = skills.length > 0 || evalData.mcps.length > 0 || evalData.subAgents.length > 0 || evalData.rules.length > 0;
+  if (scenarioItems.length > 0 && !hasEntities) {
     throw new Error(
-      `[${ExecutionPhase.VALIDATION}] Eval run has no skills: set skillsGroupId and ensure the group has skills. (skillsGroupId: ${evalData.evalRun.skillsGroupId || "not set"})`
+      `[${ExecutionPhase.VALIDATION}] Eval run has no entities configured: at least one skill, MCP, sub-agent, or rule is required.`
     );
   }
-  if (scenarioItems.length > 0 && skills.length > 0 && !agent) {
+  if (scenarioItems.length > 0 && hasEntities && !agent) {
     throw new Error(
-      `[${ExecutionPhase.VALIDATION}] Eval run has no agent: set agentId for skill-based runs. (agentId: ${evalData.evalRun.agentId || "not set"})`
+      `[${ExecutionPhase.VALIDATION}] Eval run has no agent: set agentId for evaluation runs. (agentId: ${evalData.evalRun.agentId || "not set"})`
     );
   }
   let completedScenarios = 0;
@@ -44025,16 +44035,16 @@ async function runEvaluation(projectId2, evalRunId2) {
       evalRunId: evalRunId2,
       scenarioId: scenario.id,
       scenarioName: scenario.name,
-      skillsGroupId: evalData.evalRun.skillsGroupId,
-      skillsGroupName: evalData.skillsGroupName,
+      presetId: evalData.evalRun.presetId,
+      presetName: evalData.presetName,
       agentId: agent?.id,
       agentName: agent?.name,
       progress: `${completedScenarios + 1}/${totalScenarios}`
     };
     const skillNames = evalData.skills.map((s) => s.name).join(", ");
     console.log(
-      "[Evaluator] Running scenario with skills group:",
-      evalData.skillsGroupName,
+      "[Evaluator] Running scenario with preset:",
+      evalData.presetName,
       skillNames ? `(${skillNames})` : "",
       agent ? `with agent: ${agent.name}` : "",
       `(${completedScenarios + 1}/${totalScenarios})`
@@ -44060,8 +44070,8 @@ async function runEvaluation(projectId2, evalRunId2) {
       const errorMsg = err instanceof Error ? err.message : String(err);
       const errorStack = err instanceof Error ? err.stack : void 0;
       console.error(
-        "[Evaluator] Failed to run scenario with skills group:",
-        evalData.skillsGroupName,
+        "[Evaluator] Failed to run scenario with preset:",
+        evalData.presetName,
         "Error:",
         errorMsg
       );
@@ -44069,7 +44079,7 @@ async function runEvaluation(projectId2, evalRunId2) {
         console.error("[Evaluator] Stack trace:", errorStack);
       }
       throw new Error(
-        `[${state.currentPhase}] Failed to execute skills group "${evalData.skillsGroupName}" on scenario "${scenario.name}": ${errorMsg}`
+        `[${state.currentPhase}] Failed to execute preset "${evalData.presetName}" on scenario "${scenario.name}": ${errorMsg}`
       );
     }
   }