npm - @wix/evalforge-evaluator - Versions diffs - 0.87.0 → 0.89.0 - Mend

@wix/evalforge-evaluator 0.87.0 → 0.89.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/build/index.js +39 -18
package/build/index.js.map +4 -4
package/build/index.mjs +32 -11
package/build/index.mjs.map +4 -4
package/build/types/run-scenario/agents/claude-code/claude-code-adapter.d.ts +2 -1
package/build/types/run-scenario/agents/registry.d.ts +14 -14
package/package.json +4 -4

package/build/index.mjs (changed)

@@ -213,7 +213,21 @@ function applyParamsToAssertion(assertion, params) {
         );
       }
     }
-    return { ...assertion, prompt, systemPrompt };
+    return {
+      ...assertion,
+      prompt,
+      systemPrompt,
+      ...params.model !== void 0 && { model: params.model },
+      ...params.maxTokens !== void 0 && {
+        maxTokens: params.maxTokens
+      },
+      ...params.temperature !== void 0 && {
+        temperature: params.temperature
+      },
+      ...params.minScore !== void 0 && {
+        minScore: params.minScore
+      }
+    };
   }
   if (assertion.type === "skill_was_called" && params.skillNames !== void 0) {
     return {
@@ -245,7 +259,10 @@ function resolveSystemAssertion(assertionId, params) {
         type: "llm_judge",
         prompt: params?.prompt ?? "",
         systemPrompt: params?.systemPrompt,
-        minScore: params?.minScore
+        minScore: params?.minScore,
+        model: params?.model,
+        maxTokens: params?.maxTokens,
+        temperature: params?.temperature
       };
       break;
     default:
@@ -381,7 +398,7 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
 // src/run-scenario/index.ts
 import {
   AssertionResultStatus,
-  AVAILABLE_MODEL_IDS
+  DEFAULT_JUDGE_MODEL
 } from "@wix/evalforge-types";
 import {
   evaluateAssertions as evaluateAssertionsBase
@@ -514,7 +531,7 @@ import { randomUUID as randomUUID2 } from "crypto";
 // src/run-scenario/agents/registry.ts
 var AgentAdapterRegistry = class {
   /**
-   * Map of command strings to their registered adapters.
+   * Map of run commands to their registered adapters.
    * Multiple commands can map to the same adapter.
    */
   adapters = /* @__PURE__ */ new Map();
@@ -543,9 +560,9 @@ var AgentAdapterRegistry = class {
     }
   }
   /**
-   * Get an adapter by command string.
+   * Get an adapter by run command.
    *
-   * @param runCommand - The command string to look up (e.g., 'claude', 'cursor')
+   * @param runCommand - The run command to look up
    * @returns The registered adapter, or undefined if not found
    */
   get(runCommand) {
@@ -554,7 +571,7 @@ var AgentAdapterRegistry = class {
   /**
    * Check if a command has a registered adapter.
    *
-   * @param runCommand - The command string to check
+   * @param runCommand - The run command to check
    * @returns True if an adapter is registered for this command
    */
   has(runCommand) {
@@ -571,7 +588,7 @@ var AgentAdapterRegistry = class {
   /**
    * Get all supported commands.
    *
-   * @returns Array of all registered command strings
+   * @returns Array of all registered run commands
    */
   getSupportedCommands() {
     return Array.from(this.adapters.keys());
@@ -621,6 +638,9 @@ function getAdapter(runCommand) {
   return adapter;
 }
+// src/run-scenario/agents/claude-code/claude-code-adapter.ts
+import { AgentRunCommand } from "@wix/evalforge-types";
 // src/run-scenario/agents/claude-code/execute.ts
 import {
   ClaudeModel,
@@ -1642,7 +1662,7 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
 var ClaudeCodeAdapter = class {
   id = "claude-code";
   name = "Claude Code";
-  supportedCommands = ["claude"];
+  supportedCommands = [AgentRunCommand.CLAUDE];
   /**
    * Execute a skill using the Claude Code SDK.
    *
@@ -2423,7 +2443,8 @@ function extractTemplateFiles(before, after) {
 }
 // src/run-scenario/run-agent-with-context.ts
-var DEFAULT_AGENT_COMMAND = "claude";
+import { AgentRunCommand as AgentRunCommand2 } from "@wix/evalforge-types";
+var DEFAULT_AGENT_COMMAND = AgentRunCommand2.CLAUDE;
 async function runAgentWithContext(config, evalRunId2, scenario, evalData, workDir) {
   const skillsGroupId = evalData.evalRun.skillsGroupId;
   if (!skillsGroupId) {
@@ -2510,7 +2531,7 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
     }))
   };
   const { "x-wix-ai-gateway-stream": _stream, ...judgeHeaders } = config.aiGatewayHeaders;
-  const defaultJudgeModel = AVAILABLE_MODEL_IDS[0];
+  const defaultJudgeModel = DEFAULT_JUDGE_MODEL;
   const assertionContext = {
     workDir,
     defaultJudgeModel,