npm - @wix/evalforge-evaluator - Versions diffs - 0.87.0 → 0.89.0 - Mend

@wix/evalforge-evaluator 0.87.0 → 0.89.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/build/index.js +39 -18
package/build/index.js.map +4 -4
package/build/index.mjs +32 -11
package/build/index.mjs.map +4 -4
package/build/types/run-scenario/agents/claude-code/claude-code-adapter.d.ts +2 -1
package/build/types/run-scenario/agents/registry.d.ts +14 -14
package/package.json +4 -4

package/build/index.js (changed)

@@ -24,7 +24,7 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
 ));
 // src/index.ts
-var import_evalforge_types6 = require("@wix/evalforge-types");
+var import_evalforge_types8 = require("@wix/evalforge-types");
 // src/config.ts
 function loadConfig() {
@@ -233,7 +233,21 @@ function applyParamsToAssertion(assertion, params) {
         );
       }
     }
-    return { ...assertion, prompt, systemPrompt };
+    return {
+      ...assertion,
+      prompt,
+      systemPrompt,
+      ...params.model !== void 0 && { model: params.model },
+      ...params.maxTokens !== void 0 && {
+        maxTokens: params.maxTokens
+      },
+      ...params.temperature !== void 0 && {
+        temperature: params.temperature
+      },
+      ...params.minScore !== void 0 && {
+        minScore: params.minScore
+      }
+    };
   }
   if (assertion.type === "skill_was_called" && params.skillNames !== void 0) {
     return {
@@ -265,7 +279,10 @@ function resolveSystemAssertion(assertionId, params) {
         type: "llm_judge",
         prompt: params?.prompt ?? "",
         systemPrompt: params?.systemPrompt,
-        minScore: params?.minScore
+        minScore: params?.minScore,
+        model: params?.model,
+        maxTokens: params?.maxTokens,
+        temperature: params?.temperature
       };
       break;
     default:
@@ -399,7 +416,7 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
 }
 // src/run-scenario/index.ts
-var import_evalforge_types4 = require("@wix/evalforge-types");
+var import_evalforge_types6 = require("@wix/evalforge-types");
 var import_eval_assertions = require("@wix/eval-assertions");
 // src/run-scenario/environment.ts
@@ -529,7 +546,7 @@ var import_crypto2 = require("crypto");
 // src/run-scenario/agents/registry.ts
 var AgentAdapterRegistry = class {
   /**
-   * Map of command strings to their registered adapters.
+   * Map of run commands to their registered adapters.
    * Multiple commands can map to the same adapter.
    */
   adapters = /* @__PURE__ */ new Map();
@@ -558,9 +575,9 @@ var AgentAdapterRegistry = class {
     }
   }
   /**
-   * Get an adapter by command string.
+   * Get an adapter by run command.
    *
-   * @param runCommand - The command string to look up (e.g., 'claude', 'cursor')
+   * @param runCommand - The run command to look up
    * @returns The registered adapter, or undefined if not found
    */
   get(runCommand) {
@@ -569,7 +586,7 @@ var AgentAdapterRegistry = class {
   /**
    * Check if a command has a registered adapter.
    *
-   * @param runCommand - The command string to check
+   * @param runCommand - The run command to check
    * @returns True if an adapter is registered for this command
    */
   has(runCommand) {
@@ -586,7 +603,7 @@ var AgentAdapterRegistry = class {
   /**
    * Get all supported commands.
    *
-   * @returns Array of all registered command strings
+   * @returns Array of all registered run commands
    */
   getSupportedCommands() {
     return Array.from(this.adapters.keys());
@@ -636,6 +653,9 @@ function getAdapter(runCommand) {
   return adapter;
 }
+// src/run-scenario/agents/claude-code/claude-code-adapter.ts
+var import_evalforge_types4 = require("@wix/evalforge-types");
 // src/run-scenario/agents/claude-code/execute.ts
 var import_evalforge_types3 = require("@wix/evalforge-types");
 var import_crypto = require("crypto");
@@ -1652,7 +1672,7 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
 var ClaudeCodeAdapter = class {
   id = "claude-code";
   name = "Claude Code";
-  supportedCommands = ["claude"];
+  supportedCommands = [import_evalforge_types4.AgentRunCommand.CLAUDE];
   /**
    * Execute a skill using the Claude Code SDK.
    *
@@ -2433,7 +2453,8 @@ function extractTemplateFiles(before, after) {
 }
 // src/run-scenario/run-agent-with-context.ts
-var DEFAULT_AGENT_COMMAND = "claude";
+var import_evalforge_types5 = require("@wix/evalforge-types");
+var DEFAULT_AGENT_COMMAND = import_evalforge_types5.AgentRunCommand.CLAUDE;
 async function runAgentWithContext(config, evalRunId2, scenario, evalData, workDir) {
   const skillsGroupId = evalData.evalRun.skillsGroupId;
   if (!skillsGroupId) {
@@ -2520,7 +2541,7 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
     }))
   };
   const { "x-wix-ai-gateway-stream": _stream, ...judgeHeaders } = config.aiGatewayHeaders;
-  const defaultJudgeModel = import_evalforge_types4.AVAILABLE_MODEL_IDS[0];
+  const defaultJudgeModel = import_evalforge_types6.DEFAULT_JUDGE_MODEL;
   const assertionContext = {
     workDir,
     defaultJudgeModel,
@@ -2535,10 +2556,10 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
     assertionContext
   ) : [];
   const passed = assertionResults.filter(
-    (r) => r.status === import_evalforge_types4.AssertionResultStatus.PASSED
+    (r) => r.status === import_evalforge_types6.AssertionResultStatus.PASSED
   ).length;
   const failed = assertionResults.filter(
-    (r) => r.status === import_evalforge_types4.AssertionResultStatus.FAILED
+    (r) => r.status === import_evalforge_types6.AssertionResultStatus.FAILED
   ).length;
   const total = assertionResults.length;
   const passRate = total > 0 ? Math.round(passed / total * 100) : 100;
@@ -2552,7 +2573,7 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
 }
 // src/error-reporter.ts
-var import_evalforge_types5 = require("@wix/evalforge-types");
+var import_evalforge_types7 = require("@wix/evalforge-types");
 function formatError(error, phase, context) {
   const timestamp = (/* @__PURE__ */ new Date()).toISOString();
   if (error instanceof Error) {
@@ -2801,7 +2822,7 @@ async function runEvaluation(projectId2, evalRunId2) {
   };
   try {
     await api.updateEvalRun(projectId2, evalRunId2, {
-      status: import_evalforge_types6.EvalStatus.COMPLETED,
+      status: import_evalforge_types8.EvalStatus.COMPLETED,
       completedAt: (/* @__PURE__ */ new Date()).toISOString()
     });
   } catch (updateErr) {
@@ -2842,7 +2863,7 @@ runEvaluation(projectId, evalRunId).then(() => {
       authToken: config.authToken
     });
     await api.updateEvalRun(projectId, evalRunId, {
-      status: import_evalforge_types6.EvalStatus.FAILED,
+      status: import_evalforge_types8.EvalStatus.FAILED,
       completedAt: (/* @__PURE__ */ new Date()).toISOString(),
       jobError,
       jobStatus: "FAILED"
@@ -2865,7 +2886,7 @@ runEvaluation(projectId, evalRunId).then(() => {
           authToken
         });
         await api.updateEvalRun(projectId, evalRunId, {
-          status: import_evalforge_types6.EvalStatus.FAILED,
+          status: import_evalforge_types8.EvalStatus.FAILED,
           completedAt: (/* @__PURE__ */ new Date()).toISOString(),
           jobError: `Config load failed, then: ${jobError}`,
           jobStatus: "FAILED"