npm - @wix/evalforge-evaluator - Versions diffs - 0.53.0 → 0.55.0 - Mend

@wix/evalforge-evaluator 0.53.0 → 0.55.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/build/index.js CHANGED Viewed

@@ -24,7 +24,7 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
 ));
 // src/index.ts
-var import_evalforge_types5 = require("@wix/evalforge-types");
+var import_evalforge_types6 = require("@wix/evalforge-types");
 // src/config.ts
 function loadConfig() {
@@ -349,7 +349,7 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
 }
 // src/run-scenario/index.ts
-var import_evalforge_types3 = require("@wix/evalforge-types");
+var import_evalforge_types4 = require("@wix/evalforge-types");
 var import_eval_assertions = require("@wix/eval-assertions");
 // src/run-scenario/environment.ts
@@ -6337,6 +6337,9 @@ function getAdapter(runCommand) {
   return adapter;
 }
+// src/run-scenario/agents/claude-code/claude-code-adapter.ts
+var import_evalforge_types3 = require("@wix/evalforge-types");
 // src/run-scenario/agents/claude-code/execute.ts
 var import_evalforge_types2 = require("@wix/evalforge-types");
 var import_crypto = require("crypto");
@@ -7310,10 +7313,11 @@ var ClaudeCodeAdapter = class {
       aiGatewayHeaders,
       traceContext
     } = context;
+    const modelForSdk = modelConfig?.model ? import_evalforge_types3.AVAILABLE_MODELS_MAP[modelConfig.model]?.providerModelId ?? modelConfig.model : void 0;
     const options = {
       cwd,
       systemPrompt: skill.skillMd,
-      model: modelConfig?.model,
+      model: modelForSdk,
       temperature: modelConfig?.temperature,
       maxTokens: modelConfig?.maxTokens,
       aiGatewayUrl,
@@ -8185,10 +8189,10 @@ async function runScenario(config, evalRunId2, scenario, target, template, resol
     assertionContext
   ) : [];
   const passed = assertionResults.filter(
-    (r) => r.status === import_evalforge_types3.AssertionResultStatus.PASSED
+    (r) => r.status === import_evalforge_types4.AssertionResultStatus.PASSED
   ).length;
   const failed = assertionResults.filter(
-    (r) => r.status === import_evalforge_types3.AssertionResultStatus.FAILED
+    (r) => r.status === import_evalforge_types4.AssertionResultStatus.FAILED
   ).length;
   const total = assertionResults.length;
   const passRate = total > 0 ? Math.round(passed / total * 100) : 100;
@@ -8202,7 +8206,7 @@ async function runScenario(config, evalRunId2, scenario, target, template, resol
 }
 // src/error-reporter.ts
-var import_evalforge_types4 = require("@wix/evalforge-types");
+var import_evalforge_types5 = require("@wix/evalforge-types");
 function formatError(error, phase, context) {
   const timestamp = (/* @__PURE__ */ new Date()).toISOString();
   if (error instanceof Error) {
@@ -8451,7 +8455,7 @@ async function runEvaluation(projectId2, evalRunId2) {
   };
   try {
     await api.updateEvalRun(projectId2, evalRunId2, {
-      status: import_evalforge_types5.EvalStatus.COMPLETED,
+      status: import_evalforge_types6.EvalStatus.COMPLETED,
       completedAt: (/* @__PURE__ */ new Date()).toISOString()
     });
   } catch (updateErr) {
@@ -8492,7 +8496,7 @@ runEvaluation(projectId, evalRunId).then(() => {
       authToken: config.authToken
     });
     await api.updateEvalRun(projectId, evalRunId, {
-      status: import_evalforge_types5.EvalStatus.FAILED,
+      status: import_evalforge_types6.EvalStatus.FAILED,
       completedAt: (/* @__PURE__ */ new Date()).toISOString(),
       jobError,
       jobStatus: "FAILED"
@@ -8515,7 +8519,7 @@ runEvaluation(projectId, evalRunId).then(() => {
           authToken
         });
         await api.updateEvalRun(projectId, evalRunId, {
-          status: import_evalforge_types5.EvalStatus.FAILED,
+          status: import_evalforge_types6.EvalStatus.FAILED,
           completedAt: (/* @__PURE__ */ new Date()).toISOString(),
           jobError: `Config load failed, then: ${jobError}`,
           jobStatus: "FAILED"