@wix/evalforge-evaluator 0.53.0 → 0.55.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -24,7 +24,7 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
24
24
  ));
25
25
 
26
26
  // src/index.ts
27
- var import_evalforge_types5 = require("@wix/evalforge-types");
27
+ var import_evalforge_types6 = require("@wix/evalforge-types");
28
28
 
29
29
  // src/config.ts
30
30
  function loadConfig() {
@@ -349,7 +349,7 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
349
349
  }
350
350
 
351
351
  // src/run-scenario/index.ts
352
- var import_evalforge_types3 = require("@wix/evalforge-types");
352
+ var import_evalforge_types4 = require("@wix/evalforge-types");
353
353
  var import_eval_assertions = require("@wix/eval-assertions");
354
354
 
355
355
  // src/run-scenario/environment.ts
@@ -6337,6 +6337,9 @@ function getAdapter(runCommand) {
6337
6337
  return adapter;
6338
6338
  }
6339
6339
 
6340
+ // src/run-scenario/agents/claude-code/claude-code-adapter.ts
6341
+ var import_evalforge_types3 = require("@wix/evalforge-types");
6342
+
6340
6343
  // src/run-scenario/agents/claude-code/execute.ts
6341
6344
  var import_evalforge_types2 = require("@wix/evalforge-types");
6342
6345
  var import_crypto = require("crypto");
@@ -7310,10 +7313,11 @@ var ClaudeCodeAdapter = class {
7310
7313
  aiGatewayHeaders,
7311
7314
  traceContext
7312
7315
  } = context;
7316
+ const modelForSdk = modelConfig?.model ? import_evalforge_types3.AVAILABLE_MODELS_MAP[modelConfig.model]?.providerModelId ?? modelConfig.model : void 0;
7313
7317
  const options = {
7314
7318
  cwd,
7315
7319
  systemPrompt: skill.skillMd,
7316
- model: modelConfig?.model,
7320
+ model: modelForSdk,
7317
7321
  temperature: modelConfig?.temperature,
7318
7322
  maxTokens: modelConfig?.maxTokens,
7319
7323
  aiGatewayUrl,
@@ -8185,10 +8189,10 @@ async function runScenario(config, evalRunId2, scenario, target, template, resol
8185
8189
  assertionContext
8186
8190
  ) : [];
8187
8191
  const passed = assertionResults.filter(
8188
- (r) => r.status === import_evalforge_types3.AssertionResultStatus.PASSED
8192
+ (r) => r.status === import_evalforge_types4.AssertionResultStatus.PASSED
8189
8193
  ).length;
8190
8194
  const failed = assertionResults.filter(
8191
- (r) => r.status === import_evalforge_types3.AssertionResultStatus.FAILED
8195
+ (r) => r.status === import_evalforge_types4.AssertionResultStatus.FAILED
8192
8196
  ).length;
8193
8197
  const total = assertionResults.length;
8194
8198
  const passRate = total > 0 ? Math.round(passed / total * 100) : 100;
@@ -8202,7 +8206,7 @@ async function runScenario(config, evalRunId2, scenario, target, template, resol
8202
8206
  }
8203
8207
 
8204
8208
  // src/error-reporter.ts
8205
- var import_evalforge_types4 = require("@wix/evalforge-types");
8209
+ var import_evalforge_types5 = require("@wix/evalforge-types");
8206
8210
  function formatError(error, phase, context) {
8207
8211
  const timestamp = (/* @__PURE__ */ new Date()).toISOString();
8208
8212
  if (error instanceof Error) {
@@ -8451,7 +8455,7 @@ async function runEvaluation(projectId2, evalRunId2) {
8451
8455
  };
8452
8456
  try {
8453
8457
  await api.updateEvalRun(projectId2, evalRunId2, {
8454
- status: import_evalforge_types5.EvalStatus.COMPLETED,
8458
+ status: import_evalforge_types6.EvalStatus.COMPLETED,
8455
8459
  completedAt: (/* @__PURE__ */ new Date()).toISOString()
8456
8460
  });
8457
8461
  } catch (updateErr) {
@@ -8492,7 +8496,7 @@ runEvaluation(projectId, evalRunId).then(() => {
8492
8496
  authToken: config.authToken
8493
8497
  });
8494
8498
  await api.updateEvalRun(projectId, evalRunId, {
8495
- status: import_evalforge_types5.EvalStatus.FAILED,
8499
+ status: import_evalforge_types6.EvalStatus.FAILED,
8496
8500
  completedAt: (/* @__PURE__ */ new Date()).toISOString(),
8497
8501
  jobError,
8498
8502
  jobStatus: "FAILED"
@@ -8515,7 +8519,7 @@ runEvaluation(projectId, evalRunId).then(() => {
8515
8519
  authToken
8516
8520
  });
8517
8521
  await api.updateEvalRun(projectId, evalRunId, {
8518
- status: import_evalforge_types5.EvalStatus.FAILED,
8522
+ status: import_evalforge_types6.EvalStatus.FAILED,
8519
8523
  completedAt: (/* @__PURE__ */ new Date()).toISOString(),
8520
8524
  jobError: `Config load failed, then: ${jobError}`,
8521
8525
  jobStatus: "FAILED"