@wix/evalforge-evaluator 0.88.0 → 0.89.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -24,7 +24,7 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
24
24
  ));
25
25
 
26
26
  // src/index.ts
27
- var import_evalforge_types6 = require("@wix/evalforge-types");
27
+ var import_evalforge_types8 = require("@wix/evalforge-types");
28
28
 
29
29
  // src/config.ts
30
30
  function loadConfig() {
@@ -416,7 +416,7 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
416
416
  }
417
417
 
418
418
  // src/run-scenario/index.ts
419
- var import_evalforge_types4 = require("@wix/evalforge-types");
419
+ var import_evalforge_types6 = require("@wix/evalforge-types");
420
420
  var import_eval_assertions = require("@wix/eval-assertions");
421
421
 
422
422
  // src/run-scenario/environment.ts
@@ -546,7 +546,7 @@ var import_crypto2 = require("crypto");
546
546
  // src/run-scenario/agents/registry.ts
547
547
  var AgentAdapterRegistry = class {
548
548
  /**
549
- * Map of command strings to their registered adapters.
549
+ * Map of run commands to their registered adapters.
550
550
  * Multiple commands can map to the same adapter.
551
551
  */
552
552
  adapters = /* @__PURE__ */ new Map();
@@ -575,9 +575,9 @@ var AgentAdapterRegistry = class {
575
575
  }
576
576
  }
577
577
  /**
578
- * Get an adapter by command string.
578
+ * Get an adapter by run command.
579
579
  *
580
- * @param runCommand - The command string to look up (e.g., 'claude', 'cursor')
580
+ * @param runCommand - The run command to look up
581
581
  * @returns The registered adapter, or undefined if not found
582
582
  */
583
583
  get(runCommand) {
@@ -586,7 +586,7 @@ var AgentAdapterRegistry = class {
586
586
  /**
587
587
  * Check if a command has a registered adapter.
588
588
  *
589
- * @param runCommand - The command string to check
589
+ * @param runCommand - The run command to check
590
590
  * @returns True if an adapter is registered for this command
591
591
  */
592
592
  has(runCommand) {
@@ -603,7 +603,7 @@ var AgentAdapterRegistry = class {
603
603
  /**
604
604
  * Get all supported commands.
605
605
  *
606
- * @returns Array of all registered command strings
606
+ * @returns Array of all registered run commands
607
607
  */
608
608
  getSupportedCommands() {
609
609
  return Array.from(this.adapters.keys());
@@ -653,6 +653,9 @@ function getAdapter(runCommand) {
653
653
  return adapter;
654
654
  }
655
655
 
656
+ // src/run-scenario/agents/claude-code/claude-code-adapter.ts
657
+ var import_evalforge_types4 = require("@wix/evalforge-types");
658
+
656
659
  // src/run-scenario/agents/claude-code/execute.ts
657
660
  var import_evalforge_types3 = require("@wix/evalforge-types");
658
661
  var import_crypto = require("crypto");
@@ -1669,7 +1672,7 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
1669
1672
  var ClaudeCodeAdapter = class {
1670
1673
  id = "claude-code";
1671
1674
  name = "Claude Code";
1672
- supportedCommands = ["claude"];
1675
+ supportedCommands = [import_evalforge_types4.AgentRunCommand.CLAUDE];
1673
1676
  /**
1674
1677
  * Execute a skill using the Claude Code SDK.
1675
1678
  *
@@ -2450,7 +2453,8 @@ function extractTemplateFiles(before, after) {
2450
2453
  }
2451
2454
 
2452
2455
  // src/run-scenario/run-agent-with-context.ts
2453
- var DEFAULT_AGENT_COMMAND = "claude";
2456
+ var import_evalforge_types5 = require("@wix/evalforge-types");
2457
+ var DEFAULT_AGENT_COMMAND = import_evalforge_types5.AgentRunCommand.CLAUDE;
2454
2458
  async function runAgentWithContext(config, evalRunId2, scenario, evalData, workDir) {
2455
2459
  const skillsGroupId = evalData.evalRun.skillsGroupId;
2456
2460
  if (!skillsGroupId) {
@@ -2537,7 +2541,7 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
2537
2541
  }))
2538
2542
  };
2539
2543
  const { "x-wix-ai-gateway-stream": _stream, ...judgeHeaders } = config.aiGatewayHeaders;
2540
- const defaultJudgeModel = import_evalforge_types4.DEFAULT_JUDGE_MODEL;
2544
+ const defaultJudgeModel = import_evalforge_types6.DEFAULT_JUDGE_MODEL;
2541
2545
  const assertionContext = {
2542
2546
  workDir,
2543
2547
  defaultJudgeModel,
@@ -2552,10 +2556,10 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
2552
2556
  assertionContext
2553
2557
  ) : [];
2554
2558
  const passed = assertionResults.filter(
2555
- (r) => r.status === import_evalforge_types4.AssertionResultStatus.PASSED
2559
+ (r) => r.status === import_evalforge_types6.AssertionResultStatus.PASSED
2556
2560
  ).length;
2557
2561
  const failed = assertionResults.filter(
2558
- (r) => r.status === import_evalforge_types4.AssertionResultStatus.FAILED
2562
+ (r) => r.status === import_evalforge_types6.AssertionResultStatus.FAILED
2559
2563
  ).length;
2560
2564
  const total = assertionResults.length;
2561
2565
  const passRate = total > 0 ? Math.round(passed / total * 100) : 100;
@@ -2569,7 +2573,7 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
2569
2573
  }
2570
2574
 
2571
2575
  // src/error-reporter.ts
2572
- var import_evalforge_types5 = require("@wix/evalforge-types");
2576
+ var import_evalforge_types7 = require("@wix/evalforge-types");
2573
2577
  function formatError(error, phase, context) {
2574
2578
  const timestamp = (/* @__PURE__ */ new Date()).toISOString();
2575
2579
  if (error instanceof Error) {
@@ -2818,7 +2822,7 @@ async function runEvaluation(projectId2, evalRunId2) {
2818
2822
  };
2819
2823
  try {
2820
2824
  await api.updateEvalRun(projectId2, evalRunId2, {
2821
- status: import_evalforge_types6.EvalStatus.COMPLETED,
2825
+ status: import_evalforge_types8.EvalStatus.COMPLETED,
2822
2826
  completedAt: (/* @__PURE__ */ new Date()).toISOString()
2823
2827
  });
2824
2828
  } catch (updateErr) {
@@ -2859,7 +2863,7 @@ runEvaluation(projectId, evalRunId).then(() => {
2859
2863
  authToken: config.authToken
2860
2864
  });
2861
2865
  await api.updateEvalRun(projectId, evalRunId, {
2862
- status: import_evalforge_types6.EvalStatus.FAILED,
2866
+ status: import_evalforge_types8.EvalStatus.FAILED,
2863
2867
  completedAt: (/* @__PURE__ */ new Date()).toISOString(),
2864
2868
  jobError,
2865
2869
  jobStatus: "FAILED"
@@ -2882,7 +2886,7 @@ runEvaluation(projectId, evalRunId).then(() => {
2882
2886
  authToken
2883
2887
  });
2884
2888
  await api.updateEvalRun(projectId, evalRunId, {
2885
- status: import_evalforge_types6.EvalStatus.FAILED,
2889
+ status: import_evalforge_types8.EvalStatus.FAILED,
2886
2890
  completedAt: (/* @__PURE__ */ new Date()).toISOString(),
2887
2891
  jobError: `Config load failed, then: ${jobError}`,
2888
2892
  jobStatus: "FAILED"