@wix/evalforge-evaluator 0.87.0 → 0.89.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -213,7 +213,21 @@ function applyParamsToAssertion(assertion, params) {
213
213
  );
214
214
  }
215
215
  }
216
- return { ...assertion, prompt, systemPrompt };
216
+ return {
217
+ ...assertion,
218
+ prompt,
219
+ systemPrompt,
220
+ ...params.model !== void 0 && { model: params.model },
221
+ ...params.maxTokens !== void 0 && {
222
+ maxTokens: params.maxTokens
223
+ },
224
+ ...params.temperature !== void 0 && {
225
+ temperature: params.temperature
226
+ },
227
+ ...params.minScore !== void 0 && {
228
+ minScore: params.minScore
229
+ }
230
+ };
217
231
  }
218
232
  if (assertion.type === "skill_was_called" && params.skillNames !== void 0) {
219
233
  return {
@@ -245,7 +259,10 @@ function resolveSystemAssertion(assertionId, params) {
245
259
  type: "llm_judge",
246
260
  prompt: params?.prompt ?? "",
247
261
  systemPrompt: params?.systemPrompt,
248
- minScore: params?.minScore
262
+ minScore: params?.minScore,
263
+ model: params?.model,
264
+ maxTokens: params?.maxTokens,
265
+ temperature: params?.temperature
249
266
  };
250
267
  break;
251
268
  default:
@@ -381,7 +398,7 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
381
398
  // src/run-scenario/index.ts
382
399
  import {
383
400
  AssertionResultStatus,
384
- AVAILABLE_MODEL_IDS
401
+ DEFAULT_JUDGE_MODEL
385
402
  } from "@wix/evalforge-types";
386
403
  import {
387
404
  evaluateAssertions as evaluateAssertionsBase
@@ -514,7 +531,7 @@ import { randomUUID as randomUUID2 } from "crypto";
514
531
  // src/run-scenario/agents/registry.ts
515
532
  var AgentAdapterRegistry = class {
516
533
  /**
517
- * Map of command strings to their registered adapters.
534
+ * Map of run commands to their registered adapters.
518
535
  * Multiple commands can map to the same adapter.
519
536
  */
520
537
  adapters = /* @__PURE__ */ new Map();
@@ -543,9 +560,9 @@ var AgentAdapterRegistry = class {
543
560
  }
544
561
  }
545
562
  /**
546
- * Get an adapter by command string.
563
+ * Get an adapter by run command.
547
564
  *
548
- * @param runCommand - The command string to look up (e.g., 'claude', 'cursor')
565
+ * @param runCommand - The run command to look up
549
566
  * @returns The registered adapter, or undefined if not found
550
567
  */
551
568
  get(runCommand) {
@@ -554,7 +571,7 @@ var AgentAdapterRegistry = class {
554
571
  /**
555
572
  * Check if a command has a registered adapter.
556
573
  *
557
- * @param runCommand - The command string to check
574
+ * @param runCommand - The run command to check
558
575
  * @returns True if an adapter is registered for this command
559
576
  */
560
577
  has(runCommand) {
@@ -571,7 +588,7 @@ var AgentAdapterRegistry = class {
571
588
  /**
572
589
  * Get all supported commands.
573
590
  *
574
- * @returns Array of all registered command strings
591
+ * @returns Array of all registered run commands
575
592
  */
576
593
  getSupportedCommands() {
577
594
  return Array.from(this.adapters.keys());
@@ -621,6 +638,9 @@ function getAdapter(runCommand) {
621
638
  return adapter;
622
639
  }
623
640
 
641
+ // src/run-scenario/agents/claude-code/claude-code-adapter.ts
642
+ import { AgentRunCommand } from "@wix/evalforge-types";
643
+
624
644
  // src/run-scenario/agents/claude-code/execute.ts
625
645
  import {
626
646
  ClaudeModel,
@@ -1642,7 +1662,7 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
1642
1662
  var ClaudeCodeAdapter = class {
1643
1663
  id = "claude-code";
1644
1664
  name = "Claude Code";
1645
- supportedCommands = ["claude"];
1665
+ supportedCommands = [AgentRunCommand.CLAUDE];
1646
1666
  /**
1647
1667
  * Execute a skill using the Claude Code SDK.
1648
1668
  *
@@ -2423,7 +2443,8 @@ function extractTemplateFiles(before, after) {
2423
2443
  }
2424
2444
 
2425
2445
  // src/run-scenario/run-agent-with-context.ts
2426
- var DEFAULT_AGENT_COMMAND = "claude";
2446
+ import { AgentRunCommand as AgentRunCommand2 } from "@wix/evalforge-types";
2447
+ var DEFAULT_AGENT_COMMAND = AgentRunCommand2.CLAUDE;
2427
2448
  async function runAgentWithContext(config, evalRunId2, scenario, evalData, workDir) {
2428
2449
  const skillsGroupId = evalData.evalRun.skillsGroupId;
2429
2450
  if (!skillsGroupId) {
@@ -2510,7 +2531,7 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
2510
2531
  }))
2511
2532
  };
2512
2533
  const { "x-wix-ai-gateway-stream": _stream, ...judgeHeaders } = config.aiGatewayHeaders;
2513
- const defaultJudgeModel = AVAILABLE_MODEL_IDS[0];
2534
+ const defaultJudgeModel = DEFAULT_JUDGE_MODEL;
2514
2535
  const assertionContext = {
2515
2536
  workDir,
2516
2537
  defaultJudgeModel,