@mastra/evals 1.2.4 → 1.3.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  'use strict';
2
2
 
3
- var chunkBULMCHKJ_cjs = require('../../chunk-BULMCHKJ.cjs');
3
+ var chunkUNQXHPOD_cjs = require('../../chunk-UNQXHPOD.cjs');
4
4
  var evals = require('@mastra/core/evals');
5
5
  var nlp = require('compromise');
6
6
  var keyword_extractor = require('keyword-extractor');
@@ -250,7 +250,7 @@ function createAnswerRelevancyScorer({
250
250
  description: "Extract relevant statements from the LLM output",
251
251
  outputSchema: extractOutputSchema,
252
252
  createPrompt: ({ run }) => {
253
- const assistantMessage = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
253
+ const assistantMessage = chunkUNQXHPOD_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
254
254
  return createExtractPrompt(assistantMessage);
255
255
  }
256
256
  }).analyze({
@@ -283,7 +283,7 @@ function createAnswerRelevancyScorer({
283
283
  ]
284
284
  },
285
285
  createPrompt: ({ run, results }) => {
286
- const input = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
286
+ const input = chunkUNQXHPOD_cjs.getUserMessageFromRunInput(run.input) ?? "";
287
287
  return createScorePrompt(JSON.stringify(input), results.preprocessStepResult?.statements || []);
288
288
  }
289
289
  }).generateScore(({ results }) => {
@@ -300,13 +300,13 @@ function createAnswerRelevancyScorer({
300
300
  }
301
301
  }
302
302
  const score = relevancyCount / numberOfResults;
303
- return chunkBULMCHKJ_cjs.roundToTwoDecimals(score * options.scale);
303
+ return chunkUNQXHPOD_cjs.roundToTwoDecimals(score * options.scale);
304
304
  }).generateReason({
305
305
  description: "Reason about the results",
306
306
  createPrompt: ({ run, results, score }) => {
307
307
  return createReasonPrompt({
308
- input: chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "",
309
- output: chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
308
+ input: chunkUNQXHPOD_cjs.getUserMessageFromRunInput(run.input) ?? "",
309
+ output: chunkUNQXHPOD_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
310
310
  score,
311
311
  results: results.analyzeStepResult.results,
312
312
  scale: options.scale
@@ -581,7 +581,7 @@ function createAnswerSimilarityScorer({
581
581
  groundTruth: ""
582
582
  });
583
583
  }
584
- const output = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
584
+ const output = chunkUNQXHPOD_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
585
585
  const groundTruth = typeof run.groundTruth === "string" ? run.groundTruth : JSON.stringify(run.groundTruth);
586
586
  return createExtractPrompt2({
587
587
  output,
@@ -639,14 +639,14 @@ function createAnswerSimilarityScorer({
639
639
  );
640
640
  score -= extraInfoPenalty;
641
641
  score = Math.max(0, Math.min(1, score));
642
- return chunkBULMCHKJ_cjs.roundToTwoDecimals(score * mergedOptions.scale);
642
+ return chunkUNQXHPOD_cjs.roundToTwoDecimals(score * mergedOptions.scale);
643
643
  }).generateReason({
644
644
  description: "Generate explanation of similarity score",
645
645
  createPrompt: ({ run, results, score }) => {
646
646
  if (!run.groundTruth) {
647
647
  return "No ground truth was provided for comparison. Score is 0 by default.";
648
648
  }
649
- const output = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
649
+ const output = chunkUNQXHPOD_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
650
650
  const groundTruth = typeof run.groundTruth === "string" ? run.groundTruth : JSON.stringify(run.groundTruth);
651
651
  return createReasonPrompt2({
652
652
  output,
@@ -848,7 +848,7 @@ function createFaithfulnessScorer({
848
848
  ]
849
849
  },
850
850
  createPrompt: ({ run }) => {
851
- const prompt = createFaithfulnessExtractPrompt({ output: chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" });
851
+ const prompt = createFaithfulnessExtractPrompt({ output: chunkUNQXHPOD_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" });
852
852
  return prompt;
853
853
  }
854
854
  }).analyze({
@@ -895,13 +895,13 @@ function createFaithfulnessScorer({
895
895
  return 0;
896
896
  }
897
897
  const score = supportedClaims / totalClaims * (options?.scale || 1);
898
- return chunkBULMCHKJ_cjs.roundToTwoDecimals(score);
898
+ return chunkUNQXHPOD_cjs.roundToTwoDecimals(score);
899
899
  }).generateReason({
900
900
  description: "Reason about the results",
901
901
  createPrompt: ({ run, results, score }) => {
902
902
  const prompt = createFaithfulnessReasonPrompt({
903
- input: chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "",
904
- output: chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
903
+ input: chunkUNQXHPOD_cjs.getUserMessageFromRunInput(run.input) ?? "",
904
+ output: chunkUNQXHPOD_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
905
905
  context: options?.context ?? getToolInvocationContext(run.output),
906
906
  score,
907
907
  scale: options?.scale || 1,
@@ -1046,7 +1046,7 @@ function createBiasScorer({ model, options }) {
1046
1046
  "opinions"
1047
1047
  ]
1048
1048
  },
1049
- createPrompt: ({ run }) => createBiasExtractPrompt({ output: chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" })
1049
+ createPrompt: ({ run }) => createBiasExtractPrompt({ output: chunkUNQXHPOD_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" })
1050
1050
  }).analyze({
1051
1051
  description: "Score the relevance of the statements to the input",
1052
1052
  outputSchema: {
@@ -1078,7 +1078,7 @@ function createBiasScorer({ model, options }) {
1078
1078
  },
1079
1079
  createPrompt: ({ run, results }) => {
1080
1080
  const prompt = createBiasAnalyzePrompt({
1081
- output: chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
1081
+ output: chunkUNQXHPOD_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
1082
1082
  opinions: results.preprocessStepResult?.opinions || []
1083
1083
  });
1084
1084
  return prompt;
@@ -1089,7 +1089,7 @@ function createBiasScorer({ model, options }) {
1089
1089
  }
1090
1090
  const biasedVerdicts = results.analyzeStepResult.results.filter((v) => v.result.toLowerCase() === "yes");
1091
1091
  const score = biasedVerdicts.length / results.analyzeStepResult.results.length;
1092
- return chunkBULMCHKJ_cjs.roundToTwoDecimals(score * (options?.scale || 1));
1092
+ return chunkUNQXHPOD_cjs.roundToTwoDecimals(score * (options?.scale || 1));
1093
1093
  }).generateReason({
1094
1094
  description: "Reason about the results",
1095
1095
  createPrompt: ({ score, results }) => {
@@ -1320,7 +1320,7 @@ function createHallucinationScorer({
1320
1320
  ]
1321
1321
  },
1322
1322
  createPrompt: ({ run }) => {
1323
- const prompt = createHallucinationExtractPrompt({ output: chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" });
1323
+ const prompt = createHallucinationExtractPrompt({ output: chunkUNQXHPOD_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" });
1324
1324
  return prompt;
1325
1325
  }
1326
1326
  }).analyze({
@@ -1376,7 +1376,7 @@ function createHallucinationScorer({
1376
1376
  return 0;
1377
1377
  }
1378
1378
  const score = contradictedStatements / totalStatements * (options?.scale || 1);
1379
- return chunkBULMCHKJ_cjs.roundToTwoDecimals(score);
1379
+ return chunkUNQXHPOD_cjs.roundToTwoDecimals(score);
1380
1380
  }).generateReason({
1381
1381
  description: "Reason about the results",
1382
1382
  createPrompt: async ({ run, results, score }) => {
@@ -1387,8 +1387,8 @@ function createHallucinationScorer({
1387
1387
  context = options?.context ?? [];
1388
1388
  }
1389
1389
  const prompt = createHallucinationReasonPrompt({
1390
- input: chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "",
1391
- output: chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
1390
+ input: chunkUNQXHPOD_cjs.getUserMessageFromRunInput(run.input) ?? "",
1391
+ output: chunkUNQXHPOD_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
1392
1392
  context,
1393
1393
  score,
1394
1394
  scale: options?.scale || 1,
@@ -1528,8 +1528,8 @@ function createToxicityScorer({
1528
1528
  },
1529
1529
  createPrompt: ({ run }) => {
1530
1530
  const prompt = createToxicityAnalyzePrompt({
1531
- input: chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "",
1532
- output: chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? ""
1531
+ input: chunkUNQXHPOD_cjs.getUserMessageFromRunInput(run.input) ?? "",
1532
+ output: chunkUNQXHPOD_cjs.getAssistantMessageFromRunOutput(run.output) ?? ""
1533
1533
  });
1534
1534
  return prompt;
1535
1535
  }
@@ -1545,7 +1545,7 @@ function createToxicityScorer({
1545
1545
  }
1546
1546
  }
1547
1547
  const score = toxicityCount / numberOfVerdicts;
1548
- return chunkBULMCHKJ_cjs.roundToTwoDecimals(score * (options?.scale || 1));
1548
+ return chunkUNQXHPOD_cjs.roundToTwoDecimals(score * (options?.scale || 1));
1549
1549
  }).generateReason({
1550
1550
  description: "Reason about the results",
1551
1551
  createPrompt: ({ results, score }) => {
@@ -1706,7 +1706,7 @@ function createToolCallAccuracyScorerLLM({ model, availableTools }) {
1706
1706
  if (isInputInvalid || isOutputInvalid) {
1707
1707
  throw new Error("Input and output messages cannot be null or empty");
1708
1708
  }
1709
- const { tools: actualTools, toolCallInfos } = chunkBULMCHKJ_cjs.extractToolCalls(run.output);
1709
+ const { tools: actualTools, toolCallInfos } = chunkUNQXHPOD_cjs.extractToolCalls(run.output);
1710
1710
  return {
1711
1711
  actualTools,
1712
1712
  hasToolCalls: actualTools.length > 0,
@@ -1716,8 +1716,8 @@ function createToolCallAccuracyScorerLLM({ model, availableTools }) {
1716
1716
  description: "Analyze the appropriateness of tool selections",
1717
1717
  outputSchema: analyzeOutputSchema2,
1718
1718
  createPrompt: ({ run, results }) => {
1719
- const userInput = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
1720
- const agentResponse = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
1719
+ const userInput = chunkUNQXHPOD_cjs.getUserMessageFromRunInput(run.input) ?? "";
1720
+ const agentResponse = chunkUNQXHPOD_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
1721
1721
  const toolsCalled = results.preprocessStepResult?.actualTools || [];
1722
1722
  return createAnalyzePrompt2({
1723
1723
  userInput,
@@ -1734,11 +1734,11 @@ function createToolCallAccuracyScorerLLM({ model, availableTools }) {
1734
1734
  }
1735
1735
  const appropriateToolCalls = evaluations.filter((e) => e.wasAppropriate).length;
1736
1736
  const totalToolCalls = evaluations.length;
1737
- return chunkBULMCHKJ_cjs.roundToTwoDecimals(appropriateToolCalls / totalToolCalls);
1737
+ return chunkUNQXHPOD_cjs.roundToTwoDecimals(appropriateToolCalls / totalToolCalls);
1738
1738
  }).generateReason({
1739
1739
  description: "Generate human-readable explanation of tool selection evaluation",
1740
1740
  createPrompt: ({ run, results, score }) => {
1741
- const userInput = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
1741
+ const userInput = chunkUNQXHPOD_cjs.getUserMessageFromRunInput(run.input) ?? "";
1742
1742
  const evaluations = results.analyzeStepResult?.evaluations || [];
1743
1743
  const missingTools = results.analyzeStepResult?.missingTools || [];
1744
1744
  return createReasonPrompt3({
@@ -1968,7 +1968,7 @@ var getContext = ({
1968
1968
  output,
1969
1969
  options
1970
1970
  }) => {
1971
- if (options.contextExtractor && chunkBULMCHKJ_cjs.isScorerRunInputForAgent(input) && chunkBULMCHKJ_cjs.isScorerRunOutputForAgent(output)) {
1971
+ if (options.contextExtractor && chunkUNQXHPOD_cjs.isScorerRunInputForAgent(input) && chunkUNQXHPOD_cjs.isScorerRunOutputForAgent(output)) {
1972
1972
  return options.contextExtractor(input, output);
1973
1973
  }
1974
1974
  return options.context ?? [];
@@ -1996,8 +1996,8 @@ function createContextRelevanceScorerLLM({
1996
1996
  description: "Analyze the relevance and utility of provided context",
1997
1997
  outputSchema: analyzeOutputSchema3,
1998
1998
  createPrompt: ({ run }) => {
1999
- const userQuery = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
2000
- const agentResponse = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
1999
+ const userQuery = chunkUNQXHPOD_cjs.getUserMessageFromRunInput(run.input) ?? "";
2000
+ const agentResponse = chunkUNQXHPOD_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
2001
2001
  const context = getContext({ input: run.input, output: run.output, options });
2002
2002
  if (context.length === 0) {
2003
2003
  return createAnalyzePrompt3({
@@ -2045,11 +2045,11 @@ function createContextRelevanceScorerLLM({
2045
2045
  const missingContextPenalty = Math.min(missingContext.length * missingPenaltyRate, maxMissingPenalty);
2046
2046
  const finalScore = Math.max(0, relevanceScore - usagePenalty - missingContextPenalty);
2047
2047
  const scaledScore = finalScore * (options.scale || 1);
2048
- return chunkBULMCHKJ_cjs.roundToTwoDecimals(scaledScore);
2048
+ return chunkUNQXHPOD_cjs.roundToTwoDecimals(scaledScore);
2049
2049
  }).generateReason({
2050
2050
  description: "Generate human-readable explanation of context relevance evaluation",
2051
2051
  createPrompt: ({ run, results, score }) => {
2052
- const userQuery = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
2052
+ const userQuery = chunkUNQXHPOD_cjs.getUserMessageFromRunInput(run.input) ?? "";
2053
2053
  const context = getContext({ input: run.input, output: run.output, options });
2054
2054
  if (context.length === 0) {
2055
2055
  return `No context was available for evaluation. The agent response was generated without any supporting context. Score: ${score}`;
@@ -2224,7 +2224,7 @@ var getContext2 = ({
2224
2224
  output,
2225
2225
  options
2226
2226
  }) => {
2227
- if (options.contextExtractor && chunkBULMCHKJ_cjs.isScorerRunInputForAgent(input) && chunkBULMCHKJ_cjs.isScorerRunOutputForAgent(output)) {
2227
+ if (options.contextExtractor && chunkUNQXHPOD_cjs.isScorerRunInputForAgent(input) && chunkUNQXHPOD_cjs.isScorerRunOutputForAgent(output)) {
2228
2228
  return options.contextExtractor(input, output);
2229
2229
  }
2230
2230
  return options.context ?? [];
@@ -2252,8 +2252,8 @@ function createContextPrecisionScorer({
2252
2252
  description: "Evaluate the relevance of each context piece for generating the expected output",
2253
2253
  outputSchema: contextRelevanceOutputSchema,
2254
2254
  createPrompt: ({ run }) => {
2255
- const input = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
2256
- const output = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
2255
+ const input = chunkUNQXHPOD_cjs.getUserMessageFromRunInput(run.input) ?? "";
2256
+ const output = chunkUNQXHPOD_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
2257
2257
  const context = getContext2({ input: run.input, output: run.output, options });
2258
2258
  if (context.length === 0) {
2259
2259
  throw new Error("No context available for evaluation");
@@ -2286,12 +2286,12 @@ function createContextPrecisionScorer({
2286
2286
  }
2287
2287
  const map = sumPrecision / relevantCount;
2288
2288
  const score = map * (options.scale || 1);
2289
- return chunkBULMCHKJ_cjs.roundToTwoDecimals(score);
2289
+ return chunkUNQXHPOD_cjs.roundToTwoDecimals(score);
2290
2290
  }).generateReason({
2291
2291
  description: "Reason about the context precision results",
2292
2292
  createPrompt: ({ run, results, score }) => {
2293
- const input = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
2294
- const output = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
2293
+ const input = chunkUNQXHPOD_cjs.getUserMessageFromRunInput(run.input) ?? "";
2294
+ const output = chunkUNQXHPOD_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
2295
2295
  const context = getContext2({ input: run.input, output: run.output, options });
2296
2296
  return createContextPrecisionReasonPrompt({
2297
2297
  input,
@@ -2589,8 +2589,8 @@ function createNoiseSensitivityScorerLLM({
2589
2589
  description: "Analyze the impact of noise on agent response quality",
2590
2590
  outputSchema: analyzeOutputSchema4,
2591
2591
  createPrompt: ({ run }) => {
2592
- const originalQuery = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
2593
- const noisyResponse = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
2592
+ const originalQuery = chunkUNQXHPOD_cjs.getUserMessageFromRunInput(run.input) ?? "";
2593
+ const noisyResponse = chunkUNQXHPOD_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
2594
2594
  if (!originalQuery || !noisyResponse) {
2595
2595
  throw new Error("Both original query and noisy response are required for evaluation");
2596
2596
  }
@@ -2633,11 +2633,11 @@ function createNoiseSensitivityScorerLLM({
2633
2633
  const majorIssues = analysisResult.majorIssues || [];
2634
2634
  const issuesPenalty = Math.min(majorIssues.length * majorIssuePenaltyRate, maxMajorIssuePenalty);
2635
2635
  finalScore = Math.max(0, finalScore - issuesPenalty);
2636
- return chunkBULMCHKJ_cjs.roundToTwoDecimals(finalScore);
2636
+ return chunkUNQXHPOD_cjs.roundToTwoDecimals(finalScore);
2637
2637
  }).generateReason({
2638
2638
  description: "Generate human-readable explanation of noise sensitivity evaluation",
2639
2639
  createPrompt: ({ run, results, score }) => {
2640
- const originalQuery = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
2640
+ const originalQuery = chunkUNQXHPOD_cjs.getUserMessageFromRunInput(run.input) ?? "";
2641
2641
  const analysisResult = results.analyzeStepResult;
2642
2642
  if (!analysisResult) {
2643
2643
  throw new Error("Analysis step failed to produce results for reason generation");
@@ -3049,9 +3049,9 @@ function createPromptAlignmentScorerLLM({
3049
3049
  description: "Analyze prompt-response alignment across multiple dimensions",
3050
3050
  outputSchema: analyzeOutputSchema5,
3051
3051
  createPrompt: ({ run }) => {
3052
- const userPrompt = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
3053
- const systemPrompt = chunkBULMCHKJ_cjs.getCombinedSystemPrompt(run.input) ?? "";
3054
- const agentResponse = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
3052
+ const userPrompt = chunkUNQXHPOD_cjs.getUserMessageFromRunInput(run.input) ?? "";
3053
+ const systemPrompt = chunkUNQXHPOD_cjs.getCombinedSystemPrompt(run.input) ?? "";
3054
+ const agentResponse = chunkUNQXHPOD_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
3055
3055
  if (evaluationMode === "user" && !userPrompt) {
3056
3056
  throw new Error("User prompt is required for user prompt alignment scoring");
3057
3057
  }
@@ -3087,12 +3087,12 @@ function createPromptAlignmentScorerLLM({
3087
3087
  weightedScore = userScore * SCORING_WEIGHTS.BOTH.USER_WEIGHT + systemScore * SCORING_WEIGHTS.BOTH.SYSTEM_WEIGHT;
3088
3088
  }
3089
3089
  const finalScore = weightedScore * scale;
3090
- return chunkBULMCHKJ_cjs.roundToTwoDecimals(finalScore);
3090
+ return chunkUNQXHPOD_cjs.roundToTwoDecimals(finalScore);
3091
3091
  }).generateReason({
3092
3092
  description: "Generate human-readable explanation of prompt alignment evaluation",
3093
3093
  createPrompt: ({ run, results, score }) => {
3094
- const userPrompt = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
3095
- const systemPrompt = chunkBULMCHKJ_cjs.getCombinedSystemPrompt(run.input) ?? "";
3094
+ const userPrompt = chunkUNQXHPOD_cjs.getUserMessageFromRunInput(run.input) ?? "";
3095
+ const systemPrompt = chunkUNQXHPOD_cjs.getCombinedSystemPrompt(run.input) ?? "";
3096
3096
  const analysis = results.analyzeStepResult;
3097
3097
  if (!analysis) {
3098
3098
  return `Unable to analyze prompt alignment. Score: ${score}`;
@@ -3109,6 +3109,203 @@ function createPromptAlignmentScorerLLM({
3109
3109
  });
3110
3110
  }
3111
3111
 
3112
+ // src/scorers/llm/rubric/prompts.ts
3113
// Judge instructions for the rubric scorer. Kept as one line per entry so the
// grading guidelines are easy to scan and diff; joined with newlines at load.
var RUBRIC_INSTRUCTIONS = [
  "You are an exacting grader. Your job is to judge whether an agent's output satisfies each criterion in a rubric.",
  "",
  "A rubric is a checklist of criteria. For each criterion you must decide, strictly and independently, whether the output satisfies it.",
  "",
  "Grading guidelines:",
  "- Judge each criterion on its own merits. Do not let one criterion's verdict influence another.",
  '- A criterion is "satisfied" only when the output clearly and fully meets it. When in doubt, mark it as NOT satisfied.',
  "- Base your judgement on evidence in the output (and the original task for context). Do not assume facts that are not present.",
  "- Be concise but specific in your reasoning: say what is present or missing.",
  "- Do not reward effort, intent, or partial progress. Only the actual output counts."
].join("\n");
3123
/**
 * Builds the user prompt that asks the judge to grade an output against a rubric.
 *
 * @param {object} args
 * @param {string} args.originalTask - Task text; a placeholder is used when empty.
 * @param {string} args.output - Agent output to grade; a placeholder is used when empty.
 * @param {{criterion: string, required: boolean}[]} args.criteria - Rubric entries to render.
 * @returns {string} The full grading prompt, including the expected JSON shape.
 */
function createAnalyzePrompt6({
  originalTask,
  output,
  criteria
}) {
  // Numbered checklist, one criterion per line, tagged required/optional.
  const checklist = [];
  criteria.forEach((entry, index) => {
    const tag = entry.required ? "required" : "optional";
    checklist.push(`${index + 1}. [${tag}] ${entry.criterion}`);
  });
  const promptLines = [
    "Grade the agent's output against the rubric below.",
    "",
    "Original task:",
    `${originalTask || "(no task provided)"}`,
    "",
    "Rubric criteria:",
    `${checklist.join("\n")}`,
    "",
    "Agent output to grade:",
    `${output || "(empty output)"}`,
    "",
    "For every criterion, decide whether the output satisfies it. Preserve the exact criterion text and its required/optional designation in your answer.",
    "",
    "Return your judgement as JSON in this shape:",
    "{",
    '"criteria": [',
    "{",
    '"criterion": "exact criterion text",',
    '"satisfied": true,',
    '"required": true,',
    '"reasoning": "why it is or is not satisfied"',
    "}",
    "],",
    '"overallAssessment": "one or two sentence summary of what passed and what is missing"',
    "}"
  ];
  return promptLines.join("\n");
}
3155
/**
 * Renders a human-readable explanation of a rubric grading result.
 *
 * Completeness is derived from the analysis itself rather than from the numeric
 * score: the score is multiplied by a caller-chosen scale, so comparing it to 1
 * mislabels a fully satisfied rubric whenever the scale is below 1.
 *
 * The gating rule matches the scorer: required criteria gate the result; when
 * no criterion is marked required, every criterion gates it.
 *
 * @param {object} args
 * @param {number} [args.score] - Accepted for backward compatibility; not used.
 * @param {{criteria: {criterion: string, satisfied: boolean, required: boolean, reasoning: string}[], overallAssessment?: string}} args.analysis
 *   Judge output to summarize.
 * @returns {string} Header, one entry per criterion, then the optional overall
 *   assessment and, when incomplete, a "To finish" footer.
 */
function formatRubricReason({ analysis }) {
  const criteria = analysis.criteria;
  const requiredCriteria = criteria.filter((c) => c.required);
  const hasRequired = requiredCriteria.length > 0;
  const gating = hasRequired ? requiredCriteria : criteria;
  const unmet = gating.filter((c) => !c.satisfied);
  const complete = unmet.length === 0;

  const header = complete
    ? "\u2705 Rubric satisfied: every required criterion is met."
    : "\u274C Rubric not yet satisfied.";

  const lines = criteria.map((c) => {
    const mark = c.satisfied ? "\u2705" : "\u274C";
    const tag = c.required ? "required" : "optional";
    return `${mark} [${tag}] ${c.criterion}\n\u2192 ${c.reasoning}`;
  });

  const assessment = analysis.overallAssessment ? `\n\n${analysis.overallAssessment}` : "";

  let footer = "";
  if (!complete) {
    // Only call the criteria "required" when required ones are actually gating.
    const qualifier = hasRequired ? "required " : "";
    const noun = unmet.length === 1 ? "criterion" : "criteria";
    footer = `\n\nTo finish, address the ${unmet.length} unmet ${qualifier}${noun} above.`;
  }

  return `${header}\n\n${lines.join("\n")}${assessment}${footer}`;
}
3175
+
3176
+ // src/scorers/llm/rubric/index.ts
3177
// JSON Schema (draft 2020-12) for the rubric judge's structured answer:
// a list of per-criterion verdicts plus a free-text overall assessment.
var analyzeOutputSchema6 = (() => {
  // Fresh object per call so no two schema nodes share a reference.
  const ofType = (type) => ({ type });
  const verdictSchema = {
    type: "object",
    properties: {
      criterion: ofType("string"),
      satisfied: ofType("boolean"),
      required: ofType("boolean"),
      reasoning: ofType("string")
    },
    required: ["criterion", "satisfied", "required", "reasoning"]
  };
  return {
    $schema: "https://json-schema.org/draft/2020-12/schema",
    type: "object",
    properties: {
      criteria: { type: "array", items: verdictSchema },
      overallAssessment: ofType("string")
    },
    required: ["criteria", "overallAssessment"]
  };
})();
3216
/**
 * Splits a free-text rubric into one required criterion per non-empty line.
 *
 * A single leading list marker is removed from each line: a bullet (`-`, `*`,
 * `•`) or a number followed by `.` or `)`. The numeric marker is not stripped
 * when a digit follows it, so a criterion that begins with a decimal value
 * (e.g. "1.5 seconds or faster") keeps its leading number intact.
 *
 * @param {string} rubric - Newline-separated rubric text.
 * @returns {{description: string, required: boolean}[]} Parsed criteria, all
 *   marked required; blank lines are dropped.
 */
function parseRubricString(rubric) {
  const listMarker = /^\s*(?:[-*•]|\d+[.)](?!\d))\s*/;
  return rubric
    .split("\n")
    .map((line) => line.replace(listMarker, "").trim())
    .filter((line) => line.length > 0)
    .map((description) => ({ description, required: true }));
}
3219
/**
 * Coerces a rubric given as text or as a list into list form.
 *
 * @param {string|Array|null|undefined} rubric - Rubric text, a criteria list, or nothing.
 * @returns {Array} `[]` for any falsy input, parsed criteria for a non-empty
 *   string, otherwise the input value unchanged.
 */
function normalizeRubric(rubric) {
  if (typeof rubric === "string") {
    // An empty string is "no rubric", not a rubric with zero lines to parse.
    return rubric ? parseRubricString(rubric) : [];
  }
  return rubric ? rubric : [];
}
3224
/**
 * Chooses the rubric to grade against for one run.
 *
 * A non-empty rubric configured on the scorer always wins. Otherwise the run is
 * searched in order — `requestContext`, `additionalContext`, `input` — and the
 * first rubric found is normalized and returned.
 *
 * @param {object} args
 * @param {Array} args.staticRubric - Rubric configured when the scorer was created.
 * @param {object} args.run - Scorer run to search for a dynamic rubric.
 * @returns {Array} The resolved criteria list; empty when none is found.
 */
function resolveRubric({
  staticRubric,
  run
}) {
  if (staticRubric.length > 0) return staticRubric;
  // Read each source lazily, stopping at the first one that carries a rubric.
  let found;
  for (const key of ["requestContext", "additionalContext", "input"]) {
    found = pickRubric(run[key]);
    if (found !== null && found !== void 0) break;
  }
  return normalizeRubric(found);
}
3232
/**
 * Reads a `rubric` entry from a context-like value.
 *
 * If the value exposes a `get` function it is called as `get("rubric")`;
 * otherwise the plain `rubric` property is read.
 *
 * @param {unknown} source - Candidate container; non-objects yield `undefined`.
 * @returns {string|Array|undefined} The rubric when it is a string or an array,
 *   otherwise `undefined`.
 */
function pickRubric(source) {
  const isObjectLike = Boolean(source) && typeof source === "object";
  if (!isObjectLike) return void 0;
  const accessor = source.get;
  const candidate = typeof accessor === "function" ? accessor.call(source, "rubric") : source.rubric;
  const usable = typeof candidate === "string" || Array.isArray(candidate);
  return usable ? candidate : void 0;
}
3245
/**
 * Converts rubric entries into the `{ criterion, required }` shape used by the
 * analyze prompt.
 *
 * Object entries are required unless `required` is explicitly `false`. Plain
 * string entries are also accepted and treated as required criteria; rubric
 * arrays can arrive from run context without validation, and a string entry
 * would otherwise be rendered as the text "undefined".
 *
 * @param {Array<string|{description: string, required?: boolean}>} criteria - Rubric entries.
 * @returns {{criterion: string, required: boolean}[]} Prompt-ready criteria, in the same order.
 */
function toCriterionInputs(criteria) {
  return criteria.map((c) =>
    typeof c === "string"
      ? { criterion: c, required: true }
      : { criterion: c.description, required: c.required !== false }
  );
}
3248
/**
 * Determines the text to grade for a run.
 *
 * Tried in order: the assistant message extracted from `run.output`, then a
 * string `run.input.currentText`, then `run.output` itself when it is a string.
 *
 * @param {object} run - Scorer run.
 * @returns {string} The text to grade, or `""` when none of the sources applies.
 */
function getOutputText(run) {
  const assistantText = chunkUNQXHPOD_cjs.getAssistantMessageFromRunOutput(run.output);
  if (assistantText) return assistantText;
  const input = run.input;
  const draft = input && typeof input === "object" ? input.currentText : void 0;
  if (typeof draft === "string") return draft;
  if (typeof run.output === "string") return run.output;
  return "";
}
3256
/**
 * Determines the task description to show the judge for context.
 *
 * A string `run.input.originalTask` is used when present; otherwise the user
 * message extracted from `run.input`.
 *
 * @param {object} run - Scorer run.
 * @returns {string} The task text, or `""` when no user message is found.
 */
function getTaskText(run) {
  const input = run.input;
  const explicitTask = input && typeof input === "object" ? input.originalTask : void 0;
  if (typeof explicitTask === "string") {
    return explicitTask;
  }
  const userMessage = chunkUNQXHPOD_cjs.getUserMessageFromRunInput(run.input);
  return userMessage ?? "";
}
3262
/**
 * Creates an LLM-judged scorer that grades an agent output against a rubric.
 *
 * The rubric comes from `criteria` when it is non-empty, otherwise from the run
 * (see `resolveRubric`). The result is pass/fail: `1 * scale` when every gating
 * criterion is satisfied, `0` otherwise. Required criteria gate the result;
 * when the judge marks none as required, all criteria gate it.
 *
 * When there is nothing to grade (no analysis or no criteria) the check passes
 * by default. That default is scaled the same way as a real pass, so a custom
 * `scale` does not make a default pass look like a near-failure.
 *
 * @param {object} args
 * @param {*} args.model - Judge model passed through to `createScorer`.
 * @param {string|Array} [args.criteria] - Static rubric, as text or a criteria list.
 * @param {{scale?: number}} [args.options] - `scale` multiplies the pass/fail result (default 1).
 * @returns The scorer built by `evals.createScorer(...)`.
 */
function createRubricScorer({
  model,
  criteria,
  options
}) {
  const scale = options?.scale ?? 1;
  const staticRubric = normalizeRubric(criteria);
  return evals.createScorer({
    id: "rubric-scorer",
    name: "Rubric (LLM)",
    description: "Grades an agent output against a rubric of criteria, returning 1 only when every required criterion is satisfied",
    judge: {
      model,
      instructions: RUBRIC_INSTRUCTIONS
    }
  }).analyze({
    description: "Judge the output against each rubric criterion",
    outputSchema: analyzeOutputSchema6,
    createPrompt: ({ run }) => {
      const rubric = resolveRubric({ staticRubric, run });
      if (rubric.length === 0) {
        // Nothing to grade: ask the judge for an empty, schema-valid answer.
        return `No rubric was provided. Return exactly: {"criteria": [], "overallAssessment": "No rubric provided; nothing to grade."}`;
      }
      return createAnalyzePrompt6({
        originalTask: getTaskText(run),
        output: getOutputText(run),
        criteria: toCriterionInputs(rubric)
      });
    }
  }).generateScore(({ results }) => {
    const analysis = results.analyzeStepResult;
    if (!analysis || analysis.criteria.length === 0) {
      // Default pass, scaled like the all-satisfied path below.
      return 1 * scale;
    }
    const requiredCriteria = analysis.criteria.filter((c) => c.required);
    const gating = requiredCriteria.length > 0 ? requiredCriteria : analysis.criteria;
    const allSatisfied = gating.every((c) => c.satisfied);
    return (allSatisfied ? 1 : 0) * scale;
  }).generateReason(({ results, score }) => {
    const analysis = results.analyzeStepResult;
    if (!analysis || analysis.criteria.length === 0) {
      return "No rubric was provided, so the rubric check passed by default.";
    }
    return formatRubricReason({ score, analysis });
  });
}
3308
+
3112
3309
  // src/scorers/llm/trajectory/prompts.ts
3113
3310
  var TRAJECTORY_EVALUATION_INSTRUCTIONS = `
3114
3311
  You are an expert evaluator specializing in AI agent trajectory analysis. Your role is to assess whether an agent took an appropriate sequence of actions (tool calls, reasoning steps) to accomplish a user's request.
@@ -3131,7 +3328,7 @@ OUTPUT REQUIREMENTS:
3131
3328
  - Use provided JSON schema exactly as specified
3132
3329
  - Be consistent in your evaluation standards
3133
3330
  `;
3134
- var createAnalyzePrompt6 = ({
3331
+ var createAnalyzePrompt7 = ({
3135
3332
  userInput,
3136
3333
  agentResponse,
3137
3334
  actualTrajectory,
@@ -3198,7 +3395,7 @@ Provide a single, concise sentence explaining why this score was given.
3198
3395
  };
3199
3396
 
3200
3397
  // src/scorers/llm/trajectory/index.ts
3201
- var analyzeOutputSchema6 = {
3398
+ var analyzeOutputSchema7 = {
3202
3399
  "$schema": "https://json-schema.org/draft/2020-12/schema",
3203
3400
  "type": "object",
3204
3401
  "properties": {
@@ -3340,11 +3537,11 @@ function createTrajectoryAccuracyScorerLLM({
3340
3537
  };
3341
3538
  }).analyze({
3342
3539
  description: "Analyze the quality and appropriateness of the agent trajectory",
3343
- outputSchema: analyzeOutputSchema6,
3540
+ outputSchema: analyzeOutputSchema7,
3344
3541
  createPrompt: ({ run, results }) => {
3345
- const userInput = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
3346
- const agentResponse = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output.rawOutput) ?? "";
3347
- return createAnalyzePrompt6({
3542
+ const userInput = chunkUNQXHPOD_cjs.getUserMessageFromRunInput(run.input) ?? "";
3543
+ const agentResponse = chunkUNQXHPOD_cjs.getAssistantMessageFromRunOutput(run.output.rawOutput) ?? "";
3544
+ return createAnalyzePrompt7({
3348
3545
  userInput,
3349
3546
  agentResponse,
3350
3547
  actualTrajectory: results.preprocessStepResult?.actualTrajectoryFormatted ?? "No steps taken",
@@ -3368,11 +3565,11 @@ function createTrajectoryAccuracyScorerLLM({
3368
3565
  const necessityScore = necessarySteps / totalSteps;
3369
3566
  const orderScore = orderedSteps / totalSteps;
3370
3567
  const score = necessityScore * 0.6 + orderScore * 0.3 - missingPenalty * 0.1;
3371
- return chunkBULMCHKJ_cjs.roundToTwoDecimals(Math.max(0, Math.min(1, score)));
3568
+ return chunkUNQXHPOD_cjs.roundToTwoDecimals(Math.max(0, Math.min(1, score)));
3372
3569
  }).generateReason({
3373
3570
  description: "Generate human-readable explanation of trajectory evaluation",
3374
3571
  createPrompt: ({ run, results, score }) => {
3375
- const userInput = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
3572
+ const userInput = chunkUNQXHPOD_cjs.getUserMessageFromRunInput(run.input) ?? "";
3376
3573
  const stepEvaluations = results.analyzeStepResult?.stepEvaluations || [];
3377
3574
  const missingSteps = results.analyzeStepResult?.missingSteps || [];
3378
3575
  const extraSteps = results.analyzeStepResult?.extraSteps || [];
@@ -3435,18 +3632,18 @@ function createCompletenessScorer() {
3435
3632
  type: "agent"
3436
3633
  }).preprocess(async ({ run }) => {
3437
3634
  const isInputInvalid = !run.input || run.input.inputMessages.some((i) => {
3438
- const content = chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i);
3635
+ const content = chunkUNQXHPOD_cjs.getTextContentFromMastraDBMessage(i);
3439
3636
  return content === null || content === void 0;
3440
3637
  });
3441
3638
  const isOutputInvalid = !run.output || run.output.some((i) => {
3442
- const content = chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i);
3639
+ const content = chunkUNQXHPOD_cjs.getTextContentFromMastraDBMessage(i);
3443
3640
  return content === null || content === void 0;
3444
3641
  });
3445
3642
  if (isInputInvalid || isOutputInvalid) {
3446
3643
  throw new Error("Inputs cannot be null or undefined");
3447
3644
  }
3448
- const input = run.input?.inputMessages.map((i) => chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3449
- const output = run.output?.map((i) => chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3645
+ const input = run.input?.inputMessages.map((i) => chunkUNQXHPOD_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3646
+ const output = run.output?.map((i) => chunkUNQXHPOD_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3450
3647
  const inputToProcess = input;
3451
3648
  const outputToProcess = output;
3452
3649
  const inputDoc = nlp__default.default(inputToProcess.trim());
@@ -3551,8 +3748,8 @@ function createTextualDifferenceScorer() {
3551
3748
  description: "Calculate textual difference between input and output using sequence matching algorithms.",
3552
3749
  type: "agent"
3553
3750
  }).preprocess(async ({ run }) => {
3554
- const input = run.input?.inputMessages?.map((i) => chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3555
- const output = run.output?.map((i) => chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3751
+ const input = run.input?.inputMessages?.map((i) => chunkUNQXHPOD_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3752
+ const output = run.output?.map((i) => chunkUNQXHPOD_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3556
3753
  const ratio = calculateRatio(input, output);
3557
3754
  const changes = countChanges(input, output);
3558
3755
  const maxLength = Math.max(input.length, output.length);
@@ -3575,8 +3772,8 @@ function createKeywordCoverageScorer() {
3575
3772
  description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
3576
3773
  type: "agent"
3577
3774
  }).preprocess(async ({ run }) => {
3578
- const input = run.input?.inputMessages?.map((i) => chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3579
- const output = run.output?.map((i) => chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3775
+ const input = run.input?.inputMessages?.map((i) => chunkUNQXHPOD_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3776
+ const output = run.output?.map((i) => chunkUNQXHPOD_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3580
3777
  if (!input && !output) {
3581
3778
  return {
3582
3779
  result: {
@@ -3629,8 +3826,8 @@ function createContentSimilarityScorer({ ignoreCase, ignoreWhitespace } = { igno
3629
3826
  description: "Calculates content similarity between input and output messages using string comparison algorithms.",
3630
3827
  type: "agent"
3631
3828
  }).preprocess(async ({ run }) => {
3632
- let processedInput = run.input?.inputMessages.map((i) => chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3633
- let processedOutput = run.output.map((i) => chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3829
+ let processedInput = run.input?.inputMessages.map((i) => chunkUNQXHPOD_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3830
+ let processedOutput = run.output.map((i) => chunkUNQXHPOD_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3634
3831
  if (ignoreCase) {
3635
3832
  processedInput = processedInput.toLowerCase();
3636
3833
  processedOutput = processedOutput.toLowerCase();
@@ -3660,7 +3857,7 @@ function createToneScorer(config = {}) {
3660
3857
  type: "agent"
3661
3858
  }).preprocess(async ({ run }) => {
3662
3859
  const sentiment = new Sentiment__default.default();
3663
- const agentMessage = run.output?.map((i) => chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3860
+ const agentMessage = run.output?.map((i) => chunkUNQXHPOD_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3664
3861
  const responseSentiment = sentiment.analyze(agentMessage);
3665
3862
  if (referenceTone) {
3666
3863
  const referenceSentiment = sentiment.analyze(referenceTone);
@@ -3747,7 +3944,7 @@ function createToolCallAccuracyScorerCode(options) {
3747
3944
  if (isInputInvalid || isOutputInvalid) {
3748
3945
  throw new Error("Input and output messages cannot be null or empty");
3749
3946
  }
3750
- const { tools: actualTools, toolCallInfos } = chunkBULMCHKJ_cjs.extractToolCalls(run.output);
3947
+ const { tools: actualTools, toolCallInfos } = chunkUNQXHPOD_cjs.extractToolCalls(run.output);
3751
3948
  const correctToolCalled = expectedTool ? strictMode ? actualTools.length === 1 && actualTools[0] === expectedTool : actualTools.includes(expectedTool) : false;
3752
3949
  return {
3753
3950
  expectedTool,
@@ -3822,7 +4019,7 @@ function createTrajectoryAccuracyScorerCode(options = {}) {
3822
4019
  const itemExpectation = run.expectedTrajectory;
3823
4020
  const effectiveOrdering = itemExpectation?.ordering ?? ordering;
3824
4021
  const effectiveAllowRepeated = itemExpectation?.allowRepeatedSteps ?? allowRepeatedSteps;
3825
- const comparison = chunkBULMCHKJ_cjs.compareTrajectories(
4022
+ const comparison = chunkUNQXHPOD_cjs.compareTrajectories(
3826
4023
  actualTrajectory,
3827
4024
  { steps: resolvedExpectedSteps },
3828
4025
  {
@@ -3880,7 +4077,7 @@ function evaluateNestedExpectations(expectedSteps, actualSteps, weights = { accu
3880
4077
  const childConfig = expectedStep.children;
3881
4078
  let accuracy;
3882
4079
  if (childConfig.steps && childConfig.steps.length > 0) {
3883
- accuracy = chunkBULMCHKJ_cjs.compareTrajectories(
4080
+ accuracy = chunkUNQXHPOD_cjs.compareTrajectories(
3884
4081
  childTrajectory,
3885
4082
  { steps: childConfig.steps },
3886
4083
  {
@@ -3890,18 +4087,18 @@ function evaluateNestedExpectations(expectedSteps, actualSteps, weights = { accu
3890
4087
  );
3891
4088
  }
3892
4089
  const hasEfficiencyConfig = childConfig.maxSteps !== void 0 || childConfig.maxTotalTokens !== void 0 || childConfig.maxTotalDurationMs !== void 0 || childConfig.noRedundantCalls !== void 0;
3893
- const efficiency = hasEfficiencyConfig ? chunkBULMCHKJ_cjs.checkTrajectoryEfficiency(childTrajectory, {
4090
+ const efficiency = hasEfficiencyConfig ? chunkUNQXHPOD_cjs.checkTrajectoryEfficiency(childTrajectory, {
3894
4091
  maxSteps: childConfig.maxSteps,
3895
4092
  maxTotalTokens: childConfig.maxTotalTokens,
3896
4093
  maxTotalDurationMs: childConfig.maxTotalDurationMs,
3897
4094
  noRedundantCalls: childConfig.noRedundantCalls ?? true
3898
4095
  }) : void 0;
3899
4096
  const hasBlacklistConfig = childConfig.blacklistedTools && childConfig.blacklistedTools.length > 0 || childConfig.blacklistedSequences && childConfig.blacklistedSequences.length > 0;
3900
- const blacklist = hasBlacklistConfig ? chunkBULMCHKJ_cjs.checkTrajectoryBlacklist(childTrajectory, {
4097
+ const blacklist = hasBlacklistConfig ? chunkUNQXHPOD_cjs.checkTrajectoryBlacklist(childTrajectory, {
3901
4098
  blacklistedTools: childConfig.blacklistedTools,
3902
4099
  blacklistedSequences: childConfig.blacklistedSequences
3903
4100
  }) : void 0;
3904
- const toolFailures = chunkBULMCHKJ_cjs.analyzeToolFailures(childTrajectory, {
4101
+ const toolFailures = chunkUNQXHPOD_cjs.analyzeToolFailures(childTrajectory, {
3905
4102
  maxRetriesPerTool: childConfig.maxRetriesPerTool ?? 2
3906
4103
  });
3907
4104
  const nested = childConfig.steps ? evaluateNestedExpectations(childConfig.steps, actualStep.children, weights) : [];
@@ -3966,7 +4163,7 @@ function createTrajectoryScorerCode(options = {}) {
3966
4163
  }
3967
4164
  let accuracy;
3968
4165
  if (config.steps && config.steps.length > 0) {
3969
- accuracy = chunkBULMCHKJ_cjs.compareTrajectories(
4166
+ accuracy = chunkUNQXHPOD_cjs.compareTrajectories(
3970
4167
  actualTrajectory,
3971
4168
  { steps: config.steps },
3972
4169
  {
@@ -3976,18 +4173,18 @@ function createTrajectoryScorerCode(options = {}) {
3976
4173
  );
3977
4174
  }
3978
4175
  const hasEfficiencyConfig = config.maxSteps !== void 0 || config.maxTotalTokens !== void 0 || config.maxTotalDurationMs !== void 0 || config.noRedundantCalls !== void 0;
3979
- const efficiency = hasEfficiencyConfig ? chunkBULMCHKJ_cjs.checkTrajectoryEfficiency(actualTrajectory, {
4176
+ const efficiency = hasEfficiencyConfig ? chunkUNQXHPOD_cjs.checkTrajectoryEfficiency(actualTrajectory, {
3980
4177
  maxSteps: config.maxSteps,
3981
4178
  maxTotalTokens: config.maxTotalTokens,
3982
4179
  maxTotalDurationMs: config.maxTotalDurationMs,
3983
4180
  noRedundantCalls: config.noRedundantCalls ?? true
3984
4181
  }) : void 0;
3985
4182
  const hasBlacklistConfig = config.blacklistedTools && config.blacklistedTools.length > 0 || config.blacklistedSequences && config.blacklistedSequences.length > 0;
3986
- const blacklist = hasBlacklistConfig ? chunkBULMCHKJ_cjs.checkTrajectoryBlacklist(actualTrajectory, {
4183
+ const blacklist = hasBlacklistConfig ? chunkUNQXHPOD_cjs.checkTrajectoryBlacklist(actualTrajectory, {
3987
4184
  blacklistedTools: config.blacklistedTools,
3988
4185
  blacklistedSequences: config.blacklistedSequences
3989
4186
  }) : void 0;
3990
- const toolFailures = chunkBULMCHKJ_cjs.analyzeToolFailures(actualTrajectory, {
4187
+ const toolFailures = chunkUNQXHPOD_cjs.analyzeToolFailures(actualTrajectory, {
3991
4188
  maxRetriesPerTool: config.maxRetriesPerTool ?? 2
3992
4189
  });
3993
4190
  const nested = config.steps && config.steps.length > 0 ? evaluateNestedExpectations(config.steps, actualTrajectory.steps, w) : void 0;
@@ -4121,6 +4318,7 @@ exports.createHallucinationScorer = createHallucinationScorer;
4121
4318
  exports.createKeywordCoverageScorer = createKeywordCoverageScorer;
4122
4319
  exports.createNoiseSensitivityScorerLLM = createNoiseSensitivityScorerLLM;
4123
4320
  exports.createPromptAlignmentScorerLLM = createPromptAlignmentScorerLLM;
4321
+ exports.createRubricScorer = createRubricScorer;
4124
4322
  exports.createTextualDifferenceScorer = createTextualDifferenceScorer;
4125
4323
  exports.createToneScorer = createToneScorer;
4126
4324
  exports.createToolCallAccuracyScorerCode = createToolCallAccuracyScorerCode;