@mastra/evals 1.2.3 → 1.2.4-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  'use strict';
2
2
 
3
- var chunk33T2SZZ2_cjs = require('../../chunk-33T2SZZ2.cjs');
3
+ var chunkBULMCHKJ_cjs = require('../../chunk-BULMCHKJ.cjs');
4
4
  var evals = require('@mastra/core/evals');
5
5
  var nlp = require('compromise');
6
6
  var keyword_extractor = require('keyword-extractor');
@@ -250,7 +250,7 @@ function createAnswerRelevancyScorer({
250
250
  description: "Extract relevant statements from the LLM output",
251
251
  outputSchema: extractOutputSchema,
252
252
  createPrompt: ({ run }) => {
253
- const assistantMessage = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
253
+ const assistantMessage = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
254
254
  return createExtractPrompt(assistantMessage);
255
255
  }
256
256
  }).analyze({
@@ -283,7 +283,7 @@ function createAnswerRelevancyScorer({
283
283
  ]
284
284
  },
285
285
  createPrompt: ({ run, results }) => {
286
- const input = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
286
+ const input = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
287
287
  return createScorePrompt(JSON.stringify(input), results.preprocessStepResult?.statements || []);
288
288
  }
289
289
  }).generateScore(({ results }) => {
@@ -300,13 +300,13 @@ function createAnswerRelevancyScorer({
300
300
  }
301
301
  }
302
302
  const score = relevancyCount / numberOfResults;
303
- return chunk33T2SZZ2_cjs.roundToTwoDecimals(score * options.scale);
303
+ return chunkBULMCHKJ_cjs.roundToTwoDecimals(score * options.scale);
304
304
  }).generateReason({
305
305
  description: "Reason about the results",
306
306
  createPrompt: ({ run, results, score }) => {
307
307
  return createReasonPrompt({
308
- input: chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "",
309
- output: chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
308
+ input: chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "",
309
+ output: chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
310
310
  score,
311
311
  results: results.analyzeStepResult.results,
312
312
  scale: options.scale
@@ -581,7 +581,7 @@ function createAnswerSimilarityScorer({
581
581
  groundTruth: ""
582
582
  });
583
583
  }
584
- const output = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
584
+ const output = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
585
585
  const groundTruth = typeof run.groundTruth === "string" ? run.groundTruth : JSON.stringify(run.groundTruth);
586
586
  return createExtractPrompt2({
587
587
  output,
@@ -639,14 +639,14 @@ function createAnswerSimilarityScorer({
639
639
  );
640
640
  score -= extraInfoPenalty;
641
641
  score = Math.max(0, Math.min(1, score));
642
- return chunk33T2SZZ2_cjs.roundToTwoDecimals(score * mergedOptions.scale);
642
+ return chunkBULMCHKJ_cjs.roundToTwoDecimals(score * mergedOptions.scale);
643
643
  }).generateReason({
644
644
  description: "Generate explanation of similarity score",
645
645
  createPrompt: ({ run, results, score }) => {
646
646
  if (!run.groundTruth) {
647
647
  return "No ground truth was provided for comparison. Score is 0 by default.";
648
648
  }
649
- const output = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
649
+ const output = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
650
650
  const groundTruth = typeof run.groundTruth === "string" ? run.groundTruth : JSON.stringify(run.groundTruth);
651
651
  return createReasonPrompt2({
652
652
  output,
@@ -848,7 +848,7 @@ function createFaithfulnessScorer({
848
848
  ]
849
849
  },
850
850
  createPrompt: ({ run }) => {
851
- const prompt = createFaithfulnessExtractPrompt({ output: chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" });
851
+ const prompt = createFaithfulnessExtractPrompt({ output: chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" });
852
852
  return prompt;
853
853
  }
854
854
  }).analyze({
@@ -895,13 +895,13 @@ function createFaithfulnessScorer({
895
895
  return 0;
896
896
  }
897
897
  const score = supportedClaims / totalClaims * (options?.scale || 1);
898
- return chunk33T2SZZ2_cjs.roundToTwoDecimals(score);
898
+ return chunkBULMCHKJ_cjs.roundToTwoDecimals(score);
899
899
  }).generateReason({
900
900
  description: "Reason about the results",
901
901
  createPrompt: ({ run, results, score }) => {
902
902
  const prompt = createFaithfulnessReasonPrompt({
903
- input: chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "",
904
- output: chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
903
+ input: chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "",
904
+ output: chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
905
905
  context: options?.context ?? getToolInvocationContext(run.output),
906
906
  score,
907
907
  scale: options?.scale || 1,
@@ -1046,7 +1046,7 @@ function createBiasScorer({ model, options }) {
1046
1046
  "opinions"
1047
1047
  ]
1048
1048
  },
1049
- createPrompt: ({ run }) => createBiasExtractPrompt({ output: chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" })
1049
+ createPrompt: ({ run }) => createBiasExtractPrompt({ output: chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" })
1050
1050
  }).analyze({
1051
1051
  description: "Score the relevance of the statements to the input",
1052
1052
  outputSchema: {
@@ -1078,7 +1078,7 @@ function createBiasScorer({ model, options }) {
1078
1078
  },
1079
1079
  createPrompt: ({ run, results }) => {
1080
1080
  const prompt = createBiasAnalyzePrompt({
1081
- output: chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
1081
+ output: chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
1082
1082
  opinions: results.preprocessStepResult?.opinions || []
1083
1083
  });
1084
1084
  return prompt;
@@ -1089,7 +1089,7 @@ function createBiasScorer({ model, options }) {
1089
1089
  }
1090
1090
  const biasedVerdicts = results.analyzeStepResult.results.filter((v) => v.result.toLowerCase() === "yes");
1091
1091
  const score = biasedVerdicts.length / results.analyzeStepResult.results.length;
1092
- return chunk33T2SZZ2_cjs.roundToTwoDecimals(score * (options?.scale || 1));
1092
+ return chunkBULMCHKJ_cjs.roundToTwoDecimals(score * (options?.scale || 1));
1093
1093
  }).generateReason({
1094
1094
  description: "Reason about the results",
1095
1095
  createPrompt: ({ score, results }) => {
@@ -1320,7 +1320,7 @@ function createHallucinationScorer({
1320
1320
  ]
1321
1321
  },
1322
1322
  createPrompt: ({ run }) => {
1323
- const prompt = createHallucinationExtractPrompt({ output: chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" });
1323
+ const prompt = createHallucinationExtractPrompt({ output: chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" });
1324
1324
  return prompt;
1325
1325
  }
1326
1326
  }).analyze({
@@ -1376,7 +1376,7 @@ function createHallucinationScorer({
1376
1376
  return 0;
1377
1377
  }
1378
1378
  const score = contradictedStatements / totalStatements * (options?.scale || 1);
1379
- return chunk33T2SZZ2_cjs.roundToTwoDecimals(score);
1379
+ return chunkBULMCHKJ_cjs.roundToTwoDecimals(score);
1380
1380
  }).generateReason({
1381
1381
  description: "Reason about the results",
1382
1382
  createPrompt: async ({ run, results, score }) => {
@@ -1387,8 +1387,8 @@ function createHallucinationScorer({
1387
1387
  context = options?.context ?? [];
1388
1388
  }
1389
1389
  const prompt = createHallucinationReasonPrompt({
1390
- input: chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "",
1391
- output: chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
1390
+ input: chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "",
1391
+ output: chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
1392
1392
  context,
1393
1393
  score,
1394
1394
  scale: options?.scale || 1,
@@ -1528,8 +1528,8 @@ function createToxicityScorer({
1528
1528
  },
1529
1529
  createPrompt: ({ run }) => {
1530
1530
  const prompt = createToxicityAnalyzePrompt({
1531
- input: chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "",
1532
- output: chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? ""
1531
+ input: chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "",
1532
+ output: chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? ""
1533
1533
  });
1534
1534
  return prompt;
1535
1535
  }
@@ -1545,7 +1545,7 @@ function createToxicityScorer({
1545
1545
  }
1546
1546
  }
1547
1547
  const score = toxicityCount / numberOfVerdicts;
1548
- return chunk33T2SZZ2_cjs.roundToTwoDecimals(score * (options?.scale || 1));
1548
+ return chunkBULMCHKJ_cjs.roundToTwoDecimals(score * (options?.scale || 1));
1549
1549
  }).generateReason({
1550
1550
  description: "Reason about the results",
1551
1551
  createPrompt: ({ results, score }) => {
@@ -1706,7 +1706,7 @@ function createToolCallAccuracyScorerLLM({ model, availableTools }) {
1706
1706
  if (isInputInvalid || isOutputInvalid) {
1707
1707
  throw new Error("Input and output messages cannot be null or empty");
1708
1708
  }
1709
- const { tools: actualTools, toolCallInfos } = chunk33T2SZZ2_cjs.extractToolCalls(run.output);
1709
+ const { tools: actualTools, toolCallInfos } = chunkBULMCHKJ_cjs.extractToolCalls(run.output);
1710
1710
  return {
1711
1711
  actualTools,
1712
1712
  hasToolCalls: actualTools.length > 0,
@@ -1716,8 +1716,8 @@ function createToolCallAccuracyScorerLLM({ model, availableTools }) {
1716
1716
  description: "Analyze the appropriateness of tool selections",
1717
1717
  outputSchema: analyzeOutputSchema2,
1718
1718
  createPrompt: ({ run, results }) => {
1719
- const userInput = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
1720
- const agentResponse = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
1719
+ const userInput = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
1720
+ const agentResponse = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
1721
1721
  const toolsCalled = results.preprocessStepResult?.actualTools || [];
1722
1722
  return createAnalyzePrompt2({
1723
1723
  userInput,
@@ -1734,11 +1734,11 @@ function createToolCallAccuracyScorerLLM({ model, availableTools }) {
1734
1734
  }
1735
1735
  const appropriateToolCalls = evaluations.filter((e) => e.wasAppropriate).length;
1736
1736
  const totalToolCalls = evaluations.length;
1737
- return chunk33T2SZZ2_cjs.roundToTwoDecimals(appropriateToolCalls / totalToolCalls);
1737
+ return chunkBULMCHKJ_cjs.roundToTwoDecimals(appropriateToolCalls / totalToolCalls);
1738
1738
  }).generateReason({
1739
1739
  description: "Generate human-readable explanation of tool selection evaluation",
1740
1740
  createPrompt: ({ run, results, score }) => {
1741
- const userInput = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
1741
+ const userInput = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
1742
1742
  const evaluations = results.analyzeStepResult?.evaluations || [];
1743
1743
  const missingTools = results.analyzeStepResult?.missingTools || [];
1744
1744
  return createReasonPrompt3({
@@ -1968,7 +1968,7 @@ var getContext = ({
1968
1968
  output,
1969
1969
  options
1970
1970
  }) => {
1971
- if (options.contextExtractor && chunk33T2SZZ2_cjs.isScorerRunInputForAgent(input) && chunk33T2SZZ2_cjs.isScorerRunOutputForAgent(output)) {
1971
+ if (options.contextExtractor && chunkBULMCHKJ_cjs.isScorerRunInputForAgent(input) && chunkBULMCHKJ_cjs.isScorerRunOutputForAgent(output)) {
1972
1972
  return options.contextExtractor(input, output);
1973
1973
  }
1974
1974
  return options.context ?? [];
@@ -1996,8 +1996,8 @@ function createContextRelevanceScorerLLM({
1996
1996
  description: "Analyze the relevance and utility of provided context",
1997
1997
  outputSchema: analyzeOutputSchema3,
1998
1998
  createPrompt: ({ run }) => {
1999
- const userQuery = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
2000
- const agentResponse = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
1999
+ const userQuery = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
2000
+ const agentResponse = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
2001
2001
  const context = getContext({ input: run.input, output: run.output, options });
2002
2002
  if (context.length === 0) {
2003
2003
  return createAnalyzePrompt3({
@@ -2045,11 +2045,11 @@ function createContextRelevanceScorerLLM({
2045
2045
  const missingContextPenalty = Math.min(missingContext.length * missingPenaltyRate, maxMissingPenalty);
2046
2046
  const finalScore = Math.max(0, relevanceScore - usagePenalty - missingContextPenalty);
2047
2047
  const scaledScore = finalScore * (options.scale || 1);
2048
- return chunk33T2SZZ2_cjs.roundToTwoDecimals(scaledScore);
2048
+ return chunkBULMCHKJ_cjs.roundToTwoDecimals(scaledScore);
2049
2049
  }).generateReason({
2050
2050
  description: "Generate human-readable explanation of context relevance evaluation",
2051
2051
  createPrompt: ({ run, results, score }) => {
2052
- const userQuery = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
2052
+ const userQuery = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
2053
2053
  const context = getContext({ input: run.input, output: run.output, options });
2054
2054
  if (context.length === 0) {
2055
2055
  return `No context was available for evaluation. The agent response was generated without any supporting context. Score: ${score}`;
@@ -2224,7 +2224,7 @@ var getContext2 = ({
2224
2224
  output,
2225
2225
  options
2226
2226
  }) => {
2227
- if (options.contextExtractor && chunk33T2SZZ2_cjs.isScorerRunInputForAgent(input) && chunk33T2SZZ2_cjs.isScorerRunOutputForAgent(output)) {
2227
+ if (options.contextExtractor && chunkBULMCHKJ_cjs.isScorerRunInputForAgent(input) && chunkBULMCHKJ_cjs.isScorerRunOutputForAgent(output)) {
2228
2228
  return options.contextExtractor(input, output);
2229
2229
  }
2230
2230
  return options.context ?? [];
@@ -2252,8 +2252,8 @@ function createContextPrecisionScorer({
2252
2252
  description: "Evaluate the relevance of each context piece for generating the expected output",
2253
2253
  outputSchema: contextRelevanceOutputSchema,
2254
2254
  createPrompt: ({ run }) => {
2255
- const input = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
2256
- const output = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
2255
+ const input = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
2256
+ const output = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
2257
2257
  const context = getContext2({ input: run.input, output: run.output, options });
2258
2258
  if (context.length === 0) {
2259
2259
  throw new Error("No context available for evaluation");
@@ -2286,12 +2286,12 @@ function createContextPrecisionScorer({
2286
2286
  }
2287
2287
  const map = sumPrecision / relevantCount;
2288
2288
  const score = map * (options.scale || 1);
2289
- return chunk33T2SZZ2_cjs.roundToTwoDecimals(score);
2289
+ return chunkBULMCHKJ_cjs.roundToTwoDecimals(score);
2290
2290
  }).generateReason({
2291
2291
  description: "Reason about the context precision results",
2292
2292
  createPrompt: ({ run, results, score }) => {
2293
- const input = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
2294
- const output = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
2293
+ const input = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
2294
+ const output = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
2295
2295
  const context = getContext2({ input: run.input, output: run.output, options });
2296
2296
  return createContextPrecisionReasonPrompt({
2297
2297
  input,
@@ -2589,8 +2589,8 @@ function createNoiseSensitivityScorerLLM({
2589
2589
  description: "Analyze the impact of noise on agent response quality",
2590
2590
  outputSchema: analyzeOutputSchema4,
2591
2591
  createPrompt: ({ run }) => {
2592
- const originalQuery = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
2593
- const noisyResponse = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
2592
+ const originalQuery = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
2593
+ const noisyResponse = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
2594
2594
  if (!originalQuery || !noisyResponse) {
2595
2595
  throw new Error("Both original query and noisy response are required for evaluation");
2596
2596
  }
@@ -2633,11 +2633,11 @@ function createNoiseSensitivityScorerLLM({
2633
2633
  const majorIssues = analysisResult.majorIssues || [];
2634
2634
  const issuesPenalty = Math.min(majorIssues.length * majorIssuePenaltyRate, maxMajorIssuePenalty);
2635
2635
  finalScore = Math.max(0, finalScore - issuesPenalty);
2636
- return chunk33T2SZZ2_cjs.roundToTwoDecimals(finalScore);
2636
+ return chunkBULMCHKJ_cjs.roundToTwoDecimals(finalScore);
2637
2637
  }).generateReason({
2638
2638
  description: "Generate human-readable explanation of noise sensitivity evaluation",
2639
2639
  createPrompt: ({ run, results, score }) => {
2640
- const originalQuery = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
2640
+ const originalQuery = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
2641
2641
  const analysisResult = results.analyzeStepResult;
2642
2642
  if (!analysisResult) {
2643
2643
  throw new Error("Analysis step failed to produce results for reason generation");
@@ -3049,9 +3049,9 @@ function createPromptAlignmentScorerLLM({
3049
3049
  description: "Analyze prompt-response alignment across multiple dimensions",
3050
3050
  outputSchema: analyzeOutputSchema5,
3051
3051
  createPrompt: ({ run }) => {
3052
- const userPrompt = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
3053
- const systemPrompt = chunk33T2SZZ2_cjs.getCombinedSystemPrompt(run.input) ?? "";
3054
- const agentResponse = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
3052
+ const userPrompt = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
3053
+ const systemPrompt = chunkBULMCHKJ_cjs.getCombinedSystemPrompt(run.input) ?? "";
3054
+ const agentResponse = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
3055
3055
  if (evaluationMode === "user" && !userPrompt) {
3056
3056
  throw new Error("User prompt is required for user prompt alignment scoring");
3057
3057
  }
@@ -3087,12 +3087,12 @@ function createPromptAlignmentScorerLLM({
3087
3087
  weightedScore = userScore * SCORING_WEIGHTS.BOTH.USER_WEIGHT + systemScore * SCORING_WEIGHTS.BOTH.SYSTEM_WEIGHT;
3088
3088
  }
3089
3089
  const finalScore = weightedScore * scale;
3090
- return chunk33T2SZZ2_cjs.roundToTwoDecimals(finalScore);
3090
+ return chunkBULMCHKJ_cjs.roundToTwoDecimals(finalScore);
3091
3091
  }).generateReason({
3092
3092
  description: "Generate human-readable explanation of prompt alignment evaluation",
3093
3093
  createPrompt: ({ run, results, score }) => {
3094
- const userPrompt = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
3095
- const systemPrompt = chunk33T2SZZ2_cjs.getCombinedSystemPrompt(run.input) ?? "";
3094
+ const userPrompt = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
3095
+ const systemPrompt = chunkBULMCHKJ_cjs.getCombinedSystemPrompt(run.input) ?? "";
3096
3096
  const analysis = results.analyzeStepResult;
3097
3097
  if (!analysis) {
3098
3098
  return `Unable to analyze prompt alignment. Score: ${score}`;
@@ -3342,8 +3342,8 @@ function createTrajectoryAccuracyScorerLLM({
3342
3342
  description: "Analyze the quality and appropriateness of the agent trajectory",
3343
3343
  outputSchema: analyzeOutputSchema6,
3344
3344
  createPrompt: ({ run, results }) => {
3345
- const userInput = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
3346
- const agentResponse = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output.rawOutput) ?? "";
3345
+ const userInput = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
3346
+ const agentResponse = chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput(run.output.rawOutput) ?? "";
3347
3347
  return createAnalyzePrompt6({
3348
3348
  userInput,
3349
3349
  agentResponse,
@@ -3368,11 +3368,11 @@ function createTrajectoryAccuracyScorerLLM({
3368
3368
  const necessityScore = necessarySteps / totalSteps;
3369
3369
  const orderScore = orderedSteps / totalSteps;
3370
3370
  const score = necessityScore * 0.6 + orderScore * 0.3 - missingPenalty * 0.1;
3371
- return chunk33T2SZZ2_cjs.roundToTwoDecimals(Math.max(0, Math.min(1, score)));
3371
+ return chunkBULMCHKJ_cjs.roundToTwoDecimals(Math.max(0, Math.min(1, score)));
3372
3372
  }).generateReason({
3373
3373
  description: "Generate human-readable explanation of trajectory evaluation",
3374
3374
  createPrompt: ({ run, results, score }) => {
3375
- const userInput = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
3375
+ const userInput = chunkBULMCHKJ_cjs.getUserMessageFromRunInput(run.input) ?? "";
3376
3376
  const stepEvaluations = results.analyzeStepResult?.stepEvaluations || [];
3377
3377
  const missingSteps = results.analyzeStepResult?.missingSteps || [];
3378
3378
  const extraSteps = results.analyzeStepResult?.extraSteps || [];
@@ -3435,18 +3435,18 @@ function createCompletenessScorer() {
3435
3435
  type: "agent"
3436
3436
  }).preprocess(async ({ run }) => {
3437
3437
  const isInputInvalid = !run.input || run.input.inputMessages.some((i) => {
3438
- const content = chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i);
3438
+ const content = chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i);
3439
3439
  return content === null || content === void 0;
3440
3440
  });
3441
3441
  const isOutputInvalid = !run.output || run.output.some((i) => {
3442
- const content = chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i);
3442
+ const content = chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i);
3443
3443
  return content === null || content === void 0;
3444
3444
  });
3445
3445
  if (isInputInvalid || isOutputInvalid) {
3446
3446
  throw new Error("Inputs cannot be null or undefined");
3447
3447
  }
3448
- const input = run.input?.inputMessages.map((i) => chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3449
- const output = run.output?.map((i) => chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3448
+ const input = run.input?.inputMessages.map((i) => chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3449
+ const output = run.output?.map((i) => chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3450
3450
  const inputToProcess = input;
3451
3451
  const outputToProcess = output;
3452
3452
  const inputDoc = nlp__default.default(inputToProcess.trim());
@@ -3551,8 +3551,8 @@ function createTextualDifferenceScorer() {
3551
3551
  description: "Calculate textual difference between input and output using sequence matching algorithms.",
3552
3552
  type: "agent"
3553
3553
  }).preprocess(async ({ run }) => {
3554
- const input = run.input?.inputMessages?.map((i) => chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3555
- const output = run.output?.map((i) => chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3554
+ const input = run.input?.inputMessages?.map((i) => chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3555
+ const output = run.output?.map((i) => chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3556
3556
  const ratio = calculateRatio(input, output);
3557
3557
  const changes = countChanges(input, output);
3558
3558
  const maxLength = Math.max(input.length, output.length);
@@ -3575,8 +3575,8 @@ function createKeywordCoverageScorer() {
3575
3575
  description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
3576
3576
  type: "agent"
3577
3577
  }).preprocess(async ({ run }) => {
3578
- const input = run.input?.inputMessages?.map((i) => chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3579
- const output = run.output?.map((i) => chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3578
+ const input = run.input?.inputMessages?.map((i) => chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3579
+ const output = run.output?.map((i) => chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3580
3580
  if (!input && !output) {
3581
3581
  return {
3582
3582
  result: {
@@ -3629,8 +3629,8 @@ function createContentSimilarityScorer({ ignoreCase, ignoreWhitespace } = { igno
3629
3629
  description: "Calculates content similarity between input and output messages using string comparison algorithms.",
3630
3630
  type: "agent"
3631
3631
  }).preprocess(async ({ run }) => {
3632
- let processedInput = run.input?.inputMessages.map((i) => chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3633
- let processedOutput = run.output.map((i) => chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3632
+ let processedInput = run.input?.inputMessages.map((i) => chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3633
+ let processedOutput = run.output.map((i) => chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3634
3634
  if (ignoreCase) {
3635
3635
  processedInput = processedInput.toLowerCase();
3636
3636
  processedOutput = processedOutput.toLowerCase();
@@ -3660,7 +3660,7 @@ function createToneScorer(config = {}) {
3660
3660
  type: "agent"
3661
3661
  }).preprocess(async ({ run }) => {
3662
3662
  const sentiment = new Sentiment__default.default();
3663
- const agentMessage = run.output?.map((i) => chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3663
+ const agentMessage = run.output?.map((i) => chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
3664
3664
  const responseSentiment = sentiment.analyze(agentMessage);
3665
3665
  if (referenceTone) {
3666
3666
  const referenceSentiment = sentiment.analyze(referenceTone);
@@ -3747,7 +3747,7 @@ function createToolCallAccuracyScorerCode(options) {
3747
3747
  if (isInputInvalid || isOutputInvalid) {
3748
3748
  throw new Error("Input and output messages cannot be null or empty");
3749
3749
  }
3750
- const { tools: actualTools, toolCallInfos } = chunk33T2SZZ2_cjs.extractToolCalls(run.output);
3750
+ const { tools: actualTools, toolCallInfos } = chunkBULMCHKJ_cjs.extractToolCalls(run.output);
3751
3751
  const correctToolCalled = expectedTool ? strictMode ? actualTools.length === 1 && actualTools[0] === expectedTool : actualTools.includes(expectedTool) : false;
3752
3752
  return {
3753
3753
  expectedTool,
@@ -3822,7 +3822,7 @@ function createTrajectoryAccuracyScorerCode(options = {}) {
3822
3822
  const itemExpectation = run.expectedTrajectory;
3823
3823
  const effectiveOrdering = itemExpectation?.ordering ?? ordering;
3824
3824
  const effectiveAllowRepeated = itemExpectation?.allowRepeatedSteps ?? allowRepeatedSteps;
3825
- const comparison = chunk33T2SZZ2_cjs.compareTrajectories(
3825
+ const comparison = chunkBULMCHKJ_cjs.compareTrajectories(
3826
3826
  actualTrajectory,
3827
3827
  { steps: resolvedExpectedSteps },
3828
3828
  {
@@ -3880,7 +3880,7 @@ function evaluateNestedExpectations(expectedSteps, actualSteps, weights = { accu
3880
3880
  const childConfig = expectedStep.children;
3881
3881
  let accuracy;
3882
3882
  if (childConfig.steps && childConfig.steps.length > 0) {
3883
- accuracy = chunk33T2SZZ2_cjs.compareTrajectories(
3883
+ accuracy = chunkBULMCHKJ_cjs.compareTrajectories(
3884
3884
  childTrajectory,
3885
3885
  { steps: childConfig.steps },
3886
3886
  {
@@ -3890,18 +3890,18 @@ function evaluateNestedExpectations(expectedSteps, actualSteps, weights = { accu
3890
3890
  );
3891
3891
  }
3892
3892
  const hasEfficiencyConfig = childConfig.maxSteps !== void 0 || childConfig.maxTotalTokens !== void 0 || childConfig.maxTotalDurationMs !== void 0 || childConfig.noRedundantCalls !== void 0;
3893
- const efficiency = hasEfficiencyConfig ? chunk33T2SZZ2_cjs.checkTrajectoryEfficiency(childTrajectory, {
3893
+ const efficiency = hasEfficiencyConfig ? chunkBULMCHKJ_cjs.checkTrajectoryEfficiency(childTrajectory, {
3894
3894
  maxSteps: childConfig.maxSteps,
3895
3895
  maxTotalTokens: childConfig.maxTotalTokens,
3896
3896
  maxTotalDurationMs: childConfig.maxTotalDurationMs,
3897
3897
  noRedundantCalls: childConfig.noRedundantCalls ?? true
3898
3898
  }) : void 0;
3899
3899
  const hasBlacklistConfig = childConfig.blacklistedTools && childConfig.blacklistedTools.length > 0 || childConfig.blacklistedSequences && childConfig.blacklistedSequences.length > 0;
3900
- const blacklist = hasBlacklistConfig ? chunk33T2SZZ2_cjs.checkTrajectoryBlacklist(childTrajectory, {
3900
+ const blacklist = hasBlacklistConfig ? chunkBULMCHKJ_cjs.checkTrajectoryBlacklist(childTrajectory, {
3901
3901
  blacklistedTools: childConfig.blacklistedTools,
3902
3902
  blacklistedSequences: childConfig.blacklistedSequences
3903
3903
  }) : void 0;
3904
- const toolFailures = chunk33T2SZZ2_cjs.analyzeToolFailures(childTrajectory, {
3904
+ const toolFailures = chunkBULMCHKJ_cjs.analyzeToolFailures(childTrajectory, {
3905
3905
  maxRetriesPerTool: childConfig.maxRetriesPerTool ?? 2
3906
3906
  });
3907
3907
  const nested = childConfig.steps ? evaluateNestedExpectations(childConfig.steps, actualStep.children, weights) : [];
@@ -3966,7 +3966,7 @@ function createTrajectoryScorerCode(options = {}) {
3966
3966
  }
3967
3967
  let accuracy;
3968
3968
  if (config.steps && config.steps.length > 0) {
3969
- accuracy = chunk33T2SZZ2_cjs.compareTrajectories(
3969
+ accuracy = chunkBULMCHKJ_cjs.compareTrajectories(
3970
3970
  actualTrajectory,
3971
3971
  { steps: config.steps },
3972
3972
  {
@@ -3976,18 +3976,18 @@ function createTrajectoryScorerCode(options = {}) {
3976
3976
  );
3977
3977
  }
3978
3978
  const hasEfficiencyConfig = config.maxSteps !== void 0 || config.maxTotalTokens !== void 0 || config.maxTotalDurationMs !== void 0 || config.noRedundantCalls !== void 0;
3979
- const efficiency = hasEfficiencyConfig ? chunk33T2SZZ2_cjs.checkTrajectoryEfficiency(actualTrajectory, {
3979
+ const efficiency = hasEfficiencyConfig ? chunkBULMCHKJ_cjs.checkTrajectoryEfficiency(actualTrajectory, {
3980
3980
  maxSteps: config.maxSteps,
3981
3981
  maxTotalTokens: config.maxTotalTokens,
3982
3982
  maxTotalDurationMs: config.maxTotalDurationMs,
3983
3983
  noRedundantCalls: config.noRedundantCalls ?? true
3984
3984
  }) : void 0;
3985
3985
  const hasBlacklistConfig = config.blacklistedTools && config.blacklistedTools.length > 0 || config.blacklistedSequences && config.blacklistedSequences.length > 0;
3986
- const blacklist = hasBlacklistConfig ? chunk33T2SZZ2_cjs.checkTrajectoryBlacklist(actualTrajectory, {
3986
+ const blacklist = hasBlacklistConfig ? chunkBULMCHKJ_cjs.checkTrajectoryBlacklist(actualTrajectory, {
3987
3987
  blacklistedTools: config.blacklistedTools,
3988
3988
  blacklistedSequences: config.blacklistedSequences
3989
3989
  }) : void 0;
3990
- const toolFailures = chunk33T2SZZ2_cjs.analyzeToolFailures(actualTrajectory, {
3990
+ const toolFailures = chunkBULMCHKJ_cjs.analyzeToolFailures(actualTrajectory, {
3991
3991
  maxRetriesPerTool: config.maxRetriesPerTool ?? 2
3992
3992
  });
3993
3993
  const nested = config.steps && config.steps.length > 0 ? evaluateNestedExpectations(config.steps, actualTrajectory.steps, w) : void 0;
@@ -1,4 +1,4 @@
1
- import { getAssistantMessageFromRunOutput, getUserMessageFromRunInput, roundToTwoDecimals, extractToolCalls, getCombinedSystemPrompt, getTextContentFromMastraDBMessage, compareTrajectories, checkTrajectoryEfficiency, checkTrajectoryBlacklist, analyzeToolFailures, isScorerRunInputForAgent, isScorerRunOutputForAgent } from '../../chunk-ZRHCSFKL.js';
1
+ import { getAssistantMessageFromRunOutput, getUserMessageFromRunInput, roundToTwoDecimals, extractToolCalls, getCombinedSystemPrompt, getTextContentFromMastraDBMessage, compareTrajectories, checkTrajectoryEfficiency, checkTrajectoryBlacklist, analyzeToolFailures, isScorerRunInputForAgent, isScorerRunOutputForAgent } from '../../chunk-XOXUFZEG.js';
2
2
  import { createScorer } from '@mastra/core/evals';
3
3
  import nlp from 'compromise';
4
4
  import keyword_extractor from 'keyword-extractor';
@@ -1,104 +1,104 @@
1
1
  'use strict';
2
2
 
3
- var chunk33T2SZZ2_cjs = require('../chunk-33T2SZZ2.cjs');
3
+ var chunkBULMCHKJ_cjs = require('../chunk-BULMCHKJ.cjs');
4
4
 
5
5
 
6
6
 
7
7
  Object.defineProperty(exports, "analyzeToolFailures", {
8
8
  enumerable: true,
9
- get: function () { return chunk33T2SZZ2_cjs.analyzeToolFailures; }
9
+ get: function () { return chunkBULMCHKJ_cjs.analyzeToolFailures; }
10
10
  });
11
11
  Object.defineProperty(exports, "checkTrajectoryBlacklist", {
12
12
  enumerable: true,
13
- get: function () { return chunk33T2SZZ2_cjs.checkTrajectoryBlacklist; }
13
+ get: function () { return chunkBULMCHKJ_cjs.checkTrajectoryBlacklist; }
14
14
  });
15
15
  Object.defineProperty(exports, "checkTrajectoryEfficiency", {
16
16
  enumerable: true,
17
- get: function () { return chunk33T2SZZ2_cjs.checkTrajectoryEfficiency; }
17
+ get: function () { return chunkBULMCHKJ_cjs.checkTrajectoryEfficiency; }
18
18
  });
19
19
  Object.defineProperty(exports, "compareTrajectories", {
20
20
  enumerable: true,
21
- get: function () { return chunk33T2SZZ2_cjs.compareTrajectories; }
21
+ get: function () { return chunkBULMCHKJ_cjs.compareTrajectories; }
22
22
  });
23
23
  Object.defineProperty(exports, "createAgentTestRun", {
24
24
  enumerable: true,
25
- get: function () { return chunk33T2SZZ2_cjs.createAgentTestRun; }
25
+ get: function () { return chunkBULMCHKJ_cjs.createAgentTestRun; }
26
26
  });
27
27
  Object.defineProperty(exports, "createTestMessage", {
28
28
  enumerable: true,
29
- get: function () { return chunk33T2SZZ2_cjs.createTestMessage; }
29
+ get: function () { return chunkBULMCHKJ_cjs.createTestMessage; }
30
30
  });
31
31
  Object.defineProperty(exports, "createTestRun", {
32
32
  enumerable: true,
33
- get: function () { return chunk33T2SZZ2_cjs.createTestRun; }
33
+ get: function () { return chunkBULMCHKJ_cjs.createTestRun; }
34
34
  });
35
35
  Object.defineProperty(exports, "createToolInvocation", {
36
36
  enumerable: true,
37
- get: function () { return chunk33T2SZZ2_cjs.createToolInvocation; }
37
+ get: function () { return chunkBULMCHKJ_cjs.createToolInvocation; }
38
38
  });
39
39
  Object.defineProperty(exports, "createTrajectoryTestRun", {
40
40
  enumerable: true,
41
- get: function () { return chunk33T2SZZ2_cjs.createTrajectoryTestRun; }
41
+ get: function () { return chunkBULMCHKJ_cjs.createTrajectoryTestRun; }
42
42
  });
43
43
  Object.defineProperty(exports, "extractAgentResponseMessages", {
44
44
  enumerable: true,
45
- get: function () { return chunk33T2SZZ2_cjs.extractAgentResponseMessages; }
45
+ get: function () { return chunkBULMCHKJ_cjs.extractAgentResponseMessages; }
46
46
  });
47
47
  Object.defineProperty(exports, "extractInputMessages", {
48
48
  enumerable: true,
49
- get: function () { return chunk33T2SZZ2_cjs.extractInputMessages; }
49
+ get: function () { return chunkBULMCHKJ_cjs.extractInputMessages; }
50
50
  });
51
51
  Object.defineProperty(exports, "extractToolCalls", {
52
52
  enumerable: true,
53
- get: function () { return chunk33T2SZZ2_cjs.extractToolCalls; }
53
+ get: function () { return chunkBULMCHKJ_cjs.extractToolCalls; }
54
54
  });
55
55
  Object.defineProperty(exports, "extractToolResults", {
56
56
  enumerable: true,
57
- get: function () { return chunk33T2SZZ2_cjs.extractToolResults; }
57
+ get: function () { return chunkBULMCHKJ_cjs.extractToolResults; }
58
58
  });
59
59
  Object.defineProperty(exports, "extractTrajectory", {
60
60
  enumerable: true,
61
- get: function () { return chunk33T2SZZ2_cjs.extractTrajectory; }
61
+ get: function () { return chunkBULMCHKJ_cjs.extractTrajectory; }
62
62
  });
63
63
  Object.defineProperty(exports, "getAssistantMessageFromRunOutput", {
64
64
  enumerable: true,
65
- get: function () { return chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput; }
65
+ get: function () { return chunkBULMCHKJ_cjs.getAssistantMessageFromRunOutput; }
66
66
  });
67
67
  Object.defineProperty(exports, "getCombinedSystemPrompt", {
68
68
  enumerable: true,
69
- get: function () { return chunk33T2SZZ2_cjs.getCombinedSystemPrompt; }
69
+ get: function () { return chunkBULMCHKJ_cjs.getCombinedSystemPrompt; }
70
70
  });
71
71
  Object.defineProperty(exports, "getReasoningFromRunOutput", {
72
72
  enumerable: true,
73
- get: function () { return chunk33T2SZZ2_cjs.getReasoningFromRunOutput; }
73
+ get: function () { return chunkBULMCHKJ_cjs.getReasoningFromRunOutput; }
74
74
  });
75
75
  Object.defineProperty(exports, "getSystemMessagesFromRunInput", {
76
76
  enumerable: true,
77
- get: function () { return chunk33T2SZZ2_cjs.getSystemMessagesFromRunInput; }
77
+ get: function () { return chunkBULMCHKJ_cjs.getSystemMessagesFromRunInput; }
78
78
  });
79
79
  Object.defineProperty(exports, "getTextContentFromMastraDBMessage", {
80
80
  enumerable: true,
81
- get: function () { return chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage; }
81
+ get: function () { return chunkBULMCHKJ_cjs.getTextContentFromMastraDBMessage; }
82
82
  });
83
83
  Object.defineProperty(exports, "getUserMessageFromRunInput", {
84
84
  enumerable: true,
85
- get: function () { return chunk33T2SZZ2_cjs.getUserMessageFromRunInput; }
85
+ get: function () { return chunkBULMCHKJ_cjs.getUserMessageFromRunInput; }
86
86
  });
87
87
  Object.defineProperty(exports, "isCloserTo", {
88
88
  enumerable: true,
89
- get: function () { return chunk33T2SZZ2_cjs.isCloserTo; }
89
+ get: function () { return chunkBULMCHKJ_cjs.isCloserTo; }
90
90
  });
91
91
  Object.defineProperty(exports, "isScorerRunInputForAgent", {
92
92
  enumerable: true,
93
- get: function () { return chunk33T2SZZ2_cjs.isScorerRunInputForAgent; }
93
+ get: function () { return chunkBULMCHKJ_cjs.isScorerRunInputForAgent; }
94
94
  });
95
95
  Object.defineProperty(exports, "isScorerRunOutputForAgent", {
96
96
  enumerable: true,
97
- get: function () { return chunk33T2SZZ2_cjs.isScorerRunOutputForAgent; }
97
+ get: function () { return chunkBULMCHKJ_cjs.isScorerRunOutputForAgent; }
98
98
  });
99
99
  Object.defineProperty(exports, "roundToTwoDecimals", {
100
100
  enumerable: true,
101
- get: function () { return chunk33T2SZZ2_cjs.roundToTwoDecimals; }
101
+ get: function () { return chunkBULMCHKJ_cjs.roundToTwoDecimals; }
102
102
  });
103
103
  //# sourceMappingURL=utils.cjs.map
104
104
  //# sourceMappingURL=utils.cjs.map