npm - @mastra/evals - Versions diffs - 1.2.1 → 1.2.2-alpha.0 - Mend

@mastra/evals 1.2.1 → 1.2.2-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

package/CHANGELOG.md +19 -0
package/dist/{chunk-AY4K3J4R.cjs → chunk-33T2SZZ2.cjs} +74 -14
package/dist/chunk-33T2SZZ2.cjs.map +1 -0
package/dist/{chunk-X4MKZ735.js → chunk-ZRHCSFKL.js} +73 -15
package/dist/chunk-ZRHCSFKL.js.map +1 -0
package/dist/docs/SKILL.md +1 -1
package/dist/docs/assets/SOURCE_MAP.json +1 -1
package/dist/docs/references/reference-evals-context-precision.md +3 -3
package/dist/docs/references/reference-evals-context-relevance.md +3 -3
package/dist/docs/references/reference-evals-noise-sensitivity.md +6 -6
package/dist/docs/references/reference-evals-prompt-alignment.md +12 -12
package/dist/docs/references/reference-evals-scorer-utils.md +3 -3
package/dist/docs/references/reference-evals-trajectory-accuracy.md +3 -3
package/dist/scorers/llm/answer-relevancy/index.d.ts +2 -1
package/dist/scorers/llm/answer-relevancy/index.d.ts.map +1 -1
package/dist/scorers/llm/answer-similarity/index.d.ts +2 -1
package/dist/scorers/llm/answer-similarity/index.d.ts.map +1 -1
package/dist/scorers/llm/bias/index.d.ts +2 -2
package/dist/scorers/llm/bias/index.d.ts.map +1 -1
package/dist/scorers/llm/context-precision/index.d.ts +2 -1
package/dist/scorers/llm/context-precision/index.d.ts.map +1 -1
package/dist/scorers/llm/context-relevance/index.d.ts +2 -1
package/dist/scorers/llm/context-relevance/index.d.ts.map +1 -1
package/dist/scorers/llm/faithfulness/index.d.ts +2 -1
package/dist/scorers/llm/faithfulness/index.d.ts.map +1 -1
package/dist/scorers/llm/hallucination/index.d.ts +4 -4
package/dist/scorers/llm/hallucination/index.d.ts.map +1 -1
package/dist/scorers/llm/noise-sensitivity/index.d.ts +2 -1
package/dist/scorers/llm/noise-sensitivity/index.d.ts.map +1 -1
package/dist/scorers/llm/prompt-alignment/index.d.ts +2 -2
package/dist/scorers/llm/prompt-alignment/index.d.ts.map +1 -1
package/dist/scorers/llm/toxicity/index.d.ts +2 -1
package/dist/scorers/llm/toxicity/index.d.ts.map +1 -1
package/dist/scorers/prebuilt/index.cjs +105 -85
package/dist/scorers/prebuilt/index.cjs.map +1 -1
package/dist/scorers/prebuilt/index.js +34 -14
package/dist/scorers/prebuilt/index.js.map +1 -1
package/dist/scorers/utils.cjs +31 -23
package/dist/scorers/utils.d.ts +33 -16
package/dist/scorers/utils.d.ts.map +1 -1
package/dist/scorers/utils.js +1 -1
package/package.json +12 -12
package/dist/chunk-AY4K3J4R.cjs.map +0 -1
package/dist/chunk-X4MKZ735.js.map +0 -1

package/dist/scorers/prebuilt/index.cjs CHANGED Viewed

@@ -1,6 +1,6 @@
 'use strict';
-var chunkAY4K3J4R_cjs = require('../../chunk-AY4K3J4R.cjs');
+var chunk33T2SZZ2_cjs = require('../../chunk-33T2SZZ2.cjs');
 var evals = require('@mastra/core/evals');
 var zod = require('zod');
 var nlp = require('compromise');
@@ -239,14 +239,14 @@ function createAnswerRelevancyScorer({
     description: "Extract relevant statements from the LLM output",
     outputSchema: extractOutputSchema,
     createPrompt: ({ run }) => {
-      const assistantMessage = chunkAY4K3J4R_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
+      const assistantMessage = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
       return createExtractPrompt(assistantMessage);
     }
   }).analyze({
     description: "Score the relevance of the statements to the input",
     outputSchema: zod.z.object({ results: zod.z.array(zod.z.object({ result: zod.z.string(), reason: zod.z.string() })) }),
     createPrompt: ({ run, results }) => {
-      const input = chunkAY4K3J4R_cjs.getUserMessageFromRunInput(run.input) ?? "";
+      const input = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
       return createScorePrompt(JSON.stringify(input), results.preprocessStepResult?.statements || []);
     }
   }).generateScore(({ results }) => {
@@ -263,13 +263,13 @@ function createAnswerRelevancyScorer({
       }
     }
     const score = relevancyCount / numberOfResults;
-    return chunkAY4K3J4R_cjs.roundToTwoDecimals(score * options.scale);
+    return chunk33T2SZZ2_cjs.roundToTwoDecimals(score * options.scale);
   }).generateReason({
     description: "Reason about the results",
     createPrompt: ({ run, results, score }) => {
       return createReasonPrompt({
-        input: chunkAY4K3J4R_cjs.getUserMessageFromRunInput(run.input) ?? "",
-        output: chunkAY4K3J4R_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
+        input: chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "",
+        output: chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
         score,
         results: results.analyzeStepResult.results,
         scale: options.scale
@@ -466,7 +466,7 @@ function createAnswerSimilarityScorer({
           groundTruth: ""
         });
       }
-      const output = chunkAY4K3J4R_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
+      const output = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
       const groundTruth = typeof run.groundTruth === "string" ? run.groundTruth : JSON.stringify(run.groundTruth);
       return createExtractPrompt2({
         output,
@@ -524,14 +524,14 @@ function createAnswerSimilarityScorer({
     );
     score -= extraInfoPenalty;
     score = Math.max(0, Math.min(1, score));
-    return chunkAY4K3J4R_cjs.roundToTwoDecimals(score * mergedOptions.scale);
+    return chunk33T2SZZ2_cjs.roundToTwoDecimals(score * mergedOptions.scale);
   }).generateReason({
     description: "Generate explanation of similarity score",
     createPrompt: ({ run, results, score }) => {
       if (!run.groundTruth) {
         return "No ground truth was provided for comparison. Score is 0 by default.";
       }
-      const output = chunkAY4K3J4R_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
+      const output = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
       const groundTruth = typeof run.groundTruth === "string" ? run.groundTruth : JSON.stringify(run.groundTruth);
       return createReasonPrompt2({
         output,
@@ -698,6 +698,10 @@ Example Responses:
 }
 // src/scorers/llm/faithfulness/index.ts
+var getToolInvocationContext = (output) => {
+  if (!Array.isArray(output)) return [];
+  return output.filter((message) => message?.role === "assistant").flatMap((message) => message?.content?.toolInvocations ?? []).filter((toolCall) => toolCall.state === "result").map((toolCall) => JSON.stringify(toolCall.result));
+};
 function createFaithfulnessScorer({
   model,
   options
@@ -717,17 +721,14 @@ function createFaithfulnessScorer({
       claims: zod.z.array(zod.z.string())
     }),
     createPrompt: ({ run }) => {
-      const prompt = createFaithfulnessExtractPrompt({ output: chunkAY4K3J4R_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" });
+      const prompt = createFaithfulnessExtractPrompt({ output: chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" });
       return prompt;
     }
   }).analyze({
     description: "Score the relevance of the statements to the input",
     outputSchema: zod.z.object({ verdicts: zod.z.array(zod.z.object({ verdict: zod.z.string(), reason: zod.z.string() })) }),
     createPrompt: ({ results, run }) => {
-      const assistantMessage = run.output.find(({ role }) => role === "assistant");
-      const context = options?.context ?? assistantMessage?.content?.toolInvocations?.map(
-        (toolCall) => toolCall.state === "result" ? JSON.stringify(toolCall.result) : ""
-      ) ?? [];
+      const context = options?.context ?? getToolInvocationContext(run.output);
       const prompt = createFaithfulnessAnalyzePrompt({
         claims: results.preprocessStepResult?.claims || [],
         context
@@ -741,15 +742,14 @@ function createFaithfulnessScorer({
       return 0;
     }
     const score = supportedClaims / totalClaims * (options?.scale || 1);
-    return chunkAY4K3J4R_cjs.roundToTwoDecimals(score);
+    return chunk33T2SZZ2_cjs.roundToTwoDecimals(score);
   }).generateReason({
     description: "Reason about the results",
     createPrompt: ({ run, results, score }) => {
-      const assistantMessage = run.output.find(({ role }) => role === "assistant");
       const prompt = createFaithfulnessReasonPrompt({
-        input: chunkAY4K3J4R_cjs.getUserMessageFromRunInput(run.input) ?? "",
-        output: chunkAY4K3J4R_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
-        context: assistantMessage?.content?.toolInvocations?.map((toolCall) => JSON.stringify(toolCall)) || [],
+        input: chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "",
+        output: chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
+        context: options?.context ?? getToolInvocationContext(run.output),
         score,
         scale: options?.scale || 1,
         verdicts: results.analyzeStepResult?.verdicts || []
@@ -881,13 +881,13 @@ function createBiasScorer({ model, options }) {
     outputSchema: zod.z.object({
       opinions: zod.z.array(zod.z.string())
     }),
-    createPrompt: ({ run }) => createBiasExtractPrompt({ output: chunkAY4K3J4R_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" })
+    createPrompt: ({ run }) => createBiasExtractPrompt({ output: chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" })
   }).analyze({
     description: "Score the relevance of the statements to the input",
     outputSchema: zod.z.object({ results: zod.z.array(zod.z.object({ result: zod.z.string(), reason: zod.z.string() })) }),
     createPrompt: ({ run, results }) => {
       const prompt = createBiasAnalyzePrompt({
-        output: chunkAY4K3J4R_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
+        output: chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
         opinions: results.preprocessStepResult?.opinions || []
       });
       return prompt;
@@ -898,7 +898,7 @@ function createBiasScorer({ model, options }) {
     }
     const biasedVerdicts = results.analyzeStepResult.results.filter((v) => v.result.toLowerCase() === "yes");
     const score = biasedVerdicts.length / results.analyzeStepResult.results.length;
-    return chunkAY4K3J4R_cjs.roundToTwoDecimals(score * (options?.scale || 1));
+    return chunk33T2SZZ2_cjs.roundToTwoDecimals(score * (options?.scale || 1));
   }).generateReason({
     description: "Reason about the results",
     createPrompt: ({ score, results }) => {
@@ -1117,7 +1117,7 @@ function createHallucinationScorer({
       claims: zod.z.array(zod.z.string())
     }),
     createPrompt: ({ run }) => {
-      const prompt = createHallucinationExtractPrompt({ output: chunkAY4K3J4R_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" });
+      const prompt = createHallucinationExtractPrompt({ output: chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" });
       return prompt;
     }
   }).analyze({
@@ -1145,7 +1145,7 @@ function createHallucinationScorer({
       return 0;
     }
     const score = contradictedStatements / totalStatements * (options?.scale || 1);
-    return chunkAY4K3J4R_cjs.roundToTwoDecimals(score);
+    return chunk33T2SZZ2_cjs.roundToTwoDecimals(score);
   }).generateReason({
     description: "Reason about the results",
     createPrompt: async ({ run, results, score }) => {
@@ -1156,8 +1156,8 @@ function createHallucinationScorer({
         context = options?.context ?? [];
       }
       const prompt = createHallucinationReasonPrompt({
-        input: chunkAY4K3J4R_cjs.getUserMessageFromRunInput(run.input) ?? "",
-        output: chunkAY4K3J4R_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
+        input: chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "",
+        output: chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
         context,
         score,
         scale: options?.scale || 1,
@@ -1271,8 +1271,8 @@ function createToxicityScorer({
     outputSchema: zod.z.object({ verdicts: zod.z.array(zod.z.object({ verdict: zod.z.string(), reason: zod.z.string() })) }),
     createPrompt: ({ run }) => {
       const prompt = createToxicityAnalyzePrompt({
-        input: chunkAY4K3J4R_cjs.getUserMessageFromRunInput(run.input) ?? "",
-        output: chunkAY4K3J4R_cjs.getAssistantMessageFromRunOutput(run.output) ?? ""
+        input: chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "",
+        output: chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? ""
       });
       return prompt;
     }
@@ -1288,7 +1288,7 @@ function createToxicityScorer({
       }
     }
     const score = toxicityCount / numberOfVerdicts;
-    return chunkAY4K3J4R_cjs.roundToTwoDecimals(score * (options?.scale || 1));
+    return chunk33T2SZZ2_cjs.roundToTwoDecimals(score * (options?.scale || 1));
   }).generateReason({
     description: "Reason about the results",
     createPrompt: ({ results, score }) => {
@@ -1422,7 +1422,7 @@ function createToolCallAccuracyScorerLLM({ model, availableTools }) {
     if (isInputInvalid || isOutputInvalid) {
       throw new Error("Input and output messages cannot be null or empty");
     }
-    const { tools: actualTools, toolCallInfos } = chunkAY4K3J4R_cjs.extractToolCalls(run.output);
+    const { tools: actualTools, toolCallInfos } = chunk33T2SZZ2_cjs.extractToolCalls(run.output);
     return {
       actualTools,
       hasToolCalls: actualTools.length > 0,
@@ -1432,8 +1432,8 @@ function createToolCallAccuracyScorerLLM({ model, availableTools }) {
     description: "Analyze the appropriateness of tool selections",
     outputSchema: analyzeOutputSchema2,
     createPrompt: ({ run, results }) => {
-      const userInput = chunkAY4K3J4R_cjs.getUserMessageFromRunInput(run.input) ?? "";
-      const agentResponse = chunkAY4K3J4R_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
+      const userInput = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
+      const agentResponse = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
       const toolsCalled = results.preprocessStepResult?.actualTools || [];
       return createAnalyzePrompt2({
         userInput,
@@ -1450,11 +1450,11 @@ function createToolCallAccuracyScorerLLM({ model, availableTools }) {
     }
     const appropriateToolCalls = evaluations.filter((e) => e.wasAppropriate).length;
     const totalToolCalls = evaluations.length;
-    return chunkAY4K3J4R_cjs.roundToTwoDecimals(appropriateToolCalls / totalToolCalls);
+    return chunk33T2SZZ2_cjs.roundToTwoDecimals(appropriateToolCalls / totalToolCalls);
   }).generateReason({
     description: "Generate human-readable explanation of tool selection evaluation",
     createPrompt: ({ run, results, score }) => {
-      const userInput = chunkAY4K3J4R_cjs.getUserMessageFromRunInput(run.input) ?? "";
+      const userInput = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
       const evaluations = results.analyzeStepResult?.evaluations || [];
       const missingTools = results.analyzeStepResult?.missingTools || [];
       return createReasonPrompt3({
@@ -1636,6 +1636,16 @@ var DEFAULT_PENALTIES = {
   MAX_MISSING_CONTEXT_PENALTY: 0.5
   // Maximum 50% penalty for missing context
 };
+var getContext = ({
+  input,
+  output,
+  options
+}) => {
+  if (options.contextExtractor && chunk33T2SZZ2_cjs.isScorerRunInputForAgent(input) && chunk33T2SZZ2_cjs.isScorerRunOutputForAgent(output)) {
+    return options.contextExtractor(input, output);
+  }
+  return options.context ?? [];
+};
 function createContextRelevanceScorerLLM({
   model,
   options
@@ -1659,9 +1669,9 @@ function createContextRelevanceScorerLLM({
     description: "Analyze the relevance and utility of provided context",
     outputSchema: analyzeOutputSchema3,
     createPrompt: ({ run }) => {
-      const userQuery = chunkAY4K3J4R_cjs.getUserMessageFromRunInput(run.input) ?? "";
-      const agentResponse = chunkAY4K3J4R_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
-      const context = options.contextExtractor ? options.contextExtractor(run.input, run.output) : options.context;
+      const userQuery = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
+      const agentResponse = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
+      const context = getContext({ input: run.input, output: run.output, options });
       if (context.length === 0) {
         return createAnalyzePrompt3({
           userQuery,
@@ -1677,7 +1687,7 @@ function createContextRelevanceScorerLLM({
     }
   }).generateScore(({ results, run }) => {
     const evaluations = results.analyzeStepResult?.evaluations || [];
-    const context = options.contextExtractor ? options.contextExtractor(run.input, run.output) : options.context;
+    const context = getContext({ input: run.input, output: run.output, options });
     if (context.length === 0) {
       return 1 * (options.scale || 1);
     }
@@ -1708,12 +1718,12 @@ function createContextRelevanceScorerLLM({
     const missingContextPenalty = Math.min(missingContext.length * missingPenaltyRate, maxMissingPenalty);
     const finalScore = Math.max(0, relevanceScore - usagePenalty - missingContextPenalty);
     const scaledScore = finalScore * (options.scale || 1);
-    return chunkAY4K3J4R_cjs.roundToTwoDecimals(scaledScore);
+    return chunk33T2SZZ2_cjs.roundToTwoDecimals(scaledScore);
   }).generateReason({
     description: "Generate human-readable explanation of context relevance evaluation",
     createPrompt: ({ run, results, score }) => {
-      const userQuery = chunkAY4K3J4R_cjs.getUserMessageFromRunInput(run.input) ?? "";
-      const context = options.contextExtractor ? options.contextExtractor(run.input, run.output) : options.context;
+      const userQuery = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
+      const context = getContext({ input: run.input, output: run.output, options });
       if (context.length === 0) {
         return `No context was available for evaluation. The agent response was generated without any supporting context. Score: ${score}`;
       }
@@ -1860,6 +1870,16 @@ var contextRelevanceOutputSchema = zod.z.object({
     })
   )
 });
+var getContext2 = ({
+  input,
+  output,
+  options
+}) => {
+  if (options.contextExtractor && chunk33T2SZZ2_cjs.isScorerRunInputForAgent(input) && chunk33T2SZZ2_cjs.isScorerRunOutputForAgent(output)) {
+    return options.contextExtractor(input, output);
+  }
+  return options.context ?? [];
+};
 function createContextPrecisionScorer({
   model,
   options
@@ -1883,9 +1903,9 @@ function createContextPrecisionScorer({
     description: "Evaluate the relevance of each context piece for generating the expected output",
     outputSchema: contextRelevanceOutputSchema,
     createPrompt: ({ run }) => {
-      const input = chunkAY4K3J4R_cjs.getUserMessageFromRunInput(run.input) ?? "";
-      const output = chunkAY4K3J4R_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
-      const context = options.contextExtractor ? options.contextExtractor(run.input, run.output) : options.context;
+      const input = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
+      const output = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
+      const context = getContext2({ input: run.input, output: run.output, options });
       if (context.length === 0) {
         throw new Error("No context available for evaluation");
       }
@@ -1917,13 +1937,13 @@ function createContextPrecisionScorer({
     }
     const map = sumPrecision / relevantCount;
     const score = map * (options.scale || 1);
-    return chunkAY4K3J4R_cjs.roundToTwoDecimals(score);
+    return chunk33T2SZZ2_cjs.roundToTwoDecimals(score);
   }).generateReason({
     description: "Reason about the context precision results",
     createPrompt: ({ run, results, score }) => {
-      const input = chunkAY4K3J4R_cjs.getUserMessageFromRunInput(run.input) ?? "";
-      const output = chunkAY4K3J4R_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
-      const context = options.contextExtractor ? options.contextExtractor(run.input, run.output) : options.context;
+      const input = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
+      const output = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
+      const context = getContext2({ input: run.input, output: run.output, options });
       return createContextPrecisionReasonPrompt({
         input,
         output,
@@ -2177,8 +2197,8 @@ function createNoiseSensitivityScorerLLM({
     description: "Analyze the impact of noise on agent response quality",
     outputSchema: analyzeOutputSchema4,
     createPrompt: ({ run }) => {
-      const originalQuery = chunkAY4K3J4R_cjs.getUserMessageFromRunInput(run.input) ?? "";
-      const noisyResponse = chunkAY4K3J4R_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
+      const originalQuery = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
+      const noisyResponse = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
       if (!originalQuery || !noisyResponse) {
         throw new Error("Both original query and noisy response are required for evaluation");
       }
@@ -2221,11 +2241,11 @@ function createNoiseSensitivityScorerLLM({
     const majorIssues = analysisResult.majorIssues || [];
     const issuesPenalty = Math.min(majorIssues.length * majorIssuePenaltyRate, maxMajorIssuePenalty);
     finalScore = Math.max(0, finalScore - issuesPenalty);
-    return chunkAY4K3J4R_cjs.roundToTwoDecimals(finalScore);
+    return chunk33T2SZZ2_cjs.roundToTwoDecimals(finalScore);
   }).generateReason({
     description: "Generate human-readable explanation of noise sensitivity evaluation",
     createPrompt: ({ run, results, score }) => {
-      const originalQuery = chunkAY4K3J4R_cjs.getUserMessageFromRunInput(run.input) ?? "";
+      const originalQuery = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
       const analysisResult = results.analyzeStepResult;
       if (!analysisResult) {
         throw new Error("Analysis step failed to produce results for reason generation");
@@ -2550,17 +2570,17 @@ function createPromptAlignmentScorerLLM({
     description: "Analyze prompt-response alignment across multiple dimensions",
     outputSchema: analyzeOutputSchema5,
     createPrompt: ({ run }) => {
-      const userPrompt = chunkAY4K3J4R_cjs.getUserMessageFromRunInput(run.input) ?? "";
-      const systemPrompt = chunkAY4K3J4R_cjs.getCombinedSystemPrompt(run.input) ?? "";
-      const agentResponse = chunkAY4K3J4R_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
+      const userPrompt = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
+      const systemPrompt = chunk33T2SZZ2_cjs.getCombinedSystemPrompt(run.input) ?? "";
+      const agentResponse = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
       if (evaluationMode === "user" && !userPrompt) {
         throw new Error("User prompt is required for user prompt alignment scoring");
       }
       if (evaluationMode === "system" && !systemPrompt) {
         throw new Error("System prompt is required for system prompt alignment scoring");
       }
-      if (evaluationMode === "both" && (!userPrompt || !systemPrompt)) {
-        throw new Error("Both user and system prompts are required for combined alignment scoring");
+      if (evaluationMode === "both" && !userPrompt && !systemPrompt) {
+        throw new Error("A user or system prompt is required for combined alignment scoring");
       }
       if (!agentResponse) {
         throw new Error("Agent response is required for prompt alignment scoring");
@@ -2588,12 +2608,12 @@ function createPromptAlignmentScorerLLM({
       weightedScore = userScore * SCORING_WEIGHTS.BOTH.USER_WEIGHT + systemScore * SCORING_WEIGHTS.BOTH.SYSTEM_WEIGHT;
     }
     const finalScore = weightedScore * scale;
-    return chunkAY4K3J4R_cjs.roundToTwoDecimals(finalScore);
+    return chunk33T2SZZ2_cjs.roundToTwoDecimals(finalScore);
   }).generateReason({
     description: "Generate human-readable explanation of prompt alignment evaluation",
     createPrompt: ({ run, results, score }) => {
-      const userPrompt = chunkAY4K3J4R_cjs.getUserMessageFromRunInput(run.input) ?? "";
-      const systemPrompt = chunkAY4K3J4R_cjs.getCombinedSystemPrompt(run.input) ?? "";
+      const userPrompt = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
+      const systemPrompt = chunk33T2SZZ2_cjs.getCombinedSystemPrompt(run.input) ?? "";
       const analysis = results.analyzeStepResult;
       if (!analysis) {
         return `Unable to analyze prompt alignment. Score: ${score}`;
@@ -2798,8 +2818,8 @@ function createTrajectoryAccuracyScorerLLM({
     description: "Analyze the quality and appropriateness of the agent trajectory",
     outputSchema: analyzeOutputSchema6,
     createPrompt: ({ run, results }) => {
-      const userInput = chunkAY4K3J4R_cjs.getUserMessageFromRunInput(run.input) ?? "";
-      const agentResponse = chunkAY4K3J4R_cjs.getAssistantMessageFromRunOutput(run.output.rawOutput) ?? "";
+      const userInput = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
+      const agentResponse = chunk33T2SZZ2_cjs.getAssistantMessageFromRunOutput(run.output.rawOutput) ?? "";
       return createAnalyzePrompt6({
         userInput,
         agentResponse,
@@ -2824,11 +2844,11 @@ function createTrajectoryAccuracyScorerLLM({
     const necessityScore = necessarySteps / totalSteps;
     const orderScore = orderedSteps / totalSteps;
     const score = necessityScore * 0.6 + orderScore * 0.3 - missingPenalty * 0.1;
-    return chunkAY4K3J4R_cjs.roundToTwoDecimals(Math.max(0, Math.min(1, score)));
+    return chunk33T2SZZ2_cjs.roundToTwoDecimals(Math.max(0, Math.min(1, score)));
   }).generateReason({
     description: "Generate human-readable explanation of trajectory evaluation",
     createPrompt: ({ run, results, score }) => {
-      const userInput = chunkAY4K3J4R_cjs.getUserMessageFromRunInput(run.input) ?? "";
+      const userInput = chunk33T2SZZ2_cjs.getUserMessageFromRunInput(run.input) ?? "";
       const stepEvaluations = results.analyzeStepResult?.stepEvaluations || [];
       const missingSteps = results.analyzeStepResult?.missingSteps || [];
       const extraSteps = results.analyzeStepResult?.extraSteps || [];
@@ -2891,18 +2911,18 @@ function createCompletenessScorer() {
     type: "agent"
   }).preprocess(async ({ run }) => {
     const isInputInvalid = !run.input || run.input.inputMessages.some((i) => {
-      const content = chunkAY4K3J4R_cjs.getTextContentFromMastraDBMessage(i);
+      const content = chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i);
       return content === null || content === void 0;
     });
     const isOutputInvalid = !run.output || run.output.some((i) => {
-      const content = chunkAY4K3J4R_cjs.getTextContentFromMastraDBMessage(i);
+      const content = chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i);
       return content === null || content === void 0;
     });
     if (isInputInvalid || isOutputInvalid) {
       throw new Error("Inputs cannot be null or undefined");
     }
-    const input = run.input?.inputMessages.map((i) => chunkAY4K3J4R_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
-    const output = run.output?.map((i) => chunkAY4K3J4R_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
+    const input = run.input?.inputMessages.map((i) => chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
+    const output = run.output?.map((i) => chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
     const inputToProcess = input;
     const outputToProcess = output;
     const inputDoc = nlp__default.default(inputToProcess.trim());
@@ -3007,8 +3027,8 @@ function createTextualDifferenceScorer() {
     description: "Calculate textual difference between input and output using sequence matching algorithms.",
     type: "agent"
   }).preprocess(async ({ run }) => {
-    const input = run.input?.inputMessages?.map((i) => chunkAY4K3J4R_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
-    const output = run.output?.map((i) => chunkAY4K3J4R_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
+    const input = run.input?.inputMessages?.map((i) => chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
+    const output = run.output?.map((i) => chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
     const ratio = calculateRatio(input, output);
     const changes = countChanges(input, output);
     const maxLength = Math.max(input.length, output.length);
@@ -3031,8 +3051,8 @@ function createKeywordCoverageScorer() {
     description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
     type: "agent"
   }).preprocess(async ({ run }) => {
-    const input = run.input?.inputMessages?.map((i) => chunkAY4K3J4R_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
-    const output = run.output?.map((i) => chunkAY4K3J4R_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
+    const input = run.input?.inputMessages?.map((i) => chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
+    const output = run.output?.map((i) => chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
     if (!input && !output) {
       return {
         result: {
@@ -3085,8 +3105,8 @@ function createContentSimilarityScorer({ ignoreCase, ignoreWhitespace } = { igno
     description: "Calculates content similarity between input and output messages using string comparison algorithms.",
     type: "agent"
   }).preprocess(async ({ run }) => {
-    let processedInput = run.input?.inputMessages.map((i) => chunkAY4K3J4R_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
-    let processedOutput = run.output.map((i) => chunkAY4K3J4R_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
+    let processedInput = run.input?.inputMessages.map((i) => chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
+    let processedOutput = run.output.map((i) => chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
     if (ignoreCase) {
       processedInput = processedInput.toLowerCase();
       processedOutput = processedOutput.toLowerCase();
@@ -3116,7 +3136,7 @@ function createToneScorer(config = {}) {
     type: "agent"
   }).preprocess(async ({ run }) => {
     const sentiment = new Sentiment__default.default();
-    const agentMessage = run.output?.map((i) => chunkAY4K3J4R_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
+    const agentMessage = run.output?.map((i) => chunk33T2SZZ2_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
     const responseSentiment = sentiment.analyze(agentMessage);
     if (referenceTone) {
       const referenceSentiment = sentiment.analyze(referenceTone);
@@ -3203,7 +3223,7 @@ function createToolCallAccuracyScorerCode(options) {
     if (isInputInvalid || isOutputInvalid) {
       throw new Error("Input and output messages cannot be null or empty");
     }
-    const { tools: actualTools, toolCallInfos } = chunkAY4K3J4R_cjs.extractToolCalls(run.output);
+    const { tools: actualTools, toolCallInfos } = chunk33T2SZZ2_cjs.extractToolCalls(run.output);
     const correctToolCalled = expectedTool ? strictMode ? actualTools.length === 1 && actualTools[0] === expectedTool : actualTools.includes(expectedTool) : false;
     return {
       expectedTool,
@@ -3278,7 +3298,7 @@ function createTrajectoryAccuracyScorerCode(options = {}) {
     const itemExpectation = run.expectedTrajectory;
     const effectiveOrdering = itemExpectation?.ordering ?? ordering;
     const effectiveAllowRepeated = itemExpectation?.allowRepeatedSteps ?? allowRepeatedSteps;
-    const comparison = chunkAY4K3J4R_cjs.compareTrajectories(
+    const comparison = chunk33T2SZZ2_cjs.compareTrajectories(
       actualTrajectory,
       { steps: resolvedExpectedSteps },
       {
@@ -3336,7 +3356,7 @@ function evaluateNestedExpectations(expectedSteps, actualSteps, weights = { accu
     const childConfig = expectedStep.children;
     let accuracy;
     if (childConfig.steps && childConfig.steps.length > 0) {
-      accuracy = chunkAY4K3J4R_cjs.compareTrajectories(
+      accuracy = chunk33T2SZZ2_cjs.compareTrajectories(
         childTrajectory,
         { steps: childConfig.steps },
         {
@@ -3346,18 +3366,18 @@ function evaluateNestedExpectations(expectedSteps, actualSteps, weights = { accu
       );
     }
     const hasEfficiencyConfig = childConfig.maxSteps !== void 0 || childConfig.maxTotalTokens !== void 0 || childConfig.maxTotalDurationMs !== void 0 || childConfig.noRedundantCalls !== void 0;
-    const efficiency = hasEfficiencyConfig ? chunkAY4K3J4R_cjs.checkTrajectoryEfficiency(childTrajectory, {
+    const efficiency = hasEfficiencyConfig ? chunk33T2SZZ2_cjs.checkTrajectoryEfficiency(childTrajectory, {
       maxSteps: childConfig.maxSteps,
       maxTotalTokens: childConfig.maxTotalTokens,
       maxTotalDurationMs: childConfig.maxTotalDurationMs,
       noRedundantCalls: childConfig.noRedundantCalls ?? true
     }) : void 0;
     const hasBlacklistConfig = childConfig.blacklistedTools && childConfig.blacklistedTools.length > 0 || childConfig.blacklistedSequences && childConfig.blacklistedSequences.length > 0;
-    const blacklist = hasBlacklistConfig ? chunkAY4K3J4R_cjs.checkTrajectoryBlacklist(childTrajectory, {
+    const blacklist = hasBlacklistConfig ? chunk33T2SZZ2_cjs.checkTrajectoryBlacklist(childTrajectory, {
       blacklistedTools: childConfig.blacklistedTools,
       blacklistedSequences: childConfig.blacklistedSequences
     }) : void 0;
-    const toolFailures = chunkAY4K3J4R_cjs.analyzeToolFailures(childTrajectory, {
+    const toolFailures = chunk33T2SZZ2_cjs.analyzeToolFailures(childTrajectory, {
       maxRetriesPerTool: childConfig.maxRetriesPerTool ?? 2
     });
     const nested = childConfig.steps ? evaluateNestedExpectations(childConfig.steps, actualStep.children, weights) : [];
@@ -3422,7 +3442,7 @@ function createTrajectoryScorerCode(options = {}) {
     }
     let accuracy;
     if (config.steps && config.steps.length > 0) {
-      accuracy = chunkAY4K3J4R_cjs.compareTrajectories(
+      accuracy = chunk33T2SZZ2_cjs.compareTrajectories(
         actualTrajectory,
         { steps: config.steps },
         {
@@ -3432,18 +3452,18 @@ function createTrajectoryScorerCode(options = {}) {
       );
     }
     const hasEfficiencyConfig = config.maxSteps !== void 0 || config.maxTotalTokens !== void 0 || config.maxTotalDurationMs !== void 0 || config.noRedundantCalls !== void 0;
-    const efficiency = hasEfficiencyConfig ? chunkAY4K3J4R_cjs.checkTrajectoryEfficiency(actualTrajectory, {
+    const efficiency = hasEfficiencyConfig ? chunk33T2SZZ2_cjs.checkTrajectoryEfficiency(actualTrajectory, {
       maxSteps: config.maxSteps,
       maxTotalTokens: config.maxTotalTokens,
       maxTotalDurationMs: config.maxTotalDurationMs,
       noRedundantCalls: config.noRedundantCalls ?? true
     }) : void 0;
     const hasBlacklistConfig = config.blacklistedTools && config.blacklistedTools.length > 0 || config.blacklistedSequences && config.blacklistedSequences.length > 0;
-    const blacklist = hasBlacklistConfig ? chunkAY4K3J4R_cjs.checkTrajectoryBlacklist(actualTrajectory, {
+    const blacklist = hasBlacklistConfig ? chunk33T2SZZ2_cjs.checkTrajectoryBlacklist(actualTrajectory, {
       blacklistedTools: config.blacklistedTools,
       blacklistedSequences: config.blacklistedSequences
     }) : void 0;
-    const toolFailures = chunkAY4K3J4R_cjs.analyzeToolFailures(actualTrajectory, {
+    const toolFailures = chunk33T2SZZ2_cjs.analyzeToolFailures(actualTrajectory, {
       maxRetriesPerTool: config.maxRetriesPerTool ?? 2
     });
     const nested = config.steps && config.steps.length > 0 ? evaluateNestedExpectations(config.steps, actualTrajectory.steps, w) : void 0;