npm - @mastra/evals - Versions diffs - 0.10.8-alpha.0 → 0.11.0-alpha.2 - Mend

@mastra/evals 0.10.8-alpha.0 → 0.11.0-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/dist/_tsup-dts-rollup.d.cts +4 -1
package/dist/_tsup-dts-rollup.d.ts +4 -1
package/dist/scorers/code/index.cjs +44 -4
package/dist/scorers/code/index.d.cts +1 -0
package/dist/scorers/code/index.d.ts +1 -0
package/dist/scorers/code/index.js +43 -5
package/dist/scorers/llm/index.cjs +4 -4
package/dist/scorers/llm/index.js +4 -4
package/package.json +3 -3

package/dist/_tsup-dts-rollup.d.cts CHANGED Viewed

@@ -405,7 +405,10 @@ export { createTextualDifferenceScorer }
 export { createTextualDifferenceScorer as createTextualDifferenceScorer_alias_1 }
 export { createTextualDifferenceScorer as createTextualDifferenceScorer_alias_2 }
-export declare function createToneScorer(): MastraScorer;
+declare function createToneScorer(): MastraScorer;
+export { createToneScorer }
+export { createToneScorer as createToneScorer_alias_1 }
+export { createToneScorer as createToneScorer_alias_2 }
 export declare function createToxicityAnalyzePrompt({ input, output }: {
     input: string;

package/dist/_tsup-dts-rollup.d.ts CHANGED Viewed

@@ -405,7 +405,10 @@ export { createTextualDifferenceScorer }
 export { createTextualDifferenceScorer as createTextualDifferenceScorer_alias_1 }
 export { createTextualDifferenceScorer as createTextualDifferenceScorer_alias_2 }
-export declare function createToneScorer(): MastraScorer;
+declare function createToneScorer(): MastraScorer;
+export { createToneScorer }
+export { createToneScorer as createToneScorer_alias_1 }
+export { createToneScorer as createToneScorer_alias_2 }
 export declare function createToxicityAnalyzePrompt({ input, output }: {
     input: string;

package/dist/scorers/code/index.cjs CHANGED Viewed

@@ -5,12 +5,14 @@ var nlp = require('compromise');
 var difflib = require('difflib');
 var keyword_extractor = require('keyword-extractor');
 var stringSimilarity = require('string-similarity');
+var Sentiment = require('sentiment');
 function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
 var nlp__default = /*#__PURE__*/_interopDefault(nlp);
 var keyword_extractor__default = /*#__PURE__*/_interopDefault(keyword_extractor);
 var stringSimilarity__default = /*#__PURE__*/_interopDefault(stringSimilarity);
+var Sentiment__default = /*#__PURE__*/_interopDefault(Sentiment);
 function normalizeString(str) {
   return str.normalize("NFD").replace(/[\u0300-\u036f]/g, "").toLowerCase();
@@ -63,7 +65,7 @@ function createCompletenessScorer() {
       if (isInputInvalid || isOutputInvalid) {
         throw new Error("Inputs cannot be null or undefined");
       }
-      const input = run.input.map((i) => i.content).join(", ");
+      const input = run.input?.map((i) => i.content).join(", ") || "";
       const output = run.output.text;
       const inputToProcess = input;
       const outputToProcess = output;
@@ -100,7 +102,7 @@ function createTextualDifferenceScorer() {
     name: "Completeness",
     description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
     analyze: async (run) => {
-      const input = run.input.map((i) => i.content).join(", ");
+      const input = run.input?.map((i) => i.content).join(", ") || "";
       const output = run.output.text;
       const matcher = new difflib.SequenceMatcher(null, input, output);
       const ratio = matcher.ratio();
@@ -125,7 +127,7 @@ function createKeywordCoverageScorer() {
     name: "Completeness",
     description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
     extract: async (run) => {
-      const input = run.input.map((i) => i.content).join(", ");
+      const input = run.input?.map((i) => i.content).join(", ") || "";
       const output = run.output.text;
       if (!input && !output) {
         return {
@@ -182,7 +184,7 @@ function createContentSimilarityScorer({ ignoreCase, ignoreWhitespace } = { igno
     name: "Completeness",
     description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
     extract: async (run) => {
-      let processedInput = run.input.map((i) => i.content).join(", ");
+      let processedInput = run.input?.map((i) => i.content).join(", ") || "";
       let processedOutput = run.output.text;
       if (ignoreCase) {
         processedInput = processedInput.toLowerCase();
@@ -213,8 +215,46 @@ function createContentSimilarityScorer({ ignoreCase, ignoreWhitespace } = { igno
     }
   });
 }
+function createToneScorer() {
+  return scores.createScorer({
+    name: "Completeness",
+    description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
+    analyze: async (run) => {
+      const sentiment = new Sentiment__default.default();
+      const input = run.input?.map((i) => i.content).join(", ") || "";
+      const output = run.output.text;
+      const responseSentiment = sentiment.analyze(input);
+      if (output) {
+        const referenceSentiment = sentiment.analyze(output);
+        const sentimentDiff = Math.abs(responseSentiment.comparative - referenceSentiment.comparative);
+        const normalizedScore = Math.max(0, 1 - sentimentDiff);
+        return {
+          score: normalizedScore,
+          result: {
+            responseSentiment: responseSentiment.comparative,
+            referenceSentiment: referenceSentiment.comparative,
+            difference: sentimentDiff
+          }
+        };
+      }
+      const sentences = input.match(/[^.!?]+[.!?]+/g) || [input];
+      const sentiments = sentences.map((s) => sentiment.analyze(s).comparative);
+      const avgSentiment = sentiments.reduce((a, b) => a + b, 0) / sentiments.length;
+      const variance = sentiments.reduce((sum, s) => sum + Math.pow(s - avgSentiment, 2), 0) / sentiments.length;
+      const stability = Math.max(0, 1 - variance);
+      return {
+        score: stability,
+        result: {
+          avgSentiment,
+          sentimentVariance: variance
+        }
+      };
+    }
+  });
+}
 exports.createCompletenessScorer = createCompletenessScorer;
 exports.createContentSimilarityScorer = createContentSimilarityScorer;
 exports.createKeywordCoverageScorer = createKeywordCoverageScorer;
 exports.createTextualDifferenceScorer = createTextualDifferenceScorer;
+exports.createToneScorer = createToneScorer;

package/dist/scorers/code/index.d.cts CHANGED Viewed

@@ -2,3 +2,4 @@ export { createCompletenessScorer_alias_1 as createCompletenessScorer } from '..
 export { createTextualDifferenceScorer_alias_1 as createTextualDifferenceScorer } from '../../_tsup-dts-rollup.cjs';
 export { createKeywordCoverageScorer_alias_1 as createKeywordCoverageScorer } from '../../_tsup-dts-rollup.cjs';
 export { createContentSimilarityScorer_alias_1 as createContentSimilarityScorer } from '../../_tsup-dts-rollup.cjs';
+export { createToneScorer_alias_1 as createToneScorer } from '../../_tsup-dts-rollup.cjs';

package/dist/scorers/code/index.d.ts CHANGED Viewed

@@ -2,3 +2,4 @@ export { createCompletenessScorer_alias_1 as createCompletenessScorer } from '..
 export { createTextualDifferenceScorer_alias_1 as createTextualDifferenceScorer } from '../../_tsup-dts-rollup.js';
 export { createKeywordCoverageScorer_alias_1 as createKeywordCoverageScorer } from '../../_tsup-dts-rollup.js';
 export { createContentSimilarityScorer_alias_1 as createContentSimilarityScorer } from '../../_tsup-dts-rollup.js';
+export { createToneScorer_alias_1 as createToneScorer } from '../../_tsup-dts-rollup.js';

package/dist/scorers/code/index.js CHANGED Viewed

@@ -3,6 +3,7 @@ import nlp from 'compromise';
 import { SequenceMatcher } from 'difflib';
 import keyword_extractor from 'keyword-extractor';
 import stringSimilarity from 'string-similarity';
+import Sentiment from 'sentiment';
 function normalizeString(str) {
   return str.normalize("NFD").replace(/[\u0300-\u036f]/g, "").toLowerCase();
@@ -55,7 +56,7 @@ function createCompletenessScorer() {
       if (isInputInvalid || isOutputInvalid) {
         throw new Error("Inputs cannot be null or undefined");
       }
-      const input = run.input.map((i) => i.content).join(", ");
+      const input = run.input?.map((i) => i.content).join(", ") || "";
       const output = run.output.text;
       const inputToProcess = input;
       const outputToProcess = output;
@@ -92,7 +93,7 @@ function createTextualDifferenceScorer() {
     name: "Completeness",
     description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
     analyze: async (run) => {
-      const input = run.input.map((i) => i.content).join(", ");
+      const input = run.input?.map((i) => i.content).join(", ") || "";
       const output = run.output.text;
       const matcher = new SequenceMatcher(null, input, output);
       const ratio = matcher.ratio();
@@ -117,7 +118,7 @@ function createKeywordCoverageScorer() {
     name: "Completeness",
     description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
     extract: async (run) => {
-      const input = run.input.map((i) => i.content).join(", ");
+      const input = run.input?.map((i) => i.content).join(", ") || "";
       const output = run.output.text;
       if (!input && !output) {
         return {
@@ -174,7 +175,7 @@ function createContentSimilarityScorer({ ignoreCase, ignoreWhitespace } = { igno
     name: "Completeness",
     description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
     extract: async (run) => {
-      let processedInput = run.input.map((i) => i.content).join(", ");
+      let processedInput = run.input?.map((i) => i.content).join(", ") || "";
       let processedOutput = run.output.text;
       if (ignoreCase) {
         processedInput = processedInput.toLowerCase();
@@ -205,5 +206,42 @@ function createContentSimilarityScorer({ ignoreCase, ignoreWhitespace } = { igno
     }
   });
 }
+function createToneScorer() {
+  return createScorer({
+    name: "Completeness",
+    description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
+    analyze: async (run) => {
+      const sentiment = new Sentiment();
+      const input = run.input?.map((i) => i.content).join(", ") || "";
+      const output = run.output.text;
+      const responseSentiment = sentiment.analyze(input);
+      if (output) {
+        const referenceSentiment = sentiment.analyze(output);
+        const sentimentDiff = Math.abs(responseSentiment.comparative - referenceSentiment.comparative);
+        const normalizedScore = Math.max(0, 1 - sentimentDiff);
+        return {
+          score: normalizedScore,
+          result: {
+            responseSentiment: responseSentiment.comparative,
+            referenceSentiment: referenceSentiment.comparative,
+            difference: sentimentDiff
+          }
+        };
+      }
+      const sentences = input.match(/[^.!?]+[.!?]+/g) || [input];
+      const sentiments = sentences.map((s) => sentiment.analyze(s).comparative);
+      const avgSentiment = sentiments.reduce((a, b) => a + b, 0) / sentiments.length;
+      const variance = sentiments.reduce((sum, s) => sum + Math.pow(s - avgSentiment, 2), 0) / sentiments.length;
+      const stability = Math.max(0, 1 - variance);
+      return {
+        score: stability,
+        result: {
+          avgSentiment,
+          sentimentVariance: variance
+        }
+      };
+    }
+  });
+}
-export { createCompletenessScorer, createContentSimilarityScorer, createKeywordCoverageScorer, createTextualDifferenceScorer };
+export { createCompletenessScorer, createContentSimilarityScorer, createKeywordCoverageScorer, createTextualDifferenceScorer, createToneScorer };

package/dist/scorers/llm/index.cjs CHANGED Viewed

@@ -243,7 +243,7 @@ function createAnswerRelevancyScorer({
       description: "Reason about the results",
       createPrompt: ({ run }) => {
         return createReasonPrompt({
-          input: run.input.map((input) => input.content).join(", "),
+          input: run.input?.map((input) => input.content).join(", ") || "",
           output: run.output.text,
           score: run.score,
           results: run.analyzeStepResult.results,
@@ -477,7 +477,7 @@ function createFaithfulnessScorer({
       description: "Reason about the results",
       createPrompt: ({ run }) => {
         const prompt = createFaithfulnessReasonPrompt({
-          input: run.input.map((input) => input.content).join(", "),
+          input: run.input?.map((input) => input.content).join(", ") || "",
           output: run.output.text,
           context: options?.context || [],
           score: run.score,
@@ -885,7 +885,7 @@ function createHallucinationScorer({
       description: "Reason about the results",
       createPrompt: ({ run }) => {
         const prompt = createHallucinationReasonPrompt({
-          input: run.input.map((input) => input.content).join(", "),
+          input: run.input?.map((input) => input.content).join(", ") || "",
           output: run.output.text,
           context: run?.additionalContext?.context || [],
           score: run.score,
@@ -999,7 +999,7 @@ function createToxicityScorer({ model, options }) {
       outputSchema: zod.z.object({ verdicts: zod.z.array(zod.z.object({ verdict: zod.z.string(), reason: zod.z.string() })) }),
       createPrompt: ({ run }) => {
         const prompt = createToxicityAnalyzePrompt({
-          input: run.input.map((input) => input.content).join(", "),
+          input: run.input?.map((input) => input.content).join(", ") || "",
           output: run.output.text
         });
         return prompt;

package/dist/scorers/llm/index.js CHANGED Viewed

@@ -241,7 +241,7 @@ function createAnswerRelevancyScorer({
       description: "Reason about the results",
       createPrompt: ({ run }) => {
         return createReasonPrompt({
-          input: run.input.map((input) => input.content).join(", "),
+          input: run.input?.map((input) => input.content).join(", ") || "",
           output: run.output.text,
           score: run.score,
           results: run.analyzeStepResult.results,
@@ -475,7 +475,7 @@ function createFaithfulnessScorer({
       description: "Reason about the results",
       createPrompt: ({ run }) => {
         const prompt = createFaithfulnessReasonPrompt({
-          input: run.input.map((input) => input.content).join(", "),
+          input: run.input?.map((input) => input.content).join(", ") || "",
           output: run.output.text,
           context: options?.context || [],
           score: run.score,
@@ -883,7 +883,7 @@ function createHallucinationScorer({
       description: "Reason about the results",
       createPrompt: ({ run }) => {
         const prompt = createHallucinationReasonPrompt({
-          input: run.input.map((input) => input.content).join(", "),
+          input: run.input?.map((input) => input.content).join(", ") || "",
           output: run.output.text,
           context: run?.additionalContext?.context || [],
           score: run.score,
@@ -997,7 +997,7 @@ function createToxicityScorer({ model, options }) {
       outputSchema: z.object({ verdicts: z.array(z.object({ verdict: z.string(), reason: z.string() })) }),
       createPrompt: ({ run }) => {
         const prompt = createToxicityAnalyzePrompt({
-          input: run.input.map((input) => input.content).join(", "),
+          input: run.input?.map((input) => input.content).join(", ") || "",
           output: run.output.text
         });
         return prompt;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mastra/evals",
-  "version": "0.10.8-alpha.0",
+  "version": "0.11.0-alpha.2",
   "description": "",
   "type": "module",
   "files": [
@@ -84,7 +84,7 @@
     "zod": "^3.25.67"
   },
   "peerDependencies": {
-    "@mastra/core": ">=0.11.0-0 <0.12.0-0",
+    "@mastra/core": ">=0.12.0-0 <0.13.0-0",
     "ai": "^4.0.0"
   },
   "devDependencies": {
@@ -101,7 +101,7 @@
     "typescript": "^5.8.3",
     "vitest": "^3.2.4",
     "@internal/lint": "0.0.23",
-    "@mastra/core": "0.12.0-alpha.0"
+    "@mastra/core": "0.12.0-alpha.5"
   },
   "scripts": {
     "check": "tsc --noEmit",