@mastra/evals 0.10.8-alpha.0 → 0.10.8-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -405,7 +405,10 @@ export { createTextualDifferenceScorer }
405
405
  export { createTextualDifferenceScorer as createTextualDifferenceScorer_alias_1 }
406
406
  export { createTextualDifferenceScorer as createTextualDifferenceScorer_alias_2 }
407
407
 
408
- export declare function createToneScorer(): MastraScorer;
408
+ declare function createToneScorer(): MastraScorer;
409
+ export { createToneScorer }
410
+ export { createToneScorer as createToneScorer_alias_1 }
411
+ export { createToneScorer as createToneScorer_alias_2 }
409
412
 
410
413
  export declare function createToxicityAnalyzePrompt({ input, output }: {
411
414
  input: string;
@@ -405,7 +405,10 @@ export { createTextualDifferenceScorer }
405
405
  export { createTextualDifferenceScorer as createTextualDifferenceScorer_alias_1 }
406
406
  export { createTextualDifferenceScorer as createTextualDifferenceScorer_alias_2 }
407
407
 
408
- export declare function createToneScorer(): MastraScorer;
408
+ declare function createToneScorer(): MastraScorer;
409
+ export { createToneScorer }
410
+ export { createToneScorer as createToneScorer_alias_1 }
411
+ export { createToneScorer as createToneScorer_alias_2 }
409
412
 
410
413
  export declare function createToxicityAnalyzePrompt({ input, output }: {
411
414
  input: string;
@@ -5,12 +5,14 @@ var nlp = require('compromise');
5
5
  var difflib = require('difflib');
6
6
  var keyword_extractor = require('keyword-extractor');
7
7
  var stringSimilarity = require('string-similarity');
8
+ var Sentiment = require('sentiment');
8
9
 
9
10
  function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
10
11
 
11
12
  var nlp__default = /*#__PURE__*/_interopDefault(nlp);
12
13
  var keyword_extractor__default = /*#__PURE__*/_interopDefault(keyword_extractor);
13
14
  var stringSimilarity__default = /*#__PURE__*/_interopDefault(stringSimilarity);
15
+ var Sentiment__default = /*#__PURE__*/_interopDefault(Sentiment);
14
16
 
15
17
  function normalizeString(str) {
16
18
  return str.normalize("NFD").replace(/[\u0300-\u036f]/g, "").toLowerCase();
@@ -63,7 +65,7 @@ function createCompletenessScorer() {
63
65
  if (isInputInvalid || isOutputInvalid) {
64
66
  throw new Error("Inputs cannot be null or undefined");
65
67
  }
66
- const input = run.input.map((i) => i.content).join(", ");
68
+ const input = run.input?.map((i) => i.content).join(", ") || "";
67
69
  const output = run.output.text;
68
70
  const inputToProcess = input;
69
71
  const outputToProcess = output;
@@ -100,7 +102,7 @@ function createTextualDifferenceScorer() {
100
102
  name: "Completeness",
101
103
  description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
102
104
  analyze: async (run) => {
103
- const input = run.input.map((i) => i.content).join(", ");
105
+ const input = run.input?.map((i) => i.content).join(", ") || "";
104
106
  const output = run.output.text;
105
107
  const matcher = new difflib.SequenceMatcher(null, input, output);
106
108
  const ratio = matcher.ratio();
@@ -125,7 +127,7 @@ function createKeywordCoverageScorer() {
125
127
  name: "Completeness",
126
128
  description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
127
129
  extract: async (run) => {
128
- const input = run.input.map((i) => i.content).join(", ");
130
+ const input = run.input?.map((i) => i.content).join(", ") || "";
129
131
  const output = run.output.text;
130
132
  if (!input && !output) {
131
133
  return {
@@ -182,7 +184,7 @@ function createContentSimilarityScorer({ ignoreCase, ignoreWhitespace } = { igno
182
184
  name: "Completeness",
183
185
  description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
184
186
  extract: async (run) => {
185
- let processedInput = run.input.map((i) => i.content).join(", ");
187
+ let processedInput = run.input?.map((i) => i.content).join(", ") || "";
186
188
  let processedOutput = run.output.text;
187
189
  if (ignoreCase) {
188
190
  processedInput = processedInput.toLowerCase();
@@ -213,8 +215,46 @@ function createContentSimilarityScorer({ ignoreCase, ignoreWhitespace } = { igno
213
215
  }
214
216
  });
215
217
  }
/**
 * Builds a scorer that rates tone consistency using the "sentiment" library.
 *
 * Two modes, selected by whether the run has non-empty output text:
 *  - With output text: score = max(0, 1 - |comparative(input) - comparative(output)|).
 *  - Without output text: score = max(0, 1 - variance), where variance is the
 *    population variance of the per-sentence comparative sentiment of the input.
 *
 * @returns The scorer produced by `scores.createScorer`.
 */
function createToneScorer() {
  return scores.createScorer({
    // The name/description used to be copy-pasted from the completeness scorer
    // ("Completeness" / compromise-based coverage), which mislabeled this scorer.
    name: "Tone Consistency",
    description: 'Leverage the "sentiment" library to compare the sentiment of the input and output, or to measure how stable the sentiment is across the sentences of the input when there is no output.',
    analyze: async (run) => {
      const sentiment = new Sentiment__default.default();
      // A missing input array, or one that joins to an empty string, is treated as "".
      const input = run.input?.map((i) => i.content).join(", ") || "";
      const output = run.output.text;
      // NOTE: the key names are historical: `responseSentiment` holds the sentiment
      // of the joined INPUT, and `referenceSentiment` holds that of the OUTPUT text.
      const responseSentiment = sentiment.analyze(input);
      if (output) {
        const referenceSentiment = sentiment.analyze(output);
        const sentimentDiff = Math.abs(responseSentiment.comparative - referenceSentiment.comparative);
        // Clamp at 0 so a difference larger than 1 cannot produce a negative score.
        const normalizedScore = Math.max(0, 1 - sentimentDiff);
        return {
          score: normalizedScore,
          result: {
            responseSentiment: responseSentiment.comparative,
            referenceSentiment: referenceSentiment.comparative,
            difference: sentimentDiff
          }
        };
      }
      // A sentence is a run of non-terminator characters followed by one or more of
      // ". ! ?". If nothing matches, the whole input counts as a single sentence, so
      // `sentiments` is never empty. Trailing text without a terminator is not
      // matched when at least one terminated sentence precedes it.
      const sentences = input.match(/[^.!?]+[.!?]+/g) || [input];
      const sentiments = sentences.map((s) => sentiment.analyze(s).comparative);
      const avgSentiment = sentiments.reduce((a, b) => a + b, 0) / sentiments.length;
      const variance = sentiments.reduce((sum, s) => sum + Math.pow(s - avgSentiment, 2), 0) / sentiments.length;
      // Lower variance across sentences means a steadier tone; clamp at 0.
      const stability = Math.max(0, 1 - variance);
      return {
        score: stability,
        result: {
          avgSentiment,
          sentimentVariance: variance
        }
      };
    }
  });
}
216
255
 
217
256
  exports.createCompletenessScorer = createCompletenessScorer;
218
257
  exports.createContentSimilarityScorer = createContentSimilarityScorer;
219
258
  exports.createKeywordCoverageScorer = createKeywordCoverageScorer;
220
259
  exports.createTextualDifferenceScorer = createTextualDifferenceScorer;
260
+ exports.createToneScorer = createToneScorer;
@@ -2,3 +2,4 @@ export { createCompletenessScorer_alias_1 as createCompletenessScorer } from '..
2
2
  export { createTextualDifferenceScorer_alias_1 as createTextualDifferenceScorer } from '../../_tsup-dts-rollup.cjs';
3
3
  export { createKeywordCoverageScorer_alias_1 as createKeywordCoverageScorer } from '../../_tsup-dts-rollup.cjs';
4
4
  export { createContentSimilarityScorer_alias_1 as createContentSimilarityScorer } from '../../_tsup-dts-rollup.cjs';
5
+ export { createToneScorer_alias_1 as createToneScorer } from '../../_tsup-dts-rollup.cjs';
@@ -2,3 +2,4 @@ export { createCompletenessScorer_alias_1 as createCompletenessScorer } from '..
2
2
  export { createTextualDifferenceScorer_alias_1 as createTextualDifferenceScorer } from '../../_tsup-dts-rollup.js';
3
3
  export { createKeywordCoverageScorer_alias_1 as createKeywordCoverageScorer } from '../../_tsup-dts-rollup.js';
4
4
  export { createContentSimilarityScorer_alias_1 as createContentSimilarityScorer } from '../../_tsup-dts-rollup.js';
5
+ export { createToneScorer_alias_1 as createToneScorer } from '../../_tsup-dts-rollup.js';
@@ -3,6 +3,7 @@ import nlp from 'compromise';
3
3
  import { SequenceMatcher } from 'difflib';
4
4
  import keyword_extractor from 'keyword-extractor';
5
5
  import stringSimilarity from 'string-similarity';
6
+ import Sentiment from 'sentiment';
6
7
 
7
8
  function normalizeString(str) {
8
9
  return str.normalize("NFD").replace(/[\u0300-\u036f]/g, "").toLowerCase();
@@ -55,7 +56,7 @@ function createCompletenessScorer() {
55
56
  if (isInputInvalid || isOutputInvalid) {
56
57
  throw new Error("Inputs cannot be null or undefined");
57
58
  }
58
- const input = run.input.map((i) => i.content).join(", ");
59
+ const input = run.input?.map((i) => i.content).join(", ") || "";
59
60
  const output = run.output.text;
60
61
  const inputToProcess = input;
61
62
  const outputToProcess = output;
@@ -92,7 +93,7 @@ function createTextualDifferenceScorer() {
92
93
  name: "Completeness",
93
94
  description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
94
95
  analyze: async (run) => {
95
- const input = run.input.map((i) => i.content).join(", ");
96
+ const input = run.input?.map((i) => i.content).join(", ") || "";
96
97
  const output = run.output.text;
97
98
  const matcher = new SequenceMatcher(null, input, output);
98
99
  const ratio = matcher.ratio();
@@ -117,7 +118,7 @@ function createKeywordCoverageScorer() {
117
118
  name: "Completeness",
118
119
  description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
119
120
  extract: async (run) => {
120
- const input = run.input.map((i) => i.content).join(", ");
121
+ const input = run.input?.map((i) => i.content).join(", ") || "";
121
122
  const output = run.output.text;
122
123
  if (!input && !output) {
123
124
  return {
@@ -174,7 +175,7 @@ function createContentSimilarityScorer({ ignoreCase, ignoreWhitespace } = { igno
174
175
  name: "Completeness",
175
176
  description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
176
177
  extract: async (run) => {
177
- let processedInput = run.input.map((i) => i.content).join(", ");
178
+ let processedInput = run.input?.map((i) => i.content).join(", ") || "";
178
179
  let processedOutput = run.output.text;
179
180
  if (ignoreCase) {
180
181
  processedInput = processedInput.toLowerCase();
@@ -205,5 +206,42 @@ function createContentSimilarityScorer({ ignoreCase, ignoreWhitespace } = { igno
205
206
  }
206
207
  });
207
208
  }
/**
 * Builds a scorer that rates tone consistency using the "sentiment" library.
 *
 * Two modes, selected by whether the run has non-empty output text:
 *  - With output text: score = max(0, 1 - |comparative(input) - comparative(output)|).
 *  - Without output text: score = max(0, 1 - variance), where variance is the
 *    population variance of the per-sentence comparative sentiment of the input.
 *
 * @returns The scorer produced by `createScorer`.
 */
function createToneScorer() {
  return createScorer({
    // The name/description used to be copy-pasted from the completeness scorer
    // ("Completeness" / compromise-based coverage), which mislabeled this scorer.
    name: "Tone Consistency",
    description: 'Leverage the "sentiment" library to compare the sentiment of the input and output, or to measure how stable the sentiment is across the sentences of the input when there is no output.',
    analyze: async (run) => {
      const sentiment = new Sentiment();
      // A missing input array, or one that joins to an empty string, is treated as "".
      const input = run.input?.map((i) => i.content).join(", ") || "";
      const output = run.output.text;
      // NOTE: the key names are historical: `responseSentiment` holds the sentiment
      // of the joined INPUT, and `referenceSentiment` holds that of the OUTPUT text.
      const responseSentiment = sentiment.analyze(input);
      if (output) {
        const referenceSentiment = sentiment.analyze(output);
        const sentimentDiff = Math.abs(responseSentiment.comparative - referenceSentiment.comparative);
        // Clamp at 0 so a difference larger than 1 cannot produce a negative score.
        const normalizedScore = Math.max(0, 1 - sentimentDiff);
        return {
          score: normalizedScore,
          result: {
            responseSentiment: responseSentiment.comparative,
            referenceSentiment: referenceSentiment.comparative,
            difference: sentimentDiff
          }
        };
      }
      // A sentence is a run of non-terminator characters followed by one or more of
      // ". ! ?". If nothing matches, the whole input counts as a single sentence, so
      // `sentiments` is never empty. Trailing text without a terminator is not
      // matched when at least one terminated sentence precedes it.
      const sentences = input.match(/[^.!?]+[.!?]+/g) || [input];
      const sentiments = sentences.map((s) => sentiment.analyze(s).comparative);
      const avgSentiment = sentiments.reduce((a, b) => a + b, 0) / sentiments.length;
      const variance = sentiments.reduce((sum, s) => sum + Math.pow(s - avgSentiment, 2), 0) / sentiments.length;
      // Lower variance across sentences means a steadier tone; clamp at 0.
      const stability = Math.max(0, 1 - variance);
      return {
        score: stability,
        result: {
          avgSentiment,
          sentimentVariance: variance
        }
      };
    }
  });
}
208
246
 
209
- export { createCompletenessScorer, createContentSimilarityScorer, createKeywordCoverageScorer, createTextualDifferenceScorer };
247
+ export { createCompletenessScorer, createContentSimilarityScorer, createKeywordCoverageScorer, createTextualDifferenceScorer, createToneScorer };
@@ -243,7 +243,7 @@ function createAnswerRelevancyScorer({
243
243
  description: "Reason about the results",
244
244
  createPrompt: ({ run }) => {
245
245
  return createReasonPrompt({
246
- input: run.input.map((input) => input.content).join(", "),
246
+ input: run.input?.map((input) => input.content).join(", ") || "",
247
247
  output: run.output.text,
248
248
  score: run.score,
249
249
  results: run.analyzeStepResult.results,
@@ -477,7 +477,7 @@ function createFaithfulnessScorer({
477
477
  description: "Reason about the results",
478
478
  createPrompt: ({ run }) => {
479
479
  const prompt = createFaithfulnessReasonPrompt({
480
- input: run.input.map((input) => input.content).join(", "),
480
+ input: run.input?.map((input) => input.content).join(", ") || "",
481
481
  output: run.output.text,
482
482
  context: options?.context || [],
483
483
  score: run.score,
@@ -885,7 +885,7 @@ function createHallucinationScorer({
885
885
  description: "Reason about the results",
886
886
  createPrompt: ({ run }) => {
887
887
  const prompt = createHallucinationReasonPrompt({
888
- input: run.input.map((input) => input.content).join(", "),
888
+ input: run.input?.map((input) => input.content).join(", ") || "",
889
889
  output: run.output.text,
890
890
  context: run?.additionalContext?.context || [],
891
891
  score: run.score,
@@ -999,7 +999,7 @@ function createToxicityScorer({ model, options }) {
999
999
  outputSchema: zod.z.object({ verdicts: zod.z.array(zod.z.object({ verdict: zod.z.string(), reason: zod.z.string() })) }),
1000
1000
  createPrompt: ({ run }) => {
1001
1001
  const prompt = createToxicityAnalyzePrompt({
1002
- input: run.input.map((input) => input.content).join(", "),
1002
+ input: run.input?.map((input) => input.content).join(", ") || "",
1003
1003
  output: run.output.text
1004
1004
  });
1005
1005
  return prompt;
@@ -241,7 +241,7 @@ function createAnswerRelevancyScorer({
241
241
  description: "Reason about the results",
242
242
  createPrompt: ({ run }) => {
243
243
  return createReasonPrompt({
244
- input: run.input.map((input) => input.content).join(", "),
244
+ input: run.input?.map((input) => input.content).join(", ") || "",
245
245
  output: run.output.text,
246
246
  score: run.score,
247
247
  results: run.analyzeStepResult.results,
@@ -475,7 +475,7 @@ function createFaithfulnessScorer({
475
475
  description: "Reason about the results",
476
476
  createPrompt: ({ run }) => {
477
477
  const prompt = createFaithfulnessReasonPrompt({
478
- input: run.input.map((input) => input.content).join(", "),
478
+ input: run.input?.map((input) => input.content).join(", ") || "",
479
479
  output: run.output.text,
480
480
  context: options?.context || [],
481
481
  score: run.score,
@@ -883,7 +883,7 @@ function createHallucinationScorer({
883
883
  description: "Reason about the results",
884
884
  createPrompt: ({ run }) => {
885
885
  const prompt = createHallucinationReasonPrompt({
886
- input: run.input.map((input) => input.content).join(", "),
886
+ input: run.input?.map((input) => input.content).join(", ") || "",
887
887
  output: run.output.text,
888
888
  context: run?.additionalContext?.context || [],
889
889
  score: run.score,
@@ -997,7 +997,7 @@ function createToxicityScorer({ model, options }) {
997
997
  outputSchema: z.object({ verdicts: z.array(z.object({ verdict: z.string(), reason: z.string() })) }),
998
998
  createPrompt: ({ run }) => {
999
999
  const prompt = createToxicityAnalyzePrompt({
1000
- input: run.input.map((input) => input.content).join(", "),
1000
+ input: run.input?.map((input) => input.content).join(", ") || "",
1001
1001
  output: run.output.text
1002
1002
  });
1003
1003
  return prompt;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mastra/evals",
3
- "version": "0.10.8-alpha.0",
3
+ "version": "0.10.8-alpha.1",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "files": [
@@ -101,7 +101,7 @@
101
101
  "typescript": "^5.8.3",
102
102
  "vitest": "^3.2.4",
103
103
  "@internal/lint": "0.0.23",
104
- "@mastra/core": "0.12.0-alpha.0"
104
+ "@mastra/core": "0.12.0-alpha.1"
105
105
  },
106
106
  "scripts": {
107
107
  "check": "tsc --noEmit",