@mastra/evals 0.10.5 → 0.10.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,10 @@
  'use strict';
 
  var chunkCOBCYVZ7_cjs = require('../../chunk-COBCYVZ7.cjs');
+ var chunk2JVD5IX6_cjs = require('../../chunk-2JVD5IX6.cjs');
  var _eval = require('@mastra/core/eval');
  var zod = require('zod');
 
- // src/metrics/llm/utils.ts
- var roundToTwoDecimals = (num) => {
- return Math.round((num + Number.EPSILON) * 100) / 100;
- };
-
  // src/metrics/llm/answer-relevancy/prompts.ts
  var ANSWER_RELEVANCY_AGENT_INSTRUCTIONS = `You are a balanced and nuanced answer relevancy evaluator. Your job is to determine if LLM outputs are relevant to the input, including handling partially relevant or uncertain cases.
 
@@ -187,7 +183,7 @@ function generateReasonPrompt({
  output,
  scale
  }) {
- return `Explain the irrelevancy score where 0 is the lowest and ${scale} is the highest for the LLM's response using this context:
+ return `Explain the relevancy score where 0 is the lowest and ${scale} is the highest for the LLM's response using this context:
  Context:
  Input: ${input}
  Output: ${output}
@@ -288,7 +284,7 @@ var AnswerRelevancyMetric = class extends _eval.Metric {
  }
  }
  const score = relevancyCount / numberOfVerdicts;
- return roundToTwoDecimals(score * this.scale);
+ return chunk2JVD5IX6_cjs.roundToTwoDecimals(score * this.scale);
  }
  };
 
@@ -492,7 +488,7 @@ var ContextPositionMetric = class extends _eval.Metric {
  return 0;
  }
  const finalScore = weightedSum / maxPossibleSum * this.scale;
- return roundToTwoDecimals(finalScore);
+ return chunk2JVD5IX6_cjs.roundToTwoDecimals(finalScore);
  }
  };
 
@@ -700,7 +696,7 @@ var ContextPrecisionMetric = class extends _eval.Metric {
  return 0;
  }
  const finalScore = weightedPrecisionSum / relevantCount;
- return roundToTwoDecimals(finalScore * this.scale);
+ return chunk2JVD5IX6_cjs.roundToTwoDecimals(finalScore * this.scale);
  }
  };
 
@@ -938,7 +934,7 @@ var FaithfulnessMetric = class extends _eval.Metric {
  return 0;
  }
  const score = supportedClaims / totalClaims * this.scale;
- return roundToTwoDecimals(score);
+ return chunk2JVD5IX6_cjs.roundToTwoDecimals(score);
  }
  };
 
@@ -1155,7 +1151,7 @@ var HallucinationMetric = class extends _eval.Metric {
  return 0;
  }
  const score = contradictedStatements / totalStatements * this.scale;
- return roundToTwoDecimals(score);
+ return chunk2JVD5IX6_cjs.roundToTwoDecimals(score);
  }
  };
 
@@ -1459,7 +1455,7 @@ var PromptAlignmentMetric = class extends _eval.Metric {
  },
  { naCount: 0, alignmentCount: 0, applicableCount: 0 }
  );
- const score = counts.applicableCount > 0 ? roundToTwoDecimals(counts.alignmentCount / counts.applicableCount * this.scale) : 0;
+ const score = counts.applicableCount > 0 ? chunk2JVD5IX6_cjs.roundToTwoDecimals(counts.alignmentCount / counts.applicableCount * this.scale) : 0;
  return {
  score,
  totalInstructions,
@@ -1619,7 +1615,7 @@ var ToxicityMetric = class extends _eval.Metric {
  }
  }
  const score = toxicityCount / numberOfVerdicts;
- return roundToTwoDecimals(score * this.scale);
+ return chunk2JVD5IX6_cjs.roundToTwoDecimals(score * this.scale);
  }
  };
 
@@ -1795,7 +1791,7 @@ var ContextRelevancyMetric = class extends _eval.Metric {
  }
  const relevantVerdicts = verdicts.filter((v) => v.verdict.toLowerCase() === "yes");
  const score = relevantVerdicts.length / totalVerdicts;
- return roundToTwoDecimals(score * this.scale);
+ return chunk2JVD5IX6_cjs.roundToTwoDecimals(score * this.scale);
  }
  };
 
@@ -1941,7 +1937,7 @@ var ContextualRecallMetric = class extends _eval.Metric {
  }
  const justifiedVerdicts = verdicts.filter((v) => v.verdict === "yes");
  const score = justifiedVerdicts.length / totalVerdicts;
- return roundToTwoDecimals(score * this.scale);
+ return chunk2JVD5IX6_cjs.roundToTwoDecimals(score * this.scale);
  }
  };
 
@@ -2288,7 +2284,7 @@ var SummarizationMetric = class extends _eval.Metric {
  }
  }
  const score = positiveCount / numberOfVerdicts;
- return roundToTwoDecimals(score * this.scale);
+ return chunk2JVD5IX6_cjs.roundToTwoDecimals(score * this.scale);
  }
  };
 
@@ -2466,7 +2462,7 @@ var BiasMetric = class extends _eval.Metric {
  }
  const biasedVerdicts = evaluation.filter((v) => v.verdict.toLowerCase() === "yes");
  const score = biasedVerdicts.length / numberOfVerdicts;
- return roundToTwoDecimals(score * this.scale);
+ return chunk2JVD5IX6_cjs.roundToTwoDecimals(score * this.scale);
  }
  };
 
@@ -1,12 +1,8 @@
  import { MastraAgentJudge } from '../../chunk-TXXJUIES.js';
+ import { roundToTwoDecimals } from '../../chunk-UYXFD4VX.js';
  import { Metric } from '@mastra/core/eval';
  import { z } from 'zod';
 
- // src/metrics/llm/utils.ts
- var roundToTwoDecimals = (num) => {
- return Math.round((num + Number.EPSILON) * 100) / 100;
- };
-
  // src/metrics/llm/answer-relevancy/prompts.ts
  var ANSWER_RELEVANCY_AGENT_INSTRUCTIONS = `You are a balanced and nuanced answer relevancy evaluator. Your job is to determine if LLM outputs are relevant to the input, including handling partially relevant or uncertain cases.
 
@@ -185,7 +181,7 @@ function generateReasonPrompt({
  output,
  scale
  }) {
- return `Explain the irrelevancy score where 0 is the lowest and ${scale} is the highest for the LLM's response using this context:
+ return `Explain the relevancy score where 0 is the lowest and ${scale} is the highest for the LLM's response using this context:
  Context:
  Input: ${input}
  Output: ${output}
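
In both the CJS and ESM builds above, the inline `roundToTwoDecimals` helper is removed and re-imported from a shared chunk (`chunk-2JVD5IX6.cjs` / `chunk-UYXFD4VX.js`); every metric's score calculation now calls the shared copy, and the reason prompt's "irrelevancy score" wording is corrected to "relevancy score". For reference, a minimal TypeScript sketch of the helper's unchanged behavior, taken from the removed lines:

```ts
// Same logic as the helper deleted above and now imported from the shared chunk.
// Number.EPSILON nudges values such as 1.005, whose binary representation would
// otherwise make (1.005 * 100) round down to 100 instead of up to 101.
const roundToTwoDecimals = (num: number): number =>
  Math.round((num + Number.EPSILON) * 100) / 100;

// Example: the metrics scale a raw ratio and then round it, e.g. 7 of 9 relevant verdicts.
const score = roundToTwoDecimals((7 / 9) * 1); // 0.78
```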
@@ -0,0 +1,220 @@
+ 'use strict';
+
+ var scores = require('@mastra/core/scores');
+ var nlp = require('compromise');
+ var difflib = require('difflib');
+ var keyword_extractor = require('keyword-extractor');
+ var stringSimilarity = require('string-similarity');
+
+ function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
+
+ var nlp__default = /*#__PURE__*/_interopDefault(nlp);
+ var keyword_extractor__default = /*#__PURE__*/_interopDefault(keyword_extractor);
+ var stringSimilarity__default = /*#__PURE__*/_interopDefault(stringSimilarity);
+
+ function normalizeString(str) {
+ return str.normalize("NFD").replace(/[\u0300-\u036f]/g, "").toLowerCase();
+ }
+ function extractElements(doc) {
+ const nouns = doc.nouns().out("array") || [];
+ const verbs = doc.verbs().toInfinitive().out("array") || [];
+ const topics = doc.topics().out("array") || [];
+ const terms = doc.terms().out("array") || [];
+ const cleanAndSplitTerm = (term) => {
+ const normalized = normalizeString(term);
+ return normalized.replace(/([a-z])([A-Z])/g, "$1 $2").replace(/[^a-z0-9]+/g, " ").trim().split(/\s+/).filter((word) => word.length > 0);
+ };
+ const processedTerms = [
+ ...nouns.flatMap(cleanAndSplitTerm),
+ ...verbs.flatMap(cleanAndSplitTerm),
+ ...topics.flatMap(cleanAndSplitTerm),
+ ...terms.flatMap(cleanAndSplitTerm)
+ ];
+ return [...new Set(processedTerms)];
+ }
+ function calculateCoverage({ original, simplified }) {
+ if (original.length === 0) {
+ return simplified.length === 0 ? 1 : 0;
+ }
+ const covered = original.filter(
+ (element) => simplified.some((s) => {
+ const elem = normalizeString(element);
+ const simp = normalizeString(s);
+ if (elem.length <= 3) {
+ return elem === simp;
+ }
+ const longer = elem.length > simp.length ? elem : simp;
+ const shorter = elem.length > simp.length ? simp : elem;
+ if (longer.includes(shorter)) {
+ return shorter.length / longer.length > 0.6;
+ }
+ return false;
+ })
+ );
+ return covered.length / original.length;
+ }
+ function createCompletenessScorer() {
+ return scores.createScorer({
+ name: "Completeness",
+ description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
+ extract: async (run) => {
+ const isInputInvalid = !run.input || run.input.some((i) => i.content === null || i.content === void 0);
+ const isOutputInvalid = !run.output || run.output.text === null || run.output.text === void 0;
+ if (isInputInvalid || isOutputInvalid) {
+ throw new Error("Inputs cannot be null or undefined");
+ }
+ const input = run.input.map((i) => i.content).join(", ");
+ const output = run.output.text;
+ const inputToProcess = input;
+ const outputToProcess = output;
+ const inputDoc = nlp__default.default(inputToProcess.trim());
+ const outputDoc = nlp__default.default(outputToProcess.trim());
+ const inputElements = extractElements(inputDoc);
+ const outputElements = extractElements(outputDoc);
+ return {
+ result: {
+ inputElements,
+ outputElements,
+ missingElements: inputElements.filter((e) => !outputElements.includes(e)),
+ elementCounts: {
+ input: inputElements.length,
+ output: outputElements.length
+ }
+ }
+ };
+ },
+ analyze: async (run) => {
+ const inputElements = run.extractStepResult?.inputElements;
+ const outputElements = run.extractStepResult?.outputElements;
+ return {
+ score: calculateCoverage({
+ original: inputElements,
+ simplified: outputElements
+ })
+ };
+ }
+ });
+ }
+ function createTextualDifferenceScorer() {
+ return scores.createScorer({
+ name: "Completeness",
+ description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
+ analyze: async (run) => {
+ const input = run.input.map((i) => i.content).join(", ");
+ const output = run.output.text;
+ const matcher = new difflib.SequenceMatcher(null, input, output);
+ const ratio = matcher.ratio();
+ const ops = matcher.getOpcodes();
+ const changes = ops.filter(([op]) => op !== "equal").length;
+ const maxLength = Math.max(input.length, output.length);
+ const lengthDiff = maxLength > 0 ? Math.abs(input.length - output.length) / maxLength : 0;
+ const confidence = 1 - lengthDiff;
+ return {
+ score: ratio,
+ result: {
+ confidence,
+ changes,
+ lengthDiff
+ }
+ };
+ }
+ });
+ }
+ function createKeywordCoverageScorer() {
+ return scores.createScorer({
+ name: "Completeness",
+ description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
+ extract: async (run) => {
+ const input = run.input.map((i) => i.content).join(", ");
+ const output = run.output.text;
+ if (!input && !output) {
+ return {
+ result: {
+ referenceKeywords: /* @__PURE__ */ new Set(),
+ responseKeywords: /* @__PURE__ */ new Set()
+ }
+ };
+ }
+ const extractKeywords = (text) => {
+ return keyword_extractor__default.default.extract(text, {
+ language: "english",
+ remove_digits: true,
+ return_changed_case: true,
+ remove_duplicates: true
+ });
+ };
+ const referenceKeywords = new Set(extractKeywords(input));
+ const responseKeywords = new Set(extractKeywords(output));
+ return {
+ result: {
+ referenceKeywords,
+ responseKeywords
+ }
+ };
+ },
+ analyze: async (run) => {
+ if (!run.extractStepResult?.referenceKeywords.size && !run.extractStepResult?.responseKeywords.size) {
+ return {
+ score: 1,
+ result: {
+ totalKeywords: 0,
+ matchedKeywords: 0
+ }
+ };
+ }
+ const matchedKeywords = [...run.extractStepResult?.referenceKeywords].filter(
+ (k) => run.extractStepResult?.responseKeywords.has(k)
+ );
+ const totalKeywords = run.extractStepResult?.referenceKeywords.size;
+ const coverage = totalKeywords > 0 ? matchedKeywords.length / totalKeywords : 0;
+ return {
+ score: coverage,
+ result: {
+ totalKeywords: run.extractStepResult?.referenceKeywords.size,
+ matchedKeywords: matchedKeywords.length
+ }
+ };
+ }
+ });
+ }
+ function createContentSimilarityScorer({ ignoreCase, ignoreWhitespace } = { ignoreCase: true, ignoreWhitespace: true }) {
+ return scores.createScorer({
+ name: "Completeness",
+ description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
+ extract: async (run) => {
+ let processedInput = run.input.map((i) => i.content).join(", ");
+ let processedOutput = run.output.text;
+ if (ignoreCase) {
+ processedInput = processedInput.toLowerCase();
+ processedOutput = processedOutput.toLowerCase();
+ }
+ if (ignoreWhitespace) {
+ processedInput = processedInput.replace(/\s+/g, " ").trim();
+ processedOutput = processedOutput.replace(/\s+/g, " ").trim();
+ }
+ return {
+ result: {
+ processedInput,
+ processedOutput
+ }
+ };
+ },
+ analyze: async (run) => {
+ const similarity = stringSimilarity__default.default.compareTwoStrings(
+ run.extractStepResult?.processedInput,
+ run.extractStepResult?.processedOutput
+ );
+ return {
+ score: similarity,
+ result: {
+ similarity
+ }
+ };
+ }
+ });
+ }
+
+ exports.createCompletenessScorer = createCompletenessScorer;
+ exports.createContentSimilarityScorer = createContentSimilarityScorer;
+ exports.createKeywordCoverageScorer = createKeywordCoverageScorer;
+ exports.createTextualDifferenceScorer = createTextualDifferenceScorer;
@@ -0,0 +1,4 @@
+ export { createCompletenessScorer_alias_1 as createCompletenessScorer } from '../../_tsup-dts-rollup.cjs';
+ export { createTextualDifferenceScorer_alias_1 as createTextualDifferenceScorer } from '../../_tsup-dts-rollup.cjs';
+ export { createKeywordCoverageScorer_alias_1 as createKeywordCoverageScorer } from '../../_tsup-dts-rollup.cjs';
+ export { createContentSimilarityScorer_alias_1 as createContentSimilarityScorer } from '../../_tsup-dts-rollup.cjs';
@@ -0,0 +1,4 @@
+ export { createCompletenessScorer_alias_1 as createCompletenessScorer } from '../../_tsup-dts-rollup.js';
+ export { createTextualDifferenceScorer_alias_1 as createTextualDifferenceScorer } from '../../_tsup-dts-rollup.js';
+ export { createKeywordCoverageScorer_alias_1 as createKeywordCoverageScorer } from '../../_tsup-dts-rollup.js';
+ export { createContentSimilarityScorer_alias_1 as createContentSimilarityScorer } from '../../_tsup-dts-rollup.js';
@@ -0,0 +1,209 @@
+ import { createScorer } from '@mastra/core/scores';
+ import nlp from 'compromise';
+ import { SequenceMatcher } from 'difflib';
+ import keyword_extractor from 'keyword-extractor';
+ import stringSimilarity from 'string-similarity';
+
+ function normalizeString(str) {
+ return str.normalize("NFD").replace(/[\u0300-\u036f]/g, "").toLowerCase();
+ }
+ function extractElements(doc) {
+ const nouns = doc.nouns().out("array") || [];
+ const verbs = doc.verbs().toInfinitive().out("array") || [];
+ const topics = doc.topics().out("array") || [];
+ const terms = doc.terms().out("array") || [];
+ const cleanAndSplitTerm = (term) => {
+ const normalized = normalizeString(term);
+ return normalized.replace(/([a-z])([A-Z])/g, "$1 $2").replace(/[^a-z0-9]+/g, " ").trim().split(/\s+/).filter((word) => word.length > 0);
+ };
+ const processedTerms = [
+ ...nouns.flatMap(cleanAndSplitTerm),
+ ...verbs.flatMap(cleanAndSplitTerm),
+ ...topics.flatMap(cleanAndSplitTerm),
+ ...terms.flatMap(cleanAndSplitTerm)
+ ];
+ return [...new Set(processedTerms)];
+ }
+ function calculateCoverage({ original, simplified }) {
+ if (original.length === 0) {
+ return simplified.length === 0 ? 1 : 0;
+ }
+ const covered = original.filter(
+ (element) => simplified.some((s) => {
+ const elem = normalizeString(element);
+ const simp = normalizeString(s);
+ if (elem.length <= 3) {
+ return elem === simp;
+ }
+ const longer = elem.length > simp.length ? elem : simp;
+ const shorter = elem.length > simp.length ? simp : elem;
+ if (longer.includes(shorter)) {
+ return shorter.length / longer.length > 0.6;
+ }
+ return false;
+ })
+ );
+ return covered.length / original.length;
+ }
+ function createCompletenessScorer() {
+ return createScorer({
+ name: "Completeness",
+ description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
+ extract: async (run) => {
+ const isInputInvalid = !run.input || run.input.some((i) => i.content === null || i.content === void 0);
+ const isOutputInvalid = !run.output || run.output.text === null || run.output.text === void 0;
+ if (isInputInvalid || isOutputInvalid) {
+ throw new Error("Inputs cannot be null or undefined");
+ }
+ const input = run.input.map((i) => i.content).join(", ");
+ const output = run.output.text;
+ const inputToProcess = input;
+ const outputToProcess = output;
+ const inputDoc = nlp(inputToProcess.trim());
+ const outputDoc = nlp(outputToProcess.trim());
+ const inputElements = extractElements(inputDoc);
+ const outputElements = extractElements(outputDoc);
+ return {
+ result: {
+ inputElements,
+ outputElements,
+ missingElements: inputElements.filter((e) => !outputElements.includes(e)),
+ elementCounts: {
+ input: inputElements.length,
+ output: outputElements.length
+ }
+ }
+ };
+ },
+ analyze: async (run) => {
+ const inputElements = run.extractStepResult?.inputElements;
+ const outputElements = run.extractStepResult?.outputElements;
+ return {
+ score: calculateCoverage({
+ original: inputElements,
+ simplified: outputElements
+ })
+ };
+ }
+ });
+ }
+ function createTextualDifferenceScorer() {
+ return createScorer({
+ name: "Completeness",
+ description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
+ analyze: async (run) => {
+ const input = run.input.map((i) => i.content).join(", ");
+ const output = run.output.text;
+ const matcher = new SequenceMatcher(null, input, output);
+ const ratio = matcher.ratio();
+ const ops = matcher.getOpcodes();
+ const changes = ops.filter(([op]) => op !== "equal").length;
+ const maxLength = Math.max(input.length, output.length);
+ const lengthDiff = maxLength > 0 ? Math.abs(input.length - output.length) / maxLength : 0;
+ const confidence = 1 - lengthDiff;
+ return {
+ score: ratio,
+ result: {
+ confidence,
+ changes,
+ lengthDiff
+ }
+ };
+ }
+ });
+ }
+ function createKeywordCoverageScorer() {
+ return createScorer({
+ name: "Completeness",
+ description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
+ extract: async (run) => {
+ const input = run.input.map((i) => i.content).join(", ");
+ const output = run.output.text;
+ if (!input && !output) {
+ return {
+ result: {
+ referenceKeywords: /* @__PURE__ */ new Set(),
+ responseKeywords: /* @__PURE__ */ new Set()
+ }
+ };
+ }
+ const extractKeywords = (text) => {
+ return keyword_extractor.extract(text, {
+ language: "english",
+ remove_digits: true,
+ return_changed_case: true,
+ remove_duplicates: true
+ });
+ };
+ const referenceKeywords = new Set(extractKeywords(input));
+ const responseKeywords = new Set(extractKeywords(output));
+ return {
+ result: {
+ referenceKeywords,
+ responseKeywords
+ }
+ };
+ },
+ analyze: async (run) => {
+ if (!run.extractStepResult?.referenceKeywords.size && !run.extractStepResult?.responseKeywords.size) {
+ return {
+ score: 1,
+ result: {
+ totalKeywords: 0,
+ matchedKeywords: 0
+ }
+ };
+ }
+ const matchedKeywords = [...run.extractStepResult?.referenceKeywords].filter(
+ (k) => run.extractStepResult?.responseKeywords.has(k)
+ );
+ const totalKeywords = run.extractStepResult?.referenceKeywords.size;
+ const coverage = totalKeywords > 0 ? matchedKeywords.length / totalKeywords : 0;
+ return {
+ score: coverage,
+ result: {
+ totalKeywords: run.extractStepResult?.referenceKeywords.size,
+ matchedKeywords: matchedKeywords.length
+ }
+ };
+ }
+ });
+ }
+ function createContentSimilarityScorer({ ignoreCase, ignoreWhitespace } = { ignoreCase: true, ignoreWhitespace: true }) {
+ return createScorer({
+ name: "Completeness",
+ description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
+ extract: async (run) => {
+ let processedInput = run.input.map((i) => i.content).join(", ");
+ let processedOutput = run.output.text;
+ if (ignoreCase) {
+ processedInput = processedInput.toLowerCase();
+ processedOutput = processedOutput.toLowerCase();
+ }
+ if (ignoreWhitespace) {
+ processedInput = processedInput.replace(/\s+/g, " ").trim();
+ processedOutput = processedOutput.replace(/\s+/g, " ").trim();
+ }
+ return {
+ result: {
+ processedInput,
+ processedOutput
+ }
+ };
+ },
+ analyze: async (run) => {
+ const similarity = stringSimilarity.compareTwoStrings(
+ run.extractStepResult?.processedInput,
+ run.extractStepResult?.processedOutput
+ );
+ return {
+ score: similarity,
+ result: {
+ similarity
+ }
+ };
+ }
+ });
+ }
+
+ export { createCompletenessScorer, createContentSimilarityScorer, createKeywordCoverageScorer, createTextualDifferenceScorer };
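
The remaining hunks add a new code-based scorers entry point in both CJS and ESM form, exporting four heuristic scorers built on `createScorer` from `@mastra/core/scores`: completeness (element coverage via `compromise`), textual difference (`difflib` sequence ratio), keyword coverage (`keyword-extractor`), and content similarity (`string-similarity`). A minimal usage sketch follows; the import subpath is an assumption (file paths are not visible in this diff), and how a run is executed against a scorer is defined by `@mastra/core/scores` rather than shown here:

```ts
// Illustrative sketch only. The subpath below is an assumption; the factory names
// and option shapes come from the added code above.
import {
  createCompletenessScorer,
  createContentSimilarityScorer,
  createKeywordCoverageScorer,
  createTextualDifferenceScorer,
} from "@mastra/evals/scorers/code";

// Each factory takes no required arguments; content similarity defaults to
// ignoring both case and whitespace unless options are passed explicitly.
const completeness = createCompletenessScorer();
const textualDifference = createTextualDifferenceScorer();
const keywordCoverage = createKeywordCoverageScorer();
const contentSimilarity = createContentSimilarityScorer({ ignoreCase: true, ignoreWhitespace: false });

// The extract/analyze steps above read runs shaped roughly like this: an array of
// { content } inputs plus an output carrying the generated text.
const exampleRun = {
  input: [{ content: "List the three largest planets." }],
  output: { text: "Jupiter, Saturn, and Neptune are the three largest planets." },
};
```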