@mastra/evals 0.13.8-alpha.1 → 0.13.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/dist/scorers/code/completeness/index.d.ts +1 -2
- package/dist/scorers/code/completeness/index.d.ts.map +1 -1
- package/dist/scorers/code/content-similarity/index.d.ts +1 -2
- package/dist/scorers/code/content-similarity/index.d.ts.map +1 -1
- package/dist/scorers/code/index.cjs +18 -12
- package/dist/scorers/code/index.cjs.map +1 -1
- package/dist/scorers/code/index.js +18 -12
- package/dist/scorers/code/index.js.map +1 -1
- package/dist/scorers/code/keyword-coverage/index.d.ts +1 -2
- package/dist/scorers/code/keyword-coverage/index.d.ts.map +1 -1
- package/dist/scorers/code/textual-difference/index.d.ts +1 -2
- package/dist/scorers/code/textual-difference/index.d.ts.map +1 -1
- package/dist/scorers/code/tone/index.d.ts +1 -2
- package/dist/scorers/code/tone/index.d.ts.map +1 -1
- package/dist/scorers/code/tool-call-accuracy/index.d.ts +1 -2
- package/dist/scorers/code/tool-call-accuracy/index.d.ts.map +1 -1
- package/dist/scorers/llm/answer-relevancy/index.d.ts +1 -2
- package/dist/scorers/llm/answer-relevancy/index.d.ts.map +1 -1
- package/dist/scorers/llm/answer-similarity/index.d.ts +1 -2
- package/dist/scorers/llm/answer-similarity/index.d.ts.map +1 -1
- package/dist/scorers/llm/bias/index.d.ts.map +1 -1
- package/dist/scorers/llm/context-precision/index.d.ts +1 -1
- package/dist/scorers/llm/context-precision/index.d.ts.map +1 -1
- package/dist/scorers/llm/context-relevance/index.d.ts +1 -1
- package/dist/scorers/llm/context-relevance/index.d.ts.map +1 -1
- package/dist/scorers/llm/faithfulness/index.d.ts +1 -2
- package/dist/scorers/llm/faithfulness/index.d.ts.map +1 -1
- package/dist/scorers/llm/hallucination/index.d.ts +1 -2
- package/dist/scorers/llm/hallucination/index.d.ts.map +1 -1
- package/dist/scorers/llm/index.cjs +20 -10
- package/dist/scorers/llm/index.cjs.map +1 -1
- package/dist/scorers/llm/index.js +20 -10
- package/dist/scorers/llm/index.js.map +1 -1
- package/dist/scorers/llm/noise-sensitivity/index.d.ts +1 -2
- package/dist/scorers/llm/noise-sensitivity/index.d.ts.map +1 -1
- package/dist/scorers/llm/tool-call-accuracy/index.d.ts +1 -2
- package/dist/scorers/llm/tool-call-accuracy/index.d.ts.map +1 -1
- package/dist/scorers/llm/toxicity/index.d.ts +1 -2
- package/dist/scorers/llm/toxicity/index.d.ts.map +1 -1
- package/package.json +4 -4
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/bias/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAW3F,MAAM,WAAW,iBAAiB;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,gBAAgB,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;IAAE,KAAK,EAAE,aAAa,CAAC;IAAC,OAAO,CAAC,EAAE,iBAAiB,CAAA;CAAE;;;;;;;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/bias/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAW3F,MAAM,WAAW,iBAAiB;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,gBAAgB,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;IAAE,KAAK,EAAE,aAAa,CAAC;IAAC,OAAO,CAAC,EAAE,iBAAiB,CAAA;CAAE;;;;;;;6FAgDzG"}
|
|
@@ -8,7 +8,7 @@ export interface ContextPrecisionMetricOptions {
|
|
|
8
8
|
export declare function createContextPrecisionScorer({ model, options, }: {
|
|
9
9
|
model: MastraLanguageModel;
|
|
10
10
|
options: ContextPrecisionMetricOptions;
|
|
11
|
-
}): import("@mastra/core/scores").MastraScorer<
|
|
11
|
+
}): import("@mastra/core/scores").MastraScorer<"Context Precision Scorer", ScorerRunInputForAgent, ScorerRunOutputForAgent, Record<"analyzeStepResult", {
|
|
12
12
|
verdicts: {
|
|
13
13
|
verdict: string;
|
|
14
14
|
reason: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/context-precision/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAE9D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAS3F,MAAM,WAAW,6BAA6B;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,gBAAgB,CAAC,EAAE,CAAC,KAAK,EAAE,sBAAsB,EAAE,MAAM,EAAE,uBAAuB,KAAK,MAAM,EAAE,CAAC;CACjG;AAYD,wBAAgB,4BAA4B,CAAC,EAC3C,KAAK,EACL,OAAO,GACR,EAAE;IACD,KAAK,EAAE,mBAAmB,CAAC;IAC3B,OAAO,EAAE,6BAA6B,CAAC;CACxC;;;;;;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/context-precision/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAE9D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAS3F,MAAM,WAAW,6BAA6B;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,gBAAgB,CAAC,EAAE,CAAC,KAAK,EAAE,sBAAsB,EAAE,MAAM,EAAE,uBAAuB,KAAK,MAAM,EAAE,CAAC;CACjG;AAYD,wBAAgB,4BAA4B,CAAC,EAC3C,KAAK,EACL,OAAO,GACR,EAAE;IACD,KAAK,EAAE,mBAAmB,CAAC;IAC3B,OAAO,EAAE,6BAA6B,CAAC;CACxC;;;;;;6FAmGA"}
|
|
@@ -13,7 +13,7 @@ export interface ContextRelevanceOptions {
|
|
|
13
13
|
export declare function createContextRelevanceScorerLLM({ model, options, }: {
|
|
14
14
|
model: MastraLanguageModel;
|
|
15
15
|
options: ContextRelevanceOptions;
|
|
16
|
-
}): import("@mastra/core/scores").MastraScorer<
|
|
16
|
+
}): import("@mastra/core/scores").MastraScorer<"Context Relevance (LLM)", ScorerRunInputForAgent, ScorerRunOutputForAgent, Record<"analyzeStepResult", {
|
|
17
17
|
evaluations: {
|
|
18
18
|
reasoning: string;
|
|
19
19
|
context_index: number;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/context-relevance/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAC9D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAM3F,MAAM,WAAW,uBAAuB;IACtC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,gBAAgB,CAAC,EAAE,CAAC,KAAK,EAAE,sBAAsB,EAAE,MAAM,EAAE,uBAAuB,KAAK,MAAM,EAAE,CAAC;IAChG,SAAS,CAAC,EAAE;QACV,0BAA0B,CAAC,EAAE,MAAM,CAAC;QACpC,qBAAqB,CAAC,EAAE,MAAM,CAAC;QAC/B,wBAAwB,CAAC,EAAE,MAAM,CAAC;KACnC,CAAC;CACH;AAuBD,wBAAgB,+BAA+B,CAAC,EAC9C,KAAK,EACL,OAAO,GACR,EAAE;IACD,KAAK,EAAE,mBAAmB,CAAC;IAC3B,OAAO,EAAE,uBAAuB,CAAC;CAClC;;;;;;;;;;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/context-relevance/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAC9D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAM3F,MAAM,WAAW,uBAAuB;IACtC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,gBAAgB,CAAC,EAAE,CAAC,KAAK,EAAE,sBAAsB,EAAE,MAAM,EAAE,uBAAuB,KAAK,MAAM,EAAE,CAAC;IAChG,SAAS,CAAC,EAAE;QACV,0BAA0B,CAAC,EAAE,MAAM,CAAC;QACpC,qBAAqB,CAAC,EAAE,MAAM,CAAC;QAC/B,wBAAwB,CAAC,EAAE,MAAM,CAAC;KACnC,CAAC;CACH;AAuBD,wBAAgB,+BAA+B,CAAC,EAC9C,KAAK,EACL,OAAO,GACR,EAAE;IACD,KAAK,EAAE,mBAAmB,CAAC;IAC3B,OAAO,EAAE,uBAAuB,CAAC;CAClC;;;;;;;;;;6FA6IA"}
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
2
|
-
import type { ScorerRunInputForAgent, ScorerRunOutputForAgent } from '@mastra/core/scores';
|
|
3
2
|
export interface FaithfulnessMetricOptions {
|
|
4
3
|
scale?: number;
|
|
5
4
|
context?: string[];
|
|
@@ -7,7 +6,7 @@ export interface FaithfulnessMetricOptions {
|
|
|
7
6
|
export declare function createFaithfulnessScorer({ model, options, }: {
|
|
8
7
|
model: LanguageModel;
|
|
9
8
|
options?: FaithfulnessMetricOptions;
|
|
10
|
-
}): import("@mastra/core/scores").MastraScorer<
|
|
9
|
+
}): import("@mastra/core/scores").MastraScorer<"Faithfulness Scorer", import("@mastra/core/scores").ScorerRunInputForAgent, import("@mastra/core/scores").ScorerRunOutputForAgent, Record<"preprocessStepResult", string[]> & Record<"analyzeStepResult", {
|
|
11
10
|
verdicts: {
|
|
12
11
|
verdict: string;
|
|
13
12
|
reason: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/faithfulness/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/faithfulness/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAWtD,MAAM,WAAW,yBAAyB;IACxC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,wBAAgB,wBAAwB,CAAC,EACvC,KAAK,EACL,OAAO,GACR,EAAE;IACD,KAAK,EAAE,aAAa,CAAC;IACrB,OAAO,CAAC,EAAE,yBAAyB,CAAC;CACrC;;;;;6FAiEA"}
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
2
|
-
import type { ScorerRunInputForAgent, ScorerRunOutputForAgent } from '@mastra/core/scores';
|
|
3
2
|
export interface HallucinationMetricOptions {
|
|
4
3
|
scale?: number;
|
|
5
4
|
context: string[];
|
|
@@ -7,7 +6,7 @@ export interface HallucinationMetricOptions {
|
|
|
7
6
|
export declare function createHallucinationScorer({ model, options, }: {
|
|
8
7
|
model: LanguageModel;
|
|
9
8
|
options?: HallucinationMetricOptions;
|
|
10
|
-
}): import("@mastra/core/scores").MastraScorer<
|
|
9
|
+
}): import("@mastra/core/scores").MastraScorer<"Hallucination Scorer", import("@mastra/core/scores").ScorerRunInputForAgent, import("@mastra/core/scores").ScorerRunOutputForAgent, Record<"preprocessStepResult", {
|
|
11
10
|
claims: string[];
|
|
12
11
|
}> & Record<"analyzeStepResult", {
|
|
13
12
|
verdicts: {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/hallucination/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/hallucination/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAYtD,MAAM,WAAW,0BAA0B;IACzC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB;AAED,wBAAgB,yBAAyB,CAAC,EACxC,KAAK,EACL,OAAO,GACR,EAAE;IACD,KAAK,EAAE,aAAa,CAAC;IACrB,OAAO,CAAC,EAAE,0BAA0B,CAAC;CACtC;;;;;;;;6FA2DA"}
|
|
@@ -222,7 +222,8 @@ function createAnswerRelevancyScorer({
|
|
|
222
222
|
judge: {
|
|
223
223
|
model,
|
|
224
224
|
instructions: ANSWER_RELEVANCY_AGENT_INSTRUCTIONS
|
|
225
|
-
}
|
|
225
|
+
},
|
|
226
|
+
type: "agent"
|
|
226
227
|
}).preprocess({
|
|
227
228
|
description: "Extract relevant statements from the LLM output",
|
|
228
229
|
outputSchema: extractOutputSchema,
|
|
@@ -438,7 +439,8 @@ function createAnswerSimilarityScorer({
|
|
|
438
439
|
judge: {
|
|
439
440
|
model,
|
|
440
441
|
instructions: ANSWER_SIMILARITY_INSTRUCTIONS
|
|
441
|
-
}
|
|
442
|
+
},
|
|
443
|
+
type: "agent"
|
|
442
444
|
}).preprocess({
|
|
443
445
|
description: "Extract semantic units from output and ground truth",
|
|
444
446
|
outputSchema: extractOutputSchema2,
|
|
@@ -694,7 +696,8 @@ function createFaithfulnessScorer({
|
|
|
694
696
|
judge: {
|
|
695
697
|
model,
|
|
696
698
|
instructions: FAITHFULNESS_AGENT_INSTRUCTIONS
|
|
697
|
-
}
|
|
699
|
+
},
|
|
700
|
+
type: "agent"
|
|
698
701
|
}).preprocess({
|
|
699
702
|
description: "Extract relevant statements from the LLM output",
|
|
700
703
|
outputSchema: zod.z.array(zod.z.string()),
|
|
@@ -851,7 +854,8 @@ function createBiasScorer({ model, options }) {
|
|
|
851
854
|
judge: {
|
|
852
855
|
model,
|
|
853
856
|
instructions: BIAS_AGENT_INSTRUCTIONS
|
|
854
|
-
}
|
|
857
|
+
},
|
|
858
|
+
type: "agent"
|
|
855
859
|
}).preprocess({
|
|
856
860
|
description: "Extract relevant statements from the LLM output",
|
|
857
861
|
outputSchema: zod.z.object({
|
|
@@ -1084,7 +1088,8 @@ function createHallucinationScorer({
|
|
|
1084
1088
|
judge: {
|
|
1085
1089
|
model,
|
|
1086
1090
|
instructions: HALLUCINATION_AGENT_INSTRUCTIONS
|
|
1087
|
-
}
|
|
1091
|
+
},
|
|
1092
|
+
type: "agent"
|
|
1088
1093
|
}).preprocess({
|
|
1089
1094
|
description: "Extract all claims from the given output",
|
|
1090
1095
|
outputSchema: zod.z.object({
|
|
@@ -1222,7 +1227,8 @@ function createToxicityScorer({ model, options }) {
|
|
|
1222
1227
|
judge: {
|
|
1223
1228
|
model,
|
|
1224
1229
|
instructions: TOXICITY_AGENT_INSTRUCTIONS
|
|
1225
|
-
}
|
|
1230
|
+
},
|
|
1231
|
+
type: "agent"
|
|
1226
1232
|
}).analyze({
|
|
1227
1233
|
description: "Score the relevance of the statements to the input",
|
|
1228
1234
|
outputSchema: zod.z.object({ verdicts: zod.z.array(zod.z.object({ verdict: zod.z.string(), reason: zod.z.string() })) }),
|
|
@@ -1370,7 +1376,8 @@ function createToolCallAccuracyScorerLLM({ model, availableTools }) {
|
|
|
1370
1376
|
judge: {
|
|
1371
1377
|
model,
|
|
1372
1378
|
instructions: TOOL_SELECTION_ACCURACY_INSTRUCTIONS
|
|
1373
|
-
}
|
|
1379
|
+
},
|
|
1380
|
+
type: "agent"
|
|
1374
1381
|
}).preprocess(async ({ run }) => {
|
|
1375
1382
|
const isInputInvalid = !run.input || !run.input.inputMessages || run.input.inputMessages.length === 0;
|
|
1376
1383
|
const isOutputInvalid = !run.output || run.output.length === 0;
|
|
@@ -1607,7 +1614,8 @@ function createContextRelevanceScorerLLM({
|
|
|
1607
1614
|
judge: {
|
|
1608
1615
|
model,
|
|
1609
1616
|
instructions: CONTEXT_RELEVANCE_INSTRUCTIONS
|
|
1610
|
-
}
|
|
1617
|
+
},
|
|
1618
|
+
type: "agent"
|
|
1611
1619
|
}).analyze({
|
|
1612
1620
|
description: "Analyze the relevance and utility of provided context",
|
|
1613
1621
|
outputSchema: analyzeOutputSchema3,
|
|
@@ -1829,7 +1837,8 @@ function createContextPrecisionScorer({
|
|
|
1829
1837
|
judge: {
|
|
1830
1838
|
model,
|
|
1831
1839
|
instructions: CONTEXT_PRECISION_AGENT_INSTRUCTIONS
|
|
1832
|
-
}
|
|
1840
|
+
},
|
|
1841
|
+
type: "agent"
|
|
1833
1842
|
}).analyze({
|
|
1834
1843
|
description: "Evaluate the relevance of each context piece for generating the expected output",
|
|
1835
1844
|
outputSchema: contextRelevanceOutputSchema,
|
|
@@ -2120,7 +2129,8 @@ function createNoiseSensitivityScorerLLM({
|
|
|
2120
2129
|
judge: {
|
|
2121
2130
|
model,
|
|
2122
2131
|
instructions: NOISE_SENSITIVITY_INSTRUCTIONS
|
|
2123
|
-
}
|
|
2132
|
+
},
|
|
2133
|
+
type: "agent"
|
|
2124
2134
|
}).analyze({
|
|
2125
2135
|
description: "Analyze the impact of noise on agent response quality",
|
|
2126
2136
|
outputSchema: analyzeOutputSchema4,
|