@mastra/evals 0.13.8-alpha.1 → 0.13.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/dist/scorers/code/completeness/index.d.ts +1 -2
- package/dist/scorers/code/completeness/index.d.ts.map +1 -1
- package/dist/scorers/code/content-similarity/index.d.ts +1 -2
- package/dist/scorers/code/content-similarity/index.d.ts.map +1 -1
- package/dist/scorers/code/index.cjs +18 -12
- package/dist/scorers/code/index.cjs.map +1 -1
- package/dist/scorers/code/index.js +18 -12
- package/dist/scorers/code/index.js.map +1 -1
- package/dist/scorers/code/keyword-coverage/index.d.ts +1 -2
- package/dist/scorers/code/keyword-coverage/index.d.ts.map +1 -1
- package/dist/scorers/code/textual-difference/index.d.ts +1 -2
- package/dist/scorers/code/textual-difference/index.d.ts.map +1 -1
- package/dist/scorers/code/tone/index.d.ts +1 -2
- package/dist/scorers/code/tone/index.d.ts.map +1 -1
- package/dist/scorers/code/tool-call-accuracy/index.d.ts +1 -2
- package/dist/scorers/code/tool-call-accuracy/index.d.ts.map +1 -1
- package/dist/scorers/llm/answer-relevancy/index.d.ts +1 -2
- package/dist/scorers/llm/answer-relevancy/index.d.ts.map +1 -1
- package/dist/scorers/llm/answer-similarity/index.d.ts +1 -2
- package/dist/scorers/llm/answer-similarity/index.d.ts.map +1 -1
- package/dist/scorers/llm/bias/index.d.ts.map +1 -1
- package/dist/scorers/llm/context-precision/index.d.ts +1 -1
- package/dist/scorers/llm/context-precision/index.d.ts.map +1 -1
- package/dist/scorers/llm/context-relevance/index.d.ts +1 -1
- package/dist/scorers/llm/context-relevance/index.d.ts.map +1 -1
- package/dist/scorers/llm/faithfulness/index.d.ts +1 -2
- package/dist/scorers/llm/faithfulness/index.d.ts.map +1 -1
- package/dist/scorers/llm/hallucination/index.d.ts +1 -2
- package/dist/scorers/llm/hallucination/index.d.ts.map +1 -1
- package/dist/scorers/llm/index.cjs +20 -10
- package/dist/scorers/llm/index.cjs.map +1 -1
- package/dist/scorers/llm/index.js +20 -10
- package/dist/scorers/llm/index.js.map +1 -1
- package/dist/scorers/llm/noise-sensitivity/index.d.ts +1 -2
- package/dist/scorers/llm/noise-sensitivity/index.d.ts.map +1 -1
- package/dist/scorers/llm/tool-call-accuracy/index.d.ts +1 -2
- package/dist/scorers/llm/tool-call-accuracy/index.d.ts.map +1 -1
- package/dist/scorers/llm/toxicity/index.d.ts +1 -2
- package/dist/scorers/llm/toxicity/index.d.ts.map +1 -1
- package/package.json +4 -4
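Every hunk below makes the same edit to the compiled scorer factories (mirrored in both the `.cjs` and `.js` bundles): the options object that previously ended with a bare `judge: { model, instructions }` entry now closes that entry with a comma and gains a sibling `type: "agent"` field. A minimal TypeScript sketch of that shape change, for orientation only (the `JudgeConfig`/`ScorerOptions` names and placeholder values are illustrative and not part of @mastra/evals):

```ts
// Illustrative only: models the shape change visible in the hunks below.
type JudgeConfig = { model: unknown; instructions: string };

type ScorerOptions = {
  judge: JudgeConfig;
  type?: "agent"; // the field each LLM-judged scorer factory now sets explicitly
};

// 0.13.8-alpha.1: judge config only
const before: ScorerOptions = {
  judge: { model: {}, instructions: "..." },
};

// 0.13.8: judge config plus an explicit type: "agent"
const after: ScorerOptions = {
  judge: { model: {}, instructions: "..." },
  type: "agent",
};
```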
@@ -220,7 +220,8 @@ function createAnswerRelevancyScorer({
     judge: {
       model,
       instructions: ANSWER_RELEVANCY_AGENT_INSTRUCTIONS
-    }
+    },
+    type: "agent"
   }).preprocess({
     description: "Extract relevant statements from the LLM output",
     outputSchema: extractOutputSchema,
@@ -436,7 +437,8 @@ function createAnswerSimilarityScorer({
     judge: {
       model,
       instructions: ANSWER_SIMILARITY_INSTRUCTIONS
-    }
+    },
+    type: "agent"
   }).preprocess({
     description: "Extract semantic units from output and ground truth",
     outputSchema: extractOutputSchema2,
@@ -692,7 +694,8 @@ function createFaithfulnessScorer({
     judge: {
       model,
       instructions: FAITHFULNESS_AGENT_INSTRUCTIONS
-    }
+    },
+    type: "agent"
   }).preprocess({
     description: "Extract relevant statements from the LLM output",
     outputSchema: z.array(z.string()),
@@ -849,7 +852,8 @@ function createBiasScorer({ model, options }) {
     judge: {
       model,
       instructions: BIAS_AGENT_INSTRUCTIONS
-    }
+    },
+    type: "agent"
   }).preprocess({
     description: "Extract relevant statements from the LLM output",
     outputSchema: z.object({
@@ -1082,7 +1086,8 @@ function createHallucinationScorer({
     judge: {
       model,
       instructions: HALLUCINATION_AGENT_INSTRUCTIONS
-    }
+    },
+    type: "agent"
   }).preprocess({
     description: "Extract all claims from the given output",
     outputSchema: z.object({
@@ -1220,7 +1225,8 @@ function createToxicityScorer({ model, options }) {
     judge: {
       model,
       instructions: TOXICITY_AGENT_INSTRUCTIONS
-    }
+    },
+    type: "agent"
   }).analyze({
     description: "Score the relevance of the statements to the input",
     outputSchema: z.object({ verdicts: z.array(z.object({ verdict: z.string(), reason: z.string() })) }),
@@ -1368,7 +1374,8 @@ function createToolCallAccuracyScorerLLM({ model, availableTools }) {
     judge: {
       model,
       instructions: TOOL_SELECTION_ACCURACY_INSTRUCTIONS
-    }
+    },
+    type: "agent"
   }).preprocess(async ({ run }) => {
     const isInputInvalid = !run.input || !run.input.inputMessages || run.input.inputMessages.length === 0;
     const isOutputInvalid = !run.output || run.output.length === 0;
@@ -1605,7 +1612,8 @@ function createContextRelevanceScorerLLM({
     judge: {
       model,
       instructions: CONTEXT_RELEVANCE_INSTRUCTIONS
-    }
+    },
+    type: "agent"
   }).analyze({
     description: "Analyze the relevance and utility of provided context",
     outputSchema: analyzeOutputSchema3,
@@ -1827,7 +1835,8 @@ function createContextPrecisionScorer({
     judge: {
       model,
       instructions: CONTEXT_PRECISION_AGENT_INSTRUCTIONS
-    }
+    },
+    type: "agent"
   }).analyze({
     description: "Evaluate the relevance of each context piece for generating the expected output",
     outputSchema: contextRelevanceOutputSchema,
@@ -2118,7 +2127,8 @@ function createNoiseSensitivityScorerLLM({
     judge: {
       model,
       instructions: NOISE_SENSITIVITY_INSTRUCTIONS
-    }
+    },
+    type: "agent"
   }).analyze({
     description: "Analyze the impact of noise on agent response quality",
     outputSchema: analyzeOutputSchema4,