@mastra/evals 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +76 -0
- package/dist/{chunk-6EA6D7JG.js → chunk-OEOE7ZHN.js} +21 -3
- package/dist/chunk-OEOE7ZHN.js.map +1 -0
- package/dist/{chunk-DSXZHUHI.cjs → chunk-W3U7MMDX.cjs} +21 -2
- package/dist/chunk-W3U7MMDX.cjs.map +1 -0
- package/dist/docs/README.md +1 -1
- package/dist/docs/SKILL.md +1 -1
- package/dist/docs/SOURCE_MAP.json +1 -1
- package/dist/docs/evals/03-reference.md +84 -10
- package/dist/scorers/code/tool-call-accuracy/index.d.ts +1 -1
- package/dist/scorers/index.d.ts +1 -0
- package/dist/scorers/index.d.ts.map +1 -1
- package/dist/scorers/llm/hallucination/index.d.ts +19 -2
- package/dist/scorers/llm/hallucination/index.d.ts.map +1 -1
- package/dist/scorers/llm/tool-call-accuracy/index.d.ts +1 -1
- package/dist/scorers/prebuilt/index.cjs +75 -63
- package/dist/scorers/prebuilt/index.cjs.map +1 -1
- package/dist/scorers/prebuilt/index.js +17 -5
- package/dist/scorers/prebuilt/index.js.map +1 -1
- package/dist/scorers/utils.cjs +20 -16
- package/dist/scorers/utils.d.ts +39 -0
- package/dist/scorers/utils.d.ts.map +1 -1
- package/dist/scorers/utils.js +1 -1
- package/package.json +4 -4
- package/dist/chunk-6EA6D7JG.js.map +0 -1
- package/dist/chunk-DSXZHUHI.cjs.map +0 -1
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { getAssistantMessageFromRunOutput, getUserMessageFromRunInput, roundToTwoDecimals, extractToolCalls, getCombinedSystemPrompt, getTextContentFromMastraDBMessage } from '../../chunk-6EA6D7JG.js';
|
|
1
|
+
import { getAssistantMessageFromRunOutput, getUserMessageFromRunInput, roundToTwoDecimals, extractToolCalls, getCombinedSystemPrompt, getTextContentFromMastraDBMessage } from '../../chunk-OEOE7ZHN.js';
|
|
2
2
|
import { createScorer } from '@mastra/core/evals';
|
|
3
3
|
import { z } from 'zod';
|
|
4
4
|
import nlp from 'compromise';
|
|
@@ -1114,10 +1114,16 @@ function createHallucinationScorer({
|
|
|
1114
1114
|
outputSchema: z.object({
|
|
1115
1115
|
verdicts: z.array(z.object({ statement: z.string(), verdict: z.string(), reason: z.string() }))
|
|
1116
1116
|
}),
|
|
1117
|
-
createPrompt: ({ results }) => {
|
|
1117
|
+
createPrompt: async ({ run, results }) => {
|
|
1118
|
+
let context;
|
|
1119
|
+
if (options?.getContext) {
|
|
1120
|
+
context = await options.getContext({ run, results, step: "analyze" });
|
|
1121
|
+
} else {
|
|
1122
|
+
context = options?.context ?? [];
|
|
1123
|
+
}
|
|
1118
1124
|
const prompt = createHallucinationAnalyzePrompt({
|
|
1119
1125
|
claims: results.preprocessStepResult.claims,
|
|
1120
|
-
context
|
|
1126
|
+
context
|
|
1121
1127
|
});
|
|
1122
1128
|
return prompt;
|
|
1123
1129
|
}
|
|
@@ -1131,11 +1137,17 @@ function createHallucinationScorer({
|
|
|
1131
1137
|
return roundToTwoDecimals(score);
|
|
1132
1138
|
}).generateReason({
|
|
1133
1139
|
description: "Reason about the results",
|
|
1134
|
-
createPrompt: ({ run, results, score }) => {
|
|
1140
|
+
createPrompt: async ({ run, results, score }) => {
|
|
1141
|
+
let context;
|
|
1142
|
+
if (options?.getContext) {
|
|
1143
|
+
context = await options.getContext({ run, results, score, step: "generateReason" });
|
|
1144
|
+
} else {
|
|
1145
|
+
context = options?.context ?? [];
|
|
1146
|
+
}
|
|
1135
1147
|
const prompt = createHallucinationReasonPrompt({
|
|
1136
1148
|
input: getUserMessageFromRunInput(run.input) ?? "",
|
|
1137
1149
|
output: getAssistantMessageFromRunOutput(run.output) ?? "",
|
|
1138
|
-
context
|
|
1150
|
+
context,
|
|
1139
1151
|
score,
|
|
1140
1152
|
scale: options?.scale || 1,
|
|
1141
1153
|
verdicts: results.analyzeStepResult?.verdicts || []
|