@mastra/evals 1.1.0 → 1.1.1-alpha.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/dist/docs/SKILL.md +31 -20
- package/dist/docs/{SOURCE_MAP.json → assets/SOURCE_MAP.json} +1 -1
- package/dist/docs/{evals/02-built-in-scorers.md → references/docs-evals-built-in-scorers.md} +5 -7
- package/dist/docs/{evals/01-overview.md → references/docs-evals-overview.md} +26 -10
- package/dist/docs/references/reference-evals-answer-relevancy.md +105 -0
- package/dist/docs/references/reference-evals-answer-similarity.md +99 -0
- package/dist/docs/references/reference-evals-bias.md +120 -0
- package/dist/docs/references/reference-evals-completeness.md +137 -0
- package/dist/docs/references/reference-evals-content-similarity.md +101 -0
- package/dist/docs/references/reference-evals-context-precision.md +196 -0
- package/dist/docs/references/reference-evals-context-relevance.md +536 -0
- package/dist/docs/references/reference-evals-faithfulness.md +114 -0
- package/dist/docs/references/reference-evals-hallucination.md +220 -0
- package/dist/docs/references/reference-evals-keyword-coverage.md +128 -0
- package/dist/docs/references/reference-evals-noise-sensitivity.md +685 -0
- package/dist/docs/references/reference-evals-prompt-alignment.md +619 -0
- package/dist/docs/references/reference-evals-scorer-utils.md +330 -0
- package/dist/docs/references/reference-evals-textual-difference.md +113 -0
- package/dist/docs/references/reference-evals-tone-consistency.md +119 -0
- package/dist/docs/references/reference-evals-tool-call-accuracy.md +533 -0
- package/dist/docs/references/reference-evals-toxicity.md +123 -0
- package/dist/scorers/llm/faithfulness/index.d.ts +3 -1
- package/dist/scorers/llm/faithfulness/index.d.ts.map +1 -1
- package/dist/scorers/llm/noise-sensitivity/index.d.ts.map +1 -1
- package/dist/scorers/llm/prompt-alignment/index.d.ts.map +1 -1
- package/dist/scorers/prebuilt/index.cjs +11 -7
- package/dist/scorers/prebuilt/index.cjs.map +1 -1
- package/dist/scorers/prebuilt/index.js +11 -7
- package/dist/scorers/prebuilt/index.js.map +1 -1
- package/package.json +3 -4
- package/dist/docs/README.md +0 -31
- package/dist/docs/evals/03-reference.md +0 -4092
|
@@ -713,7 +713,9 @@ function createFaithfulnessScorer({
|
|
|
713
713
|
type: "agent"
|
|
714
714
|
}).preprocess({
|
|
715
715
|
description: "Extract relevant statements from the LLM output",
|
|
716
|
-
outputSchema: zod.z.array(zod.z.string()),
|
|
716
|
+
outputSchema: zod.z.object({
|
|
717
|
+
claims: zod.z.array(zod.z.string())
|
|
718
|
+
}),
|
|
717
719
|
createPrompt: ({ run }) => {
|
|
718
720
|
const prompt = createFaithfulnessExtractPrompt({ output: chunkW3U7MMDX_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" });
|
|
719
721
|
return prompt;
|
|
@@ -727,7 +729,7 @@ function createFaithfulnessScorer({
|
|
|
727
729
|
(toolCall) => toolCall.state === "result" ? JSON.stringify(toolCall.result) : ""
|
|
728
730
|
) ?? [];
|
|
729
731
|
const prompt = createFaithfulnessAnalyzePrompt({
|
|
730
|
-
claims: results.preprocessStepResult || [],
|
|
732
|
+
claims: results.preprocessStepResult?.claims || [],
|
|
731
733
|
context
|
|
732
734
|
});
|
|
733
735
|
return prompt;
|
|
@@ -2126,6 +2128,7 @@ Example responses:
|
|
|
2126
2128
|
}
|
|
2127
2129
|
|
|
2128
2130
|
// src/scorers/llm/noise-sensitivity/index.ts
|
|
2131
|
+
var scoreSchema = zod.z.number().refine((n) => n >= 0 && n <= 1, { message: "Score must be between 0 and 1" });
|
|
2129
2132
|
var analyzeOutputSchema4 = zod.z.object({
|
|
2130
2133
|
dimensions: zod.z.array(
|
|
2131
2134
|
zod.z.object({
|
|
@@ -2137,7 +2140,7 @@ var analyzeOutputSchema4 = zod.z.object({
|
|
|
2137
2140
|
),
|
|
2138
2141
|
overallAssessment: zod.z.string(),
|
|
2139
2142
|
majorIssues: zod.z.array(zod.z.string()).optional().default([]),
|
|
2140
|
-
robustnessScore:
|
|
2143
|
+
robustnessScore: scoreSchema
|
|
2141
2144
|
});
|
|
2142
2145
|
var DEFAULT_IMPACT_WEIGHTS = {
|
|
2143
2146
|
none: 1,
|
|
@@ -2471,9 +2474,10 @@ Example responses:
|
|
|
2471
2474
|
}
|
|
2472
2475
|
|
|
2473
2476
|
// src/scorers/llm/prompt-alignment/index.ts
|
|
2477
|
+
var scoreSchema2 = zod.z.number().refine((n) => n >= 0 && n <= 1, { message: "Score must be between 0 and 1" });
|
|
2474
2478
|
var analyzeOutputSchema5 = zod.z.object({
|
|
2475
2479
|
intentAlignment: zod.z.object({
|
|
2476
|
-
score:
|
|
2480
|
+
score: scoreSchema2,
|
|
2477
2481
|
primaryIntent: zod.z.string(),
|
|
2478
2482
|
isAddressed: zod.z.boolean(),
|
|
2479
2483
|
reasoning: zod.z.string()
|
|
@@ -2486,15 +2490,15 @@ var analyzeOutputSchema5 = zod.z.object({
|
|
|
2486
2490
|
reasoning: zod.z.string()
|
|
2487
2491
|
})
|
|
2488
2492
|
),
|
|
2489
|
-
overallScore:
|
|
2493
|
+
overallScore: scoreSchema2
|
|
2490
2494
|
}),
|
|
2491
2495
|
completeness: zod.z.object({
|
|
2492
|
-
score:
|
|
2496
|
+
score: scoreSchema2,
|
|
2493
2497
|
missingElements: zod.z.array(zod.z.string()),
|
|
2494
2498
|
reasoning: zod.z.string()
|
|
2495
2499
|
}),
|
|
2496
2500
|
responseAppropriateness: zod.z.object({
|
|
2497
|
-
score:
|
|
2501
|
+
score: scoreSchema2,
|
|
2498
2502
|
formatAlignment: zod.z.boolean(),
|
|
2499
2503
|
toneAlignment: zod.z.boolean(),
|
|
2500
2504
|
reasoning: zod.z.string()
|