@mastra/evals 1.1.0 → 1.1.1-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/dist/docs/SKILL.md +31 -20
- package/dist/docs/{SOURCE_MAP.json → assets/SOURCE_MAP.json} +1 -1
- package/dist/docs/{evals/02-built-in-scorers.md → references/docs-evals-built-in-scorers.md} +5 -7
- package/dist/docs/{evals/01-overview.md → references/docs-evals-overview.md} +26 -10
- package/dist/docs/references/reference-evals-answer-relevancy.md +105 -0
- package/dist/docs/references/reference-evals-answer-similarity.md +99 -0
- package/dist/docs/references/reference-evals-bias.md +120 -0
- package/dist/docs/references/reference-evals-completeness.md +137 -0
- package/dist/docs/references/reference-evals-content-similarity.md +101 -0
- package/dist/docs/references/reference-evals-context-precision.md +196 -0
- package/dist/docs/references/reference-evals-context-relevance.md +536 -0
- package/dist/docs/references/reference-evals-faithfulness.md +114 -0
- package/dist/docs/references/reference-evals-hallucination.md +220 -0
- package/dist/docs/references/reference-evals-keyword-coverage.md +128 -0
- package/dist/docs/references/reference-evals-noise-sensitivity.md +685 -0
- package/dist/docs/references/reference-evals-prompt-alignment.md +619 -0
- package/dist/docs/references/reference-evals-scorer-utils.md +330 -0
- package/dist/docs/references/reference-evals-textual-difference.md +113 -0
- package/dist/docs/references/reference-evals-tone-consistency.md +119 -0
- package/dist/docs/references/reference-evals-tool-call-accuracy.md +533 -0
- package/dist/docs/references/reference-evals-toxicity.md +123 -0
- package/dist/scorers/llm/faithfulness/index.d.ts +3 -1
- package/dist/scorers/llm/faithfulness/index.d.ts.map +1 -1
- package/dist/scorers/llm/noise-sensitivity/index.d.ts.map +1 -1
- package/dist/scorers/llm/prompt-alignment/index.d.ts.map +1 -1
- package/dist/scorers/prebuilt/index.cjs +11 -7
- package/dist/scorers/prebuilt/index.cjs.map +1 -1
- package/dist/scorers/prebuilt/index.js +11 -7
- package/dist/scorers/prebuilt/index.js.map +1 -1
- package/package.json +3 -4
- package/dist/docs/README.md +0 -31
- package/dist/docs/evals/03-reference.md +0 -4092
|
@@ -704,7 +704,9 @@ function createFaithfulnessScorer({
|
|
|
704
704
|
type: "agent"
|
|
705
705
|
}).preprocess({
|
|
706
706
|
description: "Extract relevant statements from the LLM output",
|
|
707
|
-
outputSchema: z.
|
|
707
|
+
outputSchema: z.object({
|
|
708
|
+
claims: z.array(z.string())
|
|
709
|
+
}),
|
|
708
710
|
createPrompt: ({ run }) => {
|
|
709
711
|
const prompt = createFaithfulnessExtractPrompt({ output: getAssistantMessageFromRunOutput(run.output) ?? "" });
|
|
710
712
|
return prompt;
|
|
@@ -718,7 +720,7 @@ function createFaithfulnessScorer({
|
|
|
718
720
|
(toolCall) => toolCall.state === "result" ? JSON.stringify(toolCall.result) : ""
|
|
719
721
|
) ?? [];
|
|
720
722
|
const prompt = createFaithfulnessAnalyzePrompt({
|
|
721
|
-
claims: results.preprocessStepResult || [],
|
|
723
|
+
claims: results.preprocessStepResult?.claims || [],
|
|
722
724
|
context
|
|
723
725
|
});
|
|
724
726
|
return prompt;
|
|
@@ -2117,6 +2119,7 @@ Example responses:
|
|
|
2117
2119
|
}
|
|
2118
2120
|
|
|
2119
2121
|
// src/scorers/llm/noise-sensitivity/index.ts
|
|
2122
|
+
var scoreSchema = z.number().refine((n) => n >= 0 && n <= 1, { message: "Score must be between 0 and 1" });
|
|
2120
2123
|
var analyzeOutputSchema4 = z.object({
|
|
2121
2124
|
dimensions: z.array(
|
|
2122
2125
|
z.object({
|
|
@@ -2128,7 +2131,7 @@ var analyzeOutputSchema4 = z.object({
|
|
|
2128
2131
|
),
|
|
2129
2132
|
overallAssessment: z.string(),
|
|
2130
2133
|
majorIssues: z.array(z.string()).optional().default([]),
|
|
2131
|
-
robustnessScore:
|
|
2134
|
+
robustnessScore: scoreSchema
|
|
2132
2135
|
});
|
|
2133
2136
|
var DEFAULT_IMPACT_WEIGHTS = {
|
|
2134
2137
|
none: 1,
|
|
@@ -2462,9 +2465,10 @@ Example responses:
|
|
|
2462
2465
|
}
|
|
2463
2466
|
|
|
2464
2467
|
// src/scorers/llm/prompt-alignment/index.ts
|
|
2468
|
+
var scoreSchema2 = z.number().refine((n) => n >= 0 && n <= 1, { message: "Score must be between 0 and 1" });
|
|
2465
2469
|
var analyzeOutputSchema5 = z.object({
|
|
2466
2470
|
intentAlignment: z.object({
|
|
2467
|
-
score:
|
|
2471
|
+
score: scoreSchema2,
|
|
2468
2472
|
primaryIntent: z.string(),
|
|
2469
2473
|
isAddressed: z.boolean(),
|
|
2470
2474
|
reasoning: z.string()
|
|
@@ -2477,15 +2481,15 @@ var analyzeOutputSchema5 = z.object({
|
|
|
2477
2481
|
reasoning: z.string()
|
|
2478
2482
|
})
|
|
2479
2483
|
),
|
|
2480
|
-
overallScore:
|
|
2484
|
+
overallScore: scoreSchema2
|
|
2481
2485
|
}),
|
|
2482
2486
|
completeness: z.object({
|
|
2483
|
-
score:
|
|
2487
|
+
score: scoreSchema2,
|
|
2484
2488
|
missingElements: z.array(z.string()),
|
|
2485
2489
|
reasoning: z.string()
|
|
2486
2490
|
}),
|
|
2487
2491
|
responseAppropriateness: z.object({
|
|
2488
|
-
score:
|
|
2492
|
+
score: scoreSchema2,
|
|
2489
2493
|
formatAlignment: z.boolean(),
|
|
2490
2494
|
toneAlignment: z.boolean(),
|
|
2491
2495
|
reasoning: z.string()
|