@mastra/evals 0.12.0-alpha.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +2 -2
- package/dist/metrics/index.d.ts +3 -3
- package/dist/metrics/llm/answer-relevancy/index.d.ts +1 -1
- package/dist/metrics/llm/answer-relevancy/metricJudge.d.ts +1 -1
- package/dist/metrics/llm/bias/index.d.ts +1 -1
- package/dist/metrics/llm/bias/metricJudge.d.ts +1 -1
- package/dist/metrics/llm/context-position/index.d.ts +1 -1
- package/dist/metrics/llm/context-position/metricJudge.d.ts +1 -1
- package/dist/metrics/llm/context-precision/index.d.ts +1 -1
- package/dist/metrics/llm/context-precision/metricJudge.d.ts +1 -1
- package/dist/metrics/llm/context-relevancy/index.d.ts +1 -1
- package/dist/metrics/llm/context-relevancy/metricJudge.d.ts +1 -1
- package/dist/metrics/llm/contextual-recall/index.d.ts +1 -1
- package/dist/metrics/llm/contextual-recall/metricJudge.d.ts +1 -1
- package/dist/metrics/llm/faithfulness/index.d.ts +1 -1
- package/dist/metrics/llm/faithfulness/metricJudge.d.ts +1 -1
- package/dist/metrics/llm/hallucination/index.d.ts +1 -1
- package/dist/metrics/llm/hallucination/metricJudge.d.ts +1 -1
- package/dist/metrics/llm/index.d.ts +11 -11
- package/dist/metrics/llm/prompt-alignment/index.d.ts +1 -1
- package/dist/metrics/llm/prompt-alignment/metricJudge.d.ts +1 -1
- package/dist/metrics/llm/summarization/index.d.ts +1 -1
- package/dist/metrics/llm/summarization/metricJudge.d.ts +1 -1
- package/dist/metrics/llm/toxicity/index.d.ts +1 -1
- package/dist/metrics/llm/toxicity/metricJudge.d.ts +1 -1
- package/dist/metrics/nlp/index.d.ts +5 -5
- package/dist/scorers/code/index.d.ts +5 -5
- package/dist/scorers/index.d.ts +2 -2
- package/dist/scorers/llm/index.d.ts +5 -5
- package/package.json +11 -10
package/dist/index.d.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export { evaluate } from './evaluation';
|
|
2
|
-
export { attachListeners, globalSetup } from './attachListeners';
|
|
1
|
+
export { evaluate } from './evaluation.js';
|
|
2
|
+
export { attachListeners, globalSetup } from './attachListeners.js';
|
|
3
3
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/metrics/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export * from './nlp';
|
|
2
|
-
export * from './llm';
|
|
3
|
-
export * from './judge';
|
|
1
|
+
export * from './nlp/index.js';
|
|
2
|
+
export * from './llm/index.js';
|
|
3
|
+
export * from './judge/index.js';
|
|
4
4
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Metric } from '@mastra/core/eval';
|
|
2
2
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
3
|
-
import type { MetricResultWithReason } from '../types';
|
|
3
|
+
import type { MetricResultWithReason } from '../types.js';
|
|
4
4
|
export interface AnswerRelevancyMetricOptions {
|
|
5
5
|
uncertaintyWeight?: number;
|
|
6
6
|
scale?: number;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
2
|
-
import { MastraAgentJudge } from '../../judge';
|
|
2
|
+
import { MastraAgentJudge } from '../../judge/index.js';
|
|
3
3
|
export declare class AnswerRelevancyJudge extends MastraAgentJudge {
|
|
4
4
|
constructor(model: LanguageModel);
|
|
5
5
|
evaluate(input: string, actualOutput: string): Promise<{
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Metric } from '@mastra/core/eval';
|
|
2
2
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
3
|
-
import type { MetricResultWithReason } from '../types';
|
|
3
|
+
import type { MetricResultWithReason } from '../types.js';
|
|
4
4
|
export interface BiasMetricOptions {
|
|
5
5
|
scale?: number;
|
|
6
6
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
2
|
-
import { MastraAgentJudge } from '../../judge';
|
|
2
|
+
import { MastraAgentJudge } from '../../judge/index.js';
|
|
3
3
|
export declare class BiasJudge extends MastraAgentJudge {
|
|
4
4
|
constructor(model: LanguageModel);
|
|
5
5
|
evaluate(input: string, actualOutput: string): Promise<{
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Metric } from '@mastra/core/eval';
|
|
2
2
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
3
|
-
import type { MetricResultWithReason } from '../types';
|
|
3
|
+
import type { MetricResultWithReason } from '../types.js';
|
|
4
4
|
export interface ContextPositionMetricOptions {
|
|
5
5
|
scale?: number;
|
|
6
6
|
context: string[];
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
2
|
-
import { MastraAgentJudge } from '../../judge';
|
|
2
|
+
import { MastraAgentJudge } from '../../judge/index.js';
|
|
3
3
|
export declare class ContextPositionJudge extends MastraAgentJudge {
|
|
4
4
|
constructor(model: LanguageModel);
|
|
5
5
|
evaluate(input: string, actualOutput: string, retrievalContext: string[]): Promise<{
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Metric } from '@mastra/core/eval';
|
|
2
2
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
3
|
-
import type { MetricResultWithReason } from '../types';
|
|
3
|
+
import type { MetricResultWithReason } from '../types.js';
|
|
4
4
|
export interface ContextPrecisionMetricOptions {
|
|
5
5
|
scale?: number;
|
|
6
6
|
context: string[];
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
2
|
-
import { MastraAgentJudge } from '../../judge';
|
|
2
|
+
import { MastraAgentJudge } from '../../judge/index.js';
|
|
3
3
|
export declare class ContextPrecisionJudge extends MastraAgentJudge {
|
|
4
4
|
constructor(model: LanguageModel);
|
|
5
5
|
evaluate(input: string, actualOutput: string, retrievalContext: string[]): Promise<{
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Metric } from '@mastra/core/eval';
|
|
2
2
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
3
|
-
import type { MetricResultWithReason } from '../types';
|
|
3
|
+
import type { MetricResultWithReason } from '../types.js';
|
|
4
4
|
export interface ContextRelevancyOptions {
|
|
5
5
|
scale?: number;
|
|
6
6
|
context: string[];
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
2
|
-
import { MastraAgentJudge } from '../../judge';
|
|
2
|
+
import { MastraAgentJudge } from '../../judge/index.js';
|
|
3
3
|
export declare class ContextRelevancyJudge extends MastraAgentJudge {
|
|
4
4
|
constructor(model: LanguageModel);
|
|
5
5
|
evaluate(input: string, actualOutput: string, retrievalContext: string[]): Promise<{
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Metric } from '@mastra/core/eval';
|
|
2
2
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
3
|
-
import type { MetricResultWithReason } from '../types';
|
|
3
|
+
import type { MetricResultWithReason } from '../types.js';
|
|
4
4
|
export interface ContextualRecallMetricOptions {
|
|
5
5
|
scale?: number;
|
|
6
6
|
context: string[];
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
2
|
-
import { MastraAgentJudge } from '../../judge';
|
|
2
|
+
import { MastraAgentJudge } from '../../judge/index.js';
|
|
3
3
|
export declare class ContextualRecallJudge extends MastraAgentJudge {
|
|
4
4
|
constructor(model: LanguageModel);
|
|
5
5
|
evaluate(input: string, actualOutput: string, retrievalContext: string[]): Promise<{
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Metric } from '@mastra/core/eval';
|
|
2
2
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
3
|
-
import type { MetricResultWithReason } from '../types';
|
|
3
|
+
import type { MetricResultWithReason } from '../types.js';
|
|
4
4
|
export interface FaithfulnessMetricOptions {
|
|
5
5
|
scale?: number;
|
|
6
6
|
context: string[];
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
2
|
-
import { MastraAgentJudge } from '../../judge';
|
|
2
|
+
import { MastraAgentJudge } from '../../judge/index.js';
|
|
3
3
|
export declare class FaithfulnessJudge extends MastraAgentJudge {
|
|
4
4
|
constructor(model: LanguageModel);
|
|
5
5
|
evaluate(output: string, context: string[]): Promise<{
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Metric } from '@mastra/core/eval';
|
|
2
2
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
3
|
-
import type { MetricResultWithReason } from '../types';
|
|
3
|
+
import type { MetricResultWithReason } from '../types.js';
|
|
4
4
|
export interface HallucinationMetricOptions {
|
|
5
5
|
scale?: number;
|
|
6
6
|
context: string[];
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
2
|
-
import { MastraAgentJudge } from '../../judge';
|
|
2
|
+
import { MastraAgentJudge } from '../../judge/index.js';
|
|
3
3
|
export declare class HallucinationJudge extends MastraAgentJudge {
|
|
4
4
|
constructor(model: LanguageModel);
|
|
5
5
|
evaluate(output: string, context: string[]): Promise<{
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
export { AnswerRelevancyMetric } from './answer-relevancy';
|
|
2
|
-
export { ContextPositionMetric } from './context-position';
|
|
3
|
-
export { ContextPrecisionMetric } from './context-precision';
|
|
4
|
-
export { FaithfulnessMetric } from './faithfulness';
|
|
5
|
-
export { HallucinationMetric } from './hallucination';
|
|
6
|
-
export { PromptAlignmentMetric } from './prompt-alignment';
|
|
7
|
-
export { ToxicityMetric } from './toxicity';
|
|
8
|
-
export { ContextRelevancyMetric } from './context-relevancy';
|
|
9
|
-
export { ContextualRecallMetric } from './contextual-recall';
|
|
10
|
-
export { SummarizationMetric } from './summarization';
|
|
11
|
-
export { BiasMetric } from './bias';
|
|
1
|
+
export { AnswerRelevancyMetric } from './answer-relevancy/index.js';
|
|
2
|
+
export { ContextPositionMetric } from './context-position/index.js';
|
|
3
|
+
export { ContextPrecisionMetric } from './context-precision/index.js';
|
|
4
|
+
export { FaithfulnessMetric } from './faithfulness/index.js';
|
|
5
|
+
export { HallucinationMetric } from './hallucination/index.js';
|
|
6
|
+
export { PromptAlignmentMetric } from './prompt-alignment/index.js';
|
|
7
|
+
export { ToxicityMetric } from './toxicity/index.js';
|
|
8
|
+
export { ContextRelevancyMetric } from './context-relevancy/index.js';
|
|
9
|
+
export { ContextualRecallMetric } from './contextual-recall/index.js';
|
|
10
|
+
export { SummarizationMetric } from './summarization/index.js';
|
|
11
|
+
export { BiasMetric } from './bias/index.js';
|
|
12
12
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Metric } from '@mastra/core/eval';
|
|
2
2
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
3
|
-
import type { MetricResultWithReason } from '../types';
|
|
3
|
+
import type { MetricResultWithReason } from '../types.js';
|
|
4
4
|
export interface PromptAlignmentMetricOptions {
|
|
5
5
|
scale?: number;
|
|
6
6
|
instructions: string[];
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
2
|
-
import { MastraAgentJudge } from '../../judge';
|
|
2
|
+
import { MastraAgentJudge } from '../../judge/index.js';
|
|
3
3
|
export declare class PromptAlignmentJudge extends MastraAgentJudge {
|
|
4
4
|
constructor(model: LanguageModel);
|
|
5
5
|
evaluate(input: string, actualOutput: string, instructions: string[]): Promise<{
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Metric } from '@mastra/core/eval';
|
|
2
2
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
3
|
-
import type { MetricResultWithReason } from '../types';
|
|
3
|
+
import type { MetricResultWithReason } from '../types.js';
|
|
4
4
|
export interface SummarizationMetricOptions {
|
|
5
5
|
scale?: number;
|
|
6
6
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
2
|
-
import { MastraAgentJudge } from '../../judge';
|
|
2
|
+
import { MastraAgentJudge } from '../../judge/index.js';
|
|
3
3
|
export declare class SummarizationJudge extends MastraAgentJudge {
|
|
4
4
|
constructor(model: LanguageModel);
|
|
5
5
|
evaluateAlignment(originalText: string, summary: string): Promise<{
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Metric } from '@mastra/core/eval';
|
|
2
2
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
3
|
-
import type { MetricResultWithReason } from '../types';
|
|
3
|
+
import type { MetricResultWithReason } from '../types.js';
|
|
4
4
|
export interface ToxicityMetricOptions {
|
|
5
5
|
scale?: number;
|
|
6
6
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
2
|
-
import { MastraAgentJudge } from '../../judge';
|
|
2
|
+
import { MastraAgentJudge } from '../../judge/index.js';
|
|
3
3
|
export declare class ToxicityJudge extends MastraAgentJudge {
|
|
4
4
|
constructor(model: LanguageModel);
|
|
5
5
|
evaluate(input: string, actualOutput: string): Promise<{
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
export { CompletenessMetric } from './completeness';
|
|
2
|
-
export { ContentSimilarityMetric } from './content-similarity';
|
|
3
|
-
export { TextualDifferenceMetric } from './textual-difference';
|
|
4
|
-
export { KeywordCoverageMetric } from './keyword-coverage';
|
|
5
|
-
export { ToneConsistencyMetric } from './tone';
|
|
1
|
+
export { CompletenessMetric } from './completeness/index.js';
|
|
2
|
+
export { ContentSimilarityMetric } from './content-similarity/index.js';
|
|
3
|
+
export { TextualDifferenceMetric } from './textual-difference/index.js';
|
|
4
|
+
export { KeywordCoverageMetric } from './keyword-coverage/index.js';
|
|
5
|
+
export { ToneConsistencyMetric } from './tone/index.js';
|
|
6
6
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
export * from './completeness';
|
|
2
|
-
export * from './textual-difference';
|
|
3
|
-
export * from './keyword-coverage';
|
|
4
|
-
export * from './content-similarity';
|
|
5
|
-
export * from './tone';
|
|
1
|
+
export * from './completeness/index.js';
|
|
2
|
+
export * from './textual-difference/index.js';
|
|
3
|
+
export * from './keyword-coverage/index.js';
|
|
4
|
+
export * from './content-similarity/index.js';
|
|
5
|
+
export * from './tone/index.js';
|
|
6
6
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/scorers/index.d.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export * from './llm';
|
|
2
|
-
export * from './code';
|
|
1
|
+
export * from './llm/index.js';
|
|
2
|
+
export * from './code/index.js';
|
|
3
3
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
export * from './answer-relevancy';
|
|
2
|
-
export * from './faithfulness';
|
|
3
|
-
export * from './bias';
|
|
4
|
-
export * from './hallucination';
|
|
5
|
-
export * from './toxicity';
|
|
1
|
+
export * from './answer-relevancy/index.js';
|
|
2
|
+
export * from './faithfulness/index.js';
|
|
3
|
+
export * from './bias/index.js';
|
|
4
|
+
export * from './hallucination/index.js';
|
|
5
|
+
export * from './toxicity/index.js';
|
|
6
6
|
//# sourceMappingURL=index.d.ts.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mastra/evals",
|
|
3
|
-
"version": "0.12.0-alpha.0",
|
|
3
|
+
"version": "0.12.0",
|
|
4
4
|
"description": "",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"files": [
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
"default": "./dist/index.js"
|
|
16
16
|
},
|
|
17
17
|
"require": {
|
|
18
|
-
"types": "./dist/index.d.
|
|
18
|
+
"types": "./dist/index.d.ts",
|
|
19
19
|
"default": "./dist/index.cjs"
|
|
20
20
|
}
|
|
21
21
|
},
|
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
"default": "./dist/metrics/judge/index.js"
|
|
26
26
|
},
|
|
27
27
|
"require": {
|
|
28
|
-
"types": "./dist/metrics/judge/index.d.
|
|
28
|
+
"types": "./dist/metrics/judge/index.d.ts",
|
|
29
29
|
"default": "./dist/metrics/judge/index.cjs"
|
|
30
30
|
}
|
|
31
31
|
},
|
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
"default": "./dist/metrics/nlp/index.js"
|
|
36
36
|
},
|
|
37
37
|
"require": {
|
|
38
|
-
"types": "./dist/metrics/nlp/index.d.
|
|
38
|
+
"types": "./dist/metrics/nlp/index.d.ts",
|
|
39
39
|
"default": "./dist/metrics/nlp/index.cjs"
|
|
40
40
|
}
|
|
41
41
|
},
|
|
@@ -45,7 +45,7 @@
|
|
|
45
45
|
"default": "./dist/metrics/llm/index.js"
|
|
46
46
|
},
|
|
47
47
|
"require": {
|
|
48
|
-
"types": "./dist/metrics/llm/index.d.
|
|
48
|
+
"types": "./dist/metrics/llm/index.d.ts",
|
|
49
49
|
"default": "./dist/metrics/llm/index.cjs"
|
|
50
50
|
}
|
|
51
51
|
},
|
|
@@ -55,7 +55,7 @@
|
|
|
55
55
|
"default": "./dist/scorers/llm/index.js"
|
|
56
56
|
},
|
|
57
57
|
"require": {
|
|
58
|
-
"types": "./dist/scorers/llm/index.d.
|
|
58
|
+
"types": "./dist/scorers/llm/index.d.ts",
|
|
59
59
|
"default": "./dist/scorers/llm/index.cjs"
|
|
60
60
|
}
|
|
61
61
|
},
|
|
@@ -65,7 +65,7 @@
|
|
|
65
65
|
"default": "./dist/scorers/code/index.js"
|
|
66
66
|
},
|
|
67
67
|
"require": {
|
|
68
|
-
"types": "./dist/scorers/code/index.d.
|
|
68
|
+
"types": "./dist/scorers/code/index.d.ts",
|
|
69
69
|
"default": "./dist/scorers/code/index.cjs"
|
|
70
70
|
}
|
|
71
71
|
},
|
|
@@ -84,7 +84,7 @@
|
|
|
84
84
|
"zod": "^3.25.67"
|
|
85
85
|
},
|
|
86
86
|
"peerDependencies": {
|
|
87
|
-
"@mastra/core": ">=0.
|
|
87
|
+
"@mastra/core": ">=0.13.0-0 <0.14.0-0",
|
|
88
88
|
"ai": "^4.0.0"
|
|
89
89
|
},
|
|
90
90
|
"devDependencies": {
|
|
@@ -100,8 +100,9 @@
|
|
|
100
100
|
"tsup": "^8.5.0",
|
|
101
101
|
"typescript": "^5.8.3",
|
|
102
102
|
"vitest": "^3.2.4",
|
|
103
|
-
"@internal/lint": "0.0.
|
|
104
|
-
"@mastra/core": "0.13.0
|
|
103
|
+
"@internal/lint": "0.0.27",
|
|
104
|
+
"@mastra/core": "0.13.0",
|
|
105
|
+
"@internal/types-builder": "0.0.2"
|
|
105
106
|
},
|
|
106
107
|
"scripts": {
|
|
107
108
|
"check": "tsc --noEmit",
|