@mastra/evals 0.1.14 → 0.1.15-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { Agent } from '@mastra/core/agent';
|
|
2
|
+
import { EvaluationResult } from '@mastra/core';
|
|
2
3
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
3
4
|
import type { Mastra } from '@mastra/core';
|
|
4
5
|
import type { Metric } from '@mastra/core';
|
|
5
6
|
import { Metric as Metric_2 } from '@mastra/core/eval';
|
|
6
|
-
import { MetricResult } from '@mastra/core';
|
|
7
|
-
import type { MetricResult as MetricResult_2 } from '@mastra/core/eval';
|
|
7
|
+
import type { MetricResult } from '@mastra/core/eval';
|
|
8
8
|
|
|
9
9
|
export declare const ANSWER_RELEVANCY_AGENT_INSTRUCTIONS = "You are a balanced and nuanced answer relevancy evaluator. Your job is to determine if LLM outputs are relevant to the input, including handling partially relevant or uncertain cases.\n\nKey Principles:\n1. Evaluate whether the output addresses what the input is asking for\n2. Consider both direct answers and related context\n3. Prioritize relevance to the input over correctness\n4. Recognize that responses can be partially relevant\n5. Empty inputs or error messages should always be marked as \"no\"\n6. Responses that discuss the type of information being asked show partial relevance";
|
|
10
10
|
|
|
@@ -86,7 +86,7 @@ export { CompletenessMetric }
|
|
|
86
86
|
export { CompletenessMetric as CompletenessMetric_alias_1 }
|
|
87
87
|
export { CompletenessMetric as CompletenessMetric_alias_2 }
|
|
88
88
|
|
|
89
|
-
declare interface CompletenessMetricResult extends
|
|
89
|
+
declare interface CompletenessMetricResult extends MetricResult {
|
|
90
90
|
info: {
|
|
91
91
|
inputElements: string[];
|
|
92
92
|
outputElements: string[];
|
|
@@ -112,7 +112,7 @@ declare interface ContentSimilarityOptions {
|
|
|
112
112
|
ignoreWhitespace?: boolean;
|
|
113
113
|
}
|
|
114
114
|
|
|
115
|
-
declare interface ContentSimilarityResult extends
|
|
115
|
+
declare interface ContentSimilarityResult extends MetricResult {
|
|
116
116
|
info: {
|
|
117
117
|
similarity: number;
|
|
118
118
|
};
|
|
@@ -258,7 +258,7 @@ export declare interface ContextualRecallMetricOptions {
|
|
|
258
258
|
context: string[];
|
|
259
259
|
}
|
|
260
260
|
|
|
261
|
-
declare function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric): Promise<
|
|
261
|
+
declare function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric): Promise<EvaluationResult>;
|
|
262
262
|
export { evaluate }
|
|
263
263
|
export { evaluate as evaluate_alias_1 }
|
|
264
264
|
|
|
@@ -552,7 +552,7 @@ export { KeywordCoverageMetric }
|
|
|
552
552
|
export { KeywordCoverageMetric as KeywordCoverageMetric_alias_1 }
|
|
553
553
|
export { KeywordCoverageMetric as KeywordCoverageMetric_alias_2 }
|
|
554
554
|
|
|
555
|
-
declare interface KeywordCoverageResult extends
|
|
555
|
+
declare interface KeywordCoverageResult extends MetricResult {
|
|
556
556
|
info: {
|
|
557
557
|
totalKeywords: number;
|
|
558
558
|
matchedKeywords: number;
|
|
@@ -566,7 +566,7 @@ declare abstract class MastraAgentJudge {
|
|
|
566
566
|
export { MastraAgentJudge }
|
|
567
567
|
export { MastraAgentJudge as MastraAgentJudge_alias_1 }
|
|
568
568
|
|
|
569
|
-
export declare interface MetricResultWithReason extends
|
|
569
|
+
export declare interface MetricResultWithReason extends MetricResult {
|
|
570
570
|
info: {
|
|
571
571
|
reason: string;
|
|
572
572
|
};
|
|
@@ -704,7 +704,7 @@ export { TextualDifferenceMetric }
|
|
|
704
704
|
export { TextualDifferenceMetric as TextualDifferenceMetric_alias_1 }
|
|
705
705
|
export { TextualDifferenceMetric as TextualDifferenceMetric_alias_2 }
|
|
706
706
|
|
|
707
|
-
declare interface TextualDifferenceResult extends
|
|
707
|
+
declare interface TextualDifferenceResult extends MetricResult {
|
|
708
708
|
info: {
|
|
709
709
|
ratio: number;
|
|
710
710
|
changes: number;
|
|
@@ -721,7 +721,7 @@ export { ToneConsistencyMetric }
|
|
|
721
721
|
export { ToneConsistencyMetric as ToneConsistencyMetric_alias_1 }
|
|
722
722
|
export { ToneConsistencyMetric as ToneConsistencyMetric_alias_2 }
|
|
723
723
|
|
|
724
|
-
declare interface ToneConsitencyResult extends
|
|
724
|
+
declare interface ToneConsitencyResult extends MetricResult {
|
|
725
725
|
info: {
|
|
726
726
|
responseSentiment: number;
|
|
727
727
|
referenceSentiment: number;
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { Agent } from '@mastra/core/agent';
|
|
2
|
+
import { EvaluationResult } from '@mastra/core';
|
|
2
3
|
import type { LanguageModel } from '@mastra/core/llm';
|
|
3
4
|
import type { Mastra } from '@mastra/core';
|
|
4
5
|
import type { Metric } from '@mastra/core';
|
|
5
6
|
import { Metric as Metric_2 } from '@mastra/core/eval';
|
|
6
|
-
import { MetricResult } from '@mastra/core';
|
|
7
|
-
import type { MetricResult as MetricResult_2 } from '@mastra/core/eval';
|
|
7
|
+
import type { MetricResult } from '@mastra/core/eval';
|
|
8
8
|
|
|
9
9
|
export declare const ANSWER_RELEVANCY_AGENT_INSTRUCTIONS = "You are a balanced and nuanced answer relevancy evaluator. Your job is to determine if LLM outputs are relevant to the input, including handling partially relevant or uncertain cases.\n\nKey Principles:\n1. Evaluate whether the output addresses what the input is asking for\n2. Consider both direct answers and related context\n3. Prioritize relevance to the input over correctness\n4. Recognize that responses can be partially relevant\n5. Empty inputs or error messages should always be marked as \"no\"\n6. Responses that discuss the type of information being asked show partial relevance";
|
|
10
10
|
|
|
@@ -86,7 +86,7 @@ export { CompletenessMetric }
|
|
|
86
86
|
export { CompletenessMetric as CompletenessMetric_alias_1 }
|
|
87
87
|
export { CompletenessMetric as CompletenessMetric_alias_2 }
|
|
88
88
|
|
|
89
|
-
declare interface CompletenessMetricResult extends
|
|
89
|
+
declare interface CompletenessMetricResult extends MetricResult {
|
|
90
90
|
info: {
|
|
91
91
|
inputElements: string[];
|
|
92
92
|
outputElements: string[];
|
|
@@ -112,7 +112,7 @@ declare interface ContentSimilarityOptions {
|
|
|
112
112
|
ignoreWhitespace?: boolean;
|
|
113
113
|
}
|
|
114
114
|
|
|
115
|
-
declare interface ContentSimilarityResult extends
|
|
115
|
+
declare interface ContentSimilarityResult extends MetricResult {
|
|
116
116
|
info: {
|
|
117
117
|
similarity: number;
|
|
118
118
|
};
|
|
@@ -258,7 +258,7 @@ export declare interface ContextualRecallMetricOptions {
|
|
|
258
258
|
context: string[];
|
|
259
259
|
}
|
|
260
260
|
|
|
261
|
-
declare function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric): Promise<
|
|
261
|
+
declare function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric): Promise<EvaluationResult>;
|
|
262
262
|
export { evaluate }
|
|
263
263
|
export { evaluate as evaluate_alias_1 }
|
|
264
264
|
|
|
@@ -552,7 +552,7 @@ export { KeywordCoverageMetric }
|
|
|
552
552
|
export { KeywordCoverageMetric as KeywordCoverageMetric_alias_1 }
|
|
553
553
|
export { KeywordCoverageMetric as KeywordCoverageMetric_alias_2 }
|
|
554
554
|
|
|
555
|
-
declare interface KeywordCoverageResult extends
|
|
555
|
+
declare interface KeywordCoverageResult extends MetricResult {
|
|
556
556
|
info: {
|
|
557
557
|
totalKeywords: number;
|
|
558
558
|
matchedKeywords: number;
|
|
@@ -566,7 +566,7 @@ declare abstract class MastraAgentJudge {
|
|
|
566
566
|
export { MastraAgentJudge }
|
|
567
567
|
export { MastraAgentJudge as MastraAgentJudge_alias_1 }
|
|
568
568
|
|
|
569
|
-
export declare interface MetricResultWithReason extends
|
|
569
|
+
export declare interface MetricResultWithReason extends MetricResult {
|
|
570
570
|
info: {
|
|
571
571
|
reason: string;
|
|
572
572
|
};
|
|
@@ -704,7 +704,7 @@ export { TextualDifferenceMetric }
|
|
|
704
704
|
export { TextualDifferenceMetric as TextualDifferenceMetric_alias_1 }
|
|
705
705
|
export { TextualDifferenceMetric as TextualDifferenceMetric_alias_2 }
|
|
706
706
|
|
|
707
|
-
declare interface TextualDifferenceResult extends
|
|
707
|
+
declare interface TextualDifferenceResult extends MetricResult {
|
|
708
708
|
info: {
|
|
709
709
|
ratio: number;
|
|
710
710
|
changes: number;
|
|
@@ -721,7 +721,7 @@ export { ToneConsistencyMetric }
|
|
|
721
721
|
export { ToneConsistencyMetric as ToneConsistencyMetric_alias_1 }
|
|
722
722
|
export { ToneConsistencyMetric as ToneConsistencyMetric_alias_2 }
|
|
723
723
|
|
|
724
|
-
declare interface ToneConsitencyResult extends
|
|
724
|
+
declare interface ToneConsitencyResult extends MetricResult {
|
|
725
725
|
info: {
|
|
726
726
|
responseSentiment: number;
|
|
727
727
|
referenceSentiment: number;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mastra/evals",
|
|
3
|
-
"version": "0.1.14",
|
|
3
|
+
"version": "0.1.15-alpha.2",
|
|
4
4
|
"description": "",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"files": [
|
|
@@ -62,7 +62,7 @@
|
|
|
62
62
|
"sentiment": "^5.0.2",
|
|
63
63
|
"string-similarity": "^4.0.4",
|
|
64
64
|
"zod": "^3.24.2",
|
|
65
|
-
"@mastra/core": "^0.
|
|
65
|
+
"@mastra/core": "^0.8.0-alpha.2"
|
|
66
66
|
},
|
|
67
67
|
"peerDependencies": {
|
|
68
68
|
"ai": "^4.0.0"
|
|
@@ -74,7 +74,7 @@
|
|
|
74
74
|
"@types/fs-extra": "^11.0.4",
|
|
75
75
|
"@types/sentiment": "^5.0.4",
|
|
76
76
|
"@types/string-similarity": "^4.0.2",
|
|
77
|
-
"ai": "^4.
|
|
77
|
+
"ai": "^4.2.2",
|
|
78
78
|
"dotenv": "^16.4.7",
|
|
79
79
|
"eslint": "^9.23.0",
|
|
80
80
|
"tsup": "^8.4.0",
|