@mastra/evals 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +17 -11
- package/.turbo/turbo-lint.log +4 -0
- package/CHANGELOG.md +28 -0
- package/dist/_tsup-dts-rollup.d.ts +45 -19
- package/dist/chunk-TXXJUIES.js +15 -0
- package/dist/{dist-56AYDN4X.js → dist-W3SXCXOT.js} +843 -471
- package/dist/index.js +2 -3
- package/dist/magic-string.es-5UDOWOAZ.js +20 -20
- package/dist/metrics/judge/index.d.ts +1 -0
- package/dist/metrics/judge/index.js +2 -0
- package/dist/metrics/llm/index.d.ts +1 -0
- package/dist/metrics/llm/index.js +243 -49
- package/dist/metrics/nlp/index.js +1 -1
- package/eslint.config.js +6 -0
- package/package.json +14 -5
- package/src/evaluation.ts +3 -2
- package/src/metrics/index.ts +1 -0
- package/src/metrics/judge/index.ts +1 -1
- package/src/metrics/llm/answer-relevancy/index.test.ts +2 -1
- package/src/metrics/llm/answer-relevancy/index.ts +3 -3
- package/src/metrics/llm/answer-relevancy/metricJudge.ts +9 -9
- package/src/metrics/llm/bias/index.test.ts +2 -1
- package/src/metrics/llm/bias/index.ts +5 -5
- package/src/metrics/llm/bias/metricJudge.ts +3 -3
- package/src/metrics/llm/context-position/index.test.ts +2 -1
- package/src/metrics/llm/context-position/index.ts +3 -3
- package/src/metrics/llm/context-position/metricJudge.ts +9 -9
- package/src/metrics/llm/context-precision/index.test.ts +1 -1
- package/src/metrics/llm/context-precision/index.ts +3 -3
- package/src/metrics/llm/context-precision/metricJudge.ts +9 -10
- package/src/metrics/llm/context-relevancy/index.test.ts +1 -1
- package/src/metrics/llm/context-relevancy/index.ts +2 -2
- package/src/metrics/llm/context-relevancy/metricJudge.ts +1 -1
- package/src/metrics/llm/contextual-recall/index.test.ts +1 -1
- package/src/metrics/llm/contextual-recall/index.ts +2 -2
- package/src/metrics/llm/contextual-recall/metricJudge.ts +1 -1
- package/src/metrics/llm/faithfulness/index.test.ts +1 -1
- package/src/metrics/llm/faithfulness/index.ts +2 -2
- package/src/metrics/llm/faithfulness/metricJudge.ts +1 -1
- package/src/metrics/llm/hallucination/index.test.ts +1 -1
- package/src/metrics/llm/hallucination/index.ts +2 -2
- package/src/metrics/llm/hallucination/metricJudge.ts +1 -1
- package/src/metrics/llm/index.ts +1 -0
- package/src/metrics/llm/prompt-alignment/index.test.ts +1 -1
- package/src/metrics/llm/prompt-alignment/index.ts +1 -1
- package/src/metrics/llm/prompt-alignment/metricJudge.ts +1 -1
- package/src/metrics/llm/summarization/index.test.ts +2 -1
- package/src/metrics/llm/summarization/index.ts +2 -2
- package/src/metrics/llm/summarization/metricJudge.ts +1 -1
- package/src/metrics/llm/toxicity/index.test.ts +1 -1
- package/src/metrics/llm/toxicity/index.ts +2 -2
- package/src/metrics/llm/toxicity/metricJudge.ts +3 -3
- package/src/metrics/llm/types.ts +1 -1
- package/src/metrics/nlp/completeness/index.ts +2 -1
- package/src/metrics/nlp/content-similarity/index.ts +2 -1
- package/src/metrics/nlp/keyword-coverage/index.ts +2 -1
- package/src/metrics/nlp/textual-difference/index.ts +2 -1
- package/src/metrics/nlp/tone/index.ts +2 -1
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,23 +1,29 @@
|
|
|
1
1
|
|
|
2
|
-
> @mastra/evals@0.1.
|
|
3
|
-
> tsup src/index.ts src/metrics/llm/index.ts src/metrics/nlp/index.ts --format esm --experimental-dts --clean --treeshake
|
|
2
|
+
> @mastra/evals@0.1.1-alpha.0 build C:\Users\Ward\projects\mastra\mastra\packages\evals
|
|
3
|
+
> pnpm check && tsup src/index.ts src/metrics/judge/index.ts src/metrics/llm/index.ts src/metrics/nlp/index.ts --format esm --experimental-dts --clean --treeshake
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
|
|
6
|
+
> @mastra/evals@0.1.1-alpha.0 check C:\Users\Ward\projects\mastra\mastra\packages\evals
|
|
7
|
+
> tsc --noEmit
|
|
8
|
+
|
|
9
|
+
[34mCLI[39m Building entry: src/index.ts, src/metrics/judge/index.ts, src/metrics/llm/index.ts, src/metrics/nlp/index.ts
|
|
6
10
|
[34mCLI[39m Using tsconfig: tsconfig.json
|
|
7
11
|
[34mCLI[39m tsup v8.3.6
|
|
8
12
|
[34mTSC[39m Build start
|
|
9
|
-
[32mTSC[39m ⚡️ Build success in
|
|
13
|
+
[32mTSC[39m ⚡️ Build success in 5143ms
|
|
10
14
|
[34mDTS[39m Build start
|
|
11
15
|
[34mCLI[39m Target: es2022
|
|
12
16
|
Analysis will use the bundled TypeScript version 5.7.3
|
|
13
17
|
[36mWriting package typings: C:\Users\Ward\projects\mastra\mastra\packages\evals\dist\_tsup-dts-rollup.d.ts[39m
|
|
14
|
-
[32mDTS[39m ⚡️ Build success in
|
|
18
|
+
[32mDTS[39m ⚡️ Build success in 3821ms
|
|
15
19
|
[34mCLI[39m Cleaning output folder
|
|
16
20
|
[34mESM[39m Build start
|
|
17
|
-
[32mESM[39m [1mdist\index.js [22m[32m2.
|
|
18
|
-
[32mESM[39m [1mdist\
|
|
21
|
+
[32mESM[39m [1mdist\index.js [22m[32m2.63 KB[39m
|
|
22
|
+
[32mESM[39m [1mdist\metrics\judge\index.js [22m[32m94.00 B[39m
|
|
19
23
|
[32mESM[39m [1mdist\chunk-4VNS5WPM.js [22m[32m1.82 KB[39m
|
|
20
|
-
[32mESM[39m [1mdist\metrics\
|
|
21
|
-
[32mESM[39m [1mdist\
|
|
22
|
-
[32mESM[39m [1mdist\
|
|
23
|
-
[32mESM[39m
|
|
24
|
+
[32mESM[39m [1mdist\metrics\llm\index.js [22m[32m85.32 KB[39m
|
|
25
|
+
[32mESM[39m [1mdist\magic-string.es-5UDOWOAZ.js [22m[32m40.80 KB[39m
|
|
26
|
+
[32mESM[39m [1mdist\chunk-TXXJUIES.js [22m[32m305.00 B[39m
|
|
27
|
+
[32mESM[39m [1mdist\metrics\nlp\index.js [22m[32m6.30 KB[39m
|
|
28
|
+
[32mESM[39m [1mdist\dist-W3SXCXOT.js [22m[32m570.71 KB[39m
|
|
29
|
+
[32mESM[39m ⚡️ Build success in 2104ms
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,33 @@
|
|
|
1
1
|
# @mastra/evals
|
|
2
2
|
|
|
3
|
+
## 0.1.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- d59f1a8: Added example docs for evals and export metricJudge
|
|
8
|
+
- 91ef439: Add eslint and ran autofix
|
|
9
|
+
- Updated dependencies [d59f1a8]
|
|
10
|
+
- Updated dependencies [91ef439]
|
|
11
|
+
- Updated dependencies [4a25be4]
|
|
12
|
+
- Updated dependencies [bf2e88f]
|
|
13
|
+
- Updated dependencies [2f0d707]
|
|
14
|
+
- Updated dependencies [aac1667]
|
|
15
|
+
- @mastra/core@0.2.1
|
|
16
|
+
|
|
17
|
+
## 0.1.1-alpha.0
|
|
18
|
+
|
|
19
|
+
### Patch Changes
|
|
20
|
+
|
|
21
|
+
- d59f1a8: Added example docs for evals and export metricJudge
|
|
22
|
+
- 91ef439: Add eslint and ran autofix
|
|
23
|
+
- Updated dependencies [d59f1a8]
|
|
24
|
+
- Updated dependencies [91ef439]
|
|
25
|
+
- Updated dependencies [4a25be4]
|
|
26
|
+
- Updated dependencies [bf2e88f]
|
|
27
|
+
- Updated dependencies [2f0d707]
|
|
28
|
+
- Updated dependencies [aac1667]
|
|
29
|
+
- @mastra/core@0.2.1-alpha.0
|
|
30
|
+
|
|
3
31
|
## 0.1.0
|
|
4
32
|
|
|
5
33
|
### Minor Changes
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { Agent } from '@mastra/core/agent';
|
|
2
|
-
import { LanguageModel } from '@mastra/core/llm';
|
|
2
|
+
import type { LanguageModel } from '@mastra/core/llm';
|
|
3
3
|
import type { Mastra } from '@mastra/core';
|
|
4
|
-
import { Metric } from '@mastra/core';
|
|
4
|
+
import type { Metric } from '@mastra/core';
|
|
5
5
|
import { Metric as Metric_2 } from '@mastra/core/eval';
|
|
6
6
|
import { MetricResult } from '@mastra/core';
|
|
7
|
-
import { MetricResult as MetricResult_2 } from '@mastra/core/eval';
|
|
7
|
+
import type { MetricResult as MetricResult_2 } from '@mastra/core/eval';
|
|
8
8
|
|
|
9
9
|
export declare const ANSWER_RELEVANCY_AGENT_INSTRUCTIONS = "You are a balanced and nuanced answer relevancy evaluator. Your job is to determine if LLM outputs are relevant to the input, including handling partially relevant or uncertain cases.\n\nKey Principles:\n1. Evaluate whether the output addresses what the input is asking for\n2. Consider both direct answers and related context\n3. Prioritize relevance to the input over correctness\n4. Recognize that responses can be partially relevant\n5. Empty inputs or error messages should always be marked as \"no\"\n6. Responses that discuss the type of information being asked show partial relevance";
|
|
10
10
|
|
|
@@ -14,10 +14,16 @@ export declare class AnswerRelevancyJudge extends MastraAgentJudge {
|
|
|
14
14
|
verdict: string;
|
|
15
15
|
reason: string;
|
|
16
16
|
}[]>;
|
|
17
|
-
getReason(
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
17
|
+
getReason(args: {
|
|
18
|
+
input: string;
|
|
19
|
+
output: string;
|
|
20
|
+
score: number;
|
|
21
|
+
scale: number;
|
|
22
|
+
verdicts: {
|
|
23
|
+
verdict: string;
|
|
24
|
+
reason: string;
|
|
25
|
+
}[];
|
|
26
|
+
}): Promise<string>;
|
|
21
27
|
}
|
|
22
28
|
|
|
23
29
|
declare class AnswerRelevancyMetric extends Metric_2 {
|
|
@@ -49,7 +55,10 @@ export declare class BiasJudge extends MastraAgentJudge {
|
|
|
49
55
|
verdict: string;
|
|
50
56
|
reason: string;
|
|
51
57
|
}[]>;
|
|
52
|
-
getReason(
|
|
58
|
+
getReason(args: {
|
|
59
|
+
score: number;
|
|
60
|
+
biases: string[];
|
|
61
|
+
}): Promise<string>;
|
|
53
62
|
}
|
|
54
63
|
|
|
55
64
|
declare class BiasMetric extends Metric_2 {
|
|
@@ -123,10 +132,16 @@ export declare class ContextPositionJudge extends MastraAgentJudge {
|
|
|
123
132
|
verdict: string;
|
|
124
133
|
reason: string;
|
|
125
134
|
}[]>;
|
|
126
|
-
getReason(
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
135
|
+
getReason(args: {
|
|
136
|
+
input: string;
|
|
137
|
+
output: string;
|
|
138
|
+
score: number;
|
|
139
|
+
scale: number;
|
|
140
|
+
verdicts: {
|
|
141
|
+
verdict: string;
|
|
142
|
+
reason: string;
|
|
143
|
+
}[];
|
|
144
|
+
}): Promise<string>;
|
|
130
145
|
}
|
|
131
146
|
|
|
132
147
|
declare class ContextPositionMetric extends Metric_2 {
|
|
@@ -152,10 +167,16 @@ export declare class ContextPrecisionJudge extends MastraAgentJudge {
|
|
|
152
167
|
verdict: string;
|
|
153
168
|
reason: string;
|
|
154
169
|
}[]>;
|
|
155
|
-
getReason(
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
170
|
+
getReason(args: {
|
|
171
|
+
input: string;
|
|
172
|
+
output: string;
|
|
173
|
+
score: number;
|
|
174
|
+
scale: number;
|
|
175
|
+
verdicts: {
|
|
176
|
+
verdict: string;
|
|
177
|
+
reason: string;
|
|
178
|
+
}[];
|
|
179
|
+
}): Promise<string>;
|
|
159
180
|
}
|
|
160
181
|
|
|
161
182
|
declare class ContextPrecisionMetric extends Metric_2 {
|
|
@@ -505,7 +526,7 @@ export declare class HallucinationJudge extends MastraAgentJudge {
|
|
|
505
526
|
}): Promise<string>;
|
|
506
527
|
}
|
|
507
528
|
|
|
508
|
-
|
|
529
|
+
declare class HallucinationMetric extends Metric_2 {
|
|
509
530
|
private judge;
|
|
510
531
|
private scale;
|
|
511
532
|
private context;
|
|
@@ -513,6 +534,9 @@ export declare class HallucinationMetric extends Metric_2 {
|
|
|
513
534
|
measure(input: string, output: string): Promise<MetricResultWithReason>;
|
|
514
535
|
private calculateScore;
|
|
515
536
|
}
|
|
537
|
+
export { HallucinationMetric }
|
|
538
|
+
export { HallucinationMetric as HallucinationMetric_alias_1 }
|
|
539
|
+
export { HallucinationMetric as HallucinationMetric_alias_2 }
|
|
516
540
|
|
|
517
541
|
export declare interface HallucinationMetricOptions {
|
|
518
542
|
scale?: number;
|
|
@@ -535,10 +559,12 @@ declare interface KeywordCoverageResult extends MetricResult_2 {
|
|
|
535
559
|
};
|
|
536
560
|
}
|
|
537
561
|
|
|
538
|
-
|
|
562
|
+
declare abstract class MastraAgentJudge {
|
|
539
563
|
protected readonly agent: Agent;
|
|
540
564
|
constructor(name: string, instructions: string, model: LanguageModel);
|
|
541
565
|
}
|
|
566
|
+
export { MastraAgentJudge }
|
|
567
|
+
export { MastraAgentJudge as MastraAgentJudge_alias_1 }
|
|
542
568
|
|
|
543
569
|
export declare interface MetricResultWithReason extends MetricResult_2 {
|
|
544
570
|
info: {
|
|
@@ -714,7 +740,7 @@ export declare class ToxicityJudge extends MastraAgentJudge {
|
|
|
714
740
|
verdict: string;
|
|
715
741
|
reason: string;
|
|
716
742
|
}[]>;
|
|
717
|
-
getReason(
|
|
743
|
+
getReason(args: {
|
|
718
744
|
score: number;
|
|
719
745
|
toxics: string[];
|
|
720
746
|
}): Promise<string>;
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { Agent } from '@mastra/core/agent';
|
|
2
|
+
|
|
3
|
+
// src/metrics/judge/index.ts
|
|
4
|
+
var MastraAgentJudge = class {
|
|
5
|
+
agent;
|
|
6
|
+
constructor(name, instructions, model) {
|
|
7
|
+
this.agent = new Agent({
|
|
8
|
+
name: `Mastra Eval Judge ${name}`,
|
|
9
|
+
instructions,
|
|
10
|
+
model
|
|
11
|
+
});
|
|
12
|
+
}
|
|
13
|
+
};
|
|
14
|
+
|
|
15
|
+
export { MastraAgentJudge };
|