@mastra/evals 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/.turbo/turbo-build.log +17 -11
  2. package/.turbo/turbo-lint.log +4 -0
  3. package/CHANGELOG.md +28 -0
  4. package/dist/_tsup-dts-rollup.d.ts +45 -19
  5. package/dist/chunk-TXXJUIES.js +15 -0
  6. package/dist/{dist-56AYDN4X.js → dist-W3SXCXOT.js} +843 -471
  7. package/dist/index.js +2 -3
  8. package/dist/magic-string.es-5UDOWOAZ.js +20 -20
  9. package/dist/metrics/judge/index.d.ts +1 -0
  10. package/dist/metrics/judge/index.js +2 -0
  11. package/dist/metrics/llm/index.d.ts +1 -0
  12. package/dist/metrics/llm/index.js +243 -49
  13. package/dist/metrics/nlp/index.js +1 -1
  14. package/eslint.config.js +6 -0
  15. package/package.json +14 -5
  16. package/src/evaluation.ts +3 -2
  17. package/src/metrics/index.ts +1 -0
  18. package/src/metrics/judge/index.ts +1 -1
  19. package/src/metrics/llm/answer-relevancy/index.test.ts +2 -1
  20. package/src/metrics/llm/answer-relevancy/index.ts +3 -3
  21. package/src/metrics/llm/answer-relevancy/metricJudge.ts +9 -9
  22. package/src/metrics/llm/bias/index.test.ts +2 -1
  23. package/src/metrics/llm/bias/index.ts +5 -5
  24. package/src/metrics/llm/bias/metricJudge.ts +3 -3
  25. package/src/metrics/llm/context-position/index.test.ts +2 -1
  26. package/src/metrics/llm/context-position/index.ts +3 -3
  27. package/src/metrics/llm/context-position/metricJudge.ts +9 -9
  28. package/src/metrics/llm/context-precision/index.test.ts +1 -1
  29. package/src/metrics/llm/context-precision/index.ts +3 -3
  30. package/src/metrics/llm/context-precision/metricJudge.ts +9 -10
  31. package/src/metrics/llm/context-relevancy/index.test.ts +1 -1
  32. package/src/metrics/llm/context-relevancy/index.ts +2 -2
  33. package/src/metrics/llm/context-relevancy/metricJudge.ts +1 -1
  34. package/src/metrics/llm/contextual-recall/index.test.ts +1 -1
  35. package/src/metrics/llm/contextual-recall/index.ts +2 -2
  36. package/src/metrics/llm/contextual-recall/metricJudge.ts +1 -1
  37. package/src/metrics/llm/faithfulness/index.test.ts +1 -1
  38. package/src/metrics/llm/faithfulness/index.ts +2 -2
  39. package/src/metrics/llm/faithfulness/metricJudge.ts +1 -1
  40. package/src/metrics/llm/hallucination/index.test.ts +1 -1
  41. package/src/metrics/llm/hallucination/index.ts +2 -2
  42. package/src/metrics/llm/hallucination/metricJudge.ts +1 -1
  43. package/src/metrics/llm/index.ts +1 -0
  44. package/src/metrics/llm/prompt-alignment/index.test.ts +1 -1
  45. package/src/metrics/llm/prompt-alignment/index.ts +1 -1
  46. package/src/metrics/llm/prompt-alignment/metricJudge.ts +1 -1
  47. package/src/metrics/llm/summarization/index.test.ts +2 -1
  48. package/src/metrics/llm/summarization/index.ts +2 -2
  49. package/src/metrics/llm/summarization/metricJudge.ts +1 -1
  50. package/src/metrics/llm/toxicity/index.test.ts +1 -1
  51. package/src/metrics/llm/toxicity/index.ts +2 -2
  52. package/src/metrics/llm/toxicity/metricJudge.ts +3 -3
  53. package/src/metrics/llm/types.ts +1 -1
  54. package/src/metrics/nlp/completeness/index.ts +2 -1
  55. package/src/metrics/nlp/content-similarity/index.ts +2 -1
  56. package/src/metrics/nlp/keyword-coverage/index.ts +2 -1
  57. package/src/metrics/nlp/textual-difference/index.ts +2 -1
  58. package/src/metrics/nlp/tone/index.ts +2 -1
@@ -1,23 +1,29 @@
1
1
 
2
- > @mastra/evals@0.1.0-alpha.37 build C:\Users\Ward\projects\mastra\mastra\packages\evals
3
- > tsup src/index.ts src/metrics/llm/index.ts src/metrics/nlp/index.ts --format esm --experimental-dts --clean --treeshake
2
+ > @mastra/evals@0.1.1-alpha.0 build C:\Users\Ward\projects\mastra\mastra\packages\evals
3
+ > pnpm check && tsup src/index.ts src/metrics/judge/index.ts src/metrics/llm/index.ts src/metrics/nlp/index.ts --format esm --experimental-dts --clean --treeshake
4
4
 
5
- CLI Building entry: src/index.ts, src/metrics/llm/index.ts, src/metrics/nlp/index.ts
5
+
6
+ > @mastra/evals@0.1.1-alpha.0 check C:\Users\Ward\projects\mastra\mastra\packages\evals
7
+ > tsc --noEmit
8
+
9
+ CLI Building entry: src/index.ts, src/metrics/judge/index.ts, src/metrics/llm/index.ts, src/metrics/nlp/index.ts
6
10
  CLI Using tsconfig: tsconfig.json
7
11
  CLI tsup v8.3.6
8
12
  TSC Build start
9
- TSC ⚡️ Build success in 2319ms
13
+ TSC ⚡️ Build success in 5143ms
10
14
  DTS Build start
11
15
  CLI Target: es2022
12
16
  Analysis will use the bundled TypeScript version 5.7.3
13
17
  Writing package typings: C:\Users\Ward\projects\mastra\mastra\packages\evals\dist\_tsup-dts-rollup.d.ts
14
- DTS ⚡️ Build success in 1572ms
18
+ DTS ⚡️ Build success in 3821ms
15
19
  CLI Cleaning output folder
16
20
  ESM Build start
17
- ESM dist\index.js 2.12 KB
18
- ESM dist\magic-string.es-5UDOWOAZ.js 40.86 KB
21
+ ESM dist\index.js 2.63 KB
22
+ ESM dist\metrics\judge\index.js 94.00 B
19
23
  ESM dist\chunk-4VNS5WPM.js 1.82 KB
20
- ESM dist\metrics\nlp\index.js 6.31 KB
21
- ESM dist\metrics\llm\index.js 73.79 KB
22
- ESM dist\dist-56AYDN4X.js 566.98 KB
23
- ESM ⚡️ Build success in 772ms
24
+ ESM dist\metrics\llm\index.js 85.32 KB
25
+ ESM dist\magic-string.es-5UDOWOAZ.js 40.80 KB
26
+ ESM dist\chunk-TXXJUIES.js 305.00 B
27
+ ESM dist\metrics\nlp\index.js 6.30 KB
28
+ ESM dist\dist-W3SXCXOT.js 570.71 KB
29
+ ESM ⚡️ Build success in 2104ms
@@ -0,0 +1,4 @@
1
+
2
+ > @mastra/evals@0.1.0 lint C:\Users\Ward\projects\mastra\mastra\packages\evals
3
+ > eslint .
4
+
package/CHANGELOG.md CHANGED
@@ -1,5 +1,33 @@
1
1
  # @mastra/evals
2
2
 
3
+ ## 0.1.1
4
+
5
+ ### Patch Changes
6
+
7
+ - d59f1a8: Added example docs for evals and export metricJudge
8
+ - 91ef439: Add eslint and ran autofix
9
+ - Updated dependencies [d59f1a8]
10
+ - Updated dependencies [91ef439]
11
+ - Updated dependencies [4a25be4]
12
+ - Updated dependencies [bf2e88f]
13
+ - Updated dependencies [2f0d707]
14
+ - Updated dependencies [aac1667]
15
+ - @mastra/core@0.2.1
16
+
17
+ ## 0.1.1-alpha.0
18
+
19
+ ### Patch Changes
20
+
21
+ - d59f1a8: Added example docs for evals and export metricJudge
22
+ - 91ef439: Add eslint and ran autofix
23
+ - Updated dependencies [d59f1a8]
24
+ - Updated dependencies [91ef439]
25
+ - Updated dependencies [4a25be4]
26
+ - Updated dependencies [bf2e88f]
27
+ - Updated dependencies [2f0d707]
28
+ - Updated dependencies [aac1667]
29
+ - @mastra/core@0.2.1-alpha.0
30
+
3
31
  ## 0.1.0
4
32
 
5
33
  ### Minor Changes
@@ -1,10 +1,10 @@
1
1
  import { Agent } from '@mastra/core/agent';
2
- import { LanguageModel } from '@mastra/core/llm';
2
+ import type { LanguageModel } from '@mastra/core/llm';
3
3
  import type { Mastra } from '@mastra/core';
4
- import { Metric } from '@mastra/core';
4
+ import type { Metric } from '@mastra/core';
5
5
  import { Metric as Metric_2 } from '@mastra/core/eval';
6
6
  import { MetricResult } from '@mastra/core';
7
- import { MetricResult as MetricResult_2 } from '@mastra/core/eval';
7
+ import type { MetricResult as MetricResult_2 } from '@mastra/core/eval';
8
8
 
9
9
  export declare const ANSWER_RELEVANCY_AGENT_INSTRUCTIONS = "You are a balanced and nuanced answer relevancy evaluator. Your job is to determine if LLM outputs are relevant to the input, including handling partially relevant or uncertain cases.\n\nKey Principles:\n1. Evaluate whether the output addresses what the input is asking for\n2. Consider both direct answers and related context\n3. Prioritize relevance to the input over correctness\n4. Recognize that responses can be partially relevant\n5. Empty inputs or error messages should always be marked as \"no\"\n6. Responses that discuss the type of information being asked show partial relevance";
10
10
 
@@ -14,10 +14,16 @@ export declare class AnswerRelevancyJudge extends MastraAgentJudge {
14
14
  verdict: string;
15
15
  reason: string;
16
16
  }[]>;
17
- getReason(input: string, actualOutput: string, score: number, scale: number, verdicts: {
18
- verdict: string;
19
- reason: string;
20
- }[]): Promise<string>;
17
+ getReason(args: {
18
+ input: string;
19
+ output: string;
20
+ score: number;
21
+ scale: number;
22
+ verdicts: {
23
+ verdict: string;
24
+ reason: string;
25
+ }[];
26
+ }): Promise<string>;
21
27
  }
22
28
 
23
29
  declare class AnswerRelevancyMetric extends Metric_2 {
@@ -49,7 +55,10 @@ export declare class BiasJudge extends MastraAgentJudge {
49
55
  verdict: string;
50
56
  reason: string;
51
57
  }[]>;
52
- getReason(score: number, biases: string[]): Promise<string>;
58
+ getReason(args: {
59
+ score: number;
60
+ biases: string[];
61
+ }): Promise<string>;
53
62
  }
54
63
 
55
64
  declare class BiasMetric extends Metric_2 {
@@ -123,10 +132,16 @@ export declare class ContextPositionJudge extends MastraAgentJudge {
123
132
  verdict: string;
124
133
  reason: string;
125
134
  }[]>;
126
- getReason(input: string, actualOutput: string, score: number, scale: number, verdicts: {
127
- verdict: string;
128
- reason: string;
129
- }[]): Promise<string>;
135
+ getReason(args: {
136
+ input: string;
137
+ output: string;
138
+ score: number;
139
+ scale: number;
140
+ verdicts: {
141
+ verdict: string;
142
+ reason: string;
143
+ }[];
144
+ }): Promise<string>;
130
145
  }
131
146
 
132
147
  declare class ContextPositionMetric extends Metric_2 {
@@ -152,10 +167,16 @@ export declare class ContextPrecisionJudge extends MastraAgentJudge {
152
167
  verdict: string;
153
168
  reason: string;
154
169
  }[]>;
155
- getReason(input: string, actualOutput: string, score: number, scale: number, verdicts: {
156
- verdict: string;
157
- reason: string;
158
- }[]): Promise<string>;
170
+ getReason(args: {
171
+ input: string;
172
+ output: string;
173
+ score: number;
174
+ scale: number;
175
+ verdicts: {
176
+ verdict: string;
177
+ reason: string;
178
+ }[];
179
+ }): Promise<string>;
159
180
  }
160
181
 
161
182
  declare class ContextPrecisionMetric extends Metric_2 {
@@ -505,7 +526,7 @@ export declare class HallucinationJudge extends MastraAgentJudge {
505
526
  }): Promise<string>;
506
527
  }
507
528
 
508
- export declare class HallucinationMetric extends Metric_2 {
529
+ declare class HallucinationMetric extends Metric_2 {
509
530
  private judge;
510
531
  private scale;
511
532
  private context;
@@ -513,6 +534,9 @@ export declare class HallucinationMetric extends Metric_2 {
513
534
  measure(input: string, output: string): Promise<MetricResultWithReason>;
514
535
  private calculateScore;
515
536
  }
537
+ export { HallucinationMetric }
538
+ export { HallucinationMetric as HallucinationMetric_alias_1 }
539
+ export { HallucinationMetric as HallucinationMetric_alias_2 }
516
540
 
517
541
  export declare interface HallucinationMetricOptions {
518
542
  scale?: number;
@@ -535,10 +559,12 @@ declare interface KeywordCoverageResult extends MetricResult_2 {
535
559
  };
536
560
  }
537
561
 
538
- export declare abstract class MastraAgentJudge {
562
+ declare abstract class MastraAgentJudge {
539
563
  protected readonly agent: Agent;
540
564
  constructor(name: string, instructions: string, model: LanguageModel);
541
565
  }
566
+ export { MastraAgentJudge }
567
+ export { MastraAgentJudge as MastraAgentJudge_alias_1 }
542
568
 
543
569
  export declare interface MetricResultWithReason extends MetricResult_2 {
544
570
  info: {
@@ -714,7 +740,7 @@ export declare class ToxicityJudge extends MastraAgentJudge {
714
740
  verdict: string;
715
741
  reason: string;
716
742
  }[]>;
717
- getReason({ score, toxics }: {
743
+ getReason(args: {
718
744
  score: number;
719
745
  toxics: string[];
720
746
  }): Promise<string>;
@@ -0,0 +1,15 @@
1
+ import { Agent } from '@mastra/core/agent';
2
+
3
+ // src/metrics/judge/index.ts
4
+ var MastraAgentJudge = class {
5
+ agent;
6
+ constructor(name, instructions, model) {
7
+ this.agent = new Agent({
8
+ name: `Mastra Eval Judge ${name}`,
9
+ instructions,
10
+ model
11
+ });
12
+ }
13
+ };
14
+
15
+ export { MastraAgentJudge };