@mastra/evals 0.13.10 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +64 -0
- package/dist/scorers/llm/answer-relevancy/index.d.ts +2 -2
- package/dist/scorers/llm/answer-relevancy/index.d.ts.map +1 -1
- package/dist/scorers/llm/bias/index.d.ts +2 -2
- package/dist/scorers/llm/bias/index.d.ts.map +1 -1
- package/dist/scorers/llm/faithfulness/index.d.ts +2 -2
- package/dist/scorers/llm/faithfulness/index.d.ts.map +1 -1
- package/dist/scorers/llm/hallucination/index.d.ts +2 -2
- package/dist/scorers/llm/hallucination/index.d.ts.map +1 -1
- package/dist/scorers/llm/index.cjs +4 -1
- package/dist/scorers/llm/index.cjs.map +1 -1
- package/dist/scorers/llm/index.js +4 -1
- package/dist/scorers/llm/index.js.map +1 -1
- package/dist/scorers/llm/toxicity/index.d.ts +3 -3
- package/dist/scorers/llm/toxicity/index.d.ts.map +1 -1
- package/package.json +6 -6
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,69 @@
|
|
|
1
1
|
# @mastra/evals
|
|
2
2
|
|
|
3
|
+
## 0.14.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- Update peer dependencies to match core package version bump (0.21.0) ([#8626](https://github.com/mastra-ai/mastra/pull/8626))
|
|
8
|
+
|
|
9
|
+
- Standardize model configuration across all components to support flexible model resolution ([#8626](https://github.com/mastra-ai/mastra/pull/8626))
|
|
10
|
+
|
|
11
|
+
All model configuration points now accept `MastraModelConfig`, enabling consistent model specification across:
|
|
12
|
+
- Scorers (`createScorer` and all built-in scorers)
|
|
13
|
+
- Input/Output Processors (`ModerationProcessor`, `PIIDetector`)
|
|
14
|
+
- Relevance Scorers (`MastraAgentRelevanceScorer`)
|
|
15
|
+
|
|
16
|
+
**Supported formats:**
|
|
17
|
+
- Magic strings: `'openai/gpt-4o-mini'`
|
|
18
|
+
- Config objects: `{ id: 'openai/gpt-4o-mini' }` or `{ providerId: 'openai', modelId: 'gpt-4o-mini' }`
|
|
19
|
+
- Custom endpoints: `{ id: 'custom/model', url: 'https://...', apiKey: '...' }`
|
|
20
|
+
- Dynamic resolution: `(ctx) => 'openai/gpt-4o-mini'`
|
|
21
|
+
|
|
22
|
+
This change provides a unified model configuration experience matching the `Agent` class, making it easier to switch models and use custom providers across all Mastra components.
|
|
23
|
+
|
|
24
|
+
### Patch Changes
|
|
25
|
+
|
|
26
|
+
- Update peer dependencies to match core package version bump (0.21.0) ([#8619](https://github.com/mastra-ai/mastra/pull/8619))
|
|
27
|
+
|
|
28
|
+
- Update peer dependencies to match core package version bump (0.21.0) ([#8557](https://github.com/mastra-ai/mastra/pull/8557))
|
|
29
|
+
|
|
30
|
+
- Update peer dependencies to match core package version bump (0.21.0) ([#8686](https://github.com/mastra-ai/mastra/pull/8686))
|
|
31
|
+
|
|
32
|
+
- Updated dependencies [[`1ed9670`](https://github.com/mastra-ai/mastra/commit/1ed9670d3ca50cb60dc2e517738c5eef3968ed27), [`b5a66b7`](https://github.com/mastra-ai/mastra/commit/b5a66b748a14fc8b3f63b04642ddb9621fbcc9e0), [`f59fc1e`](https://github.com/mastra-ai/mastra/commit/f59fc1e406b8912e692f6bff6cfd4754cc8d165c), [`158381d`](https://github.com/mastra-ai/mastra/commit/158381d39335be934b81ef8a1947bccace492c25), [`a1799bc`](https://github.com/mastra-ai/mastra/commit/a1799bcc1b5a1cdc188f2ac0165f17a1c4ac6f7b), [`6ff6094`](https://github.com/mastra-ai/mastra/commit/6ff60946f4ecfebdeef6e21d2b230c2204f2c9b8), [`fb703b9`](https://github.com/mastra-ai/mastra/commit/fb703b9634eeaff1a6eb2b5531ce0f9e8fb04727), [`37a2314`](https://github.com/mastra-ai/mastra/commit/37a23148e0e5a3b40d4f9f098b194671a8a49faf), [`7b1ef57`](https://github.com/mastra-ai/mastra/commit/7b1ef57fc071c2aa2a2e32905b18cd88719c5a39), [`05a9dee`](https://github.com/mastra-ai/mastra/commit/05a9dee3d355694d28847bfffb6289657fcf7dfa), [`e3c1077`](https://github.com/mastra-ai/mastra/commit/e3c107763aedd1643d3def5df450c235da9ff76c), [`1908ca0`](https://github.com/mastra-ai/mastra/commit/1908ca0521f90e43779cc29ab590173ca560443c), [`1bccdb3`](https://github.com/mastra-ai/mastra/commit/1bccdb33eb90cbeba2dc5ece1c2561fb774b26b6), [`5ef944a`](https://github.com/mastra-ai/mastra/commit/5ef944a3721d93105675cac2b2311432ff8cc393), [`d6b186f`](https://github.com/mastra-ai/mastra/commit/d6b186fb08f1caf1b86f73d3a5ee88fb999ca3be), [`ee68e82`](https://github.com/mastra-ai/mastra/commit/ee68e8289ea4408d29849e899bc6e78b3bd4e843), [`228228b`](https://github.com/mastra-ai/mastra/commit/228228b0b1de9291cb8887587f5cea1a8757ebad), [`ea33930`](https://github.com/mastra-ai/mastra/commit/ea339301e82d6318257720d811b043014ee44064), [`65493b3`](https://github.com/mastra-ai/mastra/commit/65493b31c36f6fdb78f9679f7e1ecf0c250aa5ee), [`a998b8f`](https://github.com/mastra-ai/mastra/commit/a998b8f858091c2ec47683e60766cf12d03001e4), [`b5a66b7`](https://github.com/mastra-ai/mastra/commit/b5a66b748a14fc8b3f63b04642ddb9621fbcc9e0), [`8a37bdd`](https://github.com/mastra-ai/mastra/commit/8a37bddb6d8614a32c5b70303d583d80c620ea61), [`135d6f2`](https://github.com/mastra-ai/mastra/commit/135d6f22a326ed1dffff858700669dff09d2c9eb)]:
|
|
33
|
+
- @mastra/core@0.21.0
|
|
34
|
+
|
|
35
|
+
## 0.14.0-alpha.0
|
|
36
|
+
|
|
37
|
+
### Minor Changes
|
|
38
|
+
|
|
39
|
+
- Update peer dependencies to match core package version bump (0.21.0) ([#8626](https://github.com/mastra-ai/mastra/pull/8626))
|
|
40
|
+
|
|
41
|
+
- Standardize model configuration across all components to support flexible model resolution ([#8626](https://github.com/mastra-ai/mastra/pull/8626))
|
|
42
|
+
|
|
43
|
+
All model configuration points now accept `MastraModelConfig`, enabling consistent model specification across:
|
|
44
|
+
- Scorers (`createScorer` and all built-in scorers)
|
|
45
|
+
- Input/Output Processors (`ModerationProcessor`, `PIIDetector`)
|
|
46
|
+
- Relevance Scorers (`MastraAgentRelevanceScorer`)
|
|
47
|
+
|
|
48
|
+
**Supported formats:**
|
|
49
|
+
- Magic strings: `'openai/gpt-4o-mini'`
|
|
50
|
+
- Config objects: `{ id: 'openai/gpt-4o-mini' }` or `{ providerId: 'openai', modelId: 'gpt-4o-mini' }`
|
|
51
|
+
- Custom endpoints: `{ id: 'custom/model', url: 'https://...', apiKey: '...' }`
|
|
52
|
+
- Dynamic resolution: `(ctx) => 'openai/gpt-4o-mini'`
|
|
53
|
+
|
|
54
|
+
This change provides a unified model configuration experience matching the `Agent` class, making it easier to switch models and use custom providers across all Mastra components.
|
|
55
|
+
|
|
56
|
+
### Patch Changes
|
|
57
|
+
|
|
58
|
+
- Update peer dependencies to match core package version bump (0.21.0) ([#8619](https://github.com/mastra-ai/mastra/pull/8619))
|
|
59
|
+
|
|
60
|
+
- Update peer dependencies to match core package version bump (0.21.0) ([#8557](https://github.com/mastra-ai/mastra/pull/8557))
|
|
61
|
+
|
|
62
|
+
- Update peer dependencies to match core package version bump (0.21.0) ([#8686](https://github.com/mastra-ai/mastra/pull/8686))
|
|
63
|
+
|
|
64
|
+
- Updated dependencies [[`b5a66b7`](https://github.com/mastra-ai/mastra/commit/b5a66b748a14fc8b3f63b04642ddb9621fbcc9e0), [`7b1ef57`](https://github.com/mastra-ai/mastra/commit/7b1ef57fc071c2aa2a2e32905b18cd88719c5a39), [`ee68e82`](https://github.com/mastra-ai/mastra/commit/ee68e8289ea4408d29849e899bc6e78b3bd4e843), [`228228b`](https://github.com/mastra-ai/mastra/commit/228228b0b1de9291cb8887587f5cea1a8757ebad), [`ea33930`](https://github.com/mastra-ai/mastra/commit/ea339301e82d6318257720d811b043014ee44064), [`b5a66b7`](https://github.com/mastra-ai/mastra/commit/b5a66b748a14fc8b3f63b04642ddb9621fbcc9e0), [`135d6f2`](https://github.com/mastra-ai/mastra/commit/135d6f22a326ed1dffff858700669dff09d2c9eb), [`59d036d`](https://github.com/mastra-ai/mastra/commit/59d036d4c2706b430b0e3f1f1e0ee853ce16ca04)]:
|
|
65
|
+
- @mastra/core@0.21.0-alpha.0
|
|
66
|
+
|
|
3
67
|
## 0.13.10
|
|
4
68
|
|
|
5
69
|
### Patch Changes
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { MastraModelConfig } from '@mastra/core/llm';
|
|
2
2
|
export declare const DEFAULT_OPTIONS: Record<'uncertaintyWeight' | 'scale', number>;
|
|
3
3
|
export declare const ANSWER_RELEVANCY_AGENT_INSTRUCTIONS = "\n You are a balanced and nuanced answer relevancy evaluator. Your job is to determine if LLM outputs are relevant to the input, including handling partially relevant or uncertain cases.\n\n Key Principles:\n 1. Evaluate whether the output addresses what the input is asking for\n 2. Consider both direct answers and related context\n 3. Prioritize relevance to the input over correctness\n 4. Recognize that responses can be partially relevant\n 5. Empty inputs or error messages should always be marked as \"no\"\n 6. Responses that discuss the type of information being asked show partial relevance\n";
|
|
4
4
|
export declare function createAnswerRelevancyScorer({ model, options, }: {
|
|
5
|
-
model:
|
|
5
|
+
model: MastraModelConfig;
|
|
6
6
|
options?: Record<'uncertaintyWeight' | 'scale', number>;
|
|
7
7
|
}): import("@mastra/core/scores").MastraScorer<"Answer Relevancy Scorer", import("@mastra/core/scores").ScorerRunInputForAgent, import("@mastra/core/scores").ScorerRunOutputForAgent, Record<"preprocessStepResult", {
|
|
8
8
|
statements: string[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/answer-relevancy/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/answer-relevancy/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAO1D,eAAO,MAAM,eAAe,EAAE,MAAM,CAAC,mBAAmB,GAAG,OAAO,EAAE,MAAM,CAGzE,CAAC;AAEF,eAAO,MAAM,mCAAmC,wnBAU/C,CAAC;AAMF,wBAAgB,2BAA2B,CAAC,EAC1C,KAAK,EACL,OAAyB,GAC1B,EAAE;IACD,KAAK,EAAE,iBAAiB,CAAC;IACzB,OAAO,CAAC,EAAE,MAAM,CAAC,mBAAmB,GAAG,OAAO,EAAE,MAAM,CAAC,CAAC;CACzD;;;;;;;6FA0DA"}
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { MastraModelConfig } from '@mastra/core/llm';
|
|
2
2
|
import type { ScorerRunInputForAgent, ScorerRunOutputForAgent } from '@mastra/core/scores';
|
|
3
3
|
export interface BiasMetricOptions {
|
|
4
4
|
scale?: number;
|
|
5
5
|
}
|
|
6
6
|
export declare function createBiasScorer({ model, options }: {
|
|
7
|
-
model:
|
|
7
|
+
model: MastraModelConfig;
|
|
8
8
|
options?: BiasMetricOptions;
|
|
9
9
|
}): import("@mastra/core/scores").MastraScorer<string, ScorerRunInputForAgent, ScorerRunOutputForAgent, Record<"preprocessStepResult", {
|
|
10
10
|
opinions: string[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/bias/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/bias/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAE1D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAW3F,MAAM,WAAW,iBAAiB;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,gBAAgB,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;IAAE,KAAK,EAAE,iBAAiB,CAAC;IAAC,OAAO,CAAC,EAAE,iBAAiB,CAAA;CAAE;;;;;;;6FAgD7G"}
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { MastraModelConfig } from '@mastra/core/llm';
|
|
2
2
|
export interface FaithfulnessMetricOptions {
|
|
3
3
|
scale?: number;
|
|
4
4
|
context?: string[];
|
|
5
5
|
}
|
|
6
6
|
export declare function createFaithfulnessScorer({ model, options, }: {
|
|
7
|
-
model:
|
|
7
|
+
model: MastraModelConfig;
|
|
8
8
|
options?: FaithfulnessMetricOptions;
|
|
9
9
|
}): import("@mastra/core/scores").MastraScorer<"Faithfulness Scorer", import("@mastra/core/scores").ScorerRunInputForAgent, import("@mastra/core/scores").ScorerRunOutputForAgent, Record<"preprocessStepResult", string[]> & Record<"analyzeStepResult", {
|
|
10
10
|
verdicts: {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/faithfulness/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/faithfulness/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAW1D,MAAM,WAAW,yBAAyB;IACxC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,wBAAgB,wBAAwB,CAAC,EACvC,KAAK,EACL,OAAO,GACR,EAAE;IACD,KAAK,EAAE,iBAAiB,CAAC;IACzB,OAAO,CAAC,EAAE,yBAAyB,CAAC;CACrC;;;;;6FAiEA"}
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { MastraModelConfig } from '@mastra/core/llm';
|
|
2
2
|
export interface HallucinationMetricOptions {
|
|
3
3
|
scale?: number;
|
|
4
4
|
context: string[];
|
|
5
5
|
}
|
|
6
6
|
export declare function createHallucinationScorer({ model, options, }: {
|
|
7
|
-
model:
|
|
7
|
+
model: MastraModelConfig;
|
|
8
8
|
options?: HallucinationMetricOptions;
|
|
9
9
|
}): import("@mastra/core/scores").MastraScorer<"Hallucination Scorer", import("@mastra/core/scores").ScorerRunInputForAgent, import("@mastra/core/scores").ScorerRunOutputForAgent, Record<"preprocessStepResult", {
|
|
10
10
|
claims: string[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/hallucination/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/hallucination/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAY1D,MAAM,WAAW,0BAA0B;IACzC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB;AAED,wBAAgB,yBAAyB,CAAC,EACxC,KAAK,EACL,OAAO,GACR,EAAE;IACD,KAAK,EAAE,iBAAiB,CAAC;IACzB,OAAO,CAAC,EAAE,0BAA0B,CAAC;CACtC;;;;;;;;6FA2DA"}
|
|
@@ -1220,7 +1220,10 @@ ${toxics.join("\n")}`;
|
|
|
1220
1220
|
}
|
|
1221
1221
|
|
|
1222
1222
|
// src/scorers/llm/toxicity/index.ts
|
|
1223
|
-
function createToxicityScorer({
|
|
1223
|
+
function createToxicityScorer({
|
|
1224
|
+
model,
|
|
1225
|
+
options
|
|
1226
|
+
}) {
|
|
1224
1227
|
return scores.createScorer({
|
|
1225
1228
|
name: "Toxicity Scorer",
|
|
1226
1229
|
description: "A scorer that evaluates the toxicity of an LLM output to an input",
|