@mastra/evals 1.0.0-beta.4 → 1.0.0-beta.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +30 -0
- package/dist/docs/README.md +1 -1
- package/dist/docs/SKILL.md +1 -1
- package/dist/docs/SOURCE_MAP.json +1 -1
- package/dist/docs/evals/03-reference.md +13 -13
- package/package.json +2 -2
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,35 @@
|
|
|
1
1
|
# @mastra/evals
|
|
2
2
|
|
|
3
|
+
## 1.0.0-beta.5
|
|
4
|
+
|
|
5
|
+
### Major Changes
|
|
6
|
+
|
|
7
|
+
- Refactor workflow and tool types to remove Zod-specific constraints ([#11814](https://github.com/mastra-ai/mastra/pull/11814))
|
|
8
|
+
|
|
9
|
+
Removed Zod-specific type constraints across all workflow implementations and tool types, replacing them with generic types. This ensures type consistency across default, evented, and inngest workflows while preparing for Zod v4 migration.
|
|
10
|
+
|
|
11
|
+
**Workflow Changes:**
|
|
12
|
+
- Removed `z.ZodObject<any>` and `z.ZodType<any>` constraints from all workflow generic types
|
|
13
|
+
- Updated method signatures to use `TInput` and `TState` directly instead of `z.infer<TInput>` and `z.infer<TState>`
|
|
14
|
+
- Aligned conditional types across all workflow implementations using `TInput extends unknown` pattern
|
|
15
|
+
- Fixed `TSteps` generic to properly use `TEngineType` instead of `any`
|
|
16
|
+
|
|
17
|
+
**Tool Changes:**
|
|
18
|
+
- Removed Zod schema constraints from `ToolExecutionContext` and related interfaces
|
|
19
|
+
- Simplified type parameters from `TSuspendSchema extends ZodLikeSchema` to `TSuspend` and `TResume`
|
|
20
|
+
- Updated tool execution context types to use generic types
|
|
21
|
+
|
|
22
|
+
**Type Utilities:**
|
|
23
|
+
- Refactored type helpers to work with generic schemas instead of Zod-specific types
|
|
24
|
+
- Updated type extraction utilities for better compatibility
|
|
25
|
+
|
|
26
|
+
This change maintains backward compatibility while improving type consistency and preparing for Zod v4 support across all affected packages.
|
|
27
|
+
|
|
28
|
+
### Patch Changes
|
|
29
|
+
|
|
30
|
+
- Updated dependencies [[`ebae12a`](https://github.com/mastra-ai/mastra/commit/ebae12a2dd0212e75478981053b148a2c246962d), [`c61a0a5`](https://github.com/mastra-ai/mastra/commit/c61a0a5de4904c88fd8b3718bc26d1be1c2ec6e7), [`69136e7`](https://github.com/mastra-ai/mastra/commit/69136e748e32f57297728a4e0f9a75988462f1a7), [`449aed2`](https://github.com/mastra-ai/mastra/commit/449aed2ba9d507b75bf93d427646ea94f734dfd1), [`eb648a2`](https://github.com/mastra-ai/mastra/commit/eb648a2cc1728f7678768dd70cd77619b448dab9), [`0131105`](https://github.com/mastra-ai/mastra/commit/0131105532e83bdcbb73352fc7d0879eebf140dc), [`9d5059e`](https://github.com/mastra-ai/mastra/commit/9d5059eae810829935fb08e81a9bb7ecd5b144a7), [`ef756c6`](https://github.com/mastra-ai/mastra/commit/ef756c65f82d16531c43f49a27290a416611e526), [`b00ccd3`](https://github.com/mastra-ai/mastra/commit/b00ccd325ebd5d9e37e34dd0a105caae67eb568f), [`3bdfa75`](https://github.com/mastra-ai/mastra/commit/3bdfa7507a91db66f176ba8221aa28dd546e464a), [`e770de9`](https://github.com/mastra-ai/mastra/commit/e770de941a287a49b1964d44db5a5763d19890a6), [`52e2716`](https://github.com/mastra-ai/mastra/commit/52e2716b42df6eff443de72360ae83e86ec23993), [`27b4040`](https://github.com/mastra-ai/mastra/commit/27b4040bfa1a95d92546f420a02a626b1419a1d6), [`610a70b`](https://github.com/mastra-ai/mastra/commit/610a70bdad282079f0c630e0d7bb284578f20151), [`8dc7f55`](https://github.com/mastra-ai/mastra/commit/8dc7f55900395771da851dc7d78d53ae84fe34ec), [`8379099`](https://github.com/mastra-ai/mastra/commit/8379099fc467af6bef54dd7f80c9bd75bf8bbddf), [`8c0ec25`](https://github.com/mastra-ai/mastra/commit/8c0ec25646c8a7df253ed1e5ff4863a0d3f1316c), [`ff4d9a6`](https://github.com/mastra-ai/mastra/commit/ff4d9a6704fc87b31a380a76ed22736fdedbba5a), [`69821ef`](https://github.com/mastra-ai/mastra/commit/69821ef806482e2c44e2197ac0b050c3fe3a5285), [`1ed5716`](https://github.com/mastra-ai/mastra/commit/1ed5716830867b3774c4a1b43cc0d82935f32b96), 
[`4186bdd`](https://github.com/mastra-ai/mastra/commit/4186bdd00731305726fa06adba0b076a1d50b49f), [`7aaf973`](https://github.com/mastra-ai/mastra/commit/7aaf973f83fbbe9521f1f9e7a4fd99b8de464617)]:
|
|
31
|
+
- @mastra/core@1.0.0-beta.22
|
|
32
|
+
|
|
3
33
|
## 1.0.0-beta.4
|
|
4
34
|
|
|
5
35
|
### Patch Changes
|
package/dist/docs/README.md
CHANGED
package/dist/docs/SKILL.md
CHANGED
|
@@ -3496,7 +3496,7 @@ const scorer = createToolCallAccuracyScorerCode({
|
|
|
3496
3496
|
|
|
3497
3497
|
// Simulate LLM input and output with tool call
|
|
3498
3498
|
const inputMessages = [
|
|
3499
|
-
|
|
3499
|
+
createTestMessage({
|
|
3500
3500
|
content: "What is the weather like in New York today?",
|
|
3501
3501
|
role: "user",
|
|
3502
3502
|
id: "input-1",
|
|
@@ -3504,7 +3504,7 @@ const inputMessages = [
|
|
|
3504
3504
|
];
|
|
3505
3505
|
|
|
3506
3506
|
const output = [
|
|
3507
|
-
|
|
3507
|
+
createTestMessage({
|
|
3508
3508
|
content: "Let me check the weather for you.",
|
|
3509
3509
|
role: "assistant",
|
|
3510
3510
|
id: "output-1",
|
|
@@ -3539,7 +3539,7 @@ const strictScorer = createToolCallAccuracyScorerCode({
|
|
|
3539
3539
|
|
|
3540
3540
|
// Multiple tools called - fails in strict mode
|
|
3541
3541
|
const output = [
|
|
3542
|
-
|
|
3542
|
+
createTestMessage({
|
|
3543
3543
|
content: "Let me help you with that.",
|
|
3544
3544
|
role: "assistant",
|
|
3545
3545
|
id: "output-1",
|
|
@@ -3578,7 +3578,7 @@ const orderScorer = createToolCallAccuracyScorerCode({
|
|
|
3578
3578
|
});
|
|
3579
3579
|
|
|
3580
3580
|
const output = [
|
|
3581
|
-
|
|
3581
|
+
createTestMessage({
|
|
3582
3582
|
content: "I will authenticate and fetch the data.",
|
|
3583
3583
|
role: "assistant",
|
|
3584
3584
|
id: "output-1",
|
|
@@ -3617,7 +3617,7 @@ const flexibleOrderScorer = createToolCallAccuracyScorerCode({
|
|
|
3617
3617
|
});
|
|
3618
3618
|
|
|
3619
3619
|
const output = [
|
|
3620
|
-
|
|
3620
|
+
createTestMessage({
|
|
3621
3621
|
content: "Performing comprehensive operation.",
|
|
3622
3622
|
role: "assistant",
|
|
3623
3623
|
id: "output-1",
|
|
@@ -3743,7 +3743,7 @@ const llmScorer = createToolCallAccuracyScorerLLM({
|
|
|
3743
3743
|
});
|
|
3744
3744
|
|
|
3745
3745
|
const inputMessages = [
|
|
3746
|
-
|
|
3746
|
+
createTestMessage({
|
|
3747
3747
|
content: "What is the weather like in San Francisco today?",
|
|
3748
3748
|
role: "user",
|
|
3749
3749
|
id: "input-1",
|
|
@@ -3751,7 +3751,7 @@ const inputMessages = [
|
|
|
3751
3751
|
];
|
|
3752
3752
|
|
|
3753
3753
|
const output = [
|
|
3754
|
-
|
|
3754
|
+
createTestMessage({
|
|
3755
3755
|
content: "Let me check the current weather for you.",
|
|
3756
3756
|
role: "assistant",
|
|
3757
3757
|
id: "output-1",
|
|
@@ -3778,7 +3778,7 @@ console.log(result.reason); // "The agent correctly used the weather-tool to add
|
|
|
3778
3778
|
|
|
3779
3779
|
```typescript title="src/example-llm-inappropriate.ts"
|
|
3780
3780
|
const inputMessages = [
|
|
3781
|
-
|
|
3781
|
+
createTestMessage({
|
|
3782
3782
|
content: "What is the weather in Tokyo?",
|
|
3783
3783
|
role: "user",
|
|
3784
3784
|
id: "input-1",
|
|
@@ -3786,7 +3786,7 @@ const inputMessages = [
|
|
|
3786
3786
|
];
|
|
3787
3787
|
|
|
3788
3788
|
const inappropriateOutput = [
|
|
3789
|
-
|
|
3789
|
+
createTestMessage({
|
|
3790
3790
|
content: "Let me search for that information.",
|
|
3791
3791
|
role: "assistant",
|
|
3792
3792
|
id: "output-1",
|
|
@@ -3815,7 +3815,7 @@ The LLM scorer recognizes when agents appropriately ask for clarification:
|
|
|
3815
3815
|
|
|
3816
3816
|
```typescript title="src/example-llm-clarification.ts"
|
|
3817
3817
|
const vagueInput = [
|
|
3818
|
-
|
|
3818
|
+
createTestMessage({
|
|
3819
3819
|
content: 'I need help with something',
|
|
3820
3820
|
role: 'user',
|
|
3821
3821
|
id: 'input-1'
|
|
@@ -3823,7 +3823,7 @@ const vagueInput = [
|
|
|
3823
3823
|
];
|
|
3824
3824
|
|
|
3825
3825
|
const clarificationOutput = [
|
|
3826
|
-
|
|
3826
|
+
createTestMessage({
|
|
3827
3827
|
content: "I'd be happy to help! Could you please provide more details about what you need assistance with?",
|
|
3828
3828
|
role: 'assistant',
|
|
3829
3829
|
id: 'output-1',
|
|
@@ -3868,14 +3868,14 @@ const llmScorer = createLLMScorer({
|
|
|
3868
3868
|
// Test data
|
|
3869
3869
|
const run = createAgentTestRun({
|
|
3870
3870
|
inputMessages: [
|
|
3871
|
-
|
|
3871
|
+
createTestMessage({
|
|
3872
3872
|
content: "What is the weather?",
|
|
3873
3873
|
role: "user",
|
|
3874
3874
|
id: "input-1",
|
|
3875
3875
|
}),
|
|
3876
3876
|
],
|
|
3877
3877
|
output: [
|
|
3878
|
-
|
|
3878
|
+
createTestMessage({
|
|
3879
3879
|
content: "Let me find that information.",
|
|
3880
3880
|
role: "assistant",
|
|
3881
3881
|
id: "output-1",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mastra/evals",
|
|
3
|
-
"version": "1.0.0-beta.4",
|
|
3
|
+
"version": "1.0.0-beta.5",
|
|
4
4
|
"description": "",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"files": [
|
|
@@ -77,7 +77,7 @@
|
|
|
77
77
|
"vitest": "4.0.16",
|
|
78
78
|
"zod": "^3.25.76",
|
|
79
79
|
"@internal/types-builder": "0.0.28",
|
|
80
|
-
"@mastra/core": "1.0.0-beta.
|
|
80
|
+
"@mastra/core": "1.0.0-beta.22",
|
|
81
81
|
"@internal/lint": "0.0.53"
|
|
82
82
|
},
|
|
83
83
|
"engines": {
|