@mastra/evals 1.1.2-alpha.0 → 1.2.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +59 -2
- package/LICENSE.md +15 -0
- package/dist/chunk-EVBNIL5M.js +606 -0
- package/dist/chunk-EVBNIL5M.js.map +1 -0
- package/dist/chunk-XRUR5PBK.cjs +632 -0
- package/dist/chunk-XRUR5PBK.cjs.map +1 -0
- package/dist/docs/SKILL.md +20 -19
- package/dist/docs/assets/SOURCE_MAP.json +1 -1
- package/dist/docs/references/docs-evals-built-in-scorers.md +2 -1
- package/dist/docs/references/docs-evals-overview.md +11 -16
- package/dist/docs/references/reference-evals-answer-relevancy.md +25 -25
- package/dist/docs/references/reference-evals-answer-similarity.md +33 -35
- package/dist/docs/references/reference-evals-bias.md +24 -24
- package/dist/docs/references/reference-evals-completeness.md +19 -20
- package/dist/docs/references/reference-evals-content-similarity.md +20 -20
- package/dist/docs/references/reference-evals-context-precision.md +36 -36
- package/dist/docs/references/reference-evals-context-relevance.md +136 -141
- package/dist/docs/references/reference-evals-faithfulness.md +24 -24
- package/dist/docs/references/reference-evals-hallucination.md +52 -69
- package/dist/docs/references/reference-evals-keyword-coverage.md +18 -18
- package/dist/docs/references/reference-evals-noise-sensitivity.md +167 -177
- package/dist/docs/references/reference-evals-prompt-alignment.md +111 -116
- package/dist/docs/references/reference-evals-scorer-utils.md +285 -105
- package/dist/docs/references/reference-evals-textual-difference.md +18 -18
- package/dist/docs/references/reference-evals-tone-consistency.md +19 -19
- package/dist/docs/references/reference-evals-tool-call-accuracy.md +165 -165
- package/dist/docs/references/reference-evals-toxicity.md +21 -21
- package/dist/docs/references/reference-evals-trajectory-accuracy.md +613 -0
- package/dist/scorers/code/index.d.ts +1 -0
- package/dist/scorers/code/index.d.ts.map +1 -1
- package/dist/scorers/code/trajectory/index.d.ts +147 -0
- package/dist/scorers/code/trajectory/index.d.ts.map +1 -0
- package/dist/scorers/llm/answer-similarity/index.d.ts +2 -2
- package/dist/scorers/llm/context-precision/index.d.ts +2 -2
- package/dist/scorers/llm/context-relevance/index.d.ts +1 -1
- package/dist/scorers/llm/faithfulness/index.d.ts +1 -1
- package/dist/scorers/llm/hallucination/index.d.ts +2 -2
- package/dist/scorers/llm/index.d.ts +1 -0
- package/dist/scorers/llm/index.d.ts.map +1 -1
- package/dist/scorers/llm/noise-sensitivity/index.d.ts +1 -1
- package/dist/scorers/llm/prompt-alignment/index.d.ts +5 -5
- package/dist/scorers/llm/tool-call-accuracy/index.d.ts +1 -1
- package/dist/scorers/llm/toxicity/index.d.ts +1 -1
- package/dist/scorers/llm/trajectory/index.d.ts +58 -0
- package/dist/scorers/llm/trajectory/index.d.ts.map +1 -0
- package/dist/scorers/llm/trajectory/prompts.d.ts +20 -0
- package/dist/scorers/llm/trajectory/prompts.d.ts.map +1 -0
- package/dist/scorers/prebuilt/index.cjs +638 -59
- package/dist/scorers/prebuilt/index.cjs.map +1 -1
- package/dist/scorers/prebuilt/index.js +578 -2
- package/dist/scorers/prebuilt/index.js.map +1 -1
- package/dist/scorers/utils.cjs +41 -17
- package/dist/scorers/utils.d.ts +171 -1
- package/dist/scorers/utils.d.ts.map +1 -1
- package/dist/scorers/utils.js +1 -1
- package/package.json +14 -11
- package/dist/chunk-OEOE7ZHN.js +0 -195
- package/dist/chunk-OEOE7ZHN.js.map +0 -1
- package/dist/chunk-W3U7MMDX.cjs +0 -212
- package/dist/chunk-W3U7MMDX.cjs.map +0 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,62 @@
|
|
|
1
1
|
# @mastra/evals
|
|
2
2
|
|
|
3
|
+
## 1.2.0-alpha.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- **Trajectory scorers**: Added scorers for evaluating agent and workflow execution paths. ([#14697](https://github.com/mastra-ai/mastra/pull/14697))
|
|
8
|
+
- `createTrajectoryScorerCode` — unified scorer that evaluates accuracy, efficiency, blacklist violations, and tool failure patterns in a single pass. Supports per-item expectations from datasets with static defaults. Nested `ExpectedStep.children` configs allow recursive evaluation with different rules per hierarchy level.
|
|
9
|
+
- `createTrajectoryAccuracyScorerCode` — deterministic accuracy scorer with strict, relaxed, and unordered ordering modes.
|
|
10
|
+
- `createTrajectoryAccuracyScorerLLM` — LLM-based scorer for semantic trajectory evaluation.
|
|
11
|
+
|
|
12
|
+
**Utility functions:**
|
|
13
|
+
- `extractTrajectory` / `extractWorkflowTrajectory` — Convert agent runs and workflow executions into structured trajectories
|
|
14
|
+
- `extractTrajectoryFromTrace` — Build hierarchical trajectories from observability trace spans, including nested agent/tool calls
|
|
15
|
+
- `compareTrajectories` — Compare actual vs. expected trajectories with configurable ordering and data matching. Accepts `ExpectedStep[]` for simpler expected step definitions
|
|
16
|
+
- `checkTrajectoryEfficiency` — Evaluate step counts, token usage, and duration against budgets
|
|
17
|
+
- `checkTrajectoryBlacklist` — Detect forbidden tools or tool sequences
|
|
18
|
+
- `analyzeToolFailures` — Detect retry patterns, fallbacks, and argument corrections
|
|
19
|
+
|
|
20
|
+
**Example — unified scorer with defaults:**
|
|
21
|
+
|
|
22
|
+
```ts
|
|
23
|
+
import { createTrajectoryScorerCode } from '@mastra/evals/scorers';
|
|
24
|
+
|
|
25
|
+
const scorer = createTrajectoryScorerCode({
|
|
26
|
+
defaults: {
|
|
27
|
+
ordering: 'strict',
|
|
28
|
+
steps: [
|
|
29
|
+
{ name: 'validate-input' },
|
|
30
|
+
{
|
|
31
|
+
name: 'research-agent',
|
|
32
|
+
stepType: 'agent_run',
|
|
33
|
+
children: {
|
|
34
|
+
ordering: 'unordered',
|
|
35
|
+
steps: [{ name: 'search' }, { name: 'summarize' }],
|
|
36
|
+
},
|
|
37
|
+
},
|
|
38
|
+
{ name: 'save-result' },
|
|
39
|
+
],
|
|
40
|
+
maxSteps: 10,
|
|
41
|
+
blacklistedTools: ['deleteAll'],
|
|
42
|
+
},
|
|
43
|
+
});
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Patch Changes
|
|
47
|
+
|
|
48
|
+
- Updated dependencies [[`dc9fc19`](https://github.com/mastra-ai/mastra/commit/dc9fc19da4437f6b508cc355f346a8856746a76b), [`260fe12`](https://github.com/mastra-ai/mastra/commit/260fe1295fe7354e39d6def2775e0797a7a277f0)]:
|
|
49
|
+
- @mastra/core@1.18.0-alpha.1
|
|
50
|
+
|
|
51
|
+
## 1.1.2
|
|
52
|
+
|
|
53
|
+
### Patch Changes
|
|
54
|
+
|
|
55
|
+
- dependencies updates: ([#12998](https://github.com/mastra-ai/mastra/pull/12998))
|
|
56
|
+
- Updated dependency [`compromise@^14.14.5` ↗︎](https://www.npmjs.com/package/compromise/v/14.14.5) (from `^14.14.4`, in `dependencies`)
|
|
57
|
+
- Updated dependencies [[`252580a`](https://github.com/mastra-ai/mastra/commit/252580a71feb0e46d0ccab04a70a79ff6a2ee0ab), [`f8e819f`](https://github.com/mastra-ai/mastra/commit/f8e819fabdfdc43d2da546a3ad81ba23685f603d), [`5c75261`](https://github.com/mastra-ai/mastra/commit/5c7526120d936757d4ffb7b82232e1641ebd45cb), [`e27d832`](https://github.com/mastra-ai/mastra/commit/e27d83281b5e166fd63a13969689e928d8605944), [`e37ef84`](https://github.com/mastra-ai/mastra/commit/e37ef8404043c94ca0c8e35ecdedb093b8087878), [`6fdd3d4`](https://github.com/mastra-ai/mastra/commit/6fdd3d451a07a8e7e216c62ac364f8dd8e36c2af), [`10cf521`](https://github.com/mastra-ai/mastra/commit/10cf52183344743a0d7babe24cd24fd78870c354), [`efdb682`](https://github.com/mastra-ai/mastra/commit/efdb682887f6522149769383908f9790c188ab88), [`0dee7a0`](https://github.com/mastra-ai/mastra/commit/0dee7a0ff4c2507e6eb6e6ee5f9738877ebd4ad1), [`04c2c8e`](https://github.com/mastra-ai/mastra/commit/04c2c8e888984364194131aecb490a3d6e920e61), [`02dc07a`](https://github.com/mastra-ai/mastra/commit/02dc07acc4ad42d93335825e3308f5b42266eba2), [`bb7262b`](https://github.com/mastra-ai/mastra/commit/bb7262b7c0ca76320d985b40510b6ffbbb936582), [`cf1c6e7`](https://github.com/mastra-ai/mastra/commit/cf1c6e789b131f55638fed52183a89d5078b4876), [`5ffadfe`](https://github.com/mastra-ai/mastra/commit/5ffadfefb1468ac2612b20bb84d24c39de6961c0), [`1e1339c`](https://github.com/mastra-ai/mastra/commit/1e1339cc276e571a48cfff5014487877086bfe68), [`d03df73`](https://github.com/mastra-ai/mastra/commit/d03df73f8fe9496064a33e1c3b74ba0479bf9ee6), [`79b8f45`](https://github.com/mastra-ai/mastra/commit/79b8f45a6767e1a5c3d56cd3c5b1214326b81661), [`9bbf08e`](https://github.com/mastra-ai/mastra/commit/9bbf08e3c20731c79dea13a765895b9fcf29cbf1), [`0a25952`](https://github.com/mastra-ai/mastra/commit/0a259526b5e1ac11e6efa53db1f140272962af2d), [`ffa5468`](https://github.com/mastra-ai/mastra/commit/ffa546857fc4821753979b3a34e13b4d76fbbcd4), 
[`3264a04`](https://github.com/mastra-ai/mastra/commit/3264a04e30340c3c5447433300a035ea0878df85), [`6fdd3d4`](https://github.com/mastra-ai/mastra/commit/6fdd3d451a07a8e7e216c62ac364f8dd8e36c2af), [`088d9ba`](https://github.com/mastra-ai/mastra/commit/088d9ba2577518703c52b0dccd617178d9ee6b0d), [`74fbebd`](https://github.com/mastra-ai/mastra/commit/74fbebd918a03832a2864965a8bea59bf617d3a2), [`aea6217`](https://github.com/mastra-ai/mastra/commit/aea621790bfb2291431b08da0cc5e6e150303ae7), [`b6a855e`](https://github.com/mastra-ai/mastra/commit/b6a855edc056e088279075506442ba1d6fa6def9), [`ae408ea`](https://github.com/mastra-ai/mastra/commit/ae408ea7128f0d2710b78d8623185198e7cb19c1), [`17e942e`](https://github.com/mastra-ai/mastra/commit/17e942eee2ba44985b1f807e6208cdde672f82f9), [`2015cf9`](https://github.com/mastra-ai/mastra/commit/2015cf921649f44c3f5bcd32a2c052335f8e49b4), [`7ef454e`](https://github.com/mastra-ai/mastra/commit/7ef454eaf9dcec6de60021c8f42192052dd490d6), [`2be1d99`](https://github.com/mastra-ai/mastra/commit/2be1d99564ce79acc4846071082bff353035a87a), [`2708fa1`](https://github.com/mastra-ai/mastra/commit/2708fa1055ac91c03e08b598869f6b8fb51fa37f), [`ba74aef`](https://github.com/mastra-ai/mastra/commit/ba74aef5716142dbbe931351f5243c9c6e4128a9), [`ba74aef`](https://github.com/mastra-ai/mastra/commit/ba74aef5716142dbbe931351f5243c9c6e4128a9), [`ec53e89`](https://github.com/mastra-ai/mastra/commit/ec53e8939c76c638991e21af762e51378eff7543), [`9b5a8cb`](https://github.com/mastra-ai/mastra/commit/9b5a8cb13e120811b0bf14140ada314f1c067894), [`607e66b`](https://github.com/mastra-ai/mastra/commit/607e66b02dc7f531ee37799f3456aa2dc0ca7ac5), [`a215d06`](https://github.com/mastra-ai/mastra/commit/a215d06758dcf590eabfe0b7afd4ae39bdbf082c), [`6909c74`](https://github.com/mastra-ai/mastra/commit/6909c74a7781e0447d475e9dbc1dc871b700f426), [`192438f`](https://github.com/mastra-ai/mastra/commit/192438f8a90c4f375e955f8ff179bf8dc6821a83)]:
|
|
58
|
+
- @mastra/core@1.5.0
|
|
59
|
+
|
|
3
60
|
## 1.1.2-alpha.0
|
|
4
61
|
|
|
5
62
|
### Patch Changes
|
|
@@ -2360,7 +2417,7 @@
|
|
|
2360
2417
|
- 876b8a2: Rename difference metric to textual difference metric
|
|
2361
2418
|
- 1bbec77: Reorganized evals into nlp and llm
|
|
2362
2419
|
- 35764f4: Added workflow for eval tests
|
|
2363
|
-
- 8769a62: Split core into
|
|
2420
|
+
- 8769a62: Split core into separate entry files
|
|
2364
2421
|
- aea3c13: Fix evals export for llm and nlp
|
|
2365
2422
|
- 4f1d1a1: Enforce types and clean up package.json
|
|
2366
2423
|
- 202d404: Added instructions when generating evals
|
|
@@ -2774,7 +2831,7 @@
|
|
|
2774
2831
|
### Patch Changes
|
|
2775
2832
|
|
|
2776
2833
|
- 9625602: Use mastra core split bundles in other packages
|
|
2777
|
-
- 8769a62: Split core into
|
|
2834
|
+
- 8769a62: Split core into separate entry files
|
|
2778
2835
|
- Updated dependencies [30322ce]
|
|
2779
2836
|
- Updated dependencies [78eec7c]
|
|
2780
2837
|
- Updated dependencies [9625602]
|
package/LICENSE.md
CHANGED
|
@@ -1,3 +1,18 @@
|
|
|
1
|
+
Portions of this software are licensed as follows:
|
|
2
|
+
|
|
3
|
+
- All content that resides under any directory named "ee/" within this
|
|
4
|
+
repository, including but not limited to:
|
|
5
|
+
- `packages/core/src/auth/ee/`
|
|
6
|
+
- `packages/server/src/server/auth/ee/`
|
|
7
|
+
is licensed under the license defined in `ee/LICENSE`.
|
|
8
|
+
|
|
9
|
+
- All third-party components incorporated into the Mastra Software are
|
|
10
|
+
licensed under the original license provided by the owner of the
|
|
11
|
+
applicable component.
|
|
12
|
+
|
|
13
|
+
- Content outside of the above-mentioned directories or restrictions is
|
|
14
|
+
available under the "Apache License 2.0" as defined below.
|
|
15
|
+
|
|
1
16
|
# Apache License 2.0
|
|
2
17
|
|
|
3
18
|
Copyright (c) 2025 Kepler Software, Inc.
|