npm - @mastra/evals - Versions diffs - 0.14.4 → 1.0.0-beta.1 - Mend

@mastra/evals 0.14.4 → 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (195)

package/CHANGELOG.md +34 -25
package/README.md +19 -159
package/dist/{chunk-KHEXN75Q.js → chunk-CCLM7KPF.js} +45 -21
package/dist/chunk-CCLM7KPF.js.map +1 -0
package/dist/{chunk-QKR2PMLZ.cjs → chunk-TPQLLHZW.cjs} +46 -21
package/dist/chunk-TPQLLHZW.cjs.map +1 -0
package/dist/scorers/code/completeness/index.d.ts +1 -1
package/dist/scorers/code/completeness/index.d.ts.map +1 -1
package/dist/scorers/code/content-similarity/index.d.ts +1 -1
package/dist/scorers/code/content-similarity/index.d.ts.map +1 -1
package/dist/scorers/code/keyword-coverage/index.d.ts +1 -1
package/dist/scorers/code/keyword-coverage/index.d.ts.map +1 -1
package/dist/scorers/code/textual-difference/index.d.ts +1 -1
package/dist/scorers/code/textual-difference/index.d.ts.map +1 -1
package/dist/scorers/code/tone/index.d.ts +1 -1
package/dist/scorers/code/tone/index.d.ts.map +1 -1
package/dist/scorers/code/tool-call-accuracy/index.d.ts +1 -1
package/dist/scorers/code/tool-call-accuracy/index.d.ts.map +1 -1
package/dist/scorers/llm/answer-relevancy/index.d.ts +1 -1
package/dist/scorers/llm/answer-relevancy/index.d.ts.map +1 -1
package/dist/scorers/llm/answer-similarity/index.d.ts +2 -2
package/dist/scorers/llm/answer-similarity/index.d.ts.map +1 -1
package/dist/scorers/llm/bias/index.d.ts +2 -2
package/dist/scorers/llm/bias/index.d.ts.map +1 -1
package/dist/scorers/llm/context-precision/index.d.ts +3 -3
package/dist/scorers/llm/context-precision/index.d.ts.map +1 -1
package/dist/scorers/llm/context-relevance/index.d.ts +3 -3
package/dist/scorers/llm/context-relevance/index.d.ts.map +1 -1
package/dist/scorers/llm/faithfulness/index.d.ts +2 -2
package/dist/scorers/llm/faithfulness/index.d.ts.map +1 -1
package/dist/scorers/llm/hallucination/index.d.ts +2 -2
package/dist/scorers/llm/hallucination/index.d.ts.map +1 -1
package/dist/scorers/llm/noise-sensitivity/index.d.ts +1 -1
package/dist/scorers/llm/noise-sensitivity/index.d.ts.map +1 -1
package/dist/scorers/llm/prompt-alignment/index.d.ts +2 -2
package/dist/scorers/llm/prompt-alignment/index.d.ts.map +1 -1
package/dist/scorers/llm/tool-call-accuracy/index.d.ts +2 -2
package/dist/scorers/llm/tool-call-accuracy/index.d.ts.map +1 -1
package/dist/scorers/llm/toxicity/index.d.ts +2 -2
package/dist/scorers/llm/toxicity/index.d.ts.map +1 -1
package/dist/scorers/{llm → prebuilt}/index.cjs +479 -62
package/dist/scorers/prebuilt/index.cjs.map +1 -0
package/dist/scorers/prebuilt/index.d.ts +3 -0
package/dist/scorers/prebuilt/index.d.ts.map +1 -0
package/dist/scorers/{llm → prebuilt}/index.js +419 -15
package/dist/scorers/prebuilt/index.js.map +1 -0
package/dist/scorers/utils.cjs +21 -17
package/dist/scorers/utils.d.ts +21 -11
package/dist/scorers/utils.d.ts.map +1 -1
package/dist/scorers/utils.js +1 -1
package/package.json +15 -59
package/dist/attachListeners.d.ts +0 -4
package/dist/attachListeners.d.ts.map +0 -1
package/dist/chunk-44PMY5ES.js +0 -78
package/dist/chunk-44PMY5ES.js.map +0 -1
package/dist/chunk-7QAUEU4L.cjs +0 -10
package/dist/chunk-7QAUEU4L.cjs.map +0 -1
package/dist/chunk-EMMSS5I5.cjs +0 -37
package/dist/chunk-EMMSS5I5.cjs.map +0 -1
package/dist/chunk-G3PMV62Z.js +0 -33
package/dist/chunk-G3PMV62Z.js.map +0 -1
package/dist/chunk-IUSAD2BW.cjs +0 -19
package/dist/chunk-IUSAD2BW.cjs.map +0 -1
package/dist/chunk-KHEXN75Q.js.map +0 -1
package/dist/chunk-PWGOG6ML.cjs +0 -81
package/dist/chunk-PWGOG6ML.cjs.map +0 -1
package/dist/chunk-QKR2PMLZ.cjs.map +0 -1
package/dist/chunk-QTWX6TKR.js +0 -8
package/dist/chunk-QTWX6TKR.js.map +0 -1
package/dist/chunk-YGTIO3J5.js +0 -17
package/dist/chunk-YGTIO3J5.js.map +0 -1
package/dist/dist-LDTK3TIP.cjs +0 -16759
package/dist/dist-LDTK3TIP.cjs.map +0 -1
package/dist/dist-OWYZEOJK.js +0 -16737
package/dist/dist-OWYZEOJK.js.map +0 -1
package/dist/evaluation.d.ts +0 -8
package/dist/evaluation.d.ts.map +0 -1
package/dist/index.cjs +0 -93
package/dist/index.cjs.map +0 -1
package/dist/index.d.ts +0 -3
package/dist/index.d.ts.map +0 -1
package/dist/index.js +0 -89
package/dist/index.js.map +0 -1
package/dist/magic-string.es-7ORA5OGR.js +0 -1305
package/dist/magic-string.es-7ORA5OGR.js.map +0 -1
package/dist/magic-string.es-NZ2XWFKN.cjs +0 -1311
package/dist/magic-string.es-NZ2XWFKN.cjs.map +0 -1
package/dist/metrics/index.d.ts +0 -4
package/dist/metrics/index.d.ts.map +0 -1
package/dist/metrics/judge/index.cjs +0 -12
package/dist/metrics/judge/index.cjs.map +0 -1
package/dist/metrics/judge/index.d.ts +0 -7
package/dist/metrics/judge/index.d.ts.map +0 -1
package/dist/metrics/judge/index.js +0 -3
package/dist/metrics/judge/index.js.map +0 -1
package/dist/metrics/llm/answer-relevancy/index.d.ts +0 -16
package/dist/metrics/llm/answer-relevancy/index.d.ts.map +0 -1
package/dist/metrics/llm/answer-relevancy/metricJudge.d.ts +0 -20
package/dist/metrics/llm/answer-relevancy/metricJudge.d.ts.map +0 -1
package/dist/metrics/llm/answer-relevancy/prompts.d.ts +0 -19
package/dist/metrics/llm/answer-relevancy/prompts.d.ts.map +0 -1
package/dist/metrics/llm/bias/index.d.ts +0 -14
package/dist/metrics/llm/bias/index.d.ts.map +0 -1
package/dist/metrics/llm/bias/metricJudge.d.ts +0 -14
package/dist/metrics/llm/bias/metricJudge.d.ts.map +0 -1
package/dist/metrics/llm/bias/prompts.d.ts +0 -14
package/dist/metrics/llm/bias/prompts.d.ts.map +0 -1
package/dist/metrics/llm/context-position/index.d.ts +0 -16
package/dist/metrics/llm/context-position/index.d.ts.map +0 -1
package/dist/metrics/llm/context-position/metricJudge.d.ts +0 -20
package/dist/metrics/llm/context-position/metricJudge.d.ts.map +0 -1
package/dist/metrics/llm/context-position/prompts.d.ts +0 -17
package/dist/metrics/llm/context-position/prompts.d.ts.map +0 -1
package/dist/metrics/llm/context-precision/index.d.ts +0 -16
package/dist/metrics/llm/context-precision/index.d.ts.map +0 -1
package/dist/metrics/llm/context-precision/metricJudge.d.ts +0 -20
package/dist/metrics/llm/context-precision/metricJudge.d.ts.map +0 -1
package/dist/metrics/llm/context-precision/prompts.d.ts +0 -17
package/dist/metrics/llm/context-precision/prompts.d.ts.map +0 -1
package/dist/metrics/llm/context-relevancy/index.d.ts +0 -16
package/dist/metrics/llm/context-relevancy/index.d.ts.map +0 -1
package/dist/metrics/llm/context-relevancy/metricJudge.d.ts +0 -16
package/dist/metrics/llm/context-relevancy/metricJudge.d.ts.map +0 -1
package/dist/metrics/llm/context-relevancy/prompts.d.ts +0 -13
package/dist/metrics/llm/context-relevancy/prompts.d.ts.map +0 -1
package/dist/metrics/llm/contextual-recall/index.d.ts +0 -16
package/dist/metrics/llm/contextual-recall/index.d.ts.map +0 -1
package/dist/metrics/llm/contextual-recall/metricJudge.d.ts +0 -16
package/dist/metrics/llm/contextual-recall/metricJudge.d.ts.map +0 -1
package/dist/metrics/llm/contextual-recall/prompts.d.ts +0 -13
package/dist/metrics/llm/contextual-recall/prompts.d.ts.map +0 -1
package/dist/metrics/llm/faithfulness/index.d.ts +0 -16
package/dist/metrics/llm/faithfulness/index.d.ts.map +0 -1
package/dist/metrics/llm/faithfulness/metricJudge.d.ts +0 -22
package/dist/metrics/llm/faithfulness/metricJudge.d.ts.map +0 -1
package/dist/metrics/llm/faithfulness/prompts.d.ts +0 -20
package/dist/metrics/llm/faithfulness/prompts.d.ts.map +0 -1
package/dist/metrics/llm/hallucination/index.d.ts +0 -16
package/dist/metrics/llm/hallucination/index.d.ts.map +0 -1
package/dist/metrics/llm/hallucination/metricJudge.d.ts +0 -22
package/dist/metrics/llm/hallucination/metricJudge.d.ts.map +0 -1
package/dist/metrics/llm/hallucination/prompts.d.ts +0 -17
package/dist/metrics/llm/hallucination/prompts.d.ts.map +0 -1
package/dist/metrics/llm/index.cjs +0 -2481
package/dist/metrics/llm/index.cjs.map +0 -1
package/dist/metrics/llm/index.d.ts +0 -12
package/dist/metrics/llm/index.d.ts.map +0 -1
package/dist/metrics/llm/index.js +0 -2469
package/dist/metrics/llm/index.js.map +0 -1
package/dist/metrics/llm/prompt-alignment/index.d.ts +0 -33
package/dist/metrics/llm/prompt-alignment/index.d.ts.map +0 -1
package/dist/metrics/llm/prompt-alignment/metricJudge.d.ts +0 -20
package/dist/metrics/llm/prompt-alignment/metricJudge.d.ts.map +0 -1
package/dist/metrics/llm/prompt-alignment/prompts.d.ts +0 -17
package/dist/metrics/llm/prompt-alignment/prompts.d.ts.map +0 -1
package/dist/metrics/llm/summarization/index.d.ts +0 -19
package/dist/metrics/llm/summarization/index.d.ts.map +0 -1
package/dist/metrics/llm/summarization/metricJudge.d.ts +0 -34
package/dist/metrics/llm/summarization/metricJudge.d.ts.map +0 -1
package/dist/metrics/llm/summarization/prompts.d.ts +0 -30
package/dist/metrics/llm/summarization/prompts.d.ts.map +0 -1
package/dist/metrics/llm/toxicity/index.d.ts +0 -14
package/dist/metrics/llm/toxicity/index.d.ts.map +0 -1
package/dist/metrics/llm/toxicity/metricJudge.d.ts +0 -14
package/dist/metrics/llm/toxicity/metricJudge.d.ts.map +0 -1
package/dist/metrics/llm/toxicity/prompts.d.ts +0 -10
package/dist/metrics/llm/toxicity/prompts.d.ts.map +0 -1
package/dist/metrics/llm/types.d.ts +0 -7
package/dist/metrics/llm/types.d.ts.map +0 -1
package/dist/metrics/llm/utils.d.ts +0 -14
package/dist/metrics/llm/utils.d.ts.map +0 -1
package/dist/metrics/nlp/completeness/index.d.ts +0 -21
package/dist/metrics/nlp/completeness/index.d.ts.map +0 -1
package/dist/metrics/nlp/content-similarity/index.d.ts +0 -18
package/dist/metrics/nlp/content-similarity/index.d.ts.map +0 -1
package/dist/metrics/nlp/index.cjs +0 -201
package/dist/metrics/nlp/index.cjs.map +0 -1
package/dist/metrics/nlp/index.d.ts +0 -6
package/dist/metrics/nlp/index.d.ts.map +0 -1
package/dist/metrics/nlp/index.js +0 -188
package/dist/metrics/nlp/index.js.map +0 -1
package/dist/metrics/nlp/keyword-coverage/index.d.ts +0 -13
package/dist/metrics/nlp/keyword-coverage/index.d.ts.map +0 -1
package/dist/metrics/nlp/textual-difference/index.d.ts +0 -15
package/dist/metrics/nlp/textual-difference/index.d.ts.map +0 -1
package/dist/metrics/nlp/tone/index.d.ts +0 -18
package/dist/metrics/nlp/tone/index.d.ts.map +0 -1
package/dist/ratio.d.ts +0 -13
package/dist/ratio.d.ts.map +0 -1
package/dist/scorers/code/index.cjs +0 -327
package/dist/scorers/code/index.cjs.map +0 -1
package/dist/scorers/code/index.js +0 -313
package/dist/scorers/code/index.js.map +0 -1
package/dist/scorers/llm/index.cjs.map +0 -1
package/dist/scorers/llm/index.js.map +0 -1

package/CHANGELOG.md CHANGED Viewed

@@ -1,49 +1,58 @@
 # @mastra/evals
-## 0.14.4
+## 1.0.0-beta.1
 ### Patch Changes
-- Remove difflib ([#9931](https://github.com/mastra-ai/mastra/pull/9931))
+- Remove unused dependencies ([#10019](https://github.com/mastra-ai/mastra/pull/10019))
-- Updated dependencies [[`0a0aa87`](https://github.com/mastra-ai/mastra/commit/0a0aa87910dd9234ae11e8146fbf54d71f0b1516), [`56bbbd0`](https://github.com/mastra-ai/mastra/commit/56bbbd0eb4510455ff03d6bf2827fdbd307938be), [`d576fd8`](https://github.com/mastra-ai/mastra/commit/d576fd8f2a3c61bcf2f7d5d2bdfb496d442c46c2), [`5e48406`](https://github.com/mastra-ai/mastra/commit/5e48406766fa94e2b7c56b70fb7d6068cf7f3989), [`7fcce62`](https://github.com/mastra-ai/mastra/commit/7fcce62880c3525fbf752d59c0ac2c478cffe024), [`16a324f`](https://github.com/mastra-ai/mastra/commit/16a324f8c30a07d0d899bc2e4e7998c6b40a4cb6), [`26aee16`](https://github.com/mastra-ai/mastra/commit/26aee160149e7acb84a533bf45631aaed6dd7077), [`b063a81`](https://github.com/mastra-ai/mastra/commit/b063a8144176915a766ea15888e1e8a06a020776), [`5ff9462`](https://github.com/mastra-ai/mastra/commit/5ff9462691c80a6841b014bcc68f6a85c3fd3fbf)]:
-  - @mastra/core@0.24.1
+- Updated dependencies [[`2319326`](https://github.com/mastra-ai/mastra/commit/2319326f8c64e503a09bbcf14be2dd65405445e0), [`d629361`](https://github.com/mastra-ai/mastra/commit/d629361a60f6565b5bfb11976fdaf7308af858e2), [`08c31c1`](https://github.com/mastra-ai/mastra/commit/08c31c188ebccd598acaf55e888b6397d01f7eae), [`fd3d338`](https://github.com/mastra-ai/mastra/commit/fd3d338a2c362174ed5b383f1f011ad9fb0302aa), [`c30400a`](https://github.com/mastra-ai/mastra/commit/c30400a49b994b1b97256fe785eb6c906fc2b232), [`69e0a87`](https://github.com/mastra-ai/mastra/commit/69e0a878896a2da9494945d86e056a5f8f05b851), [`01f8878`](https://github.com/mastra-ai/mastra/commit/01f88783de25e4de048c1c8aace43e26373c6ea5), [`4c77209`](https://github.com/mastra-ai/mastra/commit/4c77209e6c11678808b365d545845918c40045c8), [`d827d08`](https://github.com/mastra-ai/mastra/commit/d827d0808ffe1f3553a84e975806cc989b9735dd), [`23c10a1`](https://github.com/mastra-ai/mastra/commit/23c10a1efdd9a693c405511ab2dc8a1236603162), [`676ccc7`](https://github.com/mastra-ai/mastra/commit/676ccc7fe92468d2d45d39c31a87825c89fd1ea0), [`c10398d`](https://github.com/mastra-ai/mastra/commit/c10398d5b88f1d4af556f4267ff06f1d11e89179), [`00c2387`](https://github.com/mastra-ai/mastra/commit/00c2387f5f04a365316f851e58666ac43f8c4edf), [`ad6250d`](https://github.com/mastra-ai/mastra/commit/ad6250dbdaad927e29f74a27b83f6c468b50a705), [`3a73998`](https://github.com/mastra-ai/mastra/commit/3a73998fa4ebeb7f3dc9301afe78095fc63e7999), [`e16d553`](https://github.com/mastra-ai/mastra/commit/e16d55338403c7553531cc568125c63d53653dff), [`4d59f58`](https://github.com/mastra-ai/mastra/commit/4d59f58de2d90d6e2810a19d4518e38ddddb9038), [`e1bb9c9`](https://github.com/mastra-ai/mastra/commit/e1bb9c94b4eb68b019ae275981be3feb769b5365), [`351a11f`](https://github.com/mastra-ai/mastra/commit/351a11fcaf2ed1008977fa9b9a489fc422e51cd4)]:
+  - @mastra/core@1.0.0-beta.3
-## 0.14.4-alpha.0
+## 1.0.0-beta.0
-### Patch Changes
-- Remove difflib ([#9931](https://github.com/mastra-ai/mastra/pull/9931))
+### Major Changes
-- Updated dependencies [[`0a0aa87`](https://github.com/mastra-ai/mastra/commit/0a0aa87910dd9234ae11e8146fbf54d71f0b1516), [`56bbbd0`](https://github.com/mastra-ai/mastra/commit/56bbbd0eb4510455ff03d6bf2827fdbd307938be), [`d576fd8`](https://github.com/mastra-ai/mastra/commit/d576fd8f2a3c61bcf2f7d5d2bdfb496d442c46c2), [`5e48406`](https://github.com/mastra-ai/mastra/commit/5e48406766fa94e2b7c56b70fb7d6068cf7f3989), [`7fcce62`](https://github.com/mastra-ai/mastra/commit/7fcce62880c3525fbf752d59c0ac2c478cffe024), [`16a324f`](https://github.com/mastra-ai/mastra/commit/16a324f8c30a07d0d899bc2e4e7998c6b40a4cb6), [`26aee16`](https://github.com/mastra-ai/mastra/commit/26aee160149e7acb84a533bf45631aaed6dd7077), [`b063a81`](https://github.com/mastra-ai/mastra/commit/b063a8144176915a766ea15888e1e8a06a020776), [`5ff9462`](https://github.com/mastra-ai/mastra/commit/5ff9462691c80a6841b014bcc68f6a85c3fd3fbf)]:
-  - @mastra/core@0.24.1-alpha.0
+- Moving scorers under the eval domain, api method consistency, prebuilt evals, scorers require ids. ([#9589](https://github.com/mastra-ai/mastra/pull/9589))
-## 0.14.3
-### Patch Changes
+- **BREAKING CHANGE**: Scorers for Agents will now use `MastraDBMessage` instead of `UIMessage` ([#9702](https://github.com/mastra-ai/mastra/pull/9702))
+  - Scorer input/output types now use `MastraDBMessage[]` with nested `content` object structure
+  - Added `getTextContentFromMastraDBMessage()` helper function to extract text content from `MastraDBMessage` objects
+  - Added `createTestMessage()` helper function for creating `MastraDBMessage` objects in tests with optional tool invocations support
+  - Updated `extractToolCalls()` to access tool invocations from nested `content` structure
+  - Updated `getUserMessageFromRunInput()` and `getAssistantMessageFromRunOutput()` to use new message structure
+  - Removed `createUIMessage()`
-- update peerdeps ([`5ca1cca`](https://github.com/mastra-ai/mastra/commit/5ca1ccac61ffa7141e6d9fa8f22d3ad4d03bf5dc))
+- Bump minimum required Node.js version to 22.13.0 ([#9706](https://github.com/mastra-ai/mastra/pull/9706))
-- Updated dependencies [[`5ca1cca`](https://github.com/mastra-ai/mastra/commit/5ca1ccac61ffa7141e6d9fa8f22d3ad4d03bf5dc), [`6d7e90d`](https://github.com/mastra-ai/mastra/commit/6d7e90db09713e6250f4d6c3d3cff1b4740e50f9), [`f78b908`](https://github.com/mastra-ai/mastra/commit/f78b9080e11af765969b36b4a619761056030840), [`23c2614`](https://github.com/mastra-ai/mastra/commit/23c26140fdbf04b8c59e8d7d52106d67dad962ec), [`e365eda`](https://github.com/mastra-ai/mastra/commit/e365eda45795b43707310531cac1e2ce4e5a0712)]:
-  - @mastra/core@0.24.0
+- Rename RuntimeContext to RequestContext ([#9511](https://github.com/mastra-ai/mastra/pull/9511))
-## 0.14.3-alpha.0
+- Remove various deprecated APIs from agent class. ([#9257](https://github.com/mastra-ai/mastra/pull/9257))
+  - `agent.llm` → `agent.getLLM()`
+  - `agent.tools` → `agent.getTools()`
+  - `agent.instructions` → `agent.getInstructions()`
+  - `agent.speak()` → `agent.voice.speak()`
+  - `agent.getSpeakers()` → `agent.voice.getSpeakers()`
+  - `agent.listen` → `agent.voice.listen()`
+  - `agent.fetchMemory` → `(await agent.getMemory()).query()`
+  - `agent.toStep` → Add agent directly to the step, workflows handle the transformation
-### Patch Changes
+- Mark as stable ([`83d5942`](https://github.com/mastra-ai/mastra/commit/83d5942669ce7bba4a6ca4fd4da697a10eb5ebdc))
-- update peerdeps ([`5ca1cca`](https://github.com/mastra-ai/mastra/commit/5ca1ccac61ffa7141e6d9fa8f22d3ad4d03bf5dc))
+- Remove legacy evals from Mastra ([#9491](https://github.com/mastra-ai/mastra/pull/9491))
-- Updated dependencies [[`5ca1cca`](https://github.com/mastra-ai/mastra/commit/5ca1ccac61ffa7141e6d9fa8f22d3ad4d03bf5dc), [`6d7e90d`](https://github.com/mastra-ai/mastra/commit/6d7e90db09713e6250f4d6c3d3cff1b4740e50f9), [`f78b908`](https://github.com/mastra-ai/mastra/commit/f78b9080e11af765969b36b4a619761056030840), [`23c2614`](https://github.com/mastra-ai/mastra/commit/23c26140fdbf04b8c59e8d7d52106d67dad962ec), [`e365eda`](https://github.com/mastra-ai/mastra/commit/e365eda45795b43707310531cac1e2ce4e5a0712)]:
-  - @mastra/core@0.24.0-alpha.0
+### Minor Changes
-## 0.14.2
+- Update peer dependencies to match core package version bump (1.0.0) ([#9491](https://github.com/mastra-ai/mastra/pull/9491))
 ### Patch Changes
-- Fix peerdependencies ([`eb7c1c8`](https://github.com/mastra-ai/mastra/commit/eb7c1c8c592d8fb16dfd250e337d9cdc73c8d5de))
+- Update peer dependencies to match core package version bump (1.0.0) ([#9237](https://github.com/mastra-ai/mastra/pull/9237))
-- Updated dependencies []:
-  - @mastra/core@0.23.1
+- Remove difflib ([#9756](https://github.com/mastra-ai/mastra/pull/9756))
+- Updated dependencies [[`39c9743`](https://github.com/mastra-ai/mastra/commit/39c97432d084294f8ba85fbf3ef28098ff21459e), [`f743dbb`](https://github.com/mastra-ai/mastra/commit/f743dbb8b40d1627b5c10c0e6fc154f4ebb6e394), [`fec5129`](https://github.com/mastra-ai/mastra/commit/fec5129de7fc64423ea03661a56cef31dc747a0d), [`0491e7c`](https://github.com/mastra-ai/mastra/commit/0491e7c9b714cb0ba22187ee062147ec2dd7c712), [`f6f4903`](https://github.com/mastra-ai/mastra/commit/f6f4903397314f73362061dc5a3e8e7c61ea34aa), [`0e8ed46`](https://github.com/mastra-ai/mastra/commit/0e8ed467c54d6901a6a365f270ec15d6faadb36c), [`6c049d9`](https://github.com/mastra-ai/mastra/commit/6c049d94063fdcbd5b81c4912a2bf82a92c9cc0b), [`2f897df`](https://github.com/mastra-ai/mastra/commit/2f897df208508f46f51b7625e5dd20c37f93e0e3), [`3443770`](https://github.com/mastra-ai/mastra/commit/3443770662df8eb24c9df3589b2792d78cfcb811), [`f0a07e0`](https://github.com/mastra-ai/mastra/commit/f0a07e0111b3307c5fabfa4094c5c2cfb734fbe6), [`aaa40e7`](https://github.com/mastra-ai/mastra/commit/aaa40e788628b319baa8e889407d11ad626547fa), [`1521d71`](https://github.com/mastra-ai/mastra/commit/1521d716e5daedc74690c983fbd961123c56756b), [`9e1911d`](https://github.com/mastra-ai/mastra/commit/9e1911db2b4db85e0e768c3f15e0d61e319869f6), [`ebac155`](https://github.com/mastra-ai/mastra/commit/ebac15564a590117db7078233f927a7e28a85106), [`dd1c38d`](https://github.com/mastra-ai/mastra/commit/dd1c38d1b75f1b695c27b40d8d9d6ed00d5e0f6f), [`5948e6a`](https://github.com/mastra-ai/mastra/commit/5948e6a5146c83666ba3f294b2be576c82a513fb), [`8940859`](https://github.com/mastra-ai/mastra/commit/89408593658199b4ad67f7b65e888f344e64a442), [`e629310`](https://github.com/mastra-ai/mastra/commit/e629310f1a73fa236d49ec7a1d1cceb6229dc7cc), [`4c6b492`](https://github.com/mastra-ai/mastra/commit/4c6b492c4dd591c6a592520c1f6855d6e936d71f), [`dff01d8`](https://github.com/mastra-ai/mastra/commit/dff01d81ce1f4e4087cfac20fa868e6db138dd14), 
[`9d819d5`](https://github.com/mastra-ai/mastra/commit/9d819d54b61481639f4008e4694791bddf187edd), [`71c8d6c`](https://github.com/mastra-ai/mastra/commit/71c8d6c161253207b2b9588bdadb7eed604f7253), [`6179a9b`](https://github.com/mastra-ai/mastra/commit/6179a9ba36ffac326de3cc3c43cdc8028d37c251), [`00f4921`](https://github.com/mastra-ai/mastra/commit/00f4921dd2c91a1e5446799599ef7116a8214a1a), [`ca8041c`](https://github.com/mastra-ai/mastra/commit/ca8041cce0379fda22ed293a565bcb5b6ddca68a), [`7051bf3`](https://github.com/mastra-ai/mastra/commit/7051bf38b3b122a069008f861f7bfc004a6d9f6e), [`a8f1494`](https://github.com/mastra-ai/mastra/commit/a8f1494f4bbdc2770bcf327d4c7d869e332183f1), [`0793497`](https://github.com/mastra-ai/mastra/commit/079349753620c40246ffd673e3f9d7d9820beff3), [`5df9cce`](https://github.com/mastra-ai/mastra/commit/5df9cce1a753438413f64c11eeef8f845745c2a8), [`a854ede`](https://github.com/mastra-ai/mastra/commit/a854ede62bf5ac0945a624ac48913dd69c73aabf), [`c576fc0`](https://github.com/mastra-ai/mastra/commit/c576fc0b100b2085afded91a37c97a0ea0ec09c7), [`3defc80`](https://github.com/mastra-ai/mastra/commit/3defc80cf2b88a1b7fc1cc4ddcb91e982a614609), [`16153fe`](https://github.com/mastra-ai/mastra/commit/16153fe7eb13c99401f48e6ca32707c965ee28b9), [`9f4a683`](https://github.com/mastra-ai/mastra/commit/9f4a6833e88b52574665c028fd5508ad5c2f6004), [`bc94344`](https://github.com/mastra-ai/mastra/commit/bc943444a1342d8a662151b7bce1df7dae32f59c), [`57d157f`](https://github.com/mastra-ai/mastra/commit/57d157f0b163a95c3e6c9eae31bdb11d1bfc64f9), [`903f67d`](https://github.com/mastra-ai/mastra/commit/903f67d184504a273893818c02b961f5423a79ad), [`2a90c55`](https://github.com/mastra-ai/mastra/commit/2a90c55a86a9210697d5adaab5ee94584b079adc), [`eb09742`](https://github.com/mastra-ai/mastra/commit/eb09742197f66c4c38154c3beec78313e69760b2), [`96d35f6`](https://github.com/mastra-ai/mastra/commit/96d35f61376bc2b1bf148648a2c1985bd51bef55), 
[`5cbe88a`](https://github.com/mastra-ai/mastra/commit/5cbe88aefbd9f933bca669fd371ea36bf939ac6d), [`a1bd7b8`](https://github.com/mastra-ai/mastra/commit/a1bd7b8571db16b94eb01588f451a74758c96d65), [`d78b38d`](https://github.com/mastra-ai/mastra/commit/d78b38d898fce285260d3bbb4befade54331617f), [`0633100`](https://github.com/mastra-ai/mastra/commit/0633100a911ad22f5256471bdf753da21c104742), [`c710c16`](https://github.com/mastra-ai/mastra/commit/c710c1652dccfdc4111c8412bca7a6bb1d48b441), [`354ad0b`](https://github.com/mastra-ai/mastra/commit/354ad0b7b1b8183ac567f236a884fc7ede6d7138), [`cfae733`](https://github.com/mastra-ai/mastra/commit/cfae73394f4920635e6c919c8e95ff9a0788e2e5), [`e3dfda7`](https://github.com/mastra-ai/mastra/commit/e3dfda7b11bf3b8c4bb55637028befb5f387fc74), [`844ea5d`](https://github.com/mastra-ai/mastra/commit/844ea5dc0c248961e7bf73629ae7dcff503e853c), [`398fde3`](https://github.com/mastra-ai/mastra/commit/398fde3f39e707cda79372cdae8f9870e3b57c8d), [`f0f8f12`](https://github.com/mastra-ai/mastra/commit/f0f8f125c308f2d0fd36942ef652fd852df7522f), [`0d7618b`](https://github.com/mastra-ai/mastra/commit/0d7618bc650bf2800934b243eca5648f4aeed9c2), [`7b763e5`](https://github.com/mastra-ai/mastra/commit/7b763e52fc3eaf699c2a99f2adf418dd46e4e9a5), [`d36cfbb`](https://github.com/mastra-ai/mastra/commit/d36cfbbb6565ba5f827883cc9bb648eb14befdc1), [`3697853`](https://github.com/mastra-ai/mastra/commit/3697853deeb72017d90e0f38a93c1e29221aeca0), [`b2e45ec`](https://github.com/mastra-ai/mastra/commit/b2e45eca727a8db01a81ba93f1a5219c7183c839), [`d6d49f7`](https://github.com/mastra-ai/mastra/commit/d6d49f7b8714fa19a52ff9c7cf7fb7e73751901e), [`a534e95`](https://github.com/mastra-ai/mastra/commit/a534e9591f83b3cc1ebff99c67edf4cda7bf81d3), [`9d0e7fe`](https://github.com/mastra-ai/mastra/commit/9d0e7feca8ed98de959f53476ee1456073673348), [`53d927c`](https://github.com/mastra-ai/mastra/commit/53d927cc6f03bff33655b7e2b788da445a08731d), 
[`3f2faf2`](https://github.com/mastra-ai/mastra/commit/3f2faf2e2d685d6c053cc5af1bf9fedf267b2ce5), [`22f64bc`](https://github.com/mastra-ai/mastra/commit/22f64bc1d37149480b58bf2fefe35b79a1e3e7d5), [`83d5942`](https://github.com/mastra-ai/mastra/commit/83d5942669ce7bba4a6ca4fd4da697a10eb5ebdc), [`b7959e6`](https://github.com/mastra-ai/mastra/commit/b7959e6e25a46b480f9ea2217c4c6c588c423791), [`bda6370`](https://github.com/mastra-ai/mastra/commit/bda637009360649aaf579919e7873e33553c273e), [`d7acd8e`](https://github.com/mastra-ai/mastra/commit/d7acd8e987b5d7eff4fd98b0906c17c06a2e83d5), [`c7f1f7d`](https://github.com/mastra-ai/mastra/commit/c7f1f7d24f61f247f018cc2d1f33bf63212959a7), [`0bddc6d`](https://github.com/mastra-ai/mastra/commit/0bddc6d8dbd6f6008c0cba2e4960a2da75a55af1), [`735d8c1`](https://github.com/mastra-ai/mastra/commit/735d8c1c0d19fbc09e6f8b66cf41bc7655993838), [`acf322e`](https://github.com/mastra-ai/mastra/commit/acf322e0f1fd0189684cf529d91c694bea918a45), [`c942802`](https://github.com/mastra-ai/mastra/commit/c942802a477a925b01859a7b8688d4355715caaa), [`a0c8c1b`](https://github.com/mastra-ai/mastra/commit/a0c8c1b87d4fee252aebda73e8637fbe01d761c9), [`cc34739`](https://github.com/mastra-ai/mastra/commit/cc34739c34b6266a91bea561119240a7acf47887), [`c218bd3`](https://github.com/mastra-ai/mastra/commit/c218bd3759e32423735b04843a09404572631014), [`2c4438b`](https://github.com/mastra-ai/mastra/commit/2c4438b87817ab7eed818c7990fef010475af1a3), [`2b8893c`](https://github.com/mastra-ai/mastra/commit/2b8893cb108ef9acb72ee7835cd625610d2c1a4a), [`8e5c75b`](https://github.com/mastra-ai/mastra/commit/8e5c75bdb1d08a42d45309a4c72def4b6890230f), [`e59e0d3`](https://github.com/mastra-ai/mastra/commit/e59e0d32afb5fcf2c9f3c00c8f81f6c21d3a63fa), [`fa8409b`](https://github.com/mastra-ai/mastra/commit/fa8409bc39cfd8ba6643b9db5269b90b22e2a2f7), [`173c535`](https://github.com/mastra-ai/mastra/commit/173c535c0645b0da404fe09f003778f0b0d4e019)]:
+  - @mastra/core@1.0.0-beta.0
 ## 0.14.1

package/README.md CHANGED Viewed

@@ -1,6 +1,11 @@
 # @mastra/evals
-A comprehensive evaluation framework for assessing AI model outputs across multiple dimensions.
+`@mastra/evals` ships a collection of scoring utilities you can run locally or inside your own evaluation pipelines. These scorers come in two flavors:
+- **LLM scorers** – leverage a judge model (e.g. OpenAI, Anthropic) to rate responses for qualities such as faithfulness or toxicity.
+- **Code/NLP scorers** – deterministic heuristics (keyword coverage, similarity, etc.) that do not require an external model.
+The scorers do not persist results or integrate with Mastra Storage; you decide where and how to record outcomes.
 ## Installation
@@ -8,171 +13,26 @@ A comprehensive evaluation framework for assessing AI model outputs across multi
 npm install @mastra/evals
 ```
-## Overview
-`@mastra/evals` provides a suite of evaluation metrics for assessing AI model outputs. The package includes both LLM-based and NLP-based metrics, enabling both automated and model-assisted evaluation of AI responses.
-## Features
-### LLM-Based Metrics
-1. **Answer Relevancy**
-   - Evaluates how well an answer addresses the input question
-   - Considers uncertainty weighting for more nuanced scoring
-   - Returns detailed reasoning for scores
-2. **Bias Detection**
-   - Identifies potential biases in model outputs
-   - Analyzes opinions and statements for bias indicators
-   - Provides explanations for detected biases
-   - Configurable scoring scale
-3. **Context Precision & Relevancy**
-   - Assesses how well responses use provided context
-   - Evaluates accuracy of context usage
-   - Measures relevance of context to the response
-   - Analyzes context positioning in responses
-4. **Faithfulness**
-   - Verifies that responses are faithful to provided context
-   - Detects hallucinations or fabricated information
-   - Evaluates claims against provided context
-   - Provides detailed analysis of faithfulness breaches
-5. **Prompt Alignment**
-   - Measures how well responses follow given instructions
-   - Evaluates adherence to multiple instruction criteria
-   - Provides per-instruction scoring
-   - Supports custom instruction sets
-6. **Toxicity**
-   - Detects toxic or harmful content in responses
-   - Provides detailed reasoning for toxicity verdicts
-   - Configurable scoring thresholds
-   - Considers both input and output context
+## Quick Start
-### NLP-Based Metrics
+```ts
+import { createFaithfulnessScorer, createContentSimilarityScorer } from '@mastra/evals/scorers/prebuilt';
-1. **Completeness**
-   - Analyzes structural completeness of responses
-   - Identifies missing elements from input requirements
-   - Provides detailed element coverage analysis
-   - Tracks input-output element ratios
-2. **Content Similarity**
-   - Measures text similarity between inputs and outputs
-   - Configurable for case and whitespace sensitivity
-   - Returns normalized similarity scores
-   - Uses string comparison algorithms for accuracy
-3. **Keyword Coverage**
-   - Tracks presence of key terms from input in output
-   - Provides detailed keyword matching statistics
-   - Calculates coverage ratios
-   - Useful for ensuring comprehensive responses
-## Usage
-### Basic Example
-```typescript
-import { ContentSimilarityMetric, ToxicityMetric } from '@mastra/evals';
-// Initialize metrics
-const similarityMetric = new ContentSimilarityMetric({
-  ignoreCase: true,
-  ignoreWhitespace: true,
+const faithfulness = createFaithfulnessScorer({
+   model: 'openai/gpt-4o-mini'
 });
-const toxicityMetric = new ToxicityMetric({
-  model: openai('gpt-4'),
-  scale: 1, // Optional: adjust scoring scale
-});
-// Evaluate outputs
-const input = 'What is the capital of France?';
-const output = 'Paris is the capital of France.';
-const similarityResult = await similarityMetric.measure(input, output);
-const toxicityResult = await toxicityMetric.measure(input, output);
-console.log('Similarity Score:', similarityResult.score);
-console.log('Toxicity Score:', toxicityResult.score);
-```
+const similarity = createContentSimilarityScorer({ ignoreCase: true });
-### Context-Aware Evaluation
+const answer = 'Paris is the capital of France.';
+const context = ['Paris is the capital of France', 'France is in Europe'];
-```typescript
-import { FaithfulnessMetric } from '@mastra/evals';
+const faithfulnessScore = await faithfulness.score({ answer, context });
-// Initialize with context
-const faithfulnessMetric = new FaithfulnessMetric({
-  model: openai('gpt-4'),
-  context: ['Paris is the capital of France', 'Paris has a population of 2.2 million'],
-  scale: 1,
+const similarityScore = similarity.score({
+   input: context[0],
+   output: answer
 });
-// Evaluate response against context
-const result = await faithfulnessMetric.measure(
-  'Tell me about Paris',
-  'Paris is the capital of France with 2.2 million residents',
-);
-console.log('Faithfulness Score:', result.score);
-console.log('Reasoning:', result.reason);
+console.log({ faithfulnessScore, similarityScore });
 ```
-## Metric Results
-Each metric returns a standardized result object containing:
-- `score`: Normalized score (typically 0-1)
-- `info`: Detailed information about the evaluation
-- Additional metric-specific data (e.g., matched keywords, missing elements)
-Some metrics also provide:
-- `reason`: Detailed explanation of the score
-- `verdicts`: Individual judgments that contributed to the final score
-## Telemetry and Logging
-The package includes built-in telemetry and logging capabilities:
-- Automatic evaluation tracking through Mastra Storage
-- Integration with OpenTelemetry for performance monitoring
-- Detailed evaluation traces for debugging
-```typescript
-import { attachListeners } from '@mastra/evals';
-// Enable basic evaluation tracking
-await attachListeners();
-// Store evals in Mastra Storage (if storage is enabled)
-await attachListeners(mastra);
-// Note: When using in-memory storage, evaluations are isolated to the test process.
-// When using file storage, evaluations are persisted and can be queried later.
-```
-## Environment Variables
-Required for LLM-based metrics:
-- `OPENAI_API_KEY`: For OpenAI model access
-- Additional provider keys as needed (Cohere, Anthropic, etc.)
-## Package Exports
-```typescript
-// Main package exports
-import { evaluate } from '@mastra/evals';
-// NLP-specific metrics
-import { ContentSimilarityMetric } from '@mastra/evals/nlp';
-```
-## Related Packages
-- `@mastra/core`: Core framework functionality
-- `@mastra/engine`: LLM execution engine
-- `@mastra/mcp`: Model Context Protocol integration

package/dist/{chunk-KHEXN75Q.js → chunk-CCLM7KPF.js} RENAMED Viewed

@@ -1,22 +1,33 @@
-import { RuntimeContext } from '@mastra/core/runtime-context';
+import { RequestContext } from '@mastra/core/request-context';
 // src/scorers/utils.ts
+function getTextContentFromMastraDBMessage(message) {
+  if (typeof message.content.content === "string" && message.content.content !== "") {
+    return message.content.content;
+  }
+  if (message.content.parts && Array.isArray(message.content.parts)) {
+    const textParts = message.content.parts.filter((p) => p.type === "text");
+    return textParts.length > 0 ? textParts[textParts.length - 1]?.text || "" : "";
+  }
+  return "";
+}
 var roundToTwoDecimals = (num) => {
   return Math.round((num + Number.EPSILON) * 100) / 100;
 };
 function isCloserTo(value, target1, target2) {
   return Math.abs(value - target1) < Math.abs(value - target2);
 }
-var createTestRun = (input, output, additionalContext, runtimeContext) => {
+var createTestRun = (input, output, additionalContext, requestContext) => {
   return {
     input: [{ role: "user", content: input }],
     output: { role: "assistant", text: output },
     additionalContext: additionalContext ?? {},
-    runtimeContext: runtimeContext ?? {}
+    requestContext: requestContext ?? {}
   };
 };
 var getUserMessageFromRunInput = (input) => {
-  return input?.inputMessages.find(({ role }) => role === "user")?.content;
+  const message = input?.inputMessages.find(({ role }) => role === "user");
+  return message ? getTextContentFromMastraDBMessage(message) : void 0;
 };
 var getSystemMessagesFromRunInput = (input) => {
   const systemMessages = [];
@@ -48,7 +59,8 @@ var getCombinedSystemPrompt = (input) => {
   return systemMessages.join("\n\n");
 };
 var getAssistantMessageFromRunOutput = (output) => {
-  return output?.find(({ role }) => role === "assistant")?.content;
+  const message = output?.find(({ role }) => role === "assistant");
+  return message ? getTextContentFromMastraDBMessage(message) : void 0;
 };
 var createToolInvocation = ({
   toolCallId,
@@ -65,27 +77,39 @@ var createToolInvocation = ({
     state
   };
 };
-var createUIMessage = ({
+function createTestMessage({
   content,
   role,
   id = "test-message",
   toolInvocations = []
-}) => {
+}) {
   return {
     id,
     role,
-    content,
-    parts: [{ type: "text", text: content }],
-    toolInvocations
+    content: {
+      format: 2,
+      parts: [{ type: "text", text: content }],
+      content,
+      ...toolInvocations.length > 0 && {
+        toolInvocations: toolInvocations.map((ti) => ({
+          toolCallId: ti.toolCallId,
+          toolName: ti.toolName,
+          args: ti.args,
+          result: ti.result,
+          state: ti.state
+        }))
+      }
+    },
+    createdAt: /* @__PURE__ */ new Date()
   };
-};
+}
 var createAgentTestRun = ({
   inputMessages = [],
   output,
   rememberedMessages = [],
   systemMessages = [],
   taggedSystemMessages = {},
-  runtimeContext = new RuntimeContext(),
+  requestContext = new RequestContext(),
   runId = crypto.randomUUID()
 }) => {
   return {
@@ -96,7 +120,7 @@ var createAgentTestRun = ({
       taggedSystemMessages
     },
     output,
-    runtimeContext,
+    requestContext,
     runId
   };
 };
@@ -105,9 +129,9 @@ function extractToolCalls(output) {
   const toolCallInfos = [];
   for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {
     const message = output[messageIndex];
-    if (message?.toolInvocations) {
-      for (let invocationIndex = 0; invocationIndex < message.toolInvocations.length; invocationIndex++) {
-        const invocation = message.toolInvocations[invocationIndex];
+    if (message?.content?.toolInvocations) {
+      for (let invocationIndex = 0; invocationIndex < message.content.toolInvocations.length; invocationIndex++) {
+        const invocation = message.content.toolInvocations[invocationIndex];
         if (invocation && invocation.toolName && (invocation.state === "result" || invocation.state === "call")) {
           toolCalls.push(invocation.toolName);
           toolCallInfos.push({
@@ -123,12 +147,12 @@ function extractToolCalls(output) {
   return { tools: toolCalls, toolCallInfos };
 }
 var extractInputMessages = (runInput) => {
-  return runInput?.inputMessages?.map((msg) => msg.content) || [];
+  return runInput?.inputMessages?.map((msg) => getTextContentFromMastraDBMessage(msg)) || [];
 };
 var extractAgentResponseMessages = (runOutput) => {
-  return runOutput.filter((msg) => msg.role === "assistant").map((msg) => msg.content);
+  return runOutput.filter((msg) => msg.role === "assistant").map((msg) => getTextContentFromMastraDBMessage(msg));
 };
-export { createAgentTestRun, createTestRun, createToolInvocation, createUIMessage, extractAgentResponseMessages, extractInputMessages, extractToolCalls, getAssistantMessageFromRunOutput, getCombinedSystemPrompt, getSystemMessagesFromRunInput, getUserMessageFromRunInput, isCloserTo, roundToTwoDecimals };
-//# sourceMappingURL=chunk-KHEXN75Q.js.map
-//# sourceMappingURL=chunk-KHEXN75Q.js.map
+export { createAgentTestRun, createTestMessage, createTestRun, createToolInvocation, extractAgentResponseMessages, extractInputMessages, extractToolCalls, getAssistantMessageFromRunOutput, getCombinedSystemPrompt, getSystemMessagesFromRunInput, getTextContentFromMastraDBMessage, getUserMessageFromRunInput, isCloserTo, roundToTwoDecimals };
+//# sourceMappingURL=chunk-CCLM7KPF.js.map
+//# sourceMappingURL=chunk-CCLM7KPF.js.map

package/dist/chunk-CCLM7KPF.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"sources":["../src/scorers/utils.ts"],"names":[],"mappings":";;;AASO,SAAS,kCAAkC,OAAA,EAAkC;AAClF,EAAA,IAAI,OAAO,QAAQ,OAAA,CAAQ,OAAA,KAAY,YAAY,OAAA,CAAQ,OAAA,CAAQ,YAAY,EAAA,EAAI;AACjF,IAAA,OAAO,QAAQ,OAAA,CAAQ,OAAA;AAAA,EACzB;AACA,EAAA,IAAI,OAAA,CAAQ,QAAQ,KAAA,IAAS,KAAA,CAAM,QAAQ,OAAA,CAAQ,OAAA,CAAQ,KAAK,CAAA,EAAG;AAEjE,IAAA,MAAM,SAAA,GAAY,QAAQ,OAAA,CAAQ,KAAA,CAAM,OAAO,CAAA,CAAA,KAAK,CAAA,CAAE,SAAS,MAAM,CAAA;AACrE,IAAA,OAAO,SAAA,CAAU,SAAS,CAAA,GAAI,SAAA,CAAU,UAAU,MAAA,GAAS,CAAC,CAAA,EAAG,IAAA,IAAQ,EAAA,GAAK,EAAA;AAAA,EAC9E;AACA,EAAA,OAAO,EAAA;AACT;AAEO,IAAM,kBAAA,GAAqB,CAAC,GAAA,KAAgB;AACjD,EAAA,OAAO,KAAK,KAAA,CAAA,CAAO,GAAA,GAAM,MAAA,CAAO,OAAA,IAAW,GAAG,CAAA,GAAI,GAAA;AACpD;AAEO,SAAS,UAAA,CAAW,KAAA,EAAe,OAAA,EAAiB,OAAA,EAA0B;AACnF,EAAA,OAAO,IAAA,CAAK,IAAI,KAAA,GAAQ,OAAO,IAAI,IAAA,CAAK,GAAA,CAAI,QAAQ,OAAO,CAAA;AAC7D;AAeO,IAAM,aAAA,GAAgB,CAC3B,KAAA,EACA,MAAA,EACA,mBACA,cAAA,KACiB;AACjB,EAAA,OAAO;AAAA,IACL,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,OAAA,EAAS,OAAO,CAAA;AAAA,IACxC,MAAA,EAAQ,EAAE,IAAA,EAAM,WAAA,EAAa,MAAM,MAAA,EAAO;AAAA,IAC1C,iBAAA,EAAmB,qBAAqB,EAAC;AAAA,IACzC,cAAA,EAAgB,kBAAkB;AAAC,GACrC;AACF;AAEO,IAAM,0BAAA,GAA6B,CAAC,KAAA,KAAuD;AAChG,EAAA,MAAM,OAAA,GAAU,OAAO,aAAA,CAAc,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,IAAA,KAAS,MAAM,CAAA;AACvE,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAEO,IAAM,6BAAA,GAAgC,CAAC,KAAA,KAA6C;AACzF,EAAA,MAAM,iBAA2B,EAAC;AAGlC,EAAA,IAAI,OAAO,cAAA,EAAgB;AACzB,IAAA,cAAA,CAAe,IAAA;AAAA,MACb,GAAG,KAAA,CAAM,cAAA,CACN,GAAA,CAAI,CAAA,GAAA,KAAO;AAEV,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,OAAO,GAAA,CAAI,OAAA;AAAA,QACb,CAAA,MAAA,IAAW,KAAA,CAAM,OAAA,CAAQ,GAAA,CAAI,OAAO,CAAA,EAAG;AAErC,UAAA,OAAO,GAAA,CAAI,OAAA,CACR,MAAA,CAAO,CAAA,IAAA,KAAQ,KAAK,IAAA,KAAS,MAAM,CAAA,CACnC,GAAA,CAAI,UAAQ,IAAA,CAAK,IAAA,IAAQ,EAAE,CAAA,CAC3B,KAAK,GAAG,CAAA;AAAA,QACb;AACA,QAAA,OAAO,EAAA;AAAA,MACT,CAAC,CAAA,CACA,MAAA,CAAO,CAAA,OAAA,KAAW,OAAO;AAAA,KAC9B;AAAA,EACF;AAGA,EAAA,IAAI,OAAO,oBAAA,EAAsB;AAC/B,IAAA,MAAA,CAAO,MAAA,CAAO,KAAA,CAAM,oBAAoB,
CAAA,CAAE,QAAQ,CAAA,QAAA,KAAY;AAC5D,MAAA,QAAA,CAAS,QAAQ,CAAA,GAAA,KAAO;AACtB,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,cAAA,CAAe,IAAA,CAAK,IAAI,OAAO,CAAA;AAAA,QACjC;AAAA,MACF,CAAC,CAAA;AAAA,IACH,CAAC,CAAA;AAAA,EACH;AAEA,EAAA,OAAO,cAAA;AACT;AAEO,IAAM,uBAAA,GAA0B,CAAC,KAAA,KAA2C;AACjF,EAAA,MAAM,cAAA,GAAiB,8BAA8B,KAAK,CAAA;AAC1D,EAAA,OAAO,cAAA,CAAe,KAAK,MAAM,CAAA;AACnC;AAEO,IAAM,gCAAA,GAAmC,CAAC,MAAA,KAAqC;AACpF,EAAA,MAAM,OAAA,GAAU,QAAQ,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,SAAS,WAAW,CAAA;AAC/D,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAEO,IAAM,uBAAuB,CAAC;AAAA,EACnC,UAAA;AAAA,EACA,QAAA;AAAA,EACA,IAAA;AAAA,EACA,MAAA;AAAA,EACA,KAAA,GAAQ;AACV,CAAA,KAMuH;AACrH,EAAA,OAAO;AAAA,IACL,UAAA;AAAA,IACA,QAAA;AAAA,IACA,IAAA;AAAA,IACA,MAAA;AAAA,IACA;AAAA,GACF;AACF;AAMO,SAAS,iBAAA,CAAkB;AAAA,EAChC,OAAA;AAAA,EACA,IAAA;AAAA,EACA,EAAA,GAAK,cAAA;AAAA,EACL,kBAAkB;AACpB,CAAA,EAWoB;AAClB,EAAA,OAAO;AAAA,IACL,EAAA;AAAA,IACA,IAAA;AAAA,IACA,OAAA,EAAS;AAAA,MACP,MAAA,EAAQ,CAAA;AAAA,MACR,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,IAAA,EAAM,SAAS,CAAA;AAAA,MACvC,OAAA;AAAA,MACA,GAAI,eAAA,CAAgB,MAAA,GAAS,CAAA,IAAK;AAAA,QAChC,eAAA,EAAiB,eAAA,CAAgB,GAAA,CAAI,CAAA,EAAA,MAAO;AAAA,UAC1C,YAAY,EAAA,CAAG,UAAA;AAAA,UACf,UAAU,EAAA,CAAG,QAAA;AAAA,UACb,MAAM,EAAA,CAAG,IAAA;AAAA,UACT,QAAQ,EAAA,CAAG,MAAA;AAAA,UACX,OAAO,EAAA,CAAG;AAAA,SACZ,CAAE;AAAA;AACJ,KACF;AAAA,IACA,SAAA,sBAAe,IAAA;AAAK,GACtB;AACF;AAEO,IAAM,qBAAqB,CAAC;AAAA,EACjC,gBAAgB,EAAC;AAAA,EACjB,MAAA;AAAA,EACA,qBAAqB,EAAC;AAAA,EACtB,iBAAiB,EAAC;AAAA,EAClB,uBAAuB,EAAC;AAAA,EACxB,cAAA,GAAiB,IAAI,cAAA,EAAe;AAAA,EACpC,KAAA,GAAQ,OAAO,UAAA;AACjB,CAAA,KAaK;AACH,EAAA,OAAO;AAAA,IACL,KAAA,EAAO;AAAA,MACL,aAAA;AAAA,MACA,kBAAA;AAAA,MACA,cAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,MAAA;AAAA,IACA,cAAA;AAAA,IACA;AAAA,GACF;AACF;AASO,SAAS,iBAAiB,MAAA,EAAqF;AACpH,EAAA,MAAM,YAAsB,EAAC;AAC7B,EAAA,MAAM,gBAAgC,EAAC;AAEvC,EAAA,KAAA,IAAS,YAAA,GAAe,CAAA,EAAG,YAAA,GAAe,MAAA,CAAO,QAAQ,YAAA,EAAA,EAAgB;AACvE,IAAA,MAAM,OAAA,GAAU,OAAO,YAAY,CAAA;AAEnC,IAAA,IAAI,OAAA,EAAS,SAAS,eAAA,EA
AiB;AACrC,MAAA,KAAA,IAAS,kBAAkB,CAAA,EAAG,eAAA,GAAkB,QAAQ,OAAA,CAAQ,eAAA,CAAgB,QAAQ,eAAA,EAAA,EAAmB;AACzG,QAAA,MAAM,UAAA,GAAa,OAAA,CAAQ,OAAA,CAAQ,eAAA,CAAgB,eAAe,CAAA;AAClE,QAAA,IAAI,UAAA,IAAc,WAAW,QAAA,KAAa,UAAA,CAAW,UAAU,QAAA,IAAY,UAAA,CAAW,UAAU,MAAA,CAAA,EAAS;AACvG,UAAA,SAAA,CAAU,IAAA,CAAK,WAAW,QAAQ,CAAA;AAClC,UAAA,aAAA,CAAc,IAAA,CAAK;AAAA,YACjB,UAAU,UAAA,CAAW,QAAA;AAAA,YACrB,YAAY,UAAA,CAAW,UAAA,IAAc,CAAA,EAAG,YAAY,IAAI,eAAe,CAAA,CAAA;AAAA,YACvE,YAAA;AAAA,YACA;AAAA,WACD,CAAA;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,EAAE,KAAA,EAAO,SAAA,EAAW,aAAA,EAAc;AAC3C;AAEO,IAAM,oBAAA,GAAuB,CAAC,QAAA,KAA2D;AAC9F,EAAA,OAAO,QAAA,EAAU,eAAe,GAAA,CAAI,CAAA,GAAA,KAAO,kCAAkC,GAAG,CAAC,KAAK,EAAC;AACzF;AAEO,IAAM,4BAAA,GAA+B,CAAC,SAAA,KAAiD;AAC5F,EAAA,OAAO,SAAA,CAAU,MAAA,CAAO,CAAA,GAAA,KAAO,GAAA,CAAI,IAAA,KAAS,WAAW,CAAA,CAAE,GAAA,CAAI,CAAA,GAAA,KAAO,iCAAA,CAAkC,GAAG,CAAC,CAAA;AAC5G","file":"chunk-CCLM7KPF.js","sourcesContent":["import type { MastraDBMessage } from '@mastra/core/agent';\nimport type { ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput } from '@mastra/core/evals';\nimport { RequestContext } from '@mastra/core/request-context';\nimport type { ToolInvocation } from 'ai';\n\n/**\n * Extract text content from MastraDBMessage\n * Matches the logic used in MessageList.mastraDBMessageToAIV4UIMessage\n */\nexport function getTextContentFromMastraDBMessage(message: MastraDBMessage): string {\n if (typeof message.content.content === 'string' && message.content.content !== '') {\n return message.content.content;\n }\n if (message.content.parts && Array.isArray(message.content.parts)) {\n // Return only the last text part like AI SDK does\n const textParts = message.content.parts.filter(p => p.type === 'text');\n return textParts.length > 0 ? 
textParts[textParts.length - 1]?.text || '' : '';\n }\n return '';\n}\n\nexport const roundToTwoDecimals = (num: number) => {\n return Math.round((num + Number.EPSILON) * 100) / 100;\n};\n\nexport function isCloserTo(value: number, target1: number, target2: number): boolean {\n return Math.abs(value - target1) < Math.abs(value - target2);\n}\n\nexport type TestCase = {\n input: string;\n output: string;\n expectedResult: {\n score: number;\n reason?: string;\n };\n};\n\nexport type TestCaseWithContext = TestCase & {\n context: string[];\n};\n\nexport const createTestRun = (\n input: string,\n output: string,\n additionalContext?: Record<string, any>,\n requestContext?: Record<string, any>,\n): ScoringInput => {\n return {\n input: [{ role: 'user', content: input }],\n output: { role: 'assistant', text: output },\n additionalContext: additionalContext ?? {},\n requestContext: requestContext ?? {},\n };\n};\n\nexport const getUserMessageFromRunInput = (input?: ScorerRunInputForAgent): string | undefined => {\n const message = input?.inputMessages.find(({ role }) => role === 'user');\n return message ? 
getTextContentFromMastraDBMessage(message) : undefined;\n};\n\nexport const getSystemMessagesFromRunInput = (input?: ScorerRunInputForAgent): string[] => {\n const systemMessages: string[] = [];\n\n // Add standard system messages\n if (input?.systemMessages) {\n systemMessages.push(\n ...input.systemMessages\n .map(msg => {\n // Handle different content types - extract text if it's an array of parts\n if (typeof msg.content === 'string') {\n return msg.content;\n } else if (Array.isArray(msg.content)) {\n // Extract text from parts array\n return msg.content\n .filter(part => part.type === 'text')\n .map(part => part.text || '')\n .join(' ');\n }\n return '';\n })\n .filter(content => content),\n );\n }\n\n // Add tagged system messages (these are specialized system prompts)\n if (input?.taggedSystemMessages) {\n Object.values(input.taggedSystemMessages).forEach(messages => {\n messages.forEach(msg => {\n if (typeof msg.content === 'string') {\n systemMessages.push(msg.content);\n }\n });\n });\n }\n\n return systemMessages;\n};\n\nexport const getCombinedSystemPrompt = (input?: ScorerRunInputForAgent): string => {\n const systemMessages = getSystemMessagesFromRunInput(input);\n return systemMessages.join('\\n\\n');\n};\n\nexport const getAssistantMessageFromRunOutput = (output?: ScorerRunOutputForAgent) => {\n const message = output?.find(({ role }) => role === 'assistant');\n return message ? 
getTextContentFromMastraDBMessage(message) : undefined;\n};\n\nexport const createToolInvocation = ({\n toolCallId,\n toolName,\n args,\n result,\n state = 'result',\n}: {\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state?: ToolInvocation['state'];\n}): { toolCallId: string; toolName: string; args: Record<string, any>; result: Record<string, any>; state: string } => {\n return {\n toolCallId,\n toolName,\n args,\n result,\n state,\n };\n};\n\n/**\n * Helper function to create MastraDBMessage objects for tests\n * Supports optional tool invocations for testing tool call scenarios\n */\nexport function createTestMessage({\n content,\n role,\n id = 'test-message',\n toolInvocations = [],\n}: {\n content: string;\n role: 'user' | 'assistant' | 'system';\n id?: string;\n toolInvocations?: Array<{\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state: any;\n }>;\n}): MastraDBMessage {\n return {\n id,\n role,\n content: {\n format: 2,\n parts: [{ type: 'text', text: content }],\n content,\n ...(toolInvocations.length > 0 && {\n toolInvocations: toolInvocations.map(ti => ({\n toolCallId: ti.toolCallId,\n toolName: ti.toolName,\n args: ti.args,\n result: ti.result,\n state: ti.state,\n })),\n }),\n },\n createdAt: new Date(),\n };\n}\n\nexport const createAgentTestRun = ({\n inputMessages = [],\n output,\n rememberedMessages = [],\n systemMessages = [],\n taggedSystemMessages = {},\n requestContext = new RequestContext(),\n runId = crypto.randomUUID(),\n}: {\n inputMessages?: ScorerRunInputForAgent['inputMessages'];\n output: ScorerRunOutputForAgent;\n rememberedMessages?: ScorerRunInputForAgent['rememberedMessages'];\n systemMessages?: ScorerRunInputForAgent['systemMessages'];\n taggedSystemMessages?: ScorerRunInputForAgent['taggedSystemMessages'];\n requestContext?: RequestContext;\n runId?: string;\n}): {\n input: ScorerRunInputForAgent;\n output: 
ScorerRunOutputForAgent;\n requestContext: RequestContext;\n runId: string;\n} => {\n return {\n input: {\n inputMessages,\n rememberedMessages,\n systemMessages,\n taggedSystemMessages,\n },\n output,\n requestContext,\n runId,\n };\n};\n\nexport type ToolCallInfo = {\n toolName: string;\n toolCallId: string;\n messageIndex: number;\n invocationIndex: number;\n};\n\nexport function extractToolCalls(output: ScorerRunOutputForAgent): { tools: string[]; toolCallInfos: ToolCallInfo[] } {\n const toolCalls: string[] = [];\n const toolCallInfos: ToolCallInfo[] = [];\n\n for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {\n const message = output[messageIndex];\n // Tool invocations are now nested under content\n if (message?.content?.toolInvocations) {\n for (let invocationIndex = 0; invocationIndex < message.content.toolInvocations.length; invocationIndex++) {\n const invocation = message.content.toolInvocations[invocationIndex];\n if (invocation && invocation.toolName && (invocation.state === 'result' || invocation.state === 'call')) {\n toolCalls.push(invocation.toolName);\n toolCallInfos.push({\n toolName: invocation.toolName,\n toolCallId: invocation.toolCallId || `${messageIndex}-${invocationIndex}`,\n messageIndex,\n invocationIndex,\n });\n }\n }\n }\n }\n\n return { tools: toolCalls, toolCallInfos };\n}\n\nexport const extractInputMessages = (runInput: ScorerRunInputForAgent | undefined): string[] => {\n return runInput?.inputMessages?.map(msg => getTextContentFromMastraDBMessage(msg)) || [];\n};\n\nexport const extractAgentResponseMessages = (runOutput: ScorerRunOutputForAgent): string[] => {\n return runOutput.filter(msg => msg.role === 'assistant').map(msg => getTextContentFromMastraDBMessage(msg));\n};\n"]}

package/dist/{chunk-QKR2PMLZ.cjs → chunk-TPQLLHZW.cjs} RENAMED Viewed

@@ -1,24 +1,35 @@
 'use strict';
-var runtimeContext = require('@mastra/core/runtime-context');
+var requestContext = require('@mastra/core/request-context');
 // src/scorers/utils.ts
+function getTextContentFromMastraDBMessage(message) {
+  if (typeof message.content.content === "string" && message.content.content !== "") {
+    return message.content.content;
+  }
+  if (message.content.parts && Array.isArray(message.content.parts)) {
+    const textParts = message.content.parts.filter((p) => p.type === "text");
+    return textParts.length > 0 ? textParts[textParts.length - 1]?.text || "" : "";
+  }
+  return "";
+}
 var roundToTwoDecimals = (num) => {
   return Math.round((num + Number.EPSILON) * 100) / 100;
 };
 function isCloserTo(value, target1, target2) {
   return Math.abs(value - target1) < Math.abs(value - target2);
 }
-var createTestRun = (input, output, additionalContext, runtimeContext) => {
+var createTestRun = (input, output, additionalContext, requestContext) => {
   return {
     input: [{ role: "user", content: input }],
     output: { role: "assistant", text: output },
     additionalContext: additionalContext ?? {},
-    runtimeContext: runtimeContext ?? {}
+    requestContext: requestContext ?? {}
   };
 };
 var getUserMessageFromRunInput = (input) => {
-  return input?.inputMessages.find(({ role }) => role === "user")?.content;
+  const message = input?.inputMessages.find(({ role }) => role === "user");
+  return message ? getTextContentFromMastraDBMessage(message) : void 0;
 };
 var getSystemMessagesFromRunInput = (input) => {
   const systemMessages = [];
@@ -50,7 +61,8 @@ var getCombinedSystemPrompt = (input) => {
   return systemMessages.join("\n\n");
 };
 var getAssistantMessageFromRunOutput = (output) => {
-  return output?.find(({ role }) => role === "assistant")?.content;
+  const message = output?.find(({ role }) => role === "assistant");
+  return message ? getTextContentFromMastraDBMessage(message) : void 0;
 };
 var createToolInvocation = ({
   toolCallId,
@@ -67,27 +79,39 @@ var createToolInvocation = ({
     state
   };
 };
-var createUIMessage = ({
+function createTestMessage({
   content,
   role,
   id = "test-message",
   toolInvocations = []
-}) => {
+}) {
   return {
     id,
     role,
-    content,
-    parts: [{ type: "text", text: content }],
-    toolInvocations
+    content: {
+      format: 2,
+      parts: [{ type: "text", text: content }],
+      content,
+      ...toolInvocations.length > 0 && {
+        toolInvocations: toolInvocations.map((ti) => ({
+          toolCallId: ti.toolCallId,
+          toolName: ti.toolName,
+          args: ti.args,
+          result: ti.result,
+          state: ti.state
+        }))
+      }
+    },
+    createdAt: /* @__PURE__ */ new Date()
   };
-};
+}
 var createAgentTestRun = ({
   inputMessages = [],
   output,
   rememberedMessages = [],
   systemMessages = [],
   taggedSystemMessages = {},
-  runtimeContext: runtimeContext$1 = new runtimeContext.RuntimeContext(),
+  requestContext: requestContext$1 = new requestContext.RequestContext(),
   runId = crypto.randomUUID()
 }) => {
   return {
@@ -98,7 +122,7 @@ var createAgentTestRun = ({
       taggedSystemMessages
     },
     output,
-    runtimeContext: runtimeContext$1,
+    requestContext: requestContext$1,
     runId
   };
 };
@@ -107,9 +131,9 @@ function extractToolCalls(output) {
   const toolCallInfos = [];
   for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {
     const message = output[messageIndex];
-    if (message?.toolInvocations) {
-      for (let invocationIndex = 0; invocationIndex < message.toolInvocations.length; invocationIndex++) {
-        const invocation = message.toolInvocations[invocationIndex];
+    if (message?.content?.toolInvocations) {
+      for (let invocationIndex = 0; invocationIndex < message.content.toolInvocations.length; invocationIndex++) {
+        const invocation = message.content.toolInvocations[invocationIndex];
         if (invocation && invocation.toolName && (invocation.state === "result" || invocation.state === "call")) {
           toolCalls.push(invocation.toolName);
           toolCallInfos.push({
@@ -125,24 +149,25 @@ function extractToolCalls(output) {
   return { tools: toolCalls, toolCallInfos };
 }
 var extractInputMessages = (runInput) => {
-  return runInput?.inputMessages?.map((msg) => msg.content) || [];
+  return runInput?.inputMessages?.map((msg) => getTextContentFromMastraDBMessage(msg)) || [];
 };
 var extractAgentResponseMessages = (runOutput) => {
-  return runOutput.filter((msg) => msg.role === "assistant").map((msg) => msg.content);
+  return runOutput.filter((msg) => msg.role === "assistant").map((msg) => getTextContentFromMastraDBMessage(msg));
 };
 exports.createAgentTestRun = createAgentTestRun;
+exports.createTestMessage = createTestMessage;
 exports.createTestRun = createTestRun;
 exports.createToolInvocation = createToolInvocation;
-exports.createUIMessage = createUIMessage;
 exports.extractAgentResponseMessages = extractAgentResponseMessages;
 exports.extractInputMessages = extractInputMessages;
 exports.extractToolCalls = extractToolCalls;
 exports.getAssistantMessageFromRunOutput = getAssistantMessageFromRunOutput;
 exports.getCombinedSystemPrompt = getCombinedSystemPrompt;
 exports.getSystemMessagesFromRunInput = getSystemMessagesFromRunInput;
+exports.getTextContentFromMastraDBMessage = getTextContentFromMastraDBMessage;
 exports.getUserMessageFromRunInput = getUserMessageFromRunInput;
 exports.isCloserTo = isCloserTo;
 exports.roundToTwoDecimals = roundToTwoDecimals;
-//# sourceMappingURL=chunk-QKR2PMLZ.cjs.map
-//# sourceMappingURL=chunk-QKR2PMLZ.cjs.map
+//# sourceMappingURL=chunk-TPQLLHZW.cjs.map
+//# sourceMappingURL=chunk-TPQLLHZW.cjs.map