@mastra/evals 0.14.3 → 1.0.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. package/CHANGELOG.md +32 -14
  2. package/README.md +19 -159
  3. package/dist/{chunk-KHEXN75Q.js → chunk-CCLM7KPF.js} +45 -21
  4. package/dist/chunk-CCLM7KPF.js.map +1 -0
  5. package/dist/{chunk-QKR2PMLZ.cjs → chunk-TPQLLHZW.cjs} +46 -21
  6. package/dist/chunk-TPQLLHZW.cjs.map +1 -0
  7. package/dist/scorers/code/completeness/index.d.ts +1 -1
  8. package/dist/scorers/code/completeness/index.d.ts.map +1 -1
  9. package/dist/scorers/code/content-similarity/index.d.ts +1 -1
  10. package/dist/scorers/code/content-similarity/index.d.ts.map +1 -1
  11. package/dist/scorers/code/keyword-coverage/index.d.ts +1 -1
  12. package/dist/scorers/code/keyword-coverage/index.d.ts.map +1 -1
  13. package/dist/scorers/code/textual-difference/index.d.ts +1 -1
  14. package/dist/scorers/code/textual-difference/index.d.ts.map +1 -1
  15. package/dist/scorers/code/tone/index.d.ts +1 -1
  16. package/dist/scorers/code/tone/index.d.ts.map +1 -1
  17. package/dist/scorers/code/tool-call-accuracy/index.d.ts +1 -1
  18. package/dist/scorers/code/tool-call-accuracy/index.d.ts.map +1 -1
  19. package/dist/scorers/llm/answer-relevancy/index.d.ts +1 -1
  20. package/dist/scorers/llm/answer-relevancy/index.d.ts.map +1 -1
  21. package/dist/scorers/llm/answer-similarity/index.d.ts +2 -2
  22. package/dist/scorers/llm/answer-similarity/index.d.ts.map +1 -1
  23. package/dist/scorers/llm/bias/index.d.ts +2 -2
  24. package/dist/scorers/llm/bias/index.d.ts.map +1 -1
  25. package/dist/scorers/llm/context-precision/index.d.ts +3 -3
  26. package/dist/scorers/llm/context-precision/index.d.ts.map +1 -1
  27. package/dist/scorers/llm/context-relevance/index.d.ts +3 -3
  28. package/dist/scorers/llm/context-relevance/index.d.ts.map +1 -1
  29. package/dist/scorers/llm/faithfulness/index.d.ts +2 -2
  30. package/dist/scorers/llm/faithfulness/index.d.ts.map +1 -1
  31. package/dist/scorers/llm/hallucination/index.d.ts +2 -2
  32. package/dist/scorers/llm/hallucination/index.d.ts.map +1 -1
  33. package/dist/scorers/llm/noise-sensitivity/index.d.ts +1 -1
  34. package/dist/scorers/llm/noise-sensitivity/index.d.ts.map +1 -1
  35. package/dist/scorers/llm/prompt-alignment/index.d.ts +2 -2
  36. package/dist/scorers/llm/prompt-alignment/index.d.ts.map +1 -1
  37. package/dist/scorers/llm/tool-call-accuracy/index.d.ts +2 -2
  38. package/dist/scorers/llm/tool-call-accuracy/index.d.ts.map +1 -1
  39. package/dist/scorers/llm/toxicity/index.d.ts +2 -2
  40. package/dist/scorers/llm/toxicity/index.d.ts.map +1 -1
  41. package/dist/scorers/{llm → prebuilt}/index.cjs +479 -62
  42. package/dist/scorers/prebuilt/index.cjs.map +1 -0
  43. package/dist/scorers/prebuilt/index.d.ts +3 -0
  44. package/dist/scorers/prebuilt/index.d.ts.map +1 -0
  45. package/dist/scorers/{llm → prebuilt}/index.js +419 -15
  46. package/dist/scorers/prebuilt/index.js.map +1 -0
  47. package/dist/scorers/utils.cjs +21 -17
  48. package/dist/scorers/utils.d.ts +21 -11
  49. package/dist/scorers/utils.d.ts.map +1 -1
  50. package/dist/scorers/utils.js +1 -1
  51. package/package.json +12 -58
  52. package/dist/attachListeners.d.ts +0 -4
  53. package/dist/attachListeners.d.ts.map +0 -1
  54. package/dist/chunk-7QAUEU4L.cjs +0 -10
  55. package/dist/chunk-7QAUEU4L.cjs.map +0 -1
  56. package/dist/chunk-EMMSS5I5.cjs +0 -37
  57. package/dist/chunk-EMMSS5I5.cjs.map +0 -1
  58. package/dist/chunk-G3PMV62Z.js +0 -33
  59. package/dist/chunk-G3PMV62Z.js.map +0 -1
  60. package/dist/chunk-IUSAD2BW.cjs +0 -19
  61. package/dist/chunk-IUSAD2BW.cjs.map +0 -1
  62. package/dist/chunk-KHEXN75Q.js.map +0 -1
  63. package/dist/chunk-QKR2PMLZ.cjs.map +0 -1
  64. package/dist/chunk-QTWX6TKR.js +0 -8
  65. package/dist/chunk-QTWX6TKR.js.map +0 -1
  66. package/dist/chunk-YGTIO3J5.js +0 -17
  67. package/dist/chunk-YGTIO3J5.js.map +0 -1
  68. package/dist/dist-LDTK3TIP.cjs +0 -16759
  69. package/dist/dist-LDTK3TIP.cjs.map +0 -1
  70. package/dist/dist-OWYZEOJK.js +0 -16737
  71. package/dist/dist-OWYZEOJK.js.map +0 -1
  72. package/dist/evaluation.d.ts +0 -8
  73. package/dist/evaluation.d.ts.map +0 -1
  74. package/dist/index.cjs +0 -93
  75. package/dist/index.cjs.map +0 -1
  76. package/dist/index.d.ts +0 -3
  77. package/dist/index.d.ts.map +0 -1
  78. package/dist/index.js +0 -89
  79. package/dist/index.js.map +0 -1
  80. package/dist/magic-string.es-7ORA5OGR.js +0 -1305
  81. package/dist/magic-string.es-7ORA5OGR.js.map +0 -1
  82. package/dist/magic-string.es-NZ2XWFKN.cjs +0 -1311
  83. package/dist/magic-string.es-NZ2XWFKN.cjs.map +0 -1
  84. package/dist/metrics/index.d.ts +0 -4
  85. package/dist/metrics/index.d.ts.map +0 -1
  86. package/dist/metrics/judge/index.cjs +0 -12
  87. package/dist/metrics/judge/index.cjs.map +0 -1
  88. package/dist/metrics/judge/index.d.ts +0 -7
  89. package/dist/metrics/judge/index.d.ts.map +0 -1
  90. package/dist/metrics/judge/index.js +0 -3
  91. package/dist/metrics/judge/index.js.map +0 -1
  92. package/dist/metrics/llm/answer-relevancy/index.d.ts +0 -16
  93. package/dist/metrics/llm/answer-relevancy/index.d.ts.map +0 -1
  94. package/dist/metrics/llm/answer-relevancy/metricJudge.d.ts +0 -20
  95. package/dist/metrics/llm/answer-relevancy/metricJudge.d.ts.map +0 -1
  96. package/dist/metrics/llm/answer-relevancy/prompts.d.ts +0 -19
  97. package/dist/metrics/llm/answer-relevancy/prompts.d.ts.map +0 -1
  98. package/dist/metrics/llm/bias/index.d.ts +0 -14
  99. package/dist/metrics/llm/bias/index.d.ts.map +0 -1
  100. package/dist/metrics/llm/bias/metricJudge.d.ts +0 -14
  101. package/dist/metrics/llm/bias/metricJudge.d.ts.map +0 -1
  102. package/dist/metrics/llm/bias/prompts.d.ts +0 -14
  103. package/dist/metrics/llm/bias/prompts.d.ts.map +0 -1
  104. package/dist/metrics/llm/context-position/index.d.ts +0 -16
  105. package/dist/metrics/llm/context-position/index.d.ts.map +0 -1
  106. package/dist/metrics/llm/context-position/metricJudge.d.ts +0 -20
  107. package/dist/metrics/llm/context-position/metricJudge.d.ts.map +0 -1
  108. package/dist/metrics/llm/context-position/prompts.d.ts +0 -17
  109. package/dist/metrics/llm/context-position/prompts.d.ts.map +0 -1
  110. package/dist/metrics/llm/context-precision/index.d.ts +0 -16
  111. package/dist/metrics/llm/context-precision/index.d.ts.map +0 -1
  112. package/dist/metrics/llm/context-precision/metricJudge.d.ts +0 -20
  113. package/dist/metrics/llm/context-precision/metricJudge.d.ts.map +0 -1
  114. package/dist/metrics/llm/context-precision/prompts.d.ts +0 -17
  115. package/dist/metrics/llm/context-precision/prompts.d.ts.map +0 -1
  116. package/dist/metrics/llm/context-relevancy/index.d.ts +0 -16
  117. package/dist/metrics/llm/context-relevancy/index.d.ts.map +0 -1
  118. package/dist/metrics/llm/context-relevancy/metricJudge.d.ts +0 -16
  119. package/dist/metrics/llm/context-relevancy/metricJudge.d.ts.map +0 -1
  120. package/dist/metrics/llm/context-relevancy/prompts.d.ts +0 -13
  121. package/dist/metrics/llm/context-relevancy/prompts.d.ts.map +0 -1
  122. package/dist/metrics/llm/contextual-recall/index.d.ts +0 -16
  123. package/dist/metrics/llm/contextual-recall/index.d.ts.map +0 -1
  124. package/dist/metrics/llm/contextual-recall/metricJudge.d.ts +0 -16
  125. package/dist/metrics/llm/contextual-recall/metricJudge.d.ts.map +0 -1
  126. package/dist/metrics/llm/contextual-recall/prompts.d.ts +0 -13
  127. package/dist/metrics/llm/contextual-recall/prompts.d.ts.map +0 -1
  128. package/dist/metrics/llm/faithfulness/index.d.ts +0 -16
  129. package/dist/metrics/llm/faithfulness/index.d.ts.map +0 -1
  130. package/dist/metrics/llm/faithfulness/metricJudge.d.ts +0 -22
  131. package/dist/metrics/llm/faithfulness/metricJudge.d.ts.map +0 -1
  132. package/dist/metrics/llm/faithfulness/prompts.d.ts +0 -20
  133. package/dist/metrics/llm/faithfulness/prompts.d.ts.map +0 -1
  134. package/dist/metrics/llm/hallucination/index.d.ts +0 -16
  135. package/dist/metrics/llm/hallucination/index.d.ts.map +0 -1
  136. package/dist/metrics/llm/hallucination/metricJudge.d.ts +0 -22
  137. package/dist/metrics/llm/hallucination/metricJudge.d.ts.map +0 -1
  138. package/dist/metrics/llm/hallucination/prompts.d.ts +0 -17
  139. package/dist/metrics/llm/hallucination/prompts.d.ts.map +0 -1
  140. package/dist/metrics/llm/index.cjs +0 -2481
  141. package/dist/metrics/llm/index.cjs.map +0 -1
  142. package/dist/metrics/llm/index.d.ts +0 -12
  143. package/dist/metrics/llm/index.d.ts.map +0 -1
  144. package/dist/metrics/llm/index.js +0 -2469
  145. package/dist/metrics/llm/index.js.map +0 -1
  146. package/dist/metrics/llm/prompt-alignment/index.d.ts +0 -33
  147. package/dist/metrics/llm/prompt-alignment/index.d.ts.map +0 -1
  148. package/dist/metrics/llm/prompt-alignment/metricJudge.d.ts +0 -20
  149. package/dist/metrics/llm/prompt-alignment/metricJudge.d.ts.map +0 -1
  150. package/dist/metrics/llm/prompt-alignment/prompts.d.ts +0 -17
  151. package/dist/metrics/llm/prompt-alignment/prompts.d.ts.map +0 -1
  152. package/dist/metrics/llm/summarization/index.d.ts +0 -19
  153. package/dist/metrics/llm/summarization/index.d.ts.map +0 -1
  154. package/dist/metrics/llm/summarization/metricJudge.d.ts +0 -34
  155. package/dist/metrics/llm/summarization/metricJudge.d.ts.map +0 -1
  156. package/dist/metrics/llm/summarization/prompts.d.ts +0 -30
  157. package/dist/metrics/llm/summarization/prompts.d.ts.map +0 -1
  158. package/dist/metrics/llm/toxicity/index.d.ts +0 -14
  159. package/dist/metrics/llm/toxicity/index.d.ts.map +0 -1
  160. package/dist/metrics/llm/toxicity/metricJudge.d.ts +0 -14
  161. package/dist/metrics/llm/toxicity/metricJudge.d.ts.map +0 -1
  162. package/dist/metrics/llm/toxicity/prompts.d.ts +0 -10
  163. package/dist/metrics/llm/toxicity/prompts.d.ts.map +0 -1
  164. package/dist/metrics/llm/types.d.ts +0 -7
  165. package/dist/metrics/llm/types.d.ts.map +0 -1
  166. package/dist/metrics/llm/utils.d.ts +0 -14
  167. package/dist/metrics/llm/utils.d.ts.map +0 -1
  168. package/dist/metrics/nlp/completeness/index.d.ts +0 -21
  169. package/dist/metrics/nlp/completeness/index.d.ts.map +0 -1
  170. package/dist/metrics/nlp/content-similarity/index.d.ts +0 -18
  171. package/dist/metrics/nlp/content-similarity/index.d.ts.map +0 -1
  172. package/dist/metrics/nlp/index.cjs +0 -203
  173. package/dist/metrics/nlp/index.cjs.map +0 -1
  174. package/dist/metrics/nlp/index.d.ts +0 -6
  175. package/dist/metrics/nlp/index.d.ts.map +0 -1
  176. package/dist/metrics/nlp/index.js +0 -190
  177. package/dist/metrics/nlp/index.js.map +0 -1
  178. package/dist/metrics/nlp/keyword-coverage/index.d.ts +0 -13
  179. package/dist/metrics/nlp/keyword-coverage/index.d.ts.map +0 -1
  180. package/dist/metrics/nlp/textual-difference/index.d.ts +0 -15
  181. package/dist/metrics/nlp/textual-difference/index.d.ts.map +0 -1
  182. package/dist/metrics/nlp/tone/index.d.ts +0 -18
  183. package/dist/metrics/nlp/tone/index.d.ts.map +0 -1
  184. package/dist/scorers/code/index.cjs +0 -329
  185. package/dist/scorers/code/index.cjs.map +0 -1
  186. package/dist/scorers/code/index.js +0 -315
  187. package/dist/scorers/code/index.js.map +0 -1
  188. package/dist/scorers/llm/index.cjs.map +0 -1
  189. package/dist/scorers/llm/index.js.map +0 -1
package/CHANGELOG.md CHANGED
@@ -1,31 +1,49 @@
1
1
  # @mastra/evals
2
2
 
3
- ## 0.14.3
3
+ ## 1.0.0-beta.0
4
4
 
5
- ### Patch Changes
5
+ ### Major Changes
6
6
 
7
- - update peerdeps ([`5ca1cca`](https://github.com/mastra-ai/mastra/commit/5ca1ccac61ffa7141e6d9fa8f22d3ad4d03bf5dc))
7
+ - Moving scorers under the eval domain, api method consistency, prebuilt evals, scorers require ids. ([#9589](https://github.com/mastra-ai/mastra/pull/9589))
8
8
 
9
- - Updated dependencies [[`5ca1cca`](https://github.com/mastra-ai/mastra/commit/5ca1ccac61ffa7141e6d9fa8f22d3ad4d03bf5dc), [`6d7e90d`](https://github.com/mastra-ai/mastra/commit/6d7e90db09713e6250f4d6c3d3cff1b4740e50f9), [`f78b908`](https://github.com/mastra-ai/mastra/commit/f78b9080e11af765969b36b4a619761056030840), [`23c2614`](https://github.com/mastra-ai/mastra/commit/23c26140fdbf04b8c59e8d7d52106d67dad962ec), [`e365eda`](https://github.com/mastra-ai/mastra/commit/e365eda45795b43707310531cac1e2ce4e5a0712)]:
10
- - @mastra/core@0.24.0
9
+ - **BREAKING CHANGE**: Scorers for Agents will now use `MastraDBMessage` instead of `UIMessage` ([#9702](https://github.com/mastra-ai/mastra/pull/9702))
10
+ - Scorer input/output types now use `MastraDBMessage[]` with nested `content` object structure
11
+ - Added `getTextContentFromMastraDBMessage()` helper function to extract text content from `MastraDBMessage` objects
12
+ - Added `createTestMessage()` helper function for creating `MastraDBMessage` objects in tests with optional tool invocations support
13
+ - Updated `extractToolCalls()` to access tool invocations from nested `content` structure
14
+ - Updated `getUserMessageFromRunInput()` and `getAssistantMessageFromRunOutput()` to use new message structure
15
+ - Removed `createUIMessage()`
11
16
 
12
- ## 0.14.3-alpha.0
17
+ - Bump minimum required Node.js version to 22.13.0 ([#9706](https://github.com/mastra-ai/mastra/pull/9706))
13
18
 
14
- ### Patch Changes
19
+ - Rename RuntimeContext to RequestContext ([#9511](https://github.com/mastra-ai/mastra/pull/9511))
15
20
 
16
- - update peerdeps ([`5ca1cca`](https://github.com/mastra-ai/mastra/commit/5ca1ccac61ffa7141e6d9fa8f22d3ad4d03bf5dc))
21
+ - Remove various deprecated APIs from agent class. ([#9257](https://github.com/mastra-ai/mastra/pull/9257))
22
+ - `agent.llm` → `agent.getLLM()`
23
+ - `agent.tools` → `agent.getTools()`
24
+ - `agent.instructions` → `agent.getInstructions()`
25
+ - `agent.speak()` → `agent.voice.speak()`
26
+ - `agent.getSpeakers()` → `agent.voice.getSpeakers()`
27
+ - `agent.listen` → `agent.voice.listen()`
28
+ - `agent.fetchMemory` → `(await agent.getMemory()).query()`
29
+ - `agent.toStep` → Add agent directly to the step, workflows handle the transformation
17
30
 
18
- - Updated dependencies [[`5ca1cca`](https://github.com/mastra-ai/mastra/commit/5ca1ccac61ffa7141e6d9fa8f22d3ad4d03bf5dc), [`6d7e90d`](https://github.com/mastra-ai/mastra/commit/6d7e90db09713e6250f4d6c3d3cff1b4740e50f9), [`f78b908`](https://github.com/mastra-ai/mastra/commit/f78b9080e11af765969b36b4a619761056030840), [`23c2614`](https://github.com/mastra-ai/mastra/commit/23c26140fdbf04b8c59e8d7d52106d67dad962ec), [`e365eda`](https://github.com/mastra-ai/mastra/commit/e365eda45795b43707310531cac1e2ce4e5a0712)]:
19
- - @mastra/core@0.24.0-alpha.0
31
+ - Mark as stable ([`83d5942`](https://github.com/mastra-ai/mastra/commit/83d5942669ce7bba4a6ca4fd4da697a10eb5ebdc))
20
32
 
21
- ## 0.14.2
33
+ - Remove legacy evals from Mastra ([#9491](https://github.com/mastra-ai/mastra/pull/9491))
34
+
35
+ ### Minor Changes
36
+
37
+ - Update peer dependencies to match core package version bump (1.0.0) ([#9491](https://github.com/mastra-ai/mastra/pull/9491))
22
38
 
23
39
  ### Patch Changes
24
40
 
25
- - Fix peerdependencies ([`eb7c1c8`](https://github.com/mastra-ai/mastra/commit/eb7c1c8c592d8fb16dfd250e337d9cdc73c8d5de))
41
+ - Update peer dependencies to match core package version bump (1.0.0) ([#9237](https://github.com/mastra-ai/mastra/pull/9237))
26
42
 
27
- - Updated dependencies []:
28
- - @mastra/core@0.23.1
43
+ - Remove difflib ([#9756](https://github.com/mastra-ai/mastra/pull/9756))
44
+
45
+ - Updated dependencies [[`39c9743`](https://github.com/mastra-ai/mastra/commit/39c97432d084294f8ba85fbf3ef28098ff21459e), [`f743dbb`](https://github.com/mastra-ai/mastra/commit/f743dbb8b40d1627b5c10c0e6fc154f4ebb6e394), [`fec5129`](https://github.com/mastra-ai/mastra/commit/fec5129de7fc64423ea03661a56cef31dc747a0d), [`0491e7c`](https://github.com/mastra-ai/mastra/commit/0491e7c9b714cb0ba22187ee062147ec2dd7c712), [`f6f4903`](https://github.com/mastra-ai/mastra/commit/f6f4903397314f73362061dc5a3e8e7c61ea34aa), [`0e8ed46`](https://github.com/mastra-ai/mastra/commit/0e8ed467c54d6901a6a365f270ec15d6faadb36c), [`6c049d9`](https://github.com/mastra-ai/mastra/commit/6c049d94063fdcbd5b81c4912a2bf82a92c9cc0b), [`2f897df`](https://github.com/mastra-ai/mastra/commit/2f897df208508f46f51b7625e5dd20c37f93e0e3), [`3443770`](https://github.com/mastra-ai/mastra/commit/3443770662df8eb24c9df3589b2792d78cfcb811), [`f0a07e0`](https://github.com/mastra-ai/mastra/commit/f0a07e0111b3307c5fabfa4094c5c2cfb734fbe6), [`aaa40e7`](https://github.com/mastra-ai/mastra/commit/aaa40e788628b319baa8e889407d11ad626547fa), [`1521d71`](https://github.com/mastra-ai/mastra/commit/1521d716e5daedc74690c983fbd961123c56756b), [`9e1911d`](https://github.com/mastra-ai/mastra/commit/9e1911db2b4db85e0e768c3f15e0d61e319869f6), [`ebac155`](https://github.com/mastra-ai/mastra/commit/ebac15564a590117db7078233f927a7e28a85106), [`dd1c38d`](https://github.com/mastra-ai/mastra/commit/dd1c38d1b75f1b695c27b40d8d9d6ed00d5e0f6f), [`5948e6a`](https://github.com/mastra-ai/mastra/commit/5948e6a5146c83666ba3f294b2be576c82a513fb), [`8940859`](https://github.com/mastra-ai/mastra/commit/89408593658199b4ad67f7b65e888f344e64a442), [`e629310`](https://github.com/mastra-ai/mastra/commit/e629310f1a73fa236d49ec7a1d1cceb6229dc7cc), [`4c6b492`](https://github.com/mastra-ai/mastra/commit/4c6b492c4dd591c6a592520c1f6855d6e936d71f), [`dff01d8`](https://github.com/mastra-ai/mastra/commit/dff01d81ce1f4e4087cfac20fa868e6db138dd14), [`9d819d5`](https://github.com/mastra-ai/mastra/commit/9d819d54b61481639f4008e4694791bddf187edd), [`71c8d6c`](https://github.com/mastra-ai/mastra/commit/71c8d6c161253207b2b9588bdadb7eed604f7253), [`6179a9b`](https://github.com/mastra-ai/mastra/commit/6179a9ba36ffac326de3cc3c43cdc8028d37c251), [`00f4921`](https://github.com/mastra-ai/mastra/commit/00f4921dd2c91a1e5446799599ef7116a8214a1a), [`ca8041c`](https://github.com/mastra-ai/mastra/commit/ca8041cce0379fda22ed293a565bcb5b6ddca68a), [`7051bf3`](https://github.com/mastra-ai/mastra/commit/7051bf38b3b122a069008f861f7bfc004a6d9f6e), [`a8f1494`](https://github.com/mastra-ai/mastra/commit/a8f1494f4bbdc2770bcf327d4c7d869e332183f1), [`0793497`](https://github.com/mastra-ai/mastra/commit/079349753620c40246ffd673e3f9d7d9820beff3), [`5df9cce`](https://github.com/mastra-ai/mastra/commit/5df9cce1a753438413f64c11eeef8f845745c2a8), [`a854ede`](https://github.com/mastra-ai/mastra/commit/a854ede62bf5ac0945a624ac48913dd69c73aabf), [`c576fc0`](https://github.com/mastra-ai/mastra/commit/c576fc0b100b2085afded91a37c97a0ea0ec09c7), [`3defc80`](https://github.com/mastra-ai/mastra/commit/3defc80cf2b88a1b7fc1cc4ddcb91e982a614609), [`16153fe`](https://github.com/mastra-ai/mastra/commit/16153fe7eb13c99401f48e6ca32707c965ee28b9), [`9f4a683`](https://github.com/mastra-ai/mastra/commit/9f4a6833e88b52574665c028fd5508ad5c2f6004), [`bc94344`](https://github.com/mastra-ai/mastra/commit/bc943444a1342d8a662151b7bce1df7dae32f59c), [`57d157f`](https://github.com/mastra-ai/mastra/commit/57d157f0b163a95c3e6c9eae31bdb11d1bfc64f9), [`903f67d`](https://github.com/mastra-ai/mastra/commit/903f67d184504a273893818c02b961f5423a79ad), [`2a90c55`](https://github.com/mastra-ai/mastra/commit/2a90c55a86a9210697d5adaab5ee94584b079adc), [`eb09742`](https://github.com/mastra-ai/mastra/commit/eb09742197f66c4c38154c3beec78313e69760b2), [`96d35f6`](https://github.com/mastra-ai/mastra/commit/96d35f61376bc2b1bf148648a2c1985bd51bef55), [`5cbe88a`](https://github.com/mastra-ai/mastra/commit/5cbe88aefbd9f933bca669fd371ea36bf939ac6d), [`a1bd7b8`](https://github.com/mastra-ai/mastra/commit/a1bd7b8571db16b94eb01588f451a74758c96d65), [`d78b38d`](https://github.com/mastra-ai/mastra/commit/d78b38d898fce285260d3bbb4befade54331617f), [`0633100`](https://github.com/mastra-ai/mastra/commit/0633100a911ad22f5256471bdf753da21c104742), [`c710c16`](https://github.com/mastra-ai/mastra/commit/c710c1652dccfdc4111c8412bca7a6bb1d48b441), [`354ad0b`](https://github.com/mastra-ai/mastra/commit/354ad0b7b1b8183ac567f236a884fc7ede6d7138), [`cfae733`](https://github.com/mastra-ai/mastra/commit/cfae73394f4920635e6c919c8e95ff9a0788e2e5), [`e3dfda7`](https://github.com/mastra-ai/mastra/commit/e3dfda7b11bf3b8c4bb55637028befb5f387fc74), [`844ea5d`](https://github.com/mastra-ai/mastra/commit/844ea5dc0c248961e7bf73629ae7dcff503e853c), [`398fde3`](https://github.com/mastra-ai/mastra/commit/398fde3f39e707cda79372cdae8f9870e3b57c8d), [`f0f8f12`](https://github.com/mastra-ai/mastra/commit/f0f8f125c308f2d0fd36942ef652fd852df7522f), [`0d7618b`](https://github.com/mastra-ai/mastra/commit/0d7618bc650bf2800934b243eca5648f4aeed9c2), [`7b763e5`](https://github.com/mastra-ai/mastra/commit/7b763e52fc3eaf699c2a99f2adf418dd46e4e9a5), [`d36cfbb`](https://github.com/mastra-ai/mastra/commit/d36cfbbb6565ba5f827883cc9bb648eb14befdc1), [`3697853`](https://github.com/mastra-ai/mastra/commit/3697853deeb72017d90e0f38a93c1e29221aeca0), [`b2e45ec`](https://github.com/mastra-ai/mastra/commit/b2e45eca727a8db01a81ba93f1a5219c7183c839), [`d6d49f7`](https://github.com/mastra-ai/mastra/commit/d6d49f7b8714fa19a52ff9c7cf7fb7e73751901e), [`a534e95`](https://github.com/mastra-ai/mastra/commit/a534e9591f83b3cc1ebff99c67edf4cda7bf81d3), [`9d0e7fe`](https://github.com/mastra-ai/mastra/commit/9d0e7feca8ed98de959f53476ee1456073673348), [`53d927c`](https://github.com/mastra-ai/mastra/commit/53d927cc6f03bff33655b7e2b788da445a08731d), [`3f2faf2`](https://github.com/mastra-ai/mastra/commit/3f2faf2e2d685d6c053cc5af1bf9fedf267b2ce5), [`22f64bc`](https://github.com/mastra-ai/mastra/commit/22f64bc1d37149480b58bf2fefe35b79a1e3e7d5), [`83d5942`](https://github.com/mastra-ai/mastra/commit/83d5942669ce7bba4a6ca4fd4da697a10eb5ebdc), [`b7959e6`](https://github.com/mastra-ai/mastra/commit/b7959e6e25a46b480f9ea2217c4c6c588c423791), [`bda6370`](https://github.com/mastra-ai/mastra/commit/bda637009360649aaf579919e7873e33553c273e), [`d7acd8e`](https://github.com/mastra-ai/mastra/commit/d7acd8e987b5d7eff4fd98b0906c17c06a2e83d5), [`c7f1f7d`](https://github.com/mastra-ai/mastra/commit/c7f1f7d24f61f247f018cc2d1f33bf63212959a7), [`0bddc6d`](https://github.com/mastra-ai/mastra/commit/0bddc6d8dbd6f6008c0cba2e4960a2da75a55af1), [`735d8c1`](https://github.com/mastra-ai/mastra/commit/735d8c1c0d19fbc09e6f8b66cf41bc7655993838), [`acf322e`](https://github.com/mastra-ai/mastra/commit/acf322e0f1fd0189684cf529d91c694bea918a45), [`c942802`](https://github.com/mastra-ai/mastra/commit/c942802a477a925b01859a7b8688d4355715caaa), [`a0c8c1b`](https://github.com/mastra-ai/mastra/commit/a0c8c1b87d4fee252aebda73e8637fbe01d761c9), [`cc34739`](https://github.com/mastra-ai/mastra/commit/cc34739c34b6266a91bea561119240a7acf47887), [`c218bd3`](https://github.com/mastra-ai/mastra/commit/c218bd3759e32423735b04843a09404572631014), [`2c4438b`](https://github.com/mastra-ai/mastra/commit/2c4438b87817ab7eed818c7990fef010475af1a3), [`2b8893c`](https://github.com/mastra-ai/mastra/commit/2b8893cb108ef9acb72ee7835cd625610d2c1a4a), [`8e5c75b`](https://github.com/mastra-ai/mastra/commit/8e5c75bdb1d08a42d45309a4c72def4b6890230f), [`e59e0d3`](https://github.com/mastra-ai/mastra/commit/e59e0d32afb5fcf2c9f3c00c8f81f6c21d3a63fa), [`fa8409b`](https://github.com/mastra-ai/mastra/commit/fa8409bc39cfd8ba6643b9db5269b90b22e2a2f7), [`173c535`](https://github.com/mastra-ai/mastra/commit/173c535c0645b0da404fe09f003778f0b0d4e019)]:
46
+ - @mastra/core@1.0.0-beta.0
29
47
 
30
48
  ## 0.14.1
31
49
 
package/README.md CHANGED
@@ -1,6 +1,11 @@
1
1
  # @mastra/evals
2
2
 
3
- A comprehensive evaluation framework for assessing AI model outputs across multiple dimensions.
3
+ `@mastra/evals` ships a collection of scoring utilities you can run locally or inside your own evaluation pipelines. These scorers come in two flavors:
4
+
5
+ - **LLM scorers** – leverage a judge model (e.g. OpenAI, Anthropic) to rate responses for qualities such as faithfulness or toxicity.
6
+ - **Code/NLP scorers** – deterministic heuristics (keyword coverage, similarity, etc.) that do not require an external model.
7
+
8
+ The scorers do not persist results or integrate with Mastra Storage; you decide where and how to record outcomes.
4
9
 
5
10
  ## Installation
6
11
 
@@ -8,171 +13,26 @@ A comprehensive evaluation framework for assessing AI model outputs across multi
8
13
  npm install @mastra/evals
9
14
  ```
10
15
 
11
- ## Overview
12
-
13
- `@mastra/evals` provides a suite of evaluation metrics for assessing AI model outputs. The package includes both LLM-based and NLP-based metrics, enabling both automated and model-assisted evaluation of AI responses.
14
-
15
- ## Features
16
-
17
- ### LLM-Based Metrics
18
-
19
- 1. **Answer Relevancy**
20
- - Evaluates how well an answer addresses the input question
21
- - Considers uncertainty weighting for more nuanced scoring
22
- - Returns detailed reasoning for scores
23
-
24
- 2. **Bias Detection**
25
- - Identifies potential biases in model outputs
26
- - Analyzes opinions and statements for bias indicators
27
- - Provides explanations for detected biases
28
- - Configurable scoring scale
29
-
30
- 3. **Context Precision & Relevancy**
31
- - Assesses how well responses use provided context
32
- - Evaluates accuracy of context usage
33
- - Measures relevance of context to the response
34
- - Analyzes context positioning in responses
35
-
36
- 4. **Faithfulness**
37
- - Verifies that responses are faithful to provided context
38
- - Detects hallucinations or fabricated information
39
- - Evaluates claims against provided context
40
- - Provides detailed analysis of faithfulness breaches
41
-
42
- 5. **Prompt Alignment**
43
- - Measures how well responses follow given instructions
44
- - Evaluates adherence to multiple instruction criteria
45
- - Provides per-instruction scoring
46
- - Supports custom instruction sets
47
-
48
- 6. **Toxicity**
49
- - Detects toxic or harmful content in responses
50
- - Provides detailed reasoning for toxicity verdicts
51
- - Configurable scoring thresholds
52
- - Considers both input and output context
16
+ ## Quick Start
53
17
 
54
- ### NLP-Based Metrics
18
+ ```ts
19
+ import { createFaithfulnessScorer, createContentSimilarityScorer } from '@mastra/evals/scorers/prebuilt';
55
20
 
56
- 1. **Completeness**
57
- - Analyzes structural completeness of responses
58
- - Identifies missing elements from input requirements
59
- - Provides detailed element coverage analysis
60
- - Tracks input-output element ratios
61
-
62
- 2. **Content Similarity**
63
- - Measures text similarity between inputs and outputs
64
- - Configurable for case and whitespace sensitivity
65
- - Returns normalized similarity scores
66
- - Uses string comparison algorithms for accuracy
67
-
68
- 3. **Keyword Coverage**
69
- - Tracks presence of key terms from input in output
70
- - Provides detailed keyword matching statistics
71
- - Calculates coverage ratios
72
- - Useful for ensuring comprehensive responses
73
-
74
- ## Usage
75
-
76
- ### Basic Example
77
-
78
- ```typescript
79
- import { ContentSimilarityMetric, ToxicityMetric } from '@mastra/evals';
80
-
81
- // Initialize metrics
82
- const similarityMetric = new ContentSimilarityMetric({
83
- ignoreCase: true,
84
- ignoreWhitespace: true,
21
+ const faithfulness = createFaithfulnessScorer({
22
+ model: 'openai/gpt-4o-mini')
85
23
  });
86
24
 
87
- const toxicityMetric = new ToxicityMetric({
88
- model: openai('gpt-4'),
89
- scale: 1, // Optional: adjust scoring scale
90
- });
91
-
92
- // Evaluate outputs
93
- const input = 'What is the capital of France?';
94
- const output = 'Paris is the capital of France.';
95
-
96
- const similarityResult = await similarityMetric.measure(input, output);
97
- const toxicityResult = await toxicityMetric.measure(input, output);
98
-
99
- console.log('Similarity Score:', similarityResult.score);
100
- console.log('Toxicity Score:', toxicityResult.score);
101
- ```
25
+ const similarity = createContentSimilarityScorer({ ignoreCase: true });
102
26
 
103
- ### Context-Aware Evaluation
27
+ const answer = 'Paris is the capital of France.';
28
+ const context = ['Paris is the capital of France', 'France is in Europe'];
104
29
 
105
- ```typescript
106
- import { FaithfulnessMetric } from '@mastra/evals';
30
+ const faithfulnessScore = await faithfulness.score({ answer, context });
107
31
 
108
- // Initialize with context
109
- const faithfulnessMetric = new FaithfulnessMetric({
110
- model: openai('gpt-4'),
111
- context: ['Paris is the capital of France', 'Paris has a population of 2.2 million'],
112
- scale: 1,
32
+ const similarityScore = similarity.score({
33
+ input: context[0],
34
+ output: answer
113
35
  });
114
36
 
115
- // Evaluate response against context
116
- const result = await faithfulnessMetric.measure(
117
- 'Tell me about Paris',
118
- 'Paris is the capital of France with 2.2 million residents',
119
- );
120
-
121
- console.log('Faithfulness Score:', result.score);
122
- console.log('Reasoning:', result.reason);
37
+ console.log({ faithfulnessScore, similarityScore });
123
38
  ```
124
-
125
- ## Metric Results
126
-
127
- Each metric returns a standardized result object containing:
128
-
129
- - `score`: Normalized score (typically 0-1)
130
- - `info`: Detailed information about the evaluation
131
- - Additional metric-specific data (e.g., matched keywords, missing elements)
132
-
133
- Some metrics also provide:
134
-
135
- - `reason`: Detailed explanation of the score
136
- - `verdicts`: Individual judgments that contributed to the final score
137
-
138
- ## Telemetry and Logging
139
-
140
- The package includes built-in telemetry and logging capabilities:
141
-
142
- - Automatic evaluation tracking through Mastra Storage
143
- - Integration with OpenTelemetry for performance monitoring
144
- - Detailed evaluation traces for debugging
145
-
146
- ```typescript
147
- import { attachListeners } from '@mastra/evals';
148
-
149
- // Enable basic evaluation tracking
150
- await attachListeners();
151
-
152
- // Store evals in Mastra Storage (if storage is enabled)
153
- await attachListeners(mastra);
154
- // Note: When using in-memory storage, evaluations are isolated to the test process.
155
- // When using file storage, evaluations are persisted and can be queried later.
156
- ```
157
-
158
- ## Environment Variables
159
-
160
- Required for LLM-based metrics:
161
-
162
- - `OPENAI_API_KEY`: For OpenAI model access
163
- - Additional provider keys as needed (Cohere, Anthropic, etc.)
164
-
165
- ## Package Exports
166
-
167
- ```typescript
168
- // Main package exports
169
- import { evaluate } from '@mastra/evals';
170
- // NLP-specific metrics
171
- import { ContentSimilarityMetric } from '@mastra/evals/nlp';
172
- ```
173
-
174
- ## Related Packages
175
-
176
- - `@mastra/core`: Core framework functionality
177
- - `@mastra/engine`: LLM execution engine
178
- - `@mastra/mcp`: Model Context Protocol integration
@@ -1,22 +1,33 @@
1
- import { RuntimeContext } from '@mastra/core/runtime-context';
1
+ import { RequestContext } from '@mastra/core/request-context';
2
2
 
3
3
  // src/scorers/utils.ts
4
+ function getTextContentFromMastraDBMessage(message) {
5
+ if (typeof message.content.content === "string" && message.content.content !== "") {
6
+ return message.content.content;
7
+ }
8
+ if (message.content.parts && Array.isArray(message.content.parts)) {
9
+ const textParts = message.content.parts.filter((p) => p.type === "text");
10
+ return textParts.length > 0 ? textParts[textParts.length - 1]?.text || "" : "";
11
+ }
12
+ return "";
13
+ }
4
14
  var roundToTwoDecimals = (num) => {
5
15
  return Math.round((num + Number.EPSILON) * 100) / 100;
6
16
  };
7
17
  function isCloserTo(value, target1, target2) {
8
18
  return Math.abs(value - target1) < Math.abs(value - target2);
9
19
  }
10
- var createTestRun = (input, output, additionalContext, runtimeContext) => {
20
+ var createTestRun = (input, output, additionalContext, requestContext) => {
11
21
  return {
12
22
  input: [{ role: "user", content: input }],
13
23
  output: { role: "assistant", text: output },
14
24
  additionalContext: additionalContext ?? {},
15
- runtimeContext: runtimeContext ?? {}
25
+ requestContext: requestContext ?? {}
16
26
  };
17
27
  };
18
28
  var getUserMessageFromRunInput = (input) => {
19
- return input?.inputMessages.find(({ role }) => role === "user")?.content;
29
+ const message = input?.inputMessages.find(({ role }) => role === "user");
30
+ return message ? getTextContentFromMastraDBMessage(message) : void 0;
20
31
  };
21
32
  var getSystemMessagesFromRunInput = (input) => {
22
33
  const systemMessages = [];
@@ -48,7 +59,8 @@ var getCombinedSystemPrompt = (input) => {
48
59
  return systemMessages.join("\n\n");
49
60
  };
50
61
  var getAssistantMessageFromRunOutput = (output) => {
51
- return output?.find(({ role }) => role === "assistant")?.content;
62
+ const message = output?.find(({ role }) => role === "assistant");
63
+ return message ? getTextContentFromMastraDBMessage(message) : void 0;
52
64
  };
53
65
  var createToolInvocation = ({
54
66
  toolCallId,
@@ -65,27 +77,39 @@ var createToolInvocation = ({
65
77
  state
66
78
  };
67
79
  };
68
- var createUIMessage = ({
80
+ function createTestMessage({
69
81
  content,
70
82
  role,
71
83
  id = "test-message",
72
84
  toolInvocations = []
73
- }) => {
85
+ }) {
74
86
  return {
75
87
  id,
76
88
  role,
77
- content,
78
- parts: [{ type: "text", text: content }],
79
- toolInvocations
89
+ content: {
90
+ format: 2,
91
+ parts: [{ type: "text", text: content }],
92
+ content,
93
+ ...toolInvocations.length > 0 && {
94
+ toolInvocations: toolInvocations.map((ti) => ({
95
+ toolCallId: ti.toolCallId,
96
+ toolName: ti.toolName,
97
+ args: ti.args,
98
+ result: ti.result,
99
+ state: ti.state
100
+ }))
101
+ }
102
+ },
103
+ createdAt: /* @__PURE__ */ new Date()
80
104
  };
81
- };
105
+ }
82
106
  var createAgentTestRun = ({
83
107
  inputMessages = [],
84
108
  output,
85
109
  rememberedMessages = [],
86
110
  systemMessages = [],
87
111
  taggedSystemMessages = {},
88
- runtimeContext = new RuntimeContext(),
112
+ requestContext = new RequestContext(),
89
113
  runId = crypto.randomUUID()
90
114
  }) => {
91
115
  return {
@@ -96,7 +120,7 @@ var createAgentTestRun = ({
96
120
  taggedSystemMessages
97
121
  },
98
122
  output,
99
- runtimeContext,
123
+ requestContext,
100
124
  runId
101
125
  };
102
126
  };
@@ -105,9 +129,9 @@ function extractToolCalls(output) {
105
129
  const toolCallInfos = [];
106
130
  for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {
107
131
  const message = output[messageIndex];
108
- if (message?.toolInvocations) {
109
- for (let invocationIndex = 0; invocationIndex < message.toolInvocations.length; invocationIndex++) {
110
- const invocation = message.toolInvocations[invocationIndex];
132
+ if (message?.content?.toolInvocations) {
133
+ for (let invocationIndex = 0; invocationIndex < message.content.toolInvocations.length; invocationIndex++) {
134
+ const invocation = message.content.toolInvocations[invocationIndex];
111
135
  if (invocation && invocation.toolName && (invocation.state === "result" || invocation.state === "call")) {
112
136
  toolCalls.push(invocation.toolName);
113
137
  toolCallInfos.push({
@@ -123,12 +147,12 @@ function extractToolCalls(output) {
123
147
  return { tools: toolCalls, toolCallInfos };
124
148
  }
125
149
  var extractInputMessages = (runInput) => {
126
- return runInput?.inputMessages?.map((msg) => msg.content) || [];
150
+ return runInput?.inputMessages?.map((msg) => getTextContentFromMastraDBMessage(msg)) || [];
127
151
  };
128
152
  var extractAgentResponseMessages = (runOutput) => {
129
- return runOutput.filter((msg) => msg.role === "assistant").map((msg) => msg.content);
153
+ return runOutput.filter((msg) => msg.role === "assistant").map((msg) => getTextContentFromMastraDBMessage(msg));
130
154
  };
131
155
 
132
- export { createAgentTestRun, createTestRun, createToolInvocation, createUIMessage, extractAgentResponseMessages, extractInputMessages, extractToolCalls, getAssistantMessageFromRunOutput, getCombinedSystemPrompt, getSystemMessagesFromRunInput, getUserMessageFromRunInput, isCloserTo, roundToTwoDecimals };
133
- //# sourceMappingURL=chunk-KHEXN75Q.js.map
134
- //# sourceMappingURL=chunk-KHEXN75Q.js.map
156
+ export { createAgentTestRun, createTestMessage, createTestRun, createToolInvocation, extractAgentResponseMessages, extractInputMessages, extractToolCalls, getAssistantMessageFromRunOutput, getCombinedSystemPrompt, getSystemMessagesFromRunInput, getTextContentFromMastraDBMessage, getUserMessageFromRunInput, isCloserTo, roundToTwoDecimals };
157
+ //# sourceMappingURL=chunk-CCLM7KPF.js.map
158
+ //# sourceMappingURL=chunk-CCLM7KPF.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/scorers/utils.ts"],"names":[],"mappings":";;;AASO,SAAS,kCAAkC,OAAA,EAAkC;AAClF,EAAA,IAAI,OAAO,QAAQ,OAAA,CAAQ,OAAA,KAAY,YAAY,OAAA,CAAQ,OAAA,CAAQ,YAAY,EAAA,EAAI;AACjF,IAAA,OAAO,QAAQ,OAAA,CAAQ,OAAA;AAAA,EACzB;AACA,EAAA,IAAI,OAAA,CAAQ,QAAQ,KAAA,IAAS,KAAA,CAAM,QAAQ,OAAA,CAAQ,OAAA,CAAQ,KAAK,CAAA,EAAG;AAEjE,IAAA,MAAM,SAAA,GAAY,QAAQ,OAAA,CAAQ,KAAA,CAAM,OAAO,CAAA,CAAA,KAAK,CAAA,CAAE,SAAS,MAAM,CAAA;AACrE,IAAA,OAAO,SAAA,CAAU,SAAS,CAAA,GAAI,SAAA,CAAU,UAAU,MAAA,GAAS,CAAC,CAAA,EAAG,IAAA,IAAQ,EAAA,GAAK,EAAA;AAAA,EAC9E;AACA,EAAA,OAAO,EAAA;AACT;AAEO,IAAM,kBAAA,GAAqB,CAAC,GAAA,KAAgB;AACjD,EAAA,OAAO,KAAK,KAAA,CAAA,CAAO,GAAA,GAAM,MAAA,CAAO,OAAA,IAAW,GAAG,CAAA,GAAI,GAAA;AACpD;AAEO,SAAS,UAAA,CAAW,KAAA,EAAe,OAAA,EAAiB,OAAA,EAA0B;AACnF,EAAA,OAAO,IAAA,CAAK,IAAI,KAAA,GAAQ,OAAO,IAAI,IAAA,CAAK,GAAA,CAAI,QAAQ,OAAO,CAAA;AAC7D;AAeO,IAAM,aAAA,GAAgB,CAC3B,KAAA,EACA,MAAA,EACA,mBACA,cAAA,KACiB;AACjB,EAAA,OAAO;AAAA,IACL,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,OAAA,EAAS,OAAO,CAAA;AAAA,IACxC,MAAA,EAAQ,EAAE,IAAA,EAAM,WAAA,EAAa,MAAM,MAAA,EAAO;AAAA,IAC1C,iBAAA,EAAmB,qBAAqB,EAAC;AAAA,IACzC,cAAA,EAAgB,kBAAkB;AAAC,GACrC;AACF;AAEO,IAAM,0BAAA,GAA6B,CAAC,KAAA,KAAuD;AAChG,EAAA,MAAM,OAAA,GAAU,OAAO,aAAA,CAAc,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,IAAA,KAAS,MAAM,CAAA;AACvE,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAEO,IAAM,6BAAA,GAAgC,CAAC,KAAA,KAA6C;AACzF,EAAA,MAAM,iBAA2B,EAAC;AAGlC,EAAA,IAAI,OAAO,cAAA,EAAgB;AACzB,IAAA,cAAA,CAAe,IAAA;AAAA,MACb,GAAG,KAAA,CAAM,cAAA,CACN,GAAA,CAAI,CAAA,GAAA,KAAO;AAEV,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,OAAO,GAAA,CAAI,OAAA;AAAA,QACb,CAAA,MAAA,IAAW,KAAA,CAAM,OAAA,CAAQ,GAAA,CAAI,OAAO,CAAA,EAAG;AAErC,UAAA,OAAO,GAAA,CAAI,OAAA,CACR,MAAA,CAAO,CAAA,IAAA,KAAQ,KAAK,IAAA,KAAS,MAAM,CAAA,CACnC,GAAA,CAAI,UAAQ,IAAA,CAAK,IAAA,IAAQ,EAAE,CAAA,CAC3B,KAAK,GAAG,CAAA;AAAA,QACb;AACA,QAAA,OAAO,EAAA;AAAA,MACT,CAAC,CAAA,CACA,MAAA,CAAO,CAAA,OAAA,KAAW,OAAO;AAAA,KAC9B;AAAA,EACF;AAGA,EAAA,IAAI,OAAO,oBAAA,EAAsB;AAC/B,IAAA,MAAA,CAAO,MAAA,CAAO,KAAA,CAAM,oBAAoB,CAAA,CAAE,QAAQ,CAAA,QAAA,KAAY;AAC5D,MAAA,QAAA,CAAS,QAAQ,CAAA,GAAA,KAAO;AACtB,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,cAAA,CAAe,IAAA,CAAK,IAAI,OAAO,CAAA;AAAA,QACjC;AAAA,MACF,CAAC,CAAA;AAAA,IACH,CAAC,CAAA;AAAA,EACH;AAEA,EAAA,OAAO,cAAA;AACT;AAEO,IAAM,uBAAA,GAA0B,CAAC,KAAA,KAA2C;AACjF,EAAA,MAAM,cAAA,GAAiB,8BAA8B,KAAK,CAAA;AAC1D,EAAA,OAAO,cAAA,CAAe,KAAK,MAAM,CAAA;AACnC;AAEO,IAAM,gCAAA,GAAmC,CAAC,MAAA,KAAqC;AACpF,EAAA,MAAM,OAAA,GAAU,QAAQ,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,SAAS,WAAW,CAAA;AAC/D,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAEO,IAAM,uBAAuB,CAAC;AAAA,EACnC,UAAA;AAAA,EACA,QAAA;AAAA,EACA,IAAA;AAAA,EACA,MAAA;AAAA,EACA,KAAA,GAAQ;AACV,CAAA,KAMuH;AACrH,EAAA,OAAO;AAAA,IACL,UAAA;AAAA,IACA,QAAA;AAAA,IACA,IAAA;AAAA,IACA,MAAA;AAAA,IACA;AAAA,GACF;AACF;AAMO,SAAS,iBAAA,CAAkB;AAAA,EAChC,OAAA;AAAA,EACA,IAAA;AAAA,EACA,EAAA,GAAK,cAAA;AAAA,EACL,kBAAkB;AACpB,CAAA,EAWoB;AAClB,EAAA,OAAO;AAAA,IACL,EAAA;AAAA,IACA,IAAA;AAAA,IACA,OAAA,EAAS;AAAA,MACP,MAAA,EAAQ,CAAA;AAAA,MACR,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,IAAA,EAAM,SAAS,CAAA;AAAA,MACvC,OAAA;AAAA,MACA,GAAI,eAAA,CAAgB,MAAA,GAAS,CAAA,IAAK;AAAA,QAChC,eAAA,EAAiB,eAAA,CAAgB,GAAA,CAAI,CAAA,EAAA,MAAO;AAAA,UAC1C,YAAY,EAAA,CAAG,UAAA;AAAA,UACf,UAAU,EAAA,CAAG,QAAA;AAAA,UACb,MAAM,EAAA,CAAG,IAAA;AAAA,UACT,QAAQ,EAAA,CAAG,MAAA;AAAA,UACX,OAAO,EAAA,CAAG;AAAA,SACZ,CAAE;AAAA;AACJ,KACF;AAAA,IACA,SAAA,sBAAe,IAAA;AAAK,GACtB;AACF;AAEO,IAAM,qBAAqB,CAAC;AAAA,EACjC,gBAAgB,EAAC;AAAA,EACjB,MAAA;AAAA,EACA,qBAAqB,EAAC;AAAA,EACtB,iBAAiB,EAAC;AAAA,EAClB,uBAAuB,EAAC;AAAA,EACxB,cAAA,GAAiB,IAAI,cAAA,EAAe;AAAA,EACpC,KAAA,GAAQ,OAAO,UAAA;AACjB,CAAA,KAaK;AACH,EAAA,OAAO;AAAA,IACL,KAAA,EAAO;AAAA,MACL,aAAA;AAAA,MACA,kBAAA;AAAA,MACA,cAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,MAAA;AAAA,IACA,cAAA;AAAA,IACA;AAAA,GACF;AACF;AASO,SAAS,iBAAiB,MAAA,EAAqF;AACpH,EAAA,MAAM,YAAsB,EAAC;AAC7B,EAAA,MAAM,gBAAgC,EAAC;AAEvC,EAAA,KAAA,IAAS,YAAA,GAAe,CAAA,EAAG,YAAA,GAAe,MAAA,CAAO,QAAQ,YAAA,EAAA,EAAgB;AACvE,IAAA,MAAM,OAAA,GAAU,OAAO,YAAY,CAAA;AAEnC,IAAA,IAAI,OAAA,EAAS,SAAS,eAAA,EAAiB;AACrC,MAAA,KAAA,IAAS,kBAAkB,CAAA,EAAG,eAAA,GAAkB,QAAQ,OAAA,CAAQ,eAAA,CAAgB,QAAQ,eAAA,EAAA,EAAmB;AACzG,QAAA,MAAM,UAAA,GAAa,OAAA,CAAQ,OAAA,CAAQ,eAAA,CAAgB,eAAe,CAAA;AAClE,QAAA,IAAI,UAAA,IAAc,WAAW,QAAA,KAAa,UAAA,CAAW,UAAU,QAAA,IAAY,UAAA,CAAW,UAAU,MAAA,CAAA,EAAS;AACvG,UAAA,SAAA,CAAU,IAAA,CAAK,WAAW,QAAQ,CAAA;AAClC,UAAA,aAAA,CAAc,IAAA,CAAK;AAAA,YACjB,UAAU,UAAA,CAAW,QAAA;AAAA,YACrB,YAAY,UAAA,CAAW,UAAA,IAAc,CAAA,EAAG,YAAY,IAAI,eAAe,CAAA,CAAA;AAAA,YACvE,YAAA;AAAA,YACA;AAAA,WACD,CAAA;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,EAAE,KAAA,EAAO,SAAA,EAAW,aAAA,EAAc;AAC3C;AAEO,IAAM,oBAAA,GAAuB,CAAC,QAAA,KAA2D;AAC9F,EAAA,OAAO,QAAA,EAAU,eAAe,GAAA,CAAI,CAAA,GAAA,KAAO,kCAAkC,GAAG,CAAC,KAAK,EAAC;AACzF;AAEO,IAAM,4BAAA,GAA+B,CAAC,SAAA,KAAiD;AAC5F,EAAA,OAAO,SAAA,CAAU,MAAA,CAAO,CAAA,GAAA,KAAO,GAAA,CAAI,IAAA,KAAS,WAAW,CAAA,CAAE,GAAA,CAAI,CAAA,GAAA,KAAO,iCAAA,CAAkC,GAAG,CAAC,CAAA;AAC5G","file":"chunk-CCLM7KPF.js","sourcesContent":["import type { MastraDBMessage } from '@mastra/core/agent';\nimport type { ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput } from '@mastra/core/evals';\nimport { RequestContext } from '@mastra/core/request-context';\nimport type { ToolInvocation } from 'ai';\n\n/**\n * Extract text content from MastraDBMessage\n * Matches the logic used in MessageList.mastraDBMessageToAIV4UIMessage\n */\nexport function getTextContentFromMastraDBMessage(message: MastraDBMessage): string {\n if (typeof message.content.content === 'string' && message.content.content !== '') {\n return message.content.content;\n }\n if (message.content.parts && Array.isArray(message.content.parts)) {\n // Return only the last text part like AI SDK does\n const textParts = message.content.parts.filter(p => p.type === 'text');\n return textParts.length > 0 ? textParts[textParts.length - 1]?.text || '' : '';\n }\n return '';\n}\n\nexport const roundToTwoDecimals = (num: number) => {\n return Math.round((num + Number.EPSILON) * 100) / 100;\n};\n\nexport function isCloserTo(value: number, target1: number, target2: number): boolean {\n return Math.abs(value - target1) < Math.abs(value - target2);\n}\n\nexport type TestCase = {\n input: string;\n output: string;\n expectedResult: {\n score: number;\n reason?: string;\n };\n};\n\nexport type TestCaseWithContext = TestCase & {\n context: string[];\n};\n\nexport const createTestRun = (\n input: string,\n output: string,\n additionalContext?: Record<string, any>,\n requestContext?: Record<string, any>,\n): ScoringInput => {\n return {\n input: [{ role: 'user', content: input }],\n output: { role: 'assistant', text: output },\n additionalContext: additionalContext ?? {},\n requestContext: requestContext ?? {},\n };\n};\n\nexport const getUserMessageFromRunInput = (input?: ScorerRunInputForAgent): string | undefined => {\n const message = input?.inputMessages.find(({ role }) => role === 'user');\n return message ? getTextContentFromMastraDBMessage(message) : undefined;\n};\n\nexport const getSystemMessagesFromRunInput = (input?: ScorerRunInputForAgent): string[] => {\n const systemMessages: string[] = [];\n\n // Add standard system messages\n if (input?.systemMessages) {\n systemMessages.push(\n ...input.systemMessages\n .map(msg => {\n // Handle different content types - extract text if it's an array of parts\n if (typeof msg.content === 'string') {\n return msg.content;\n } else if (Array.isArray(msg.content)) {\n // Extract text from parts array\n return msg.content\n .filter(part => part.type === 'text')\n .map(part => part.text || '')\n .join(' ');\n }\n return '';\n })\n .filter(content => content),\n );\n }\n\n // Add tagged system messages (these are specialized system prompts)\n if (input?.taggedSystemMessages) {\n Object.values(input.taggedSystemMessages).forEach(messages => {\n messages.forEach(msg => {\n if (typeof msg.content === 'string') {\n systemMessages.push(msg.content);\n }\n });\n });\n }\n\n return systemMessages;\n};\n\nexport const getCombinedSystemPrompt = (input?: ScorerRunInputForAgent): string => {\n const systemMessages = getSystemMessagesFromRunInput(input);\n return systemMessages.join('\\n\\n');\n};\n\nexport const getAssistantMessageFromRunOutput = (output?: ScorerRunOutputForAgent) => {\n const message = output?.find(({ role }) => role === 'assistant');\n return message ? getTextContentFromMastraDBMessage(message) : undefined;\n};\n\nexport const createToolInvocation = ({\n toolCallId,\n toolName,\n args,\n result,\n state = 'result',\n}: {\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state?: ToolInvocation['state'];\n}): { toolCallId: string; toolName: string; args: Record<string, any>; result: Record<string, any>; state: string } => {\n return {\n toolCallId,\n toolName,\n args,\n result,\n state,\n };\n};\n\n/**\n * Helper function to create MastraDBMessage objects for tests\n * Supports optional tool invocations for testing tool call scenarios\n */\nexport function createTestMessage({\n content,\n role,\n id = 'test-message',\n toolInvocations = [],\n}: {\n content: string;\n role: 'user' | 'assistant' | 'system';\n id?: string;\n toolInvocations?: Array<{\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state: any;\n }>;\n}): MastraDBMessage {\n return {\n id,\n role,\n content: {\n format: 2,\n parts: [{ type: 'text', text: content }],\n content,\n ...(toolInvocations.length > 0 && {\n toolInvocations: toolInvocations.map(ti => ({\n toolCallId: ti.toolCallId,\n toolName: ti.toolName,\n args: ti.args,\n result: ti.result,\n state: ti.state,\n })),\n }),\n },\n createdAt: new Date(),\n };\n}\n\nexport const createAgentTestRun = ({\n inputMessages = [],\n output,\n rememberedMessages = [],\n systemMessages = [],\n taggedSystemMessages = {},\n requestContext = new RequestContext(),\n runId = crypto.randomUUID(),\n}: {\n inputMessages?: ScorerRunInputForAgent['inputMessages'];\n output: ScorerRunOutputForAgent;\n rememberedMessages?: ScorerRunInputForAgent['rememberedMessages'];\n systemMessages?: ScorerRunInputForAgent['systemMessages'];\n taggedSystemMessages?: ScorerRunInputForAgent['taggedSystemMessages'];\n requestContext?: RequestContext;\n runId?: string;\n}): {\n input: ScorerRunInputForAgent;\n output: ScorerRunOutputForAgent;\n requestContext: RequestContext;\n runId: string;\n} => {\n return {\n input: {\n inputMessages,\n rememberedMessages,\n systemMessages,\n taggedSystemMessages,\n },\n output,\n requestContext,\n runId,\n };\n};\n\nexport type ToolCallInfo = {\n toolName: string;\n toolCallId: string;\n messageIndex: number;\n invocationIndex: number;\n};\n\nexport function extractToolCalls(output: ScorerRunOutputForAgent): { tools: string[]; toolCallInfos: ToolCallInfo[] } {\n const toolCalls: string[] = [];\n const toolCallInfos: ToolCallInfo[] = [];\n\n for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {\n const message = output[messageIndex];\n // Tool invocations are now nested under content\n if (message?.content?.toolInvocations) {\n for (let invocationIndex = 0; invocationIndex < message.content.toolInvocations.length; invocationIndex++) {\n const invocation = message.content.toolInvocations[invocationIndex];\n if (invocation && invocation.toolName && (invocation.state === 'result' || invocation.state === 'call')) {\n toolCalls.push(invocation.toolName);\n toolCallInfos.push({\n toolName: invocation.toolName,\n toolCallId: invocation.toolCallId || `${messageIndex}-${invocationIndex}`,\n messageIndex,\n invocationIndex,\n });\n }\n }\n }\n }\n\n return { tools: toolCalls, toolCallInfos };\n}\n\nexport const extractInputMessages = (runInput: ScorerRunInputForAgent | undefined): string[] => {\n return runInput?.inputMessages?.map(msg => getTextContentFromMastraDBMessage(msg)) || [];\n};\n\nexport const extractAgentResponseMessages = (runOutput: ScorerRunOutputForAgent): string[] => {\n return runOutput.filter(msg => msg.role === 'assistant').map(msg => getTextContentFromMastraDBMessage(msg));\n};\n"]}
@@ -1,24 +1,35 @@
1
1
  'use strict';
2
2
 
3
- var runtimeContext = require('@mastra/core/runtime-context');
3
+ var requestContext = require('@mastra/core/request-context');
4
4
 
5
5
  // src/scorers/utils.ts
6
+ function getTextContentFromMastraDBMessage(message) {
7
+ if (typeof message.content.content === "string" && message.content.content !== "") {
8
+ return message.content.content;
9
+ }
10
+ if (message.content.parts && Array.isArray(message.content.parts)) {
11
+ const textParts = message.content.parts.filter((p) => p.type === "text");
12
+ return textParts.length > 0 ? textParts[textParts.length - 1]?.text || "" : "";
13
+ }
14
+ return "";
15
+ }
6
16
  var roundToTwoDecimals = (num) => {
7
17
  return Math.round((num + Number.EPSILON) * 100) / 100;
8
18
  };
9
19
  function isCloserTo(value, target1, target2) {
10
20
  return Math.abs(value - target1) < Math.abs(value - target2);
11
21
  }
12
- var createTestRun = (input, output, additionalContext, runtimeContext) => {
22
+ var createTestRun = (input, output, additionalContext, requestContext) => {
13
23
  return {
14
24
  input: [{ role: "user", content: input }],
15
25
  output: { role: "assistant", text: output },
16
26
  additionalContext: additionalContext ?? {},
17
- runtimeContext: runtimeContext ?? {}
27
+ requestContext: requestContext ?? {}
18
28
  };
19
29
  };
20
30
  var getUserMessageFromRunInput = (input) => {
21
- return input?.inputMessages.find(({ role }) => role === "user")?.content;
31
+ const message = input?.inputMessages.find(({ role }) => role === "user");
32
+ return message ? getTextContentFromMastraDBMessage(message) : void 0;
22
33
  };
23
34
  var getSystemMessagesFromRunInput = (input) => {
24
35
  const systemMessages = [];
@@ -50,7 +61,8 @@ var getCombinedSystemPrompt = (input) => {
50
61
  return systemMessages.join("\n\n");
51
62
  };
52
63
  var getAssistantMessageFromRunOutput = (output) => {
53
- return output?.find(({ role }) => role === "assistant")?.content;
64
+ const message = output?.find(({ role }) => role === "assistant");
65
+ return message ? getTextContentFromMastraDBMessage(message) : void 0;
54
66
  };
55
67
  var createToolInvocation = ({
56
68
  toolCallId,
@@ -67,27 +79,39 @@ var createToolInvocation = ({
67
79
  state
68
80
  };
69
81
  };
70
- var createUIMessage = ({
82
+ function createTestMessage({
71
83
  content,
72
84
  role,
73
85
  id = "test-message",
74
86
  toolInvocations = []
75
- }) => {
87
+ }) {
76
88
  return {
77
89
  id,
78
90
  role,
79
- content,
80
- parts: [{ type: "text", text: content }],
81
- toolInvocations
91
+ content: {
92
+ format: 2,
93
+ parts: [{ type: "text", text: content }],
94
+ content,
95
+ ...toolInvocations.length > 0 && {
96
+ toolInvocations: toolInvocations.map((ti) => ({
97
+ toolCallId: ti.toolCallId,
98
+ toolName: ti.toolName,
99
+ args: ti.args,
100
+ result: ti.result,
101
+ state: ti.state
102
+ }))
103
+ }
104
+ },
105
+ createdAt: /* @__PURE__ */ new Date()
82
106
  };
83
- };
107
+ }
84
108
  var createAgentTestRun = ({
85
109
  inputMessages = [],
86
110
  output,
87
111
  rememberedMessages = [],
88
112
  systemMessages = [],
89
113
  taggedSystemMessages = {},
90
- runtimeContext: runtimeContext$1 = new runtimeContext.RuntimeContext(),
114
+ requestContext: requestContext$1 = new requestContext.RequestContext(),
91
115
  runId = crypto.randomUUID()
92
116
  }) => {
93
117
  return {
@@ -98,7 +122,7 @@ var createAgentTestRun = ({
98
122
  taggedSystemMessages
99
123
  },
100
124
  output,
101
- runtimeContext: runtimeContext$1,
125
+ requestContext: requestContext$1,
102
126
  runId
103
127
  };
104
128
  };
@@ -107,9 +131,9 @@ function extractToolCalls(output) {
107
131
  const toolCallInfos = [];
108
132
  for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {
109
133
  const message = output[messageIndex];
110
- if (message?.toolInvocations) {
111
- for (let invocationIndex = 0; invocationIndex < message.toolInvocations.length; invocationIndex++) {
112
- const invocation = message.toolInvocations[invocationIndex];
134
+ if (message?.content?.toolInvocations) {
135
+ for (let invocationIndex = 0; invocationIndex < message.content.toolInvocations.length; invocationIndex++) {
136
+ const invocation = message.content.toolInvocations[invocationIndex];
113
137
  if (invocation && invocation.toolName && (invocation.state === "result" || invocation.state === "call")) {
114
138
  toolCalls.push(invocation.toolName);
115
139
  toolCallInfos.push({
@@ -125,24 +149,25 @@ function extractToolCalls(output) {
125
149
  return { tools: toolCalls, toolCallInfos };
126
150
  }
127
151
  var extractInputMessages = (runInput) => {
128
- return runInput?.inputMessages?.map((msg) => msg.content) || [];
152
+ return runInput?.inputMessages?.map((msg) => getTextContentFromMastraDBMessage(msg)) || [];
129
153
  };
130
154
  var extractAgentResponseMessages = (runOutput) => {
131
- return runOutput.filter((msg) => msg.role === "assistant").map((msg) => msg.content);
155
+ return runOutput.filter((msg) => msg.role === "assistant").map((msg) => getTextContentFromMastraDBMessage(msg));
132
156
  };
133
157
 
134
158
  exports.createAgentTestRun = createAgentTestRun;
159
+ exports.createTestMessage = createTestMessage;
135
160
  exports.createTestRun = createTestRun;
136
161
  exports.createToolInvocation = createToolInvocation;
137
- exports.createUIMessage = createUIMessage;
138
162
  exports.extractAgentResponseMessages = extractAgentResponseMessages;
139
163
  exports.extractInputMessages = extractInputMessages;
140
164
  exports.extractToolCalls = extractToolCalls;
141
165
  exports.getAssistantMessageFromRunOutput = getAssistantMessageFromRunOutput;
142
166
  exports.getCombinedSystemPrompt = getCombinedSystemPrompt;
143
167
  exports.getSystemMessagesFromRunInput = getSystemMessagesFromRunInput;
168
+ exports.getTextContentFromMastraDBMessage = getTextContentFromMastraDBMessage;
144
169
  exports.getUserMessageFromRunInput = getUserMessageFromRunInput;
145
170
  exports.isCloserTo = isCloserTo;
146
171
  exports.roundToTwoDecimals = roundToTwoDecimals;
147
- //# sourceMappingURL=chunk-QKR2PMLZ.cjs.map
148
- //# sourceMappingURL=chunk-QKR2PMLZ.cjs.map
172
+ //# sourceMappingURL=chunk-TPQLLHZW.cjs.map
173
+ //# sourceMappingURL=chunk-TPQLLHZW.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/scorers/utils.ts"],"names":["requestContext","RequestContext"],"mappings":";;;;;AASO,SAAS,kCAAkC,OAAA,EAAkC;AAClF,EAAA,IAAI,OAAO,QAAQ,OAAA,CAAQ,OAAA,KAAY,YAAY,OAAA,CAAQ,OAAA,CAAQ,YAAY,EAAA,EAAI;AACjF,IAAA,OAAO,QAAQ,OAAA,CAAQ,OAAA;AAAA,EACzB;AACA,EAAA,IAAI,OAAA,CAAQ,QAAQ,KAAA,IAAS,KAAA,CAAM,QAAQ,OAAA,CAAQ,OAAA,CAAQ,KAAK,CAAA,EAAG;AAEjE,IAAA,MAAM,SAAA,GAAY,QAAQ,OAAA,CAAQ,KAAA,CAAM,OAAO,CAAA,CAAA,KAAK,CAAA,CAAE,SAAS,MAAM,CAAA;AACrE,IAAA,OAAO,SAAA,CAAU,SAAS,CAAA,GAAI,SAAA,CAAU,UAAU,MAAA,GAAS,CAAC,CAAA,EAAG,IAAA,IAAQ,EAAA,GAAK,EAAA;AAAA,EAC9E;AACA,EAAA,OAAO,EAAA;AACT;AAEO,IAAM,kBAAA,GAAqB,CAAC,GAAA,KAAgB;AACjD,EAAA,OAAO,KAAK,KAAA,CAAA,CAAO,GAAA,GAAM,MAAA,CAAO,OAAA,IAAW,GAAG,CAAA,GAAI,GAAA;AACpD;AAEO,SAAS,UAAA,CAAW,KAAA,EAAe,OAAA,EAAiB,OAAA,EAA0B;AACnF,EAAA,OAAO,IAAA,CAAK,IAAI,KAAA,GAAQ,OAAO,IAAI,IAAA,CAAK,GAAA,CAAI,QAAQ,OAAO,CAAA;AAC7D;AAeO,IAAM,aAAA,GAAgB,CAC3B,KAAA,EACA,MAAA,EACA,mBACA,cAAA,KACiB;AACjB,EAAA,OAAO;AAAA,IACL,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,OAAA,EAAS,OAAO,CAAA;AAAA,IACxC,MAAA,EAAQ,EAAE,IAAA,EAAM,WAAA,EAAa,MAAM,MAAA,EAAO;AAAA,IAC1C,iBAAA,EAAmB,qBAAqB,EAAC;AAAA,IACzC,cAAA,EAAgB,kBAAkB;AAAC,GACrC;AACF;AAEO,IAAM,0BAAA,GAA6B,CAAC,KAAA,KAAuD;AAChG,EAAA,MAAM,OAAA,GAAU,OAAO,aAAA,CAAc,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,IAAA,KAAS,MAAM,CAAA;AACvE,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAEO,IAAM,6BAAA,GAAgC,CAAC,KAAA,KAA6C;AACzF,EAAA,MAAM,iBAA2B,EAAC;AAGlC,EAAA,IAAI,OAAO,cAAA,EAAgB;AACzB,IAAA,cAAA,CAAe,IAAA;AAAA,MACb,GAAG,KAAA,CAAM,cAAA,CACN,GAAA,CAAI,CAAA,GAAA,KAAO;AAEV,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,OAAO,GAAA,CAAI,OAAA;AAAA,QACb,CAAA,MAAA,IAAW,KAAA,CAAM,OAAA,CAAQ,GAAA,CAAI,OAAO,CAAA,EAAG;AAErC,UAAA,OAAO,GAAA,CAAI,OAAA,CACR,MAAA,CAAO,CAAA,IAAA,KAAQ,KAAK,IAAA,KAAS,MAAM,CAAA,CACnC,GAAA,CAAI,UAAQ,IAAA,CAAK,IAAA,IAAQ,EAAE,CAAA,CAC3B,KAAK,GAAG,CAAA;AAAA,QACb;AACA,QAAA,OAAO,EAAA;AAAA,MACT,CAAC,CAAA,CACA,MAAA,CAAO,CAAA,OAAA,KAAW,OAAO;AAAA,KAC9B;AAAA,EACF;AAGA,EAAA,IAAI,OAAO,oBAAA,EAAsB;AAC/B,IAAA,MAAA,CAAO,MAAA,CAAO,KAAA,CAAM,oBAAoB,CAAA,CAAE,QAAQ,CAAA,QAAA,KAAY;AAC5D,MAAA,QAAA,CAAS,QAAQ,CAAA,GAAA,KAAO;AACtB,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,cAAA,CAAe,IAAA,CAAK,IAAI,OAAO,CAAA;AAAA,QACjC;AAAA,MACF,CAAC,CAAA;AAAA,IACH,CAAC,CAAA;AAAA,EACH;AAEA,EAAA,OAAO,cAAA;AACT;AAEO,IAAM,uBAAA,GAA0B,CAAC,KAAA,KAA2C;AACjF,EAAA,MAAM,cAAA,GAAiB,8BAA8B,KAAK,CAAA;AAC1D,EAAA,OAAO,cAAA,CAAe,KAAK,MAAM,CAAA;AACnC;AAEO,IAAM,gCAAA,GAAmC,CAAC,MAAA,KAAqC;AACpF,EAAA,MAAM,OAAA,GAAU,QAAQ,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,SAAS,WAAW,CAAA;AAC/D,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAEO,IAAM,uBAAuB,CAAC;AAAA,EACnC,UAAA;AAAA,EACA,QAAA;AAAA,EACA,IAAA;AAAA,EACA,MAAA;AAAA,EACA,KAAA,GAAQ;AACV,CAAA,KAMuH;AACrH,EAAA,OAAO;AAAA,IACL,UAAA;AAAA,IACA,QAAA;AAAA,IACA,IAAA;AAAA,IACA,MAAA;AAAA,IACA;AAAA,GACF;AACF;AAMO,SAAS,iBAAA,CAAkB;AAAA,EAChC,OAAA;AAAA,EACA,IAAA;AAAA,EACA,EAAA,GAAK,cAAA;AAAA,EACL,kBAAkB;AACpB,CAAA,EAWoB;AAClB,EAAA,OAAO;AAAA,IACL,EAAA;AAAA,IACA,IAAA;AAAA,IACA,OAAA,EAAS;AAAA,MACP,MAAA,EAAQ,CAAA;AAAA,MACR,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,IAAA,EAAM,SAAS,CAAA;AAAA,MACvC,OAAA;AAAA,MACA,GAAI,eAAA,CAAgB,MAAA,GAAS,CAAA,IAAK;AAAA,QAChC,eAAA,EAAiB,eAAA,CAAgB,GAAA,CAAI,CAAA,EAAA,MAAO;AAAA,UAC1C,YAAY,EAAA,CAAG,UAAA;AAAA,UACf,UAAU,EAAA,CAAG,QAAA;AAAA,UACb,MAAM,EAAA,CAAG,IAAA;AAAA,UACT,QAAQ,EAAA,CAAG,MAAA;AAAA,UACX,OAAO,EAAA,CAAG;AAAA,SACZ,CAAE;AAAA;AACJ,KACF;AAAA,IACA,SAAA,sBAAe,IAAA;AAAK,GACtB;AACF;AAEO,IAAM,qBAAqB,CAAC;AAAA,EACjC,gBAAgB,EAAC;AAAA,EACjB,MAAA;AAAA,EACA,qBAAqB,EAAC;AAAA,EACtB,iBAAiB,EAAC;AAAA,EAClB,uBAAuB,EAAC;AAAA,kBACxBA,gBAAA,GAAiB,IAAIC,6BAAA,EAAe;AAAA,EACpC,KAAA,GAAQ,OAAO,UAAA;AACjB,CAAA,KAaK;AACH,EAAA,OAAO;AAAA,IACL,KAAA,EAAO;AAAA,MACL,aAAA;AAAA,MACA,kBAAA;AAAA,MACA,cAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,MAAA;AAAA,oBACAD,gBAAA;AAAA,IACA;AAAA,GACF;AACF;AASO,SAAS,iBAAiB,MAAA,EAAqF;AACpH,EAAA,MAAM,YAAsB,EAAC;AAC7B,EAAA,MAAM,gBAAgC,EAAC;AAEvC,EAAA,KAAA,IAAS,YAAA,GAAe,CAAA,EAAG,YAAA,GAAe,MAAA,CAAO,QAAQ,YAAA,EAAA,EAAgB;AACvE,IAAA,MAAM,OAAA,GAAU,OAAO,YAAY,CAAA;AAEnC,IAAA,IAAI,OAAA,EAAS,SAAS,eAAA,EAAiB;AACrC,MAAA,KAAA,IAAS,kBAAkB,CAAA,EAAG,eAAA,GAAkB,QAAQ,OAAA,CAAQ,eAAA,CAAgB,QAAQ,eAAA,EAAA,EAAmB;AACzG,QAAA,MAAM,UAAA,GAAa,OAAA,CAAQ,OAAA,CAAQ,eAAA,CAAgB,eAAe,CAAA;AAClE,QAAA,IAAI,UAAA,IAAc,WAAW,QAAA,KAAa,UAAA,CAAW,UAAU,QAAA,IAAY,UAAA,CAAW,UAAU,MAAA,CAAA,EAAS;AACvG,UAAA,SAAA,CAAU,IAAA,CAAK,WAAW,QAAQ,CAAA;AAClC,UAAA,aAAA,CAAc,IAAA,CAAK;AAAA,YACjB,UAAU,UAAA,CAAW,QAAA;AAAA,YACrB,YAAY,UAAA,CAAW,UAAA,IAAc,CAAA,EAAG,YAAY,IAAI,eAAe,CAAA,CAAA;AAAA,YACvE,YAAA;AAAA,YACA;AAAA,WACD,CAAA;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,EAAE,KAAA,EAAO,SAAA,EAAW,aAAA,EAAc;AAC3C;AAEO,IAAM,oBAAA,GAAuB,CAAC,QAAA,KAA2D;AAC9F,EAAA,OAAO,QAAA,EAAU,eAAe,GAAA,CAAI,CAAA,GAAA,KAAO,kCAAkC,GAAG,CAAC,KAAK,EAAC;AACzF;AAEO,IAAM,4BAAA,GAA+B,CAAC,SAAA,KAAiD;AAC5F,EAAA,OAAO,SAAA,CAAU,MAAA,CAAO,CAAA,GAAA,KAAO,GAAA,CAAI,IAAA,KAAS,WAAW,CAAA,CAAE,GAAA,CAAI,CAAA,GAAA,KAAO,iCAAA,CAAkC,GAAG,CAAC,CAAA;AAC5G","file":"chunk-TPQLLHZW.cjs","sourcesContent":["import type { MastraDBMessage } from '@mastra/core/agent';\nimport type { ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput } from '@mastra/core/evals';\nimport { RequestContext } from '@mastra/core/request-context';\nimport type { ToolInvocation } from 'ai';\n\n/**\n * Extract text content from MastraDBMessage\n * Matches the logic used in MessageList.mastraDBMessageToAIV4UIMessage\n */\nexport function getTextContentFromMastraDBMessage(message: MastraDBMessage): string {\n if (typeof message.content.content === 'string' && message.content.content !== '') {\n return message.content.content;\n }\n if (message.content.parts && Array.isArray(message.content.parts)) {\n // Return only the last text part like AI SDK does\n const textParts = message.content.parts.filter(p => p.type === 'text');\n return textParts.length > 0 ? textParts[textParts.length - 1]?.text || '' : '';\n }\n return '';\n}\n\nexport const roundToTwoDecimals = (num: number) => {\n return Math.round((num + Number.EPSILON) * 100) / 100;\n};\n\nexport function isCloserTo(value: number, target1: number, target2: number): boolean {\n return Math.abs(value - target1) < Math.abs(value - target2);\n}\n\nexport type TestCase = {\n input: string;\n output: string;\n expectedResult: {\n score: number;\n reason?: string;\n };\n};\n\nexport type TestCaseWithContext = TestCase & {\n context: string[];\n};\n\nexport const createTestRun = (\n input: string,\n output: string,\n additionalContext?: Record<string, any>,\n requestContext?: Record<string, any>,\n): ScoringInput => {\n return {\n input: [{ role: 'user', content: input }],\n output: { role: 'assistant', text: output },\n additionalContext: additionalContext ?? {},\n requestContext: requestContext ?? {},\n };\n};\n\nexport const getUserMessageFromRunInput = (input?: ScorerRunInputForAgent): string | undefined => {\n const message = input?.inputMessages.find(({ role }) => role === 'user');\n return message ? getTextContentFromMastraDBMessage(message) : undefined;\n};\n\nexport const getSystemMessagesFromRunInput = (input?: ScorerRunInputForAgent): string[] => {\n const systemMessages: string[] = [];\n\n // Add standard system messages\n if (input?.systemMessages) {\n systemMessages.push(\n ...input.systemMessages\n .map(msg => {\n // Handle different content types - extract text if it's an array of parts\n if (typeof msg.content === 'string') {\n return msg.content;\n } else if (Array.isArray(msg.content)) {\n // Extract text from parts array\n return msg.content\n .filter(part => part.type === 'text')\n .map(part => part.text || '')\n .join(' ');\n }\n return '';\n })\n .filter(content => content),\n );\n }\n\n // Add tagged system messages (these are specialized system prompts)\n if (input?.taggedSystemMessages) {\n Object.values(input.taggedSystemMessages).forEach(messages => {\n messages.forEach(msg => {\n if (typeof msg.content === 'string') {\n systemMessages.push(msg.content);\n }\n });\n });\n }\n\n return systemMessages;\n};\n\nexport const getCombinedSystemPrompt = (input?: ScorerRunInputForAgent): string => {\n const systemMessages = getSystemMessagesFromRunInput(input);\n return systemMessages.join('\\n\\n');\n};\n\nexport const getAssistantMessageFromRunOutput = (output?: ScorerRunOutputForAgent) => {\n const message = output?.find(({ role }) => role === 'assistant');\n return message ? getTextContentFromMastraDBMessage(message) : undefined;\n};\n\nexport const createToolInvocation = ({\n toolCallId,\n toolName,\n args,\n result,\n state = 'result',\n}: {\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state?: ToolInvocation['state'];\n}): { toolCallId: string; toolName: string; args: Record<string, any>; result: Record<string, any>; state: string } => {\n return {\n toolCallId,\n toolName,\n args,\n result,\n state,\n };\n};\n\n/**\n * Helper function to create MastraDBMessage objects for tests\n * Supports optional tool invocations for testing tool call scenarios\n */\nexport function createTestMessage({\n content,\n role,\n id = 'test-message',\n toolInvocations = [],\n}: {\n content: string;\n role: 'user' | 'assistant' | 'system';\n id?: string;\n toolInvocations?: Array<{\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state: any;\n }>;\n}): MastraDBMessage {\n return {\n id,\n role,\n content: {\n format: 2,\n parts: [{ type: 'text', text: content }],\n content,\n ...(toolInvocations.length > 0 && {\n toolInvocations: toolInvocations.map(ti => ({\n toolCallId: ti.toolCallId,\n toolName: ti.toolName,\n args: ti.args,\n result: ti.result,\n state: ti.state,\n })),\n }),\n },\n createdAt: new Date(),\n };\n}\n\nexport const createAgentTestRun = ({\n inputMessages = [],\n output,\n rememberedMessages = [],\n systemMessages = [],\n taggedSystemMessages = {},\n requestContext = new RequestContext(),\n runId = crypto.randomUUID(),\n}: {\n inputMessages?: ScorerRunInputForAgent['inputMessages'];\n output: ScorerRunOutputForAgent;\n rememberedMessages?: ScorerRunInputForAgent['rememberedMessages'];\n systemMessages?: ScorerRunInputForAgent['systemMessages'];\n taggedSystemMessages?: ScorerRunInputForAgent['taggedSystemMessages'];\n requestContext?: RequestContext;\n runId?: string;\n}): {\n input: ScorerRunInputForAgent;\n output: ScorerRunOutputForAgent;\n requestContext: RequestContext;\n runId: string;\n} => {\n return {\n input: {\n inputMessages,\n rememberedMessages,\n systemMessages,\n taggedSystemMessages,\n },\n output,\n requestContext,\n runId,\n };\n};\n\nexport type ToolCallInfo = {\n toolName: string;\n toolCallId: string;\n messageIndex: number;\n invocationIndex: number;\n};\n\nexport function extractToolCalls(output: ScorerRunOutputForAgent): { tools: string[]; toolCallInfos: ToolCallInfo[] } {\n const toolCalls: string[] = [];\n const toolCallInfos: ToolCallInfo[] = [];\n\n for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {\n const message = output[messageIndex];\n // Tool invocations are now nested under content\n if (message?.content?.toolInvocations) {\n for (let invocationIndex = 0; invocationIndex < message.content.toolInvocations.length; invocationIndex++) {\n const invocation = message.content.toolInvocations[invocationIndex];\n if (invocation && invocation.toolName && (invocation.state === 'result' || invocation.state === 'call')) {\n toolCalls.push(invocation.toolName);\n toolCallInfos.push({\n toolName: invocation.toolName,\n toolCallId: invocation.toolCallId || `${messageIndex}-${invocationIndex}`,\n messageIndex,\n invocationIndex,\n });\n }\n }\n }\n }\n\n return { tools: toolCalls, toolCallInfos };\n}\n\nexport const extractInputMessages = (runInput: ScorerRunInputForAgent | undefined): string[] => {\n return runInput?.inputMessages?.map(msg => getTextContentFromMastraDBMessage(msg)) || [];\n};\n\nexport const extractAgentResponseMessages = (runOutput: ScorerRunOutputForAgent): string[] => {\n return runOutput.filter(msg => msg.role === 'assistant').map(msg => getTextContentFromMastraDBMessage(msg));\n};\n"]}
@@ -1,4 +1,4 @@
1
- export declare function createCompletenessScorer(): import("@mastra/core/scores").MastraScorer<"Completeness Scorer", import("@mastra/core/scores").ScorerRunInputForAgent, import("@mastra/core/scores").ScorerRunOutputForAgent, Record<"preprocessStepResult", {
1
+ export declare function createCompletenessScorer(): import("@mastra/core/evals").MastraScorer<"completeness-scorer", import("@mastra/core/evals").ScorerRunInputForAgent, import("@mastra/core/evals").ScorerRunOutputForAgent, Record<"preprocessStepResult", {
2
2
  inputElements: string[];
3
3
  outputElements: string[];
4
4
  missingElements: string[];
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/code/completeness/index.ts"],"names":[],"mappings":"AA0EA,wBAAgB,wBAAwB;;;;;;;;gDAmDvC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/code/completeness/index.ts"],"names":[],"mappings":"AA4EA,wBAAgB,wBAAwB;;;;;;;;gDA2DvC"}