@mastra/evals 0.14.4 → 1.0.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -25
- package/README.md +19 -159
- package/dist/{chunk-KHEXN75Q.js → chunk-CCLM7KPF.js} +45 -21
- package/dist/chunk-CCLM7KPF.js.map +1 -0
- package/dist/{chunk-QKR2PMLZ.cjs → chunk-TPQLLHZW.cjs} +46 -21
- package/dist/chunk-TPQLLHZW.cjs.map +1 -0
- package/dist/scorers/code/completeness/index.d.ts +1 -1
- package/dist/scorers/code/completeness/index.d.ts.map +1 -1
- package/dist/scorers/code/content-similarity/index.d.ts +1 -1
- package/dist/scorers/code/content-similarity/index.d.ts.map +1 -1
- package/dist/scorers/code/keyword-coverage/index.d.ts +1 -1
- package/dist/scorers/code/keyword-coverage/index.d.ts.map +1 -1
- package/dist/scorers/code/textual-difference/index.d.ts +1 -1
- package/dist/scorers/code/textual-difference/index.d.ts.map +1 -1
- package/dist/scorers/code/tone/index.d.ts +1 -1
- package/dist/scorers/code/tone/index.d.ts.map +1 -1
- package/dist/scorers/code/tool-call-accuracy/index.d.ts +1 -1
- package/dist/scorers/code/tool-call-accuracy/index.d.ts.map +1 -1
- package/dist/scorers/llm/answer-relevancy/index.d.ts +1 -1
- package/dist/scorers/llm/answer-relevancy/index.d.ts.map +1 -1
- package/dist/scorers/llm/answer-similarity/index.d.ts +2 -2
- package/dist/scorers/llm/answer-similarity/index.d.ts.map +1 -1
- package/dist/scorers/llm/bias/index.d.ts +2 -2
- package/dist/scorers/llm/bias/index.d.ts.map +1 -1
- package/dist/scorers/llm/context-precision/index.d.ts +3 -3
- package/dist/scorers/llm/context-precision/index.d.ts.map +1 -1
- package/dist/scorers/llm/context-relevance/index.d.ts +3 -3
- package/dist/scorers/llm/context-relevance/index.d.ts.map +1 -1
- package/dist/scorers/llm/faithfulness/index.d.ts +2 -2
- package/dist/scorers/llm/faithfulness/index.d.ts.map +1 -1
- package/dist/scorers/llm/hallucination/index.d.ts +2 -2
- package/dist/scorers/llm/hallucination/index.d.ts.map +1 -1
- package/dist/scorers/llm/noise-sensitivity/index.d.ts +1 -1
- package/dist/scorers/llm/noise-sensitivity/index.d.ts.map +1 -1
- package/dist/scorers/llm/prompt-alignment/index.d.ts +2 -2
- package/dist/scorers/llm/prompt-alignment/index.d.ts.map +1 -1
- package/dist/scorers/llm/tool-call-accuracy/index.d.ts +2 -2
- package/dist/scorers/llm/tool-call-accuracy/index.d.ts.map +1 -1
- package/dist/scorers/llm/toxicity/index.d.ts +2 -2
- package/dist/scorers/llm/toxicity/index.d.ts.map +1 -1
- package/dist/scorers/{llm → prebuilt}/index.cjs +479 -62
- package/dist/scorers/prebuilt/index.cjs.map +1 -0
- package/dist/scorers/prebuilt/index.d.ts +3 -0
- package/dist/scorers/prebuilt/index.d.ts.map +1 -0
- package/dist/scorers/{llm → prebuilt}/index.js +419 -15
- package/dist/scorers/prebuilt/index.js.map +1 -0
- package/dist/scorers/utils.cjs +21 -17
- package/dist/scorers/utils.d.ts +21 -11
- package/dist/scorers/utils.d.ts.map +1 -1
- package/dist/scorers/utils.js +1 -1
- package/package.json +15 -59
- package/dist/attachListeners.d.ts +0 -4
- package/dist/attachListeners.d.ts.map +0 -1
- package/dist/chunk-44PMY5ES.js +0 -78
- package/dist/chunk-44PMY5ES.js.map +0 -1
- package/dist/chunk-7QAUEU4L.cjs +0 -10
- package/dist/chunk-7QAUEU4L.cjs.map +0 -1
- package/dist/chunk-EMMSS5I5.cjs +0 -37
- package/dist/chunk-EMMSS5I5.cjs.map +0 -1
- package/dist/chunk-G3PMV62Z.js +0 -33
- package/dist/chunk-G3PMV62Z.js.map +0 -1
- package/dist/chunk-IUSAD2BW.cjs +0 -19
- package/dist/chunk-IUSAD2BW.cjs.map +0 -1
- package/dist/chunk-KHEXN75Q.js.map +0 -1
- package/dist/chunk-PWGOG6ML.cjs +0 -81
- package/dist/chunk-PWGOG6ML.cjs.map +0 -1
- package/dist/chunk-QKR2PMLZ.cjs.map +0 -1
- package/dist/chunk-QTWX6TKR.js +0 -8
- package/dist/chunk-QTWX6TKR.js.map +0 -1
- package/dist/chunk-YGTIO3J5.js +0 -17
- package/dist/chunk-YGTIO3J5.js.map +0 -1
- package/dist/dist-LDTK3TIP.cjs +0 -16759
- package/dist/dist-LDTK3TIP.cjs.map +0 -1
- package/dist/dist-OWYZEOJK.js +0 -16737
- package/dist/dist-OWYZEOJK.js.map +0 -1
- package/dist/evaluation.d.ts +0 -8
- package/dist/evaluation.d.ts.map +0 -1
- package/dist/index.cjs +0 -93
- package/dist/index.cjs.map +0 -1
- package/dist/index.d.ts +0 -3
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js +0 -89
- package/dist/index.js.map +0 -1
- package/dist/magic-string.es-7ORA5OGR.js +0 -1305
- package/dist/magic-string.es-7ORA5OGR.js.map +0 -1
- package/dist/magic-string.es-NZ2XWFKN.cjs +0 -1311
- package/dist/magic-string.es-NZ2XWFKN.cjs.map +0 -1
- package/dist/metrics/index.d.ts +0 -4
- package/dist/metrics/index.d.ts.map +0 -1
- package/dist/metrics/judge/index.cjs +0 -12
- package/dist/metrics/judge/index.cjs.map +0 -1
- package/dist/metrics/judge/index.d.ts +0 -7
- package/dist/metrics/judge/index.d.ts.map +0 -1
- package/dist/metrics/judge/index.js +0 -3
- package/dist/metrics/judge/index.js.map +0 -1
- package/dist/metrics/llm/answer-relevancy/index.d.ts +0 -16
- package/dist/metrics/llm/answer-relevancy/index.d.ts.map +0 -1
- package/dist/metrics/llm/answer-relevancy/metricJudge.d.ts +0 -20
- package/dist/metrics/llm/answer-relevancy/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/answer-relevancy/prompts.d.ts +0 -19
- package/dist/metrics/llm/answer-relevancy/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/bias/index.d.ts +0 -14
- package/dist/metrics/llm/bias/index.d.ts.map +0 -1
- package/dist/metrics/llm/bias/metricJudge.d.ts +0 -14
- package/dist/metrics/llm/bias/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/bias/prompts.d.ts +0 -14
- package/dist/metrics/llm/bias/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/context-position/index.d.ts +0 -16
- package/dist/metrics/llm/context-position/index.d.ts.map +0 -1
- package/dist/metrics/llm/context-position/metricJudge.d.ts +0 -20
- package/dist/metrics/llm/context-position/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/context-position/prompts.d.ts +0 -17
- package/dist/metrics/llm/context-position/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/context-precision/index.d.ts +0 -16
- package/dist/metrics/llm/context-precision/index.d.ts.map +0 -1
- package/dist/metrics/llm/context-precision/metricJudge.d.ts +0 -20
- package/dist/metrics/llm/context-precision/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/context-precision/prompts.d.ts +0 -17
- package/dist/metrics/llm/context-precision/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/context-relevancy/index.d.ts +0 -16
- package/dist/metrics/llm/context-relevancy/index.d.ts.map +0 -1
- package/dist/metrics/llm/context-relevancy/metricJudge.d.ts +0 -16
- package/dist/metrics/llm/context-relevancy/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/context-relevancy/prompts.d.ts +0 -13
- package/dist/metrics/llm/context-relevancy/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/contextual-recall/index.d.ts +0 -16
- package/dist/metrics/llm/contextual-recall/index.d.ts.map +0 -1
- package/dist/metrics/llm/contextual-recall/metricJudge.d.ts +0 -16
- package/dist/metrics/llm/contextual-recall/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/contextual-recall/prompts.d.ts +0 -13
- package/dist/metrics/llm/contextual-recall/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/faithfulness/index.d.ts +0 -16
- package/dist/metrics/llm/faithfulness/index.d.ts.map +0 -1
- package/dist/metrics/llm/faithfulness/metricJudge.d.ts +0 -22
- package/dist/metrics/llm/faithfulness/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/faithfulness/prompts.d.ts +0 -20
- package/dist/metrics/llm/faithfulness/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/hallucination/index.d.ts +0 -16
- package/dist/metrics/llm/hallucination/index.d.ts.map +0 -1
- package/dist/metrics/llm/hallucination/metricJudge.d.ts +0 -22
- package/dist/metrics/llm/hallucination/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/hallucination/prompts.d.ts +0 -17
- package/dist/metrics/llm/hallucination/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/index.cjs +0 -2481
- package/dist/metrics/llm/index.cjs.map +0 -1
- package/dist/metrics/llm/index.d.ts +0 -12
- package/dist/metrics/llm/index.d.ts.map +0 -1
- package/dist/metrics/llm/index.js +0 -2469
- package/dist/metrics/llm/index.js.map +0 -1
- package/dist/metrics/llm/prompt-alignment/index.d.ts +0 -33
- package/dist/metrics/llm/prompt-alignment/index.d.ts.map +0 -1
- package/dist/metrics/llm/prompt-alignment/metricJudge.d.ts +0 -20
- package/dist/metrics/llm/prompt-alignment/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/prompt-alignment/prompts.d.ts +0 -17
- package/dist/metrics/llm/prompt-alignment/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/summarization/index.d.ts +0 -19
- package/dist/metrics/llm/summarization/index.d.ts.map +0 -1
- package/dist/metrics/llm/summarization/metricJudge.d.ts +0 -34
- package/dist/metrics/llm/summarization/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/summarization/prompts.d.ts +0 -30
- package/dist/metrics/llm/summarization/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/toxicity/index.d.ts +0 -14
- package/dist/metrics/llm/toxicity/index.d.ts.map +0 -1
- package/dist/metrics/llm/toxicity/metricJudge.d.ts +0 -14
- package/dist/metrics/llm/toxicity/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/toxicity/prompts.d.ts +0 -10
- package/dist/metrics/llm/toxicity/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/types.d.ts +0 -7
- package/dist/metrics/llm/types.d.ts.map +0 -1
- package/dist/metrics/llm/utils.d.ts +0 -14
- package/dist/metrics/llm/utils.d.ts.map +0 -1
- package/dist/metrics/nlp/completeness/index.d.ts +0 -21
- package/dist/metrics/nlp/completeness/index.d.ts.map +0 -1
- package/dist/metrics/nlp/content-similarity/index.d.ts +0 -18
- package/dist/metrics/nlp/content-similarity/index.d.ts.map +0 -1
- package/dist/metrics/nlp/index.cjs +0 -201
- package/dist/metrics/nlp/index.cjs.map +0 -1
- package/dist/metrics/nlp/index.d.ts +0 -6
- package/dist/metrics/nlp/index.d.ts.map +0 -1
- package/dist/metrics/nlp/index.js +0 -188
- package/dist/metrics/nlp/index.js.map +0 -1
- package/dist/metrics/nlp/keyword-coverage/index.d.ts +0 -13
- package/dist/metrics/nlp/keyword-coverage/index.d.ts.map +0 -1
- package/dist/metrics/nlp/textual-difference/index.d.ts +0 -15
- package/dist/metrics/nlp/textual-difference/index.d.ts.map +0 -1
- package/dist/metrics/nlp/tone/index.d.ts +0 -18
- package/dist/metrics/nlp/tone/index.d.ts.map +0 -1
- package/dist/ratio.d.ts +0 -13
- package/dist/ratio.d.ts.map +0 -1
- package/dist/scorers/code/index.cjs +0 -327
- package/dist/scorers/code/index.cjs.map +0 -1
- package/dist/scorers/code/index.js +0 -313
- package/dist/scorers/code/index.js.map +0 -1
- package/dist/scorers/llm/index.cjs.map +0 -1
- package/dist/scorers/llm/index.js.map +0 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,49 +1,58 @@
|
|
|
1
1
|
# @mastra/evals
|
|
2
2
|
|
|
3
|
-
## 0.
|
|
3
|
+
## 1.0.0-beta.1
|
|
4
4
|
|
|
5
5
|
### Patch Changes
|
|
6
6
|
|
|
7
|
-
- Remove
|
|
7
|
+
- Remove unused dependencies ([#10019](https://github.com/mastra-ai/mastra/pull/10019))
|
|
8
8
|
|
|
9
|
-
- Updated dependencies [[`
|
|
10
|
-
- @mastra/core@0.
|
|
9
|
+
- Updated dependencies [[`2319326`](https://github.com/mastra-ai/mastra/commit/2319326f8c64e503a09bbcf14be2dd65405445e0), [`d629361`](https://github.com/mastra-ai/mastra/commit/d629361a60f6565b5bfb11976fdaf7308af858e2), [`08c31c1`](https://github.com/mastra-ai/mastra/commit/08c31c188ebccd598acaf55e888b6397d01f7eae), [`fd3d338`](https://github.com/mastra-ai/mastra/commit/fd3d338a2c362174ed5b383f1f011ad9fb0302aa), [`c30400a`](https://github.com/mastra-ai/mastra/commit/c30400a49b994b1b97256fe785eb6c906fc2b232), [`69e0a87`](https://github.com/mastra-ai/mastra/commit/69e0a878896a2da9494945d86e056a5f8f05b851), [`01f8878`](https://github.com/mastra-ai/mastra/commit/01f88783de25e4de048c1c8aace43e26373c6ea5), [`4c77209`](https://github.com/mastra-ai/mastra/commit/4c77209e6c11678808b365d545845918c40045c8), [`d827d08`](https://github.com/mastra-ai/mastra/commit/d827d0808ffe1f3553a84e975806cc989b9735dd), [`23c10a1`](https://github.com/mastra-ai/mastra/commit/23c10a1efdd9a693c405511ab2dc8a1236603162), [`676ccc7`](https://github.com/mastra-ai/mastra/commit/676ccc7fe92468d2d45d39c31a87825c89fd1ea0), [`c10398d`](https://github.com/mastra-ai/mastra/commit/c10398d5b88f1d4af556f4267ff06f1d11e89179), [`00c2387`](https://github.com/mastra-ai/mastra/commit/00c2387f5f04a365316f851e58666ac43f8c4edf), [`ad6250d`](https://github.com/mastra-ai/mastra/commit/ad6250dbdaad927e29f74a27b83f6c468b50a705), [`3a73998`](https://github.com/mastra-ai/mastra/commit/3a73998fa4ebeb7f3dc9301afe78095fc63e7999), [`e16d553`](https://github.com/mastra-ai/mastra/commit/e16d55338403c7553531cc568125c63d53653dff), [`4d59f58`](https://github.com/mastra-ai/mastra/commit/4d59f58de2d90d6e2810a19d4518e38ddddb9038), [`e1bb9c9`](https://github.com/mastra-ai/mastra/commit/e1bb9c94b4eb68b019ae275981be3feb769b5365), [`351a11f`](https://github.com/mastra-ai/mastra/commit/351a11fcaf2ed1008977fa9b9a489fc422e51cd4)]:
|
|
10
|
+
- @mastra/core@1.0.0-beta.3
|
|
11
11
|
|
|
12
|
-
## 0.
|
|
12
|
+
## 1.0.0-beta.0
|
|
13
13
|
|
|
14
|
-
###
|
|
15
|
-
|
|
16
|
-
- Remove difflib ([#9931](https://github.com/mastra-ai/mastra/pull/9931))
|
|
14
|
+
### Major Changes
|
|
17
15
|
|
|
18
|
-
-
|
|
19
|
-
- @mastra/core@0.24.1-alpha.0
|
|
16
|
+
- Move scorers under the eval domain, make API methods consistent, add prebuilt evals, and require ids for scorers. ([#9589](https://github.com/mastra-ai/mastra/pull/9589))
|
|
20
17
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
18
|
+
- **BREAKING CHANGE**: Scorers for Agents will now use `MastraDBMessage` instead of `UIMessage` ([#9702](https://github.com/mastra-ai/mastra/pull/9702))
|
|
19
|
+
- Scorer input/output types now use `MastraDBMessage[]` with nested `content` object structure
|
|
20
|
+
- Added `getTextContentFromMastraDBMessage()` helper function to extract text content from `MastraDBMessage` objects
|
|
21
|
+
- Added `createTestMessage()` helper function for creating `MastraDBMessage` objects in tests with optional tool invocations support
|
|
22
|
+
- Updated `extractToolCalls()` to access tool invocations from nested `content` structure
|
|
23
|
+
- Updated `getUserMessageFromRunInput()` and `getAssistantMessageFromRunOutput()` to use new message structure
|
|
24
|
+
- Removed `createUIMessage()`
|
|
24
25
|
|
|
25
|
-
-
|
|
26
|
+
- Bump minimum required Node.js version to 22.13.0 ([#9706](https://github.com/mastra-ai/mastra/pull/9706))
|
|
26
27
|
|
|
27
|
-
-
|
|
28
|
-
- @mastra/core@0.24.0
|
|
28
|
+
- Rename RuntimeContext to RequestContext ([#9511](https://github.com/mastra-ai/mastra/pull/9511))
|
|
29
29
|
|
|
30
|
-
|
|
30
|
+
- Remove various deprecated APIs from agent class. ([#9257](https://github.com/mastra-ai/mastra/pull/9257))
|
|
31
|
+
- `agent.llm` → `agent.getLLM()`
|
|
32
|
+
- `agent.tools` → `agent.getTools()`
|
|
33
|
+
- `agent.instructions` → `agent.getInstructions()`
|
|
34
|
+
- `agent.speak()` → `agent.voice.speak()`
|
|
35
|
+
- `agent.getSpeakers()` → `agent.voice.getSpeakers()`
|
|
36
|
+
- `agent.listen` → `agent.voice.listen()`
|
|
37
|
+
- `agent.fetchMemory` → `(await agent.getMemory()).query()`
|
|
38
|
+
- `agent.toStep` → Add agent directly to the step, workflows handle the transformation
|
|
31
39
|
|
|
32
|
-
|
|
40
|
+
- Mark as stable ([`83d5942`](https://github.com/mastra-ai/mastra/commit/83d5942669ce7bba4a6ca4fd4da697a10eb5ebdc))
|
|
33
41
|
|
|
34
|
-
-
|
|
42
|
+
- Remove legacy evals from Mastra ([#9491](https://github.com/mastra-ai/mastra/pull/9491))
|
|
35
43
|
|
|
36
|
-
|
|
37
|
-
- @mastra/core@0.24.0-alpha.0
|
|
44
|
+
### Minor Changes
|
|
38
45
|
|
|
39
|
-
|
|
46
|
+
- Update peer dependencies to match core package version bump (1.0.0) ([#9491](https://github.com/mastra-ai/mastra/pull/9491))
|
|
40
47
|
|
|
41
48
|
### Patch Changes
|
|
42
49
|
|
|
43
|
-
-
|
|
50
|
+
- Update peer dependencies to match core package version bump (1.0.0) ([#9237](https://github.com/mastra-ai/mastra/pull/9237))
|
|
44
51
|
|
|
45
|
-
-
|
|
46
|
-
|
|
52
|
+
- Remove difflib ([#9756](https://github.com/mastra-ai/mastra/pull/9756))
|
|
53
|
+
|
|
54
|
+
- Updated dependencies [[`39c9743`](https://github.com/mastra-ai/mastra/commit/39c97432d084294f8ba85fbf3ef28098ff21459e), [`f743dbb`](https://github.com/mastra-ai/mastra/commit/f743dbb8b40d1627b5c10c0e6fc154f4ebb6e394), [`fec5129`](https://github.com/mastra-ai/mastra/commit/fec5129de7fc64423ea03661a56cef31dc747a0d), [`0491e7c`](https://github.com/mastra-ai/mastra/commit/0491e7c9b714cb0ba22187ee062147ec2dd7c712), [`f6f4903`](https://github.com/mastra-ai/mastra/commit/f6f4903397314f73362061dc5a3e8e7c61ea34aa), [`0e8ed46`](https://github.com/mastra-ai/mastra/commit/0e8ed467c54d6901a6a365f270ec15d6faadb36c), [`6c049d9`](https://github.com/mastra-ai/mastra/commit/6c049d94063fdcbd5b81c4912a2bf82a92c9cc0b), [`2f897df`](https://github.com/mastra-ai/mastra/commit/2f897df208508f46f51b7625e5dd20c37f93e0e3), [`3443770`](https://github.com/mastra-ai/mastra/commit/3443770662df8eb24c9df3589b2792d78cfcb811), [`f0a07e0`](https://github.com/mastra-ai/mastra/commit/f0a07e0111b3307c5fabfa4094c5c2cfb734fbe6), [`aaa40e7`](https://github.com/mastra-ai/mastra/commit/aaa40e788628b319baa8e889407d11ad626547fa), [`1521d71`](https://github.com/mastra-ai/mastra/commit/1521d716e5daedc74690c983fbd961123c56756b), [`9e1911d`](https://github.com/mastra-ai/mastra/commit/9e1911db2b4db85e0e768c3f15e0d61e319869f6), [`ebac155`](https://github.com/mastra-ai/mastra/commit/ebac15564a590117db7078233f927a7e28a85106), [`dd1c38d`](https://github.com/mastra-ai/mastra/commit/dd1c38d1b75f1b695c27b40d8d9d6ed00d5e0f6f), [`5948e6a`](https://github.com/mastra-ai/mastra/commit/5948e6a5146c83666ba3f294b2be576c82a513fb), [`8940859`](https://github.com/mastra-ai/mastra/commit/89408593658199b4ad67f7b65e888f344e64a442), [`e629310`](https://github.com/mastra-ai/mastra/commit/e629310f1a73fa236d49ec7a1d1cceb6229dc7cc), [`4c6b492`](https://github.com/mastra-ai/mastra/commit/4c6b492c4dd591c6a592520c1f6855d6e936d71f), [`dff01d8`](https://github.com/mastra-ai/mastra/commit/dff01d81ce1f4e4087cfac20fa868e6db138dd14), 
[`9d819d5`](https://github.com/mastra-ai/mastra/commit/9d819d54b61481639f4008e4694791bddf187edd), [`71c8d6c`](https://github.com/mastra-ai/mastra/commit/71c8d6c161253207b2b9588bdadb7eed604f7253), [`6179a9b`](https://github.com/mastra-ai/mastra/commit/6179a9ba36ffac326de3cc3c43cdc8028d37c251), [`00f4921`](https://github.com/mastra-ai/mastra/commit/00f4921dd2c91a1e5446799599ef7116a8214a1a), [`ca8041c`](https://github.com/mastra-ai/mastra/commit/ca8041cce0379fda22ed293a565bcb5b6ddca68a), [`7051bf3`](https://github.com/mastra-ai/mastra/commit/7051bf38b3b122a069008f861f7bfc004a6d9f6e), [`a8f1494`](https://github.com/mastra-ai/mastra/commit/a8f1494f4bbdc2770bcf327d4c7d869e332183f1), [`0793497`](https://github.com/mastra-ai/mastra/commit/079349753620c40246ffd673e3f9d7d9820beff3), [`5df9cce`](https://github.com/mastra-ai/mastra/commit/5df9cce1a753438413f64c11eeef8f845745c2a8), [`a854ede`](https://github.com/mastra-ai/mastra/commit/a854ede62bf5ac0945a624ac48913dd69c73aabf), [`c576fc0`](https://github.com/mastra-ai/mastra/commit/c576fc0b100b2085afded91a37c97a0ea0ec09c7), [`3defc80`](https://github.com/mastra-ai/mastra/commit/3defc80cf2b88a1b7fc1cc4ddcb91e982a614609), [`16153fe`](https://github.com/mastra-ai/mastra/commit/16153fe7eb13c99401f48e6ca32707c965ee28b9), [`9f4a683`](https://github.com/mastra-ai/mastra/commit/9f4a6833e88b52574665c028fd5508ad5c2f6004), [`bc94344`](https://github.com/mastra-ai/mastra/commit/bc943444a1342d8a662151b7bce1df7dae32f59c), [`57d157f`](https://github.com/mastra-ai/mastra/commit/57d157f0b163a95c3e6c9eae31bdb11d1bfc64f9), [`903f67d`](https://github.com/mastra-ai/mastra/commit/903f67d184504a273893818c02b961f5423a79ad), [`2a90c55`](https://github.com/mastra-ai/mastra/commit/2a90c55a86a9210697d5adaab5ee94584b079adc), [`eb09742`](https://github.com/mastra-ai/mastra/commit/eb09742197f66c4c38154c3beec78313e69760b2), [`96d35f6`](https://github.com/mastra-ai/mastra/commit/96d35f61376bc2b1bf148648a2c1985bd51bef55), 
[`5cbe88a`](https://github.com/mastra-ai/mastra/commit/5cbe88aefbd9f933bca669fd371ea36bf939ac6d), [`a1bd7b8`](https://github.com/mastra-ai/mastra/commit/a1bd7b8571db16b94eb01588f451a74758c96d65), [`d78b38d`](https://github.com/mastra-ai/mastra/commit/d78b38d898fce285260d3bbb4befade54331617f), [`0633100`](https://github.com/mastra-ai/mastra/commit/0633100a911ad22f5256471bdf753da21c104742), [`c710c16`](https://github.com/mastra-ai/mastra/commit/c710c1652dccfdc4111c8412bca7a6bb1d48b441), [`354ad0b`](https://github.com/mastra-ai/mastra/commit/354ad0b7b1b8183ac567f236a884fc7ede6d7138), [`cfae733`](https://github.com/mastra-ai/mastra/commit/cfae73394f4920635e6c919c8e95ff9a0788e2e5), [`e3dfda7`](https://github.com/mastra-ai/mastra/commit/e3dfda7b11bf3b8c4bb55637028befb5f387fc74), [`844ea5d`](https://github.com/mastra-ai/mastra/commit/844ea5dc0c248961e7bf73629ae7dcff503e853c), [`398fde3`](https://github.com/mastra-ai/mastra/commit/398fde3f39e707cda79372cdae8f9870e3b57c8d), [`f0f8f12`](https://github.com/mastra-ai/mastra/commit/f0f8f125c308f2d0fd36942ef652fd852df7522f), [`0d7618b`](https://github.com/mastra-ai/mastra/commit/0d7618bc650bf2800934b243eca5648f4aeed9c2), [`7b763e5`](https://github.com/mastra-ai/mastra/commit/7b763e52fc3eaf699c2a99f2adf418dd46e4e9a5), [`d36cfbb`](https://github.com/mastra-ai/mastra/commit/d36cfbbb6565ba5f827883cc9bb648eb14befdc1), [`3697853`](https://github.com/mastra-ai/mastra/commit/3697853deeb72017d90e0f38a93c1e29221aeca0), [`b2e45ec`](https://github.com/mastra-ai/mastra/commit/b2e45eca727a8db01a81ba93f1a5219c7183c839), [`d6d49f7`](https://github.com/mastra-ai/mastra/commit/d6d49f7b8714fa19a52ff9c7cf7fb7e73751901e), [`a534e95`](https://github.com/mastra-ai/mastra/commit/a534e9591f83b3cc1ebff99c67edf4cda7bf81d3), [`9d0e7fe`](https://github.com/mastra-ai/mastra/commit/9d0e7feca8ed98de959f53476ee1456073673348), [`53d927c`](https://github.com/mastra-ai/mastra/commit/53d927cc6f03bff33655b7e2b788da445a08731d), 
[`3f2faf2`](https://github.com/mastra-ai/mastra/commit/3f2faf2e2d685d6c053cc5af1bf9fedf267b2ce5), [`22f64bc`](https://github.com/mastra-ai/mastra/commit/22f64bc1d37149480b58bf2fefe35b79a1e3e7d5), [`83d5942`](https://github.com/mastra-ai/mastra/commit/83d5942669ce7bba4a6ca4fd4da697a10eb5ebdc), [`b7959e6`](https://github.com/mastra-ai/mastra/commit/b7959e6e25a46b480f9ea2217c4c6c588c423791), [`bda6370`](https://github.com/mastra-ai/mastra/commit/bda637009360649aaf579919e7873e33553c273e), [`d7acd8e`](https://github.com/mastra-ai/mastra/commit/d7acd8e987b5d7eff4fd98b0906c17c06a2e83d5), [`c7f1f7d`](https://github.com/mastra-ai/mastra/commit/c7f1f7d24f61f247f018cc2d1f33bf63212959a7), [`0bddc6d`](https://github.com/mastra-ai/mastra/commit/0bddc6d8dbd6f6008c0cba2e4960a2da75a55af1), [`735d8c1`](https://github.com/mastra-ai/mastra/commit/735d8c1c0d19fbc09e6f8b66cf41bc7655993838), [`acf322e`](https://github.com/mastra-ai/mastra/commit/acf322e0f1fd0189684cf529d91c694bea918a45), [`c942802`](https://github.com/mastra-ai/mastra/commit/c942802a477a925b01859a7b8688d4355715caaa), [`a0c8c1b`](https://github.com/mastra-ai/mastra/commit/a0c8c1b87d4fee252aebda73e8637fbe01d761c9), [`cc34739`](https://github.com/mastra-ai/mastra/commit/cc34739c34b6266a91bea561119240a7acf47887), [`c218bd3`](https://github.com/mastra-ai/mastra/commit/c218bd3759e32423735b04843a09404572631014), [`2c4438b`](https://github.com/mastra-ai/mastra/commit/2c4438b87817ab7eed818c7990fef010475af1a3), [`2b8893c`](https://github.com/mastra-ai/mastra/commit/2b8893cb108ef9acb72ee7835cd625610d2c1a4a), [`8e5c75b`](https://github.com/mastra-ai/mastra/commit/8e5c75bdb1d08a42d45309a4c72def4b6890230f), [`e59e0d3`](https://github.com/mastra-ai/mastra/commit/e59e0d32afb5fcf2c9f3c00c8f81f6c21d3a63fa), [`fa8409b`](https://github.com/mastra-ai/mastra/commit/fa8409bc39cfd8ba6643b9db5269b90b22e2a2f7), [`173c535`](https://github.com/mastra-ai/mastra/commit/173c535c0645b0da404fe09f003778f0b0d4e019)]:
|
|
55
|
+
- @mastra/core@1.0.0-beta.0
|
|
47
56
|
|
|
48
57
|
## 0.14.1
|
|
49
58
|
|
package/README.md
CHANGED
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
# @mastra/evals
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
`@mastra/evals` ships a collection of scoring utilities you can run locally or inside your own evaluation pipelines. These scorers come in two flavors:
|
|
4
|
+
|
|
5
|
+
- **LLM scorers** – leverage a judge model (e.g. OpenAI, Anthropic) to rate responses for qualities such as faithfulness or toxicity.
|
|
6
|
+
- **Code/NLP scorers** – deterministic heuristics (keyword coverage, similarity, etc.) that do not require an external model.
|
|
7
|
+
|
|
8
|
+
The scorers do not persist results or integrate with Mastra Storage; you decide where and how to record outcomes.
|
|
4
9
|
|
|
5
10
|
## Installation
|
|
6
11
|
|
|
@@ -8,171 +13,26 @@ A comprehensive evaluation framework for assessing AI model outputs across multi
|
|
|
8
13
|
npm install @mastra/evals
|
|
9
14
|
```
|
|
10
15
|
|
|
11
|
-
##
|
|
12
|
-
|
|
13
|
-
`@mastra/evals` provides a suite of evaluation metrics for assessing AI model outputs. The package includes both LLM-based and NLP-based metrics, enabling both automated and model-assisted evaluation of AI responses.
|
|
14
|
-
|
|
15
|
-
## Features
|
|
16
|
-
|
|
17
|
-
### LLM-Based Metrics
|
|
18
|
-
|
|
19
|
-
1. **Answer Relevancy**
|
|
20
|
-
- Evaluates how well an answer addresses the input question
|
|
21
|
-
- Considers uncertainty weighting for more nuanced scoring
|
|
22
|
-
- Returns detailed reasoning for scores
|
|
23
|
-
|
|
24
|
-
2. **Bias Detection**
|
|
25
|
-
- Identifies potential biases in model outputs
|
|
26
|
-
- Analyzes opinions and statements for bias indicators
|
|
27
|
-
- Provides explanations for detected biases
|
|
28
|
-
- Configurable scoring scale
|
|
29
|
-
|
|
30
|
-
3. **Context Precision & Relevancy**
|
|
31
|
-
- Assesses how well responses use provided context
|
|
32
|
-
- Evaluates accuracy of context usage
|
|
33
|
-
- Measures relevance of context to the response
|
|
34
|
-
- Analyzes context positioning in responses
|
|
35
|
-
|
|
36
|
-
4. **Faithfulness**
|
|
37
|
-
- Verifies that responses are faithful to provided context
|
|
38
|
-
- Detects hallucinations or fabricated information
|
|
39
|
-
- Evaluates claims against provided context
|
|
40
|
-
- Provides detailed analysis of faithfulness breaches
|
|
41
|
-
|
|
42
|
-
5. **Prompt Alignment**
|
|
43
|
-
- Measures how well responses follow given instructions
|
|
44
|
-
- Evaluates adherence to multiple instruction criteria
|
|
45
|
-
- Provides per-instruction scoring
|
|
46
|
-
- Supports custom instruction sets
|
|
47
|
-
|
|
48
|
-
6. **Toxicity**
|
|
49
|
-
- Detects toxic or harmful content in responses
|
|
50
|
-
- Provides detailed reasoning for toxicity verdicts
|
|
51
|
-
- Configurable scoring thresholds
|
|
52
|
-
- Considers both input and output context
|
|
16
|
+
## Quick Start
|
|
53
17
|
|
|
54
|
-
|
|
18
|
+
```ts
|
|
19
|
+
import { createFaithfulnessScorer, createContentSimilarityScorer } from '@mastra/evals/scorers/prebuilt';
|
|
55
20
|
|
|
56
|
-
|
|
57
|
-
-
|
|
58
|
-
- Identifies missing elements from input requirements
|
|
59
|
-
- Provides detailed element coverage analysis
|
|
60
|
-
- Tracks input-output element ratios
|
|
61
|
-
|
|
62
|
-
2. **Content Similarity**
|
|
63
|
-
- Measures text similarity between inputs and outputs
|
|
64
|
-
- Configurable for case and whitespace sensitivity
|
|
65
|
-
- Returns normalized similarity scores
|
|
66
|
-
- Uses string comparison algorithms for accuracy
|
|
67
|
-
|
|
68
|
-
3. **Keyword Coverage**
|
|
69
|
-
- Tracks presence of key terms from input in output
|
|
70
|
-
- Provides detailed keyword matching statistics
|
|
71
|
-
- Calculates coverage ratios
|
|
72
|
-
- Useful for ensuring comprehensive responses
|
|
73
|
-
|
|
74
|
-
## Usage
|
|
75
|
-
|
|
76
|
-
### Basic Example
|
|
77
|
-
|
|
78
|
-
```typescript
|
|
79
|
-
import { ContentSimilarityMetric, ToxicityMetric } from '@mastra/evals';
|
|
80
|
-
|
|
81
|
-
// Initialize metrics
|
|
82
|
-
const similarityMetric = new ContentSimilarityMetric({
|
|
83
|
-
ignoreCase: true,
|
|
84
|
-
ignoreWhitespace: true,
|
|
21
|
+
const faithfulness = createFaithfulnessScorer({
|
|
22
|
+
model: 'openai/gpt-4o-mini',
|
|
85
23
|
});
|
|
86
24
|
|
|
87
|
-
const
|
|
88
|
-
model: openai('gpt-4'),
|
|
89
|
-
scale: 1, // Optional: adjust scoring scale
|
|
90
|
-
});
|
|
91
|
-
|
|
92
|
-
// Evaluate outputs
|
|
93
|
-
const input = 'What is the capital of France?';
|
|
94
|
-
const output = 'Paris is the capital of France.';
|
|
95
|
-
|
|
96
|
-
const similarityResult = await similarityMetric.measure(input, output);
|
|
97
|
-
const toxicityResult = await toxicityMetric.measure(input, output);
|
|
98
|
-
|
|
99
|
-
console.log('Similarity Score:', similarityResult.score);
|
|
100
|
-
console.log('Toxicity Score:', toxicityResult.score);
|
|
101
|
-
```
|
|
25
|
+
const similarity = createContentSimilarityScorer({ ignoreCase: true });
|
|
102
26
|
|
|
103
|
-
|
|
27
|
+
const answer = 'Paris is the capital of France.';
|
|
28
|
+
const context = ['Paris is the capital of France', 'France is in Europe'];
|
|
104
29
|
|
|
105
|
-
|
|
106
|
-
import { FaithfulnessMetric } from '@mastra/evals';
|
|
30
|
+
const faithfulnessScore = await faithfulness.score({ answer, context });
|
|
107
31
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
context: ['Paris is the capital of France', 'Paris has a population of 2.2 million'],
|
|
112
|
-
scale: 1,
|
|
32
|
+
const similarityScore = await similarity.score({
|
|
33
|
+
input: context[0],
|
|
34
|
+
output: answer
|
|
113
35
|
});
|
|
114
36
|
|
|
115
|
-
|
|
116
|
-
const result = await faithfulnessMetric.measure(
|
|
117
|
-
'Tell me about Paris',
|
|
118
|
-
'Paris is the capital of France with 2.2 million residents',
|
|
119
|
-
);
|
|
120
|
-
|
|
121
|
-
console.log('Faithfulness Score:', result.score);
|
|
122
|
-
console.log('Reasoning:', result.reason);
|
|
37
|
+
console.log({ faithfulnessScore, similarityScore });
|
|
123
38
|
```
|
|
124
|
-
|
|
125
|
-
## Metric Results
|
|
126
|
-
|
|
127
|
-
Each metric returns a standardized result object containing:
|
|
128
|
-
|
|
129
|
-
- `score`: Normalized score (typically 0-1)
|
|
130
|
-
- `info`: Detailed information about the evaluation
|
|
131
|
-
- Additional metric-specific data (e.g., matched keywords, missing elements)
|
|
132
|
-
|
|
133
|
-
Some metrics also provide:
|
|
134
|
-
|
|
135
|
-
- `reason`: Detailed explanation of the score
|
|
136
|
-
- `verdicts`: Individual judgments that contributed to the final score
|
|
137
|
-
|
|
138
|
-
## Telemetry and Logging
|
|
139
|
-
|
|
140
|
-
The package includes built-in telemetry and logging capabilities:
|
|
141
|
-
|
|
142
|
-
- Automatic evaluation tracking through Mastra Storage
|
|
143
|
-
- Integration with OpenTelemetry for performance monitoring
|
|
144
|
-
- Detailed evaluation traces for debugging
|
|
145
|
-
|
|
146
|
-
```typescript
|
|
147
|
-
import { attachListeners } from '@mastra/evals';
|
|
148
|
-
|
|
149
|
-
// Enable basic evaluation tracking
|
|
150
|
-
await attachListeners();
|
|
151
|
-
|
|
152
|
-
// Store evals in Mastra Storage (if storage is enabled)
|
|
153
|
-
await attachListeners(mastra);
|
|
154
|
-
// Note: When using in-memory storage, evaluations are isolated to the test process.
|
|
155
|
-
// When using file storage, evaluations are persisted and can be queried later.
|
|
156
|
-
```
|
|
157
|
-
|
|
158
|
-
## Environment Variables
|
|
159
|
-
|
|
160
|
-
Required for LLM-based metrics:
|
|
161
|
-
|
|
162
|
-
- `OPENAI_API_KEY`: For OpenAI model access
|
|
163
|
-
- Additional provider keys as needed (Cohere, Anthropic, etc.)
|
|
164
|
-
|
|
165
|
-
## Package Exports
|
|
166
|
-
|
|
167
|
-
```typescript
|
|
168
|
-
// Main package exports
|
|
169
|
-
import { evaluate } from '@mastra/evals';
|
|
170
|
-
// NLP-specific metrics
|
|
171
|
-
import { ContentSimilarityMetric } from '@mastra/evals/nlp';
|
|
172
|
-
```
|
|
173
|
-
|
|
174
|
-
## Related Packages
|
|
175
|
-
|
|
176
|
-
- `@mastra/core`: Core framework functionality
|
|
177
|
-
- `@mastra/engine`: LLM execution engine
|
|
178
|
-
- `@mastra/mcp`: Model Context Protocol integration
|
|
@@ -1,22 +1,33 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { RequestContext } from '@mastra/core/request-context';
|
|
2
2
|
|
|
3
3
|
// src/scorers/utils.ts
|
|
4
|
+
function getTextContentFromMastraDBMessage(message) {
|
|
5
|
+
if (typeof message.content.content === "string" && message.content.content !== "") {
|
|
6
|
+
return message.content.content;
|
|
7
|
+
}
|
|
8
|
+
if (message.content.parts && Array.isArray(message.content.parts)) {
|
|
9
|
+
const textParts = message.content.parts.filter((p) => p.type === "text");
|
|
10
|
+
return textParts.length > 0 ? textParts[textParts.length - 1]?.text || "" : "";
|
|
11
|
+
}
|
|
12
|
+
return "";
|
|
13
|
+
}
|
|
4
14
|
var roundToTwoDecimals = (num) => {
|
|
5
15
|
return Math.round((num + Number.EPSILON) * 100) / 100;
|
|
6
16
|
};
|
|
7
17
|
function isCloserTo(value, target1, target2) {
|
|
8
18
|
return Math.abs(value - target1) < Math.abs(value - target2);
|
|
9
19
|
}
|
|
10
|
-
var createTestRun = (input, output, additionalContext,
|
|
20
|
+
var createTestRun = (input, output, additionalContext, requestContext) => {
|
|
11
21
|
return {
|
|
12
22
|
input: [{ role: "user", content: input }],
|
|
13
23
|
output: { role: "assistant", text: output },
|
|
14
24
|
additionalContext: additionalContext ?? {},
|
|
15
|
-
|
|
25
|
+
requestContext: requestContext ?? {}
|
|
16
26
|
};
|
|
17
27
|
};
|
|
18
28
|
var getUserMessageFromRunInput = (input) => {
|
|
19
|
-
|
|
29
|
+
const message = input?.inputMessages.find(({ role }) => role === "user");
|
|
30
|
+
return message ? getTextContentFromMastraDBMessage(message) : void 0;
|
|
20
31
|
};
|
|
21
32
|
var getSystemMessagesFromRunInput = (input) => {
|
|
22
33
|
const systemMessages = [];
|
|
@@ -48,7 +59,8 @@ var getCombinedSystemPrompt = (input) => {
|
|
|
48
59
|
return systemMessages.join("\n\n");
|
|
49
60
|
};
|
|
50
61
|
var getAssistantMessageFromRunOutput = (output) => {
|
|
51
|
-
|
|
62
|
+
const message = output?.find(({ role }) => role === "assistant");
|
|
63
|
+
return message ? getTextContentFromMastraDBMessage(message) : void 0;
|
|
52
64
|
};
|
|
53
65
|
var createToolInvocation = ({
|
|
54
66
|
toolCallId,
|
|
@@ -65,27 +77,39 @@ var createToolInvocation = ({
|
|
|
65
77
|
state
|
|
66
78
|
};
|
|
67
79
|
};
|
|
68
|
-
|
|
80
|
+
function createTestMessage({
|
|
69
81
|
content,
|
|
70
82
|
role,
|
|
71
83
|
id = "test-message",
|
|
72
84
|
toolInvocations = []
|
|
73
|
-
})
|
|
85
|
+
}) {
|
|
74
86
|
return {
|
|
75
87
|
id,
|
|
76
88
|
role,
|
|
77
|
-
content
|
|
78
|
-
|
|
79
|
-
|
|
89
|
+
content: {
|
|
90
|
+
format: 2,
|
|
91
|
+
parts: [{ type: "text", text: content }],
|
|
92
|
+
content,
|
|
93
|
+
...toolInvocations.length > 0 && {
|
|
94
|
+
toolInvocations: toolInvocations.map((ti) => ({
|
|
95
|
+
toolCallId: ti.toolCallId,
|
|
96
|
+
toolName: ti.toolName,
|
|
97
|
+
args: ti.args,
|
|
98
|
+
result: ti.result,
|
|
99
|
+
state: ti.state
|
|
100
|
+
}))
|
|
101
|
+
}
|
|
102
|
+
},
|
|
103
|
+
createdAt: /* @__PURE__ */ new Date()
|
|
80
104
|
};
|
|
81
|
-
}
|
|
105
|
+
}
|
|
82
106
|
var createAgentTestRun = ({
|
|
83
107
|
inputMessages = [],
|
|
84
108
|
output,
|
|
85
109
|
rememberedMessages = [],
|
|
86
110
|
systemMessages = [],
|
|
87
111
|
taggedSystemMessages = {},
|
|
88
|
-
|
|
112
|
+
requestContext = new RequestContext(),
|
|
89
113
|
runId = crypto.randomUUID()
|
|
90
114
|
}) => {
|
|
91
115
|
return {
|
|
@@ -96,7 +120,7 @@ var createAgentTestRun = ({
|
|
|
96
120
|
taggedSystemMessages
|
|
97
121
|
},
|
|
98
122
|
output,
|
|
99
|
-
|
|
123
|
+
requestContext,
|
|
100
124
|
runId
|
|
101
125
|
};
|
|
102
126
|
};
|
|
@@ -105,9 +129,9 @@ function extractToolCalls(output) {
|
|
|
105
129
|
const toolCallInfos = [];
|
|
106
130
|
for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {
|
|
107
131
|
const message = output[messageIndex];
|
|
108
|
-
if (message?.toolInvocations) {
|
|
109
|
-
for (let invocationIndex = 0; invocationIndex < message.toolInvocations.length; invocationIndex++) {
|
|
110
|
-
const invocation = message.toolInvocations[invocationIndex];
|
|
132
|
+
if (message?.content?.toolInvocations) {
|
|
133
|
+
for (let invocationIndex = 0; invocationIndex < message.content.toolInvocations.length; invocationIndex++) {
|
|
134
|
+
const invocation = message.content.toolInvocations[invocationIndex];
|
|
111
135
|
if (invocation && invocation.toolName && (invocation.state === "result" || invocation.state === "call")) {
|
|
112
136
|
toolCalls.push(invocation.toolName);
|
|
113
137
|
toolCallInfos.push({
|
|
@@ -123,12 +147,12 @@ function extractToolCalls(output) {
|
|
|
123
147
|
return { tools: toolCalls, toolCallInfos };
|
|
124
148
|
}
|
|
125
149
|
var extractInputMessages = (runInput) => {
|
|
126
|
-
return runInput?.inputMessages?.map((msg) => msg
|
|
150
|
+
return runInput?.inputMessages?.map((msg) => getTextContentFromMastraDBMessage(msg)) || [];
|
|
127
151
|
};
|
|
128
152
|
var extractAgentResponseMessages = (runOutput) => {
|
|
129
|
-
return runOutput.filter((msg) => msg.role === "assistant").map((msg) => msg
|
|
153
|
+
return runOutput.filter((msg) => msg.role === "assistant").map((msg) => getTextContentFromMastraDBMessage(msg));
|
|
130
154
|
};
|
|
131
155
|
|
|
132
|
-
export { createAgentTestRun, createTestRun, createToolInvocation,
|
|
133
|
-
//# sourceMappingURL=chunk-
|
|
134
|
-
//# sourceMappingURL=chunk-
|
|
156
|
+
export { createAgentTestRun, createTestMessage, createTestRun, createToolInvocation, extractAgentResponseMessages, extractInputMessages, extractToolCalls, getAssistantMessageFromRunOutput, getCombinedSystemPrompt, getSystemMessagesFromRunInput, getTextContentFromMastraDBMessage, getUserMessageFromRunInput, isCloserTo, roundToTwoDecimals };
|
|
157
|
+
//# sourceMappingURL=chunk-CCLM7KPF.js.map
|
|
158
|
+
//# sourceMappingURL=chunk-CCLM7KPF.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/scorers/utils.ts"],"names":[],"mappings":";;;AASO,SAAS,kCAAkC,OAAA,EAAkC;AAClF,EAAA,IAAI,OAAO,QAAQ,OAAA,CAAQ,OAAA,KAAY,YAAY,OAAA,CAAQ,OAAA,CAAQ,YAAY,EAAA,EAAI;AACjF,IAAA,OAAO,QAAQ,OAAA,CAAQ,OAAA;AAAA,EACzB;AACA,EAAA,IAAI,OAAA,CAAQ,QAAQ,KAAA,IAAS,KAAA,CAAM,QAAQ,OAAA,CAAQ,OAAA,CAAQ,KAAK,CAAA,EAAG;AAEjE,IAAA,MAAM,SAAA,GAAY,QAAQ,OAAA,CAAQ,KAAA,CAAM,OAAO,CAAA,CAAA,KAAK,CAAA,CAAE,SAAS,MAAM,CAAA;AACrE,IAAA,OAAO,SAAA,CAAU,SAAS,CAAA,GAAI,SAAA,CAAU,UAAU,MAAA,GAAS,CAAC,CAAA,EAAG,IAAA,IAAQ,EAAA,GAAK,EAAA;AAAA,EAC9E;AACA,EAAA,OAAO,EAAA;AACT;AAEO,IAAM,kBAAA,GAAqB,CAAC,GAAA,KAAgB;AACjD,EAAA,OAAO,KAAK,KAAA,CAAA,CAAO,GAAA,GAAM,MAAA,CAAO,OAAA,IAAW,GAAG,CAAA,GAAI,GAAA;AACpD;AAEO,SAAS,UAAA,CAAW,KAAA,EAAe,OAAA,EAAiB,OAAA,EAA0B;AACnF,EAAA,OAAO,IAAA,CAAK,IAAI,KAAA,GAAQ,OAAO,IAAI,IAAA,CAAK,GAAA,CAAI,QAAQ,OAAO,CAAA;AAC7D;AAeO,IAAM,aAAA,GAAgB,CAC3B,KAAA,EACA,MAAA,EACA,mBACA,cAAA,KACiB;AACjB,EAAA,OAAO;AAAA,IACL,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,OAAA,EAAS,OAAO,CAAA;AAAA,IACxC,MAAA,EAAQ,EAAE,IAAA,EAAM,WAAA,EAAa,MAAM,MAAA,EAAO;AAAA,IAC1C,iBAAA,EAAmB,qBAAqB,EAAC;AAAA,IACzC,cAAA,EAAgB,kBAAkB;AAAC,GACrC;AACF;AAEO,IAAM,0BAAA,GAA6B,CAAC,KAAA,KAAuD;AAChG,EAAA,MAAM,OAAA,GAAU,OAAO,aAAA,CAAc,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,IAAA,KAAS,MAAM,CAAA;AACvE,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAEO,IAAM,6BAAA,GAAgC,CAAC,KAAA,KAA6C;AACzF,EAAA,MAAM,iBAA2B,EAAC;AAGlC,EAAA,IAAI,OAAO,cAAA,EAAgB;AACzB,IAAA,cAAA,CAAe,IAAA;AAAA,MACb,GAAG,KAAA,CAAM,cAAA,CACN,GAAA,CAAI,CAAA,GAAA,KAAO;AAEV,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,OAAO,GAAA,CAAI,OAAA;AAAA,QACb,CAAA,MAAA,IAAW,KAAA,CAAM,OAAA,CAAQ,GAAA,CAAI,OAAO,CAAA,EAAG;AAErC,UAAA,OAAO,GAAA,CAAI,OAAA,CACR,MAAA,CAAO,CAAA,IAAA,KAAQ,KAAK,IAAA,KAAS,MAAM,CAAA,CACnC,GAAA,CAAI,UAAQ,IAAA,CAAK,IAAA,IAAQ,EAAE,CAAA,CAC3B,KAAK,GAAG,CAAA;AAAA,QACb;AACA,QAAA,OAAO,EAAA;AAAA,MACT,CAAC,CAAA,CACA,MAAA,CAAO,CAAA,OAAA,KAAW,OAAO;AAAA,KAC9B;AAAA,EACF;AAGA,EAAA,IAAI,OAAO,oBAAA,EAAsB;AAC/B,IAAA,MAAA,CAAO,MAAA,CAAO,KAAA,CAAM,oBAAoB,CA
AA,CAAE,QAAQ,CAAA,QAAA,KAAY;AAC5D,MAAA,QAAA,CAAS,QAAQ,CAAA,GAAA,KAAO;AACtB,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,cAAA,CAAe,IAAA,CAAK,IAAI,OAAO,CAAA;AAAA,QACjC;AAAA,MACF,CAAC,CAAA;AAAA,IACH,CAAC,CAAA;AAAA,EACH;AAEA,EAAA,OAAO,cAAA;AACT;AAEO,IAAM,uBAAA,GAA0B,CAAC,KAAA,KAA2C;AACjF,EAAA,MAAM,cAAA,GAAiB,8BAA8B,KAAK,CAAA;AAC1D,EAAA,OAAO,cAAA,CAAe,KAAK,MAAM,CAAA;AACnC;AAEO,IAAM,gCAAA,GAAmC,CAAC,MAAA,KAAqC;AACpF,EAAA,MAAM,OAAA,GAAU,QAAQ,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,SAAS,WAAW,CAAA;AAC/D,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAEO,IAAM,uBAAuB,CAAC;AAAA,EACnC,UAAA;AAAA,EACA,QAAA;AAAA,EACA,IAAA;AAAA,EACA,MAAA;AAAA,EACA,KAAA,GAAQ;AACV,CAAA,KAMuH;AACrH,EAAA,OAAO;AAAA,IACL,UAAA;AAAA,IACA,QAAA;AAAA,IACA,IAAA;AAAA,IACA,MAAA;AAAA,IACA;AAAA,GACF;AACF;AAMO,SAAS,iBAAA,CAAkB;AAAA,EAChC,OAAA;AAAA,EACA,IAAA;AAAA,EACA,EAAA,GAAK,cAAA;AAAA,EACL,kBAAkB;AACpB,CAAA,EAWoB;AAClB,EAAA,OAAO;AAAA,IACL,EAAA;AAAA,IACA,IAAA;AAAA,IACA,OAAA,EAAS;AAAA,MACP,MAAA,EAAQ,CAAA;AAAA,MACR,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,IAAA,EAAM,SAAS,CAAA;AAAA,MACvC,OAAA;AAAA,MACA,GAAI,eAAA,CAAgB,MAAA,GAAS,CAAA,IAAK;AAAA,QAChC,eAAA,EAAiB,eAAA,CAAgB,GAAA,CAAI,CAAA,EAAA,MAAO;AAAA,UAC1C,YAAY,EAAA,CAAG,UAAA;AAAA,UACf,UAAU,EAAA,CAAG,QAAA;AAAA,UACb,MAAM,EAAA,CAAG,IAAA;AAAA,UACT,QAAQ,EAAA,CAAG,MAAA;AAAA,UACX,OAAO,EAAA,CAAG;AAAA,SACZ,CAAE;AAAA;AACJ,KACF;AAAA,IACA,SAAA,sBAAe,IAAA;AAAK,GACtB;AACF;AAEO,IAAM,qBAAqB,CAAC;AAAA,EACjC,gBAAgB,EAAC;AAAA,EACjB,MAAA;AAAA,EACA,qBAAqB,EAAC;AAAA,EACtB,iBAAiB,EAAC;AAAA,EAClB,uBAAuB,EAAC;AAAA,EACxB,cAAA,GAAiB,IAAI,cAAA,EAAe;AAAA,EACpC,KAAA,GAAQ,OAAO,UAAA;AACjB,CAAA,KAaK;AACH,EAAA,OAAO;AAAA,IACL,KAAA,EAAO;AAAA,MACL,aAAA;AAAA,MACA,kBAAA;AAAA,MACA,cAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,MAAA;AAAA,IACA,cAAA;AAAA,IACA;AAAA,GACF;AACF;AASO,SAAS,iBAAiB,MAAA,EAAqF;AACpH,EAAA,MAAM,YAAsB,EAAC;AAC7B,EAAA,MAAM,gBAAgC,EAAC;AAEvC,EAAA,KAAA,IAAS,YAAA,GAAe,CAAA,EAAG,YAAA,GAAe,MAAA,CAAO,QAAQ,YAAA,EAAA,EAAgB;AACvE,IAAA,MAAM,OAAA,GAAU,OAAO,YAAY,CAAA;AAEnC,IAAA,IAAI,OAAA,EAAS,SAAS,eAAA,EAAi
B;AACrC,MAAA,KAAA,IAAS,kBAAkB,CAAA,EAAG,eAAA,GAAkB,QAAQ,OAAA,CAAQ,eAAA,CAAgB,QAAQ,eAAA,EAAA,EAAmB;AACzG,QAAA,MAAM,UAAA,GAAa,OAAA,CAAQ,OAAA,CAAQ,eAAA,CAAgB,eAAe,CAAA;AAClE,QAAA,IAAI,UAAA,IAAc,WAAW,QAAA,KAAa,UAAA,CAAW,UAAU,QAAA,IAAY,UAAA,CAAW,UAAU,MAAA,CAAA,EAAS;AACvG,UAAA,SAAA,CAAU,IAAA,CAAK,WAAW,QAAQ,CAAA;AAClC,UAAA,aAAA,CAAc,IAAA,CAAK;AAAA,YACjB,UAAU,UAAA,CAAW,QAAA;AAAA,YACrB,YAAY,UAAA,CAAW,UAAA,IAAc,CAAA,EAAG,YAAY,IAAI,eAAe,CAAA,CAAA;AAAA,YACvE,YAAA;AAAA,YACA;AAAA,WACD,CAAA;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,EAAE,KAAA,EAAO,SAAA,EAAW,aAAA,EAAc;AAC3C;AAEO,IAAM,oBAAA,GAAuB,CAAC,QAAA,KAA2D;AAC9F,EAAA,OAAO,QAAA,EAAU,eAAe,GAAA,CAAI,CAAA,GAAA,KAAO,kCAAkC,GAAG,CAAC,KAAK,EAAC;AACzF;AAEO,IAAM,4BAAA,GAA+B,CAAC,SAAA,KAAiD;AAC5F,EAAA,OAAO,SAAA,CAAU,MAAA,CAAO,CAAA,GAAA,KAAO,GAAA,CAAI,IAAA,KAAS,WAAW,CAAA,CAAE,GAAA,CAAI,CAAA,GAAA,KAAO,iCAAA,CAAkC,GAAG,CAAC,CAAA;AAC5G","file":"chunk-CCLM7KPF.js","sourcesContent":["import type { MastraDBMessage } from '@mastra/core/agent';\nimport type { ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput } from '@mastra/core/evals';\nimport { RequestContext } from '@mastra/core/request-context';\nimport type { ToolInvocation } from 'ai';\n\n/**\n * Extract text content from MastraDBMessage\n * Matches the logic used in MessageList.mastraDBMessageToAIV4UIMessage\n */\nexport function getTextContentFromMastraDBMessage(message: MastraDBMessage): string {\n if (typeof message.content.content === 'string' && message.content.content !== '') {\n return message.content.content;\n }\n if (message.content.parts && Array.isArray(message.content.parts)) {\n // Return only the last text part like AI SDK does\n const textParts = message.content.parts.filter(p => p.type === 'text');\n return textParts.length > 0 ? 
textParts[textParts.length - 1]?.text || '' : '';\n }\n return '';\n}\n\nexport const roundToTwoDecimals = (num: number) => {\n return Math.round((num + Number.EPSILON) * 100) / 100;\n};\n\nexport function isCloserTo(value: number, target1: number, target2: number): boolean {\n return Math.abs(value - target1) < Math.abs(value - target2);\n}\n\nexport type TestCase = {\n input: string;\n output: string;\n expectedResult: {\n score: number;\n reason?: string;\n };\n};\n\nexport type TestCaseWithContext = TestCase & {\n context: string[];\n};\n\nexport const createTestRun = (\n input: string,\n output: string,\n additionalContext?: Record<string, any>,\n requestContext?: Record<string, any>,\n): ScoringInput => {\n return {\n input: [{ role: 'user', content: input }],\n output: { role: 'assistant', text: output },\n additionalContext: additionalContext ?? {},\n requestContext: requestContext ?? {},\n };\n};\n\nexport const getUserMessageFromRunInput = (input?: ScorerRunInputForAgent): string | undefined => {\n const message = input?.inputMessages.find(({ role }) => role === 'user');\n return message ? 
getTextContentFromMastraDBMessage(message) : undefined;\n};\n\nexport const getSystemMessagesFromRunInput = (input?: ScorerRunInputForAgent): string[] => {\n const systemMessages: string[] = [];\n\n // Add standard system messages\n if (input?.systemMessages) {\n systemMessages.push(\n ...input.systemMessages\n .map(msg => {\n // Handle different content types - extract text if it's an array of parts\n if (typeof msg.content === 'string') {\n return msg.content;\n } else if (Array.isArray(msg.content)) {\n // Extract text from parts array\n return msg.content\n .filter(part => part.type === 'text')\n .map(part => part.text || '')\n .join(' ');\n }\n return '';\n })\n .filter(content => content),\n );\n }\n\n // Add tagged system messages (these are specialized system prompts)\n if (input?.taggedSystemMessages) {\n Object.values(input.taggedSystemMessages).forEach(messages => {\n messages.forEach(msg => {\n if (typeof msg.content === 'string') {\n systemMessages.push(msg.content);\n }\n });\n });\n }\n\n return systemMessages;\n};\n\nexport const getCombinedSystemPrompt = (input?: ScorerRunInputForAgent): string => {\n const systemMessages = getSystemMessagesFromRunInput(input);\n return systemMessages.join('\\n\\n');\n};\n\nexport const getAssistantMessageFromRunOutput = (output?: ScorerRunOutputForAgent) => {\n const message = output?.find(({ role }) => role === 'assistant');\n return message ? 
getTextContentFromMastraDBMessage(message) : undefined;\n};\n\nexport const createToolInvocation = ({\n toolCallId,\n toolName,\n args,\n result,\n state = 'result',\n}: {\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state?: ToolInvocation['state'];\n}): { toolCallId: string; toolName: string; args: Record<string, any>; result: Record<string, any>; state: string } => {\n return {\n toolCallId,\n toolName,\n args,\n result,\n state,\n };\n};\n\n/**\n * Helper function to create MastraDBMessage objects for tests\n * Supports optional tool invocations for testing tool call scenarios\n */\nexport function createTestMessage({\n content,\n role,\n id = 'test-message',\n toolInvocations = [],\n}: {\n content: string;\n role: 'user' | 'assistant' | 'system';\n id?: string;\n toolInvocations?: Array<{\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state: any;\n }>;\n}): MastraDBMessage {\n return {\n id,\n role,\n content: {\n format: 2,\n parts: [{ type: 'text', text: content }],\n content,\n ...(toolInvocations.length > 0 && {\n toolInvocations: toolInvocations.map(ti => ({\n toolCallId: ti.toolCallId,\n toolName: ti.toolName,\n args: ti.args,\n result: ti.result,\n state: ti.state,\n })),\n }),\n },\n createdAt: new Date(),\n };\n}\n\nexport const createAgentTestRun = ({\n inputMessages = [],\n output,\n rememberedMessages = [],\n systemMessages = [],\n taggedSystemMessages = {},\n requestContext = new RequestContext(),\n runId = crypto.randomUUID(),\n}: {\n inputMessages?: ScorerRunInputForAgent['inputMessages'];\n output: ScorerRunOutputForAgent;\n rememberedMessages?: ScorerRunInputForAgent['rememberedMessages'];\n systemMessages?: ScorerRunInputForAgent['systemMessages'];\n taggedSystemMessages?: ScorerRunInputForAgent['taggedSystemMessages'];\n requestContext?: RequestContext;\n runId?: string;\n}): {\n input: ScorerRunInputForAgent;\n output: 
ScorerRunOutputForAgent;\n requestContext: RequestContext;\n runId: string;\n} => {\n return {\n input: {\n inputMessages,\n rememberedMessages,\n systemMessages,\n taggedSystemMessages,\n },\n output,\n requestContext,\n runId,\n };\n};\n\nexport type ToolCallInfo = {\n toolName: string;\n toolCallId: string;\n messageIndex: number;\n invocationIndex: number;\n};\n\nexport function extractToolCalls(output: ScorerRunOutputForAgent): { tools: string[]; toolCallInfos: ToolCallInfo[] } {\n const toolCalls: string[] = [];\n const toolCallInfos: ToolCallInfo[] = [];\n\n for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {\n const message = output[messageIndex];\n // Tool invocations are now nested under content\n if (message?.content?.toolInvocations) {\n for (let invocationIndex = 0; invocationIndex < message.content.toolInvocations.length; invocationIndex++) {\n const invocation = message.content.toolInvocations[invocationIndex];\n if (invocation && invocation.toolName && (invocation.state === 'result' || invocation.state === 'call')) {\n toolCalls.push(invocation.toolName);\n toolCallInfos.push({\n toolName: invocation.toolName,\n toolCallId: invocation.toolCallId || `${messageIndex}-${invocationIndex}`,\n messageIndex,\n invocationIndex,\n });\n }\n }\n }\n }\n\n return { tools: toolCalls, toolCallInfos };\n}\n\nexport const extractInputMessages = (runInput: ScorerRunInputForAgent | undefined): string[] => {\n return runInput?.inputMessages?.map(msg => getTextContentFromMastraDBMessage(msg)) || [];\n};\n\nexport const extractAgentResponseMessages = (runOutput: ScorerRunOutputForAgent): string[] => {\n return runOutput.filter(msg => msg.role === 'assistant').map(msg => getTextContentFromMastraDBMessage(msg));\n};\n"]}
|
|
@@ -1,24 +1,35 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
var
|
|
3
|
+
var requestContext = require('@mastra/core/request-context');
|
|
4
4
|
|
|
5
5
|
// src/scorers/utils.ts
|
|
6
|
+
function getTextContentFromMastraDBMessage(message) {
|
|
7
|
+
if (typeof message.content.content === "string" && message.content.content !== "") {
|
|
8
|
+
return message.content.content;
|
|
9
|
+
}
|
|
10
|
+
if (message.content.parts && Array.isArray(message.content.parts)) {
|
|
11
|
+
const textParts = message.content.parts.filter((p) => p.type === "text");
|
|
12
|
+
return textParts.length > 0 ? textParts[textParts.length - 1]?.text || "" : "";
|
|
13
|
+
}
|
|
14
|
+
return "";
|
|
15
|
+
}
|
|
6
16
|
var roundToTwoDecimals = (num) => {
|
|
7
17
|
return Math.round((num + Number.EPSILON) * 100) / 100;
|
|
8
18
|
};
|
|
9
19
|
function isCloserTo(value, target1, target2) {
|
|
10
20
|
return Math.abs(value - target1) < Math.abs(value - target2);
|
|
11
21
|
}
|
|
12
|
-
var createTestRun = (input, output, additionalContext,
|
|
22
|
+
var createTestRun = (input, output, additionalContext, requestContext) => {
|
|
13
23
|
return {
|
|
14
24
|
input: [{ role: "user", content: input }],
|
|
15
25
|
output: { role: "assistant", text: output },
|
|
16
26
|
additionalContext: additionalContext ?? {},
|
|
17
|
-
|
|
27
|
+
requestContext: requestContext ?? {}
|
|
18
28
|
};
|
|
19
29
|
};
|
|
20
30
|
var getUserMessageFromRunInput = (input) => {
|
|
21
|
-
|
|
31
|
+
const message = input?.inputMessages.find(({ role }) => role === "user");
|
|
32
|
+
return message ? getTextContentFromMastraDBMessage(message) : void 0;
|
|
22
33
|
};
|
|
23
34
|
var getSystemMessagesFromRunInput = (input) => {
|
|
24
35
|
const systemMessages = [];
|
|
@@ -50,7 +61,8 @@ var getCombinedSystemPrompt = (input) => {
|
|
|
50
61
|
return systemMessages.join("\n\n");
|
|
51
62
|
};
|
|
52
63
|
var getAssistantMessageFromRunOutput = (output) => {
|
|
53
|
-
|
|
64
|
+
const message = output?.find(({ role }) => role === "assistant");
|
|
65
|
+
return message ? getTextContentFromMastraDBMessage(message) : void 0;
|
|
54
66
|
};
|
|
55
67
|
var createToolInvocation = ({
|
|
56
68
|
toolCallId,
|
|
@@ -67,27 +79,39 @@ var createToolInvocation = ({
|
|
|
67
79
|
state
|
|
68
80
|
};
|
|
69
81
|
};
|
|
70
|
-
|
|
82
|
+
function createTestMessage({
|
|
71
83
|
content,
|
|
72
84
|
role,
|
|
73
85
|
id = "test-message",
|
|
74
86
|
toolInvocations = []
|
|
75
|
-
})
|
|
87
|
+
}) {
|
|
76
88
|
return {
|
|
77
89
|
id,
|
|
78
90
|
role,
|
|
79
|
-
content
|
|
80
|
-
|
|
81
|
-
|
|
91
|
+
content: {
|
|
92
|
+
format: 2,
|
|
93
|
+
parts: [{ type: "text", text: content }],
|
|
94
|
+
content,
|
|
95
|
+
...toolInvocations.length > 0 && {
|
|
96
|
+
toolInvocations: toolInvocations.map((ti) => ({
|
|
97
|
+
toolCallId: ti.toolCallId,
|
|
98
|
+
toolName: ti.toolName,
|
|
99
|
+
args: ti.args,
|
|
100
|
+
result: ti.result,
|
|
101
|
+
state: ti.state
|
|
102
|
+
}))
|
|
103
|
+
}
|
|
104
|
+
},
|
|
105
|
+
createdAt: /* @__PURE__ */ new Date()
|
|
82
106
|
};
|
|
83
|
-
}
|
|
107
|
+
}
|
|
84
108
|
var createAgentTestRun = ({
|
|
85
109
|
inputMessages = [],
|
|
86
110
|
output,
|
|
87
111
|
rememberedMessages = [],
|
|
88
112
|
systemMessages = [],
|
|
89
113
|
taggedSystemMessages = {},
|
|
90
|
-
|
|
114
|
+
requestContext: requestContext$1 = new requestContext.RequestContext(),
|
|
91
115
|
runId = crypto.randomUUID()
|
|
92
116
|
}) => {
|
|
93
117
|
return {
|
|
@@ -98,7 +122,7 @@ var createAgentTestRun = ({
|
|
|
98
122
|
taggedSystemMessages
|
|
99
123
|
},
|
|
100
124
|
output,
|
|
101
|
-
|
|
125
|
+
requestContext: requestContext$1,
|
|
102
126
|
runId
|
|
103
127
|
};
|
|
104
128
|
};
|
|
@@ -107,9 +131,9 @@ function extractToolCalls(output) {
|
|
|
107
131
|
const toolCallInfos = [];
|
|
108
132
|
for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {
|
|
109
133
|
const message = output[messageIndex];
|
|
110
|
-
if (message?.toolInvocations) {
|
|
111
|
-
for (let invocationIndex = 0; invocationIndex < message.toolInvocations.length; invocationIndex++) {
|
|
112
|
-
const invocation = message.toolInvocations[invocationIndex];
|
|
134
|
+
if (message?.content?.toolInvocations) {
|
|
135
|
+
for (let invocationIndex = 0; invocationIndex < message.content.toolInvocations.length; invocationIndex++) {
|
|
136
|
+
const invocation = message.content.toolInvocations[invocationIndex];
|
|
113
137
|
if (invocation && invocation.toolName && (invocation.state === "result" || invocation.state === "call")) {
|
|
114
138
|
toolCalls.push(invocation.toolName);
|
|
115
139
|
toolCallInfos.push({
|
|
@@ -125,24 +149,25 @@ function extractToolCalls(output) {
|
|
|
125
149
|
return { tools: toolCalls, toolCallInfos };
|
|
126
150
|
}
|
|
127
151
|
var extractInputMessages = (runInput) => {
|
|
128
|
-
return runInput?.inputMessages?.map((msg) => msg
|
|
152
|
+
return runInput?.inputMessages?.map((msg) => getTextContentFromMastraDBMessage(msg)) || [];
|
|
129
153
|
};
|
|
130
154
|
var extractAgentResponseMessages = (runOutput) => {
|
|
131
|
-
return runOutput.filter((msg) => msg.role === "assistant").map((msg) => msg
|
|
155
|
+
return runOutput.filter((msg) => msg.role === "assistant").map((msg) => getTextContentFromMastraDBMessage(msg));
|
|
132
156
|
};
|
|
133
157
|
|
|
134
158
|
exports.createAgentTestRun = createAgentTestRun;
|
|
159
|
+
exports.createTestMessage = createTestMessage;
|
|
135
160
|
exports.createTestRun = createTestRun;
|
|
136
161
|
exports.createToolInvocation = createToolInvocation;
|
|
137
|
-
exports.createUIMessage = createUIMessage;
|
|
138
162
|
exports.extractAgentResponseMessages = extractAgentResponseMessages;
|
|
139
163
|
exports.extractInputMessages = extractInputMessages;
|
|
140
164
|
exports.extractToolCalls = extractToolCalls;
|
|
141
165
|
exports.getAssistantMessageFromRunOutput = getAssistantMessageFromRunOutput;
|
|
142
166
|
exports.getCombinedSystemPrompt = getCombinedSystemPrompt;
|
|
143
167
|
exports.getSystemMessagesFromRunInput = getSystemMessagesFromRunInput;
|
|
168
|
+
exports.getTextContentFromMastraDBMessage = getTextContentFromMastraDBMessage;
|
|
144
169
|
exports.getUserMessageFromRunInput = getUserMessageFromRunInput;
|
|
145
170
|
exports.isCloserTo = isCloserTo;
|
|
146
171
|
exports.roundToTwoDecimals = roundToTwoDecimals;
|
|
147
|
-
//# sourceMappingURL=chunk-
|
|
148
|
-
//# sourceMappingURL=chunk-
|
|
172
|
+
//# sourceMappingURL=chunk-TPQLLHZW.cjs.map
|
|
173
|
+
//# sourceMappingURL=chunk-TPQLLHZW.cjs.map
|