@mastra/evals 1.0.0-beta.0 → 1.0.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/dist/{chunk-TPQLLHZW.cjs → chunk-AT7HXT3U.cjs} +22 -2
- package/dist/chunk-AT7HXT3U.cjs.map +1 -0
- package/dist/{chunk-CCLM7KPF.js → chunk-CKKVCGRB.js} +22 -3
- package/dist/chunk-CKKVCGRB.js.map +1 -0
- package/dist/scorers/prebuilt/index.cjs +59 -59
- package/dist/scorers/prebuilt/index.js +1 -1
- package/dist/scorers/utils.cjs +19 -15
- package/dist/scorers/utils.d.ts +319 -4
- package/dist/scorers/utils.d.ts.map +1 -1
- package/dist/scorers/utils.js +1 -1
- package/package.json +6 -6
- package/dist/chunk-CCLM7KPF.js.map +0 -1
- package/dist/chunk-TPQLLHZW.cjs.map +0 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,25 @@
|
|
|
1
1
|
# @mastra/evals
|
|
2
2
|
|
|
3
|
+
## 1.0.0-beta.2
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- Add `getReasoningFromRunOutput` utility function for extracting reasoning text from scorer run outputs. This enables scorers to access chain-of-thought reasoning from models like deepseek-reasoner in preprocess functions. ([#10684](https://github.com/mastra-ai/mastra/pull/10684))
|
|
8
|
+
|
|
9
|
+
### Patch Changes
|
|
10
|
+
|
|
11
|
+
- Updated dependencies [[`ac0d2f4`](https://github.com/mastra-ai/mastra/commit/ac0d2f4ff8831f72c1c66c2be809706d17f65789), [`1a0d3fc`](https://github.com/mastra-ai/mastra/commit/1a0d3fc811482c9c376cdf79ee615c23bae9b2d6), [`85a628b`](https://github.com/mastra-ai/mastra/commit/85a628b1224a8f64cd82ea7f033774bf22df7a7e), [`c237233`](https://github.com/mastra-ai/mastra/commit/c23723399ccedf7f5744b3f40997b79246bfbe64), [`15f9e21`](https://github.com/mastra-ai/mastra/commit/15f9e216177201ea6e3f6d0bfb063fcc0953444f), [`ff94dea`](https://github.com/mastra-ai/mastra/commit/ff94dea935f4e34545c63bcb6c29804732698809), [`5b2ff46`](https://github.com/mastra-ai/mastra/commit/5b2ff4651df70c146523a7fca773f8eb0a2272f8), [`db41688`](https://github.com/mastra-ai/mastra/commit/db4168806d007417e2e60b4f68656dca4e5f40c9), [`5ca599d`](https://github.com/mastra-ai/mastra/commit/5ca599d0bb59a1595f19f58473fcd67cc71cef58), [`bff1145`](https://github.com/mastra-ai/mastra/commit/bff114556b3cbadad9b2768488708f8ad0e91475), [`5c8ca24`](https://github.com/mastra-ai/mastra/commit/5c8ca247094e0cc2cdbd7137822fb47241f86e77), [`e191844`](https://github.com/mastra-ai/mastra/commit/e1918444ca3f80e82feef1dad506cd4ec6e2875f), [`22553f1`](https://github.com/mastra-ai/mastra/commit/22553f11c63ee5e966a9c034a349822249584691), [`7237163`](https://github.com/mastra-ai/mastra/commit/72371635dbf96a87df4b073cc48fc655afbdce3d), [`2500740`](https://github.com/mastra-ai/mastra/commit/2500740ea23da067d6e50ec71c625ab3ce275e64), [`873ecbb`](https://github.com/mastra-ai/mastra/commit/873ecbb517586aa17d2f1e99283755b3ebb2863f), [`4f9bbe5`](https://github.com/mastra-ai/mastra/commit/4f9bbe5968f42c86f4930b8193de3c3c17e5bd36), [`02e51fe`](https://github.com/mastra-ai/mastra/commit/02e51feddb3d4155cfbcc42624fd0d0970d032c0), [`8f3fa3a`](https://github.com/mastra-ai/mastra/commit/8f3fa3a652bb77da092f913ec51ae46e3a7e27dc), [`cd29ad2`](https://github.com/mastra-ai/mastra/commit/cd29ad23a255534e8191f249593849ed29160886), [`bdf4d8c`](https://github.com/mastra-ai/mastra/commit/bdf4d8cdc656d8a2c21d81834bfa3bfa70f56c16), [`854e3da`](https://github.com/mastra-ai/mastra/commit/854e3dad5daac17a91a20986399d3a51f54bf68b), [`ce18d38`](https://github.com/mastra-ai/mastra/commit/ce18d38678c65870350d123955014a8432075fd9), [`cccf9c8`](https://github.com/mastra-ai/mastra/commit/cccf9c8b2d2dfc1a5e63919395b83d78c89682a0), [`61a5705`](https://github.com/mastra-ai/mastra/commit/61a570551278b6743e64243b3ce7d73de915ca8a), [`db70a48`](https://github.com/mastra-ai/mastra/commit/db70a48aeeeeb8e5f92007e8ede52c364ce15287), [`f0fdc14`](https://github.com/mastra-ai/mastra/commit/f0fdc14ee233d619266b3d2bbdeea7d25cfc6d13), [`db18bc9`](https://github.com/mastra-ai/mastra/commit/db18bc9c3825e2c1a0ad9a183cc9935f6691bfa1), [`9b37b56`](https://github.com/mastra-ai/mastra/commit/9b37b565e1f2a76c24f728945cc740c2b09be9da), [`41a23c3`](https://github.com/mastra-ai/mastra/commit/41a23c32f9877d71810f37e24930515df2ff7a0f), [`5d171ad`](https://github.com/mastra-ai/mastra/commit/5d171ad9ef340387276b77c2bb3e83e83332d729), [`f03ae60`](https://github.com/mastra-ai/mastra/commit/f03ae60500fe350c9d828621006cdafe1975fdd8), [`d1e74a0`](https://github.com/mastra-ai/mastra/commit/d1e74a0a293866dece31022047f5dbab65a304d0), [`39e7869`](https://github.com/mastra-ai/mastra/commit/39e7869bc7d0ee391077ce291474d8a84eedccff), [`5761926`](https://github.com/mastra-ai/mastra/commit/57619260c4a2cdd598763abbacd90de594c6bc76), [`c900fdd`](https://github.com/mastra-ai/mastra/commit/c900fdd504c41348efdffb205cfe80d48c38fa33), [`604a79f`](https://github.com/mastra-ai/mastra/commit/604a79fecf276e26a54a3fe01bb94e65315d2e0e), [`887f0b4`](https://github.com/mastra-ai/mastra/commit/887f0b4746cdbd7cb7d6b17ac9f82aeb58037ea5), [`2562143`](https://github.com/mastra-ai/mastra/commit/256214336b4faa78646c9c1776612393790d8784), [`ef11a61`](https://github.com/mastra-ai/mastra/commit/ef11a61920fa0ed08a5b7ceedd192875af119749)]:
|
|
12
|
+
- @mastra/core@1.0.0-beta.6
|
|
13
|
+
|
|
14
|
+
## 1.0.0-beta.1
|
|
15
|
+
|
|
16
|
+
### Patch Changes
|
|
17
|
+
|
|
18
|
+
- Remove unused dependencies ([#10019](https://github.com/mastra-ai/mastra/pull/10019))
|
|
19
|
+
|
|
20
|
+
- Updated dependencies [[`2319326`](https://github.com/mastra-ai/mastra/commit/2319326f8c64e503a09bbcf14be2dd65405445e0), [`d629361`](https://github.com/mastra-ai/mastra/commit/d629361a60f6565b5bfb11976fdaf7308af858e2), [`08c31c1`](https://github.com/mastra-ai/mastra/commit/08c31c188ebccd598acaf55e888b6397d01f7eae), [`fd3d338`](https://github.com/mastra-ai/mastra/commit/fd3d338a2c362174ed5b383f1f011ad9fb0302aa), [`c30400a`](https://github.com/mastra-ai/mastra/commit/c30400a49b994b1b97256fe785eb6c906fc2b232), [`69e0a87`](https://github.com/mastra-ai/mastra/commit/69e0a878896a2da9494945d86e056a5f8f05b851), [`01f8878`](https://github.com/mastra-ai/mastra/commit/01f88783de25e4de048c1c8aace43e26373c6ea5), [`4c77209`](https://github.com/mastra-ai/mastra/commit/4c77209e6c11678808b365d545845918c40045c8), [`d827d08`](https://github.com/mastra-ai/mastra/commit/d827d0808ffe1f3553a84e975806cc989b9735dd), [`23c10a1`](https://github.com/mastra-ai/mastra/commit/23c10a1efdd9a693c405511ab2dc8a1236603162), [`676ccc7`](https://github.com/mastra-ai/mastra/commit/676ccc7fe92468d2d45d39c31a87825c89fd1ea0), [`c10398d`](https://github.com/mastra-ai/mastra/commit/c10398d5b88f1d4af556f4267ff06f1d11e89179), [`00c2387`](https://github.com/mastra-ai/mastra/commit/00c2387f5f04a365316f851e58666ac43f8c4edf), [`ad6250d`](https://github.com/mastra-ai/mastra/commit/ad6250dbdaad927e29f74a27b83f6c468b50a705), [`3a73998`](https://github.com/mastra-ai/mastra/commit/3a73998fa4ebeb7f3dc9301afe78095fc63e7999), [`e16d553`](https://github.com/mastra-ai/mastra/commit/e16d55338403c7553531cc568125c63d53653dff), [`4d59f58`](https://github.com/mastra-ai/mastra/commit/4d59f58de2d90d6e2810a19d4518e38ddddb9038), [`e1bb9c9`](https://github.com/mastra-ai/mastra/commit/e1bb9c94b4eb68b019ae275981be3feb769b5365), [`351a11f`](https://github.com/mastra-ai/mastra/commit/351a11fcaf2ed1008977fa9b9a489fc422e51cd4)]:
|
|
21
|
+
- @mastra/core@1.0.0-beta.3
|
|
22
|
+
|
|
3
23
|
## 1.0.0-beta.0
|
|
4
24
|
|
|
5
25
|
### Major Changes
|
|
@@ -64,6 +64,25 @@ var getAssistantMessageFromRunOutput = (output) => {
|
|
|
64
64
|
const message = output?.find(({ role }) => role === "assistant");
|
|
65
65
|
return message ? getTextContentFromMastraDBMessage(message) : void 0;
|
|
66
66
|
};
|
|
67
|
+
var getReasoningFromRunOutput = (output) => {
|
|
68
|
+
if (!output) return void 0;
|
|
69
|
+
const message = output.find(({ role }) => role === "assistant");
|
|
70
|
+
if (!message) return void 0;
|
|
71
|
+
if (message.content.reasoning) {
|
|
72
|
+
return message.content.reasoning;
|
|
73
|
+
}
|
|
74
|
+
const reasoningParts = message.content.parts?.filter((p) => p.type === "reasoning");
|
|
75
|
+
if (reasoningParts && reasoningParts.length > 0) {
|
|
76
|
+
const reasoningTexts = reasoningParts.map((p) => {
|
|
77
|
+
if (p.details && Array.isArray(p.details)) {
|
|
78
|
+
return p.details.filter((d) => d.type === "text").map((d) => d.text).join("");
|
|
79
|
+
}
|
|
80
|
+
return p.reasoning || "";
|
|
81
|
+
}).filter(Boolean);
|
|
82
|
+
return reasoningTexts.length > 0 ? reasoningTexts.join("\n") : void 0;
|
|
83
|
+
}
|
|
84
|
+
return void 0;
|
|
85
|
+
};
|
|
67
86
|
var createToolInvocation = ({
|
|
68
87
|
toolCallId,
|
|
69
88
|
toolName,
|
|
@@ -164,10 +183,11 @@ exports.extractInputMessages = extractInputMessages;
|
|
|
164
183
|
exports.extractToolCalls = extractToolCalls;
|
|
165
184
|
exports.getAssistantMessageFromRunOutput = getAssistantMessageFromRunOutput;
|
|
166
185
|
exports.getCombinedSystemPrompt = getCombinedSystemPrompt;
|
|
186
|
+
exports.getReasoningFromRunOutput = getReasoningFromRunOutput;
|
|
167
187
|
exports.getSystemMessagesFromRunInput = getSystemMessagesFromRunInput;
|
|
168
188
|
exports.getTextContentFromMastraDBMessage = getTextContentFromMastraDBMessage;
|
|
169
189
|
exports.getUserMessageFromRunInput = getUserMessageFromRunInput;
|
|
170
190
|
exports.isCloserTo = isCloserTo;
|
|
171
191
|
exports.roundToTwoDecimals = roundToTwoDecimals;
|
|
172
|
-
//# sourceMappingURL=chunk-
|
|
173
|
-
//# sourceMappingURL=chunk-
|
|
192
|
+
//# sourceMappingURL=chunk-AT7HXT3U.cjs.map
|
|
193
|
+
//# sourceMappingURL=chunk-AT7HXT3U.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/scorers/utils.ts"],"names":["requestContext","RequestContext"],"mappings":";;;;;AA0BO,SAAS,kCAAkC,OAAA,EAAkC;AAClF,EAAA,IAAI,OAAO,QAAQ,OAAA,CAAQ,OAAA,KAAY,YAAY,OAAA,CAAQ,OAAA,CAAQ,YAAY,EAAA,EAAI;AACjF,IAAA,OAAO,QAAQ,OAAA,CAAQ,OAAA;AAAA,EACzB;AACA,EAAA,IAAI,OAAA,CAAQ,QAAQ,KAAA,IAAS,KAAA,CAAM,QAAQ,OAAA,CAAQ,OAAA,CAAQ,KAAK,CAAA,EAAG;AAEjE,IAAA,MAAM,SAAA,GAAY,QAAQ,OAAA,CAAQ,KAAA,CAAM,OAAO,CAAA,CAAA,KAAK,CAAA,CAAE,SAAS,MAAM,CAAA;AACrE,IAAA,OAAO,SAAA,CAAU,SAAS,CAAA,GAAI,SAAA,CAAU,UAAU,MAAA,GAAS,CAAC,CAAA,EAAG,IAAA,IAAQ,EAAA,GAAK,EAAA;AAAA,EAC9E;AACA,EAAA,OAAO,EAAA;AACT;AAgBO,IAAM,kBAAA,GAAqB,CAAC,GAAA,KAAgB;AACjD,EAAA,OAAO,KAAK,KAAA,CAAA,CAAO,GAAA,GAAM,MAAA,CAAO,OAAA,IAAW,GAAG,CAAA,GAAI,GAAA;AACpD;AAgBO,SAAS,UAAA,CAAW,KAAA,EAAe,OAAA,EAAiB,OAAA,EAA0B;AACnF,EAAA,OAAO,IAAA,CAAK,IAAI,KAAA,GAAQ,OAAO,IAAI,IAAA,CAAK,GAAA,CAAI,QAAQ,OAAO,CAAA;AAC7D;AA6CO,IAAM,aAAA,GAAgB,CAC3B,KAAA,EACA,MAAA,EACA,mBACA,cAAA,KACiB;AACjB,EAAA,OAAO;AAAA,IACL,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,OAAA,EAAS,OAAO,CAAA;AAAA,IACxC,MAAA,EAAQ,EAAE,IAAA,EAAM,WAAA,EAAa,MAAM,MAAA,EAAO;AAAA,IAC1C,iBAAA,EAAmB,qBAAqB,EAAC;AAAA,IACzC,cAAA,EAAgB,kBAAkB;AAAC,GACrC;AACF;AAmBO,IAAM,0BAAA,GAA6B,CAAC,KAAA,KAAuD;AAChG,EAAA,MAAM,OAAA,GAAU,OAAO,aAAA,CAAc,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,IAAA,KAAS,MAAM,CAAA;AACvE,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAoBO,IAAM,6BAAA,GAAgC,CAAC,KAAA,KAA6C;AACzF,EAAA,MAAM,iBAA2B,EAAC;AAGlC,EAAA,IAAI,OAAO,cAAA,EAAgB;AACzB,IAAA,cAAA,CAAe,IAAA;AAAA,MACb,GAAG,KAAA,CAAM,cAAA,CACN,GAAA,CAAI,CAAA,GAAA,KAAO;AAEV,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,OAAO,GAAA,CAAI,OAAA;AAAA,QACb,CAAA,MAAA,IAAW,KAAA,CAAM,OAAA,CAAQ,GAAA,CAAI,OAAO,CAAA,EAAG;AAErC,UAAA,OAAO,GAAA,CAAI,OAAA,CACR,MAAA,CAAO,CAAA,IAAA,KAAQ,KAAK,IAAA,KAAS,MAAM,CAAA,CACnC,GAAA,CAAI,UAAQ,IAAA,CAAK,IAAA,IAAQ,EAAE,CAAA,CAC3B,KAAK,GAAG,CAAA;AAAA,QACb;AACA,QAAA,OAAO,EAAA;AAAA,MACT,CAAC,CAAA,CACA,MAAA,CAAO,CAAA,OAAA,KAAW,OAAO;AAAA,KAC9B;AAAA,EACF;AAGA,EAAA,IAAI,OAAO,oBAAA,EAAsB;AAC/B,IAAA,MAAA,CAAO,MAAA,CAAO,KAAA,CAAM,oBAAoB,CAAA,CAAE,QAAQ,CAAA,QAAA,KAAY;AAC5D,MAAA,QAAA,CAAS,QAAQ,CAAA,GAAA,KAAO;AACtB,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,cAAA,CAAe,IAAA,CAAK,IAAI,OAAO,CAAA;AAAA,QACjC;AAAA,MACF,CAAC,CAAA;AAAA,IACH,CAAC,CAAA;AAAA,EACH;AAEA,EAAA,OAAO,cAAA;AACT;AAmBO,IAAM,uBAAA,GAA0B,CAAC,KAAA,KAA2C;AACjF,EAAA,MAAM,cAAA,GAAiB,8BAA8B,KAAK,CAAA;AAC1D,EAAA,OAAO,cAAA,CAAe,KAAK,MAAM,CAAA;AACnC;AAmBO,IAAM,gCAAA,GAAmC,CAAC,MAAA,KAAqC;AACpF,EAAA,MAAM,OAAA,GAAU,QAAQ,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,SAAS,WAAW,CAAA;AAC/D,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAiCO,IAAM,yBAAA,GAA4B,CAAC,MAAA,KAAyD;AACjG,EAAA,IAAI,CAAC,QAAQ,OAAO,MAAA;AAEpB,EAAA,MAAM,OAAA,GAAU,OAAO,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,SAAS,WAAW,CAAA;AAC9D,EAAA,IAAI,CAAC,SAAS,OAAO,MAAA;AAGrB,EAAA,IAAI,OAAA,CAAQ,QAAQ,SAAA,EAAW;AAC7B,IAAA,OAAO,QAAQ,OAAA,CAAQ,SAAA;AAAA,EACzB;AAIA,EAAA,MAAM,cAAA,GAAiB,QAAQ,OAAA,CAAQ,KAAA,EAAO,OAAO,CAAC,CAAA,KAAW,CAAA,CAAE,IAAA,KAAS,WAAW,CAAA;AACvF,EAAA,IAAI,cAAA,IAAkB,cAAA,CAAe,MAAA,GAAS,CAAA,EAAG;AAC/C,IAAA,MAAM,cAAA,GAAiB,cAAA,CACpB,GAAA,CAAI,CAAC,CAAA,KAAW;AAEf,MAAA,IAAI,EAAE,OAAA,IAAW,KAAA,CAAM,OAAA,CAAQ,CAAA,CAAE,OAAO,CAAA,EAAG;AACzC,QAAA,OAAO,EAAE,OAAA,CACN,MAAA,CAAO,CAAC,CAAA,KAAW,EAAE,IAAA,KAAS,MAAM,CAAA,CACpC,GAAA,CAAI,CAAC,CAAA,KAAW,CAAA,CAAE,IAAI,CAAA,CACtB,KAAK,EAAE,CAAA;AAAA,MACZ;AACA,MAAA,OAAO,EAAE,SAAA,IAAa,EAAA;AAAA,IACxB,CAAC,CAAA,CACA,MAAA,CAAO,OAAO,CAAA;AAEjB,IAAA,OAAO,eAAe,MAAA,GAAS,CAAA,GAAI,cAAA,CAAe,IAAA,CAAK,IAAI,CAAA,GAAI,MAAA;AAAA,EACjE;AAEA,EAAA,OAAO,MAAA;AACT;AAuBO,IAAM,uBAAuB,CAAC;AAAA,EACnC,UAAA;AAAA,EACA,QAAA;AAAA,EACA,IAAA;AAAA,EACA,MAAA;AAAA,EACA,KAAA,GAAQ;AACV,CAAA,KAMuH;AACrH,EAAA,OAAO;AAAA,IACL,UAAA;AAAA,IACA,QAAA;AAAA,IACA,IAAA;AAAA,IACA,MAAA;AAAA,IACA;AAAA,GACF;AACF;AAmCO,SAAS,iBAAA,CAAkB;AAAA,EAChC,OAAA;AAAA,EACA,IAAA;AAAA,EACA,EAAA,GAAK,cAAA;AAAA,EACL,kBAAkB;AACpB,CAAA,EAWoB;AAClB,EAAA,OAAO;AAAA,IACL,EAAA;AAAA,IACA,IAAA;AAAA,IACA,OAAA,EAAS;AAAA,MACP,MAAA,EAAQ,CAAA;AAAA,MACR,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,IAAA,EAAM,SAAS,CAAA;AAAA,MACvC,OAAA;AAAA,MACA,GAAI,eAAA,CAAgB,MAAA,GAAS,CAAA,IAAK;AAAA,QAChC,eAAA,EAAiB,eAAA,CAAgB,GAAA,CAAI,CAAA,EAAA,MAAO;AAAA,UAC1C,YAAY,EAAA,CAAG,UAAA;AAAA,UACf,UAAU,EAAA,CAAG,QAAA;AAAA,UACb,MAAM,EAAA,CAAG,IAAA;AAAA,UACT,QAAQ,EAAA,CAAG,MAAA;AAAA,UACX,OAAO,EAAA,CAAG;AAAA,SACZ,CAAE;AAAA;AACJ,KACF;AAAA,IACA,SAAA,sBAAe,IAAA;AAAK,GACtB;AACF;AA+BO,IAAM,qBAAqB,CAAC;AAAA,EACjC,gBAAgB,EAAC;AAAA,EACjB,MAAA;AAAA,EACA,qBAAqB,EAAC;AAAA,EACtB,iBAAiB,EAAC;AAAA,EAClB,uBAAuB,EAAC;AAAA,kBACxBA,gBAAA,GAAiB,IAAIC,6BAAA,EAAe;AAAA,EACpC,KAAA,GAAQ,OAAO,UAAA;AACjB,CAAA,KAaK;AACH,EAAA,OAAO;AAAA,IACL,KAAA,EAAO;AAAA,MACL,aAAA;AAAA,MACA,kBAAA;AAAA,MACA,cAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,MAAA;AAAA,oBACAD,gBAAA;AAAA,IACA;AAAA,GACF;AACF;AAqCO,SAAS,iBAAiB,MAAA,EAAqF;AACpH,EAAA,MAAM,YAAsB,EAAC;AAC7B,EAAA,MAAM,gBAAgC,EAAC;AAEvC,EAAA,KAAA,IAAS,YAAA,GAAe,CAAA,EAAG,YAAA,GAAe,MAAA,CAAO,QAAQ,YAAA,EAAA,EAAgB;AACvE,IAAA,MAAM,OAAA,GAAU,OAAO,YAAY,CAAA;AAEnC,IAAA,IAAI,OAAA,EAAS,SAAS,eAAA,EAAiB;AACrC,MAAA,KAAA,IAAS,kBAAkB,CAAA,EAAG,eAAA,GAAkB,QAAQ,OAAA,CAAQ,eAAA,CAAgB,QAAQ,eAAA,EAAA,EAAmB;AACzG,QAAA,MAAM,UAAA,GAAa,OAAA,CAAQ,OAAA,CAAQ,eAAA,CAAgB,eAAe,CAAA;AAClE,QAAA,IAAI,UAAA,IAAc,WAAW,QAAA,KAAa,UAAA,CAAW,UAAU,QAAA,IAAY,UAAA,CAAW,UAAU,MAAA,CAAA,EAAS;AACvG,UAAA,SAAA,CAAU,IAAA,CAAK,WAAW,QAAQ,CAAA;AAClC,UAAA,aAAA,CAAc,IAAA,CAAK;AAAA,YACjB,UAAU,UAAA,CAAW,QAAA;AAAA,YACrB,YAAY,UAAA,CAAW,UAAA,IAAc,CAAA,EAAG,YAAY,IAAI,eAAe,CAAA,CAAA;AAAA,YACvE,YAAA;AAAA,YACA;AAAA,WACD,CAAA;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,EAAE,KAAA,EAAO,SAAA,EAAW,aAAA,EAAc;AAC3C;AAiBO,IAAM,oBAAA,GAAuB,CAAC,QAAA,KAA2D;AAC9F,EAAA,OAAO,QAAA,EAAU,eAAe,GAAA,CAAI,CAAA,GAAA,KAAO,kCAAkC,GAAG,CAAC,KAAK,EAAC;AACzF;AAmBO,IAAM,4BAAA,GAA+B,CAAC,SAAA,KAAiD;AAC5F,EAAA,OAAO,SAAA,CAAU,MAAA,CAAO,CAAA,GAAA,KAAO,GAAA,CAAI,IAAA,KAAS,WAAW,CAAA,CAAE,GAAA,CAAI,CAAA,GAAA,KAAO,iCAAA,CAAkC,GAAG,CAAC,CAAA;AAC5G","file":"chunk-AT7HXT3U.cjs","sourcesContent":["import type { MastraDBMessage } from '@mastra/core/agent';\nimport type { ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput } from '@mastra/core/evals';\nimport { RequestContext } from '@mastra/core/request-context';\nimport type { ToolInvocation } from 'ai';\n\n/**\n * Extracts text content from a MastraDBMessage.\n *\n * This function matches the logic used in `MessageList.mastraDBMessageToAIV4UIMessage`.\n * It first checks for a string `content.content` field, then falls back to extracting\n * text from the `parts` array (returning only the last text part, like AI SDK does).\n *\n * @param message - The MastraDBMessage to extract text from\n * @returns The extracted text content, or an empty string if no text is found\n *\n * @example\n * ```ts\n * const message: MastraDBMessage = {\n * id: 'msg-1',\n * role: 'assistant',\n * content: { format: 2, parts: [{ type: 'text', text: 'Hello!' }] },\n * createdAt: new Date(),\n * };\n * const text = getTextContentFromMastraDBMessage(message); // 'Hello!'\n * ```\n */\nexport function getTextContentFromMastraDBMessage(message: MastraDBMessage): string {\n if (typeof message.content.content === 'string' && message.content.content !== '') {\n return message.content.content;\n }\n if (message.content.parts && Array.isArray(message.content.parts)) {\n // Return only the last text part like AI SDK does\n const textParts = message.content.parts.filter(p => p.type === 'text');\n return textParts.length > 0 ? textParts[textParts.length - 1]?.text || '' : '';\n }\n return '';\n}\n\n/**\n * Rounds a number to two decimal places.\n *\n * Uses `Number.EPSILON` to handle floating-point precision issues.\n *\n * @param num - The number to round\n * @returns The number rounded to two decimal places\n *\n * @example\n * ```ts\n * roundToTwoDecimals(0.1 + 0.2); // 0.3\n * roundToTwoDecimals(1.005); // 1.01\n * ```\n */\nexport const roundToTwoDecimals = (num: number) => {\n return Math.round((num + Number.EPSILON) * 100) / 100;\n};\n\n/**\n * Determines if a value is closer to the first target than the second.\n *\n * @param value - The value to compare\n * @param target1 - The first target value\n * @param target2 - The second target value\n * @returns `true` if `value` is closer to `target1` than `target2`\n *\n * @example\n * ```ts\n * isCloserTo(0.6, 1, 0); // true (0.6 is closer to 1)\n * isCloserTo(0.3, 1, 0); // false (0.3 is closer to 0)\n * ```\n */\nexport function isCloserTo(value: number, target1: number, target2: number): boolean {\n return Math.abs(value - target1) < Math.abs(value - target2);\n}\n\n/**\n * Represents a test case for scorer evaluation.\n */\nexport type TestCase = {\n /** The input text to evaluate */\n input: string;\n /** The output text to evaluate */\n output: string;\n /** The expected result of the evaluation */\n expectedResult: {\n /** The expected score */\n score: number;\n /** The optional expected reason */\n reason?: string;\n };\n};\n\n/**\n * Represents a test case with additional context for scorer evaluation.\n */\nexport type TestCaseWithContext = TestCase & {\n /** Additional context strings for the evaluation */\n context: string[];\n};\n\n/**\n * Creates a scoring input object for testing purposes.\n *\n * @param input - The user input text\n * @param output - The assistant output text\n * @param additionalContext - Optional additional context data\n * @param requestContext - Optional request context data\n * @returns A ScoringInput object ready for use in scorer tests\n *\n * @example\n * ```ts\n * const run = createTestRun(\n * 'What is 2+2?',\n * 'The answer is 4.',\n * { topic: 'math' }\n * );\n * ```\n */\nexport const createTestRun = (\n input: string,\n output: string,\n additionalContext?: Record<string, any>,\n requestContext?: Record<string, any>,\n): ScoringInput => {\n return {\n input: [{ role: 'user', content: input }],\n output: { role: 'assistant', text: output },\n additionalContext: additionalContext ?? {},\n requestContext: requestContext ?? {},\n };\n};\n\n/**\n * Extracts the user message text from a scorer run input.\n *\n * Finds the first message with role 'user' and extracts its text content.\n *\n * @param input - The scorer run input containing input messages\n * @returns The user message text, or `undefined` if no user message is found\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const userText = getUserMessageFromRunInput(run.input);\n * return { userText };\n * });\n * ```\n */\nexport const getUserMessageFromRunInput = (input?: ScorerRunInputForAgent): string | undefined => {\n const message = input?.inputMessages.find(({ role }) => role === 'user');\n return message ? getTextContentFromMastraDBMessage(message) : undefined;\n};\n\n/**\n * Extracts all system messages from a scorer run input.\n *\n * Collects text from both standard system messages and tagged system messages\n * (specialized system prompts like memory instructions).\n *\n * @param input - The scorer run input containing system messages\n * @returns An array of system message strings\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const systemMessages = getSystemMessagesFromRunInput(run.input);\n * return { systemPrompt: systemMessages.join('\\n') };\n * });\n * ```\n */\nexport const getSystemMessagesFromRunInput = (input?: ScorerRunInputForAgent): string[] => {\n const systemMessages: string[] = [];\n\n // Add standard system messages\n if (input?.systemMessages) {\n systemMessages.push(\n ...input.systemMessages\n .map(msg => {\n // Handle different content types - extract text if it's an array of parts\n if (typeof msg.content === 'string') {\n return msg.content;\n } else if (Array.isArray(msg.content)) {\n // Extract text from parts array\n return msg.content\n .filter(part => part.type === 'text')\n .map(part => part.text || '')\n .join(' ');\n }\n return '';\n })\n .filter(content => content),\n );\n }\n\n // Add tagged system messages (these are specialized system prompts)\n if (input?.taggedSystemMessages) {\n Object.values(input.taggedSystemMessages).forEach(messages => {\n messages.forEach(msg => {\n if (typeof msg.content === 'string') {\n systemMessages.push(msg.content);\n }\n });\n });\n }\n\n return systemMessages;\n};\n\n/**\n * Combines all system messages into a single prompt string.\n *\n * Joins all system messages (standard and tagged) with double newlines.\n *\n * @param input - The scorer run input containing system messages\n * @returns A combined system prompt string\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const systemPrompt = getCombinedSystemPrompt(run.input);\n * return { systemPrompt };\n * });\n * ```\n */\nexport const getCombinedSystemPrompt = (input?: ScorerRunInputForAgent): string => {\n const systemMessages = getSystemMessagesFromRunInput(input);\n return systemMessages.join('\\n\\n');\n};\n\n/**\n * Extracts the assistant message text from a scorer run output.\n *\n * Finds the first message with role 'assistant' and extracts its text content.\n *\n * @param output - The scorer run output (array of MastraDBMessage)\n * @returns The assistant message text, or `undefined` if no assistant message is found\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const response = getAssistantMessageFromRunOutput(run.output);\n * return { response };\n * });\n * ```\n */\nexport const getAssistantMessageFromRunOutput = (output?: ScorerRunOutputForAgent) => {\n const message = output?.find(({ role }) => role === 'assistant');\n return message ? getTextContentFromMastraDBMessage(message) : undefined;\n};\n\n/**\n * Extracts reasoning text from a scorer run output.\n *\n * This function extracts reasoning content from assistant messages, which is\n * produced by reasoning models like `deepseek-reasoner`. The reasoning can be\n * stored in two places:\n * 1. `content.reasoning` - a string field on the message content\n * 2. `content.parts` - as parts with `type: 'reasoning'` containing `details`\n *\n * @param output - The scorer run output (array of MastraDBMessage)\n * @returns The reasoning text, or `undefined` if no reasoning is present\n *\n * @example\n * ```ts\n * const reasoningScorer = createScorer({\n * id: 'reasoning-scorer',\n * name: 'Reasoning Quality',\n * description: 'Evaluates the quality of model reasoning',\n * type: 'agent',\n * })\n * .preprocess(({ run }) => {\n * const reasoning = getReasoningFromRunOutput(run.output);\n * const response = getAssistantMessageFromRunOutput(run.output);\n * return { reasoning, response };\n * })\n * .generateScore(({ results }) => {\n * // Score based on reasoning quality\n * return results.preprocessStepResult?.reasoning ? 1 : 0;\n * });\n * ```\n */\nexport const getReasoningFromRunOutput = (output?: ScorerRunOutputForAgent): string | undefined => {\n if (!output) return undefined;\n\n const message = output.find(({ role }) => role === 'assistant');\n if (!message) return undefined;\n\n // Check for reasoning in content.reasoning (string format)\n if (message.content.reasoning) {\n return message.content.reasoning;\n }\n\n // Check for reasoning in parts with type 'reasoning'\n // Reasoning models store reasoning in parts as { type: 'reasoning', details: [{ type: 'text', text: '...' }] }\n const reasoningParts = message.content.parts?.filter((p: any) => p.type === 'reasoning');\n if (reasoningParts && reasoningParts.length > 0) {\n const reasoningTexts = reasoningParts\n .map((p: any) => {\n // The reasoning text can be in p.reasoning or in p.details[].text\n if (p.details && Array.isArray(p.details)) {\n return p.details\n .filter((d: any) => d.type === 'text')\n .map((d: any) => d.text)\n .join('');\n }\n return p.reasoning || '';\n })\n .filter(Boolean);\n\n return reasoningTexts.length > 0 ? reasoningTexts.join('\\n') : undefined;\n }\n\n return undefined;\n};\n\n/**\n * Creates a tool invocation object for testing purposes.\n *\n * @param options - The tool invocation configuration\n * @param options.toolCallId - Unique identifier for the tool call\n * @param options.toolName - Name of the tool being called\n * @param options.args - Arguments passed to the tool\n * @param options.result - Result returned by the tool\n * @param options.state - State of the invocation (default: 'result')\n * @returns A tool invocation object\n *\n * @example\n * ```ts\n * const invocation = createToolInvocation({\n * toolCallId: 'call-123',\n * toolName: 'weatherTool',\n * args: { location: 'London' },\n * result: { temperature: 20, condition: 'sunny' },\n * });\n * ```\n */\nexport const createToolInvocation = ({\n toolCallId,\n toolName,\n args,\n result,\n state = 'result',\n}: {\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state?: ToolInvocation['state'];\n}): { toolCallId: string; toolName: string; args: Record<string, any>; result: Record<string, any>; state: string } => {\n return {\n toolCallId,\n toolName,\n args,\n result,\n state,\n };\n};\n\n/**\n * Creates a MastraDBMessage object for testing purposes.\n *\n * Supports optional tool invocations for testing tool call scenarios.\n *\n * @param options - The message configuration\n * @param options.content - The text content of the message\n * @param options.role - The role of the message sender ('user', 'assistant', or 'system')\n * @param options.id - Optional message ID (default: 'test-message')\n * @param options.toolInvocations - Optional array of tool invocations\n * @returns A MastraDBMessage object\n *\n * @example\n * ```ts\n * const message = createTestMessage({\n * content: 'Hello, how can I help?',\n * role: 'assistant',\n * });\n *\n * // With tool invocations\n * const messageWithTools = createTestMessage({\n * content: 'Let me check the weather.',\n * role: 'assistant',\n * toolInvocations: [{\n * toolCallId: 'call-1',\n * toolName: 'weatherTool',\n * args: { location: 'Paris' },\n * result: { temp: 22 },\n * state: 'result',\n * }],\n * });\n * ```\n */\nexport function createTestMessage({\n content,\n role,\n id = 'test-message',\n toolInvocations = [],\n}: {\n content: string;\n role: 'user' | 'assistant' | 'system';\n id?: string;\n toolInvocations?: Array<{\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state: any;\n }>;\n}): MastraDBMessage {\n return {\n id,\n role,\n content: {\n format: 2,\n parts: [{ type: 'text', text: content }],\n content,\n ...(toolInvocations.length > 0 && {\n toolInvocations: toolInvocations.map(ti => ({\n toolCallId: ti.toolCallId,\n toolName: ti.toolName,\n args: ti.args,\n result: ti.result,\n state: ti.state,\n })),\n }),\n },\n createdAt: new Date(),\n };\n}\n\n/**\n * Creates a complete agent test run object for testing scorers.\n *\n * Provides a convenient way to construct the full run object that scorers receive,\n * including input messages, output, system messages, and request context.\n *\n * @param options - The test run configuration\n * @param options.inputMessages - Array of input messages (default: [])\n * @param options.output - The output messages (required)\n * @param options.rememberedMessages - Array of remembered messages from memory (default: [])\n * @param options.systemMessages - Array of system messages (default: [])\n * @param options.taggedSystemMessages - Tagged system messages map (default: {})\n * @param options.requestContext - Request context (default: new RequestContext())\n * @param options.runId - Unique run ID (default: random UUID)\n * @returns A complete test run object\n *\n * @example\n * ```ts\n * const testRun = createAgentTestRun({\n * inputMessages: [createTestMessage({ content: 'Hello', role: 'user' })],\n * output: [createTestMessage({ content: 'Hi there!', role: 'assistant' })],\n * });\n *\n * const result = await scorer.run({\n * input: testRun.input,\n * output: testRun.output,\n * });\n * ```\n */\nexport const createAgentTestRun = ({\n inputMessages = [],\n output,\n rememberedMessages = [],\n systemMessages = [],\n taggedSystemMessages = {},\n requestContext = new RequestContext(),\n runId = crypto.randomUUID(),\n}: {\n inputMessages?: ScorerRunInputForAgent['inputMessages'];\n output: ScorerRunOutputForAgent;\n rememberedMessages?: ScorerRunInputForAgent['rememberedMessages'];\n systemMessages?: ScorerRunInputForAgent['systemMessages'];\n taggedSystemMessages?: ScorerRunInputForAgent['taggedSystemMessages'];\n requestContext?: RequestContext;\n runId?: string;\n}): {\n input: ScorerRunInputForAgent;\n output: ScorerRunOutputForAgent;\n requestContext: RequestContext;\n runId: string;\n} => {\n return {\n input: {\n inputMessages,\n rememberedMessages,\n systemMessages,\n taggedSystemMessages,\n },\n output,\n requestContext,\n runId,\n };\n};\n\n/**\n * Information about a tool call extracted from scorer output.\n */\nexport type ToolCallInfo = {\n /** Name of the tool that was called */\n toolName: string;\n /** Unique identifier for the tool call */\n toolCallId: string;\n /** Index of the message containing this tool call */\n messageIndex: number;\n /** Index of the invocation within the message's tool invocations */\n invocationIndex: number;\n};\n\n/**\n * Extracts all tool calls from a scorer run output.\n *\n * Iterates through all messages and their tool invocations to collect\n * information about tools that were called (with state 'result' or 'call').\n *\n * @param output - The scorer run output (array of MastraDBMessage)\n * @returns An object containing tool names and detailed tool call info\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const { tools, toolCallInfos } = extractToolCalls(run.output);\n * return {\n * toolsUsed: tools,\n * toolCount: tools.length,\n * };\n * });\n * ```\n */\nexport function extractToolCalls(output: ScorerRunOutputForAgent): { tools: string[]; toolCallInfos: ToolCallInfo[] } {\n const toolCalls: string[] = [];\n const toolCallInfos: ToolCallInfo[] = [];\n\n for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {\n const message = output[messageIndex];\n // Tool invocations are now nested under content\n if (message?.content?.toolInvocations) {\n for (let invocationIndex = 0; invocationIndex < message.content.toolInvocations.length; invocationIndex++) {\n const invocation = message.content.toolInvocations[invocationIndex];\n if (invocation && invocation.toolName && (invocation.state === 'result' || invocation.state === 'call')) {\n toolCalls.push(invocation.toolName);\n toolCallInfos.push({\n toolName: invocation.toolName,\n toolCallId: invocation.toolCallId || `${messageIndex}-${invocationIndex}`,\n messageIndex,\n invocationIndex,\n });\n }\n }\n }\n }\n\n return { tools: toolCalls, toolCallInfos };\n}\n\n/**\n * Extracts text content from all input messages.\n *\n * @param runInput - The scorer run input\n * @returns An array of text strings from each input message\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const messages = extractInputMessages(run.input);\n * return { allUserMessages: messages.join('\\n') };\n * });\n * ```\n */\nexport const extractInputMessages = (runInput: ScorerRunInputForAgent | undefined): string[] => {\n return runInput?.inputMessages?.map(msg => getTextContentFromMastraDBMessage(msg)) || [];\n};\n\n/**\n * Extracts text content from all assistant response messages.\n *\n * Filters for messages with role 'assistant' and extracts their text content.\n *\n * @param runOutput - The scorer run output (array of MastraDBMessage)\n * @returns An array of text strings from each assistant message\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const responses = extractAgentResponseMessages(run.output);\n * return { allResponses: responses.join('\\n') };\n * });\n * ```\n */\nexport const extractAgentResponseMessages = (runOutput: ScorerRunOutputForAgent): string[] => {\n return runOutput.filter(msg => msg.role === 'assistant').map(msg => getTextContentFromMastraDBMessage(msg));\n};\n"]}
|
|
@@ -62,6 +62,25 @@ var getAssistantMessageFromRunOutput = (output) => {
|
|
|
62
62
|
const message = output?.find(({ role }) => role === "assistant");
|
|
63
63
|
return message ? getTextContentFromMastraDBMessage(message) : void 0;
|
|
64
64
|
};
|
|
65
|
+
var getReasoningFromRunOutput = (output) => {
|
|
66
|
+
if (!output) return void 0;
|
|
67
|
+
const message = output.find(({ role }) => role === "assistant");
|
|
68
|
+
if (!message) return void 0;
|
|
69
|
+
if (message.content.reasoning) {
|
|
70
|
+
return message.content.reasoning;
|
|
71
|
+
}
|
|
72
|
+
const reasoningParts = message.content.parts?.filter((p) => p.type === "reasoning");
|
|
73
|
+
if (reasoningParts && reasoningParts.length > 0) {
|
|
74
|
+
const reasoningTexts = reasoningParts.map((p) => {
|
|
75
|
+
if (p.details && Array.isArray(p.details)) {
|
|
76
|
+
return p.details.filter((d) => d.type === "text").map((d) => d.text).join("");
|
|
77
|
+
}
|
|
78
|
+
return p.reasoning || "";
|
|
79
|
+
}).filter(Boolean);
|
|
80
|
+
return reasoningTexts.length > 0 ? reasoningTexts.join("\n") : void 0;
|
|
81
|
+
}
|
|
82
|
+
return void 0;
|
|
83
|
+
};
|
|
65
84
|
var createToolInvocation = ({
|
|
66
85
|
toolCallId,
|
|
67
86
|
toolName,
|
|
@@ -153,6 +172,6 @@ var extractAgentResponseMessages = (runOutput) => {
|
|
|
153
172
|
return runOutput.filter((msg) => msg.role === "assistant").map((msg) => getTextContentFromMastraDBMessage(msg));
|
|
154
173
|
};
|
|
155
174
|
|
|
156
|
-
export { createAgentTestRun, createTestMessage, createTestRun, createToolInvocation, extractAgentResponseMessages, extractInputMessages, extractToolCalls, getAssistantMessageFromRunOutput, getCombinedSystemPrompt, getSystemMessagesFromRunInput, getTextContentFromMastraDBMessage, getUserMessageFromRunInput, isCloserTo, roundToTwoDecimals };
|
|
157
|
-
//# sourceMappingURL=chunk-
|
|
158
|
-
//# sourceMappingURL=chunk-
|
|
175
|
+
export { createAgentTestRun, createTestMessage, createTestRun, createToolInvocation, extractAgentResponseMessages, extractInputMessages, extractToolCalls, getAssistantMessageFromRunOutput, getCombinedSystemPrompt, getReasoningFromRunOutput, getSystemMessagesFromRunInput, getTextContentFromMastraDBMessage, getUserMessageFromRunInput, isCloserTo, roundToTwoDecimals };
|
|
176
|
+
//# sourceMappingURL=chunk-CKKVCGRB.js.map
|
|
177
|
+
//# sourceMappingURL=chunk-CKKVCGRB.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/scorers/utils.ts"],"names":[],"mappings":";;;AA0BO,SAAS,kCAAkC,OAAA,EAAkC;AAClF,EAAA,IAAI,OAAO,QAAQ,OAAA,CAAQ,OAAA,KAAY,YAAY,OAAA,CAAQ,OAAA,CAAQ,YAAY,EAAA,EAAI;AACjF,IAAA,OAAO,QAAQ,OAAA,CAAQ,OAAA;AAAA,EACzB;AACA,EAAA,IAAI,OAAA,CAAQ,QAAQ,KAAA,IAAS,KAAA,CAAM,QAAQ,OAAA,CAAQ,OAAA,CAAQ,KAAK,CAAA,EAAG;AAEjE,IAAA,MAAM,SAAA,GAAY,QAAQ,OAAA,CAAQ,KAAA,CAAM,OAAO,CAAA,CAAA,KAAK,CAAA,CAAE,SAAS,MAAM,CAAA;AACrE,IAAA,OAAO,SAAA,CAAU,SAAS,CAAA,GAAI,SAAA,CAAU,UAAU,MAAA,GAAS,CAAC,CAAA,EAAG,IAAA,IAAQ,EAAA,GAAK,EAAA;AAAA,EAC9E;AACA,EAAA,OAAO,EAAA;AACT;AAgBO,IAAM,kBAAA,GAAqB,CAAC,GAAA,KAAgB;AACjD,EAAA,OAAO,KAAK,KAAA,CAAA,CAAO,GAAA,GAAM,MAAA,CAAO,OAAA,IAAW,GAAG,CAAA,GAAI,GAAA;AACpD;AAgBO,SAAS,UAAA,CAAW,KAAA,EAAe,OAAA,EAAiB,OAAA,EAA0B;AACnF,EAAA,OAAO,IAAA,CAAK,IAAI,KAAA,GAAQ,OAAO,IAAI,IAAA,CAAK,GAAA,CAAI,QAAQ,OAAO,CAAA;AAC7D;AA6CO,IAAM,aAAA,GAAgB,CAC3B,KAAA,EACA,MAAA,EACA,mBACA,cAAA,KACiB;AACjB,EAAA,OAAO;AAAA,IACL,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,OAAA,EAAS,OAAO,CAAA;AAAA,IACxC,MAAA,EAAQ,EAAE,IAAA,EAAM,WAAA,EAAa,MAAM,MAAA,EAAO;AAAA,IAC1C,iBAAA,EAAmB,qBAAqB,EAAC;AAAA,IACzC,cAAA,EAAgB,kBAAkB;AAAC,GACrC;AACF;AAmBO,IAAM,0BAAA,GAA6B,CAAC,KAAA,KAAuD;AAChG,EAAA,MAAM,OAAA,GAAU,OAAO,aAAA,CAAc,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,IAAA,KAAS,MAAM,CAAA;AACvE,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAoBO,IAAM,6BAAA,GAAgC,CAAC,KAAA,KAA6C;AACzF,EAAA,MAAM,iBAA2B,EAAC;AAGlC,EAAA,IAAI,OAAO,cAAA,EAAgB;AACzB,IAAA,cAAA,CAAe,IAAA;AAAA,MACb,GAAG,KAAA,CAAM,cAAA,CACN,GAAA,CAAI,CAAA,GAAA,KAAO;AAEV,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,OAAO,GAAA,CAAI,OAAA;AAAA,QACb,CAAA,MAAA,IAAW,KAAA,CAAM,OAAA,CAAQ,GAAA,CAAI,OAAO,CAAA,EAAG;AAErC,UAAA,OAAO,GAAA,CAAI,OAAA,CACR,MAAA,CAAO,CAAA,IAAA,KAAQ,KAAK,IAAA,KAAS,MAAM,CAAA,CACnC,GAAA,CAAI,UAAQ,IAAA,CAAK,IAAA,IAAQ,EAAE,CAAA,CAC3B,KAAK,GAAG,CAAA;AAAA,QACb;AACA,QAAA,OAAO,EAAA;AAAA,MACT,CAAC,CAAA,CACA,MAAA,CAAO,CAAA,OAAA,KAAW,OAAO;AAAA,KAC9B;AAAA,EACF;AAGA,EAAA,IAAI,OAAO,oBAAA,EAAsB;AAC/B,IAAA,MAAA,CAAO,MAAA,CAAO,KAAA,CAAM,oBAAoB,CAAA,CAAE,QAAQ,CAAA,QAAA,KAAY;AAC5D,MAAA,QAAA,CAAS,QAAQ,CAAA,GAAA,KAAO;AACtB,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,cAAA,CAAe,IAAA,CAAK,IAAI,OAAO,CAAA;AAAA,QACjC;AAAA,MACF,CAAC,CAAA;AAAA,IACH,CAAC,CAAA;AAAA,EACH;AAEA,EAAA,OAAO,cAAA;AACT;AAmBO,IAAM,uBAAA,GAA0B,CAAC,KAAA,KAA2C;AACjF,EAAA,MAAM,cAAA,GAAiB,8BAA8B,KAAK,CAAA;AAC1D,EAAA,OAAO,cAAA,CAAe,KAAK,MAAM,CAAA;AACnC;AAmBO,IAAM,gCAAA,GAAmC,CAAC,MAAA,KAAqC;AACpF,EAAA,MAAM,OAAA,GAAU,QAAQ,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,SAAS,WAAW,CAAA;AAC/D,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAiCO,IAAM,yBAAA,GAA4B,CAAC,MAAA,KAAyD;AACjG,EAAA,IAAI,CAAC,QAAQ,OAAO,MAAA;AAEpB,EAAA,MAAM,OAAA,GAAU,OAAO,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,SAAS,WAAW,CAAA;AAC9D,EAAA,IAAI,CAAC,SAAS,OAAO,MAAA;AAGrB,EAAA,IAAI,OAAA,CAAQ,QAAQ,SAAA,EAAW;AAC7B,IAAA,OAAO,QAAQ,OAAA,CAAQ,SAAA;AAAA,EACzB;AAIA,EAAA,MAAM,cAAA,GAAiB,QAAQ,OAAA,CAAQ,KAAA,EAAO,OAAO,CAAC,CAAA,KAAW,CAAA,CAAE,IAAA,KAAS,WAAW,CAAA;AACvF,EAAA,IAAI,cAAA,IAAkB,cAAA,CAAe,MAAA,GAAS,CAAA,EAAG;AAC/C,IAAA,MAAM,cAAA,GAAiB,cAAA,CACpB,GAAA,CAAI,CAAC,CAAA,KAAW;AAEf,MAAA,IAAI,EAAE,OAAA,IAAW,KAAA,CAAM,OAAA,CAAQ,CAAA,CAAE,OAAO,CAAA,EAAG;AACzC,QAAA,OAAO,EAAE,OAAA,CACN,MAAA,CAAO,CAAC,CAAA,KAAW,EAAE,IAAA,KAAS,MAAM,CAAA,CACpC,GAAA,CAAI,CAAC,CAAA,KAAW,CAAA,CAAE,IAAI,CAAA,CACtB,KAAK,EAAE,CAAA;AAAA,MACZ;AACA,MAAA,OAAO,EAAE,SAAA,IAAa,EAAA;AAAA,IACxB,CAAC,CAAA,CACA,MAAA,CAAO,OAAO,CAAA;AAEjB,IAAA,OAAO,eAAe,MAAA,GAAS,CAAA,GAAI,cAAA,CAAe,IAAA,CAAK,IAAI,CAAA,GAAI,MAAA;AAAA,EACjE;AAEA,EAAA,OAAO,MAAA;AACT;AAuBO,IAAM,uBAAuB,CAAC;AAAA,EACnC,UAAA;AAAA,EACA,QAAA;AAAA,EACA,IAAA;AAAA,EACA,MAAA;AAAA,EACA,KAAA,GAAQ;AACV,CAAA,KAMuH;AACrH,EAAA,OAAO;AAAA,IACL,UAAA;AAAA,IACA,QAAA;AAAA,IACA,IAAA;AAAA,IACA,MAAA;AAAA,IACA;AAAA,GACF;AACF;AAmCO,SAAS,iBAAA,CAAkB;AAAA,EAChC,OAAA;AAAA,EACA,IAAA;AAAA,EACA,EAAA,GAAK,cAAA;AAAA,EACL,kBAAkB;AACpB,CAAA,EAWoB;AAClB,EAAA,OAAO;AAAA,IACL,EAAA;AAAA,IACA,IAAA;AAAA,IACA,OAAA,EAAS;AAAA,MACP,MAAA,EAAQ,CAAA;AAAA,MACR,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,IAAA,EAAM,SAAS,CAAA;AAAA,MACvC,OAAA;AAAA,MACA,GAAI,eAAA,CAAgB,MAAA,GAAS,CAAA,IAAK;AAAA,QAChC,eAAA,EAAiB,eAAA,CAAgB,GAAA,CAAI,CAAA,EAAA,MAAO;AAAA,UAC1C,YAAY,EAAA,CAAG,UAAA;AAAA,UACf,UAAU,EAAA,CAAG,QAAA;AAAA,UACb,MAAM,EAAA,CAAG,IAAA;AAAA,UACT,QAAQ,EAAA,CAAG,MAAA;AAAA,UACX,OAAO,EAAA,CAAG;AAAA,SACZ,CAAE;AAAA;AACJ,KACF;AAAA,IACA,SAAA,sBAAe,IAAA;AAAK,GACtB;AACF;AA+BO,IAAM,qBAAqB,CAAC;AAAA,EACjC,gBAAgB,EAAC;AAAA,EACjB,MAAA;AAAA,EACA,qBAAqB,EAAC;AAAA,EACtB,iBAAiB,EAAC;AAAA,EAClB,uBAAuB,EAAC;AAAA,EACxB,cAAA,GAAiB,IAAI,cAAA,EAAe;AAAA,EACpC,KAAA,GAAQ,OAAO,UAAA;AACjB,CAAA,KAaK;AACH,EAAA,OAAO;AAAA,IACL,KAAA,EAAO;AAAA,MACL,aAAA;AAAA,MACA,kBAAA;AAAA,MACA,cAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,MAAA;AAAA,IACA,cAAA;AAAA,IACA;AAAA,GACF;AACF;AAqCO,SAAS,iBAAiB,MAAA,EAAqF;AACpH,EAAA,MAAM,YAAsB,EAAC;AAC7B,EAAA,MAAM,gBAAgC,EAAC;AAEvC,EAAA,KAAA,IAAS,YAAA,GAAe,CAAA,EAAG,YAAA,GAAe,MAAA,CAAO,QAAQ,YAAA,EAAA,EAAgB;AACvE,IAAA,MAAM,OAAA,GAAU,OAAO,YAAY,CAAA;AAEnC,IAAA,IAAI,OAAA,EAAS,SAAS,eAAA,EAAiB;AACrC,MAAA,KAAA,IAAS,kBAAkB,CAAA,EAAG,eAAA,GAAkB,QAAQ,OAAA,CAAQ,eAAA,CAAgB,QAAQ,eAAA,EAAA,EAAmB;AACzG,QAAA,MAAM,UAAA,GAAa,OAAA,CAAQ,OAAA,CAAQ,eAAA,CAAgB,eAAe,CAAA;AAClE,QAAA,IAAI,UAAA,IAAc,WAAW,QAAA,KAAa,UAAA,CAAW,UAAU,QAAA,IAAY,UAAA,CAAW,UAAU,MAAA,CAAA,EAAS;AACvG,UAAA,SAAA,CAAU,IAAA,CAAK,WAAW,QAAQ,CAAA;AAClC,UAAA,aAAA,CAAc,IAAA,CAAK;AAAA,YACjB,UAAU,UAAA,CAAW,QAAA;AAAA,YACrB,YAAY,UAAA,CAAW,UAAA,IAAc,CAAA,EAAG,YAAY,IAAI,eAAe,CAAA,CAAA;AAAA,YACvE,YAAA;AAAA,YACA;AAAA,WACD,CAAA;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,EAAE,KAAA,EAAO,SAAA,EAAW,aAAA,EAAc;AAC3C;AAiBO,IAAM,oBAAA,GAAuB,CAAC,QAAA,KAA2D;AAC9F,EAAA,OAAO,QAAA,EAAU,eAAe,GAAA,CAAI,CAAA,GAAA,KAAO,kCAAkC,GAAG,CAAC,KAAK,EAAC;AACzF;AAmBO,IAAM,4BAAA,GAA+B,CAAC,SAAA,KAAiD;AAC5F,EAAA,OAAO,SAAA,CAAU,MAAA,CAAO,CAAA,GAAA,KAAO,GAAA,CAAI,IAAA,KAAS,WAAW,CAAA,CAAE,GAAA,CAAI,CAAA,GAAA,KAAO,iCAAA,CAAkC,GAAG,CAAC,CAAA;AAC5G","file":"chunk-CKKVCGRB.js","sourcesContent":["import type { MastraDBMessage } from '@mastra/core/agent';\nimport type { ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput } from '@mastra/core/evals';\nimport { RequestContext } from '@mastra/core/request-context';\nimport type { ToolInvocation } from 'ai';\n\n/**\n * Extracts text content from a MastraDBMessage.\n *\n * This function matches the logic used in `MessageList.mastraDBMessageToAIV4UIMessage`.\n * It first checks for a string `content.content` field, then falls back to extracting\n * text from the `parts` array (returning only the last text part, like AI SDK does).\n *\n * @param message - The MastraDBMessage to extract text from\n * @returns The extracted text content, or an empty string if no text is found\n *\n * @example\n * ```ts\n * const message: MastraDBMessage = {\n * id: 'msg-1',\n * role: 'assistant',\n * content: { format: 2, parts: [{ type: 'text', text: 'Hello!' }] },\n * createdAt: new Date(),\n * };\n * const text = getTextContentFromMastraDBMessage(message); // 'Hello!'\n * ```\n */\nexport function getTextContentFromMastraDBMessage(message: MastraDBMessage): string {\n if (typeof message.content.content === 'string' && message.content.content !== '') {\n return message.content.content;\n }\n if (message.content.parts && Array.isArray(message.content.parts)) {\n // Return only the last text part like AI SDK does\n const textParts = message.content.parts.filter(p => p.type === 'text');\n return textParts.length > 0 ? textParts[textParts.length - 1]?.text || '' : '';\n }\n return '';\n}\n\n/**\n * Rounds a number to two decimal places.\n *\n * Uses `Number.EPSILON` to handle floating-point precision issues.\n *\n * @param num - The number to round\n * @returns The number rounded to two decimal places\n *\n * @example\n * ```ts\n * roundToTwoDecimals(0.1 + 0.2); // 0.3\n * roundToTwoDecimals(1.005); // 1.01\n * ```\n */\nexport const roundToTwoDecimals = (num: number) => {\n return Math.round((num + Number.EPSILON) * 100) / 100;\n};\n\n/**\n * Determines if a value is closer to the first target than the second.\n *\n * @param value - The value to compare\n * @param target1 - The first target value\n * @param target2 - The second target value\n * @returns `true` if `value` is closer to `target1` than `target2`\n *\n * @example\n * ```ts\n * isCloserTo(0.6, 1, 0); // true (0.6 is closer to 1)\n * isCloserTo(0.3, 1, 0); // false (0.3 is closer to 0)\n * ```\n */\nexport function isCloserTo(value: number, target1: number, target2: number): boolean {\n return Math.abs(value - target1) < Math.abs(value - target2);\n}\n\n/**\n * Represents a test case for scorer evaluation.\n */\nexport type TestCase = {\n /** The input text to evaluate */\n input: string;\n /** The output text to evaluate */\n output: string;\n /** The expected result of the evaluation */\n expectedResult: {\n /** The expected score */\n score: number;\n /** The optional expected reason */\n reason?: string;\n };\n};\n\n/**\n * Represents a test case with additional context for scorer evaluation.\n */\nexport type TestCaseWithContext = TestCase & {\n /** Additional context strings for the evaluation */\n context: string[];\n};\n\n/**\n * Creates a scoring input object for testing purposes.\n *\n * @param input - The user input text\n * @param output - The assistant output text\n * @param additionalContext - Optional additional context data\n * @param requestContext - Optional request context data\n * @returns A ScoringInput object ready for use in scorer tests\n *\n * @example\n * ```ts\n * const run = createTestRun(\n * 'What is 2+2?',\n * 'The answer is 4.',\n * { topic: 'math' }\n * );\n * ```\n */\nexport const createTestRun = (\n input: string,\n output: string,\n additionalContext?: Record<string, any>,\n requestContext?: Record<string, any>,\n): ScoringInput => {\n return {\n input: [{ role: 'user', content: input }],\n output: { role: 'assistant', text: output },\n additionalContext: additionalContext ?? {},\n requestContext: requestContext ?? {},\n };\n};\n\n/**\n * Extracts the user message text from a scorer run input.\n *\n * Finds the first message with role 'user' and extracts its text content.\n *\n * @param input - The scorer run input containing input messages\n * @returns The user message text, or `undefined` if no user message is found\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const userText = getUserMessageFromRunInput(run.input);\n * return { userText };\n * });\n * ```\n */\nexport const getUserMessageFromRunInput = (input?: ScorerRunInputForAgent): string | undefined => {\n const message = input?.inputMessages.find(({ role }) => role === 'user');\n return message ? getTextContentFromMastraDBMessage(message) : undefined;\n};\n\n/**\n * Extracts all system messages from a scorer run input.\n *\n * Collects text from both standard system messages and tagged system messages\n * (specialized system prompts like memory instructions).\n *\n * @param input - The scorer run input containing system messages\n * @returns An array of system message strings\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const systemMessages = getSystemMessagesFromRunInput(run.input);\n * return { systemPrompt: systemMessages.join('\\n') };\n * });\n * ```\n */\nexport const getSystemMessagesFromRunInput = (input?: ScorerRunInputForAgent): string[] => {\n const systemMessages: string[] = [];\n\n // Add standard system messages\n if (input?.systemMessages) {\n systemMessages.push(\n ...input.systemMessages\n .map(msg => {\n // Handle different content types - extract text if it's an array of parts\n if (typeof msg.content === 'string') {\n return msg.content;\n } else if (Array.isArray(msg.content)) {\n // Extract text from parts array\n return msg.content\n .filter(part => part.type === 'text')\n .map(part => part.text || '')\n .join(' ');\n }\n return '';\n })\n .filter(content => content),\n );\n }\n\n // Add tagged system messages (these are specialized system prompts)\n if (input?.taggedSystemMessages) {\n Object.values(input.taggedSystemMessages).forEach(messages => {\n messages.forEach(msg => {\n if (typeof msg.content === 'string') {\n systemMessages.push(msg.content);\n }\n });\n });\n }\n\n return systemMessages;\n};\n\n/**\n * Combines all system messages into a single prompt string.\n *\n * Joins all system messages (standard and tagged) with double newlines.\n *\n * @param input - The scorer run input containing system messages\n * @returns A combined system prompt string\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const systemPrompt = getCombinedSystemPrompt(run.input);\n * return { systemPrompt };\n * });\n * ```\n */\nexport const getCombinedSystemPrompt = (input?: ScorerRunInputForAgent): string => {\n const systemMessages = getSystemMessagesFromRunInput(input);\n return systemMessages.join('\\n\\n');\n};\n\n/**\n * Extracts the assistant message text from a scorer run output.\n *\n * Finds the first message with role 'assistant' and extracts its text content.\n *\n * @param output - The scorer run output (array of MastraDBMessage)\n * @returns The assistant message text, or `undefined` if no assistant message is found\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const response = getAssistantMessageFromRunOutput(run.output);\n * return { response };\n * });\n * ```\n */\nexport const getAssistantMessageFromRunOutput = (output?: ScorerRunOutputForAgent) => {\n const message = output?.find(({ role }) => role === 'assistant');\n return message ? getTextContentFromMastraDBMessage(message) : undefined;\n};\n\n/**\n * Extracts reasoning text from a scorer run output.\n *\n * This function extracts reasoning content from assistant messages, which is\n * produced by reasoning models like `deepseek-reasoner`. The reasoning can be\n * stored in two places:\n * 1. `content.reasoning` - a string field on the message content\n * 2. `content.parts` - as parts with `type: 'reasoning'` containing `details`\n *\n * @param output - The scorer run output (array of MastraDBMessage)\n * @returns The reasoning text, or `undefined` if no reasoning is present\n *\n * @example\n * ```ts\n * const reasoningScorer = createScorer({\n * id: 'reasoning-scorer',\n * name: 'Reasoning Quality',\n * description: 'Evaluates the quality of model reasoning',\n * type: 'agent',\n * })\n * .preprocess(({ run }) => {\n * const reasoning = getReasoningFromRunOutput(run.output);\n * const response = getAssistantMessageFromRunOutput(run.output);\n * return { reasoning, response };\n * })\n * .generateScore(({ results }) => {\n * // Score based on reasoning quality\n * return results.preprocessStepResult?.reasoning ? 1 : 0;\n * });\n * ```\n */\nexport const getReasoningFromRunOutput = (output?: ScorerRunOutputForAgent): string | undefined => {\n if (!output) return undefined;\n\n const message = output.find(({ role }) => role === 'assistant');\n if (!message) return undefined;\n\n // Check for reasoning in content.reasoning (string format)\n if (message.content.reasoning) {\n return message.content.reasoning;\n }\n\n // Check for reasoning in parts with type 'reasoning'\n // Reasoning models store reasoning in parts as { type: 'reasoning', details: [{ type: 'text', text: '...' }] }\n const reasoningParts = message.content.parts?.filter((p: any) => p.type === 'reasoning');\n if (reasoningParts && reasoningParts.length > 0) {\n const reasoningTexts = reasoningParts\n .map((p: any) => {\n // The reasoning text can be in p.reasoning or in p.details[].text\n if (p.details && Array.isArray(p.details)) {\n return p.details\n .filter((d: any) => d.type === 'text')\n .map((d: any) => d.text)\n .join('');\n }\n return p.reasoning || '';\n })\n .filter(Boolean);\n\n return reasoningTexts.length > 0 ? reasoningTexts.join('\\n') : undefined;\n }\n\n return undefined;\n};\n\n/**\n * Creates a tool invocation object for testing purposes.\n *\n * @param options - The tool invocation configuration\n * @param options.toolCallId - Unique identifier for the tool call\n * @param options.toolName - Name of the tool being called\n * @param options.args - Arguments passed to the tool\n * @param options.result - Result returned by the tool\n * @param options.state - State of the invocation (default: 'result')\n * @returns A tool invocation object\n *\n * @example\n * ```ts\n * const invocation = createToolInvocation({\n * toolCallId: 'call-123',\n * toolName: 'weatherTool',\n * args: { location: 'London' },\n * result: { temperature: 20, condition: 'sunny' },\n * });\n * ```\n */\nexport const createToolInvocation = ({\n toolCallId,\n toolName,\n args,\n result,\n state = 'result',\n}: {\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state?: ToolInvocation['state'];\n}): { toolCallId: string; toolName: string; args: Record<string, any>; result: Record<string, any>; state: string } => {\n return {\n toolCallId,\n toolName,\n args,\n result,\n state,\n };\n};\n\n/**\n * Creates a MastraDBMessage object for testing purposes.\n *\n * Supports optional tool invocations for testing tool call scenarios.\n *\n * @param options - The message configuration\n * @param options.content - The text content of the message\n * @param options.role - The role of the message sender ('user', 'assistant', or 'system')\n * @param options.id - Optional message ID (default: 'test-message')\n * @param options.toolInvocations - Optional array of tool invocations\n * @returns A MastraDBMessage object\n *\n * @example\n * ```ts\n * const message = createTestMessage({\n * content: 'Hello, how can I help?',\n * role: 'assistant',\n * });\n *\n * // With tool invocations\n * const messageWithTools = createTestMessage({\n * content: 'Let me check the weather.',\n * role: 'assistant',\n * toolInvocations: [{\n * toolCallId: 'call-1',\n * toolName: 'weatherTool',\n * args: { location: 'Paris' },\n * result: { temp: 22 },\n * state: 'result',\n * }],\n * });\n * ```\n */\nexport function createTestMessage({\n content,\n role,\n id = 'test-message',\n toolInvocations = [],\n}: {\n content: string;\n role: 'user' | 'assistant' | 'system';\n id?: string;\n toolInvocations?: Array<{\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state: any;\n }>;\n}): MastraDBMessage {\n return {\n id,\n role,\n content: {\n format: 2,\n parts: [{ type: 'text', text: content }],\n content,\n ...(toolInvocations.length > 0 && {\n toolInvocations: toolInvocations.map(ti => ({\n toolCallId: ti.toolCallId,\n toolName: ti.toolName,\n args: ti.args,\n result: ti.result,\n state: ti.state,\n })),\n }),\n },\n createdAt: new Date(),\n };\n}\n\n/**\n * Creates a complete agent test run object for testing scorers.\n *\n * Provides a convenient way to construct the full run object that scorers receive,\n * including input messages, output, system messages, and request context.\n *\n * @param options - The test run configuration\n * @param options.inputMessages - Array of input messages (default: [])\n * @param options.output - The output messages (required)\n * @param options.rememberedMessages - Array of remembered messages from memory (default: [])\n * @param options.systemMessages - Array of system messages (default: [])\n * @param options.taggedSystemMessages - Tagged system messages map (default: {})\n * @param options.requestContext - Request context (default: new RequestContext())\n * @param options.runId - Unique run ID (default: random UUID)\n * @returns A complete test run object\n *\n * @example\n * ```ts\n * const testRun = createAgentTestRun({\n * inputMessages: [createTestMessage({ content: 'Hello', role: 'user' })],\n * output: [createTestMessage({ content: 'Hi there!', role: 'assistant' })],\n * });\n *\n * const result = await scorer.run({\n * input: testRun.input,\n * output: testRun.output,\n * });\n * ```\n */\nexport const createAgentTestRun = ({\n inputMessages = [],\n output,\n rememberedMessages = [],\n systemMessages = [],\n taggedSystemMessages = {},\n requestContext = new RequestContext(),\n runId = crypto.randomUUID(),\n}: {\n inputMessages?: ScorerRunInputForAgent['inputMessages'];\n output: ScorerRunOutputForAgent;\n rememberedMessages?: ScorerRunInputForAgent['rememberedMessages'];\n systemMessages?: ScorerRunInputForAgent['systemMessages'];\n taggedSystemMessages?: ScorerRunInputForAgent['taggedSystemMessages'];\n requestContext?: RequestContext;\n runId?: string;\n}): {\n input: ScorerRunInputForAgent;\n output: ScorerRunOutputForAgent;\n requestContext: RequestContext;\n runId: string;\n} => {\n return {\n input: {\n inputMessages,\n rememberedMessages,\n systemMessages,\n taggedSystemMessages,\n },\n output,\n requestContext,\n runId,\n };\n};\n\n/**\n * Information about a tool call extracted from scorer output.\n */\nexport type ToolCallInfo = {\n /** Name of the tool that was called */\n toolName: string;\n /** Unique identifier for the tool call */\n toolCallId: string;\n /** Index of the message containing this tool call */\n messageIndex: number;\n /** Index of the invocation within the message's tool invocations */\n invocationIndex: number;\n};\n\n/**\n * Extracts all tool calls from a scorer run output.\n *\n * Iterates through all messages and their tool invocations to collect\n * information about tools that were called (with state 'result' or 'call').\n *\n * @param output - The scorer run output (array of MastraDBMessage)\n * @returns An object containing tool names and detailed tool call info\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const { tools, toolCallInfos } = extractToolCalls(run.output);\n * return {\n * toolsUsed: tools,\n * toolCount: tools.length,\n * };\n * });\n * ```\n */\nexport function extractToolCalls(output: ScorerRunOutputForAgent): { tools: string[]; toolCallInfos: ToolCallInfo[] } {\n const toolCalls: string[] = [];\n const toolCallInfos: ToolCallInfo[] = [];\n\n for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {\n const message = output[messageIndex];\n // Tool invocations are now nested under content\n if (message?.content?.toolInvocations) {\n for (let invocationIndex = 0; invocationIndex < message.content.toolInvocations.length; invocationIndex++) {\n const invocation = message.content.toolInvocations[invocationIndex];\n if (invocation && invocation.toolName && (invocation.state === 'result' || invocation.state === 'call')) {\n toolCalls.push(invocation.toolName);\n toolCallInfos.push({\n toolName: invocation.toolName,\n toolCallId: invocation.toolCallId || `${messageIndex}-${invocationIndex}`,\n messageIndex,\n invocationIndex,\n });\n }\n }\n }\n }\n\n return { tools: toolCalls, toolCallInfos };\n}\n\n/**\n * Extracts text content from all input messages.\n *\n * @param runInput - The scorer run input\n * @returns An array of text strings from each input message\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const messages = extractInputMessages(run.input);\n * return { allUserMessages: messages.join('\\n') };\n * });\n * ```\n */\nexport const extractInputMessages = (runInput: ScorerRunInputForAgent | undefined): string[] => {\n return runInput?.inputMessages?.map(msg => getTextContentFromMastraDBMessage(msg)) || [];\n};\n\n/**\n * Extracts text content from all assistant response messages.\n *\n * Filters for messages with role 'assistant' and extracts their text content.\n *\n * @param runOutput - The scorer run output (array of MastraDBMessage)\n * @returns An array of text strings from each assistant message\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const responses = extractAgentResponseMessages(run.output);\n * return { allResponses: responses.join('\\n') };\n * });\n * ```\n */\nexport const extractAgentResponseMessages = (runOutput: ScorerRunOutputForAgent): string[] => {\n return runOutput.filter(msg => msg.role === 'assistant').map(msg => getTextContentFromMastraDBMessage(msg));\n};\n"]}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
var
|
|
3
|
+
var chunkAT7HXT3U_cjs = require('../../chunk-AT7HXT3U.cjs');
|
|
4
4
|
var evals = require('@mastra/core/evals');
|
|
5
5
|
var zod = require('zod');
|
|
6
6
|
var nlp = require('compromise');
|
|
@@ -239,14 +239,14 @@ function createAnswerRelevancyScorer({
|
|
|
239
239
|
description: "Extract relevant statements from the LLM output",
|
|
240
240
|
outputSchema: extractOutputSchema,
|
|
241
241
|
createPrompt: ({ run }) => {
|
|
242
|
-
const assistantMessage =
|
|
242
|
+
const assistantMessage = chunkAT7HXT3U_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
|
|
243
243
|
return createExtractPrompt(assistantMessage);
|
|
244
244
|
}
|
|
245
245
|
}).analyze({
|
|
246
246
|
description: "Score the relevance of the statements to the input",
|
|
247
247
|
outputSchema: zod.z.object({ results: zod.z.array(zod.z.object({ result: zod.z.string(), reason: zod.z.string() })) }),
|
|
248
248
|
createPrompt: ({ run, results }) => {
|
|
249
|
-
const input =
|
|
249
|
+
const input = chunkAT7HXT3U_cjs.getUserMessageFromRunInput(run.input) ?? "";
|
|
250
250
|
return createScorePrompt(JSON.stringify(input), results.preprocessStepResult?.statements || []);
|
|
251
251
|
}
|
|
252
252
|
}).generateScore(({ results }) => {
|
|
@@ -263,13 +263,13 @@ function createAnswerRelevancyScorer({
|
|
|
263
263
|
}
|
|
264
264
|
}
|
|
265
265
|
const score = relevancyCount / numberOfResults;
|
|
266
|
-
return
|
|
266
|
+
return chunkAT7HXT3U_cjs.roundToTwoDecimals(score * options.scale);
|
|
267
267
|
}).generateReason({
|
|
268
268
|
description: "Reason about the results",
|
|
269
269
|
createPrompt: ({ run, results, score }) => {
|
|
270
270
|
return createReasonPrompt({
|
|
271
|
-
input:
|
|
272
|
-
output:
|
|
271
|
+
input: chunkAT7HXT3U_cjs.getUserMessageFromRunInput(run.input) ?? "",
|
|
272
|
+
output: chunkAT7HXT3U_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
|
|
273
273
|
score,
|
|
274
274
|
results: results.analyzeStepResult.results,
|
|
275
275
|
scale: options.scale
|
|
@@ -466,7 +466,7 @@ function createAnswerSimilarityScorer({
|
|
|
466
466
|
groundTruth: ""
|
|
467
467
|
});
|
|
468
468
|
}
|
|
469
|
-
const output =
|
|
469
|
+
const output = chunkAT7HXT3U_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
|
|
470
470
|
const groundTruth = typeof run.groundTruth === "string" ? run.groundTruth : JSON.stringify(run.groundTruth);
|
|
471
471
|
return createExtractPrompt2({
|
|
472
472
|
output,
|
|
@@ -524,14 +524,14 @@ function createAnswerSimilarityScorer({
|
|
|
524
524
|
);
|
|
525
525
|
score -= extraInfoPenalty;
|
|
526
526
|
score = Math.max(0, Math.min(1, score));
|
|
527
|
-
return
|
|
527
|
+
return chunkAT7HXT3U_cjs.roundToTwoDecimals(score * mergedOptions.scale);
|
|
528
528
|
}).generateReason({
|
|
529
529
|
description: "Generate explanation of similarity score",
|
|
530
530
|
createPrompt: ({ run, results, score }) => {
|
|
531
531
|
if (!run.groundTruth) {
|
|
532
532
|
return "No ground truth was provided for comparison. Score is 0 by default.";
|
|
533
533
|
}
|
|
534
|
-
const output =
|
|
534
|
+
const output = chunkAT7HXT3U_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
|
|
535
535
|
const groundTruth = typeof run.groundTruth === "string" ? run.groundTruth : JSON.stringify(run.groundTruth);
|
|
536
536
|
return createReasonPrompt2({
|
|
537
537
|
output,
|
|
@@ -715,7 +715,7 @@ function createFaithfulnessScorer({
|
|
|
715
715
|
description: "Extract relevant statements from the LLM output",
|
|
716
716
|
outputSchema: zod.z.array(zod.z.string()),
|
|
717
717
|
createPrompt: ({ run }) => {
|
|
718
|
-
const prompt = createFaithfulnessExtractPrompt({ output:
|
|
718
|
+
const prompt = createFaithfulnessExtractPrompt({ output: chunkAT7HXT3U_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" });
|
|
719
719
|
return prompt;
|
|
720
720
|
}
|
|
721
721
|
}).analyze({
|
|
@@ -739,14 +739,14 @@ function createFaithfulnessScorer({
|
|
|
739
739
|
return 0;
|
|
740
740
|
}
|
|
741
741
|
const score = supportedClaims / totalClaims * (options?.scale || 1);
|
|
742
|
-
return
|
|
742
|
+
return chunkAT7HXT3U_cjs.roundToTwoDecimals(score);
|
|
743
743
|
}).generateReason({
|
|
744
744
|
description: "Reason about the results",
|
|
745
745
|
createPrompt: ({ run, results, score }) => {
|
|
746
746
|
const assistantMessage = run.output.find(({ role }) => role === "assistant");
|
|
747
747
|
const prompt = createFaithfulnessReasonPrompt({
|
|
748
|
-
input:
|
|
749
|
-
output:
|
|
748
|
+
input: chunkAT7HXT3U_cjs.getUserMessageFromRunInput(run.input) ?? "",
|
|
749
|
+
output: chunkAT7HXT3U_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
|
|
750
750
|
context: assistantMessage?.content?.toolInvocations?.map((toolCall) => JSON.stringify(toolCall)) || [],
|
|
751
751
|
score,
|
|
752
752
|
scale: options?.scale || 1,
|
|
@@ -879,13 +879,13 @@ function createBiasScorer({ model, options }) {
|
|
|
879
879
|
outputSchema: zod.z.object({
|
|
880
880
|
opinions: zod.z.array(zod.z.string())
|
|
881
881
|
}),
|
|
882
|
-
createPrompt: ({ run }) => createBiasExtractPrompt({ output:
|
|
882
|
+
createPrompt: ({ run }) => createBiasExtractPrompt({ output: chunkAT7HXT3U_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" })
|
|
883
883
|
}).analyze({
|
|
884
884
|
description: "Score the relevance of the statements to the input",
|
|
885
885
|
outputSchema: zod.z.object({ results: zod.z.array(zod.z.object({ result: zod.z.string(), reason: zod.z.string() })) }),
|
|
886
886
|
createPrompt: ({ run, results }) => {
|
|
887
887
|
const prompt = createBiasAnalyzePrompt({
|
|
888
|
-
output:
|
|
888
|
+
output: chunkAT7HXT3U_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
|
|
889
889
|
opinions: results.preprocessStepResult?.opinions || []
|
|
890
890
|
});
|
|
891
891
|
return prompt;
|
|
@@ -896,7 +896,7 @@ function createBiasScorer({ model, options }) {
|
|
|
896
896
|
}
|
|
897
897
|
const biasedVerdicts = results.analyzeStepResult.results.filter((v) => v.result.toLowerCase() === "yes");
|
|
898
898
|
const score = biasedVerdicts.length / results.analyzeStepResult.results.length;
|
|
899
|
-
return
|
|
899
|
+
return chunkAT7HXT3U_cjs.roundToTwoDecimals(score * (options?.scale || 1));
|
|
900
900
|
}).generateReason({
|
|
901
901
|
description: "Reason about the results",
|
|
902
902
|
createPrompt: ({ score, results }) => {
|
|
@@ -1115,7 +1115,7 @@ function createHallucinationScorer({
|
|
|
1115
1115
|
claims: zod.z.array(zod.z.string())
|
|
1116
1116
|
}),
|
|
1117
1117
|
createPrompt: ({ run }) => {
|
|
1118
|
-
const prompt = createHallucinationExtractPrompt({ output:
|
|
1118
|
+
const prompt = createHallucinationExtractPrompt({ output: chunkAT7HXT3U_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" });
|
|
1119
1119
|
return prompt;
|
|
1120
1120
|
}
|
|
1121
1121
|
}).analyze({
|
|
@@ -1137,13 +1137,13 @@ function createHallucinationScorer({
|
|
|
1137
1137
|
return 0;
|
|
1138
1138
|
}
|
|
1139
1139
|
const score = contradictedStatements / totalStatements * (options?.scale || 1);
|
|
1140
|
-
return
|
|
1140
|
+
return chunkAT7HXT3U_cjs.roundToTwoDecimals(score);
|
|
1141
1141
|
}).generateReason({
|
|
1142
1142
|
description: "Reason about the results",
|
|
1143
1143
|
createPrompt: ({ run, results, score }) => {
|
|
1144
1144
|
const prompt = createHallucinationReasonPrompt({
|
|
1145
|
-
input:
|
|
1146
|
-
output:
|
|
1145
|
+
input: chunkAT7HXT3U_cjs.getUserMessageFromRunInput(run.input) ?? "",
|
|
1146
|
+
output: chunkAT7HXT3U_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
|
|
1147
1147
|
context: options?.context || [],
|
|
1148
1148
|
score,
|
|
1149
1149
|
scale: options?.scale || 1,
|
|
@@ -1257,8 +1257,8 @@ function createToxicityScorer({
|
|
|
1257
1257
|
outputSchema: zod.z.object({ verdicts: zod.z.array(zod.z.object({ verdict: zod.z.string(), reason: zod.z.string() })) }),
|
|
1258
1258
|
createPrompt: ({ run }) => {
|
|
1259
1259
|
const prompt = createToxicityAnalyzePrompt({
|
|
1260
|
-
input:
|
|
1261
|
-
output:
|
|
1260
|
+
input: chunkAT7HXT3U_cjs.getUserMessageFromRunInput(run.input) ?? "",
|
|
1261
|
+
output: chunkAT7HXT3U_cjs.getAssistantMessageFromRunOutput(run.output) ?? ""
|
|
1262
1262
|
});
|
|
1263
1263
|
return prompt;
|
|
1264
1264
|
}
|
|
@@ -1274,7 +1274,7 @@ function createToxicityScorer({
|
|
|
1274
1274
|
}
|
|
1275
1275
|
}
|
|
1276
1276
|
const score = toxicityCount / numberOfVerdicts;
|
|
1277
|
-
return
|
|
1277
|
+
return chunkAT7HXT3U_cjs.roundToTwoDecimals(score * (options?.scale || 1));
|
|
1278
1278
|
}).generateReason({
|
|
1279
1279
|
description: "Reason about the results",
|
|
1280
1280
|
createPrompt: ({ results, score }) => {
|
|
@@ -1408,7 +1408,7 @@ function createToolCallAccuracyScorerLLM({ model, availableTools }) {
|
|
|
1408
1408
|
if (isInputInvalid || isOutputInvalid) {
|
|
1409
1409
|
throw new Error("Input and output messages cannot be null or empty");
|
|
1410
1410
|
}
|
|
1411
|
-
const { tools: actualTools, toolCallInfos } =
|
|
1411
|
+
const { tools: actualTools, toolCallInfos } = chunkAT7HXT3U_cjs.extractToolCalls(run.output);
|
|
1412
1412
|
return {
|
|
1413
1413
|
actualTools,
|
|
1414
1414
|
hasToolCalls: actualTools.length > 0,
|
|
@@ -1418,8 +1418,8 @@ function createToolCallAccuracyScorerLLM({ model, availableTools }) {
|
|
|
1418
1418
|
description: "Analyze the appropriateness of tool selections",
|
|
1419
1419
|
outputSchema: analyzeOutputSchema2,
|
|
1420
1420
|
createPrompt: ({ run, results }) => {
|
|
1421
|
-
const userInput =
|
|
1422
|
-
const agentResponse =
|
|
1421
|
+
const userInput = chunkAT7HXT3U_cjs.getUserMessageFromRunInput(run.input) ?? "";
|
|
1422
|
+
const agentResponse = chunkAT7HXT3U_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
|
|
1423
1423
|
const toolsCalled = results.preprocessStepResult?.actualTools || [];
|
|
1424
1424
|
return createAnalyzePrompt2({
|
|
1425
1425
|
userInput,
|
|
@@ -1436,11 +1436,11 @@ function createToolCallAccuracyScorerLLM({ model, availableTools }) {
|
|
|
1436
1436
|
}
|
|
1437
1437
|
const appropriateToolCalls = evaluations.filter((e) => e.wasAppropriate).length;
|
|
1438
1438
|
const totalToolCalls = evaluations.length;
|
|
1439
|
-
return
|
|
1439
|
+
return chunkAT7HXT3U_cjs.roundToTwoDecimals(appropriateToolCalls / totalToolCalls);
|
|
1440
1440
|
}).generateReason({
|
|
1441
1441
|
description: "Generate human-readable explanation of tool selection evaluation",
|
|
1442
1442
|
createPrompt: ({ run, results, score }) => {
|
|
1443
|
-
const userInput =
|
|
1443
|
+
const userInput = chunkAT7HXT3U_cjs.getUserMessageFromRunInput(run.input) ?? "";
|
|
1444
1444
|
const evaluations = results.analyzeStepResult?.evaluations || [];
|
|
1445
1445
|
const missingTools = results.analyzeStepResult?.missingTools || [];
|
|
1446
1446
|
return createReasonPrompt3({
|
|
@@ -1645,8 +1645,8 @@ function createContextRelevanceScorerLLM({
|
|
|
1645
1645
|
description: "Analyze the relevance and utility of provided context",
|
|
1646
1646
|
outputSchema: analyzeOutputSchema3,
|
|
1647
1647
|
createPrompt: ({ run }) => {
|
|
1648
|
-
const userQuery =
|
|
1649
|
-
const agentResponse =
|
|
1648
|
+
const userQuery = chunkAT7HXT3U_cjs.getUserMessageFromRunInput(run.input) ?? "";
|
|
1649
|
+
const agentResponse = chunkAT7HXT3U_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
|
|
1650
1650
|
const context = options.contextExtractor ? options.contextExtractor(run.input, run.output) : options.context;
|
|
1651
1651
|
if (context.length === 0) {
|
|
1652
1652
|
return createAnalyzePrompt3({
|
|
@@ -1694,11 +1694,11 @@ function createContextRelevanceScorerLLM({
|
|
|
1694
1694
|
const missingContextPenalty = Math.min(missingContext.length * missingPenaltyRate, maxMissingPenalty);
|
|
1695
1695
|
const finalScore = Math.max(0, relevanceScore - usagePenalty - missingContextPenalty);
|
|
1696
1696
|
const scaledScore = finalScore * (options.scale || 1);
|
|
1697
|
-
return
|
|
1697
|
+
return chunkAT7HXT3U_cjs.roundToTwoDecimals(scaledScore);
|
|
1698
1698
|
}).generateReason({
|
|
1699
1699
|
description: "Generate human-readable explanation of context relevance evaluation",
|
|
1700
1700
|
createPrompt: ({ run, results, score }) => {
|
|
1701
|
-
const userQuery =
|
|
1701
|
+
const userQuery = chunkAT7HXT3U_cjs.getUserMessageFromRunInput(run.input) ?? "";
|
|
1702
1702
|
const context = options.contextExtractor ? options.contextExtractor(run.input, run.output) : options.context;
|
|
1703
1703
|
if (context.length === 0) {
|
|
1704
1704
|
return `No context was available for evaluation. The agent response was generated without any supporting context. Score: ${score}`;
|
|
@@ -1869,8 +1869,8 @@ function createContextPrecisionScorer({
|
|
|
1869
1869
|
description: "Evaluate the relevance of each context piece for generating the expected output",
|
|
1870
1870
|
outputSchema: contextRelevanceOutputSchema,
|
|
1871
1871
|
createPrompt: ({ run }) => {
|
|
1872
|
-
const input =
|
|
1873
|
-
const output =
|
|
1872
|
+
const input = chunkAT7HXT3U_cjs.getUserMessageFromRunInput(run.input) ?? "";
|
|
1873
|
+
const output = chunkAT7HXT3U_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
|
|
1874
1874
|
const context = options.contextExtractor ? options.contextExtractor(run.input, run.output) : options.context;
|
|
1875
1875
|
if (context.length === 0) {
|
|
1876
1876
|
throw new Error("No context available for evaluation");
|
|
@@ -1903,12 +1903,12 @@ function createContextPrecisionScorer({
|
|
|
1903
1903
|
}
|
|
1904
1904
|
const map = sumPrecision / relevantCount;
|
|
1905
1905
|
const score = map * (options.scale || 1);
|
|
1906
|
-
return
|
|
1906
|
+
return chunkAT7HXT3U_cjs.roundToTwoDecimals(score);
|
|
1907
1907
|
}).generateReason({
|
|
1908
1908
|
description: "Reason about the context precision results",
|
|
1909
1909
|
createPrompt: ({ run, results, score }) => {
|
|
1910
|
-
const input =
|
|
1911
|
-
const output =
|
|
1910
|
+
const input = chunkAT7HXT3U_cjs.getUserMessageFromRunInput(run.input) ?? "";
|
|
1911
|
+
const output = chunkAT7HXT3U_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
|
|
1912
1912
|
const context = options.contextExtractor ? options.contextExtractor(run.input, run.output) : options.context;
|
|
1913
1913
|
return createContextPrecisionReasonPrompt({
|
|
1914
1914
|
input,
|
|
@@ -2162,8 +2162,8 @@ function createNoiseSensitivityScorerLLM({
|
|
|
2162
2162
|
description: "Analyze the impact of noise on agent response quality",
|
|
2163
2163
|
outputSchema: analyzeOutputSchema4,
|
|
2164
2164
|
createPrompt: ({ run }) => {
|
|
2165
|
-
const originalQuery =
|
|
2166
|
-
const noisyResponse =
|
|
2165
|
+
const originalQuery = chunkAT7HXT3U_cjs.getUserMessageFromRunInput(run.input) ?? "";
|
|
2166
|
+
const noisyResponse = chunkAT7HXT3U_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
|
|
2167
2167
|
if (!originalQuery || !noisyResponse) {
|
|
2168
2168
|
throw new Error("Both original query and noisy response are required for evaluation");
|
|
2169
2169
|
}
|
|
@@ -2206,11 +2206,11 @@ function createNoiseSensitivityScorerLLM({
|
|
|
2206
2206
|
const majorIssues = analysisResult.majorIssues || [];
|
|
2207
2207
|
const issuesPenalty = Math.min(majorIssues.length * majorIssuePenaltyRate, maxMajorIssuePenalty);
|
|
2208
2208
|
finalScore = Math.max(0, finalScore - issuesPenalty);
|
|
2209
|
-
return
|
|
2209
|
+
return chunkAT7HXT3U_cjs.roundToTwoDecimals(finalScore);
|
|
2210
2210
|
}).generateReason({
|
|
2211
2211
|
description: "Generate human-readable explanation of noise sensitivity evaluation",
|
|
2212
2212
|
createPrompt: ({ run, results, score }) => {
|
|
2213
|
-
const originalQuery =
|
|
2213
|
+
const originalQuery = chunkAT7HXT3U_cjs.getUserMessageFromRunInput(run.input) ?? "";
|
|
2214
2214
|
const analysisResult = results.analyzeStepResult;
|
|
2215
2215
|
if (!analysisResult) {
|
|
2216
2216
|
throw new Error("Analysis step failed to produce results for reason generation");
|
|
@@ -2534,9 +2534,9 @@ function createPromptAlignmentScorerLLM({
|
|
|
2534
2534
|
description: "Analyze prompt-response alignment across multiple dimensions",
|
|
2535
2535
|
outputSchema: analyzeOutputSchema5,
|
|
2536
2536
|
createPrompt: ({ run }) => {
|
|
2537
|
-
const userPrompt =
|
|
2538
|
-
const systemPrompt =
|
|
2539
|
-
const agentResponse =
|
|
2537
|
+
const userPrompt = chunkAT7HXT3U_cjs.getUserMessageFromRunInput(run.input) ?? "";
|
|
2538
|
+
const systemPrompt = chunkAT7HXT3U_cjs.getCombinedSystemPrompt(run.input) ?? "";
|
|
2539
|
+
const agentResponse = chunkAT7HXT3U_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
|
|
2540
2540
|
if (evaluationMode === "user" && !userPrompt) {
|
|
2541
2541
|
throw new Error("User prompt is required for user prompt alignment scoring");
|
|
2542
2542
|
}
|
|
@@ -2572,12 +2572,12 @@ function createPromptAlignmentScorerLLM({
|
|
|
2572
2572
|
weightedScore = userScore * SCORING_WEIGHTS.BOTH.USER_WEIGHT + systemScore * SCORING_WEIGHTS.BOTH.SYSTEM_WEIGHT;
|
|
2573
2573
|
}
|
|
2574
2574
|
const finalScore = weightedScore * scale;
|
|
2575
|
-
return
|
|
2575
|
+
return chunkAT7HXT3U_cjs.roundToTwoDecimals(finalScore);
|
|
2576
2576
|
}).generateReason({
|
|
2577
2577
|
description: "Generate human-readable explanation of prompt alignment evaluation",
|
|
2578
2578
|
createPrompt: ({ run, results, score }) => {
|
|
2579
|
-
const userPrompt =
|
|
2580
|
-
const systemPrompt =
|
|
2579
|
+
const userPrompt = chunkAT7HXT3U_cjs.getUserMessageFromRunInput(run.input) ?? "";
|
|
2580
|
+
const systemPrompt = chunkAT7HXT3U_cjs.getCombinedSystemPrompt(run.input) ?? "";
|
|
2581
2581
|
const analysis = results.analyzeStepResult;
|
|
2582
2582
|
if (!analysis) {
|
|
2583
2583
|
return `Unable to analyze prompt alignment. Score: ${score}`;
|
|
@@ -2642,18 +2642,18 @@ function createCompletenessScorer() {
|
|
|
2642
2642
|
type: "agent"
|
|
2643
2643
|
}).preprocess(async ({ run }) => {
|
|
2644
2644
|
const isInputInvalid = !run.input || run.input.inputMessages.some((i) => {
|
|
2645
|
-
const content =
|
|
2645
|
+
const content = chunkAT7HXT3U_cjs.getTextContentFromMastraDBMessage(i);
|
|
2646
2646
|
return content === null || content === void 0;
|
|
2647
2647
|
});
|
|
2648
2648
|
const isOutputInvalid = !run.output || run.output.some((i) => {
|
|
2649
|
-
const content =
|
|
2649
|
+
const content = chunkAT7HXT3U_cjs.getTextContentFromMastraDBMessage(i);
|
|
2650
2650
|
return content === null || content === void 0;
|
|
2651
2651
|
});
|
|
2652
2652
|
if (isInputInvalid || isOutputInvalid) {
|
|
2653
2653
|
throw new Error("Inputs cannot be null or undefined");
|
|
2654
2654
|
}
|
|
2655
|
-
const input = run.input?.inputMessages.map((i) =>
|
|
2656
|
-
const output = run.output?.map((i) =>
|
|
2655
|
+
const input = run.input?.inputMessages.map((i) => chunkAT7HXT3U_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
|
|
2656
|
+
const output = run.output?.map((i) => chunkAT7HXT3U_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
|
|
2657
2657
|
const inputToProcess = input;
|
|
2658
2658
|
const outputToProcess = output;
|
|
2659
2659
|
const inputDoc = nlp__default.default(inputToProcess.trim());
|
|
@@ -2758,8 +2758,8 @@ function createTextualDifferenceScorer() {
|
|
|
2758
2758
|
description: "Calculate textual difference between input and output using sequence matching algorithms.",
|
|
2759
2759
|
type: "agent"
|
|
2760
2760
|
}).preprocess(async ({ run }) => {
|
|
2761
|
-
const input = run.input?.inputMessages?.map((i) =>
|
|
2762
|
-
const output = run.output?.map((i) =>
|
|
2761
|
+
const input = run.input?.inputMessages?.map((i) => chunkAT7HXT3U_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
|
|
2762
|
+
const output = run.output?.map((i) => chunkAT7HXT3U_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
|
|
2763
2763
|
const ratio = calculateRatio(input, output);
|
|
2764
2764
|
const changes = countChanges(input, output);
|
|
2765
2765
|
const maxLength = Math.max(input.length, output.length);
|
|
@@ -2782,8 +2782,8 @@ function createKeywordCoverageScorer() {
|
|
|
2782
2782
|
description: 'Leverage the nlp method from "compromise" to extract elements from the input and output and calculate the coverage.',
|
|
2783
2783
|
type: "agent"
|
|
2784
2784
|
}).preprocess(async ({ run }) => {
|
|
2785
|
-
const input = run.input?.inputMessages?.map((i) =>
|
|
2786
|
-
const output = run.output?.map((i) =>
|
|
2785
|
+
const input = run.input?.inputMessages?.map((i) => chunkAT7HXT3U_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
|
|
2786
|
+
const output = run.output?.map((i) => chunkAT7HXT3U_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
|
|
2787
2787
|
if (!input && !output) {
|
|
2788
2788
|
return {
|
|
2789
2789
|
result: {
|
|
@@ -2836,8 +2836,8 @@ function createContentSimilarityScorer({ ignoreCase, ignoreWhitespace } = { igno
|
|
|
2836
2836
|
description: "Calculates content similarity between input and output messages using string comparison algorithms.",
|
|
2837
2837
|
type: "agent"
|
|
2838
2838
|
}).preprocess(async ({ run }) => {
|
|
2839
|
-
let processedInput = run.input?.inputMessages.map((i) =>
|
|
2840
|
-
let processedOutput = run.output.map((i) =>
|
|
2839
|
+
let processedInput = run.input?.inputMessages.map((i) => chunkAT7HXT3U_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
|
|
2840
|
+
let processedOutput = run.output.map((i) => chunkAT7HXT3U_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
|
|
2841
2841
|
if (ignoreCase) {
|
|
2842
2842
|
processedInput = processedInput.toLowerCase();
|
|
2843
2843
|
processedOutput = processedOutput.toLowerCase();
|
|
@@ -2867,7 +2867,7 @@ function createToneScorer(config = {}) {
|
|
|
2867
2867
|
type: "agent"
|
|
2868
2868
|
}).preprocess(async ({ run }) => {
|
|
2869
2869
|
const sentiment = new Sentiment__default.default();
|
|
2870
|
-
const agentMessage = run.output?.map((i) =>
|
|
2870
|
+
const agentMessage = run.output?.map((i) => chunkAT7HXT3U_cjs.getTextContentFromMastraDBMessage(i)).join(", ") || "";
|
|
2871
2871
|
const responseSentiment = sentiment.analyze(agentMessage);
|
|
2872
2872
|
if (referenceTone) {
|
|
2873
2873
|
const referenceSentiment = sentiment.analyze(referenceTone);
|
|
@@ -2954,7 +2954,7 @@ function createToolCallAccuracyScorerCode(options) {
|
|
|
2954
2954
|
if (isInputInvalid || isOutputInvalid) {
|
|
2955
2955
|
throw new Error("Input and output messages cannot be null or empty");
|
|
2956
2956
|
}
|
|
2957
|
-
const { tools: actualTools, toolCallInfos } =
|
|
2957
|
+
const { tools: actualTools, toolCallInfos } = chunkAT7HXT3U_cjs.extractToolCalls(run.output);
|
|
2958
2958
|
const correctToolCalled = expectedTool ? strictMode ? actualTools.length === 1 && actualTools[0] === expectedTool : actualTools.includes(expectedTool) : false;
|
|
2959
2959
|
return {
|
|
2960
2960
|
expectedTool,
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { getAssistantMessageFromRunOutput, getUserMessageFromRunInput, roundToTwoDecimals, extractToolCalls, getCombinedSystemPrompt, getTextContentFromMastraDBMessage } from '../../chunk-
|
|
1
|
+
import { getAssistantMessageFromRunOutput, getUserMessageFromRunInput, roundToTwoDecimals, extractToolCalls, getCombinedSystemPrompt, getTextContentFromMastraDBMessage } from '../../chunk-CKKVCGRB.js';
|
|
2
2
|
import { createScorer } from '@mastra/core/evals';
|
|
3
3
|
import { z } from 'zod';
|
|
4
4
|
import nlp from 'compromise';
|
package/dist/scorers/utils.cjs
CHANGED
|
@@ -1,64 +1,68 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
var
|
|
3
|
+
var chunkAT7HXT3U_cjs = require('../chunk-AT7HXT3U.cjs');
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
Object.defineProperty(exports, "createAgentTestRun", {
|
|
8
8
|
enumerable: true,
|
|
9
|
-
get: function () { return
|
|
9
|
+
get: function () { return chunkAT7HXT3U_cjs.createAgentTestRun; }
|
|
10
10
|
});
|
|
11
11
|
Object.defineProperty(exports, "createTestMessage", {
|
|
12
12
|
enumerable: true,
|
|
13
|
-
get: function () { return
|
|
13
|
+
get: function () { return chunkAT7HXT3U_cjs.createTestMessage; }
|
|
14
14
|
});
|
|
15
15
|
Object.defineProperty(exports, "createTestRun", {
|
|
16
16
|
enumerable: true,
|
|
17
|
-
get: function () { return
|
|
17
|
+
get: function () { return chunkAT7HXT3U_cjs.createTestRun; }
|
|
18
18
|
});
|
|
19
19
|
Object.defineProperty(exports, "createToolInvocation", {
|
|
20
20
|
enumerable: true,
|
|
21
|
-
get: function () { return
|
|
21
|
+
get: function () { return chunkAT7HXT3U_cjs.createToolInvocation; }
|
|
22
22
|
});
|
|
23
23
|
Object.defineProperty(exports, "extractAgentResponseMessages", {
|
|
24
24
|
enumerable: true,
|
|
25
|
-
get: function () { return
|
|
25
|
+
get: function () { return chunkAT7HXT3U_cjs.extractAgentResponseMessages; }
|
|
26
26
|
});
|
|
27
27
|
Object.defineProperty(exports, "extractInputMessages", {
|
|
28
28
|
enumerable: true,
|
|
29
|
-
get: function () { return
|
|
29
|
+
get: function () { return chunkAT7HXT3U_cjs.extractInputMessages; }
|
|
30
30
|
});
|
|
31
31
|
Object.defineProperty(exports, "extractToolCalls", {
|
|
32
32
|
enumerable: true,
|
|
33
|
-
get: function () { return
|
|
33
|
+
get: function () { return chunkAT7HXT3U_cjs.extractToolCalls; }
|
|
34
34
|
});
|
|
35
35
|
Object.defineProperty(exports, "getAssistantMessageFromRunOutput", {
|
|
36
36
|
enumerable: true,
|
|
37
|
-
get: function () { return
|
|
37
|
+
get: function () { return chunkAT7HXT3U_cjs.getAssistantMessageFromRunOutput; }
|
|
38
38
|
});
|
|
39
39
|
Object.defineProperty(exports, "getCombinedSystemPrompt", {
|
|
40
40
|
enumerable: true,
|
|
41
|
-
get: function () { return
|
|
41
|
+
get: function () { return chunkAT7HXT3U_cjs.getCombinedSystemPrompt; }
|
|
42
|
+
});
|
|
43
|
+
Object.defineProperty(exports, "getReasoningFromRunOutput", {
|
|
44
|
+
enumerable: true,
|
|
45
|
+
get: function () { return chunkAT7HXT3U_cjs.getReasoningFromRunOutput; }
|
|
42
46
|
});
|
|
43
47
|
Object.defineProperty(exports, "getSystemMessagesFromRunInput", {
|
|
44
48
|
enumerable: true,
|
|
45
|
-
get: function () { return
|
|
49
|
+
get: function () { return chunkAT7HXT3U_cjs.getSystemMessagesFromRunInput; }
|
|
46
50
|
});
|
|
47
51
|
Object.defineProperty(exports, "getTextContentFromMastraDBMessage", {
|
|
48
52
|
enumerable: true,
|
|
49
|
-
get: function () { return
|
|
53
|
+
get: function () { return chunkAT7HXT3U_cjs.getTextContentFromMastraDBMessage; }
|
|
50
54
|
});
|
|
51
55
|
Object.defineProperty(exports, "getUserMessageFromRunInput", {
|
|
52
56
|
enumerable: true,
|
|
53
|
-
get: function () { return
|
|
57
|
+
get: function () { return chunkAT7HXT3U_cjs.getUserMessageFromRunInput; }
|
|
54
58
|
});
|
|
55
59
|
Object.defineProperty(exports, "isCloserTo", {
|
|
56
60
|
enumerable: true,
|
|
57
|
-
get: function () { return
|
|
61
|
+
get: function () { return chunkAT7HXT3U_cjs.isCloserTo; }
|
|
58
62
|
});
|
|
59
63
|
Object.defineProperty(exports, "roundToTwoDecimals", {
|
|
60
64
|
enumerable: true,
|
|
61
|
-
get: function () { return
|
|
65
|
+
get: function () { return chunkAT7HXT3U_cjs.roundToTwoDecimals; }
|
|
62
66
|
});
|
|
63
67
|
//# sourceMappingURL=utils.cjs.map
|
|
64
68
|
//# sourceMappingURL=utils.cjs.map
|
package/dist/scorers/utils.d.ts
CHANGED
|
@@ -3,28 +3,225 @@ import type { ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput } fr
|
|
|
3
3
|
import { RequestContext } from '@mastra/core/request-context';
|
|
4
4
|
import type { ToolInvocation } from 'ai';
|
|
5
5
|
/**
|
|
6
|
-
*
|
|
7
|
-
*
|
|
6
|
+
* Extracts text content from a MastraDBMessage.
|
|
7
|
+
*
|
|
8
|
+
* This function matches the logic used in `MessageList.mastraDBMessageToAIV4UIMessage`.
|
|
9
|
+
* It first checks for a string `content.content` field, then falls back to extracting
|
|
10
|
+
* text from the `parts` array (returning only the last text part, like AI SDK does).
|
|
11
|
+
*
|
|
12
|
+
* @param message - The MastraDBMessage to extract text from
|
|
13
|
+
* @returns The extracted text content, or an empty string if no text is found
|
|
14
|
+
*
|
|
15
|
+
* @example
|
|
16
|
+
* ```ts
|
|
17
|
+
* const message: MastraDBMessage = {
|
|
18
|
+
* id: 'msg-1',
|
|
19
|
+
* role: 'assistant',
|
|
20
|
+
* content: { format: 2, parts: [{ type: 'text', text: 'Hello!' }] },
|
|
21
|
+
* createdAt: new Date(),
|
|
22
|
+
* };
|
|
23
|
+
* const text = getTextContentFromMastraDBMessage(message); // 'Hello!'
|
|
24
|
+
* ```
|
|
8
25
|
*/
|
|
9
26
|
export declare function getTextContentFromMastraDBMessage(message: MastraDBMessage): string;
|
|
27
|
+
/**
|
|
28
|
+
* Rounds a number to two decimal places.
|
|
29
|
+
*
|
|
30
|
+
* Uses `Number.EPSILON` to handle floating-point precision issues.
|
|
31
|
+
*
|
|
32
|
+
* @param num - The number to round
|
|
33
|
+
* @returns The number rounded to two decimal places
|
|
34
|
+
*
|
|
35
|
+
* @example
|
|
36
|
+
* ```ts
|
|
37
|
+
* roundToTwoDecimals(0.1 + 0.2); // 0.3
|
|
38
|
+
* roundToTwoDecimals(1.005); // 1.01
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
10
41
|
export declare const roundToTwoDecimals: (num: number) => number;
|
|
42
|
+
/**
|
|
43
|
+
* Determines if a value is closer to the first target than the second.
|
|
44
|
+
*
|
|
45
|
+
* @param value - The value to compare
|
|
46
|
+
* @param target1 - The first target value
|
|
47
|
+
* @param target2 - The second target value
|
|
48
|
+
* @returns `true` if `value` is closer to `target1` than `target2`
|
|
49
|
+
*
|
|
50
|
+
* @example
|
|
51
|
+
* ```ts
|
|
52
|
+
* isCloserTo(0.6, 1, 0); // true (0.6 is closer to 1)
|
|
53
|
+
* isCloserTo(0.3, 1, 0); // false (0.3 is closer to 0)
|
|
54
|
+
* ```
|
|
55
|
+
*/
|
|
11
56
|
export declare function isCloserTo(value: number, target1: number, target2: number): boolean;
|
|
57
|
+
/**
|
|
58
|
+
* Represents a test case for scorer evaluation.
|
|
59
|
+
*/
|
|
12
60
|
export type TestCase = {
|
|
61
|
+
/** The input text to evaluate */
|
|
13
62
|
input: string;
|
|
63
|
+
/** The output text to evaluate */
|
|
14
64
|
output: string;
|
|
65
|
+
/** The expected result of the evaluation */
|
|
15
66
|
expectedResult: {
|
|
67
|
+
/** The expected score */
|
|
16
68
|
score: number;
|
|
69
|
+
/** The optional expected reason */
|
|
17
70
|
reason?: string;
|
|
18
71
|
};
|
|
19
72
|
};
|
|
73
|
+
/**
|
|
74
|
+
* Represents a test case with additional context for scorer evaluation.
|
|
75
|
+
*/
|
|
20
76
|
export type TestCaseWithContext = TestCase & {
|
|
77
|
+
/** Additional context strings for the evaluation */
|
|
21
78
|
context: string[];
|
|
22
79
|
};
|
|
80
|
+
/**
|
|
81
|
+
* Creates a scoring input object for testing purposes.
|
|
82
|
+
*
|
|
83
|
+
* @param input - The user input text
|
|
84
|
+
* @param output - The assistant output text
|
|
85
|
+
* @param additionalContext - Optional additional context data
|
|
86
|
+
* @param requestContext - Optional request context data
|
|
87
|
+
* @returns A ScoringInput object ready for use in scorer tests
|
|
88
|
+
*
|
|
89
|
+
* @example
|
|
90
|
+
* ```ts
|
|
91
|
+
* const run = createTestRun(
|
|
92
|
+
* 'What is 2+2?',
|
|
93
|
+
* 'The answer is 4.',
|
|
94
|
+
* { topic: 'math' }
|
|
95
|
+
* );
|
|
96
|
+
* ```
|
|
97
|
+
*/
|
|
23
98
|
export declare const createTestRun: (input: string, output: string, additionalContext?: Record<string, any>, requestContext?: Record<string, any>) => ScoringInput;
|
|
99
|
+
/**
|
|
100
|
+
* Extracts the user message text from a scorer run input.
|
|
101
|
+
*
|
|
102
|
+
* Finds the first message with role 'user' and extracts its text content.
|
|
103
|
+
*
|
|
104
|
+
* @param input - The scorer run input containing input messages
|
|
105
|
+
* @returns The user message text, or `undefined` if no user message is found
|
|
106
|
+
*
|
|
107
|
+
* @example
|
|
108
|
+
* ```ts
|
|
109
|
+
* const scorer = createScorer({ ... })
|
|
110
|
+
* .preprocess(({ run }) => {
|
|
111
|
+
* const userText = getUserMessageFromRunInput(run.input);
|
|
112
|
+
* return { userText };
|
|
113
|
+
* });
|
|
114
|
+
* ```
|
|
115
|
+
*/
|
|
24
116
|
export declare const getUserMessageFromRunInput: (input?: ScorerRunInputForAgent) => string | undefined;
|
|
117
|
+
/**
|
|
118
|
+
* Extracts all system messages from a scorer run input.
|
|
119
|
+
*
|
|
120
|
+
* Collects text from both standard system messages and tagged system messages
|
|
121
|
+
* (specialized system prompts like memory instructions).
|
|
122
|
+
*
|
|
123
|
+
* @param input - The scorer run input containing system messages
|
|
124
|
+
* @returns An array of system message strings
|
|
125
|
+
*
|
|
126
|
+
* @example
|
|
127
|
+
* ```ts
|
|
128
|
+
* const scorer = createScorer({ ... })
|
|
129
|
+
* .preprocess(({ run }) => {
|
|
130
|
+
* const systemMessages = getSystemMessagesFromRunInput(run.input);
|
|
131
|
+
* return { systemPrompt: systemMessages.join('\n') };
|
|
132
|
+
* });
|
|
133
|
+
* ```
|
|
134
|
+
*/
|
|
25
135
|
export declare const getSystemMessagesFromRunInput: (input?: ScorerRunInputForAgent) => string[];
|
|
136
|
+
/**
|
|
137
|
+
* Combines all system messages into a single prompt string.
|
|
138
|
+
*
|
|
139
|
+
* Joins all system messages (standard and tagged) with double newlines.
|
|
140
|
+
*
|
|
141
|
+
* @param input - The scorer run input containing system messages
|
|
142
|
+
* @returns A combined system prompt string
|
|
143
|
+
*
|
|
144
|
+
* @example
|
|
145
|
+
* ```ts
|
|
146
|
+
* const scorer = createScorer({ ... })
|
|
147
|
+
* .preprocess(({ run }) => {
|
|
148
|
+
* const systemPrompt = getCombinedSystemPrompt(run.input);
|
|
149
|
+
* return { systemPrompt };
|
|
150
|
+
* });
|
|
151
|
+
* ```
|
|
152
|
+
*/
|
|
26
153
|
export declare const getCombinedSystemPrompt: (input?: ScorerRunInputForAgent) => string;
|
|
154
|
+
/**
|
|
155
|
+
* Extracts the assistant message text from a scorer run output.
|
|
156
|
+
*
|
|
157
|
+
* Finds the first message with role 'assistant' and extracts its text content.
|
|
158
|
+
*
|
|
159
|
+
* @param output - The scorer run output (array of MastraDBMessage)
|
|
160
|
+
* @returns The assistant message text, or `undefined` if no assistant message is found
|
|
161
|
+
*
|
|
162
|
+
* @example
|
|
163
|
+
* ```ts
|
|
164
|
+
* const scorer = createScorer({ ... })
|
|
165
|
+
* .preprocess(({ run }) => {
|
|
166
|
+
* const response = getAssistantMessageFromRunOutput(run.output);
|
|
167
|
+
* return { response };
|
|
168
|
+
* });
|
|
169
|
+
* ```
|
|
170
|
+
*/
|
|
27
171
|
export declare const getAssistantMessageFromRunOutput: (output?: ScorerRunOutputForAgent) => string | undefined;
|
|
172
|
+
/**
|
|
173
|
+
* Extracts reasoning text from a scorer run output.
|
|
174
|
+
*
|
|
175
|
+
* This function extracts reasoning content from assistant messages, which is
|
|
176
|
+
* produced by reasoning models like `deepseek-reasoner`. The reasoning can be
|
|
177
|
+
* stored in two places:
|
|
178
|
+
* 1. `content.reasoning` - a string field on the message content
|
|
179
|
+
* 2. `content.parts` - as parts with `type: 'reasoning'` containing `details`
|
|
180
|
+
*
|
|
181
|
+
* @param output - The scorer run output (array of MastraDBMessage)
|
|
182
|
+
* @returns The reasoning text, or `undefined` if no reasoning is present
|
|
183
|
+
*
|
|
184
|
+
* @example
|
|
185
|
+
* ```ts
|
|
186
|
+
* const reasoningScorer = createScorer({
|
|
187
|
+
* id: 'reasoning-scorer',
|
|
188
|
+
* name: 'Reasoning Quality',
|
|
189
|
+
* description: 'Evaluates the quality of model reasoning',
|
|
190
|
+
* type: 'agent',
|
|
191
|
+
* })
|
|
192
|
+
* .preprocess(({ run }) => {
|
|
193
|
+
* const reasoning = getReasoningFromRunOutput(run.output);
|
|
194
|
+
* const response = getAssistantMessageFromRunOutput(run.output);
|
|
195
|
+
* return { reasoning, response };
|
|
196
|
+
* })
|
|
197
|
+
* .generateScore(({ results }) => {
|
|
198
|
+
* // Score based on reasoning quality
|
|
199
|
+
* return results.preprocessStepResult?.reasoning ? 1 : 0;
|
|
200
|
+
* });
|
|
201
|
+
* ```
|
|
202
|
+
*/
|
|
203
|
+
export declare const getReasoningFromRunOutput: (output?: ScorerRunOutputForAgent) => string | undefined;
|
|
204
|
+
/**
|
|
205
|
+
* Creates a tool invocation object for testing purposes.
|
|
206
|
+
*
|
|
207
|
+
* @param options - The tool invocation configuration
|
|
208
|
+
* @param options.toolCallId - Unique identifier for the tool call
|
|
209
|
+
* @param options.toolName - Name of the tool being called
|
|
210
|
+
* @param options.args - Arguments passed to the tool
|
|
211
|
+
* @param options.result - Result returned by the tool
|
|
212
|
+
* @param options.state - State of the invocation (default: 'result')
|
|
213
|
+
* @returns A tool invocation object
|
|
214
|
+
*
|
|
215
|
+
* @example
|
|
216
|
+
* ```ts
|
|
217
|
+
* const invocation = createToolInvocation({
|
|
218
|
+
* toolCallId: 'call-123',
|
|
219
|
+
* toolName: 'weatherTool',
|
|
220
|
+
* args: { location: 'London' },
|
|
221
|
+
* result: { temperature: 20, condition: 'sunny' },
|
|
222
|
+
* });
|
|
223
|
+
* ```
|
|
224
|
+
*/
|
|
28
225
|
export declare const createToolInvocation: ({ toolCallId, toolName, args, result, state, }: {
|
|
29
226
|
toolCallId: string;
|
|
30
227
|
toolName: string;
|
|
@@ -39,8 +236,37 @@ export declare const createToolInvocation: ({ toolCallId, toolName, args, result
|
|
|
39
236
|
state: string;
|
|
40
237
|
};
|
|
41
238
|
/**
|
|
42
|
-
*
|
|
43
|
-
*
|
|
239
|
+
* Creates a MastraDBMessage object for testing purposes.
|
|
240
|
+
*
|
|
241
|
+
* Supports optional tool invocations for testing tool call scenarios.
|
|
242
|
+
*
|
|
243
|
+
* @param options - The message configuration
|
|
244
|
+
* @param options.content - The text content of the message
|
|
245
|
+
* @param options.role - The role of the message sender ('user', 'assistant', or 'system')
|
|
246
|
+
* @param options.id - Optional message ID (default: 'test-message')
|
|
247
|
+
* @param options.toolInvocations - Optional array of tool invocations
|
|
248
|
+
* @returns A MastraDBMessage object
|
|
249
|
+
*
|
|
250
|
+
* @example
|
|
251
|
+
* ```ts
|
|
252
|
+
* const message = createTestMessage({
|
|
253
|
+
* content: 'Hello, how can I help?',
|
|
254
|
+
* role: 'assistant',
|
|
255
|
+
* });
|
|
256
|
+
*
|
|
257
|
+
* // With tool invocations
|
|
258
|
+
* const messageWithTools = createTestMessage({
|
|
259
|
+
* content: 'Let me check the weather.',
|
|
260
|
+
* role: 'assistant',
|
|
261
|
+
* toolInvocations: [{
|
|
262
|
+
* toolCallId: 'call-1',
|
|
263
|
+
* toolName: 'weatherTool',
|
|
264
|
+
* args: { location: 'Paris' },
|
|
265
|
+
* result: { temp: 22 },
|
|
266
|
+
* state: 'result',
|
|
267
|
+
* }],
|
|
268
|
+
* });
|
|
269
|
+
* ```
|
|
44
270
|
*/
|
|
45
271
|
export declare function createTestMessage({ content, role, id, toolInvocations, }: {
|
|
46
272
|
content: string;
|
|
@@ -54,6 +280,35 @@ export declare function createTestMessage({ content, role, id, toolInvocations,
|
|
|
54
280
|
state: any;
|
|
55
281
|
}>;
|
|
56
282
|
}): MastraDBMessage;
|
|
283
|
+
/**
|
|
284
|
+
* Creates a complete agent test run object for testing scorers.
|
|
285
|
+
*
|
|
286
|
+
* Provides a convenient way to construct the full run object that scorers receive,
|
|
287
|
+
* including input messages, output, system messages, and request context.
|
|
288
|
+
*
|
|
289
|
+
* @param options - The test run configuration
|
|
290
|
+
* @param options.inputMessages - Array of input messages (default: [])
|
|
291
|
+
* @param options.output - The output messages (required)
|
|
292
|
+
* @param options.rememberedMessages - Array of remembered messages from memory (default: [])
|
|
293
|
+
* @param options.systemMessages - Array of system messages (default: [])
|
|
294
|
+
* @param options.taggedSystemMessages - Tagged system messages map (default: {})
|
|
295
|
+
* @param options.requestContext - Request context (default: new RequestContext())
|
|
296
|
+
* @param options.runId - Unique run ID (default: random UUID)
|
|
297
|
+
* @returns A complete test run object
|
|
298
|
+
*
|
|
299
|
+
* @example
|
|
300
|
+
* ```ts
|
|
301
|
+
* const testRun = createAgentTestRun({
|
|
302
|
+
* inputMessages: [createTestMessage({ content: 'Hello', role: 'user' })],
|
|
303
|
+
* output: [createTestMessage({ content: 'Hi there!', role: 'assistant' })],
|
|
304
|
+
* });
|
|
305
|
+
*
|
|
306
|
+
* const result = await scorer.run({
|
|
307
|
+
* input: testRun.input,
|
|
308
|
+
* output: testRun.output,
|
|
309
|
+
* });
|
|
310
|
+
* ```
|
|
311
|
+
*/
|
|
57
312
|
export declare const createAgentTestRun: ({ inputMessages, output, rememberedMessages, systemMessages, taggedSystemMessages, requestContext, runId, }: {
|
|
58
313
|
inputMessages?: ScorerRunInputForAgent["inputMessages"];
|
|
59
314
|
output: ScorerRunOutputForAgent;
|
|
@@ -68,16 +323,76 @@ export declare const createAgentTestRun: ({ inputMessages, output, rememberedMes
|
|
|
68
323
|
requestContext: RequestContext;
|
|
69
324
|
runId: string;
|
|
70
325
|
};
|
|
326
|
+
/**
|
|
327
|
+
* Information about a tool call extracted from scorer output.
|
|
328
|
+
*/
|
|
71
329
|
export type ToolCallInfo = {
|
|
330
|
+
/** Name of the tool that was called */
|
|
72
331
|
toolName: string;
|
|
332
|
+
/** Unique identifier for the tool call */
|
|
73
333
|
toolCallId: string;
|
|
334
|
+
/** Index of the message containing this tool call */
|
|
74
335
|
messageIndex: number;
|
|
336
|
+
/** Index of the invocation within the message's tool invocations */
|
|
75
337
|
invocationIndex: number;
|
|
76
338
|
};
|
|
339
|
+
/**
|
|
340
|
+
* Extracts all tool calls from a scorer run output.
|
|
341
|
+
*
|
|
342
|
+
* Iterates through all messages and their tool invocations to collect
|
|
343
|
+
* information about tools that were called (with state 'result' or 'call').
|
|
344
|
+
*
|
|
345
|
+
* @param output - The scorer run output (array of MastraDBMessage)
|
|
346
|
+
* @returns An object containing tool names and detailed tool call info
|
|
347
|
+
*
|
|
348
|
+
* @example
|
|
349
|
+
* ```ts
|
|
350
|
+
* const scorer = createScorer({ ... })
|
|
351
|
+
* .preprocess(({ run }) => {
|
|
352
|
+
* const { tools, toolCallInfos } = extractToolCalls(run.output);
|
|
353
|
+
* return {
|
|
354
|
+
* toolsUsed: tools,
|
|
355
|
+
* toolCount: tools.length,
|
|
356
|
+
* };
|
|
357
|
+
* });
|
|
358
|
+
* ```
|
|
359
|
+
*/
|
|
77
360
|
export declare function extractToolCalls(output: ScorerRunOutputForAgent): {
|
|
78
361
|
tools: string[];
|
|
79
362
|
toolCallInfos: ToolCallInfo[];
|
|
80
363
|
};
|
|
364
|
+
/**
|
|
365
|
+
* Extracts text content from all input messages.
|
|
366
|
+
*
|
|
367
|
+
* @param runInput - The scorer run input
|
|
368
|
+
* @returns An array of text strings from each input message
|
|
369
|
+
*
|
|
370
|
+
* @example
|
|
371
|
+
* ```ts
|
|
372
|
+
* const scorer = createScorer({ ... })
|
|
373
|
+
* .preprocess(({ run }) => {
|
|
374
|
+
* const messages = extractInputMessages(run.input);
|
|
375
|
+
* return { allUserMessages: messages.join('\n') };
|
|
376
|
+
* });
|
|
377
|
+
* ```
|
|
378
|
+
*/
|
|
81
379
|
export declare const extractInputMessages: (runInput: ScorerRunInputForAgent | undefined) => string[];
|
|
380
|
+
/**
|
|
381
|
+
* Extracts text content from all assistant response messages.
|
|
382
|
+
*
|
|
383
|
+
* Filters for messages with role 'assistant' and extracts their text content.
|
|
384
|
+
*
|
|
385
|
+
* @param runOutput - The scorer run output (array of MastraDBMessage)
|
|
386
|
+
* @returns An array of text strings from each assistant message
|
|
387
|
+
*
|
|
388
|
+
* @example
|
|
389
|
+
* ```ts
|
|
390
|
+
* const scorer = createScorer({ ... })
|
|
391
|
+
* .preprocess(({ run }) => {
|
|
392
|
+
* const responses = extractAgentResponseMessages(run.output);
|
|
393
|
+
* return { allResponses: responses.join('\n') };
|
|
394
|
+
* });
|
|
395
|
+
* ```
|
|
396
|
+
*/
|
|
82
397
|
export declare const extractAgentResponseMessages: (runOutput: ScorerRunOutputForAgent) => string[];
|
|
83
398
|
//# sourceMappingURL=utils.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/scorers/utils.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACxG,OAAO,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AAC9D,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/scorers/utils.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACxG,OAAO,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AAC9D,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,iCAAiC,CAAC,OAAO,EAAE,eAAe,GAAG,MAAM,CAUlF;AAED;;;;;;;;;;;;;GAaG;AACH,eAAO,MAAM,kBAAkB,GAAI,KAAK,MAAM,WAE7C,CAAC;AAEF;;;;;;;;;;;;;GAaG;AACH,wBAAgB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAEnF;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACrB,iCAAiC;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,kCAAkC;IAClC,MAAM,EAAE,MAAM,CAAC;IACf,4CAA4C;IAC5C,cAAc,EAAE;QACd,yBAAyB;QACzB,KAAK,EAAE,MAAM,CAAC;QACd,mCAAmC;QACnC,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC;CACH,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,mBAAmB,GAAG,QAAQ,GAAG;IAC3C,oDAAoD;IACpD,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB,CAAC;AAEF;;;;;;;;;;;;;;;;;GAiBG;AACH,eAAO,MAAM,aAAa,GACxB,OAAO,MAAM,EACb,QAAQ,MAAM,EACd,oBAAoB,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EACvC,iBAAiB,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,KACnC,YAOF,CAAC;AAEF;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,0BAA0B,GAAI,QAAQ,sBAAsB,KAAG,MAAM,GAAG,SAGpF,CAAC;AAEF;;;;;;;;;;;;;;;;;GAiBG;AACH,eAAO,MAAM,6BAA6B,GAAI,QAAQ,sBAAsB,KAAG,MAAM,EAoCpF,CAAC;AAEF;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,uBAAuB,GAAI,QAAQ,sBAAsB,KAAG,MAGxE,CAAC;AAEF;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,gCAAgC,GAAI,SAAS,uBAAuB,uBAGhF,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AACH,eAAO,MAAM,yBAAyB,GAAI,SAAS,uBAAuB,KAAG,MAAM,GAAG,SAgCrF,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,eAAO,MAAM,oBAAoB,GAAI,gDAMlC;IACD,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC1B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,KAAK,CAAC,EAAE,cAAc,CAAC,OAAO,CAAC,CAAC;CACjC,KAAG;IAAE,UAAU,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAQhH,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACH,wBAAgB,iBAAiB,CAAC,EAChC,OAAO,EACP,IAAI,EACJ,EAAmB,EACnB,eAAoB,GACrB,EAAE;IACD,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,GAAG,WAAW,GAAG,QAAQ,CAAC;IACtC,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,eAAe,CAAC,EAAE,KAAK,CAAC;QACtB,UAAU,EAAE,MAAM,CAAC;QACnB,QAAQ,EAAE,MAAM,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC1B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC5B,KAAK,EAAE,GAAG,CAAC;KACZ,CAAC,CAAC;CACJ,GAAG,eAAe,CAoBlB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,eAAO,MAAM,kBAAkB,GAAI,6GAQhC;IACD,aAAa,CAAC,EAAE,sBAAsB,CAAC,eAAe,CAAC,CAAC;IACxD,MAAM,EAAE,uBAAuB,CAAC;IAChC,kBAAkB,CAAC,EAAE,sBAAsB,CAAC,oBAAoB,CAAC,CAAC;IAClE,cAAc,CAAC,EAAE,sBAAsB,CAAC,gBAAgB,CAAC,CAAC;IAC1D,oBAAoB,CAAC,EAAE,sBAAsB,CAAC,sBAAsB,CAAC,CAAC;IACtE,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB,KAAG;IACF,KAAK,EAAE,sBAAsB,CAAC;IAC9B,MAAM,EAAE,uBAAuB,CAAC;IAChC,cAAc,EAAE,cAAc,CAAC;IAC/B,KAAK,EAAE,MAAM,CAAC;CAaf,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG;IACzB,uCAAuC;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,0CAA0C;IAC1C,UAAU,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,YAAY,EAAE,MAAM,CAAC;IACrB,oEAAoE;IACpE,eAAe,EAAE,MAAM,CAAC;CACzB,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,uBAAuB,GAAG;IAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,aAAa,EAAE,YAAY,EAAE,CAAA;CAAE,CAwBpH;AAED;;;;;;;;;;;;;;GAcG;AACH,eAAO,MAAM,oBAAoB,GAAI,UAAU,sBAAsB,GAAG,SAAS,KAAG,MAAM,EAEzF,CAAC;AAEF;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,4BAA4B,GAAI,WAAW,uBAAuB,KAAG,MAAM,EAEvF,CAAC"}
|
package/dist/scorers/utils.js
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export { createAgentTestRun, createTestMessage, createTestRun, createToolInvocation, extractAgentResponseMessages, extractInputMessages, extractToolCalls, getAssistantMessageFromRunOutput, getCombinedSystemPrompt, getSystemMessagesFromRunInput, getTextContentFromMastraDBMessage, getUserMessageFromRunInput, isCloserTo, roundToTwoDecimals } from '../chunk-
|
|
1
|
+
export { createAgentTestRun, createTestMessage, createTestRun, createToolInvocation, extractAgentResponseMessages, extractInputMessages, extractToolCalls, getAssistantMessageFromRunOutput, getCombinedSystemPrompt, getReasoningFromRunOutput, getSystemMessagesFromRunInput, getTextContentFromMastraDBMessage, getUserMessageFromRunInput, isCloserTo, roundToTwoDecimals } from '../chunk-CKKVCGRB.js';
|
|
2
2
|
//# sourceMappingURL=utils.js.map
|
|
3
3
|
//# sourceMappingURL=utils.js.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mastra/evals",
|
|
3
|
-
"version": "1.0.0-beta.
|
|
3
|
+
"version": "1.0.0-beta.2",
|
|
4
4
|
"description": "",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"files": [
|
|
@@ -56,7 +56,6 @@
|
|
|
56
56
|
"license": "Apache-2.0",
|
|
57
57
|
"dependencies": {
|
|
58
58
|
"compromise": "^14.14.4",
|
|
59
|
-
"fs-extra": "^11.3.2",
|
|
60
59
|
"keyword-extractor": "^0.0.28",
|
|
61
60
|
"sentiment": "^5.0.2",
|
|
62
61
|
"string-similarity": "^4.0.4"
|
|
@@ -69,19 +68,20 @@
|
|
|
69
68
|
"devDependencies": {
|
|
70
69
|
"@ai-sdk/openai": "^1.3.24",
|
|
71
70
|
"@microsoft/api-extractor": "^7.52.8",
|
|
72
|
-
"@types/fs-extra": "^11.0.4",
|
|
73
71
|
"@types/sentiment": "^5.0.4",
|
|
74
72
|
"@types/string-similarity": "^4.0.2",
|
|
73
|
+
"@vitest/coverage-v8": "4.0.12",
|
|
74
|
+
"@vitest/ui": "4.0.12",
|
|
75
75
|
"ai": "^4.3.19",
|
|
76
76
|
"dotenv": "^17.0.0",
|
|
77
77
|
"eslint": "^9.37.0",
|
|
78
78
|
"tsup": "^8.5.0",
|
|
79
79
|
"typescript": "^5.8.3",
|
|
80
|
-
"vitest": "
|
|
80
|
+
"vitest": "4.0.12",
|
|
81
81
|
"zod": "^3.25.76",
|
|
82
|
+
"@internal/types-builder": "0.0.28",
|
|
82
83
|
"@internal/lint": "0.0.53",
|
|
83
|
-
"@mastra/core": "1.0.0-beta.
|
|
84
|
-
"@internal/types-builder": "0.0.28"
|
|
84
|
+
"@mastra/core": "1.0.0-beta.6"
|
|
85
85
|
},
|
|
86
86
|
"engines": {
|
|
87
87
|
"node": ">=22.13.0"
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/scorers/utils.ts"],"names":[],"mappings":";;;AASO,SAAS,kCAAkC,OAAA,EAAkC;AAClF,EAAA,IAAI,OAAO,QAAQ,OAAA,CAAQ,OAAA,KAAY,YAAY,OAAA,CAAQ,OAAA,CAAQ,YAAY,EAAA,EAAI;AACjF,IAAA,OAAO,QAAQ,OAAA,CAAQ,OAAA;AAAA,EACzB;AACA,EAAA,IAAI,OAAA,CAAQ,QAAQ,KAAA,IAAS,KAAA,CAAM,QAAQ,OAAA,CAAQ,OAAA,CAAQ,KAAK,CAAA,EAAG;AAEjE,IAAA,MAAM,SAAA,GAAY,QAAQ,OAAA,CAAQ,KAAA,CAAM,OAAO,CAAA,CAAA,KAAK,CAAA,CAAE,SAAS,MAAM,CAAA;AACrE,IAAA,OAAO,SAAA,CAAU,SAAS,CAAA,GAAI,SAAA,CAAU,UAAU,MAAA,GAAS,CAAC,CAAA,EAAG,IAAA,IAAQ,EAAA,GAAK,EAAA;AAAA,EAC9E;AACA,EAAA,OAAO,EAAA;AACT;AAEO,IAAM,kBAAA,GAAqB,CAAC,GAAA,KAAgB;AACjD,EAAA,OAAO,KAAK,KAAA,CAAA,CAAO,GAAA,GAAM,MAAA,CAAO,OAAA,IAAW,GAAG,CAAA,GAAI,GAAA;AACpD;AAEO,SAAS,UAAA,CAAW,KAAA,EAAe,OAAA,EAAiB,OAAA,EAA0B;AACnF,EAAA,OAAO,IAAA,CAAK,IAAI,KAAA,GAAQ,OAAO,IAAI,IAAA,CAAK,GAAA,CAAI,QAAQ,OAAO,CAAA;AAC7D;AAeO,IAAM,aAAA,GAAgB,CAC3B,KAAA,EACA,MAAA,EACA,mBACA,cAAA,KACiB;AACjB,EAAA,OAAO;AAAA,IACL,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,OAAA,EAAS,OAAO,CAAA;AAAA,IACxC,MAAA,EAAQ,EAAE,IAAA,EAAM,WAAA,EAAa,MAAM,MAAA,EAAO;AAAA,IAC1C,iBAAA,EAAmB,qBAAqB,EAAC;AAAA,IACzC,cAAA,EAAgB,kBAAkB;AAAC,GACrC;AACF;AAEO,IAAM,0BAAA,GAA6B,CAAC,KAAA,KAAuD;AAChG,EAAA,MAAM,OAAA,GAAU,OAAO,aAAA,CAAc,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,IAAA,KAAS,MAAM,CAAA;AACvE,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAEO,IAAM,6BAAA,GAAgC,CAAC,KAAA,KAA6C;AACzF,EAAA,MAAM,iBAA2B,EAAC;AAGlC,EAAA,IAAI,OAAO,cAAA,EAAgB;AACzB,IAAA,cAAA,CAAe,IAAA;AAAA,MACb,GAAG,KAAA,CAAM,cAAA,CACN,GAAA,CAAI,CAAA,GAAA,KAAO;AAEV,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,OAAO,GAAA,CAAI,OAAA;AAAA,QACb,CAAA,MAAA,IAAW,KAAA,CAAM,OAAA,CAAQ,GAAA,CAAI,OAAO,CAAA,EAAG;AAErC,UAAA,OAAO,GAAA,CAAI,OAAA,CACR,MAAA,CAAO,CAAA,IAAA,KAAQ,KAAK,IAAA,KAAS,MAAM,CAAA,CACnC,GAAA,CAAI,UAAQ,IAAA,CAAK,IAAA,IAAQ,EAAE,CAAA,CAC3B,KAAK,GAAG,CAAA;AAAA,QACb;AACA,QAAA,OAAO,EAAA;AAAA,MACT,CAAC,CAAA,CACA,MAAA,CAAO,CAAA,OAAA,KAAW,OAAO;AAAA,KAC9B;AAAA,EACF;AAGA,EAAA,IAAI,OAAO,oBAAA,EAAsB;AAC/B,IAAA,MAAA,CAAO,MAAA,CAAO,KAAA,CAAM,oBAAoB,CAAA,CAAE,QAAQ,CAAA,QAAA,KAAY;AAC5D,MAAA,QAAA,CAAS,QAAQ,CAAA,GAAA,KAAO;AACtB,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,cAAA,CAAe,IAAA,CAAK,IAAI,OAAO,CAAA;AAAA,QACjC;AAAA,MACF,CAAC,CAAA;AAAA,IACH,CAAC,CAAA;AAAA,EACH;AAEA,EAAA,OAAO,cAAA;AACT;AAEO,IAAM,uBAAA,GAA0B,CAAC,KAAA,KAA2C;AACjF,EAAA,MAAM,cAAA,GAAiB,8BAA8B,KAAK,CAAA;AAC1D,EAAA,OAAO,cAAA,CAAe,KAAK,MAAM,CAAA;AACnC;AAEO,IAAM,gCAAA,GAAmC,CAAC,MAAA,KAAqC;AACpF,EAAA,MAAM,OAAA,GAAU,QAAQ,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,SAAS,WAAW,CAAA;AAC/D,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAEO,IAAM,uBAAuB,CAAC;AAAA,EACnC,UAAA;AAAA,EACA,QAAA;AAAA,EACA,IAAA;AAAA,EACA,MAAA;AAAA,EACA,KAAA,GAAQ;AACV,CAAA,KAMuH;AACrH,EAAA,OAAO;AAAA,IACL,UAAA;AAAA,IACA,QAAA;AAAA,IACA,IAAA;AAAA,IACA,MAAA;AAAA,IACA;AAAA,GACF;AACF;AAMO,SAAS,iBAAA,CAAkB;AAAA,EAChC,OAAA;AAAA,EACA,IAAA;AAAA,EACA,EAAA,GAAK,cAAA;AAAA,EACL,kBAAkB;AACpB,CAAA,EAWoB;AAClB,EAAA,OAAO;AAAA,IACL,EAAA;AAAA,IACA,IAAA;AAAA,IACA,OAAA,EAAS;AAAA,MACP,MAAA,EAAQ,CAAA;AAAA,MACR,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,IAAA,EAAM,SAAS,CAAA;AAAA,MACvC,OAAA;AAAA,MACA,GAAI,eAAA,CAAgB,MAAA,GAAS,CAAA,IAAK;AAAA,QAChC,eAAA,EAAiB,eAAA,CAAgB,GAAA,CAAI,CAAA,EAAA,MAAO;AAAA,UAC1C,YAAY,EAAA,CAAG,UAAA;AAAA,UACf,UAAU,EAAA,CAAG,QAAA;AAAA,UACb,MAAM,EAAA,CAAG,IAAA;AAAA,UACT,QAAQ,EAAA,CAAG,MAAA;AAAA,UACX,OAAO,EAAA,CAAG;AAAA,SACZ,CAAE;AAAA;AACJ,KACF;AAAA,IACA,SAAA,sBAAe,IAAA;AAAK,GACtB;AACF;AAEO,IAAM,qBAAqB,CAAC;AAAA,EACjC,gBAAgB,EAAC;AAAA,EACjB,MAAA;AAAA,EACA,qBAAqB,EAAC;AAAA,EACtB,iBAAiB,EAAC;AAAA,EAClB,uBAAuB,EAAC;AAAA,EACxB,cAAA,GAAiB,IAAI,cAAA,EAAe;AAAA,EACpC,KAAA,GAAQ,OAAO,UAAA;AACjB,CAAA,KAaK;AACH,EAAA,OAAO;AAAA,IACL,KAAA,EAAO;AAAA,MACL,aAAA;AAAA,MACA,kBAAA;AAAA,MACA,cAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,MAAA;AAAA,IACA,cAAA;AAAA,IACA;AAAA,GACF;AACF;AASO,SAAS,iBAAiB,MAAA,EAAqF;AACpH,EAAA,MAAM,YAAsB,EAAC;AAC7B,EAAA,MAAM,gBAAgC,EAAC;AAEvC,EAAA,KAAA,IAAS,YAAA,GAAe,CAAA,EAAG,YAAA,GAAe,MAAA,CAAO,QAAQ,YAAA,EAAA,EAAgB;AACvE,IAAA,MAAM,OAAA,GAAU,OAAO,YAAY,CAAA;AAEnC,IAAA,IAAI,OAAA,EAAS,SAAS,eAAA,EAAiB;AACrC,MAAA,KAAA,IAAS,kBAAkB,CAAA,EAAG,eAAA,GAAkB,QAAQ,OAAA,CAAQ,eAAA,CAAgB,QAAQ,eAAA,EAAA,EAAmB;AACzG,QAAA,MAAM,UAAA,GAAa,OAAA,CAAQ,OAAA,CAAQ,eAAA,CAAgB,eAAe,CAAA;AAClE,QAAA,IAAI,UAAA,IAAc,WAAW,QAAA,KAAa,UAAA,CAAW,UAAU,QAAA,IAAY,UAAA,CAAW,UAAU,MAAA,CAAA,EAAS;AACvG,UAAA,SAAA,CAAU,IAAA,CAAK,WAAW,QAAQ,CAAA;AAClC,UAAA,aAAA,CAAc,IAAA,CAAK;AAAA,YACjB,UAAU,UAAA,CAAW,QAAA;AAAA,YACrB,YAAY,UAAA,CAAW,UAAA,IAAc,CAAA,EAAG,YAAY,IAAI,eAAe,CAAA,CAAA;AAAA,YACvE,YAAA;AAAA,YACA;AAAA,WACD,CAAA;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,EAAE,KAAA,EAAO,SAAA,EAAW,aAAA,EAAc;AAC3C;AAEO,IAAM,oBAAA,GAAuB,CAAC,QAAA,KAA2D;AAC9F,EAAA,OAAO,QAAA,EAAU,eAAe,GAAA,CAAI,CAAA,GAAA,KAAO,kCAAkC,GAAG,CAAC,KAAK,EAAC;AACzF;AAEO,IAAM,4BAAA,GAA+B,CAAC,SAAA,KAAiD;AAC5F,EAAA,OAAO,SAAA,CAAU,MAAA,CAAO,CAAA,GAAA,KAAO,GAAA,CAAI,IAAA,KAAS,WAAW,CAAA,CAAE,GAAA,CAAI,CAAA,GAAA,KAAO,iCAAA,CAAkC,GAAG,CAAC,CAAA;AAC5G","file":"chunk-CCLM7KPF.js","sourcesContent":["import type { MastraDBMessage } from '@mastra/core/agent';\nimport type { ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput } from '@mastra/core/evals';\nimport { RequestContext } from '@mastra/core/request-context';\nimport type { ToolInvocation } from 'ai';\n\n/**\n * Extract text content from MastraDBMessage\n * Matches the logic used in MessageList.mastraDBMessageToAIV4UIMessage\n */\nexport function getTextContentFromMastraDBMessage(message: MastraDBMessage): string {\n if (typeof message.content.content === 'string' && message.content.content !== '') {\n return message.content.content;\n }\n if (message.content.parts && Array.isArray(message.content.parts)) {\n // Return only the last text part like AI SDK does\n const textParts = message.content.parts.filter(p => p.type === 'text');\n return textParts.length > 0 ? textParts[textParts.length - 1]?.text || '' : '';\n }\n return '';\n}\n\nexport const roundToTwoDecimals = (num: number) => {\n return Math.round((num + Number.EPSILON) * 100) / 100;\n};\n\nexport function isCloserTo(value: number, target1: number, target2: number): boolean {\n return Math.abs(value - target1) < Math.abs(value - target2);\n}\n\nexport type TestCase = {\n input: string;\n output: string;\n expectedResult: {\n score: number;\n reason?: string;\n };\n};\n\nexport type TestCaseWithContext = TestCase & {\n context: string[];\n};\n\nexport const createTestRun = (\n input: string,\n output: string,\n additionalContext?: Record<string, any>,\n requestContext?: Record<string, any>,\n): ScoringInput => {\n return {\n input: [{ role: 'user', content: input }],\n output: { role: 'assistant', text: output },\n additionalContext: additionalContext ?? {},\n requestContext: requestContext ?? {},\n };\n};\n\nexport const getUserMessageFromRunInput = (input?: ScorerRunInputForAgent): string | undefined => {\n const message = input?.inputMessages.find(({ role }) => role === 'user');\n return message ? getTextContentFromMastraDBMessage(message) : undefined;\n};\n\nexport const getSystemMessagesFromRunInput = (input?: ScorerRunInputForAgent): string[] => {\n const systemMessages: string[] = [];\n\n // Add standard system messages\n if (input?.systemMessages) {\n systemMessages.push(\n ...input.systemMessages\n .map(msg => {\n // Handle different content types - extract text if it's an array of parts\n if (typeof msg.content === 'string') {\n return msg.content;\n } else if (Array.isArray(msg.content)) {\n // Extract text from parts array\n return msg.content\n .filter(part => part.type === 'text')\n .map(part => part.text || '')\n .join(' ');\n }\n return '';\n })\n .filter(content => content),\n );\n }\n\n // Add tagged system messages (these are specialized system prompts)\n if (input?.taggedSystemMessages) {\n Object.values(input.taggedSystemMessages).forEach(messages => {\n messages.forEach(msg => {\n if (typeof msg.content === 'string') {\n systemMessages.push(msg.content);\n }\n });\n });\n }\n\n return systemMessages;\n};\n\nexport const getCombinedSystemPrompt = (input?: ScorerRunInputForAgent): string => {\n const systemMessages = getSystemMessagesFromRunInput(input);\n return systemMessages.join('\\n\\n');\n};\n\nexport const getAssistantMessageFromRunOutput = (output?: ScorerRunOutputForAgent) => {\n const message = output?.find(({ role }) => role === 'assistant');\n return message ? getTextContentFromMastraDBMessage(message) : undefined;\n};\n\nexport const createToolInvocation = ({\n toolCallId,\n toolName,\n args,\n result,\n state = 'result',\n}: {\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state?: ToolInvocation['state'];\n}): { toolCallId: string; toolName: string; args: Record<string, any>; result: Record<string, any>; state: string } => {\n return {\n toolCallId,\n toolName,\n args,\n result,\n state,\n };\n};\n\n/**\n * Helper function to create MastraDBMessage objects for tests\n * Supports optional tool invocations for testing tool call scenarios\n */\nexport function createTestMessage({\n content,\n role,\n id = 'test-message',\n toolInvocations = [],\n}: {\n content: string;\n role: 'user' | 'assistant' | 'system';\n id?: string;\n toolInvocations?: Array<{\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state: any;\n }>;\n}): MastraDBMessage {\n return {\n id,\n role,\n content: {\n format: 2,\n parts: [{ type: 'text', text: content }],\n content,\n ...(toolInvocations.length > 0 && {\n toolInvocations: toolInvocations.map(ti => ({\n toolCallId: ti.toolCallId,\n toolName: ti.toolName,\n args: ti.args,\n result: ti.result,\n state: ti.state,\n })),\n }),\n },\n createdAt: new Date(),\n };\n}\n\nexport const createAgentTestRun = ({\n inputMessages = [],\n output,\n rememberedMessages = [],\n systemMessages = [],\n taggedSystemMessages = {},\n requestContext = new RequestContext(),\n runId = crypto.randomUUID(),\n}: {\n inputMessages?: ScorerRunInputForAgent['inputMessages'];\n output: ScorerRunOutputForAgent;\n rememberedMessages?: ScorerRunInputForAgent['rememberedMessages'];\n systemMessages?: ScorerRunInputForAgent['systemMessages'];\n taggedSystemMessages?: ScorerRunInputForAgent['taggedSystemMessages'];\n requestContext?: RequestContext;\n runId?: string;\n}): {\n input: ScorerRunInputForAgent;\n output: ScorerRunOutputForAgent;\n requestContext: RequestContext;\n runId: string;\n} => {\n return {\n input: {\n inputMessages,\n rememberedMessages,\n systemMessages,\n taggedSystemMessages,\n },\n output,\n requestContext,\n runId,\n };\n};\n\nexport type ToolCallInfo = {\n toolName: string;\n toolCallId: string;\n messageIndex: number;\n invocationIndex: number;\n};\n\nexport function extractToolCalls(output: ScorerRunOutputForAgent): { tools: string[]; toolCallInfos: ToolCallInfo[] } {\n const toolCalls: string[] = [];\n const toolCallInfos: ToolCallInfo[] = [];\n\n for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {\n const message = output[messageIndex];\n // Tool invocations are now nested under content\n if (message?.content?.toolInvocations) {\n for (let invocationIndex = 0; invocationIndex < message.content.toolInvocations.length; invocationIndex++) {\n const invocation = message.content.toolInvocations[invocationIndex];\n if (invocation && invocation.toolName && (invocation.state === 'result' || invocation.state === 'call')) {\n toolCalls.push(invocation.toolName);\n toolCallInfos.push({\n toolName: invocation.toolName,\n toolCallId: invocation.toolCallId || `${messageIndex}-${invocationIndex}`,\n messageIndex,\n invocationIndex,\n });\n }\n }\n }\n }\n\n return { tools: toolCalls, toolCallInfos };\n}\n\nexport const extractInputMessages = (runInput: ScorerRunInputForAgent | undefined): string[] => {\n return runInput?.inputMessages?.map(msg => getTextContentFromMastraDBMessage(msg)) || [];\n};\n\nexport const extractAgentResponseMessages = (runOutput: ScorerRunOutputForAgent): string[] => {\n return runOutput.filter(msg => msg.role === 'assistant').map(msg => getTextContentFromMastraDBMessage(msg));\n};\n"]}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/scorers/utils.ts"],"names":["requestContext","RequestContext"],"mappings":";;;;;AASO,SAAS,kCAAkC,OAAA,EAAkC;AAClF,EAAA,IAAI,OAAO,QAAQ,OAAA,CAAQ,OAAA,KAAY,YAAY,OAAA,CAAQ,OAAA,CAAQ,YAAY,EAAA,EAAI;AACjF,IAAA,OAAO,QAAQ,OAAA,CAAQ,OAAA;AAAA,EACzB;AACA,EAAA,IAAI,OAAA,CAAQ,QAAQ,KAAA,IAAS,KAAA,CAAM,QAAQ,OAAA,CAAQ,OAAA,CAAQ,KAAK,CAAA,EAAG;AAEjE,IAAA,MAAM,SAAA,GAAY,QAAQ,OAAA,CAAQ,KAAA,CAAM,OAAO,CAAA,CAAA,KAAK,CAAA,CAAE,SAAS,MAAM,CAAA;AACrE,IAAA,OAAO,SAAA,CAAU,SAAS,CAAA,GAAI,SAAA,CAAU,UAAU,MAAA,GAAS,CAAC,CAAA,EAAG,IAAA,IAAQ,EAAA,GAAK,EAAA;AAAA,EAC9E;AACA,EAAA,OAAO,EAAA;AACT;AAEO,IAAM,kBAAA,GAAqB,CAAC,GAAA,KAAgB;AACjD,EAAA,OAAO,KAAK,KAAA,CAAA,CAAO,GAAA,GAAM,MAAA,CAAO,OAAA,IAAW,GAAG,CAAA,GAAI,GAAA;AACpD;AAEO,SAAS,UAAA,CAAW,KAAA,EAAe,OAAA,EAAiB,OAAA,EAA0B;AACnF,EAAA,OAAO,IAAA,CAAK,IAAI,KAAA,GAAQ,OAAO,IAAI,IAAA,CAAK,GAAA,CAAI,QAAQ,OAAO,CAAA;AAC7D;AAeO,IAAM,aAAA,GAAgB,CAC3B,KAAA,EACA,MAAA,EACA,mBACA,cAAA,KACiB;AACjB,EAAA,OAAO;AAAA,IACL,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,OAAA,EAAS,OAAO,CAAA;AAAA,IACxC,MAAA,EAAQ,EAAE,IAAA,EAAM,WAAA,EAAa,MAAM,MAAA,EAAO;AAAA,IAC1C,iBAAA,EAAmB,qBAAqB,EAAC;AAAA,IACzC,cAAA,EAAgB,kBAAkB;AAAC,GACrC;AACF;AAEO,IAAM,0BAAA,GAA6B,CAAC,KAAA,KAAuD;AAChG,EAAA,MAAM,OAAA,GAAU,OAAO,aAAA,CAAc,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,IAAA,KAAS,MAAM,CAAA;AACvE,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAEO,IAAM,6BAAA,GAAgC,CAAC,KAAA,KAA6C;AACzF,EAAA,MAAM,iBAA2B,EAAC;AAGlC,EAAA,IAAI,OAAO,cAAA,EAAgB;AACzB,IAAA,cAAA,CAAe,IAAA;AAAA,MACb,GAAG,KAAA,CAAM,cAAA,CACN,GAAA,CAAI,CAAA,GAAA,KAAO;AAEV,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,OAAO,GAAA,CAAI,OAAA;AAAA,QACb,CAAA,MAAA,IAAW,KAAA,CAAM,OAAA,CAAQ,GAAA,CAAI,OAAO,CAAA,EAAG;AAErC,UAAA,OAAO,GAAA,CAAI,OAAA,CACR,MAAA,CAAO,CAAA,IAAA,KAAQ,KAAK,IAAA,KAAS,MAAM,CAAA,CACnC,GAAA,CAAI,UAAQ,IAAA,CAAK,IAAA,IAAQ,EAAE,CAAA,CAC3B,KAAK,GAAG,CAAA;AAAA,QACb;AACA,QAAA,OAAO,EAAA;AAAA,MACT,CAAC,CAAA,CACA,MAAA,CAAO,CAAA,OAAA,KAAW,OAAO;AAAA,KAC9B;AAAA,EACF;AAGA,EAAA,IAAI,OAAO,oBAAA,EAAsB;AAC/B,IAAA,MAAA,CAAO,MAAA,CAAO,KAAA,CAAM,oBAAoB,CAAA,CAAE,QAAQ,CAAA,QAAA,KAAY;AAC5D,MAAA,QAAA,CAAS,QAAQ,CAAA,GAAA,KAAO;AACtB,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,cAAA,CAAe,IAAA,CAAK,IAAI,OAAO,CAAA;AAAA,QACjC;AAAA,MACF,CAAC,CAAA;AAAA,IACH,CAAC,CAAA;AAAA,EACH;AAEA,EAAA,OAAO,cAAA;AACT;AAEO,IAAM,uBAAA,GAA0B,CAAC,KAAA,KAA2C;AACjF,EAAA,MAAM,cAAA,GAAiB,8BAA8B,KAAK,CAAA;AAC1D,EAAA,OAAO,cAAA,CAAe,KAAK,MAAM,CAAA;AACnC;AAEO,IAAM,gCAAA,GAAmC,CAAC,MAAA,KAAqC;AACpF,EAAA,MAAM,OAAA,GAAU,QAAQ,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,SAAS,WAAW,CAAA;AAC/D,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAEO,IAAM,uBAAuB,CAAC;AAAA,EACnC,UAAA;AAAA,EACA,QAAA;AAAA,EACA,IAAA;AAAA,EACA,MAAA;AAAA,EACA,KAAA,GAAQ;AACV,CAAA,KAMuH;AACrH,EAAA,OAAO;AAAA,IACL,UAAA;AAAA,IACA,QAAA;AAAA,IACA,IAAA;AAAA,IACA,MAAA;AAAA,IACA;AAAA,GACF;AACF;AAMO,SAAS,iBAAA,CAAkB;AAAA,EAChC,OAAA;AAAA,EACA,IAAA;AAAA,EACA,EAAA,GAAK,cAAA;AAAA,EACL,kBAAkB;AACpB,CAAA,EAWoB;AAClB,EAAA,OAAO;AAAA,IACL,EAAA;AAAA,IACA,IAAA;AAAA,IACA,OAAA,EAAS;AAAA,MACP,MAAA,EAAQ,CAAA;AAAA,MACR,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,IAAA,EAAM,SAAS,CAAA;AAAA,MACvC,OAAA;AAAA,MACA,GAAI,eAAA,CAAgB,MAAA,GAAS,CAAA,IAAK;AAAA,QAChC,eAAA,EAAiB,eAAA,CAAgB,GAAA,CAAI,CAAA,EAAA,MAAO;AAAA,UAC1C,YAAY,EAAA,CAAG,UAAA;AAAA,UACf,UAAU,EAAA,CAAG,QAAA;AAAA,UACb,MAAM,EAAA,CAAG,IAAA;AAAA,UACT,QAAQ,EAAA,CAAG,MAAA;AAAA,UACX,OAAO,EAAA,CAAG;AAAA,SACZ,CAAE;AAAA;AACJ,KACF;AAAA,IACA,SAAA,sBAAe,IAAA;AAAK,GACtB;AACF;AAEO,IAAM,qBAAqB,CAAC;AAAA,EACjC,gBAAgB,EAAC;AAAA,EACjB,MAAA;AAAA,EACA,qBAAqB,EAAC;AAAA,EACtB,iBAAiB,EAAC;AAAA,EAClB,uBAAuB,EAAC;AAAA,kBACxBA,gBAAA,GAAiB,IAAIC,6BAAA,EAAe;AAAA,EACpC,KAAA,GAAQ,OAAO,UAAA;AACjB,CAAA,KAaK;AACH,EAAA,OAAO;AAAA,IACL,KAAA,EAAO;AAAA,MACL,aAAA;AAAA,MACA,kBAAA;AAAA,MACA,cAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,MAAA;AAAA,oBACAD,gBAAA;AAAA,IACA;AAAA,GACF;AACF;AASO,SAAS,iBAAiB,MAAA,EAAqF;AACpH,EAAA,MAAM,YAAsB,EAAC;AAC7B,EAAA,MAAM,gBAAgC,EAAC;AAEvC,EAAA,KAAA,IAAS,YAAA,GAAe,CAAA,EAAG,YAAA,GAAe,MAAA,CAAO,QAAQ,YAAA,EAAA,EAAgB;AACvE,IAAA,MAAM,OAAA,GAAU,OAAO,YAAY,CAAA;AAEnC,IAAA,IAAI,OAAA,EAAS,SAAS,eAAA,EAAiB;AACrC,MAAA,KAAA,IAAS,kBAAkB,CAAA,EAAG,eAAA,GAAkB,QAAQ,OAAA,CAAQ,eAAA,CAAgB,QAAQ,eAAA,EAAA,EAAmB;AACzG,QAAA,MAAM,UAAA,GAAa,OAAA,CAAQ,OAAA,CAAQ,eAAA,CAAgB,eAAe,CAAA;AAClE,QAAA,IAAI,UAAA,IAAc,WAAW,QAAA,KAAa,UAAA,CAAW,UAAU,QAAA,IAAY,UAAA,CAAW,UAAU,MAAA,CAAA,EAAS;AACvG,UAAA,SAAA,CAAU,IAAA,CAAK,WAAW,QAAQ,CAAA;AAClC,UAAA,aAAA,CAAc,IAAA,CAAK;AAAA,YACjB,UAAU,UAAA,CAAW,QAAA;AAAA,YACrB,YAAY,UAAA,CAAW,UAAA,IAAc,CAAA,EAAG,YAAY,IAAI,eAAe,CAAA,CAAA;AAAA,YACvE,YAAA;AAAA,YACA;AAAA,WACD,CAAA;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,EAAE,KAAA,EAAO,SAAA,EAAW,aAAA,EAAc;AAC3C;AAEO,IAAM,oBAAA,GAAuB,CAAC,QAAA,KAA2D;AAC9F,EAAA,OAAO,QAAA,EAAU,eAAe,GAAA,CAAI,CAAA,GAAA,KAAO,kCAAkC,GAAG,CAAC,KAAK,EAAC;AACzF;AAEO,IAAM,4BAAA,GAA+B,CAAC,SAAA,KAAiD;AAC5F,EAAA,OAAO,SAAA,CAAU,MAAA,CAAO,CAAA,GAAA,KAAO,GAAA,CAAI,IAAA,KAAS,WAAW,CAAA,CAAE,GAAA,CAAI,CAAA,GAAA,KAAO,iCAAA,CAAkC,GAAG,CAAC,CAAA;AAC5G","file":"chunk-TPQLLHZW.cjs","sourcesContent":["import type { MastraDBMessage } from '@mastra/core/agent';\nimport type { ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput } from '@mastra/core/evals';\nimport { RequestContext } from '@mastra/core/request-context';\nimport type { ToolInvocation } from 'ai';\n\n/**\n * Extract text content from MastraDBMessage\n * Matches the logic used in MessageList.mastraDBMessageToAIV4UIMessage\n */\nexport function getTextContentFromMastraDBMessage(message: MastraDBMessage): string {\n if (typeof message.content.content === 'string' && message.content.content !== '') {\n return message.content.content;\n }\n if (message.content.parts && Array.isArray(message.content.parts)) {\n // Return only the last text part like AI SDK does\n const textParts = message.content.parts.filter(p => p.type === 'text');\n return textParts.length > 0 ? textParts[textParts.length - 1]?.text || '' : '';\n }\n return '';\n}\n\nexport const roundToTwoDecimals = (num: number) => {\n return Math.round((num + Number.EPSILON) * 100) / 100;\n};\n\nexport function isCloserTo(value: number, target1: number, target2: number): boolean {\n return Math.abs(value - target1) < Math.abs(value - target2);\n}\n\nexport type TestCase = {\n input: string;\n output: string;\n expectedResult: {\n score: number;\n reason?: string;\n };\n};\n\nexport type TestCaseWithContext = TestCase & {\n context: string[];\n};\n\nexport const createTestRun = (\n input: string,\n output: string,\n additionalContext?: Record<string, any>,\n requestContext?: Record<string, any>,\n): ScoringInput => {\n return {\n input: [{ role: 'user', content: input }],\n output: { role: 'assistant', text: output },\n additionalContext: additionalContext ?? {},\n requestContext: requestContext ?? {},\n };\n};\n\nexport const getUserMessageFromRunInput = (input?: ScorerRunInputForAgent): string | undefined => {\n const message = input?.inputMessages.find(({ role }) => role === 'user');\n return message ? getTextContentFromMastraDBMessage(message) : undefined;\n};\n\nexport const getSystemMessagesFromRunInput = (input?: ScorerRunInputForAgent): string[] => {\n const systemMessages: string[] = [];\n\n // Add standard system messages\n if (input?.systemMessages) {\n systemMessages.push(\n ...input.systemMessages\n .map(msg => {\n // Handle different content types - extract text if it's an array of parts\n if (typeof msg.content === 'string') {\n return msg.content;\n } else if (Array.isArray(msg.content)) {\n // Extract text from parts array\n return msg.content\n .filter(part => part.type === 'text')\n .map(part => part.text || '')\n .join(' ');\n }\n return '';\n })\n .filter(content => content),\n );\n }\n\n // Add tagged system messages (these are specialized system prompts)\n if (input?.taggedSystemMessages) {\n Object.values(input.taggedSystemMessages).forEach(messages => {\n messages.forEach(msg => {\n if (typeof msg.content === 'string') {\n systemMessages.push(msg.content);\n }\n });\n });\n }\n\n return systemMessages;\n};\n\nexport const getCombinedSystemPrompt = (input?: ScorerRunInputForAgent): string => {\n const systemMessages = getSystemMessagesFromRunInput(input);\n return systemMessages.join('\\n\\n');\n};\n\nexport const getAssistantMessageFromRunOutput = (output?: ScorerRunOutputForAgent) => {\n const message = output?.find(({ role }) => role === 'assistant');\n return message ? getTextContentFromMastraDBMessage(message) : undefined;\n};\n\nexport const createToolInvocation = ({\n toolCallId,\n toolName,\n args,\n result,\n state = 'result',\n}: {\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state?: ToolInvocation['state'];\n}): { toolCallId: string; toolName: string; args: Record<string, any>; result: Record<string, any>; state: string } => {\n return {\n toolCallId,\n toolName,\n args,\n result,\n state,\n };\n};\n\n/**\n * Helper function to create MastraDBMessage objects for tests\n * Supports optional tool invocations for testing tool call scenarios\n */\nexport function createTestMessage({\n content,\n role,\n id = 'test-message',\n toolInvocations = [],\n}: {\n content: string;\n role: 'user' | 'assistant' | 'system';\n id?: string;\n toolInvocations?: Array<{\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state: any;\n }>;\n}): MastraDBMessage {\n return {\n id,\n role,\n content: {\n format: 2,\n parts: [{ type: 'text', text: content }],\n content,\n ...(toolInvocations.length > 0 && {\n toolInvocations: toolInvocations.map(ti => ({\n toolCallId: ti.toolCallId,\n toolName: ti.toolName,\n args: ti.args,\n result: ti.result,\n state: ti.state,\n })),\n }),\n },\n createdAt: new Date(),\n };\n}\n\nexport const createAgentTestRun = ({\n inputMessages = [],\n output,\n rememberedMessages = [],\n systemMessages = [],\n taggedSystemMessages = {},\n requestContext = new RequestContext(),\n runId = crypto.randomUUID(),\n}: {\n inputMessages?: ScorerRunInputForAgent['inputMessages'];\n output: ScorerRunOutputForAgent;\n rememberedMessages?: ScorerRunInputForAgent['rememberedMessages'];\n systemMessages?: ScorerRunInputForAgent['systemMessages'];\n taggedSystemMessages?: ScorerRunInputForAgent['taggedSystemMessages'];\n requestContext?: RequestContext;\n runId?: string;\n}): {\n input: ScorerRunInputForAgent;\n output: ScorerRunOutputForAgent;\n requestContext: RequestContext;\n runId: string;\n} => {\n return {\n input: {\n inputMessages,\n rememberedMessages,\n systemMessages,\n taggedSystemMessages,\n },\n output,\n requestContext,\n runId,\n };\n};\n\nexport type ToolCallInfo = {\n toolName: string;\n toolCallId: string;\n messageIndex: number;\n invocationIndex: number;\n};\n\nexport function extractToolCalls(output: ScorerRunOutputForAgent): { tools: string[]; toolCallInfos: ToolCallInfo[] } {\n const toolCalls: string[] = [];\n const toolCallInfos: ToolCallInfo[] = [];\n\n for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {\n const message = output[messageIndex];\n // Tool invocations are now nested under content\n if (message?.content?.toolInvocations) {\n for (let invocationIndex = 0; invocationIndex < message.content.toolInvocations.length; invocationIndex++) {\n const invocation = message.content.toolInvocations[invocationIndex];\n if (invocation && invocation.toolName && (invocation.state === 'result' || invocation.state === 'call')) {\n toolCalls.push(invocation.toolName);\n toolCallInfos.push({\n toolName: invocation.toolName,\n toolCallId: invocation.toolCallId || `${messageIndex}-${invocationIndex}`,\n messageIndex,\n invocationIndex,\n });\n }\n }\n }\n }\n\n return { tools: toolCalls, toolCallInfos };\n}\n\nexport const extractInputMessages = (runInput: ScorerRunInputForAgent | undefined): string[] => {\n return runInput?.inputMessages?.map(msg => getTextContentFromMastraDBMessage(msg)) || [];\n};\n\nexport const extractAgentResponseMessages = (runOutput: ScorerRunOutputForAgent): string[] => {\n return runOutput.filter(msg => msg.role === 'assistant').map(msg => getTextContentFromMastraDBMessage(msg));\n};\n"]}
|