@mastra/evals 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +76 -0
- package/dist/{chunk-6EA6D7JG.js → chunk-OEOE7ZHN.js} +21 -3
- package/dist/chunk-OEOE7ZHN.js.map +1 -0
- package/dist/{chunk-DSXZHUHI.cjs → chunk-W3U7MMDX.cjs} +21 -2
- package/dist/chunk-W3U7MMDX.cjs.map +1 -0
- package/dist/docs/README.md +1 -1
- package/dist/docs/SKILL.md +1 -1
- package/dist/docs/SOURCE_MAP.json +1 -1
- package/dist/docs/evals/03-reference.md +84 -10
- package/dist/scorers/code/tool-call-accuracy/index.d.ts +1 -1
- package/dist/scorers/index.d.ts +1 -0
- package/dist/scorers/index.d.ts.map +1 -1
- package/dist/scorers/llm/hallucination/index.d.ts +19 -2
- package/dist/scorers/llm/hallucination/index.d.ts.map +1 -1
- package/dist/scorers/llm/tool-call-accuracy/index.d.ts +1 -1
- package/dist/scorers/prebuilt/index.cjs +75 -63
- package/dist/scorers/prebuilt/index.cjs.map +1 -1
- package/dist/scorers/prebuilt/index.js +17 -5
- package/dist/scorers/prebuilt/index.js.map +1 -1
- package/dist/scorers/utils.cjs +20 -16
- package/dist/scorers/utils.d.ts +39 -0
- package/dist/scorers/utils.d.ts.map +1 -1
- package/dist/scorers/utils.js +1 -1
- package/package.json +4 -4
- package/dist/chunk-6EA6D7JG.js.map +0 -1
- package/dist/chunk-DSXZHUHI.cjs.map +0 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,81 @@
|
|
|
1
1
|
# @mastra/evals
|
|
2
2
|
|
|
3
|
+
## 1.1.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- Added `getContext` hook to hallucination scorer for dynamic context resolution at runtime. This enables live scoring scenarios where context (like tool results) is only available when the scorer runs. Also added `extractToolResults` utility function to help extract tool results from scorer output. ([#12639](https://github.com/mastra-ai/mastra/pull/12639))
|
|
8
|
+
|
|
9
|
+
**Before (static context):**
|
|
10
|
+
|
|
11
|
+
```typescript
|
|
12
|
+
const scorer = createHallucinationScorer({
|
|
13
|
+
model: openai('gpt-4o'),
|
|
14
|
+
options: {
|
|
15
|
+
context: ['The capital of France is Paris.', 'France is in Europe.'],
|
|
16
|
+
},
|
|
17
|
+
});
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
**After (dynamic context from tool results):**
|
|
21
|
+
|
|
22
|
+
```typescript
|
|
23
|
+
import { extractToolResults } from '@mastra/evals/scorers';
|
|
24
|
+
|
|
25
|
+
const scorer = createHallucinationScorer({
|
|
26
|
+
model: openai('gpt-4o'),
|
|
27
|
+
options: {
|
|
28
|
+
getContext: ({ run }) => {
|
|
29
|
+
const toolResults = extractToolResults(run.output);
|
|
30
|
+
return toolResults.map(t => JSON.stringify({ tool: t.toolName, result: t.result }));
|
|
31
|
+
},
|
|
32
|
+
},
|
|
33
|
+
});
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### Patch Changes
|
|
37
|
+
|
|
38
|
+
- Updated dependencies [[`e6fc281`](https://github.com/mastra-ai/mastra/commit/e6fc281896a3584e9e06465b356a44fe7faade65), [`97be6c8`](https://github.com/mastra-ai/mastra/commit/97be6c8963130fca8a664fcf99d7b3a38e463595), [`2770921`](https://github.com/mastra-ai/mastra/commit/2770921eec4d55a36b278d15c3a83f694e462ee5), [`b1695db`](https://github.com/mastra-ai/mastra/commit/b1695db2d7be0c329d499619c7881899649188d0), [`5fe1fe0`](https://github.com/mastra-ai/mastra/commit/5fe1fe0109faf2c87db34b725d8a4571a594f80e), [`4133d48`](https://github.com/mastra-ai/mastra/commit/4133d48eaa354cdb45920dc6265732ffbc96788d), [`5dd01cc`](https://github.com/mastra-ai/mastra/commit/5dd01cce68d61874aa3ecbd91ee17884cfd5aca2), [`13e0a2a`](https://github.com/mastra-ai/mastra/commit/13e0a2a2bcec01ff4d701274b3727d5e907a6a01), [`f6673b8`](https://github.com/mastra-ai/mastra/commit/f6673b893b65b7d273ad25ead42e990704cc1e17), [`cd6be8a`](https://github.com/mastra-ai/mastra/commit/cd6be8ad32741cd41cabf508355bb31b71e8a5bd), [`9eb4e8e`](https://github.com/mastra-ai/mastra/commit/9eb4e8e39efbdcfff7a40ff2ce07ce2714c65fa8), [`c987384`](https://github.com/mastra-ai/mastra/commit/c987384d6c8ca844a9701d7778f09f5a88da7f9f), [`cb8cc12`](https://github.com/mastra-ai/mastra/commit/cb8cc12bfadd526aa95a01125076f1da44e4afa7), [`aa37c84`](https://github.com/mastra-ai/mastra/commit/aa37c84d29b7db68c72517337932ef486c316275), [`62f5d50`](https://github.com/mastra-ai/mastra/commit/62f5d5043debbba497dacb7ab008fe86b38b8de3), [`47eba72`](https://github.com/mastra-ai/mastra/commit/47eba72f0397d0d14fbe324b97940c3d55e5a525)]:
|
|
39
|
+
- @mastra/core@1.2.0
|
|
40
|
+
|
|
41
|
+
## 1.1.0-alpha.0
|
|
42
|
+
|
|
43
|
+
### Minor Changes
|
|
44
|
+
|
|
45
|
+
- Added `getContext` hook to hallucination scorer for dynamic context resolution at runtime. This enables live scoring scenarios where context (like tool results) is only available when the scorer runs. Also added `extractToolResults` utility function to help extract tool results from scorer output. ([#12639](https://github.com/mastra-ai/mastra/pull/12639))
|
|
46
|
+
|
|
47
|
+
**Before (static context):**
|
|
48
|
+
|
|
49
|
+
```typescript
|
|
50
|
+
const scorer = createHallucinationScorer({
|
|
51
|
+
model: openai('gpt-4o'),
|
|
52
|
+
options: {
|
|
53
|
+
context: ['The capital of France is Paris.', 'France is in Europe.'],
|
|
54
|
+
},
|
|
55
|
+
});
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
**After (dynamic context from tool results):**
|
|
59
|
+
|
|
60
|
+
```typescript
|
|
61
|
+
import { extractToolResults } from '@mastra/evals/scorers';
|
|
62
|
+
|
|
63
|
+
const scorer = createHallucinationScorer({
|
|
64
|
+
model: openai('gpt-4o'),
|
|
65
|
+
options: {
|
|
66
|
+
getContext: ({ run }) => {
|
|
67
|
+
const toolResults = extractToolResults(run.output);
|
|
68
|
+
return toolResults.map(t => JSON.stringify({ tool: t.toolName, result: t.result }));
|
|
69
|
+
},
|
|
70
|
+
},
|
|
71
|
+
});
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Patch Changes
|
|
75
|
+
|
|
76
|
+
- Updated dependencies [[`2770921`](https://github.com/mastra-ai/mastra/commit/2770921eec4d55a36b278d15c3a83f694e462ee5), [`b1695db`](https://github.com/mastra-ai/mastra/commit/b1695db2d7be0c329d499619c7881899649188d0), [`4133d48`](https://github.com/mastra-ai/mastra/commit/4133d48eaa354cdb45920dc6265732ffbc96788d), [`5dd01cc`](https://github.com/mastra-ai/mastra/commit/5dd01cce68d61874aa3ecbd91ee17884cfd5aca2), [`13e0a2a`](https://github.com/mastra-ai/mastra/commit/13e0a2a2bcec01ff4d701274b3727d5e907a6a01), [`c987384`](https://github.com/mastra-ai/mastra/commit/c987384d6c8ca844a9701d7778f09f5a88da7f9f), [`cb8cc12`](https://github.com/mastra-ai/mastra/commit/cb8cc12bfadd526aa95a01125076f1da44e4afa7), [`62f5d50`](https://github.com/mastra-ai/mastra/commit/62f5d5043debbba497dacb7ab008fe86b38b8de3)]:
|
|
77
|
+
- @mastra/core@1.2.0-alpha.1
|
|
78
|
+
|
|
3
79
|
## 1.0.1
|
|
4
80
|
|
|
5
81
|
### Patch Changes
|
|
@@ -171,7 +171,25 @@ var extractInputMessages = (runInput) => {
|
|
|
171
171
|
var extractAgentResponseMessages = (runOutput) => {
|
|
172
172
|
return runOutput.filter((msg) => msg.role === "assistant").map((msg) => getTextContentFromMastraDBMessage(msg));
|
|
173
173
|
};
|
|
174
|
+
function extractToolResults(output) {
|
|
175
|
+
const results = [];
|
|
176
|
+
for (const message of output) {
|
|
177
|
+
const toolInvocations = message?.content?.toolInvocations;
|
|
178
|
+
if (!toolInvocations) continue;
|
|
179
|
+
for (const invocation of toolInvocations) {
|
|
180
|
+
if (invocation.state === "result" && invocation.result !== void 0) {
|
|
181
|
+
results.push({
|
|
182
|
+
toolName: invocation.toolName,
|
|
183
|
+
toolCallId: invocation.toolCallId || "",
|
|
184
|
+
args: invocation.args || {},
|
|
185
|
+
result: invocation.result
|
|
186
|
+
});
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
return results;
|
|
191
|
+
}
|
|
174
192
|
|
|
175
|
-
export { createAgentTestRun, createTestMessage, createTestRun, createToolInvocation, extractAgentResponseMessages, extractInputMessages, extractToolCalls, getAssistantMessageFromRunOutput, getCombinedSystemPrompt, getReasoningFromRunOutput, getSystemMessagesFromRunInput, getTextContentFromMastraDBMessage, getUserMessageFromRunInput, isCloserTo, roundToTwoDecimals };
|
|
176
|
-
//# sourceMappingURL=chunk-
|
|
177
|
-
//# sourceMappingURL=chunk-
|
|
193
|
+
export { createAgentTestRun, createTestMessage, createTestRun, createToolInvocation, extractAgentResponseMessages, extractInputMessages, extractToolCalls, extractToolResults, getAssistantMessageFromRunOutput, getCombinedSystemPrompt, getReasoningFromRunOutput, getSystemMessagesFromRunInput, getTextContentFromMastraDBMessage, getUserMessageFromRunInput, isCloserTo, roundToTwoDecimals };
|
|
194
|
+
//# sourceMappingURL=chunk-OEOE7ZHN.js.map
|
|
195
|
+
//# sourceMappingURL=chunk-OEOE7ZHN.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/scorers/utils.ts"],"names":[],"mappings":";;;AAyBO,SAAS,kCAAkC,OAAA,EAAkC;AAClF,EAAA,IAAI,OAAO,QAAQ,OAAA,CAAQ,OAAA,KAAY,YAAY,OAAA,CAAQ,OAAA,CAAQ,YAAY,EAAA,EAAI;AACjF,IAAA,OAAO,QAAQ,OAAA,CAAQ,OAAA;AAAA,EACzB;AACA,EAAA,IAAI,OAAA,CAAQ,QAAQ,KAAA,IAAS,KAAA,CAAM,QAAQ,OAAA,CAAQ,OAAA,CAAQ,KAAK,CAAA,EAAG;AAEjE,IAAA,MAAM,SAAA,GAAY,QAAQ,OAAA,CAAQ,KAAA,CAAM,OAAO,CAAA,CAAA,KAAK,CAAA,CAAE,SAAS,MAAM,CAAA;AACrE,IAAA,OAAO,SAAA,CAAU,SAAS,CAAA,GAAI,SAAA,CAAU,UAAU,MAAA,GAAS,CAAC,CAAA,EAAG,IAAA,IAAQ,EAAA,GAAK,EAAA;AAAA,EAC9E;AACA,EAAA,OAAO,EAAA;AACT;AAgBO,IAAM,kBAAA,GAAqB,CAAC,GAAA,KAAgB;AACjD,EAAA,OAAO,KAAK,KAAA,CAAA,CAAO,GAAA,GAAM,MAAA,CAAO,OAAA,IAAW,GAAG,CAAA,GAAI,GAAA;AACpD;AAgBO,SAAS,UAAA,CAAW,KAAA,EAAe,OAAA,EAAiB,OAAA,EAA0B;AACnF,EAAA,OAAO,IAAA,CAAK,IAAI,KAAA,GAAQ,OAAO,IAAI,IAAA,CAAK,GAAA,CAAI,QAAQ,OAAO,CAAA;AAC7D;AA6CO,IAAM,aAAA,GAAgB,CAC3B,KAAA,EACA,MAAA,EACA,mBACA,cAAA,KACiB;AACjB,EAAA,OAAO;AAAA,IACL,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,OAAA,EAAS,OAAO,CAAA;AAAA,IACxC,MAAA,EAAQ,EAAE,IAAA,EAAM,WAAA,EAAa,MAAM,MAAA,EAAO;AAAA,IAC1C,iBAAA,EAAmB,qBAAqB,EAAC;AAAA,IACzC,cAAA,EAAgB,kBAAkB;AAAC,GACrC;AACF;AAmBO,IAAM,0BAAA,GAA6B,CAAC,KAAA,KAAuD;AAChG,EAAA,MAAM,OAAA,GAAU,OAAO,aAAA,CAAc,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,IAAA,KAAS,MAAM,CAAA;AACvE,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAoBO,IAAM,6BAAA,GAAgC,CAAC,KAAA,KAA6C;AACzF,EAAA,MAAM,iBAA2B,EAAC;AAGlC,EAAA,IAAI,OAAO,cAAA,EAAgB;AACzB,IAAA,cAAA,CAAe,IAAA;AAAA,MACb,GAAG,KAAA,CAAM,cAAA,CACN,GAAA,CAAI,CAAA,GAAA,KAAO;AAEV,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,OAAO,GAAA,CAAI,OAAA;AAAA,QACb,CAAA,MAAA,IAAW,KAAA,CAAM,OAAA,CAAQ,GAAA,CAAI,OAAO,CAAA,EAAG;AAErC,UAAA,OAAO,IAAI,OAAA,CACR,MAAA,CAAO,CAAC,IAAA,KAAc,KAAK,IAAA,KAAS,MAAM,CAAA,CAC1C,GAAA,CAAI,CAAC,IAAA,KAAc,IAAA,CAAK,QAAQ,EAAE,CAAA,CAClC,KAAK,GAAG,CAAA;AAAA,QACb;AACA,QAAA,OAAO,EAAA;AAAA,MACT,CAAC,CAAA,CACA,MAAA,CAAO,CAAA,OAAA,KAAW,OAAO;AAAA,KAC9B;AAAA,EACF;AAGA,EAAA,IAAI,OAAO,oBAAA,EAAsB;AAC/B,IAAA,MAAA,CAAO,MAAA,CAAO,KAAA,CAAM,oBAAoB,CAAA,CAAE,QAAQ,CAAA,QAAA,KAAY;AAC5D,MAAA,QAAA,CAAS,QAAQ,CAAA,GAAA,KAAO;AACtB,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,cAAA,CAAe,IAAA,CAAK,IAAI,OAAO,CAAA;AAAA,QACjC;AAAA,MACF,CAAC,CAAA;AAAA,IACH,CAAC,CAAA;AAAA,EACH;AAEA,EAAA,OAAO,cAAA;AACT;AAmBO,IAAM,uBAAA,GAA0B,CAAC,KAAA,KAA2C;AACjF,EAAA,MAAM,cAAA,GAAiB,8BAA8B,KAAK,CAAA;AAC1D,EAAA,OAAO,cAAA,CAAe,KAAK,MAAM,CAAA;AACnC;AAmBO,IAAM,gCAAA,GAAmC,CAAC,MAAA,KAAqC;AACpF,EAAA,MAAM,OAAA,GAAU,QAAQ,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,SAAS,WAAW,CAAA;AAC/D,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAiCO,IAAM,yBAAA,GAA4B,CAAC,MAAA,KAAyD;AACjG,EAAA,IAAI,CAAC,QAAQ,OAAO,MAAA;AAEpB,EAAA,MAAM,OAAA,GAAU,OAAO,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,SAAS,WAAW,CAAA;AAC9D,EAAA,IAAI,CAAC,SAAS,OAAO,MAAA;AAGrB,EAAA,IAAI,OAAA,CAAQ,QAAQ,SAAA,EAAW;AAC7B,IAAA,OAAO,QAAQ,OAAA,CAAQ,SAAA;AAAA,EACzB;AAIA,EAAA,MAAM,cAAA,GAAiB,QAAQ,OAAA,CAAQ,KAAA,EAAO,OAAO,CAAC,CAAA,KAAW,CAAA,CAAE,IAAA,KAAS,WAAW,CAAA;AACvF,EAAA,IAAI,cAAA,IAAkB,cAAA,CAAe,MAAA,GAAS,CAAA,EAAG;AAC/C,IAAA,MAAM,cAAA,GAAiB,cAAA,CACpB,GAAA,CAAI,CAAC,CAAA,KAAW;AAEf,MAAA,IAAI,EAAE,OAAA,IAAW,KAAA,CAAM,OAAA,CAAQ,CAAA,CAAE,OAAO,CAAA,EAAG;AACzC,QAAA,OAAO,EAAE,OAAA,CACN,MAAA,CAAO,CAAC,CAAA,KAAW,EAAE,IAAA,KAAS,MAAM,CAAA,CACpC,GAAA,CAAI,CAAC,CAAA,KAAW,CAAA,CAAE,IAAI,CAAA,CACtB,KAAK,EAAE,CAAA;AAAA,MACZ;AACA,MAAA,OAAO,EAAE,SAAA,IAAa,EAAA;AAAA,IACxB,CAAC,CAAA,CACA,MAAA,CAAO,OAAO,CAAA;AAEjB,IAAA,OAAO,eAAe,MAAA,GAAS,CAAA,GAAI,cAAA,CAAe,IAAA,CAAK,IAAI,CAAA,GAAI,MAAA;AAAA,EACjE;AAEA,EAAA,OAAO,MAAA;AACT;AAuBO,IAAM,uBAAuB,CAAC;AAAA,EACnC,UAAA;AAAA,EACA,QAAA;AAAA,EACA,IAAA;AAAA,EACA,MAAA;AAAA,EACA,KAAA,GAAQ;AACV,CAAA,KAMuH;AACrH,EAAA,OAAO;AAAA,IACL,UAAA;AAAA,IACA,QAAA;AAAA,IACA,IAAA;AAAA,IACA,MAAA;AAAA,IACA;AAAA,GACF;AACF;AAmCO,SAAS,iBAAA,CAAkB;AAAA,EAChC,OAAA;AAAA,EACA,IAAA;AAAA,EACA,EAAA,GAAK,cAAA;AAAA,EACL,kBAAkB;AACpB,CAAA,EAWoB;AAClB,EAAA,OAAO;AAAA,IACL,EAAA;AAAA,IACA,IAAA;AAAA,IACA,OAAA,EAAS;AAAA,MACP,MAAA,EAAQ,CAAA;AAAA,MACR,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,IAAA,EAAM,SAAS,CAAA;AAAA,MACvC,OAAA;AAAA,MACA,GAAI,eAAA,CAAgB,MAAA,GAAS,CAAA,IAAK;AAAA,QAChC,eAAA,EAAiB,eAAA,CAAgB,GAAA,CAAI,CAAA,EAAA,MAAO;AAAA,UAC1C,YAAY,EAAA,CAAG,UAAA;AAAA,UACf,UAAU,EAAA,CAAG,QAAA;AAAA,UACb,MAAM,EAAA,CAAG,IAAA;AAAA,UACT,QAAQ,EAAA,CAAG,MAAA;AAAA,UACX,OAAO,EAAA,CAAG;AAAA,SACZ,CAAE;AAAA;AACJ,KACF;AAAA,IACA,SAAA,sBAAe,IAAA;AAAK,GACtB;AACF;AA+BO,IAAM,qBAAqB,CAAC;AAAA,EACjC,gBAAgB,EAAC;AAAA,EACjB,MAAA;AAAA,EACA,qBAAqB,EAAC;AAAA,EACtB,iBAAiB,EAAC;AAAA,EAClB,uBAAuB,EAAC;AAAA,EACxB,cAAA,GAAiB,IAAI,cAAA,EAAe;AAAA,EACpC,KAAA,GAAQ,OAAO,UAAA;AACjB,CAAA,KAaK;AACH,EAAA,OAAO;AAAA,IACL,KAAA,EAAO;AAAA,MACL,aAAA;AAAA,MACA,kBAAA;AAAA,MACA,cAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,MAAA;AAAA,IACA,cAAA;AAAA,IACA;AAAA,GACF;AACF;AAqCO,SAAS,iBAAiB,MAAA,EAAqF;AACpH,EAAA,MAAM,YAAsB,EAAC;AAC7B,EAAA,MAAM,gBAAgC,EAAC;AAEvC,EAAA,KAAA,IAAS,YAAA,GAAe,CAAA,EAAG,YAAA,GAAe,MAAA,CAAO,QAAQ,YAAA,EAAA,EAAgB;AACvE,IAAA,MAAM,OAAA,GAAU,OAAO,YAAY,CAAA;AAEnC,IAAA,IAAI,OAAA,EAAS,SAAS,eAAA,EAAiB;AACrC,MAAA,KAAA,IAAS,kBAAkB,CAAA,EAAG,eAAA,GAAkB,QAAQ,OAAA,CAAQ,eAAA,CAAgB,QAAQ,eAAA,EAAA,EAAmB;AACzG,QAAA,MAAM,UAAA,GAAa,OAAA,CAAQ,OAAA,CAAQ,eAAA,CAAgB,eAAe,CAAA;AAClE,QAAA,IAAI,UAAA,IAAc,WAAW,QAAA,KAAa,UAAA,CAAW,UAAU,QAAA,IAAY,UAAA,CAAW,UAAU,MAAA,CAAA,EAAS;AACvG,UAAA,SAAA,CAAU,IAAA,CAAK,WAAW,QAAQ,CAAA;AAClC,UAAA,aAAA,CAAc,IAAA,CAAK;AAAA,YACjB,UAAU,UAAA,CAAW,QAAA;AAAA,YACrB,YAAY,UAAA,CAAW,UAAA,IAAc,CAAA,EAAG,YAAY,IAAI,eAAe,CAAA,CAAA;AAAA,YACvE,YAAA;AAAA,YACA;AAAA,WACD,CAAA;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,EAAE,KAAA,EAAO,SAAA,EAAW,aAAA,EAAc;AAC3C;AAiBO,IAAM,oBAAA,GAAuB,CAAC,QAAA,KAA2D;AAC9F,EAAA,OAAO,QAAA,EAAU,eAAe,GAAA,CAAI,CAAA,GAAA,KAAO,kCAAkC,GAAG,CAAC,KAAK,EAAC;AACzF;AAmBO,IAAM,4BAAA,GAA+B,CAAC,SAAA,KAAiD;AAC5F,EAAA,OAAO,SAAA,CAAU,MAAA,CAAO,CAAA,GAAA,KAAO,GAAA,CAAI,IAAA,KAAS,WAAW,CAAA,CAAE,GAAA,CAAI,CAAA,GAAA,KAAO,iCAAA,CAAkC,GAAG,CAAC,CAAA;AAC5G;AAyCO,SAAS,mBAAmB,MAAA,EAAmD;AACpF,EAAA,MAAM,UAA4B,EAAC;AAEnC,EAAA,KAAA,MAAW,WAAW,MAAA,EAAQ;AAC5B,IAAA,MAAM,eAAA,GAAkB,SAAS,OAAA,EAAS,eAAA;AAC1C,IAAA,IAAI,CAAC,eAAA,EAAiB;AAEtB,IAAA,KAAA,MAAW,cAAc,eAAA,EAAiB;AACxC,MAAA,IAAI,UAAA,CAAW,KAAA,KAAU,QAAA,IAAY,UAAA,CAAW,WAAW,MAAA,EAAW;AACpE,QAAA,OAAA,CAAQ,IAAA,CAAK;AAAA,UACX,UAAU,UAAA,CAAW,QAAA;AAAA,UACrB,UAAA,EAAY,WAAW,UAAA,IAAc,EAAA;AAAA,UACrC,IAAA,EAAM,UAAA,CAAW,IAAA,IAAQ,EAAC;AAAA,UAC1B,QAAQ,UAAA,CAAW;AAAA,SACpB,CAAA;AAAA,MACH;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,OAAA;AACT","file":"chunk-OEOE7ZHN.js","sourcesContent":["import type { MastraDBMessage } from '@mastra/core/agent';\nimport type { ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput } from '@mastra/core/evals';\nimport { RequestContext } from '@mastra/core/request-context';\n\n/**\n * Extracts text content from a MastraDBMessage.\n *\n * This function matches the logic used in `MessageList.mastraDBMessageToAIV4UIMessage`.\n * It first checks for a string `content.content` field, then falls back to extracting\n * text from the `parts` array (returning only the last text part, like AI SDK does).\n *\n * @param message - The MastraDBMessage to extract text from\n * @returns The extracted text content, or an empty string if no text is found\n *\n * @example\n * ```ts\n * const message: MastraDBMessage = {\n * id: 'msg-1',\n * role: 'assistant',\n * content: { format: 2, parts: [{ type: 'text', text: 'Hello!' }] },\n * createdAt: new Date(),\n * };\n * const text = getTextContentFromMastraDBMessage(message); // 'Hello!'\n * ```\n */\nexport function getTextContentFromMastraDBMessage(message: MastraDBMessage): string {\n if (typeof message.content.content === 'string' && message.content.content !== '') {\n return message.content.content;\n }\n if (message.content.parts && Array.isArray(message.content.parts)) {\n // Return only the last text part like AI SDK does\n const textParts = message.content.parts.filter(p => p.type === 'text');\n return textParts.length > 0 ? textParts[textParts.length - 1]?.text || '' : '';\n }\n return '';\n}\n\n/**\n * Rounds a number to two decimal places.\n *\n * Uses `Number.EPSILON` to handle floating-point precision issues.\n *\n * @param num - The number to round\n * @returns The number rounded to two decimal places\n *\n * @example\n * ```ts\n * roundToTwoDecimals(0.1 + 0.2); // 0.3\n * roundToTwoDecimals(1.005); // 1.01\n * ```\n */\nexport const roundToTwoDecimals = (num: number) => {\n return Math.round((num + Number.EPSILON) * 100) / 100;\n};\n\n/**\n * Determines if a value is closer to the first target than the second.\n *\n * @param value - The value to compare\n * @param target1 - The first target value\n * @param target2 - The second target value\n * @returns `true` if `value` is closer to `target1` than `target2`\n *\n * @example\n * ```ts\n * isCloserTo(0.6, 1, 0); // true (0.6 is closer to 1)\n * isCloserTo(0.3, 1, 0); // false (0.3 is closer to 0)\n * ```\n */\nexport function isCloserTo(value: number, target1: number, target2: number): boolean {\n return Math.abs(value - target1) < Math.abs(value - target2);\n}\n\n/**\n * Represents a test case for scorer evaluation.\n */\nexport type TestCase = {\n /** The input text to evaluate */\n input: string;\n /** The output text to evaluate */\n output: string;\n /** The expected result of the evaluation */\n expectedResult: {\n /** The expected score */\n score: number;\n /** The optional expected reason */\n reason?: string;\n };\n};\n\n/**\n * Represents a test case with additional context for scorer evaluation.\n */\nexport type TestCaseWithContext = TestCase & {\n /** Additional context strings for the evaluation */\n context: string[];\n};\n\n/**\n * Creates a scoring input object for testing purposes.\n *\n * @param input - The user input text\n * @param output - The assistant output text\n * @param additionalContext - Optional additional context data\n * @param requestContext - Optional request context data\n * @returns A ScoringInput object ready for use in scorer tests\n *\n * @example\n * ```ts\n * const run = createTestRun(\n * 'What is 2+2?',\n * 'The answer is 4.',\n * { topic: 'math' }\n * );\n * ```\n */\nexport const createTestRun = (\n input: string,\n output: string,\n additionalContext?: Record<string, any>,\n requestContext?: Record<string, any>,\n): ScoringInput => {\n return {\n input: [{ role: 'user', content: input }],\n output: { role: 'assistant', text: output },\n additionalContext: additionalContext ?? {},\n requestContext: requestContext ?? {},\n };\n};\n\n/**\n * Extracts the user message text from a scorer run input.\n *\n * Finds the first message with role 'user' and extracts its text content.\n *\n * @param input - The scorer run input containing input messages\n * @returns The user message text, or `undefined` if no user message is found\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const userText = getUserMessageFromRunInput(run.input);\n * return { userText };\n * });\n * ```\n */\nexport const getUserMessageFromRunInput = (input?: ScorerRunInputForAgent): string | undefined => {\n const message = input?.inputMessages.find(({ role }) => role === 'user');\n return message ? getTextContentFromMastraDBMessage(message) : undefined;\n};\n\n/**\n * Extracts all system messages from a scorer run input.\n *\n * Collects text from both standard system messages and tagged system messages\n * (specialized system prompts like memory instructions).\n *\n * @param input - The scorer run input containing system messages\n * @returns An array of system message strings\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const systemMessages = getSystemMessagesFromRunInput(run.input);\n * return { systemPrompt: systemMessages.join('\\n') };\n * });\n * ```\n */\nexport const getSystemMessagesFromRunInput = (input?: ScorerRunInputForAgent): string[] => {\n const systemMessages: string[] = [];\n\n // Add standard system messages\n if (input?.systemMessages) {\n systemMessages.push(\n ...input.systemMessages\n .map(msg => {\n // Handle different content types - extract text if it's an array of parts\n if (typeof msg.content === 'string') {\n return msg.content;\n } else if (Array.isArray(msg.content)) {\n // Extract text from parts array\n return msg.content\n .filter((part: any) => part.type === 'text')\n .map((part: any) => part.text || '')\n .join(' ');\n }\n return '';\n })\n .filter(content => content),\n );\n }\n\n // Add tagged system messages (these are specialized system prompts)\n if (input?.taggedSystemMessages) {\n Object.values(input.taggedSystemMessages).forEach(messages => {\n messages.forEach(msg => {\n if (typeof msg.content === 'string') {\n systemMessages.push(msg.content);\n }\n });\n });\n }\n\n return systemMessages;\n};\n\n/**\n * Combines all system messages into a single prompt string.\n *\n * Joins all system messages (standard and tagged) with double newlines.\n *\n * @param input - The scorer run input containing system messages\n * @returns A combined system prompt string\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const systemPrompt = getCombinedSystemPrompt(run.input);\n * return { systemPrompt };\n * });\n * ```\n */\nexport const getCombinedSystemPrompt = (input?: ScorerRunInputForAgent): string => {\n const systemMessages = getSystemMessagesFromRunInput(input);\n return systemMessages.join('\\n\\n');\n};\n\n/**\n * Extracts the assistant message text from a scorer run output.\n *\n * Finds the first message with role 'assistant' and extracts its text content.\n *\n * @param output - The scorer run output (array of MastraDBMessage)\n * @returns The assistant message text, or `undefined` if no assistant message is found\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const response = getAssistantMessageFromRunOutput(run.output);\n * return { response };\n * });\n * ```\n */\nexport const getAssistantMessageFromRunOutput = (output?: ScorerRunOutputForAgent) => {\n const message = output?.find(({ role }) => role === 'assistant');\n return message ? getTextContentFromMastraDBMessage(message) : undefined;\n};\n\n/**\n * Extracts reasoning text from a scorer run output.\n *\n * This function extracts reasoning content from assistant messages, which is\n * produced by reasoning models like `deepseek-reasoner`. The reasoning can be\n * stored in two places:\n * 1. `content.reasoning` - a string field on the message content\n * 2. `content.parts` - as parts with `type: 'reasoning'` containing `details`\n *\n * @param output - The scorer run output (array of MastraDBMessage)\n * @returns The reasoning text, or `undefined` if no reasoning is present\n *\n * @example\n * ```ts\n * const reasoningScorer = createScorer({\n * id: 'reasoning-scorer',\n * name: 'Reasoning Quality',\n * description: 'Evaluates the quality of model reasoning',\n * type: 'agent',\n * })\n * .preprocess(({ run }) => {\n * const reasoning = getReasoningFromRunOutput(run.output);\n * const response = getAssistantMessageFromRunOutput(run.output);\n * return { reasoning, response };\n * })\n * .generateScore(({ results }) => {\n * // Score based on reasoning quality\n * return results.preprocessStepResult?.reasoning ? 1 : 0;\n * });\n * ```\n */\nexport const getReasoningFromRunOutput = (output?: ScorerRunOutputForAgent): string | undefined => {\n if (!output) return undefined;\n\n const message = output.find(({ role }) => role === 'assistant');\n if (!message) return undefined;\n\n // Check for reasoning in content.reasoning (string format)\n if (message.content.reasoning) {\n return message.content.reasoning;\n }\n\n // Check for reasoning in parts with type 'reasoning'\n // Reasoning models store reasoning in parts as { type: 'reasoning', details: [{ type: 'text', text: '...' }] }\n const reasoningParts = message.content.parts?.filter((p: any) => p.type === 'reasoning');\n if (reasoningParts && reasoningParts.length > 0) {\n const reasoningTexts = reasoningParts\n .map((p: any) => {\n // The reasoning text can be in p.reasoning or in p.details[].text\n if (p.details && Array.isArray(p.details)) {\n return p.details\n .filter((d: any) => d.type === 'text')\n .map((d: any) => d.text)\n .join('');\n }\n return p.reasoning || '';\n })\n .filter(Boolean);\n\n return reasoningTexts.length > 0 ? reasoningTexts.join('\\n') : undefined;\n }\n\n return undefined;\n};\n\n/**\n * Creates a tool invocation object for testing purposes.\n *\n * @param options - The tool invocation configuration\n * @param options.toolCallId - Unique identifier for the tool call\n * @param options.toolName - Name of the tool being called\n * @param options.args - Arguments passed to the tool\n * @param options.result - Result returned by the tool\n * @param options.state - State of the invocation (default: 'result')\n * @returns A tool invocation object\n *\n * @example\n * ```ts\n * const invocation = createToolInvocation({\n * toolCallId: 'call-123',\n * toolName: 'weatherTool',\n * args: { location: 'London' },\n * result: { temperature: 20, condition: 'sunny' },\n * });\n * ```\n */\nexport const createToolInvocation = ({\n toolCallId,\n toolName,\n args,\n result,\n state = 'result',\n}: {\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state?: 'call' | 'partial-call' | 'result';\n}): { toolCallId: string; toolName: string; args: Record<string, any>; result: Record<string, any>; state: string } => {\n return {\n toolCallId,\n toolName,\n args,\n result,\n state,\n };\n};\n\n/**\n * Creates a MastraDBMessage object for testing purposes.\n *\n * Supports optional tool invocations for testing tool call scenarios.\n *\n * @param options - The message configuration\n * @param options.content - The text content of the message\n * @param options.role - The role of the message sender ('user', 'assistant', or 'system')\n * @param options.id - Optional message ID (default: 'test-message')\n * @param options.toolInvocations - Optional array of tool invocations\n * @returns A MastraDBMessage object\n *\n * @example\n * ```ts\n * const message = createTestMessage({\n * content: 'Hello, how can I help?',\n * role: 'assistant',\n * });\n *\n * // With tool invocations\n * const messageWithTools = createTestMessage({\n * content: 'Let me check the weather.',\n * role: 'assistant',\n * toolInvocations: [{\n * toolCallId: 'call-1',\n * toolName: 'weatherTool',\n * args: { location: 'Paris' },\n * result: { temp: 22 },\n * state: 'result',\n * }],\n * });\n * ```\n */\nexport function createTestMessage({\n content,\n role,\n id = 'test-message',\n toolInvocations = [],\n}: {\n content: string;\n role: 'user' | 'assistant' | 'system';\n id?: string;\n toolInvocations?: Array<{\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state: any;\n }>;\n}): MastraDBMessage {\n return {\n id,\n role,\n content: {\n format: 2,\n parts: [{ type: 'text', text: content }],\n content,\n ...(toolInvocations.length > 0 && {\n toolInvocations: toolInvocations.map(ti => ({\n toolCallId: ti.toolCallId,\n toolName: ti.toolName,\n args: ti.args,\n result: ti.result,\n state: ti.state,\n })),\n }),\n },\n createdAt: new Date(),\n };\n}\n\n/**\n * Creates a complete agent test run object for testing scorers.\n *\n * Provides a convenient way to construct the full run object that scorers receive,\n * including input messages, output, system messages, and request context.\n *\n * @param options - The test run configuration\n * @param options.inputMessages - Array of input messages (default: [])\n * @param options.output - The output messages (required)\n * @param options.rememberedMessages - Array of remembered messages from memory (default: [])\n * @param options.systemMessages - Array of system messages (default: [])\n * @param options.taggedSystemMessages - Tagged system messages map (default: {})\n * @param options.requestContext - Request context (default: new RequestContext())\n * @param options.runId - Unique run ID (default: random UUID)\n * @returns A complete test run object\n *\n * @example\n * ```ts\n * const testRun = createAgentTestRun({\n * inputMessages: [createTestMessage({ content: 'Hello', role: 'user' })],\n * output: [createTestMessage({ content: 'Hi there!', role: 'assistant' })],\n * });\n *\n * const result = await scorer.run({\n * input: testRun.input,\n * output: testRun.output,\n * });\n * ```\n */\nexport const createAgentTestRun = ({\n inputMessages = [],\n output,\n rememberedMessages = [],\n systemMessages = [],\n taggedSystemMessages = {},\n requestContext = new RequestContext(),\n runId = crypto.randomUUID(),\n}: {\n inputMessages?: ScorerRunInputForAgent['inputMessages'];\n output: ScorerRunOutputForAgent;\n rememberedMessages?: ScorerRunInputForAgent['rememberedMessages'];\n systemMessages?: ScorerRunInputForAgent['systemMessages'];\n taggedSystemMessages?: ScorerRunInputForAgent['taggedSystemMessages'];\n requestContext?: RequestContext;\n runId?: string;\n}): {\n input: ScorerRunInputForAgent;\n output: ScorerRunOutputForAgent;\n requestContext: RequestContext;\n runId: string;\n} => {\n return {\n input: {\n inputMessages,\n rememberedMessages,\n systemMessages,\n taggedSystemMessages,\n },\n output,\n requestContext,\n runId,\n };\n};\n\n/**\n * Information about a tool call extracted from scorer output.\n */\nexport type ToolCallInfo = {\n /** Name of the tool that was called */\n toolName: string;\n /** Unique identifier for the tool call */\n toolCallId: string;\n /** Index of the message containing this tool call */\n messageIndex: number;\n /** Index of the invocation within the message's tool invocations */\n invocationIndex: number;\n};\n\n/**\n * Extracts all tool calls from a scorer run output.\n *\n * Iterates through all messages and their tool invocations to collect\n * information about tools that were called (with state 'result' or 'call').\n *\n * @param output - The scorer run output (array of MastraDBMessage)\n * @returns An object containing tool names and detailed tool call info\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const { tools, toolCallInfos } = extractToolCalls(run.output);\n * return {\n * toolsUsed: tools,\n * toolCount: tools.length,\n * };\n * });\n * ```\n */\nexport function extractToolCalls(output: ScorerRunOutputForAgent): { tools: string[]; toolCallInfos: ToolCallInfo[] } {\n const toolCalls: string[] = [];\n const toolCallInfos: ToolCallInfo[] = [];\n\n for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {\n const message = output[messageIndex];\n // Tool invocations are now nested under content\n if (message?.content?.toolInvocations) {\n for (let invocationIndex = 0; invocationIndex < message.content.toolInvocations.length; invocationIndex++) {\n const invocation = message.content.toolInvocations[invocationIndex];\n if (invocation && invocation.toolName && (invocation.state === 'result' || invocation.state === 'call')) {\n toolCalls.push(invocation.toolName);\n toolCallInfos.push({\n toolName: invocation.toolName,\n toolCallId: invocation.toolCallId || `${messageIndex}-${invocationIndex}`,\n messageIndex,\n invocationIndex,\n });\n }\n }\n }\n }\n\n return { tools: toolCalls, toolCallInfos };\n}\n\n/**\n * Extracts text content from all input messages.\n *\n * @param runInput - The scorer run input\n * @returns An array of text strings from each input message\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const messages = extractInputMessages(run.input);\n * return { allUserMessages: messages.join('\\n') };\n * });\n * ```\n */\nexport const extractInputMessages = (runInput: ScorerRunInputForAgent | undefined): string[] => {\n return runInput?.inputMessages?.map(msg => getTextContentFromMastraDBMessage(msg)) || [];\n};\n\n/**\n * Extracts text content from all assistant response messages.\n *\n * Filters for messages with role 'assistant' and extracts their text content.\n *\n * @param runOutput - The scorer run output (array of MastraDBMessage)\n * @returns An array of text strings from each assistant message\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const responses = extractAgentResponseMessages(run.output);\n * return { allResponses: responses.join('\\n') };\n * });\n * ```\n */\nexport const extractAgentResponseMessages = (runOutput: ScorerRunOutputForAgent): string[] => {\n return runOutput.filter(msg => msg.role === 'assistant').map(msg => getTextContentFromMastraDBMessage(msg));\n};\n\n/**\n * Information about a tool result extracted from scorer output.\n */\nexport type ToolResultInfo = {\n /** Name of the tool that was called */\n toolName: string;\n /** Unique identifier for the tool call */\n toolCallId: string;\n /** Arguments passed to the tool */\n args: Record<string, any>;\n /** Result returned by the tool */\n result: any;\n};\n\n/**\n * Extracts tool results from a scorer run output.\n *\n * Returns structured objects that can be used with the hallucination scorer's\n * `getContext` hook or for other scorer logic.\n *\n * @param output - The scorer run output (array of MastraDBMessage)\n * @returns An array of ToolResultInfo objects\n *\n * @example\n * ```ts\n * import { extractToolResults } from '@mastra/evals/scorers';\n * import { createHallucinationScorer } from '@mastra/evals/scorers/prebuilt';\n *\n * const scorer = createHallucinationScorer({\n * model: openai('gpt-4o'),\n * options: {\n * getContext: (run) => {\n * const toolResults = extractToolResults(run.output);\n * return toolResults.map(t => JSON.stringify({ tool: t.toolName, result: t.result }));\n * },\n * },\n * });\n * ```\n */\nexport function extractToolResults(output: ScorerRunOutputForAgent): ToolResultInfo[] {\n const results: ToolResultInfo[] = [];\n\n for (const message of output) {\n const toolInvocations = message?.content?.toolInvocations;\n if (!toolInvocations) continue;\n\n for (const invocation of toolInvocations) {\n if (invocation.state === 'result' && invocation.result !== undefined) {\n results.push({\n toolName: invocation.toolName,\n toolCallId: invocation.toolCallId || '',\n args: invocation.args || {},\n result: invocation.result,\n });\n }\n }\n }\n\n return results;\n}\n"]}
|
|
@@ -173,6 +173,24 @@ var extractInputMessages = (runInput) => {
|
|
|
173
173
|
var extractAgentResponseMessages = (runOutput) => {
|
|
174
174
|
return runOutput.filter((msg) => msg.role === "assistant").map((msg) => getTextContentFromMastraDBMessage(msg));
|
|
175
175
|
};
|
|
176
|
+
function extractToolResults(output) {
|
|
177
|
+
const results = [];
|
|
178
|
+
for (const message of output) {
|
|
179
|
+
const toolInvocations = message?.content?.toolInvocations;
|
|
180
|
+
if (!toolInvocations) continue;
|
|
181
|
+
for (const invocation of toolInvocations) {
|
|
182
|
+
if (invocation.state === "result" && invocation.result !== void 0) {
|
|
183
|
+
results.push({
|
|
184
|
+
toolName: invocation.toolName,
|
|
185
|
+
toolCallId: invocation.toolCallId || "",
|
|
186
|
+
args: invocation.args || {},
|
|
187
|
+
result: invocation.result
|
|
188
|
+
});
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
return results;
|
|
193
|
+
}
|
|
176
194
|
|
|
177
195
|
exports.createAgentTestRun = createAgentTestRun;
|
|
178
196
|
exports.createTestMessage = createTestMessage;
|
|
@@ -181,6 +199,7 @@ exports.createToolInvocation = createToolInvocation;
|
|
|
181
199
|
exports.extractAgentResponseMessages = extractAgentResponseMessages;
|
|
182
200
|
exports.extractInputMessages = extractInputMessages;
|
|
183
201
|
exports.extractToolCalls = extractToolCalls;
|
|
202
|
+
exports.extractToolResults = extractToolResults;
|
|
184
203
|
exports.getAssistantMessageFromRunOutput = getAssistantMessageFromRunOutput;
|
|
185
204
|
exports.getCombinedSystemPrompt = getCombinedSystemPrompt;
|
|
186
205
|
exports.getReasoningFromRunOutput = getReasoningFromRunOutput;
|
|
@@ -189,5 +208,5 @@ exports.getTextContentFromMastraDBMessage = getTextContentFromMastraDBMessage;
|
|
|
189
208
|
exports.getUserMessageFromRunInput = getUserMessageFromRunInput;
|
|
190
209
|
exports.isCloserTo = isCloserTo;
|
|
191
210
|
exports.roundToTwoDecimals = roundToTwoDecimals;
|
|
192
|
-
//# sourceMappingURL=chunk-
|
|
193
|
-
//# sourceMappingURL=chunk-
|
|
211
|
+
//# sourceMappingURL=chunk-W3U7MMDX.cjs.map
|
|
212
|
+
//# sourceMappingURL=chunk-W3U7MMDX.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/scorers/utils.ts"],"names":["requestContext","RequestContext"],"mappings":";;;;;AAyBO,SAAS,kCAAkC,OAAA,EAAkC;AAClF,EAAA,IAAI,OAAO,QAAQ,OAAA,CAAQ,OAAA,KAAY,YAAY,OAAA,CAAQ,OAAA,CAAQ,YAAY,EAAA,EAAI;AACjF,IAAA,OAAO,QAAQ,OAAA,CAAQ,OAAA;AAAA,EACzB;AACA,EAAA,IAAI,OAAA,CAAQ,QAAQ,KAAA,IAAS,KAAA,CAAM,QAAQ,OAAA,CAAQ,OAAA,CAAQ,KAAK,CAAA,EAAG;AAEjE,IAAA,MAAM,SAAA,GAAY,QAAQ,OAAA,CAAQ,KAAA,CAAM,OAAO,CAAA,CAAA,KAAK,CAAA,CAAE,SAAS,MAAM,CAAA;AACrE,IAAA,OAAO,SAAA,CAAU,SAAS,CAAA,GAAI,SAAA,CAAU,UAAU,MAAA,GAAS,CAAC,CAAA,EAAG,IAAA,IAAQ,EAAA,GAAK,EAAA;AAAA,EAC9E;AACA,EAAA,OAAO,EAAA;AACT;AAgBO,IAAM,kBAAA,GAAqB,CAAC,GAAA,KAAgB;AACjD,EAAA,OAAO,KAAK,KAAA,CAAA,CAAO,GAAA,GAAM,MAAA,CAAO,OAAA,IAAW,GAAG,CAAA,GAAI,GAAA;AACpD;AAgBO,SAAS,UAAA,CAAW,KAAA,EAAe,OAAA,EAAiB,OAAA,EAA0B;AACnF,EAAA,OAAO,IAAA,CAAK,IAAI,KAAA,GAAQ,OAAO,IAAI,IAAA,CAAK,GAAA,CAAI,QAAQ,OAAO,CAAA;AAC7D;AA6CO,IAAM,aAAA,GAAgB,CAC3B,KAAA,EACA,MAAA,EACA,mBACA,cAAA,KACiB;AACjB,EAAA,OAAO;AAAA,IACL,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,OAAA,EAAS,OAAO,CAAA;AAAA,IACxC,MAAA,EAAQ,EAAE,IAAA,EAAM,WAAA,EAAa,MAAM,MAAA,EAAO;AAAA,IAC1C,iBAAA,EAAmB,qBAAqB,EAAC;AAAA,IACzC,cAAA,EAAgB,kBAAkB;AAAC,GACrC;AACF;AAmBO,IAAM,0BAAA,GAA6B,CAAC,KAAA,KAAuD;AAChG,EAAA,MAAM,OAAA,GAAU,OAAO,aAAA,CAAc,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,IAAA,KAAS,MAAM,CAAA;AACvE,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAoBO,IAAM,6BAAA,GAAgC,CAAC,KAAA,KAA6C;AACzF,EAAA,MAAM,iBAA2B,EAAC;AAGlC,EAAA,IAAI,OAAO,cAAA,EAAgB;AACzB,IAAA,cAAA,CAAe,IAAA;AAAA,MACb,GAAG,KAAA,CAAM,cAAA,CACN,GAAA,CAAI,CAAA,GAAA,KAAO;AAEV,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,OAAO,GAAA,CAAI,OAAA;AAAA,QACb,CAAA,MAAA,IAAW,KAAA,CAAM,OAAA,CAAQ,GAAA,CAAI,OAAO,CAAA,EAAG;AAErC,UAAA,OAAO,IAAI,OAAA,CACR,MAAA,CAAO,CAAC,IAAA,KAAc,KAAK,IAAA,KAAS,MAAM,CAAA,CAC1C,GAAA,CAAI,CAAC,IAAA,KAAc,IAAA,CAAK,QAAQ,EAAE,CAAA,CAClC,KAAK,GAAG,CAAA;AAAA,QACb;AACA,QAAA,OAAO,EAAA;AAAA,MACT,CAAC,CAAA,CACA,MAAA,CAAO,CAAA,OAAA,KAAW,OAAO;AAAA,KAC9B;AAAA,EACF;AAGA,EAAA,IAAI,OAAO,oBAAA,EAAsB;AAC/B,IAAA,MAAA,CAAO,MAAA,CAAO,KAAA,CAAM,oBAAoB,CAAA,CAAE,QAAQ,CAAA,QAAA,KAAY;AAC5D,MAAA,QAAA,CAAS,QAAQ,CAAA,GAAA,KAAO;AACtB,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,cAAA,CAAe,IAAA,CAAK,IAAI,OAAO,CAAA;AAAA,QACjC;AAAA,MACF,CAAC,CAAA;AAAA,IACH,CAAC,CAAA;AAAA,EACH;AAEA,EAAA,OAAO,cAAA;AACT;AAmBO,IAAM,uBAAA,GAA0B,CAAC,KAAA,KAA2C;AACjF,EAAA,MAAM,cAAA,GAAiB,8BAA8B,KAAK,CAAA;AAC1D,EAAA,OAAO,cAAA,CAAe,KAAK,MAAM,CAAA;AACnC;AAmBO,IAAM,gCAAA,GAAmC,CAAC,MAAA,KAAqC;AACpF,EAAA,MAAM,OAAA,GAAU,QAAQ,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,SAAS,WAAW,CAAA;AAC/D,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAiCO,IAAM,yBAAA,GAA4B,CAAC,MAAA,KAAyD;AACjG,EAAA,IAAI,CAAC,QAAQ,OAAO,MAAA;AAEpB,EAAA,MAAM,OAAA,GAAU,OAAO,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,SAAS,WAAW,CAAA;AAC9D,EAAA,IAAI,CAAC,SAAS,OAAO,MAAA;AAGrB,EAAA,IAAI,OAAA,CAAQ,QAAQ,SAAA,EAAW;AAC7B,IAAA,OAAO,QAAQ,OAAA,CAAQ,SAAA;AAAA,EACzB;AAIA,EAAA,MAAM,cAAA,GAAiB,QAAQ,OAAA,CAAQ,KAAA,EAAO,OAAO,CAAC,CAAA,KAAW,CAAA,CAAE,IAAA,KAAS,WAAW,CAAA;AACvF,EAAA,IAAI,cAAA,IAAkB,cAAA,CAAe,MAAA,GAAS,CAAA,EAAG;AAC/C,IAAA,MAAM,cAAA,GAAiB,cAAA,CACpB,GAAA,CAAI,CAAC,CAAA,KAAW;AAEf,MAAA,IAAI,EAAE,OAAA,IAAW,KAAA,CAAM,OAAA,CAAQ,CAAA,CAAE,OAAO,CAAA,EAAG;AACzC,QAAA,OAAO,EAAE,OAAA,CACN,MAAA,CAAO,CAAC,CAAA,KAAW,EAAE,IAAA,KAAS,MAAM,CAAA,CACpC,GAAA,CAAI,CAAC,CAAA,KAAW,CAAA,CAAE,IAAI,CAAA,CACtB,KAAK,EAAE,CAAA;AAAA,MACZ;AACA,MAAA,OAAO,EAAE,SAAA,IAAa,EAAA;AAAA,IACxB,CAAC,CAAA,CACA,MAAA,CAAO,OAAO,CAAA;AAEjB,IAAA,OAAO,eAAe,MAAA,GAAS,CAAA,GAAI,cAAA,CAAe,IAAA,CAAK,IAAI,CAAA,GAAI,MAAA;AAAA,EACjE;AAEA,EAAA,OAAO,MAAA;AACT;AAuBO,IAAM,uBAAuB,CAAC;AAAA,EACnC,UAAA;AAAA,EACA,QAAA;AAAA,EACA,IAAA;AAAA,EACA,MAAA;AAAA,EACA,KAAA,GAAQ;AACV,CAAA,KAMuH;AACrH,EAAA,OAAO;AAAA,IACL,UAAA;AAAA,IACA,QAAA;AAAA,IACA,IAAA;AAAA,IACA,MAAA;AAAA,IACA;AAAA,GACF;AACF;AAmCO,SAAS,iBAAA,CAAkB;AAAA,EAChC,OAAA;AAAA,EACA,IAAA;AAAA,EACA,EAAA,GAAK,cAAA;AAAA,EACL,kBAAkB;AACpB,CAAA,EAWoB;AAClB,EAAA,OAAO;AAAA,IACL,EAAA;AAAA,IACA,IAAA;AAAA,IACA,OAAA,EAAS;AAAA,MACP,MAAA,EAAQ,CAAA;AAAA,MACR,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,IAAA,EAAM,SAAS,CAAA;AAAA,MACvC,OAAA;AAAA,MACA,GAAI,eAAA,CAAgB,MAAA,GAAS,CAAA,IAAK;AAAA,QAChC,eAAA,EAAiB,eAAA,CAAgB,GAAA,CAAI,CAAA,EAAA,MAAO;AAAA,UAC1C,YAAY,EAAA,CAAG,UAAA;AAAA,UACf,UAAU,EAAA,CAAG,QAAA;AAAA,UACb,MAAM,EAAA,CAAG,IAAA;AAAA,UACT,QAAQ,EAAA,CAAG,MAAA;AAAA,UACX,OAAO,EAAA,CAAG;AAAA,SACZ,CAAE;AAAA;AACJ,KACF;AAAA,IACA,SAAA,sBAAe,IAAA;AAAK,GACtB;AACF;AA+BO,IAAM,qBAAqB,CAAC;AAAA,EACjC,gBAAgB,EAAC;AAAA,EACjB,MAAA;AAAA,EACA,qBAAqB,EAAC;AAAA,EACtB,iBAAiB,EAAC;AAAA,EAClB,uBAAuB,EAAC;AAAA,kBACxBA,gBAAA,GAAiB,IAAIC,6BAAA,EAAe;AAAA,EACpC,KAAA,GAAQ,OAAO,UAAA;AACjB,CAAA,KAaK;AACH,EAAA,OAAO;AAAA,IACL,KAAA,EAAO;AAAA,MACL,aAAA;AAAA,MACA,kBAAA;AAAA,MACA,cAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,MAAA;AAAA,oBACAD,gBAAA;AAAA,IACA;AAAA,GACF;AACF;AAqCO,SAAS,iBAAiB,MAAA,EAAqF;AACpH,EAAA,MAAM,YAAsB,EAAC;AAC7B,EAAA,MAAM,gBAAgC,EAAC;AAEvC,EAAA,KAAA,IAAS,YAAA,GAAe,CAAA,EAAG,YAAA,GAAe,MAAA,CAAO,QAAQ,YAAA,EAAA,EAAgB;AACvE,IAAA,MAAM,OAAA,GAAU,OAAO,YAAY,CAAA;AAEnC,IAAA,IAAI,OAAA,EAAS,SAAS,eAAA,EAAiB;AACrC,MAAA,KAAA,IAAS,kBAAkB,CAAA,EAAG,eAAA,GAAkB,QAAQ,OAAA,CAAQ,eAAA,CAAgB,QAAQ,eAAA,EAAA,EAAmB;AACzG,QAAA,MAAM,UAAA,GAAa,OAAA,CAAQ,OAAA,CAAQ,eAAA,CAAgB,eAAe,CAAA;AAClE,QAAA,IAAI,UAAA,IAAc,WAAW,QAAA,KAAa,UAAA,CAAW,UAAU,QAAA,IAAY,UAAA,CAAW,UAAU,MAAA,CAAA,EAAS;AACvG,UAAA,SAAA,CAAU,IAAA,CAAK,WAAW,QAAQ,CAAA;AAClC,UAAA,aAAA,CAAc,IAAA,CAAK;AAAA,YACjB,UAAU,UAAA,CAAW,QAAA;AAAA,YACrB,YAAY,UAAA,CAAW,UAAA,IAAc,CAAA,EAAG,YAAY,IAAI,eAAe,CAAA,CAAA;AAAA,YACvE,YAAA;AAAA,YACA;AAAA,WACD,CAAA;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,EAAE,KAAA,EAAO,SAAA,EAAW,aAAA,EAAc;AAC3C;AAiBO,IAAM,oBAAA,GAAuB,CAAC,QAAA,KAA2D;AAC9F,EAAA,OAAO,QAAA,EAAU,eAAe,GAAA,CAAI,CAAA,GAAA,KAAO,kCAAkC,GAAG,CAAC,KAAK,EAAC;AACzF;AAmBO,IAAM,4BAAA,GAA+B,CAAC,SAAA,KAAiD;AAC5F,EAAA,OAAO,SAAA,CAAU,MAAA,CAAO,CAAA,GAAA,KAAO,GAAA,CAAI,IAAA,KAAS,WAAW,CAAA,CAAE,GAAA,CAAI,CAAA,GAAA,KAAO,iCAAA,CAAkC,GAAG,CAAC,CAAA;AAC5G;AAyCO,SAAS,mBAAmB,MAAA,EAAmD;AACpF,EAAA,MAAM,UAA4B,EAAC;AAEnC,EAAA,KAAA,MAAW,WAAW,MAAA,EAAQ;AAC5B,IAAA,MAAM,eAAA,GAAkB,SAAS,OAAA,EAAS,eAAA;AAC1C,IAAA,IAAI,CAAC,eAAA,EAAiB;AAEtB,IAAA,KAAA,MAAW,cAAc,eAAA,EAAiB;AACxC,MAAA,IAAI,UAAA,CAAW,KAAA,KAAU,QAAA,IAAY,UAAA,CAAW,WAAW,MAAA,EAAW;AACpE,QAAA,OAAA,CAAQ,IAAA,CAAK;AAAA,UACX,UAAU,UAAA,CAAW,QAAA;AAAA,UACrB,UAAA,EAAY,WAAW,UAAA,IAAc,EAAA;AAAA,UACrC,IAAA,EAAM,UAAA,CAAW,IAAA,IAAQ,EAAC;AAAA,UAC1B,QAAQ,UAAA,CAAW;AAAA,SACpB,CAAA;AAAA,MACH;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,OAAA;AACT","file":"chunk-W3U7MMDX.cjs","sourcesContent":["import type { MastraDBMessage } from '@mastra/core/agent';\nimport type { ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput } from '@mastra/core/evals';\nimport { RequestContext } from '@mastra/core/request-context';\n\n/**\n * Extracts text content from a MastraDBMessage.\n *\n * This function matches the logic used in `MessageList.mastraDBMessageToAIV4UIMessage`.\n * It first checks for a string `content.content` field, then falls back to extracting\n * text from the `parts` array (returning only the last text part, like AI SDK does).\n *\n * @param message - The MastraDBMessage to extract text from\n * @returns The extracted text content, or an empty string if no text is found\n *\n * @example\n * ```ts\n * const message: MastraDBMessage = {\n * id: 'msg-1',\n * role: 'assistant',\n * content: { format: 2, parts: [{ type: 'text', text: 'Hello!' }] },\n * createdAt: new Date(),\n * };\n * const text = getTextContentFromMastraDBMessage(message); // 'Hello!'\n * ```\n */\nexport function getTextContentFromMastraDBMessage(message: MastraDBMessage): string {\n if (typeof message.content.content === 'string' && message.content.content !== '') {\n return message.content.content;\n }\n if (message.content.parts && Array.isArray(message.content.parts)) {\n // Return only the last text part like AI SDK does\n const textParts = message.content.parts.filter(p => p.type === 'text');\n return textParts.length > 0 ? textParts[textParts.length - 1]?.text || '' : '';\n }\n return '';\n}\n\n/**\n * Rounds a number to two decimal places.\n *\n * Uses `Number.EPSILON` to handle floating-point precision issues.\n *\n * @param num - The number to round\n * @returns The number rounded to two decimal places\n *\n * @example\n * ```ts\n * roundToTwoDecimals(0.1 + 0.2); // 0.3\n * roundToTwoDecimals(1.005); // 1.01\n * ```\n */\nexport const roundToTwoDecimals = (num: number) => {\n return Math.round((num + Number.EPSILON) * 100) / 100;\n};\n\n/**\n * Determines if a value is closer to the first target than the second.\n *\n * @param value - The value to compare\n * @param target1 - The first target value\n * @param target2 - The second target value\n * @returns `true` if `value` is closer to `target1` than `target2`\n *\n * @example\n * ```ts\n * isCloserTo(0.6, 1, 0); // true (0.6 is closer to 1)\n * isCloserTo(0.3, 1, 0); // false (0.3 is closer to 0)\n * ```\n */\nexport function isCloserTo(value: number, target1: number, target2: number): boolean {\n return Math.abs(value - target1) < Math.abs(value - target2);\n}\n\n/**\n * Represents a test case for scorer evaluation.\n */\nexport type TestCase = {\n /** The input text to evaluate */\n input: string;\n /** The output text to evaluate */\n output: string;\n /** The expected result of the evaluation */\n expectedResult: {\n /** The expected score */\n score: number;\n /** The optional expected reason */\n reason?: string;\n };\n};\n\n/**\n * Represents a test case with additional context for scorer evaluation.\n */\nexport type TestCaseWithContext = TestCase & {\n /** Additional context strings for the evaluation */\n context: string[];\n};\n\n/**\n * Creates a scoring input object for testing purposes.\n *\n * @param input - The user input text\n * @param output - The assistant output text\n * @param additionalContext - Optional additional context data\n * @param requestContext - Optional request context data\n * @returns A ScoringInput object ready for use in scorer tests\n *\n * @example\n * ```ts\n * const run = createTestRun(\n * 'What is 2+2?',\n * 'The answer is 4.',\n * { topic: 'math' }\n * );\n * ```\n */\nexport const createTestRun = (\n input: string,\n output: string,\n additionalContext?: Record<string, any>,\n requestContext?: Record<string, any>,\n): ScoringInput => {\n return {\n input: [{ role: 'user', content: input }],\n output: { role: 'assistant', text: output },\n additionalContext: additionalContext ?? {},\n requestContext: requestContext ?? {},\n };\n};\n\n/**\n * Extracts the user message text from a scorer run input.\n *\n * Finds the first message with role 'user' and extracts its text content.\n *\n * @param input - The scorer run input containing input messages\n * @returns The user message text, or `undefined` if no user message is found\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const userText = getUserMessageFromRunInput(run.input);\n * return { userText };\n * });\n * ```\n */\nexport const getUserMessageFromRunInput = (input?: ScorerRunInputForAgent): string | undefined => {\n const message = input?.inputMessages.find(({ role }) => role === 'user');\n return message ? getTextContentFromMastraDBMessage(message) : undefined;\n};\n\n/**\n * Extracts all system messages from a scorer run input.\n *\n * Collects text from both standard system messages and tagged system messages\n * (specialized system prompts like memory instructions).\n *\n * @param input - The scorer run input containing system messages\n * @returns An array of system message strings\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const systemMessages = getSystemMessagesFromRunInput(run.input);\n * return { systemPrompt: systemMessages.join('\\n') };\n * });\n * ```\n */\nexport const getSystemMessagesFromRunInput = (input?: ScorerRunInputForAgent): string[] => {\n const systemMessages: string[] = [];\n\n // Add standard system messages\n if (input?.systemMessages) {\n systemMessages.push(\n ...input.systemMessages\n .map(msg => {\n // Handle different content types - extract text if it's an array of parts\n if (typeof msg.content === 'string') {\n return msg.content;\n } else if (Array.isArray(msg.content)) {\n // Extract text from parts array\n return msg.content\n .filter((part: any) => part.type === 'text')\n .map((part: any) => part.text || '')\n .join(' ');\n }\n return '';\n })\n .filter(content => content),\n );\n }\n\n // Add tagged system messages (these are specialized system prompts)\n if (input?.taggedSystemMessages) {\n Object.values(input.taggedSystemMessages).forEach(messages => {\n messages.forEach(msg => {\n if (typeof msg.content === 'string') {\n systemMessages.push(msg.content);\n }\n });\n });\n }\n\n return systemMessages;\n};\n\n/**\n * Combines all system messages into a single prompt string.\n *\n * Joins all system messages (standard and tagged) with double newlines.\n *\n * @param input - The scorer run input containing system messages\n * @returns A combined system prompt string\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const systemPrompt = getCombinedSystemPrompt(run.input);\n * return { systemPrompt };\n * });\n * ```\n */\nexport const getCombinedSystemPrompt = (input?: ScorerRunInputForAgent): string => {\n const systemMessages = getSystemMessagesFromRunInput(input);\n return systemMessages.join('\\n\\n');\n};\n\n/**\n * Extracts the assistant message text from a scorer run output.\n *\n * Finds the first message with role 'assistant' and extracts its text content.\n *\n * @param output - The scorer run output (array of MastraDBMessage)\n * @returns The assistant message text, or `undefined` if no assistant message is found\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const response = getAssistantMessageFromRunOutput(run.output);\n * return { response };\n * });\n * ```\n */\nexport const getAssistantMessageFromRunOutput = (output?: ScorerRunOutputForAgent) => {\n const message = output?.find(({ role }) => role === 'assistant');\n return message ? getTextContentFromMastraDBMessage(message) : undefined;\n};\n\n/**\n * Extracts reasoning text from a scorer run output.\n *\n * This function extracts reasoning content from assistant messages, which is\n * produced by reasoning models like `deepseek-reasoner`. The reasoning can be\n * stored in two places:\n * 1. `content.reasoning` - a string field on the message content\n * 2. `content.parts` - as parts with `type: 'reasoning'` containing `details`\n *\n * @param output - The scorer run output (array of MastraDBMessage)\n * @returns The reasoning text, or `undefined` if no reasoning is present\n *\n * @example\n * ```ts\n * const reasoningScorer = createScorer({\n * id: 'reasoning-scorer',\n * name: 'Reasoning Quality',\n * description: 'Evaluates the quality of model reasoning',\n * type: 'agent',\n * })\n * .preprocess(({ run }) => {\n * const reasoning = getReasoningFromRunOutput(run.output);\n * const response = getAssistantMessageFromRunOutput(run.output);\n * return { reasoning, response };\n * })\n * .generateScore(({ results }) => {\n * // Score based on reasoning quality\n * return results.preprocessStepResult?.reasoning ? 1 : 0;\n * });\n * ```\n */\nexport const getReasoningFromRunOutput = (output?: ScorerRunOutputForAgent): string | undefined => {\n if (!output) return undefined;\n\n const message = output.find(({ role }) => role === 'assistant');\n if (!message) return undefined;\n\n // Check for reasoning in content.reasoning (string format)\n if (message.content.reasoning) {\n return message.content.reasoning;\n }\n\n // Check for reasoning in parts with type 'reasoning'\n // Reasoning models store reasoning in parts as { type: 'reasoning', details: [{ type: 'text', text: '...' }] }\n const reasoningParts = message.content.parts?.filter((p: any) => p.type === 'reasoning');\n if (reasoningParts && reasoningParts.length > 0) {\n const reasoningTexts = reasoningParts\n .map((p: any) => {\n // The reasoning text can be in p.reasoning or in p.details[].text\n if (p.details && Array.isArray(p.details)) {\n return p.details\n .filter((d: any) => d.type === 'text')\n .map((d: any) => d.text)\n .join('');\n }\n return p.reasoning || '';\n })\n .filter(Boolean);\n\n return reasoningTexts.length > 0 ? reasoningTexts.join('\\n') : undefined;\n }\n\n return undefined;\n};\n\n/**\n * Creates a tool invocation object for testing purposes.\n *\n * @param options - The tool invocation configuration\n * @param options.toolCallId - Unique identifier for the tool call\n * @param options.toolName - Name of the tool being called\n * @param options.args - Arguments passed to the tool\n * @param options.result - Result returned by the tool\n * @param options.state - State of the invocation (default: 'result')\n * @returns A tool invocation object\n *\n * @example\n * ```ts\n * const invocation = createToolInvocation({\n * toolCallId: 'call-123',\n * toolName: 'weatherTool',\n * args: { location: 'London' },\n * result: { temperature: 20, condition: 'sunny' },\n * });\n * ```\n */\nexport const createToolInvocation = ({\n toolCallId,\n toolName,\n args,\n result,\n state = 'result',\n}: {\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state?: 'call' | 'partial-call' | 'result';\n}): { toolCallId: string; toolName: string; args: Record<string, any>; result: Record<string, any>; state: string } => {\n return {\n toolCallId,\n toolName,\n args,\n result,\n state,\n };\n};\n\n/**\n * Creates a MastraDBMessage object for testing purposes.\n *\n * Supports optional tool invocations for testing tool call scenarios.\n *\n * @param options - The message configuration\n * @param options.content - The text content of the message\n * @param options.role - The role of the message sender ('user', 'assistant', or 'system')\n * @param options.id - Optional message ID (default: 'test-message')\n * @param options.toolInvocations - Optional array of tool invocations\n * @returns A MastraDBMessage object\n *\n * @example\n * ```ts\n * const message = createTestMessage({\n * content: 'Hello, how can I help?',\n * role: 'assistant',\n * });\n *\n * // With tool invocations\n * const messageWithTools = createTestMessage({\n * content: 'Let me check the weather.',\n * role: 'assistant',\n * toolInvocations: [{\n * toolCallId: 'call-1',\n * toolName: 'weatherTool',\n * args: { location: 'Paris' },\n * result: { temp: 22 },\n * state: 'result',\n * }],\n * });\n * ```\n */\nexport function createTestMessage({\n content,\n role,\n id = 'test-message',\n toolInvocations = [],\n}: {\n content: string;\n role: 'user' | 'assistant' | 'system';\n id?: string;\n toolInvocations?: Array<{\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state: any;\n }>;\n}): MastraDBMessage {\n return {\n id,\n role,\n content: {\n format: 2,\n parts: [{ type: 'text', text: content }],\n content,\n ...(toolInvocations.length > 0 && {\n toolInvocations: toolInvocations.map(ti => ({\n toolCallId: ti.toolCallId,\n toolName: ti.toolName,\n args: ti.args,\n result: ti.result,\n state: ti.state,\n })),\n }),\n },\n createdAt: new Date(),\n };\n}\n\n/**\n * Creates a complete agent test run object for testing scorers.\n *\n * Provides a convenient way to construct the full run object that scorers receive,\n * including input messages, output, system messages, and request context.\n *\n * @param options - The test run configuration\n * @param options.inputMessages - Array of input messages (default: [])\n * @param options.output - The output messages (required)\n * @param options.rememberedMessages - Array of remembered messages from memory (default: [])\n * @param options.systemMessages - Array of system messages (default: [])\n * @param options.taggedSystemMessages - Tagged system messages map (default: {})\n * @param options.requestContext - Request context (default: new RequestContext())\n * @param options.runId - Unique run ID (default: random UUID)\n * @returns A complete test run object\n *\n * @example\n * ```ts\n * const testRun = createAgentTestRun({\n * inputMessages: [createTestMessage({ content: 'Hello', role: 'user' })],\n * output: [createTestMessage({ content: 'Hi there!', role: 'assistant' })],\n * });\n *\n * const result = await scorer.run({\n * input: testRun.input,\n * output: testRun.output,\n * });\n * ```\n */\nexport const createAgentTestRun = ({\n inputMessages = [],\n output,\n rememberedMessages = [],\n systemMessages = [],\n taggedSystemMessages = {},\n requestContext = new RequestContext(),\n runId = crypto.randomUUID(),\n}: {\n inputMessages?: ScorerRunInputForAgent['inputMessages'];\n output: ScorerRunOutputForAgent;\n rememberedMessages?: ScorerRunInputForAgent['rememberedMessages'];\n systemMessages?: ScorerRunInputForAgent['systemMessages'];\n taggedSystemMessages?: ScorerRunInputForAgent['taggedSystemMessages'];\n requestContext?: RequestContext;\n runId?: string;\n}): {\n input: ScorerRunInputForAgent;\n output: ScorerRunOutputForAgent;\n requestContext: RequestContext;\n runId: string;\n} => {\n return {\n input: {\n inputMessages,\n rememberedMessages,\n systemMessages,\n taggedSystemMessages,\n },\n output,\n requestContext,\n runId,\n };\n};\n\n/**\n * Information about a tool call extracted from scorer output.\n */\nexport type ToolCallInfo = {\n /** Name of the tool that was called */\n toolName: string;\n /** Unique identifier for the tool call */\n toolCallId: string;\n /** Index of the message containing this tool call */\n messageIndex: number;\n /** Index of the invocation within the message's tool invocations */\n invocationIndex: number;\n};\n\n/**\n * Extracts all tool calls from a scorer run output.\n *\n * Iterates through all messages and their tool invocations to collect\n * information about tools that were called (with state 'result' or 'call').\n *\n * @param output - The scorer run output (array of MastraDBMessage)\n * @returns An object containing tool names and detailed tool call info\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const { tools, toolCallInfos } = extractToolCalls(run.output);\n * return {\n * toolsUsed: tools,\n * toolCount: tools.length,\n * };\n * });\n * ```\n */\nexport function extractToolCalls(output: ScorerRunOutputForAgent): { tools: string[]; toolCallInfos: ToolCallInfo[] } {\n const toolCalls: string[] = [];\n const toolCallInfos: ToolCallInfo[] = [];\n\n for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {\n const message = output[messageIndex];\n // Tool invocations are now nested under content\n if (message?.content?.toolInvocations) {\n for (let invocationIndex = 0; invocationIndex < message.content.toolInvocations.length; invocationIndex++) {\n const invocation = message.content.toolInvocations[invocationIndex];\n if (invocation && invocation.toolName && (invocation.state === 'result' || invocation.state === 'call')) {\n toolCalls.push(invocation.toolName);\n toolCallInfos.push({\n toolName: invocation.toolName,\n toolCallId: invocation.toolCallId || `${messageIndex}-${invocationIndex}`,\n messageIndex,\n invocationIndex,\n });\n }\n }\n }\n }\n\n return { tools: toolCalls, toolCallInfos };\n}\n\n/**\n * Extracts text content from all input messages.\n *\n * @param runInput - The scorer run input\n * @returns An array of text strings from each input message\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const messages = extractInputMessages(run.input);\n * return { allUserMessages: messages.join('\\n') };\n * });\n * ```\n */\nexport const extractInputMessages = (runInput: ScorerRunInputForAgent | undefined): string[] => {\n return runInput?.inputMessages?.map(msg => getTextContentFromMastraDBMessage(msg)) || [];\n};\n\n/**\n * Extracts text content from all assistant response messages.\n *\n * Filters for messages with role 'assistant' and extracts their text content.\n *\n * @param runOutput - The scorer run output (array of MastraDBMessage)\n * @returns An array of text strings from each assistant message\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const responses = extractAgentResponseMessages(run.output);\n * return { allResponses: responses.join('\\n') };\n * });\n * ```\n */\nexport const extractAgentResponseMessages = (runOutput: ScorerRunOutputForAgent): string[] => {\n return runOutput.filter(msg => msg.role === 'assistant').map(msg => getTextContentFromMastraDBMessage(msg));\n};\n\n/**\n * Information about a tool result extracted from scorer output.\n */\nexport type ToolResultInfo = {\n /** Name of the tool that was called */\n toolName: string;\n /** Unique identifier for the tool call */\n toolCallId: string;\n /** Arguments passed to the tool */\n args: Record<string, any>;\n /** Result returned by the tool */\n result: any;\n};\n\n/**\n * Extracts tool results from a scorer run output.\n *\n * Returns structured objects that can be used with the hallucination scorer's\n * `getContext` hook or for other scorer logic.\n *\n * @param output - The scorer run output (array of MastraDBMessage)\n * @returns An array of ToolResultInfo objects\n *\n * @example\n * ```ts\n * import { extractToolResults } from '@mastra/evals/scorers';\n * import { createHallucinationScorer } from '@mastra/evals/scorers/prebuilt';\n *\n * const scorer = createHallucinationScorer({\n * model: openai('gpt-4o'),\n * options: {\n * getContext: (run) => {\n * const toolResults = extractToolResults(run.output);\n * return toolResults.map(t => JSON.stringify({ tool: t.toolName, result: t.result }));\n * },\n * },\n * });\n * ```\n */\nexport function extractToolResults(output: ScorerRunOutputForAgent): ToolResultInfo[] {\n const results: ToolResultInfo[] = [];\n\n for (const message of output) {\n const toolInvocations = message?.content?.toolInvocations;\n if (!toolInvocations) continue;\n\n for (const invocation of toolInvocations) {\n if (invocation.state === 'result' && invocation.result !== undefined) {\n results.push({\n toolName: invocation.toolName,\n toolCallId: invocation.toolCallId || '',\n args: invocation.args || {},\n result: invocation.result,\n });\n }\n }\n }\n\n return results;\n}\n"]}
|
package/dist/docs/README.md
CHANGED
package/dist/docs/SKILL.md
CHANGED
|
@@ -1312,6 +1312,10 @@ The `createHallucinationScorer()` function accepts a single options object with
|
|
|
1312
1312
|
|
|
1313
1313
|
This function returns an instance of the MastraScorer class. The `.run()` method accepts the same input as other scorers (see the [MastraScorer reference](./mastra-scorer)), but the return value includes LLM-specific fields as documented below.
|
|
1314
1314
|
|
|
1315
|
+
### GetContextParams
|
|
1316
|
+
|
|
1317
|
+
The `getContext` hook receives the following parameters:
|
|
1318
|
+
|
|
1315
1319
|
## .run() Returns
|
|
1316
1320
|
|
|
1317
1321
|
## Scoring Details
|
|
@@ -1361,28 +1365,98 @@ A hallucination score between 0 and 1:
|
|
|
1361
1365
|
|
|
1362
1366
|
**Note:** The score represents the degree of hallucination - lower scores indicate better factual alignment with the provided context
|
|
1363
1367
|
|
|
1364
|
-
##
|
|
1368
|
+
## Examples
|
|
1369
|
+
|
|
1370
|
+
### Static Context
|
|
1371
|
+
|
|
1372
|
+
Use static context when you have known ground truth to compare against:
|
|
1373
|
+
|
|
1374
|
+
```typescript title="src/example-static-context.ts"
|
|
1375
|
+
import { createHallucinationScorer } from "@mastra/evals/scorers/prebuilt";
|
|
1376
|
+
|
|
1377
|
+
const scorer = createHallucinationScorer({
|
|
1378
|
+
model: "openai/gpt-4o",
|
|
1379
|
+
options: {
|
|
1380
|
+
context: [
|
|
1381
|
+
"The first iPhone was announced on January 9, 2007.",
|
|
1382
|
+
"It was released on June 29, 2007.",
|
|
1383
|
+
"Steve Jobs introduced it at Macworld.",
|
|
1384
|
+
],
|
|
1385
|
+
},
|
|
1386
|
+
});
|
|
1387
|
+
```
|
|
1388
|
+
|
|
1389
|
+
### Dynamic Context with getContext
|
|
1390
|
+
|
|
1391
|
+
Use `getContext` for live scoring scenarios where context comes from tool results:
|
|
1392
|
+
|
|
1393
|
+
```typescript title="src/example-dynamic-context.ts"
|
|
1394
|
+
import { createHallucinationScorer } from "@mastra/evals/scorers/prebuilt";
|
|
1395
|
+
import { extractToolResults } from "@mastra/evals/scorers";
|
|
1396
|
+
|
|
1397
|
+
const scorer = createHallucinationScorer({
|
|
1398
|
+
model: "openai/gpt-4o",
|
|
1399
|
+
options: {
|
|
1400
|
+
getContext: ({ run, step }) => {
|
|
1401
|
+
// Extract tool results as context
|
|
1402
|
+
const toolResults = extractToolResults(run.output);
|
|
1403
|
+
return toolResults.map((t) =>
|
|
1404
|
+
JSON.stringify({ tool: t.toolName, result: t.result })
|
|
1405
|
+
);
|
|
1406
|
+
},
|
|
1407
|
+
},
|
|
1408
|
+
});
|
|
1409
|
+
```
|
|
1410
|
+
|
|
1411
|
+
### Live Scoring with Agent
|
|
1412
|
+
|
|
1413
|
+
Attach the scorer to an agent for live evaluation:
|
|
1414
|
+
|
|
1415
|
+
```typescript title="src/example-live-scoring.ts"
|
|
1416
|
+
import { Agent } from "@mastra/core/agent";
|
|
1417
|
+
import { createHallucinationScorer } from "@mastra/evals/scorers/prebuilt";
|
|
1418
|
+
import { extractToolResults } from "@mastra/evals/scorers";
|
|
1419
|
+
|
|
1420
|
+
const hallucinationScorer = createHallucinationScorer({
|
|
1421
|
+
model: "openai/gpt-4o",
|
|
1422
|
+
options: {
|
|
1423
|
+
getContext: ({ run }) => {
|
|
1424
|
+
const toolResults = extractToolResults(run.output);
|
|
1425
|
+
return toolResults.map((t) =>
|
|
1426
|
+
JSON.stringify({ tool: t.toolName, result: t.result })
|
|
1427
|
+
);
|
|
1428
|
+
},
|
|
1429
|
+
},
|
|
1430
|
+
});
|
|
1365
1431
|
|
|
1366
|
-
|
|
1432
|
+
const agent = new Agent({
|
|
1433
|
+
name: "my-agent",
|
|
1434
|
+
model: "openai/gpt-4o",
|
|
1435
|
+
instructions: "You are a helpful assistant.",
|
|
1436
|
+
evals: {
|
|
1437
|
+
scorers: [hallucinationScorer],
|
|
1438
|
+
},
|
|
1439
|
+
});
|
|
1440
|
+
```
|
|
1441
|
+
|
|
1442
|
+
### Batch Evaluation with runEvals
|
|
1367
1443
|
|
|
1368
|
-
```typescript title="src/example-
|
|
1444
|
+
```typescript title="src/example-batch-evals.ts"
|
|
1369
1445
|
import { runEvals } from "@mastra/core/evals";
|
|
1370
1446
|
import { createHallucinationScorer } from "@mastra/evals/scorers/prebuilt";
|
|
1371
1447
|
import { myAgent } from "./agent";
|
|
1372
1448
|
|
|
1373
|
-
// Context is typically populated from agent tool calls or RAG retrieval
|
|
1374
1449
|
const scorer = createHallucinationScorer({
|
|
1375
1450
|
model: "openai/gpt-4o",
|
|
1451
|
+
options: {
|
|
1452
|
+
context: ["Known fact 1", "Known fact 2"],
|
|
1453
|
+
},
|
|
1376
1454
|
});
|
|
1377
1455
|
|
|
1378
1456
|
const result = await runEvals({
|
|
1379
1457
|
data: [
|
|
1380
|
-
{
|
|
1381
|
-
|
|
1382
|
-
},
|
|
1383
|
-
{
|
|
1384
|
-
input: "Tell me about the original iPhone announcement.",
|
|
1385
|
-
},
|
|
1458
|
+
{ input: "Tell me about topic A" },
|
|
1459
|
+
{ input: "Tell me about topic B" },
|
|
1386
1460
|
],
|
|
1387
1461
|
scorers: [scorer],
|
|
1388
1462
|
target: myAgent,
|
|
@@ -10,7 +10,7 @@ export declare function createToolCallAccuracyScorerCode(options: ToolCallAccura
|
|
|
10
10
|
expectedToolOrder: string[] | undefined;
|
|
11
11
|
hasToolCalls: boolean;
|
|
12
12
|
correctToolCalled: boolean;
|
|
13
|
-
toolCallInfos: import("
|
|
13
|
+
toolCallInfos: import("../..").ToolCallInfo[];
|
|
14
14
|
correctOrderCalled: boolean | null;
|
|
15
15
|
}> & Record<"generateScoreStepResult", number>>;
|
|
16
16
|
export {};
|
package/dist/scorers/index.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/scorers/index.ts"],"names":[],"mappings":"AAAA,cAAc,OAAO,CAAC;AACtB,cAAc,QAAQ,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/scorers/index.ts"],"names":[],"mappings":"AAAA,cAAc,OAAO,CAAC;AACtB,cAAc,QAAQ,CAAC;AACvB,cAAc,SAAS,CAAC"}
|
|
@@ -1,12 +1,29 @@
|
|
|
1
|
+
import type { ScorerRunInputForAgent, ScorerRunOutputForAgent } from '@mastra/core/evals';
|
|
1
2
|
import type { MastraModelConfig } from '@mastra/core/llm';
|
|
3
|
+
import type { TracingContext } from '@mastra/core/observability';
|
|
4
|
+
export interface GetContextRun {
|
|
5
|
+
input?: ScorerRunInputForAgent;
|
|
6
|
+
output: ScorerRunOutputForAgent;
|
|
7
|
+
runId?: string;
|
|
8
|
+
requestContext?: Record<string, any>;
|
|
9
|
+
tracingContext?: TracingContext;
|
|
10
|
+
}
|
|
11
|
+
export interface GetContextParams {
|
|
12
|
+
run: GetContextRun;
|
|
13
|
+
results: Record<string, any>;
|
|
14
|
+
score?: number;
|
|
15
|
+
step: 'analyze' | 'generateReason';
|
|
16
|
+
}
|
|
17
|
+
export type GetContextFn = (params: GetContextParams) => string[] | Promise<string[]>;
|
|
2
18
|
export interface HallucinationMetricOptions {
|
|
3
19
|
scale?: number;
|
|
4
|
-
context
|
|
20
|
+
context?: string[];
|
|
21
|
+
getContext?: GetContextFn;
|
|
5
22
|
}
|
|
6
23
|
export declare function createHallucinationScorer({ model, options, }: {
|
|
7
24
|
model: MastraModelConfig;
|
|
8
25
|
options?: HallucinationMetricOptions;
|
|
9
|
-
}): import("@mastra/core/evals").MastraScorer<"hallucination-scorer",
|
|
26
|
+
}): import("@mastra/core/evals").MastraScorer<"hallucination-scorer", ScorerRunInputForAgent, ScorerRunOutputForAgent, Record<"preprocessStepResult", {
|
|
10
27
|
claims: string[];
|
|
11
28
|
}> & Record<"analyzeStepResult", {
|
|
12
29
|
verdicts: {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/hallucination/index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/hallucination/index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,oBAAoB,CAAC;AAC1F,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAC1D,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAWjE,MAAM,WAAW,aAAa;IAC5B,KAAK,CAAC,EAAE,sBAAsB,CAAC;IAC/B,MAAM,EAAE,uBAAuB,CAAC;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IACrC,cAAc,CAAC,EAAE,cAAc,CAAC;CACjC;AAED,MAAM,WAAW,gBAAgB;IAC/B,GAAG,EAAE,aAAa,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC7B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,SAAS,GAAG,gBAAgB,CAAC;CACpC;AAED,MAAM,MAAM,YAAY,GAAG,CAAC,MAAM,EAAE,gBAAgB,KAAK,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;AAEtF,MAAM,WAAW,0BAA0B;IACzC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,UAAU,CAAC,EAAE,YAAY,CAAC;CAC3B;AAED,wBAAgB,yBAAyB,CAAC,EACxC,KAAK,EACL,OAAO,GACR,EAAE;IACD,KAAK,EAAE,iBAAiB,CAAC;IACzB,OAAO,CAAC,EAAE,0BAA0B,CAAC;CACtC;;;;;;;;6FA0EA"}
|
|
@@ -7,7 +7,7 @@ export interface ToolCallAccuracyOptions {
|
|
|
7
7
|
export declare function createToolCallAccuracyScorerLLM({ model, availableTools }: ToolCallAccuracyOptions): import("@mastra/core/evals").MastraScorer<"llm-tool-call-accuracy-scorer", import("@mastra/core/evals").ScorerRunInputForAgent, import("@mastra/core/evals").ScorerRunOutputForAgent, Record<"preprocessStepResult", {
|
|
8
8
|
actualTools: string[];
|
|
9
9
|
hasToolCalls: boolean;
|
|
10
|
-
toolCallInfos: import("
|
|
10
|
+
toolCallInfos: import("../..").ToolCallInfo[];
|
|
11
11
|
}> & Record<"analyzeStepResult", {
|
|
12
12
|
evaluations: {
|
|
13
13
|
reasoning: string;
|