@mastra/evals 0.14.4 → 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. package/CHANGELOG.md +34 -25
  2. package/README.md +19 -159
  3. package/dist/{chunk-KHEXN75Q.js → chunk-CCLM7KPF.js} +45 -21
  4. package/dist/chunk-CCLM7KPF.js.map +1 -0
  5. package/dist/{chunk-QKR2PMLZ.cjs → chunk-TPQLLHZW.cjs} +46 -21
  6. package/dist/chunk-TPQLLHZW.cjs.map +1 -0
  7. package/dist/scorers/code/completeness/index.d.ts +1 -1
  8. package/dist/scorers/code/completeness/index.d.ts.map +1 -1
  9. package/dist/scorers/code/content-similarity/index.d.ts +1 -1
  10. package/dist/scorers/code/content-similarity/index.d.ts.map +1 -1
  11. package/dist/scorers/code/keyword-coverage/index.d.ts +1 -1
  12. package/dist/scorers/code/keyword-coverage/index.d.ts.map +1 -1
  13. package/dist/scorers/code/textual-difference/index.d.ts +1 -1
  14. package/dist/scorers/code/textual-difference/index.d.ts.map +1 -1
  15. package/dist/scorers/code/tone/index.d.ts +1 -1
  16. package/dist/scorers/code/tone/index.d.ts.map +1 -1
  17. package/dist/scorers/code/tool-call-accuracy/index.d.ts +1 -1
  18. package/dist/scorers/code/tool-call-accuracy/index.d.ts.map +1 -1
  19. package/dist/scorers/llm/answer-relevancy/index.d.ts +1 -1
  20. package/dist/scorers/llm/answer-relevancy/index.d.ts.map +1 -1
  21. package/dist/scorers/llm/answer-similarity/index.d.ts +2 -2
  22. package/dist/scorers/llm/answer-similarity/index.d.ts.map +1 -1
  23. package/dist/scorers/llm/bias/index.d.ts +2 -2
  24. package/dist/scorers/llm/bias/index.d.ts.map +1 -1
  25. package/dist/scorers/llm/context-precision/index.d.ts +3 -3
  26. package/dist/scorers/llm/context-precision/index.d.ts.map +1 -1
  27. package/dist/scorers/llm/context-relevance/index.d.ts +3 -3
  28. package/dist/scorers/llm/context-relevance/index.d.ts.map +1 -1
  29. package/dist/scorers/llm/faithfulness/index.d.ts +2 -2
  30. package/dist/scorers/llm/faithfulness/index.d.ts.map +1 -1
  31. package/dist/scorers/llm/hallucination/index.d.ts +2 -2
  32. package/dist/scorers/llm/hallucination/index.d.ts.map +1 -1
  33. package/dist/scorers/llm/noise-sensitivity/index.d.ts +1 -1
  34. package/dist/scorers/llm/noise-sensitivity/index.d.ts.map +1 -1
  35. package/dist/scorers/llm/prompt-alignment/index.d.ts +2 -2
  36. package/dist/scorers/llm/prompt-alignment/index.d.ts.map +1 -1
  37. package/dist/scorers/llm/tool-call-accuracy/index.d.ts +2 -2
  38. package/dist/scorers/llm/tool-call-accuracy/index.d.ts.map +1 -1
  39. package/dist/scorers/llm/toxicity/index.d.ts +2 -2
  40. package/dist/scorers/llm/toxicity/index.d.ts.map +1 -1
  41. package/dist/scorers/{llm → prebuilt}/index.cjs +479 -62
  42. package/dist/scorers/prebuilt/index.cjs.map +1 -0
  43. package/dist/scorers/prebuilt/index.d.ts +3 -0
  44. package/dist/scorers/prebuilt/index.d.ts.map +1 -0
  45. package/dist/scorers/{llm → prebuilt}/index.js +419 -15
  46. package/dist/scorers/prebuilt/index.js.map +1 -0
  47. package/dist/scorers/utils.cjs +21 -17
  48. package/dist/scorers/utils.d.ts +21 -11
  49. package/dist/scorers/utils.d.ts.map +1 -1
  50. package/dist/scorers/utils.js +1 -1
  51. package/package.json +15 -59
  52. package/dist/attachListeners.d.ts +0 -4
  53. package/dist/attachListeners.d.ts.map +0 -1
  54. package/dist/chunk-44PMY5ES.js +0 -78
  55. package/dist/chunk-44PMY5ES.js.map +0 -1
  56. package/dist/chunk-7QAUEU4L.cjs +0 -10
  57. package/dist/chunk-7QAUEU4L.cjs.map +0 -1
  58. package/dist/chunk-EMMSS5I5.cjs +0 -37
  59. package/dist/chunk-EMMSS5I5.cjs.map +0 -1
  60. package/dist/chunk-G3PMV62Z.js +0 -33
  61. package/dist/chunk-G3PMV62Z.js.map +0 -1
  62. package/dist/chunk-IUSAD2BW.cjs +0 -19
  63. package/dist/chunk-IUSAD2BW.cjs.map +0 -1
  64. package/dist/chunk-KHEXN75Q.js.map +0 -1
  65. package/dist/chunk-PWGOG6ML.cjs +0 -81
  66. package/dist/chunk-PWGOG6ML.cjs.map +0 -1
  67. package/dist/chunk-QKR2PMLZ.cjs.map +0 -1
  68. package/dist/chunk-QTWX6TKR.js +0 -8
  69. package/dist/chunk-QTWX6TKR.js.map +0 -1
  70. package/dist/chunk-YGTIO3J5.js +0 -17
  71. package/dist/chunk-YGTIO3J5.js.map +0 -1
  72. package/dist/dist-LDTK3TIP.cjs +0 -16759
  73. package/dist/dist-LDTK3TIP.cjs.map +0 -1
  74. package/dist/dist-OWYZEOJK.js +0 -16737
  75. package/dist/dist-OWYZEOJK.js.map +0 -1
  76. package/dist/evaluation.d.ts +0 -8
  77. package/dist/evaluation.d.ts.map +0 -1
  78. package/dist/index.cjs +0 -93
  79. package/dist/index.cjs.map +0 -1
  80. package/dist/index.d.ts +0 -3
  81. package/dist/index.d.ts.map +0 -1
  82. package/dist/index.js +0 -89
  83. package/dist/index.js.map +0 -1
  84. package/dist/magic-string.es-7ORA5OGR.js +0 -1305
  85. package/dist/magic-string.es-7ORA5OGR.js.map +0 -1
  86. package/dist/magic-string.es-NZ2XWFKN.cjs +0 -1311
  87. package/dist/magic-string.es-NZ2XWFKN.cjs.map +0 -1
  88. package/dist/metrics/index.d.ts +0 -4
  89. package/dist/metrics/index.d.ts.map +0 -1
  90. package/dist/metrics/judge/index.cjs +0 -12
  91. package/dist/metrics/judge/index.cjs.map +0 -1
  92. package/dist/metrics/judge/index.d.ts +0 -7
  93. package/dist/metrics/judge/index.d.ts.map +0 -1
  94. package/dist/metrics/judge/index.js +0 -3
  95. package/dist/metrics/judge/index.js.map +0 -1
  96. package/dist/metrics/llm/answer-relevancy/index.d.ts +0 -16
  97. package/dist/metrics/llm/answer-relevancy/index.d.ts.map +0 -1
  98. package/dist/metrics/llm/answer-relevancy/metricJudge.d.ts +0 -20
  99. package/dist/metrics/llm/answer-relevancy/metricJudge.d.ts.map +0 -1
  100. package/dist/metrics/llm/answer-relevancy/prompts.d.ts +0 -19
  101. package/dist/metrics/llm/answer-relevancy/prompts.d.ts.map +0 -1
  102. package/dist/metrics/llm/bias/index.d.ts +0 -14
  103. package/dist/metrics/llm/bias/index.d.ts.map +0 -1
  104. package/dist/metrics/llm/bias/metricJudge.d.ts +0 -14
  105. package/dist/metrics/llm/bias/metricJudge.d.ts.map +0 -1
  106. package/dist/metrics/llm/bias/prompts.d.ts +0 -14
  107. package/dist/metrics/llm/bias/prompts.d.ts.map +0 -1
  108. package/dist/metrics/llm/context-position/index.d.ts +0 -16
  109. package/dist/metrics/llm/context-position/index.d.ts.map +0 -1
  110. package/dist/metrics/llm/context-position/metricJudge.d.ts +0 -20
  111. package/dist/metrics/llm/context-position/metricJudge.d.ts.map +0 -1
  112. package/dist/metrics/llm/context-position/prompts.d.ts +0 -17
  113. package/dist/metrics/llm/context-position/prompts.d.ts.map +0 -1
  114. package/dist/metrics/llm/context-precision/index.d.ts +0 -16
  115. package/dist/metrics/llm/context-precision/index.d.ts.map +0 -1
  116. package/dist/metrics/llm/context-precision/metricJudge.d.ts +0 -20
  117. package/dist/metrics/llm/context-precision/metricJudge.d.ts.map +0 -1
  118. package/dist/metrics/llm/context-precision/prompts.d.ts +0 -17
  119. package/dist/metrics/llm/context-precision/prompts.d.ts.map +0 -1
  120. package/dist/metrics/llm/context-relevancy/index.d.ts +0 -16
  121. package/dist/metrics/llm/context-relevancy/index.d.ts.map +0 -1
  122. package/dist/metrics/llm/context-relevancy/metricJudge.d.ts +0 -16
  123. package/dist/metrics/llm/context-relevancy/metricJudge.d.ts.map +0 -1
  124. package/dist/metrics/llm/context-relevancy/prompts.d.ts +0 -13
  125. package/dist/metrics/llm/context-relevancy/prompts.d.ts.map +0 -1
  126. package/dist/metrics/llm/contextual-recall/index.d.ts +0 -16
  127. package/dist/metrics/llm/contextual-recall/index.d.ts.map +0 -1
  128. package/dist/metrics/llm/contextual-recall/metricJudge.d.ts +0 -16
  129. package/dist/metrics/llm/contextual-recall/metricJudge.d.ts.map +0 -1
  130. package/dist/metrics/llm/contextual-recall/prompts.d.ts +0 -13
  131. package/dist/metrics/llm/contextual-recall/prompts.d.ts.map +0 -1
  132. package/dist/metrics/llm/faithfulness/index.d.ts +0 -16
  133. package/dist/metrics/llm/faithfulness/index.d.ts.map +0 -1
  134. package/dist/metrics/llm/faithfulness/metricJudge.d.ts +0 -22
  135. package/dist/metrics/llm/faithfulness/metricJudge.d.ts.map +0 -1
  136. package/dist/metrics/llm/faithfulness/prompts.d.ts +0 -20
  137. package/dist/metrics/llm/faithfulness/prompts.d.ts.map +0 -1
  138. package/dist/metrics/llm/hallucination/index.d.ts +0 -16
  139. package/dist/metrics/llm/hallucination/index.d.ts.map +0 -1
  140. package/dist/metrics/llm/hallucination/metricJudge.d.ts +0 -22
  141. package/dist/metrics/llm/hallucination/metricJudge.d.ts.map +0 -1
  142. package/dist/metrics/llm/hallucination/prompts.d.ts +0 -17
  143. package/dist/metrics/llm/hallucination/prompts.d.ts.map +0 -1
  144. package/dist/metrics/llm/index.cjs +0 -2481
  145. package/dist/metrics/llm/index.cjs.map +0 -1
  146. package/dist/metrics/llm/index.d.ts +0 -12
  147. package/dist/metrics/llm/index.d.ts.map +0 -1
  148. package/dist/metrics/llm/index.js +0 -2469
  149. package/dist/metrics/llm/index.js.map +0 -1
  150. package/dist/metrics/llm/prompt-alignment/index.d.ts +0 -33
  151. package/dist/metrics/llm/prompt-alignment/index.d.ts.map +0 -1
  152. package/dist/metrics/llm/prompt-alignment/metricJudge.d.ts +0 -20
  153. package/dist/metrics/llm/prompt-alignment/metricJudge.d.ts.map +0 -1
  154. package/dist/metrics/llm/prompt-alignment/prompts.d.ts +0 -17
  155. package/dist/metrics/llm/prompt-alignment/prompts.d.ts.map +0 -1
  156. package/dist/metrics/llm/summarization/index.d.ts +0 -19
  157. package/dist/metrics/llm/summarization/index.d.ts.map +0 -1
  158. package/dist/metrics/llm/summarization/metricJudge.d.ts +0 -34
  159. package/dist/metrics/llm/summarization/metricJudge.d.ts.map +0 -1
  160. package/dist/metrics/llm/summarization/prompts.d.ts +0 -30
  161. package/dist/metrics/llm/summarization/prompts.d.ts.map +0 -1
  162. package/dist/metrics/llm/toxicity/index.d.ts +0 -14
  163. package/dist/metrics/llm/toxicity/index.d.ts.map +0 -1
  164. package/dist/metrics/llm/toxicity/metricJudge.d.ts +0 -14
  165. package/dist/metrics/llm/toxicity/metricJudge.d.ts.map +0 -1
  166. package/dist/metrics/llm/toxicity/prompts.d.ts +0 -10
  167. package/dist/metrics/llm/toxicity/prompts.d.ts.map +0 -1
  168. package/dist/metrics/llm/types.d.ts +0 -7
  169. package/dist/metrics/llm/types.d.ts.map +0 -1
  170. package/dist/metrics/llm/utils.d.ts +0 -14
  171. package/dist/metrics/llm/utils.d.ts.map +0 -1
  172. package/dist/metrics/nlp/completeness/index.d.ts +0 -21
  173. package/dist/metrics/nlp/completeness/index.d.ts.map +0 -1
  174. package/dist/metrics/nlp/content-similarity/index.d.ts +0 -18
  175. package/dist/metrics/nlp/content-similarity/index.d.ts.map +0 -1
  176. package/dist/metrics/nlp/index.cjs +0 -201
  177. package/dist/metrics/nlp/index.cjs.map +0 -1
  178. package/dist/metrics/nlp/index.d.ts +0 -6
  179. package/dist/metrics/nlp/index.d.ts.map +0 -1
  180. package/dist/metrics/nlp/index.js +0 -188
  181. package/dist/metrics/nlp/index.js.map +0 -1
  182. package/dist/metrics/nlp/keyword-coverage/index.d.ts +0 -13
  183. package/dist/metrics/nlp/keyword-coverage/index.d.ts.map +0 -1
  184. package/dist/metrics/nlp/textual-difference/index.d.ts +0 -15
  185. package/dist/metrics/nlp/textual-difference/index.d.ts.map +0 -1
  186. package/dist/metrics/nlp/tone/index.d.ts +0 -18
  187. package/dist/metrics/nlp/tone/index.d.ts.map +0 -1
  188. package/dist/ratio.d.ts +0 -13
  189. package/dist/ratio.d.ts.map +0 -1
  190. package/dist/scorers/code/index.cjs +0 -327
  191. package/dist/scorers/code/index.cjs.map +0 -1
  192. package/dist/scorers/code/index.js +0 -313
  193. package/dist/scorers/code/index.js.map +0 -1
  194. package/dist/scorers/llm/index.cjs.map +0 -1
  195. package/dist/scorers/llm/index.js.map +0 -1
@@ -1,6 +1,12 @@
1
- import { RuntimeContext } from '@mastra/core/runtime-context';
2
- import type { ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput } from '@mastra/core/scores';
3
- import type { ToolInvocation, UIMessage } from 'ai';
1
+ import type { MastraDBMessage } from '@mastra/core/agent';
2
+ import type { ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput } from '@mastra/core/evals';
3
+ import { RequestContext } from '@mastra/core/request-context';
4
+ import type { ToolInvocation } from 'ai';
5
+ /**
6
+ * Extract text content from MastraDBMessage
7
+ * Matches the logic used in MessageList.mastraDBMessageToAIV4UIMessage
8
+ */
9
+ export declare function getTextContentFromMastraDBMessage(message: MastraDBMessage): string;
4
10
  export declare const roundToTwoDecimals: (num: number) => number;
5
11
  export declare function isCloserTo(value: number, target1: number, target2: number): boolean;
6
12
  export type TestCase = {
@@ -14,7 +20,7 @@ export type TestCase = {
14
20
  export type TestCaseWithContext = TestCase & {
15
21
  context: string[];
16
22
  };
17
- export declare const createTestRun: (input: string, output: string, additionalContext?: Record<string, any>, runtimeContext?: Record<string, any>) => ScoringInput;
23
+ export declare const createTestRun: (input: string, output: string, additionalContext?: Record<string, any>, requestContext?: Record<string, any>) => ScoringInput;
18
24
  export declare const getUserMessageFromRunInput: (input?: ScorerRunInputForAgent) => string | undefined;
19
25
  export declare const getSystemMessagesFromRunInput: (input?: ScorerRunInputForAgent) => string[];
20
26
  export declare const getCombinedSystemPrompt: (input?: ScorerRunInputForAgent) => string;
@@ -32,10 +38,14 @@ export declare const createToolInvocation: ({ toolCallId, toolName, args, result
32
38
  result: Record<string, any>;
33
39
  state: string;
34
40
  };
35
- export declare const createUIMessage: ({ content, role, id, toolInvocations, }: {
36
- id: string;
37
- role: "user" | "assistant" | "system";
41
+ /**
42
+ * Helper function to create MastraDBMessage objects for tests
43
+ * Supports optional tool invocations for testing tool call scenarios
44
+ */
45
+ export declare function createTestMessage({ content, role, id, toolInvocations, }: {
38
46
  content: string;
47
+ role: 'user' | 'assistant' | 'system';
48
+ id?: string;
39
49
  toolInvocations?: Array<{
40
50
  toolCallId: string;
41
51
  toolName: string;
@@ -43,19 +53,19 @@ export declare const createUIMessage: ({ content, role, id, toolInvocations, }:
43
53
  result: Record<string, any>;
44
54
  state: any;
45
55
  }>;
46
- }) => UIMessage;
47
- export declare const createAgentTestRun: ({ inputMessages, output, rememberedMessages, systemMessages, taggedSystemMessages, runtimeContext, runId, }: {
56
+ }): MastraDBMessage;
57
+ export declare const createAgentTestRun: ({ inputMessages, output, rememberedMessages, systemMessages, taggedSystemMessages, requestContext, runId, }: {
48
58
  inputMessages?: ScorerRunInputForAgent["inputMessages"];
49
59
  output: ScorerRunOutputForAgent;
50
60
  rememberedMessages?: ScorerRunInputForAgent["rememberedMessages"];
51
61
  systemMessages?: ScorerRunInputForAgent["systemMessages"];
52
62
  taggedSystemMessages?: ScorerRunInputForAgent["taggedSystemMessages"];
53
- runtimeContext?: RuntimeContext;
63
+ requestContext?: RequestContext;
54
64
  runId?: string;
55
65
  }) => {
56
66
  input: ScorerRunInputForAgent;
57
67
  output: ScorerRunOutputForAgent;
58
- runtimeContext: RuntimeContext;
68
+ requestContext: RequestContext;
59
69
  runId: string;
60
70
  };
61
71
  export type ToolCallInfo = {
@@ -1 +1 @@
1
- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/scorers/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AAC9D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACzG,OAAO,KAAK,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AAEpD,eAAO,MAAM,kBAAkB,GAAI,KAAK,MAAM,WAE7C,CAAC;AAEF,wBAAgB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAEnF;AAED,MAAM,MAAM,QAAQ,GAAG;IACrB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,cAAc,EAAE;QACd,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC;CACH,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG,QAAQ,GAAG;IAC3C,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,aAAa,GACxB,OAAO,MAAM,EACb,QAAQ,MAAM,EACd,oBAAoB,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EACvC,iBAAiB,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,KACnC,YAOF,CAAC;AAEF,eAAO,MAAM,0BAA0B,GAAI,QAAQ,sBAAsB,uBAExE,CAAC;AAEF,eAAO,MAAM,6BAA6B,GAAI,QAAQ,sBAAsB,KAAG,MAAM,EAoCpF,CAAC;AAEF,eAAO,MAAM,uBAAuB,GAAI,QAAQ,sBAAsB,KAAG,MAGxE,CAAC;AAEF,eAAO,MAAM,gCAAgC,GAAI,SAAS,uBAAuB,uBAEhF,CAAC;AAEF,eAAO,MAAM,oBAAoB,GAAI,gDAMlC;IACD,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC1B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,KAAK,CAAC,EAAE,cAAc,CAAC,OAAO,CAAC,CAAC;CACjC,KAAG;IAAE,UAAU,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAQhH,CAAC;AAEF,eAAO,MAAM,eAAe,GAAI,yCAK7B;IACD,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,GAAG,WAAW,GAAG,QAAQ,CAAC;IACtC,OAAO,EAAE,MAAM,CAAC;IAChB,eAAe,CAAC,EAAE,KAAK,CAAC;QACtB,UAAU,EAAE,MAAM,CAAC;QACnB,QAAQ,EAAE,MAAM,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC1B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC5B,KAAK,EAAE,GAAG,CAAC;KACZ,CAAC,CAAC;CACJ,KAAG,SAQH,CAAC;AAEF,eAAO,MAAM,kBAAkB,GAAI,6GAQhC;IACD,aAAa,CAAC,EAAE,sBAAsB,CAAC,eAAe,CAAC,CAAC;IACxD,MAAM,EAAE,uBAAuB,CAAC;IAChC,kBAAkB,CAAC,EAAE,sBAAsB,CAAC,oBAAoB,CAAC,CAAC;IAClE,cAAc,CAAC,EAAE,sBAAsB,CAAC,gBAAgB,CAAC,CAAC;IAC1D,oBAAoB,CAAC,EAAE,sBAAsB,CAAC,sBAAsB,CAAC,CAAC;IACtE,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB,KAAG;IACF,KAAK,EAAE,sBAAsB,CAAC;IAC9B,MAAM,EAAE,uBAAuB,CAAC;IAChC,cAAc,EAAE,cAAc,CAAC;IAC/B,KAAK,EAAE,MAAM,CAAC;CAaf,CAAC;AAEF,MAAM,MAAM,YAAY,GAAG;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;CACzB,CAAC;AAEF,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,uBAAuB,GAAG;IAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,aAAa,EAAE,YAAY,EAAE,CAAA;CAAE,CAuBpH;AAED,eAAO,MAAM,oBAAoB,GAAI,UAAU,sBAAsB,GAAG,SAAS,KAAG,MAAM,EAEzF,CAAC;AAEF,eAAO,MAAM,4BAA4B,GAAI,WAAW,uBAAuB,KAAG,MAAM,EAEvF,CAAC"}
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/scorers/utils.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACxG,OAAO,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AAC9D,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC;;;GAGG;AACH,wBAAgB,iCAAiC,CAAC,OAAO,EAAE,eAAe,GAAG,MAAM,CAUlF;AAED,eAAO,MAAM,kBAAkB,GAAI,KAAK,MAAM,WAE7C,CAAC;AAEF,wBAAgB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAEnF;AAED,MAAM,MAAM,QAAQ,GAAG;IACrB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,cAAc,EAAE;QACd,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC;CACH,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG,QAAQ,GAAG;IAC3C,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,aAAa,GACxB,OAAO,MAAM,EACb,QAAQ,MAAM,EACd,oBAAoB,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EACvC,iBAAiB,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,KACnC,YAOF,CAAC;AAEF,eAAO,MAAM,0BAA0B,GAAI,QAAQ,sBAAsB,KAAG,MAAM,GAAG,SAGpF,CAAC;AAEF,eAAO,MAAM,6BAA6B,GAAI,QAAQ,sBAAsB,KAAG,MAAM,EAoCpF,CAAC;AAEF,eAAO,MAAM,uBAAuB,GAAI,QAAQ,sBAAsB,KAAG,MAGxE,CAAC;AAEF,eAAO,MAAM,gCAAgC,GAAI,SAAS,uBAAuB,uBAGhF,CAAC;AAEF,eAAO,MAAM,oBAAoB,GAAI,gDAMlC;IACD,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC1B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,KAAK,CAAC,EAAE,cAAc,CAAC,OAAO,CAAC,CAAC;CACjC,KAAG;IAAE,UAAU,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAQhH,CAAC;AAEF;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,EAChC,OAAO,EACP,IAAI,EACJ,EAAmB,EACnB,eAAoB,GACrB,EAAE;IACD,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,GAAG,WAAW,GAAG,QAAQ,CAAC;IACtC,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,eAAe,CAAC,EAAE,KAAK,CAAC;QACtB,UAAU,EAAE,MAAM,CAAC;QACnB,QAAQ,EAAE,MAAM,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC1B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC5B,KAAK,EAAE,GAAG,CAAC;KACZ,CAAC,CAAC;CACJ,GAAG,eAAe,CAoBlB;AAED,eAAO,MAAM,kBAAkB,GAAI,6GAQhC;IACD,aAAa,CAAC,EAAE,sBAAsB,CAAC,eAAe,CAAC,CAAC;IACxD,MAAM,EAAE,uBAAuB,CAAC;IAChC,kBAAkB,CAAC,EAAE,sBAAsB,CAAC,oBAAoB,CAAC,CAAC;IAClE,cAAc,CAAC,EAAE,sBAAsB,CAAC,gBAAgB,CAAC,CAAC;IAC1D,oBAAoB,CAAC,EAAE,sBAAsB,CAAC,sBAAsB,CAAC,CAAC;IACtE,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB,KAAG;IACF,KAAK,EAAE,sBAAsB,CAAC;IAC9B,MAAM,EAAE,uBAAuB,CAAC;IAChC,cAAc,EAAE,cAAc,CAAC;IAC/B,KAAK,EAAE,MAAM,CAAC;CAaf,CAAC;AAEF,MAAM,MAAM,YAAY,GAAG;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;CACzB,CAAC;AAEF,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,uBAAuB,GAAG;IAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,aAAa,EAAE,YAAY,EAAE,CAAA;CAAE,CAwBpH;AAED,eAAO,MAAM,oBAAoB,GAAI,UAAU,sBAAsB,GAAG,SAAS,KAAG,MAAM,EAEzF,CAAC;AAEF,eAAO,MAAM,4BAA4B,GAAI,WAAW,uBAAuB,KAAG,MAAM,EAEvF,CAAC"}
@@ -1,3 +1,3 @@
1
- export { createAgentTestRun, createTestRun, createToolInvocation, createUIMessage, extractAgentResponseMessages, extractInputMessages, extractToolCalls, getAssistantMessageFromRunOutput, getCombinedSystemPrompt, getSystemMessagesFromRunInput, getUserMessageFromRunInput, isCloserTo, roundToTwoDecimals } from '../chunk-KHEXN75Q.js';
1
+ export { createAgentTestRun, createTestMessage, createTestRun, createToolInvocation, extractAgentResponseMessages, extractInputMessages, extractToolCalls, getAssistantMessageFromRunOutput, getCombinedSystemPrompt, getSystemMessagesFromRunInput, getTextContentFromMastraDBMessage, getUserMessageFromRunInput, isCloserTo, roundToTwoDecimals } from '../chunk-CCLM7KPF.js';
2
2
  //# sourceMappingURL=utils.js.map
3
3
  //# sourceMappingURL=utils.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mastra/evals",
3
- "version": "0.14.4",
3
+ "version": "1.0.0-beta.1",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "files": [
@@ -29,54 +29,14 @@
29
29
  "default": "./dist/index.cjs"
30
30
  }
31
31
  },
32
- "./judge": {
32
+ "./scorers/prebuilt": {
33
33
  "import": {
34
- "types": "./dist/metrics/judge/index.d.ts",
35
- "default": "./dist/metrics/judge/index.js"
34
+ "types": "./dist/scorers/prebuilt/index.d.ts",
35
+ "default": "./dist/scorers/prebuilt/index.js"
36
36
  },
37
37
  "require": {
38
- "types": "./dist/metrics/judge/index.d.ts",
39
- "default": "./dist/metrics/judge/index.cjs"
40
- }
41
- },
42
- "./nlp": {
43
- "import": {
44
- "types": "./dist/metrics/nlp/index.d.ts",
45
- "default": "./dist/metrics/nlp/index.js"
46
- },
47
- "require": {
48
- "types": "./dist/metrics/nlp/index.d.ts",
49
- "default": "./dist/metrics/nlp/index.cjs"
50
- }
51
- },
52
- "./llm": {
53
- "import": {
54
- "types": "./dist/metrics/llm/index.d.ts",
55
- "default": "./dist/metrics/llm/index.js"
56
- },
57
- "require": {
58
- "types": "./dist/metrics/llm/index.d.ts",
59
- "default": "./dist/metrics/llm/index.cjs"
60
- }
61
- },
62
- "./scorers/llm": {
63
- "import": {
64
- "types": "./dist/scorers/llm/index.d.ts",
65
- "default": "./dist/scorers/llm/index.js"
66
- },
67
- "require": {
68
- "types": "./dist/scorers/llm/index.d.ts",
69
- "default": "./dist/scorers/llm/index.cjs"
70
- }
71
- },
72
- "./scorers/code": {
73
- "import": {
74
- "types": "./dist/scorers/code/index.d.ts",
75
- "default": "./dist/scorers/code/index.js"
76
- },
77
- "require": {
78
- "types": "./dist/scorers/code/index.d.ts",
79
- "default": "./dist/scorers/code/index.cjs"
38
+ "types": "./dist/scorers/prebuilt/index.d.ts",
39
+ "default": "./dist/scorers/prebuilt/index.cjs"
80
40
  }
81
41
  },
82
42
  "./scorers/utils": {
@@ -96,39 +56,35 @@
96
56
  "license": "Apache-2.0",
97
57
  "dependencies": {
98
58
  "compromise": "^14.14.4",
99
- "fs-extra": "^11.3.2",
100
59
  "keyword-extractor": "^0.0.28",
101
60
  "sentiment": "^5.0.2",
102
61
  "string-similarity": "^4.0.4"
103
62
  },
104
63
  "peerDependencies": {
105
- "@mastra/core": ">=0.21.0-0 <0.25.0-0",
64
+ "@mastra/core": ">=1.0.0-0 <2.0.0-0",
106
65
  "ai": "^4.0.0 || ^5.0.0",
107
66
  "zod": "^3.25.0 || ^4.0.0"
108
67
  },
109
68
  "devDependencies": {
110
69
  "@ai-sdk/openai": "^1.3.24",
111
70
  "@microsoft/api-extractor": "^7.52.8",
112
- "@types/fs-extra": "^11.0.4",
113
71
  "@types/sentiment": "^5.0.4",
114
72
  "@types/string-similarity": "^4.0.2",
73
+ "@vitest/coverage-v8": "4.0.8",
74
+ "@vitest/ui": "4.0.8",
115
75
  "ai": "^4.3.19",
116
76
  "dotenv": "^17.0.0",
117
77
  "eslint": "^9.37.0",
118
78
  "tsup": "^8.5.0",
119
79
  "typescript": "^5.8.3",
120
- "vitest": "^3.2.4",
80
+ "vitest": "^4.0.8",
121
81
  "zod": "^3.25.76",
122
- "@internal/lint": "0.0.59",
123
- "@mastra/core": "0.24.1",
124
- "@internal/types-builder": "0.0.34"
82
+ "@internal/lint": "0.0.53",
83
+ "@internal/types-builder": "0.0.28",
84
+ "@mastra/core": "1.0.0-beta.3"
125
85
  },
126
- "publishConfig": {
127
- "access": "public",
128
- "publish-branch": [
129
- "main",
130
- "0.x"
131
- ]
86
+ "engines": {
87
+ "node": ">=22.13.0"
132
88
  },
133
89
  "scripts": {
134
90
  "check": "tsc --noEmit",
@@ -1,4 +0,0 @@
1
- import type { Mastra } from '@mastra/core';
2
- export declare function attachListeners(mastra?: Mastra): Promise<void>;
3
- export declare function globalSetup(): Promise<void>;
4
- //# sourceMappingURL=attachListeners.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"attachListeners.d.ts","sourceRoot":"","sources":["../src/attachListeners.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAO3C,wBAAsB,eAAe,CAAC,MAAM,CAAC,EAAE,MAAM,iBA0BpD;AAED,wBAAsB,WAAW,kBAOhC"}
@@ -1,78 +0,0 @@
1
- // src/ratio.ts
2
- function calculateRatio(input, output) {
3
- if (input === output) {
4
- return 1;
5
- }
6
- if (input.length === 0 || output.length === 0) {
7
- return 0;
8
- }
9
- const matches = longestCommonSubsequence(input, output);
10
- const total = input.length + output.length;
11
- return total > 0 ? 2 * matches / total : 0;
12
- }
13
- function longestCommonSubsequence(str1, str2) {
14
- const m = str1.length;
15
- const n = str2.length;
16
- const dp = [];
17
- for (let i = 0; i <= m; i++) {
18
- dp[i] = [];
19
- for (let j = 0; j <= n; j++) {
20
- dp[i][j] = 0;
21
- }
22
- }
23
- for (let i = 1; i <= m; i++) {
24
- for (let j = 1; j <= n; j++) {
25
- if (str1[i - 1] === str2[j - 1]) {
26
- dp[i][j] = dp[i - 1][j - 1] + 1;
27
- } else {
28
- dp[i][j] = Math.max(dp[i - 1][j], dp[i][j - 1]);
29
- }
30
- }
31
- }
32
- return dp[m][n];
33
- }
34
- function countChanges(input, output) {
35
- const inputNormalized = input.replace(/\s+/g, " ").trim();
36
- const outputNormalized = output.replace(/\s+/g, " ").trim();
37
- if (inputNormalized === outputNormalized) {
38
- if (input !== output) {
39
- const inputWords2 = input.split(/\s+/).filter((w) => w.length > 0);
40
- const outputWords2 = output.split(/\s+/).filter((w) => w.length > 0);
41
- return Math.abs(inputWords2.length - outputWords2.length) || 1;
42
- }
43
- return 0;
44
- }
45
- const inputWords = inputNormalized.split(/\s+/).filter((w) => w.length > 0);
46
- const outputWords = outputNormalized.split(/\s+/).filter((w) => w.length > 0);
47
- if (inputWords.length === 0 && outputWords.length === 0) {
48
- return 0;
49
- }
50
- if (inputWords.length === 0) {
51
- return outputWords.length;
52
- }
53
- if (outputWords.length === 0) {
54
- return inputWords.length;
55
- }
56
- const matchingWords = findCommonWords(inputWords, outputWords);
57
- const maxLength = Math.max(inputWords.length, outputWords.length);
58
- const changes = maxLength - matchingWords;
59
- return changes;
60
- }
61
- function findCommonWords(arr1, arr2) {
62
- let matches = 0;
63
- const used = /* @__PURE__ */ new Set();
64
- for (let i = 0; i < arr1.length; i++) {
65
- for (let j = 0; j < arr2.length; j++) {
66
- if (!used.has(j) && arr1[i] === arr2[j]) {
67
- matches++;
68
- used.add(j);
69
- break;
70
- }
71
- }
72
- }
73
- return matches;
74
- }
75
-
76
- export { calculateRatio, countChanges };
77
- //# sourceMappingURL=chunk-44PMY5ES.js.map
78
- //# sourceMappingURL=chunk-44PMY5ES.js.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/ratio.ts"],"names":["inputWords","outputWords"],"mappings":";AAKO,SAAS,cAAA,CAAe,OAAe,MAAA,EAAwB;AACpE,EAAA,IAAI,UAAU,MAAA,EAAQ;AACpB,IAAA,OAAO,CAAA;AAAA,EACT;AACA,EAAA,IAAI,KAAA,CAAM,MAAA,KAAW,CAAA,IAAK,MAAA,CAAO,WAAW,CAAA,EAAG;AAC7C,IAAA,OAAO,CAAA;AAAA,EACT;AAGA,EAAA,MAAM,OAAA,GAAU,wBAAA,CAAyB,KAAA,EAAO,MAAM,CAAA;AACtD,EAAA,MAAM,KAAA,GAAQ,KAAA,CAAM,MAAA,GAAS,MAAA,CAAO,MAAA;AAEpC,EAAA,OAAO,KAAA,GAAQ,CAAA,GAAK,CAAA,GAAM,OAAA,GAAW,KAAA,GAAQ,CAAA;AAC/C;AAKA,SAAS,wBAAA,CAAyB,MAAc,IAAA,EAAsB;AACpE,EAAA,MAAM,IAAI,IAAA,CAAK,MAAA;AACf,EAAA,MAAM,IAAI,IAAA,CAAK,MAAA;AACf,EAAA,MAAM,KAAiB,EAAC;AAGxB,EAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,IAAK,CAAA,EAAG,CAAA,EAAA,EAAK;AAC3B,IAAA,EAAA,CAAG,CAAC,IAAI,EAAC;AACT,IAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,IAAK,CAAA,EAAG,CAAA,EAAA,EAAK;AAC3B,MAAA,EAAA,CAAG,CAAC,CAAA,CAAG,CAAC,CAAA,GAAI,CAAA;AAAA,IACd;AAAA,EACF;AAEA,EAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,IAAK,CAAA,EAAG,CAAA,EAAA,EAAK;AAC3B,IAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,IAAK,CAAA,EAAG,CAAA,EAAA,EAAK;AAC3B,MAAA,IAAI,KAAK,CAAA,GAAI,CAAC,MAAM,IAAA,CAAK,CAAA,GAAI,CAAC,CAAA,EAAG;AAC/B,QAAA,EAAA,CAAG,CAAC,CAAA,CAAG,CAAC,CAAA,GAAK,EAAA,CAAG,IAAI,CAAC,CAAA,CAAG,CAAA,GAAI,CAAC,CAAA,GAAK,CAAA;AAAA,MACpC,CAAA,MAAO;AACL,QAAA,EAAA,CAAG,CAAC,CAAA,CAAG,CAAC,CAAA,GAAK,IAAA,CAAK,IAAI,EAAA,CAAG,CAAA,GAAI,CAAC,CAAA,CAAG,CAAC,CAAA,EAAI,EAAA,CAAG,CAAC,CAAA,CAAG,CAAA,GAAI,CAAC,CAAE,CAAA;AAAA,MACtD;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,EAAA,CAAG,CAAC,CAAA,CAAG,CAAC,CAAA;AACjB;AAOO,SAAS,YAAA,CAAa,OAAe,MAAA,EAAwB;AAElE,EAAA,MAAM,kBAAkB,KAAA,CAAM,OAAA,CAAQ,MAAA,EAAQ,GAAG,EAAE,IAAA,EAAK;AACxD,EAAA,MAAM,mBAAmB,MAAA,CAAO,OAAA,CAAQ,MAAA,EAAQ,GAAG,EAAE,IAAA,EAAK;AAG1D,EAAA,IAAI,oBAAoB,gBAAA,EAAkB;AAExC,IAAA,IAAI,UAAU,MAAA,EAAQ;AAEpB,MAAA,MAAMA,WAAAA,GAAa,MAAM,KAAA,CAAM,KAAK,EAAE,MAAA,CAAO,CAAA,CAAA,KAAK,CAAA,CAAE,MAAA,GAAS,CAAC,CAAA;AAC9D,MAAA,MAAMC,YAAAA,GAAc,OAAO,KAAA,CAAM,KAAK,EAAE,MAAA,CAAO,CAAA,CAAA,KAAK,CAAA,CAAE,MAAA,GAAS,CAAC,CAAA;AAChE,MAAA,OAAO,KAAK,GAAA,CAAID,WAAAA,CAAW,MAAA,GAASC,YAAAA,CAAY,MAAM,CAAA,IAAK,CAAA;AAAA,IAC7D;AACA,IAAA,OAAO,CAAA;AAAA,EACT;AAEA,EAAA,MAAM,UAAA,GAAa,gBAAgB,KAAA,CAAM,KAAK,EAAE,MAAA,CAAO,CAAA,CAAA,KAAK,CAAA,CAAE,MAAA,GAAS,CAAC,CAAA;AACxE,EAAA,MAAM,WAAA,GAAc,iBAAiB,KAAA,CAAM,KAAK,EAAE,MAAA,CAAO,CAAA,CAAA,KAAK,CAAA,CAAE,MAAA,GAAS,CAAC,CAAA;AAE1E,EAAA,IAAI,UAAA,CAAW,MAAA,KAAW,CAAA,IAAK,WAAA,CAAY,WAAW,CAAA,EAAG;AACvD,IAAA,OAAO,CAAA;AAAA,EACT;AACA,EAAA,IAAI,UAAA,CAAW,WAAW,CAAA,EAAG;AAC3B,IAAA,OAAO,WAAA,CAAY,MAAA;AAAA,EACrB;AACA,EAAA,IAAI,WAAA,CAAY,WAAW,CAAA,EAAG;AAC5B,IAAA,OAAO,UAAA,CAAW,MAAA;AAAA,EACpB;AAIA,EAAA,MAAM,aAAA,GAAgB,eAAA,CAAgB,UAAA,EAAY,WAAW,CAAA;AAC7D,EAAA,MAAM,YAAY,IAAA,CAAK,GAAA,CAAI,UAAA,CAAW,MAAA,EAAQ,YAAY,MAAM,CAAA;AAChE,EAAA,MAAM,UAAU,SAAA,GAAY,aAAA;AAE5B,EAAA,OAAO,OAAA;AACT;AAKA,SAAS,eAAA,CAAgB,MAAgB,IAAA,EAAwB;AAC/D,EAAA,IAAI,OAAA,GAAU,CAAA;AACd,EAAA,MAAM,IAAA,uBAAW,GAAA,EAAY;AAE7B,EAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,IAAA,CAAK,QAAQ,CAAA,EAAA,EAAK;AACpC,IAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,IAAA,CAAK,QAAQ,CAAA,EAAA,EAAK;AACpC,MAAA,IAAI,CAAC,IAAA,CAAK,GAAA,CAAI,CAAC,CAAA,IAAK,KAAK,CAAC,CAAA,KAAM,IAAA,CAAK,CAAC,CAAA,EAAG;AACvC,QAAA,OAAA,EAAA;AACA,QAAA,IAAA,CAAK,IAAI,CAAC,CAAA;AACV,QAAA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,OAAA;AACT","file":"chunk-44PMY5ES.js","sourcesContent":["/**\n * Calculates similarity ratio similar to SequenceMatcher.ratio()\n * Uses longest common subsequence (LCS) approach\n * Ratio = 2.0 * matches / total\n */\nexport function calculateRatio(input: string, output: string): number {\n if (input === output) {\n return 1.0;\n }\n if (input.length === 0 || output.length === 0) {\n return 0.0;\n }\n\n // Use character-level LCS for more accurate matching (similar to SequenceMatcher)\n const matches = longestCommonSubsequence(input, output);\n const total = input.length + output.length;\n\n return total > 0 ? (2.0 * matches) / total : 0.0;\n}\n\n/**\n * Finds the length of the longest common subsequence between two strings\n */\nfunction longestCommonSubsequence(str1: string, str2: string): number {\n const m = str1.length;\n const n = str2.length;\n const dp: number[][] = [];\n\n // Initialize DP table\n for (let i = 0; i <= m; i++) {\n dp[i] = [];\n for (let j = 0; j <= n; j++) {\n dp[i]![j] = 0;\n }\n }\n\n for (let i = 1; i <= m; i++) {\n for (let j = 1; j <= n; j++) {\n if (str1[i - 1] === str2[j - 1]) {\n dp[i]![j]! = dp[i - 1]![j - 1]! + 1;\n } else {\n dp[i]![j]! = Math.max(dp[i - 1]![j]!, dp[i]![j - 1]!);\n }\n }\n }\n\n return dp[m]![n]!;\n}\n\n/**\n * Counts the number of differences between two strings\n * Uses opcodes-like approach: counts insertions, deletions, and replacements\n * For whitespace differences, preserves the original strings before word splitting\n */\nexport function countChanges(input: string, output: string): number {\n // Normalize whitespace for comparison but preserve original for change detection\n const inputNormalized = input.replace(/\\s+/g, ' ').trim();\n const outputNormalized = output.replace(/\\s+/g, ' ').trim();\n\n // If normalized strings are identical, check if there are whitespace differences\n if (inputNormalized === outputNormalized) {\n // If original strings differ only in whitespace, count that as a change\n if (input !== output) {\n // Count whitespace differences\n const inputWords = input.split(/\\s+/).filter(w => w.length > 0);\n const outputWords = output.split(/\\s+/).filter(w => w.length > 0);\n return Math.abs(inputWords.length - outputWords.length) || 1;\n }\n return 0;\n }\n\n const inputWords = inputNormalized.split(/\\s+/).filter(w => w.length > 0);\n const outputWords = outputNormalized.split(/\\s+/).filter(w => w.length > 0);\n\n if (inputWords.length === 0 && outputWords.length === 0) {\n return 0;\n }\n if (inputWords.length === 0) {\n return outputWords.length;\n }\n if (outputWords.length === 0) {\n return inputWords.length;\n }\n\n // Use LCS approach: changes = total - 2 * matches\n // But for word-level, we want to count replacements as single changes\n const matchingWords = findCommonWords(inputWords, outputWords);\n const maxLength = Math.max(inputWords.length, outputWords.length);\n const changes = maxLength - matchingWords;\n\n return changes;\n}\n\n/**\n * Finds the number of common words between two arrays using a greedy matching approach\n */\nfunction findCommonWords(arr1: string[], arr2: string[]): number {\n let matches = 0;\n const used = new Set<number>();\n\n for (let i = 0; i < arr1.length; i++) {\n for (let j = 0; j < arr2.length; j++) {\n if (!used.has(j) && arr1[i] === arr2[j]) {\n matches++;\n used.add(j);\n break;\n }\n }\n }\n\n return matches;\n}\n"]}
@@ -1,10 +0,0 @@
1
- 'use strict';
2
-
3
- // src/metrics/llm/utils.ts
4
- var roundToTwoDecimals = (num) => {
5
- return Math.round((num + Number.EPSILON) * 100) / 100;
6
- };
7
-
8
- exports.roundToTwoDecimals = roundToTwoDecimals;
9
- //# sourceMappingURL=chunk-7QAUEU4L.cjs.map
10
- //# sourceMappingURL=chunk-7QAUEU4L.cjs.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/metrics/llm/utils.ts"],"names":[],"mappings":";;;AAAO,IAAM,kBAAA,GAAqB,CAAC,GAAA,KAAgB;AACjD,EAAA,OAAO,KAAK,KAAA,CAAA,CAAO,GAAA,GAAM,MAAA,CAAO,OAAA,IAAW,GAAG,CAAA,GAAI,GAAA;AACpD","file":"chunk-7QAUEU4L.cjs","sourcesContent":["export const roundToTwoDecimals = (num: number) => {\n return Math.round((num + Number.EPSILON) * 100) / 100;\n};\n\nexport function isCloserTo(value: number, target1: number, target2: number): boolean {\n return Math.abs(value - target1) < Math.abs(value - target2);\n}\n\nexport type TestCase = {\n input: string;\n output: string;\n expectedResult: {\n score: number;\n reason?: string;\n };\n};\n\nexport type TestCaseWithContext = TestCase & {\n context: string[];\n};\n"]}
@@ -1,37 +0,0 @@
1
- 'use strict';
2
-
3
- var __create = Object.create;
4
- var __defProp = Object.defineProperty;
5
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
6
- var __getOwnPropNames = Object.getOwnPropertyNames;
7
- var __getProtoOf = Object.getPrototypeOf;
8
- var __hasOwnProp = Object.prototype.hasOwnProperty;
9
- var __commonJS = (cb, mod) => function __require() {
10
- return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
11
- };
12
- var __export = (target, all) => {
13
- for (var name in all)
14
- __defProp(target, name, { get: all[name], enumerable: true });
15
- };
16
- var __copyProps = (to, from, except, desc) => {
17
- if (from && typeof from === "object" || typeof from === "function") {
18
- for (let key of __getOwnPropNames(from))
19
- if (!__hasOwnProp.call(to, key) && key !== except)
20
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
21
- }
22
- return to;
23
- };
24
- var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
25
- // If the importer is in node compatibility mode or this is not an ESM
26
- // file that has been converted to a CommonJS file using a Babel-
27
- // compatible transform (i.e. "__esModule" has not been set), then set
28
- // "default" to the CommonJS "module.exports" for node compatibility.
29
- isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
30
- mod
31
- ));
32
-
33
- exports.__commonJS = __commonJS;
34
- exports.__export = __export;
35
- exports.__toESM = __toESM;
36
- //# sourceMappingURL=chunk-EMMSS5I5.cjs.map
37
- //# sourceMappingURL=chunk-EMMSS5I5.cjs.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":[],"names":[],"mappings":"","file":"chunk-EMMSS5I5.cjs"}
@@ -1,33 +0,0 @@
1
- var __create = Object.create;
2
- var __defProp = Object.defineProperty;
3
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
- var __getOwnPropNames = Object.getOwnPropertyNames;
5
- var __getProtoOf = Object.getPrototypeOf;
6
- var __hasOwnProp = Object.prototype.hasOwnProperty;
7
- var __commonJS = (cb, mod) => function __require() {
8
- return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
9
- };
10
- var __export = (target, all) => {
11
- for (var name in all)
12
- __defProp(target, name, { get: all[name], enumerable: true });
13
- };
14
- var __copyProps = (to, from, except, desc) => {
15
- if (from && typeof from === "object" || typeof from === "function") {
16
- for (let key of __getOwnPropNames(from))
17
- if (!__hasOwnProp.call(to, key) && key !== except)
18
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
19
- }
20
- return to;
21
- };
22
- var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
23
- // If the importer is in node compatibility mode or this is not an ESM
24
- // file that has been converted to a CommonJS file using a Babel-
25
- // compatible transform (i.e. "__esModule" has not been set), then set
26
- // "default" to the CommonJS "module.exports" for node compatibility.
27
- isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
28
- mod
29
- ));
30
-
31
- export { __commonJS, __export, __toESM };
32
- //# sourceMappingURL=chunk-G3PMV62Z.js.map
33
- //# sourceMappingURL=chunk-G3PMV62Z.js.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":[],"names":[],"mappings":"","file":"chunk-G3PMV62Z.js"}
@@ -1,19 +0,0 @@
1
- 'use strict';
2
-
3
- var agent = require('@mastra/core/agent');
4
-
5
- // src/metrics/judge/index.ts
6
- var MastraAgentJudge = class {
7
- agent;
8
- constructor(name, instructions, model) {
9
- this.agent = new agent.Agent({
10
- name: `Mastra Eval Judge ${name}`,
11
- instructions,
12
- model
13
- });
14
- }
15
- };
16
-
17
- exports.MastraAgentJudge = MastraAgentJudge;
18
- //# sourceMappingURL=chunk-IUSAD2BW.cjs.map
19
- //# sourceMappingURL=chunk-IUSAD2BW.cjs.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/metrics/judge/index.ts"],"names":["Agent"],"mappings":";;;;;AAGO,IAAe,mBAAf,MAAgC;AAAA,EAClB,KAAA;AAAA,EAEnB,WAAA,CAAY,IAAA,EAAc,YAAA,EAAsB,KAAA,EAAsB;AACpE,IAAA,IAAA,CAAK,KAAA,GAAQ,IAAIA,WAAA,CAAM;AAAA,MACrB,IAAA,EAAM,qBAAqB,IAAI,CAAA,CAAA;AAAA,MAC/B,YAAA;AAAA,MACA;AAAA,KACD,CAAA;AAAA,EACH;AACF","file":"chunk-IUSAD2BW.cjs","sourcesContent":["import { Agent } from '@mastra/core/agent';\nimport type { LanguageModel } from '@mastra/core/llm';\n\nexport abstract class MastraAgentJudge {\n protected readonly agent: Agent;\n\n constructor(name: string, instructions: string, model: LanguageModel) {\n this.agent = new Agent({\n name: `Mastra Eval Judge ${name}`,\n instructions: instructions,\n model,\n });\n }\n}\n"]}
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/scorers/utils.ts"],"names":[],"mappings":";;;AAIO,IAAM,kBAAA,GAAqB,CAAC,GAAA,KAAgB;AACjD,EAAA,OAAO,KAAK,KAAA,CAAA,CAAO,GAAA,GAAM,MAAA,CAAO,OAAA,IAAW,GAAG,CAAA,GAAI,GAAA;AACpD;AAEO,SAAS,UAAA,CAAW,KAAA,EAAe,OAAA,EAAiB,OAAA,EAA0B;AACnF,EAAA,OAAO,IAAA,CAAK,IAAI,KAAA,GAAQ,OAAO,IAAI,IAAA,CAAK,GAAA,CAAI,QAAQ,OAAO,CAAA;AAC7D;AAeO,IAAM,aAAA,GAAgB,CAC3B,KAAA,EACA,MAAA,EACA,mBACA,cAAA,KACiB;AACjB,EAAA,OAAO;AAAA,IACL,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,OAAA,EAAS,OAAO,CAAA;AAAA,IACxC,MAAA,EAAQ,EAAE,IAAA,EAAM,WAAA,EAAa,MAAM,MAAA,EAAO;AAAA,IAC1C,iBAAA,EAAmB,qBAAqB,EAAC;AAAA,IACzC,cAAA,EAAgB,kBAAkB;AAAC,GACrC;AACF;AAEO,IAAM,0BAAA,GAA6B,CAAC,KAAA,KAAmC;AAC5E,EAAA,OAAO,KAAA,EAAO,cAAc,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,IAAA,KAAS,MAAM,CAAA,EAAG,OAAA;AACnE;AAEO,IAAM,6BAAA,GAAgC,CAAC,KAAA,KAA6C;AACzF,EAAA,MAAM,iBAA2B,EAAC;AAGlC,EAAA,IAAI,OAAO,cAAA,EAAgB;AACzB,IAAA,cAAA,CAAe,IAAA;AAAA,MACb,GAAG,KAAA,CAAM,cAAA,CACN,GAAA,CAAI,CAAA,GAAA,KAAO;AAEV,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,OAAO,GAAA,CAAI,OAAA;AAAA,QACb,CAAA,MAAA,IAAW,KAAA,CAAM,OAAA,CAAQ,GAAA,CAAI,OAAO,CAAA,EAAG;AAErC,UAAA,OAAO,GAAA,CAAI,OAAA,CACR,MAAA,CAAO,CAAA,IAAA,KAAQ,KAAK,IAAA,KAAS,MAAM,CAAA,CACnC,GAAA,CAAI,UAAQ,IAAA,CAAK,IAAA,IAAQ,EAAE,CAAA,CAC3B,KAAK,GAAG,CAAA;AAAA,QACb;AACA,QAAA,OAAO,EAAA;AAAA,MACT,CAAC,CAAA,CACA,MAAA,CAAO,CAAA,OAAA,KAAW,OAAO;AAAA,KAC9B;AAAA,EACF;AAGA,EAAA,IAAI,OAAO,oBAAA,EAAsB;AAC/B,IAAA,MAAA,CAAO,MAAA,CAAO,KAAA,CAAM,oBAAoB,CAAA,CAAE,QAAQ,CAAA,QAAA,KAAY;AAC5D,MAAA,QAAA,CAAS,QAAQ,CAAA,GAAA,KAAO;AACtB,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,cAAA,CAAe,IAAA,CAAK,IAAI,OAAO,CAAA;AAAA,QACjC;AAAA,MACF,CAAC,CAAA;AAAA,IACH,CAAC,CAAA;AAAA,EACH;AAEA,EAAA,OAAO,cAAA;AACT;AAEO,IAAM,uBAAA,GAA0B,CAAC,KAAA,KAA2C;AACjF,EAAA,MAAM,cAAA,GAAiB,8BAA8B,KAAK,CAAA;AAC1D,EAAA,OAAO,cAAA,CAAe,KAAK,MAAM,CAAA;AACnC;AAEO,IAAM,gCAAA,GAAmC,CAAC,MAAA,KAAqC;AACpF,EAAA,OAAO,MAAA,EAAQ,KAAK,CAAC,EAAE,MAAK,KAAM,IAAA,KAAS,WAAW,CAAA,EAAG,OAAA;AAC3D;AAEO,IAAM,uBAAuB,CAAC;AAAA,EACnC,UAAA;AAAA,EACA,QAAA;AAAA,EACA,IAAA;AAAA,EACA,MAAA;AAAA,EACA,KAAA,GAAQ;AACV,CAAA,KAMuH;AACrH,EAAA,OAAO;AAAA,IACL,UAAA;AAAA,IACA,QAAA;AAAA,IACA,IAAA;AAAA,IACA,MAAA;AAAA,IACA;AAAA,GACF;AACF;AAEO,IAAM,kBAAkB,CAAC;AAAA,EAC9B,OAAA;AAAA,EACA,IAAA;AAAA,EACA,EAAA,GAAK,cAAA;AAAA,EACL,kBAAkB;AACpB,CAAA,KAWiB;AACf,EAAA,OAAO;AAAA,IACL,EAAA;AAAA,IACA,IAAA;AAAA,IACA,OAAA;AAAA,IACA,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,IAAA,EAAM,SAAS,CAAA;AAAA,IACvC;AAAA,GACF;AACF;AAEO,IAAM,qBAAqB,CAAC;AAAA,EACjC,gBAAgB,EAAC;AAAA,EACjB,MAAA;AAAA,EACA,qBAAqB,EAAC;AAAA,EACtB,iBAAiB,EAAC;AAAA,EAClB,uBAAuB,EAAC;AAAA,EACxB,cAAA,GAAiB,IAAI,cAAA,EAAe;AAAA,EACpC,KAAA,GAAQ,OAAO,UAAA;AACjB,CAAA,KAaK;AACH,EAAA,OAAO;AAAA,IACL,KAAA,EAAO;AAAA,MACL,aAAA;AAAA,MACA,kBAAA;AAAA,MACA,cAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,MAAA;AAAA,IACA,cAAA;AAAA,IACA;AAAA,GACF;AACF;AASO,SAAS,iBAAiB,MAAA,EAAqF;AACpH,EAAA,MAAM,YAAsB,EAAC;AAC7B,EAAA,MAAM,gBAAgC,EAAC;AAEvC,EAAA,KAAA,IAAS,YAAA,GAAe,CAAA,EAAG,YAAA,GAAe,MAAA,CAAO,QAAQ,YAAA,EAAA,EAAgB;AACvE,IAAA,MAAM,OAAA,GAAU,OAAO,YAAY,CAAA;AACnC,IAAA,IAAI,SAAS,eAAA,EAAiB;AAC5B,MAAA,KAAA,IAAS,kBAAkB,CAAA,EAAG,eAAA,GAAkB,OAAA,CAAQ,eAAA,CAAgB,QAAQ,eAAA,EAAA,EAAmB;AACjG,QAAA,MAAM,UAAA,GAAa,OAAA,CAAQ,eAAA,CAAgB,eAAe,CAAA;AAC1D,QAAA,IAAI,UAAA,IAAc,WAAW,QAAA,KAAa,UAAA,CAAW,UAAU,QAAA,IAAY,UAAA,CAAW,UAAU,MAAA,CAAA,EAAS;AACvG,UAAA,SAAA,CAAU,IAAA,CAAK,WAAW,QAAQ,CAAA;AAClC,UAAA,aAAA,CAAc,IAAA,CAAK;AAAA,YACjB,UAAU,UAAA,CAAW,QAAA;AAAA,YACrB,YAAY,UAAA,CAAW,UAAA,IAAc,CAAA,EAAG,YAAY,IAAI,eAAe,CAAA,CAAA;AAAA,YACvE,YAAA;AAAA,YACA;AAAA,WACD,CAAA;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,EAAE,KAAA,EAAO,SAAA,EAAW,aAAA,EAAc;AAC3C;AAEO,IAAM,oBAAA,GAAuB,CAAC,QAAA,KAA2D;AAC9F,EAAA,OAAO,UAAU,aAAA,EAAe,GAAA,CAAI,SAAO,GAAA,CAAI,OAAO,KAAK,EAAC;AAC9D;AAEO,IAAM,4BAAA,GAA+B,CAAC,SAAA,KAAiD;AAC5F,EAAA,OAAO,SAAA,CAAU,MAAA,CAAO,CAAA,GAAA,KAAO,GAAA,CAAI,IAAA,KAAS,WAAW,CAAA,CAAE,GAAA,CAAI,CAAA,GAAA,KAAO,GAAA,CAAI,OAAO,CAAA;AACjF","file":"chunk-KHEXN75Q.js","sourcesContent":["import { RuntimeContext } from '@mastra/core/runtime-context';\nimport type { ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput } from '@mastra/core/scores';\nimport type { ToolInvocation, UIMessage } from 'ai';\n\nexport const roundToTwoDecimals = (num: number) => {\n return Math.round((num + Number.EPSILON) * 100) / 100;\n};\n\nexport function isCloserTo(value: number, target1: number, target2: number): boolean {\n return Math.abs(value - target1) < Math.abs(value - target2);\n}\n\nexport type TestCase = {\n input: string;\n output: string;\n expectedResult: {\n score: number;\n reason?: string;\n };\n};\n\nexport type TestCaseWithContext = TestCase & {\n context: string[];\n};\n\nexport const createTestRun = (\n input: string,\n output: string,\n additionalContext?: Record<string, any>,\n runtimeContext?: Record<string, any>,\n): ScoringInput => {\n return {\n input: [{ role: 'user', content: input }],\n output: { role: 'assistant', text: output },\n additionalContext: additionalContext ?? {},\n runtimeContext: runtimeContext ?? {},\n };\n};\n\nexport const getUserMessageFromRunInput = (input?: ScorerRunInputForAgent) => {\n return input?.inputMessages.find(({ role }) => role === 'user')?.content;\n};\n\nexport const getSystemMessagesFromRunInput = (input?: ScorerRunInputForAgent): string[] => {\n const systemMessages: string[] = [];\n\n // Add standard system messages\n if (input?.systemMessages) {\n systemMessages.push(\n ...input.systemMessages\n .map(msg => {\n // Handle different content types - extract text if it's an array of parts\n if (typeof msg.content === 'string') {\n return msg.content;\n } else if (Array.isArray(msg.content)) {\n // Extract text from parts array\n return msg.content\n .filter(part => part.type === 'text')\n .map(part => part.text || '')\n .join(' ');\n }\n return '';\n })\n .filter(content => content),\n );\n }\n\n // Add tagged system messages (these are specialized system prompts)\n if (input?.taggedSystemMessages) {\n Object.values(input.taggedSystemMessages).forEach(messages => {\n messages.forEach(msg => {\n if (typeof msg.content === 'string') {\n systemMessages.push(msg.content);\n }\n });\n });\n }\n\n return systemMessages;\n};\n\nexport const getCombinedSystemPrompt = (input?: ScorerRunInputForAgent): string => {\n const systemMessages = getSystemMessagesFromRunInput(input);\n return systemMessages.join('\\n\\n');\n};\n\nexport const getAssistantMessageFromRunOutput = (output?: ScorerRunOutputForAgent) => {\n return output?.find(({ role }) => role === 'assistant')?.content;\n};\n\nexport const createToolInvocation = ({\n toolCallId,\n toolName,\n args,\n result,\n state = 'result',\n}: {\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state?: ToolInvocation['state'];\n}): { toolCallId: string; toolName: string; args: Record<string, any>; result: Record<string, any>; state: string } => {\n return {\n toolCallId,\n toolName,\n args,\n result,\n state,\n };\n};\n\nexport const createUIMessage = ({\n content,\n role,\n id = 'test-message',\n toolInvocations = [],\n}: {\n id: string;\n role: 'user' | 'assistant' | 'system';\n content: string;\n toolInvocations?: Array<{\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state: any;\n }>;\n}): UIMessage => {\n return {\n id,\n role,\n content,\n parts: [{ type: 'text', text: content }],\n toolInvocations,\n };\n};\n\nexport const createAgentTestRun = ({\n inputMessages = [],\n output,\n rememberedMessages = [],\n systemMessages = [],\n taggedSystemMessages = {},\n runtimeContext = new RuntimeContext(),\n runId = crypto.randomUUID(),\n}: {\n inputMessages?: ScorerRunInputForAgent['inputMessages'];\n output: ScorerRunOutputForAgent;\n rememberedMessages?: ScorerRunInputForAgent['rememberedMessages'];\n systemMessages?: ScorerRunInputForAgent['systemMessages'];\n taggedSystemMessages?: ScorerRunInputForAgent['taggedSystemMessages'];\n runtimeContext?: RuntimeContext;\n runId?: string;\n}): {\n input: ScorerRunInputForAgent;\n output: ScorerRunOutputForAgent;\n runtimeContext: RuntimeContext;\n runId: string;\n} => {\n return {\n input: {\n inputMessages,\n rememberedMessages,\n systemMessages,\n taggedSystemMessages,\n },\n output,\n runtimeContext,\n runId,\n };\n};\n\nexport type ToolCallInfo = {\n toolName: string;\n toolCallId: string;\n messageIndex: number;\n invocationIndex: number;\n};\n\nexport function extractToolCalls(output: ScorerRunOutputForAgent): { tools: string[]; toolCallInfos: ToolCallInfo[] } {\n const toolCalls: string[] = [];\n const toolCallInfos: ToolCallInfo[] = [];\n\n for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {\n const message = output[messageIndex];\n if (message?.toolInvocations) {\n for (let invocationIndex = 0; invocationIndex < message.toolInvocations.length; invocationIndex++) {\n const invocation = message.toolInvocations[invocationIndex];\n if (invocation && invocation.toolName && (invocation.state === 'result' || invocation.state === 'call')) {\n toolCalls.push(invocation.toolName);\n toolCallInfos.push({\n toolName: invocation.toolName,\n toolCallId: invocation.toolCallId || `${messageIndex}-${invocationIndex}`,\n messageIndex,\n invocationIndex,\n });\n }\n }\n }\n }\n\n return { tools: toolCalls, toolCallInfos };\n}\n\nexport const extractInputMessages = (runInput: ScorerRunInputForAgent | undefined): string[] => {\n return runInput?.inputMessages?.map(msg => msg.content) || [];\n};\n\nexport const extractAgentResponseMessages = (runOutput: ScorerRunOutputForAgent): string[] => {\n return runOutput.filter(msg => msg.role === 'assistant').map(msg => msg.content);\n};\n"]}
@@ -1,81 +0,0 @@
1
- 'use strict';
2
-
3
- // src/ratio.ts
4
- function calculateRatio(input, output) {
5
- if (input === output) {
6
- return 1;
7
- }
8
- if (input.length === 0 || output.length === 0) {
9
- return 0;
10
- }
11
- const matches = longestCommonSubsequence(input, output);
12
- const total = input.length + output.length;
13
- return total > 0 ? 2 * matches / total : 0;
14
- }
15
- function longestCommonSubsequence(str1, str2) {
16
- const m = str1.length;
17
- const n = str2.length;
18
- const dp = [];
19
- for (let i = 0; i <= m; i++) {
20
- dp[i] = [];
21
- for (let j = 0; j <= n; j++) {
22
- dp[i][j] = 0;
23
- }
24
- }
25
- for (let i = 1; i <= m; i++) {
26
- for (let j = 1; j <= n; j++) {
27
- if (str1[i - 1] === str2[j - 1]) {
28
- dp[i][j] = dp[i - 1][j - 1] + 1;
29
- } else {
30
- dp[i][j] = Math.max(dp[i - 1][j], dp[i][j - 1]);
31
- }
32
- }
33
- }
34
- return dp[m][n];
35
- }
36
- function countChanges(input, output) {
37
- const inputNormalized = input.replace(/\s+/g, " ").trim();
38
- const outputNormalized = output.replace(/\s+/g, " ").trim();
39
- if (inputNormalized === outputNormalized) {
40
- if (input !== output) {
41
- const inputWords2 = input.split(/\s+/).filter((w) => w.length > 0);
42
- const outputWords2 = output.split(/\s+/).filter((w) => w.length > 0);
43
- return Math.abs(inputWords2.length - outputWords2.length) || 1;
44
- }
45
- return 0;
46
- }
47
- const inputWords = inputNormalized.split(/\s+/).filter((w) => w.length > 0);
48
- const outputWords = outputNormalized.split(/\s+/).filter((w) => w.length > 0);
49
- if (inputWords.length === 0 && outputWords.length === 0) {
50
- return 0;
51
- }
52
- if (inputWords.length === 0) {
53
- return outputWords.length;
54
- }
55
- if (outputWords.length === 0) {
56
- return inputWords.length;
57
- }
58
- const matchingWords = findCommonWords(inputWords, outputWords);
59
- const maxLength = Math.max(inputWords.length, outputWords.length);
60
- const changes = maxLength - matchingWords;
61
- return changes;
62
- }
63
- function findCommonWords(arr1, arr2) {
64
- let matches = 0;
65
- const used = /* @__PURE__ */ new Set();
66
- for (let i = 0; i < arr1.length; i++) {
67
- for (let j = 0; j < arr2.length; j++) {
68
- if (!used.has(j) && arr1[i] === arr2[j]) {
69
- matches++;
70
- used.add(j);
71
- break;
72
- }
73
- }
74
- }
75
- return matches;
76
- }
77
-
78
- exports.calculateRatio = calculateRatio;
79
- exports.countChanges = countChanges;
80
- //# sourceMappingURL=chunk-PWGOG6ML.cjs.map
81
- //# sourceMappingURL=chunk-PWGOG6ML.cjs.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/ratio.ts"],"names":["inputWords","outputWords"],"mappings":";;;AAKO,SAAS,cAAA,CAAe,OAAe,MAAA,EAAwB;AACpE,EAAA,IAAI,UAAU,MAAA,EAAQ;AACpB,IAAA,OAAO,CAAA;AAAA,EACT;AACA,EAAA,IAAI,KAAA,CAAM,MAAA,KAAW,CAAA,IAAK,MAAA,CAAO,WAAW,CAAA,EAAG;AAC7C,IAAA,OAAO,CAAA;AAAA,EACT;AAGA,EAAA,MAAM,OAAA,GAAU,wBAAA,CAAyB,KAAA,EAAO,MAAM,CAAA;AACtD,EAAA,MAAM,KAAA,GAAQ,KAAA,CAAM,MAAA,GAAS,MAAA,CAAO,MAAA;AAEpC,EAAA,OAAO,KAAA,GAAQ,CAAA,GAAK,CAAA,GAAM,OAAA,GAAW,KAAA,GAAQ,CAAA;AAC/C;AAKA,SAAS,wBAAA,CAAyB,MAAc,IAAA,EAAsB;AACpE,EAAA,MAAM,IAAI,IAAA,CAAK,MAAA;AACf,EAAA,MAAM,IAAI,IAAA,CAAK,MAAA;AACf,EAAA,MAAM,KAAiB,EAAC;AAGxB,EAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,IAAK,CAAA,EAAG,CAAA,EAAA,EAAK;AAC3B,IAAA,EAAA,CAAG,CAAC,IAAI,EAAC;AACT,IAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,IAAK,CAAA,EAAG,CAAA,EAAA,EAAK;AAC3B,MAAA,EAAA,CAAG,CAAC,CAAA,CAAG,CAAC,CAAA,GAAI,CAAA;AAAA,IACd;AAAA,EACF;AAEA,EAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,IAAK,CAAA,EAAG,CAAA,EAAA,EAAK;AAC3B,IAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,IAAK,CAAA,EAAG,CAAA,EAAA,EAAK;AAC3B,MAAA,IAAI,KAAK,CAAA,GAAI,CAAC,MAAM,IAAA,CAAK,CAAA,GAAI,CAAC,CAAA,EAAG;AAC/B,QAAA,EAAA,CAAG,CAAC,CAAA,CAAG,CAAC,CAAA,GAAK,EAAA,CAAG,IAAI,CAAC,CAAA,CAAG,CAAA,GAAI,CAAC,CAAA,GAAK,CAAA;AAAA,MACpC,CAAA,MAAO;AACL,QAAA,EAAA,CAAG,CAAC,CAAA,CAAG,CAAC,CAAA,GAAK,IAAA,CAAK,IAAI,EAAA,CAAG,CAAA,GAAI,CAAC,CAAA,CAAG,CAAC,CAAA,EAAI,EAAA,CAAG,CAAC,CAAA,CAAG,CAAA,GAAI,CAAC,CAAE,CAAA;AAAA,MACtD;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,EAAA,CAAG,CAAC,CAAA,CAAG,CAAC,CAAA;AACjB;AAOO,SAAS,YAAA,CAAa,OAAe,MAAA,EAAwB;AAElE,EAAA,MAAM,kBAAkB,KAAA,CAAM,OAAA,CAAQ,MAAA,EAAQ,GAAG,EAAE,IAAA,EAAK;AACxD,EAAA,MAAM,mBAAmB,MAAA,CAAO,OAAA,CAAQ,MAAA,EAAQ,GAAG,EAAE,IAAA,EAAK;AAG1D,EAAA,IAAI,oBAAoB,gBAAA,EAAkB;AAExC,IAAA,IAAI,UAAU,MAAA,EAAQ;AAEpB,MAAA,MAAMA,WAAAA,GAAa,MAAM,KAAA,CAAM,KAAK,EAAE,MAAA,CAAO,CAAA,CAAA,KAAK,CAAA,CAAE,MAAA,GAAS,CAAC,CAAA;AAC9D,MAAA,MAAMC,YAAAA,GAAc,OAAO,KAAA,CAAM,KAAK,EAAE,MAAA,CAAO,CAAA,CAAA,KAAK,CAAA,CAAE,MAAA,GAAS,CAAC,CAAA;AAChE,MAAA,OAAO,KAAK,GAAA,CAAID,WAAAA,CAAW,MAAA,GAASC,YAAAA,CAAY,MAAM,CAAA,IAAK,CAAA;AAAA,IAC7D;AACA,IAAA,OAAO,CAAA;AAAA,EACT;AAEA,EAAA,MAAM,UAAA,GAAa,gBAAgB,KAAA,CAAM,KAAK,EAAE,MAAA,CAAO,CAAA,CAAA,KAAK,CAAA,CAAE,MAAA,GAAS,CAAC,CAAA;AACxE,EAAA,MAAM,WAAA,GAAc,iBAAiB,KAAA,CAAM,KAAK,EAAE,MAAA,CAAO,CAAA,CAAA,KAAK,CAAA,CAAE,MAAA,GAAS,CAAC,CAAA;AAE1E,EAAA,IAAI,UAAA,CAAW,MAAA,KAAW,CAAA,IAAK,WAAA,CAAY,WAAW,CAAA,EAAG;AACvD,IAAA,OAAO,CAAA;AAAA,EACT;AACA,EAAA,IAAI,UAAA,CAAW,WAAW,CAAA,EAAG;AAC3B,IAAA,OAAO,WAAA,CAAY,MAAA;AAAA,EACrB;AACA,EAAA,IAAI,WAAA,CAAY,WAAW,CAAA,EAAG;AAC5B,IAAA,OAAO,UAAA,CAAW,MAAA;AAAA,EACpB;AAIA,EAAA,MAAM,aAAA,GAAgB,eAAA,CAAgB,UAAA,EAAY,WAAW,CAAA;AAC7D,EAAA,MAAM,YAAY,IAAA,CAAK,GAAA,CAAI,UAAA,CAAW,MAAA,EAAQ,YAAY,MAAM,CAAA;AAChE,EAAA,MAAM,UAAU,SAAA,GAAY,aAAA;AAE5B,EAAA,OAAO,OAAA;AACT;AAKA,SAAS,eAAA,CAAgB,MAAgB,IAAA,EAAwB;AAC/D,EAAA,IAAI,OAAA,GAAU,CAAA;AACd,EAAA,MAAM,IAAA,uBAAW,GAAA,EAAY;AAE7B,EAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,IAAA,CAAK,QAAQ,CAAA,EAAA,EAAK;AACpC,IAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,IAAA,CAAK,QAAQ,CAAA,EAAA,EAAK;AACpC,MAAA,IAAI,CAAC,IAAA,CAAK,GAAA,CAAI,CAAC,CAAA,IAAK,KAAK,CAAC,CAAA,KAAM,IAAA,CAAK,CAAC,CAAA,EAAG;AACvC,QAAA,OAAA,EAAA;AACA,QAAA,IAAA,CAAK,IAAI,CAAC,CAAA;AACV,QAAA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,OAAA;AACT","file":"chunk-PWGOG6ML.cjs","sourcesContent":["/**\n * Calculates similarity ratio similar to SequenceMatcher.ratio()\n * Uses longest common subsequence (LCS) approach\n * Ratio = 2.0 * matches / total\n */\nexport function calculateRatio(input: string, output: string): number {\n if (input === output) {\n return 1.0;\n }\n if (input.length === 0 || output.length === 0) {\n return 0.0;\n }\n\n // Use character-level LCS for more accurate matching (similar to SequenceMatcher)\n const matches = longestCommonSubsequence(input, output);\n const total = input.length + output.length;\n\n return total > 0 ? (2.0 * matches) / total : 0.0;\n}\n\n/**\n * Finds the length of the longest common subsequence between two strings\n */\nfunction longestCommonSubsequence(str1: string, str2: string): number {\n const m = str1.length;\n const n = str2.length;\n const dp: number[][] = [];\n\n // Initialize DP table\n for (let i = 0; i <= m; i++) {\n dp[i] = [];\n for (let j = 0; j <= n; j++) {\n dp[i]![j] = 0;\n }\n }\n\n for (let i = 1; i <= m; i++) {\n for (let j = 1; j <= n; j++) {\n if (str1[i - 1] === str2[j - 1]) {\n dp[i]![j]! = dp[i - 1]![j - 1]! + 1;\n } else {\n dp[i]![j]! = Math.max(dp[i - 1]![j]!, dp[i]![j - 1]!);\n }\n }\n }\n\n return dp[m]![n]!;\n}\n\n/**\n * Counts the number of differences between two strings\n * Uses opcodes-like approach: counts insertions, deletions, and replacements\n * For whitespace differences, preserves the original strings before word splitting\n */\nexport function countChanges(input: string, output: string): number {\n // Normalize whitespace for comparison but preserve original for change detection\n const inputNormalized = input.replace(/\\s+/g, ' ').trim();\n const outputNormalized = output.replace(/\\s+/g, ' ').trim();\n\n // If normalized strings are identical, check if there are whitespace differences\n if (inputNormalized === outputNormalized) {\n // If original strings differ only in whitespace, count that as a change\n if (input !== output) {\n // Count whitespace differences\n const inputWords = input.split(/\\s+/).filter(w => w.length > 0);\n const outputWords = output.split(/\\s+/).filter(w => w.length > 0);\n return Math.abs(inputWords.length - outputWords.length) || 1;\n }\n return 0;\n }\n\n const inputWords = inputNormalized.split(/\\s+/).filter(w => w.length > 0);\n const outputWords = outputNormalized.split(/\\s+/).filter(w => w.length > 0);\n\n if (inputWords.length === 0 && outputWords.length === 0) {\n return 0;\n }\n if (inputWords.length === 0) {\n return outputWords.length;\n }\n if (outputWords.length === 0) {\n return inputWords.length;\n }\n\n // Use LCS approach: changes = total - 2 * matches\n // But for word-level, we want to count replacements as single changes\n const matchingWords = findCommonWords(inputWords, outputWords);\n const maxLength = Math.max(inputWords.length, outputWords.length);\n const changes = maxLength - matchingWords;\n\n return changes;\n}\n\n/**\n * Finds the number of common words between two arrays using a greedy matching approach\n */\nfunction findCommonWords(arr1: string[], arr2: string[]): number {\n let matches = 0;\n const used = new Set<number>();\n\n for (let i = 0; i < arr1.length; i++) {\n for (let j = 0; j < arr2.length; j++) {\n if (!used.has(j) && arr1[i] === arr2[j]) {\n matches++;\n used.add(j);\n break;\n }\n }\n }\n\n return matches;\n}\n"]}
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/scorers/utils.ts"],"names":["runtimeContext","RuntimeContext"],"mappings":";;;;;AAIO,IAAM,kBAAA,GAAqB,CAAC,GAAA,KAAgB;AACjD,EAAA,OAAO,KAAK,KAAA,CAAA,CAAO,GAAA,GAAM,MAAA,CAAO,OAAA,IAAW,GAAG,CAAA,GAAI,GAAA;AACpD;AAEO,SAAS,UAAA,CAAW,KAAA,EAAe,OAAA,EAAiB,OAAA,EAA0B;AACnF,EAAA,OAAO,IAAA,CAAK,IAAI,KAAA,GAAQ,OAAO,IAAI,IAAA,CAAK,GAAA,CAAI,QAAQ,OAAO,CAAA;AAC7D;AAeO,IAAM,aAAA,GAAgB,CAC3B,KAAA,EACA,MAAA,EACA,mBACA,cAAA,KACiB;AACjB,EAAA,OAAO;AAAA,IACL,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,OAAA,EAAS,OAAO,CAAA;AAAA,IACxC,MAAA,EAAQ,EAAE,IAAA,EAAM,WAAA,EAAa,MAAM,MAAA,EAAO;AAAA,IAC1C,iBAAA,EAAmB,qBAAqB,EAAC;AAAA,IACzC,cAAA,EAAgB,kBAAkB;AAAC,GACrC;AACF;AAEO,IAAM,0BAAA,GAA6B,CAAC,KAAA,KAAmC;AAC5E,EAAA,OAAO,KAAA,EAAO,cAAc,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,IAAA,KAAS,MAAM,CAAA,EAAG,OAAA;AACnE;AAEO,IAAM,6BAAA,GAAgC,CAAC,KAAA,KAA6C;AACzF,EAAA,MAAM,iBAA2B,EAAC;AAGlC,EAAA,IAAI,OAAO,cAAA,EAAgB;AACzB,IAAA,cAAA,CAAe,IAAA;AAAA,MACb,GAAG,KAAA,CAAM,cAAA,CACN,GAAA,CAAI,CAAA,GAAA,KAAO;AAEV,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,OAAO,GAAA,CAAI,OAAA;AAAA,QACb,CAAA,MAAA,IAAW,KAAA,CAAM,OAAA,CAAQ,GAAA,CAAI,OAAO,CAAA,EAAG;AAErC,UAAA,OAAO,GAAA,CAAI,OAAA,CACR,MAAA,CAAO,CAAA,IAAA,KAAQ,KAAK,IAAA,KAAS,MAAM,CAAA,CACnC,GAAA,CAAI,UAAQ,IAAA,CAAK,IAAA,IAAQ,EAAE,CAAA,CAC3B,KAAK,GAAG,CAAA;AAAA,QACb;AACA,QAAA,OAAO,EAAA;AAAA,MACT,CAAC,CAAA,CACA,MAAA,CAAO,CAAA,OAAA,KAAW,OAAO;AAAA,KAC9B;AAAA,EACF;AAGA,EAAA,IAAI,OAAO,oBAAA,EAAsB;AAC/B,IAAA,MAAA,CAAO,MAAA,CAAO,KAAA,CAAM,oBAAoB,CAAA,CAAE,QAAQ,CAAA,QAAA,KAAY;AAC5D,MAAA,QAAA,CAAS,QAAQ,CAAA,GAAA,KAAO;AACtB,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,cAAA,CAAe,IAAA,CAAK,IAAI,OAAO,CAAA;AAAA,QACjC;AAAA,MACF,CAAC,CAAA;AAAA,IACH,CAAC,CAAA;AAAA,EACH;AAEA,EAAA,OAAO,cAAA;AACT;AAEO,IAAM,uBAAA,GAA0B,CAAC,KAAA,KAA2C;AACjF,EAAA,MAAM,cAAA,GAAiB,8BAA8B,KAAK,CAAA;AAC1D,EAAA,OAAO,cAAA,CAAe,KAAK,MAAM,CAAA;AACnC;AAEO,IAAM,gCAAA,GAAmC,CAAC,MAAA,KAAqC;AACpF,EAAA,OAAO,MAAA,EAAQ,KAAK,CAAC,EAAE,MAAK,KAAM,IAAA,KAAS,WAAW,CAAA,EAAG,OAAA;AAC3D;AAEO,IAAM,uBAAuB,CAAC;AAAA,EACnC,UAAA;AAAA,EACA,QAAA;AAAA,EACA,IAAA;AAAA,EACA,MAAA;AAAA,EACA,KAAA,GAAQ;AACV,CAAA,KAMuH;AACrH,EAAA,OAAO;AAAA,IACL,UAAA;AAAA,IACA,QAAA;AAAA,IACA,IAAA;AAAA,IACA,MAAA;AAAA,IACA;AAAA,GACF;AACF;AAEO,IAAM,kBAAkB,CAAC;AAAA,EAC9B,OAAA;AAAA,EACA,IAAA;AAAA,EACA,EAAA,GAAK,cAAA;AAAA,EACL,kBAAkB;AACpB,CAAA,KAWiB;AACf,EAAA,OAAO;AAAA,IACL,EAAA;AAAA,IACA,IAAA;AAAA,IACA,OAAA;AAAA,IACA,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,IAAA,EAAM,SAAS,CAAA;AAAA,IACvC;AAAA,GACF;AACF;AAEO,IAAM,qBAAqB,CAAC;AAAA,EACjC,gBAAgB,EAAC;AAAA,EACjB,MAAA;AAAA,EACA,qBAAqB,EAAC;AAAA,EACtB,iBAAiB,EAAC;AAAA,EAClB,uBAAuB,EAAC;AAAA,kBACxBA,gBAAA,GAAiB,IAAIC,6BAAA,EAAe;AAAA,EACpC,KAAA,GAAQ,OAAO,UAAA;AACjB,CAAA,KAaK;AACH,EAAA,OAAO;AAAA,IACL,KAAA,EAAO;AAAA,MACL,aAAA;AAAA,MACA,kBAAA;AAAA,MACA,cAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,MAAA;AAAA,oBACAD,gBAAA;AAAA,IACA;AAAA,GACF;AACF;AASO,SAAS,iBAAiB,MAAA,EAAqF;AACpH,EAAA,MAAM,YAAsB,EAAC;AAC7B,EAAA,MAAM,gBAAgC,EAAC;AAEvC,EAAA,KAAA,IAAS,YAAA,GAAe,CAAA,EAAG,YAAA,GAAe,MAAA,CAAO,QAAQ,YAAA,EAAA,EAAgB;AACvE,IAAA,MAAM,OAAA,GAAU,OAAO,YAAY,CAAA;AACnC,IAAA,IAAI,SAAS,eAAA,EAAiB;AAC5B,MAAA,KAAA,IAAS,kBAAkB,CAAA,EAAG,eAAA,GAAkB,OAAA,CAAQ,eAAA,CAAgB,QAAQ,eAAA,EAAA,EAAmB;AACjG,QAAA,MAAM,UAAA,GAAa,OAAA,CAAQ,eAAA,CAAgB,eAAe,CAAA;AAC1D,QAAA,IAAI,UAAA,IAAc,WAAW,QAAA,KAAa,UAAA,CAAW,UAAU,QAAA,IAAY,UAAA,CAAW,UAAU,MAAA,CAAA,EAAS;AACvG,UAAA,SAAA,CAAU,IAAA,CAAK,WAAW,QAAQ,CAAA;AAClC,UAAA,aAAA,CAAc,IAAA,CAAK;AAAA,YACjB,UAAU,UAAA,CAAW,QAAA;AAAA,YACrB,YAAY,UAAA,CAAW,UAAA,IAAc,CAAA,EAAG,YAAY,IAAI,eAAe,CAAA,CAAA;AAAA,YACvE,YAAA;AAAA,YACA;AAAA,WACD,CAAA;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,EAAE,KAAA,EAAO,SAAA,EAAW,aAAA,EAAc;AAC3C;AAEO,IAAM,oBAAA,GAAuB,CAAC,QAAA,KAA2D;AAC9F,EAAA,OAAO,UAAU,aAAA,EAAe,GAAA,CAAI,SAAO,GAAA,CAAI,OAAO,KAAK,EAAC;AAC9D;AAEO,IAAM,4BAAA,GAA+B,CAAC,SAAA,KAAiD;AAC5F,EAAA,OAAO,SAAA,CAAU,MAAA,CAAO,CAAA,GAAA,KAAO,GAAA,CAAI,IAAA,KAAS,WAAW,CAAA,CAAE,GAAA,CAAI,CAAA,GAAA,KAAO,GAAA,CAAI,OAAO,CAAA;AACjF","file":"chunk-QKR2PMLZ.cjs","sourcesContent":["import { RuntimeContext } from '@mastra/core/runtime-context';\nimport type { ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput } from '@mastra/core/scores';\nimport type { ToolInvocation, UIMessage } from 'ai';\n\nexport const roundToTwoDecimals = (num: number) => {\n return Math.round((num + Number.EPSILON) * 100) / 100;\n};\n\nexport function isCloserTo(value: number, target1: number, target2: number): boolean {\n return Math.abs(value - target1) < Math.abs(value - target2);\n}\n\nexport type TestCase = {\n input: string;\n output: string;\n expectedResult: {\n score: number;\n reason?: string;\n };\n};\n\nexport type TestCaseWithContext = TestCase & {\n context: string[];\n};\n\nexport const createTestRun = (\n input: string,\n output: string,\n additionalContext?: Record<string, any>,\n runtimeContext?: Record<string, any>,\n): ScoringInput => {\n return {\n input: [{ role: 'user', content: input }],\n output: { role: 'assistant', text: output },\n additionalContext: additionalContext ?? {},\n runtimeContext: runtimeContext ?? {},\n };\n};\n\nexport const getUserMessageFromRunInput = (input?: ScorerRunInputForAgent) => {\n return input?.inputMessages.find(({ role }) => role === 'user')?.content;\n};\n\nexport const getSystemMessagesFromRunInput = (input?: ScorerRunInputForAgent): string[] => {\n const systemMessages: string[] = [];\n\n // Add standard system messages\n if (input?.systemMessages) {\n systemMessages.push(\n ...input.systemMessages\n .map(msg => {\n // Handle different content types - extract text if it's an array of parts\n if (typeof msg.content === 'string') {\n return msg.content;\n } else if (Array.isArray(msg.content)) {\n // Extract text from parts array\n return msg.content\n .filter(part => part.type === 'text')\n .map(part => part.text || '')\n .join(' ');\n }\n return '';\n })\n .filter(content => content),\n );\n }\n\n // Add tagged system messages (these are specialized system prompts)\n if (input?.taggedSystemMessages) {\n Object.values(input.taggedSystemMessages).forEach(messages => {\n messages.forEach(msg => {\n if (typeof msg.content === 'string') {\n systemMessages.push(msg.content);\n }\n });\n });\n }\n\n return systemMessages;\n};\n\nexport const getCombinedSystemPrompt = (input?: ScorerRunInputForAgent): string => {\n const systemMessages = getSystemMessagesFromRunInput(input);\n return systemMessages.join('\\n\\n');\n};\n\nexport const getAssistantMessageFromRunOutput = (output?: ScorerRunOutputForAgent) => {\n return output?.find(({ role }) => role === 'assistant')?.content;\n};\n\nexport const createToolInvocation = ({\n toolCallId,\n toolName,\n args,\n result,\n state = 'result',\n}: {\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state?: ToolInvocation['state'];\n}): { toolCallId: string; toolName: string; args: Record<string, any>; result: Record<string, any>; state: string } => {\n return {\n toolCallId,\n toolName,\n args,\n result,\n state,\n };\n};\n\nexport const createUIMessage = ({\n content,\n role,\n id = 'test-message',\n toolInvocations = [],\n}: {\n id: string;\n role: 'user' | 'assistant' | 'system';\n content: string;\n toolInvocations?: Array<{\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state: any;\n }>;\n}): UIMessage => {\n return {\n id,\n role,\n content,\n parts: [{ type: 'text', text: content }],\n toolInvocations,\n };\n};\n\nexport const createAgentTestRun = ({\n inputMessages = [],\n output,\n rememberedMessages = [],\n systemMessages = [],\n taggedSystemMessages = {},\n runtimeContext = new RuntimeContext(),\n runId = crypto.randomUUID(),\n}: {\n inputMessages?: ScorerRunInputForAgent['inputMessages'];\n output: ScorerRunOutputForAgent;\n rememberedMessages?: ScorerRunInputForAgent['rememberedMessages'];\n systemMessages?: ScorerRunInputForAgent['systemMessages'];\n taggedSystemMessages?: ScorerRunInputForAgent['taggedSystemMessages'];\n runtimeContext?: RuntimeContext;\n runId?: string;\n}): {\n input: ScorerRunInputForAgent;\n output: ScorerRunOutputForAgent;\n runtimeContext: RuntimeContext;\n runId: string;\n} => {\n return {\n input: {\n inputMessages,\n rememberedMessages,\n systemMessages,\n taggedSystemMessages,\n },\n output,\n runtimeContext,\n runId,\n };\n};\n\nexport type ToolCallInfo = {\n toolName: string;\n toolCallId: string;\n messageIndex: number;\n invocationIndex: number;\n};\n\nexport function extractToolCalls(output: ScorerRunOutputForAgent): { tools: string[]; toolCallInfos: ToolCallInfo[] } {\n const toolCalls: string[] = [];\n const toolCallInfos: ToolCallInfo[] = [];\n\n for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {\n const message = output[messageIndex];\n if (message?.toolInvocations) {\n for (let invocationIndex = 0; invocationIndex < message.toolInvocations.length; invocationIndex++) {\n const invocation = message.toolInvocations[invocationIndex];\n if (invocation && invocation.toolName && (invocation.state === 'result' || invocation.state === 'call')) {\n toolCalls.push(invocation.toolName);\n toolCallInfos.push({\n toolName: invocation.toolName,\n toolCallId: invocation.toolCallId || `${messageIndex}-${invocationIndex}`,\n messageIndex,\n invocationIndex,\n });\n }\n }\n }\n }\n\n return { tools: toolCalls, toolCallInfos };\n}\n\nexport const extractInputMessages = (runInput: ScorerRunInputForAgent | undefined): string[] => {\n return runInput?.inputMessages?.map(msg => msg.content) || [];\n};\n\nexport const extractAgentResponseMessages = (runOutput: ScorerRunOutputForAgent): string[] => {\n return runOutput.filter(msg => msg.role === 'assistant').map(msg => msg.content);\n};\n"]}
@@ -1,8 +0,0 @@
1
- // src/metrics/llm/utils.ts
2
- var roundToTwoDecimals = (num) => {
3
- return Math.round((num + Number.EPSILON) * 100) / 100;
4
- };
5
-
6
- export { roundToTwoDecimals };
7
- //# sourceMappingURL=chunk-QTWX6TKR.js.map
8
- //# sourceMappingURL=chunk-QTWX6TKR.js.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/metrics/llm/utils.ts"],"names":[],"mappings":";AAAO,IAAM,kBAAA,GAAqB,CAAC,GAAA,KAAgB;AACjD,EAAA,OAAO,KAAK,KAAA,CAAA,CAAO,GAAA,GAAM,MAAA,CAAO,OAAA,IAAW,GAAG,CAAA,GAAI,GAAA;AACpD","file":"chunk-QTWX6TKR.js","sourcesContent":["export const roundToTwoDecimals = (num: number) => {\n return Math.round((num + Number.EPSILON) * 100) / 100;\n};\n\nexport function isCloserTo(value: number, target1: number, target2: number): boolean {\n return Math.abs(value - target1) < Math.abs(value - target2);\n}\n\nexport type TestCase = {\n input: string;\n output: string;\n expectedResult: {\n score: number;\n reason?: string;\n };\n};\n\nexport type TestCaseWithContext = TestCase & {\n context: string[];\n};\n"]}
@@ -1,17 +0,0 @@
1
- import { Agent } from '@mastra/core/agent';
2
-
3
- // src/metrics/judge/index.ts
4
- var MastraAgentJudge = class {
5
- agent;
6
- constructor(name, instructions, model) {
7
- this.agent = new Agent({
8
- name: `Mastra Eval Judge ${name}`,
9
- instructions,
10
- model
11
- });
12
- }
13
- };
14
-
15
- export { MastraAgentJudge };
16
- //# sourceMappingURL=chunk-YGTIO3J5.js.map
17
- //# sourceMappingURL=chunk-YGTIO3J5.js.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/metrics/judge/index.ts"],"names":[],"mappings":";;;AAGO,IAAe,mBAAf,MAAgC;AAAA,EAClB,KAAA;AAAA,EAEnB,WAAA,CAAY,IAAA,EAAc,YAAA,EAAsB,KAAA,EAAsB;AACpE,IAAA,IAAA,CAAK,KAAA,GAAQ,IAAI,KAAA,CAAM;AAAA,MACrB,IAAA,EAAM,qBAAqB,IAAI,CAAA,CAAA;AAAA,MAC/B,YAAA;AAAA,MACA;AAAA,KACD,CAAA;AAAA,EACH;AACF","file":"chunk-YGTIO3J5.js","sourcesContent":["import { Agent } from '@mastra/core/agent';\nimport type { LanguageModel } from '@mastra/core/llm';\n\nexport abstract class MastraAgentJudge {\n protected readonly agent: Agent;\n\n constructor(name: string, instructions: string, model: LanguageModel) {\n this.agent = new Agent({\n name: `Mastra Eval Judge ${name}`,\n instructions: instructions,\n model,\n });\n }\n}\n"]}