@mastra/evals 0.14.4 → 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. package/CHANGELOG.md +34 -25
  2. package/README.md +19 -159
  3. package/dist/{chunk-KHEXN75Q.js → chunk-CCLM7KPF.js} +45 -21
  4. package/dist/chunk-CCLM7KPF.js.map +1 -0
  5. package/dist/{chunk-QKR2PMLZ.cjs → chunk-TPQLLHZW.cjs} +46 -21
  6. package/dist/chunk-TPQLLHZW.cjs.map +1 -0
  7. package/dist/scorers/code/completeness/index.d.ts +1 -1
  8. package/dist/scorers/code/completeness/index.d.ts.map +1 -1
  9. package/dist/scorers/code/content-similarity/index.d.ts +1 -1
  10. package/dist/scorers/code/content-similarity/index.d.ts.map +1 -1
  11. package/dist/scorers/code/keyword-coverage/index.d.ts +1 -1
  12. package/dist/scorers/code/keyword-coverage/index.d.ts.map +1 -1
  13. package/dist/scorers/code/textual-difference/index.d.ts +1 -1
  14. package/dist/scorers/code/textual-difference/index.d.ts.map +1 -1
  15. package/dist/scorers/code/tone/index.d.ts +1 -1
  16. package/dist/scorers/code/tone/index.d.ts.map +1 -1
  17. package/dist/scorers/code/tool-call-accuracy/index.d.ts +1 -1
  18. package/dist/scorers/code/tool-call-accuracy/index.d.ts.map +1 -1
  19. package/dist/scorers/llm/answer-relevancy/index.d.ts +1 -1
  20. package/dist/scorers/llm/answer-relevancy/index.d.ts.map +1 -1
  21. package/dist/scorers/llm/answer-similarity/index.d.ts +2 -2
  22. package/dist/scorers/llm/answer-similarity/index.d.ts.map +1 -1
  23. package/dist/scorers/llm/bias/index.d.ts +2 -2
  24. package/dist/scorers/llm/bias/index.d.ts.map +1 -1
  25. package/dist/scorers/llm/context-precision/index.d.ts +3 -3
  26. package/dist/scorers/llm/context-precision/index.d.ts.map +1 -1
  27. package/dist/scorers/llm/context-relevance/index.d.ts +3 -3
  28. package/dist/scorers/llm/context-relevance/index.d.ts.map +1 -1
  29. package/dist/scorers/llm/faithfulness/index.d.ts +2 -2
  30. package/dist/scorers/llm/faithfulness/index.d.ts.map +1 -1
  31. package/dist/scorers/llm/hallucination/index.d.ts +2 -2
  32. package/dist/scorers/llm/hallucination/index.d.ts.map +1 -1
  33. package/dist/scorers/llm/noise-sensitivity/index.d.ts +1 -1
  34. package/dist/scorers/llm/noise-sensitivity/index.d.ts.map +1 -1
  35. package/dist/scorers/llm/prompt-alignment/index.d.ts +2 -2
  36. package/dist/scorers/llm/prompt-alignment/index.d.ts.map +1 -1
  37. package/dist/scorers/llm/tool-call-accuracy/index.d.ts +2 -2
  38. package/dist/scorers/llm/tool-call-accuracy/index.d.ts.map +1 -1
  39. package/dist/scorers/llm/toxicity/index.d.ts +2 -2
  40. package/dist/scorers/llm/toxicity/index.d.ts.map +1 -1
  41. package/dist/scorers/{llm → prebuilt}/index.cjs +479 -62
  42. package/dist/scorers/prebuilt/index.cjs.map +1 -0
  43. package/dist/scorers/prebuilt/index.d.ts +3 -0
  44. package/dist/scorers/prebuilt/index.d.ts.map +1 -0
  45. package/dist/scorers/{llm → prebuilt}/index.js +419 -15
  46. package/dist/scorers/prebuilt/index.js.map +1 -0
  47. package/dist/scorers/utils.cjs +21 -17
  48. package/dist/scorers/utils.d.ts +21 -11
  49. package/dist/scorers/utils.d.ts.map +1 -1
  50. package/dist/scorers/utils.js +1 -1
  51. package/package.json +15 -59
  52. package/dist/attachListeners.d.ts +0 -4
  53. package/dist/attachListeners.d.ts.map +0 -1
  54. package/dist/chunk-44PMY5ES.js +0 -78
  55. package/dist/chunk-44PMY5ES.js.map +0 -1
  56. package/dist/chunk-7QAUEU4L.cjs +0 -10
  57. package/dist/chunk-7QAUEU4L.cjs.map +0 -1
  58. package/dist/chunk-EMMSS5I5.cjs +0 -37
  59. package/dist/chunk-EMMSS5I5.cjs.map +0 -1
  60. package/dist/chunk-G3PMV62Z.js +0 -33
  61. package/dist/chunk-G3PMV62Z.js.map +0 -1
  62. package/dist/chunk-IUSAD2BW.cjs +0 -19
  63. package/dist/chunk-IUSAD2BW.cjs.map +0 -1
  64. package/dist/chunk-KHEXN75Q.js.map +0 -1
  65. package/dist/chunk-PWGOG6ML.cjs +0 -81
  66. package/dist/chunk-PWGOG6ML.cjs.map +0 -1
  67. package/dist/chunk-QKR2PMLZ.cjs.map +0 -1
  68. package/dist/chunk-QTWX6TKR.js +0 -8
  69. package/dist/chunk-QTWX6TKR.js.map +0 -1
  70. package/dist/chunk-YGTIO3J5.js +0 -17
  71. package/dist/chunk-YGTIO3J5.js.map +0 -1
  72. package/dist/dist-LDTK3TIP.cjs +0 -16759
  73. package/dist/dist-LDTK3TIP.cjs.map +0 -1
  74. package/dist/dist-OWYZEOJK.js +0 -16737
  75. package/dist/dist-OWYZEOJK.js.map +0 -1
  76. package/dist/evaluation.d.ts +0 -8
  77. package/dist/evaluation.d.ts.map +0 -1
  78. package/dist/index.cjs +0 -93
  79. package/dist/index.cjs.map +0 -1
  80. package/dist/index.d.ts +0 -3
  81. package/dist/index.d.ts.map +0 -1
  82. package/dist/index.js +0 -89
  83. package/dist/index.js.map +0 -1
  84. package/dist/magic-string.es-7ORA5OGR.js +0 -1305
  85. package/dist/magic-string.es-7ORA5OGR.js.map +0 -1
  86. package/dist/magic-string.es-NZ2XWFKN.cjs +0 -1311
  87. package/dist/magic-string.es-NZ2XWFKN.cjs.map +0 -1
  88. package/dist/metrics/index.d.ts +0 -4
  89. package/dist/metrics/index.d.ts.map +0 -1
  90. package/dist/metrics/judge/index.cjs +0 -12
  91. package/dist/metrics/judge/index.cjs.map +0 -1
  92. package/dist/metrics/judge/index.d.ts +0 -7
  93. package/dist/metrics/judge/index.d.ts.map +0 -1
  94. package/dist/metrics/judge/index.js +0 -3
  95. package/dist/metrics/judge/index.js.map +0 -1
  96. package/dist/metrics/llm/answer-relevancy/index.d.ts +0 -16
  97. package/dist/metrics/llm/answer-relevancy/index.d.ts.map +0 -1
  98. package/dist/metrics/llm/answer-relevancy/metricJudge.d.ts +0 -20
  99. package/dist/metrics/llm/answer-relevancy/metricJudge.d.ts.map +0 -1
  100. package/dist/metrics/llm/answer-relevancy/prompts.d.ts +0 -19
  101. package/dist/metrics/llm/answer-relevancy/prompts.d.ts.map +0 -1
  102. package/dist/metrics/llm/bias/index.d.ts +0 -14
  103. package/dist/metrics/llm/bias/index.d.ts.map +0 -1
  104. package/dist/metrics/llm/bias/metricJudge.d.ts +0 -14
  105. package/dist/metrics/llm/bias/metricJudge.d.ts.map +0 -1
  106. package/dist/metrics/llm/bias/prompts.d.ts +0 -14
  107. package/dist/metrics/llm/bias/prompts.d.ts.map +0 -1
  108. package/dist/metrics/llm/context-position/index.d.ts +0 -16
  109. package/dist/metrics/llm/context-position/index.d.ts.map +0 -1
  110. package/dist/metrics/llm/context-position/metricJudge.d.ts +0 -20
  111. package/dist/metrics/llm/context-position/metricJudge.d.ts.map +0 -1
  112. package/dist/metrics/llm/context-position/prompts.d.ts +0 -17
  113. package/dist/metrics/llm/context-position/prompts.d.ts.map +0 -1
  114. package/dist/metrics/llm/context-precision/index.d.ts +0 -16
  115. package/dist/metrics/llm/context-precision/index.d.ts.map +0 -1
  116. package/dist/metrics/llm/context-precision/metricJudge.d.ts +0 -20
  117. package/dist/metrics/llm/context-precision/metricJudge.d.ts.map +0 -1
  118. package/dist/metrics/llm/context-precision/prompts.d.ts +0 -17
  119. package/dist/metrics/llm/context-precision/prompts.d.ts.map +0 -1
  120. package/dist/metrics/llm/context-relevancy/index.d.ts +0 -16
  121. package/dist/metrics/llm/context-relevancy/index.d.ts.map +0 -1
  122. package/dist/metrics/llm/context-relevancy/metricJudge.d.ts +0 -16
  123. package/dist/metrics/llm/context-relevancy/metricJudge.d.ts.map +0 -1
  124. package/dist/metrics/llm/context-relevancy/prompts.d.ts +0 -13
  125. package/dist/metrics/llm/context-relevancy/prompts.d.ts.map +0 -1
  126. package/dist/metrics/llm/contextual-recall/index.d.ts +0 -16
  127. package/dist/metrics/llm/contextual-recall/index.d.ts.map +0 -1
  128. package/dist/metrics/llm/contextual-recall/metricJudge.d.ts +0 -16
  129. package/dist/metrics/llm/contextual-recall/metricJudge.d.ts.map +0 -1
  130. package/dist/metrics/llm/contextual-recall/prompts.d.ts +0 -13
  131. package/dist/metrics/llm/contextual-recall/prompts.d.ts.map +0 -1
  132. package/dist/metrics/llm/faithfulness/index.d.ts +0 -16
  133. package/dist/metrics/llm/faithfulness/index.d.ts.map +0 -1
  134. package/dist/metrics/llm/faithfulness/metricJudge.d.ts +0 -22
  135. package/dist/metrics/llm/faithfulness/metricJudge.d.ts.map +0 -1
  136. package/dist/metrics/llm/faithfulness/prompts.d.ts +0 -20
  137. package/dist/metrics/llm/faithfulness/prompts.d.ts.map +0 -1
  138. package/dist/metrics/llm/hallucination/index.d.ts +0 -16
  139. package/dist/metrics/llm/hallucination/index.d.ts.map +0 -1
  140. package/dist/metrics/llm/hallucination/metricJudge.d.ts +0 -22
  141. package/dist/metrics/llm/hallucination/metricJudge.d.ts.map +0 -1
  142. package/dist/metrics/llm/hallucination/prompts.d.ts +0 -17
  143. package/dist/metrics/llm/hallucination/prompts.d.ts.map +0 -1
  144. package/dist/metrics/llm/index.cjs +0 -2481
  145. package/dist/metrics/llm/index.cjs.map +0 -1
  146. package/dist/metrics/llm/index.d.ts +0 -12
  147. package/dist/metrics/llm/index.d.ts.map +0 -1
  148. package/dist/metrics/llm/index.js +0 -2469
  149. package/dist/metrics/llm/index.js.map +0 -1
  150. package/dist/metrics/llm/prompt-alignment/index.d.ts +0 -33
  151. package/dist/metrics/llm/prompt-alignment/index.d.ts.map +0 -1
  152. package/dist/metrics/llm/prompt-alignment/metricJudge.d.ts +0 -20
  153. package/dist/metrics/llm/prompt-alignment/metricJudge.d.ts.map +0 -1
  154. package/dist/metrics/llm/prompt-alignment/prompts.d.ts +0 -17
  155. package/dist/metrics/llm/prompt-alignment/prompts.d.ts.map +0 -1
  156. package/dist/metrics/llm/summarization/index.d.ts +0 -19
  157. package/dist/metrics/llm/summarization/index.d.ts.map +0 -1
  158. package/dist/metrics/llm/summarization/metricJudge.d.ts +0 -34
  159. package/dist/metrics/llm/summarization/metricJudge.d.ts.map +0 -1
  160. package/dist/metrics/llm/summarization/prompts.d.ts +0 -30
  161. package/dist/metrics/llm/summarization/prompts.d.ts.map +0 -1
  162. package/dist/metrics/llm/toxicity/index.d.ts +0 -14
  163. package/dist/metrics/llm/toxicity/index.d.ts.map +0 -1
  164. package/dist/metrics/llm/toxicity/metricJudge.d.ts +0 -14
  165. package/dist/metrics/llm/toxicity/metricJudge.d.ts.map +0 -1
  166. package/dist/metrics/llm/toxicity/prompts.d.ts +0 -10
  167. package/dist/metrics/llm/toxicity/prompts.d.ts.map +0 -1
  168. package/dist/metrics/llm/types.d.ts +0 -7
  169. package/dist/metrics/llm/types.d.ts.map +0 -1
  170. package/dist/metrics/llm/utils.d.ts +0 -14
  171. package/dist/metrics/llm/utils.d.ts.map +0 -1
  172. package/dist/metrics/nlp/completeness/index.d.ts +0 -21
  173. package/dist/metrics/nlp/completeness/index.d.ts.map +0 -1
  174. package/dist/metrics/nlp/content-similarity/index.d.ts +0 -18
  175. package/dist/metrics/nlp/content-similarity/index.d.ts.map +0 -1
  176. package/dist/metrics/nlp/index.cjs +0 -201
  177. package/dist/metrics/nlp/index.cjs.map +0 -1
  178. package/dist/metrics/nlp/index.d.ts +0 -6
  179. package/dist/metrics/nlp/index.d.ts.map +0 -1
  180. package/dist/metrics/nlp/index.js +0 -188
  181. package/dist/metrics/nlp/index.js.map +0 -1
  182. package/dist/metrics/nlp/keyword-coverage/index.d.ts +0 -13
  183. package/dist/metrics/nlp/keyword-coverage/index.d.ts.map +0 -1
  184. package/dist/metrics/nlp/textual-difference/index.d.ts +0 -15
  185. package/dist/metrics/nlp/textual-difference/index.d.ts.map +0 -1
  186. package/dist/metrics/nlp/tone/index.d.ts +0 -18
  187. package/dist/metrics/nlp/tone/index.d.ts.map +0 -1
  188. package/dist/ratio.d.ts +0 -13
  189. package/dist/ratio.d.ts.map +0 -1
  190. package/dist/scorers/code/index.cjs +0 -327
  191. package/dist/scorers/code/index.cjs.map +0 -1
  192. package/dist/scorers/code/index.js +0 -313
  193. package/dist/scorers/code/index.js.map +0 -1
  194. package/dist/scorers/llm/index.cjs.map +0 -1
  195. package/dist/scorers/llm/index.js.map +0 -1
@@ -1,16 +0,0 @@
1
- import type { LanguageModel } from '@mastra/core/llm';
2
- import { MastraAgentJudge } from '../../judge/index.js';
3
- export declare class ContextRelevancyJudge extends MastraAgentJudge {
4
- constructor(model: LanguageModel);
5
- evaluate(input: string, actualOutput: string, retrievalContext: string[]): Promise<{
6
- verdict: string;
7
- reason: string;
8
- }[]>;
9
- getReason(args: {
10
- score: number;
11
- input: string;
12
- irrelevancies: string[];
13
- relevantStatements: string[];
14
- }): Promise<string>;
15
- }
16
- //# sourceMappingURL=metricJudge.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"metricJudge.d.ts","sourceRoot":"","sources":["../../../../src/metrics/llm/context-relevancy/metricJudge.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGtD,OAAO,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAI/C,qBAAa,qBAAsB,SAAQ,gBAAgB;gBAC7C,KAAK,EAAE,aAAa;IAI1B,QAAQ,CACZ,KAAK,EAAE,MAAM,EACb,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,MAAM,EAAE,GACzB,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IAoB3C,SAAS,CAAC,IAAI,EAAE;QACpB,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,aAAa,EAAE,MAAM,EAAE,CAAC;QACxB,kBAAkB,EAAE,MAAM,EAAE,CAAC;KAC9B,GAAG,OAAO,CAAC,MAAM,CAAC;CASpB"}
@@ -1,13 +0,0 @@
1
- export declare const CONTEXT_RELEVANCY_AGENT_INSTRUCTIONS = "You are a balanced and nuanced context relevancy evaluator. Your job is to determine if retrieved context nodes are overall relevant to given input.\n\nKey Principles:\n1. Evaluate whether each context node was useful in generating the given input\n2. Consider all forms of relevance:\n - Direct definitions or explanations\n - Supporting evidence or examples\n - Related characteristics or behaviors\n - Real-world applications or effects\n3. Prioritize usefulness over completeness\n4. Recognize that some nodes may be partially relevant\n5. Empty or error nodes should be marked as not relevant";
2
- export declare function generateEvaluatePrompt({ input, output, context, }: {
3
- input: string;
4
- output: string;
5
- context: string[];
6
- }): string;
7
- export declare function generateReasonPrompt({ score, input, irrelevancies, relevantStatements, }: {
8
- score: number;
9
- input: string;
10
- irrelevancies: string[];
11
- relevantStatements: string[];
12
- }): string;
13
- //# sourceMappingURL=prompts.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"prompts.d.ts","sourceRoot":"","sources":["../../../../src/metrics/llm/context-relevancy/prompts.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,oCAAoC,kmBAWQ,CAAC;AAE1D,wBAAgB,sBAAsB,CAAC,EACrC,KAAK,EACL,MAAM,EACN,OAAO,GACR,EAAE;IACD,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB,UAoDA;AAED,wBAAgB,oBAAoB,CAAC,EACnC,KAAK,EACL,KAAK,EACL,aAAa,EACb,kBAAkB,GACnB,EAAE;IACD,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,kBAAkB,EAAE,MAAM,EAAE,CAAC;CAC9B,UAyBA"}
@@ -1,16 +0,0 @@
1
- import { Metric } from '@mastra/core/eval';
2
- import type { LanguageModel } from '@mastra/core/llm';
3
- import type { MetricResultWithReason } from '../types.js';
4
- export interface ContextualRecallMetricOptions {
5
- scale?: number;
6
- context: string[];
7
- }
8
- export declare class ContextualRecallMetric extends Metric {
9
- private judge;
10
- private scale;
11
- private context;
12
- constructor(model: LanguageModel, { scale, context }: ContextualRecallMetricOptions);
13
- measure(input: string, output: string): Promise<MetricResultWithReason>;
14
- private calculateScore;
15
- }
16
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/metrics/llm/contextual-recall/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAC3C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,UAAU,CAAC;AAKvD,MAAM,WAAW,6BAA6B;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB;AAED,qBAAa,sBAAuB,SAAQ,MAAM;IAChD,OAAO,CAAC,KAAK,CAAwB;IACrC,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,OAAO,CAAW;gBAEd,KAAK,EAAE,aAAa,EAAE,EAAE,KAAS,EAAE,OAAO,EAAE,EAAE,6BAA6B;IAQjF,OAAO,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,sBAAsB,CAAC;IAkB7E,OAAO,CAAC,cAAc;CAWvB"}
@@ -1,16 +0,0 @@
1
- import type { LanguageModel } from '@mastra/core/llm';
2
- import { MastraAgentJudge } from '../../judge/index.js';
3
- export declare class ContextualRecallJudge extends MastraAgentJudge {
4
- constructor(model: LanguageModel);
5
- evaluate(input: string, actualOutput: string, retrievalContext: string[]): Promise<{
6
- verdict: string;
7
- reason: string;
8
- }[]>;
9
- getReason(args: {
10
- score: number;
11
- unsupportiveReasons: string[];
12
- expectedOutput: string;
13
- supportiveReasons: string[];
14
- }): Promise<string>;
15
- }
16
- //# sourceMappingURL=metricJudge.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"metricJudge.d.ts","sourceRoot":"","sources":["../../../../src/metrics/llm/contextual-recall/metricJudge.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGtD,OAAO,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAI/C,qBAAa,qBAAsB,SAAQ,gBAAgB;gBAC7C,KAAK,EAAE,aAAa;IAI1B,QAAQ,CACZ,KAAK,EAAE,MAAM,EACb,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,MAAM,EAAE,GACzB,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IAqB3C,SAAS,CAAC,IAAI,EAAE;QACpB,KAAK,EAAE,MAAM,CAAC;QACd,mBAAmB,EAAE,MAAM,EAAE,CAAC;QAC9B,cAAc,EAAE,MAAM,CAAC;QACvB,iBAAiB,EAAE,MAAM,EAAE,CAAC;KAC7B,GAAG,OAAO,CAAC,MAAM,CAAC;CASpB"}
@@ -1,13 +0,0 @@
1
- export declare const CONTEXT_RECALL_AGENT_INSTRUCTIONS = "You are a balanced and nuanced contextual recall evaluator. Your job is to determine if retrieved context nodes are aligning to the expected output.";
2
- export declare function generateEvaluatePrompt({ input, output, context, }: {
3
- input: string;
4
- output: string;
5
- context: string[];
6
- }): string;
7
- export declare function generateReasonPrompt({ score, unsupportiveReasons, expectedOutput, supportiveReasons, }: {
8
- score: number;
9
- unsupportiveReasons: string[];
10
- expectedOutput: string;
11
- supportiveReasons: string[];
12
- }): string;
13
- //# sourceMappingURL=prompts.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"prompts.d.ts","sourceRoot":"","sources":["../../../../src/metrics/llm/contextual-recall/prompts.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,iCAAiC,yJAAyJ,CAAC;AAExM,wBAAgB,sBAAsB,CAAC,EACrC,KAAK,EACL,MAAM,EACN,OAAO,GACR,EAAE;IACD,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB,UA8BA;AAED,wBAAgB,oBAAoB,CAAC,EACnC,KAAK,EACL,mBAAmB,EACnB,cAAc,EACd,iBAAiB,GAClB,EAAE;IACD,KAAK,EAAE,MAAM,CAAC;IACd,mBAAmB,EAAE,MAAM,EAAE,CAAC;IAC9B,cAAc,EAAE,MAAM,CAAC;IACvB,iBAAiB,EAAE,MAAM,EAAE,CAAC;CAC7B,UA6BA"}
@@ -1,16 +0,0 @@
1
- import { Metric } from '@mastra/core/eval';
2
- import type { LanguageModel } from '@mastra/core/llm';
3
- import type { MetricResultWithReason } from '../types.js';
4
- export interface FaithfulnessMetricOptions {
5
- scale?: number;
6
- context: string[];
7
- }
8
- export declare class FaithfulnessMetric extends Metric {
9
- private judge;
10
- private scale;
11
- private context;
12
- constructor(model: LanguageModel, { scale, context }: FaithfulnessMetricOptions);
13
- measure(input: string, output: string): Promise<MetricResultWithReason>;
14
- private calculateScore;
15
- }
16
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/metrics/llm/faithfulness/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAC3C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,UAAU,CAAC;AAKvD,MAAM,WAAW,yBAAyB;IACxC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB;AAED,qBAAa,kBAAmB,SAAQ,MAAM;IAC5C,OAAO,CAAC,KAAK,CAAoB;IACjC,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,OAAO,CAAW;gBAEd,KAAK,EAAE,aAAa,EAAE,EAAE,KAAS,EAAE,OAAO,EAAE,EAAE,yBAAyB;IAQ7E,OAAO,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,sBAAsB,CAAC;IAoB7E,OAAO,CAAC,cAAc;CAYvB"}
@@ -1,22 +0,0 @@
1
- import type { LanguageModel } from '@mastra/core/llm';
2
- import { MastraAgentJudge } from '../../judge/index.js';
3
- export declare class FaithfulnessJudge extends MastraAgentJudge {
4
- constructor(model: LanguageModel);
5
- evaluate(output: string, context: string[]): Promise<{
6
- claim: string;
7
- verdict: string;
8
- reason: string;
9
- }[]>;
10
- getReason(args: {
11
- input: string;
12
- output: string;
13
- context: string[];
14
- score: number;
15
- scale: number;
16
- verdicts: {
17
- verdict: string;
18
- reason: string;
19
- }[];
20
- }): Promise<string>;
21
- }
22
- //# sourceMappingURL=metricJudge.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"metricJudge.d.ts","sourceRoot":"","sources":["../../../../src/metrics/llm/faithfulness/metricJudge.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGtD,OAAO,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAS/C,qBAAa,iBAAkB,SAAQ,gBAAgB;gBACzC,KAAK,EAAE,aAAa;IAI1B,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IA4B1G,SAAS,CAAC,IAAI,EAAE;QACpB,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,EAAE,MAAM,CAAC;QACf,OAAO,EAAE,MAAM,EAAE,CAAC;QAClB,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,QAAQ,EAAE;YAAE,OAAO,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAA;SAAE,EAAE,CAAC;KACjD,GAAG,OAAO,CAAC,MAAM,CAAC;CASpB"}
@@ -1,20 +0,0 @@
1
- export declare const FAITHFULNESS_AGENT_INSTRUCTIONS = "You are a precise and thorough faithfulness evaluator. Your job is to determine if LLM outputs are factually consistent with the provided context, focusing on claim verification.\n\nKey Principles:\n1. First extract all claims from the output (both factual and speculative)\n2. Then verify each extracted claim against the provided context\n3. Consider a claim truthful if it is explicitly supported by the context\n4. Consider a claim contradictory if it directly conflicts with the context\n5. Consider a claim unsure if it is not mentioned in the context\n6. Empty outputs should be handled as having no claims\n7. Focus on factual consistency, not relevance or completeness\n8. Never use prior knowledge in judgments\n9. Claims with speculative language (may, might, possibly) should be marked as \"unsure\"";
2
- export declare function generateClaimExtractionPrompt({ output }: {
3
- output: string;
4
- }): string;
5
- export declare function generateEvaluatePrompt({ claims, context }: {
6
- claims: string[];
7
- context: string[];
8
- }): string;
9
- export declare function generateReasonPrompt({ input, output, context, score, scale, verdicts, }: {
10
- input: string;
11
- output: string;
12
- context: string[];
13
- score: number;
14
- scale: number;
15
- verdicts: {
16
- verdict: string;
17
- reason: string;
18
- }[];
19
- }): string;
20
- //# sourceMappingURL=prompts.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"prompts.d.ts","sourceRoot":"","sources":["../../../../src/metrics/llm/faithfulness/prompts.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,+BAA+B,gzBAW4C,CAAC;AAEzF,wBAAgB,6BAA6B,CAAC,EAAE,MAAM,EAAE,EAAE;IAAE,MAAM,EAAE,MAAM,CAAA;CAAE,UAmC3E;AAED,wBAAgB,sBAAsB,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,EAAE;IAAE,MAAM,EAAE,MAAM,EAAE,CAAC;IAAC,OAAO,EAAE,MAAM,EAAE,CAAA;CAAE,UA6DlG;AAED,wBAAgB,oBAAoB,CAAC,EACnC,KAAK,EACL,MAAM,EACN,OAAO,EACP,KAAK,EACL,KAAK,EACL,QAAQ,GACT,EAAE;IACD,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;CACjD,UAsCA"}
@@ -1,16 +0,0 @@
1
- import { Metric } from '@mastra/core/eval';
2
- import type { LanguageModel } from '@mastra/core/llm';
3
- import type { MetricResultWithReason } from '../types.js';
4
- export interface HallucinationMetricOptions {
5
- scale?: number;
6
- context: string[];
7
- }
8
- export declare class HallucinationMetric extends Metric {
9
- private judge;
10
- private scale;
11
- private context;
12
- constructor(model: LanguageModel, { scale, context }: HallucinationMetricOptions);
13
- measure(input: string, output: string): Promise<MetricResultWithReason>;
14
- private calculateScore;
15
- }
16
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/metrics/llm/hallucination/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAC3C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,UAAU,CAAC;AAKvD,MAAM,WAAW,0BAA0B;IACzC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB;AAED,qBAAa,mBAAoB,SAAQ,MAAM;IAC7C,OAAO,CAAC,KAAK,CAAqB;IAClC,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,OAAO,CAAW;gBAEd,KAAK,EAAE,aAAa,EAAE,EAAE,KAAS,EAAE,OAAO,EAAE,EAAE,0BAA0B;IAQ9E,OAAO,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,sBAAsB,CAAC;IAoB7E,OAAO,CAAC,cAAc;CAYvB"}
@@ -1,22 +0,0 @@
1
- import type { LanguageModel } from '@mastra/core/llm';
2
- import { MastraAgentJudge } from '../../judge/index.js';
3
- export declare class HallucinationJudge extends MastraAgentJudge {
4
- constructor(model: LanguageModel);
5
- evaluate(output: string, context: string[]): Promise<{
6
- statement: string;
7
- verdict: string;
8
- reason: string;
9
- }[]>;
10
- getReason(args: {
11
- input: string;
12
- output: string;
13
- context: string[];
14
- score: number;
15
- scale: number;
16
- verdicts: {
17
- verdict: string;
18
- reason: string;
19
- }[];
20
- }): Promise<string>;
21
- }
22
- //# sourceMappingURL=metricJudge.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"metricJudge.d.ts","sourceRoot":"","sources":["../../../../src/metrics/llm/hallucination/metricJudge.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGtD,OAAO,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAI/C,qBAAa,kBAAmB,SAAQ,gBAAgB;gBAC1C,KAAK,EAAE,aAAa;IAI1B,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IA4B9G,SAAS,CAAC,IAAI,EAAE;QACpB,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,EAAE,MAAM,CAAC;QACf,OAAO,EAAE,MAAM,EAAE,CAAC;QAClB,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,QAAQ,EAAE;YAAE,OAAO,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAA;SAAE,EAAE,CAAC;KACjD,GAAG,OAAO,CAAC,MAAM,CAAC;CAOpB"}
@@ -1,17 +0,0 @@
1
- export declare const HALLUCINATION_AGENT_INSTRUCTIONS = "You are a precise and thorough hallucination evaluator. Your job is to determine if an LLM's output contains information not supported by or contradicts the provided context.\n\nKey Principles:\n1. First extract all claims from the output (both factual and speculative)\n2. Then verify each extracted claim against the provided context\n3. Consider it a hallucination if a claim contradicts the context\n4. Consider it a hallucination if a claim makes assertions not supported by context\n5. Empty outputs should be handled as having no hallucinations\n6. Speculative language (may, might, possibly) about facts IN the context is NOT a hallucination\n7. Speculative language about facts NOT in the context IS a hallucination\n8. Never use prior knowledge in judgments - only use what's explicitly stated in context\n9. The following are NOT hallucinations:\n - Using less precise dates (e.g., year when context gives month)\n - Reasonable numerical approximations\n - Omitting additional details while maintaining factual accuracy\n10. Subjective claims (\"made history\", \"pioneering\", \"leading\") are hallucinations unless explicitly stated in context";
2
- export declare function generateEvaluatePrompt({ context, claims }: {
3
- context: string[];
4
- claims: string[];
5
- }): string;
6
- export declare function generateReasonPrompt({ input, output, context, score, scale, verdicts, }: {
7
- input: string;
8
- output: string;
9
- context: string[];
10
- score: number;
11
- scale: number;
12
- verdicts: {
13
- verdict: string;
14
- reason: string;
15
- }[];
16
- }): string;
17
- //# sourceMappingURL=prompts.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"prompts.d.ts","sourceRoot":"","sources":["../../../../src/metrics/llm/hallucination/prompts.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,gCAAgC,+oCAe0E,CAAC;AAExH,wBAAgB,sBAAsB,CAAC,EAAE,OAAO,EAAE,MAAM,EAAE,EAAE;IAAE,OAAO,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,UAkFlG;AAED,wBAAgB,oBAAoB,CAAC,EACnC,KAAK,EACL,MAAM,EACN,OAAO,EACP,KAAK,EACL,KAAK,EACL,QAAQ,GACT,EAAE;IACD,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;CACjD,UA8BA"}