@mastra/evals 0.1.8-alpha.8 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/dist/_tsup-dts-rollup.d.cts +3 -3
  2. package/dist/_tsup-dts-rollup.d.ts +3 -3
  3. package/dist/chunk-COBCYVZ7.cjs +17 -0
  4. package/dist/chunk-CVPOREIE.cjs +42 -0
  5. package/dist/dist-26T2FI3G.cjs +17189 -0
  6. package/dist/{dist-EOJDANYG.js → dist-JUX55UQU.js} +101 -885
  7. package/dist/index.cjs +3 -19589
  8. package/dist/index.js +1 -2
  9. package/dist/magic-string.es-5UDOWOAZ.js +0 -2
  10. package/dist/magic-string.es-IL2775P6.cjs +1300 -0
  11. package/dist/metrics/judge/index.cjs +6 -13
  12. package/dist/metrics/judge/index.js +0 -1
  13. package/dist/metrics/llm/index.cjs +87 -86
  14. package/dist/metrics/llm/index.js +75 -63
  15. package/dist/metrics/nlp/index.cjs +0 -1
  16. package/dist/metrics/nlp/index.js +0 -1
  17. package/package.json +16 -13
  18. package/.turbo/turbo-build.log +0 -37
  19. package/CHANGELOG.md +0 -921
  20. package/eslint.config.js +0 -6
  21. package/src/attachListeners.ts +0 -40
  22. package/src/constants.ts +0 -1
  23. package/src/evaluation.test.ts +0 -29
  24. package/src/evaluation.ts +0 -58
  25. package/src/index.ts +0 -2
  26. package/src/metrics/index.ts +0 -3
  27. package/src/metrics/judge/index.ts +0 -14
  28. package/src/metrics/llm/answer-relevancy/index.test.ts +0 -156
  29. package/src/metrics/llm/answer-relevancy/index.ts +0 -58
  30. package/src/metrics/llm/answer-relevancy/metricJudge.ts +0 -56
  31. package/src/metrics/llm/answer-relevancy/prompts.ts +0 -214
  32. package/src/metrics/llm/bias/index.test.ts +0 -152
  33. package/src/metrics/llm/bias/index.ts +0 -52
  34. package/src/metrics/llm/bias/metricJudge.ts +0 -53
  35. package/src/metrics/llm/bias/prompts.ts +0 -109
  36. package/src/metrics/llm/context-position/index.test.ts +0 -275
  37. package/src/metrics/llm/context-position/index.ts +0 -69
  38. package/src/metrics/llm/context-position/metricJudge.ts +0 -55
  39. package/src/metrics/llm/context-position/prompts.ts +0 -135
  40. package/src/metrics/llm/context-precision/index.test.ts +0 -213
  41. package/src/metrics/llm/context-precision/index.ts +0 -68
  42. package/src/metrics/llm/context-precision/metricJudge.ts +0 -55
  43. package/src/metrics/llm/context-precision/prompts.ts +0 -139
  44. package/src/metrics/llm/context-relevancy/index.test.ts +0 -162
  45. package/src/metrics/llm/context-relevancy/index.ts +0 -59
  46. package/src/metrics/llm/context-relevancy/metricJudge.ts +0 -51
  47. package/src/metrics/llm/context-relevancy/prompts.ts +0 -111
  48. package/src/metrics/llm/contextual-recall/index.test.ts +0 -90
  49. package/src/metrics/llm/contextual-recall/index.ts +0 -56
  50. package/src/metrics/llm/contextual-recall/metricJudge.ts +0 -52
  51. package/src/metrics/llm/contextual-recall/prompts.ts +0 -82
  52. package/src/metrics/llm/faithfulness/index.test.ts +0 -254
  53. package/src/metrics/llm/faithfulness/index.ts +0 -59
  54. package/src/metrics/llm/faithfulness/metricJudge.ts +0 -62
  55. package/src/metrics/llm/faithfulness/prompts.ts +0 -166
  56. package/src/metrics/llm/hallucination/index.test.ts +0 -214
  57. package/src/metrics/llm/hallucination/index.ts +0 -59
  58. package/src/metrics/llm/hallucination/metricJudge.ts +0 -44
  59. package/src/metrics/llm/hallucination/prompts.ts +0 -143
  60. package/src/metrics/llm/index.ts +0 -11
  61. package/src/metrics/llm/prompt-alignment/index.test.ts +0 -335
  62. package/src/metrics/llm/prompt-alignment/index.ts +0 -116
  63. package/src/metrics/llm/prompt-alignment/metricJudge.ts +0 -43
  64. package/src/metrics/llm/prompt-alignment/prompts.ts +0 -224
  65. package/src/metrics/llm/summarization/index.test.ts +0 -274
  66. package/src/metrics/llm/summarization/index.ts +0 -72
  67. package/src/metrics/llm/summarization/metricJudge.ts +0 -101
  68. package/src/metrics/llm/summarization/prompts.ts +0 -252
  69. package/src/metrics/llm/toxicity/index.test.ts +0 -84
  70. package/src/metrics/llm/toxicity/index.ts +0 -54
  71. package/src/metrics/llm/toxicity/metricJudge.ts +0 -39
  72. package/src/metrics/llm/toxicity/prompts.ts +0 -87
  73. package/src/metrics/llm/types.ts +0 -7
  74. package/src/metrics/llm/utils.ts +0 -20
  75. package/src/metrics/nlp/completeness/index.test.ts +0 -98
  76. package/src/metrics/nlp/completeness/index.ts +0 -121
  77. package/src/metrics/nlp/content-similarity/index.test.ts +0 -76
  78. package/src/metrics/nlp/content-similarity/index.ts +0 -49
  79. package/src/metrics/nlp/index.ts +0 -5
  80. package/src/metrics/nlp/keyword-coverage/index.test.ts +0 -85
  81. package/src/metrics/nlp/keyword-coverage/index.ts +0 -49
  82. package/src/metrics/nlp/textual-difference/index.test.ts +0 -88
  83. package/src/metrics/nlp/textual-difference/index.ts +0 -38
  84. package/src/metrics/nlp/tone/index.test.ts +0 -100
  85. package/src/metrics/nlp/tone/index.ts +0 -55
  86. package/tsconfig.json +0 -5
  87. package/vitest.config.ts +0 -12
@@ -355,9 +355,9 @@ export declare function generateEvaluatePrompt_alias_6({ claims, context }: {
355
355
  context: string[];
356
356
  }): string;
357
357
 
358
- export declare function generateEvaluatePrompt_alias_7({ context, output }: {
358
+ export declare function generateEvaluatePrompt_alias_7({ context, claims }: {
359
359
  context: string[];
360
- output: string;
360
+ claims: string[];
361
361
  }): string;
362
362
 
363
363
  export declare function generateEvaluatePrompt_alias_8({ instructions, input, output, }: {
@@ -504,7 +504,7 @@ declare function globalSetup(): Promise<void>;
504
504
  export { globalSetup }
505
505
  export { globalSetup as globalSetup_alias_1 }
506
506
 
507
- export declare const HALLUCINATION_AGENT_INSTRUCTIONS = "You are a precise and thorough hallucination evaluator. Your job is to determine if an LLM's output contradicts the provided context, focusing on identifying factual inconsistencies.\n\nKey Principles:\n1. Treat each context piece as a statement to verify\n2. Verify if the output contradicts any of these statements\n3. Consider a contradiction when the output directly conflicts with context statements\n4. Consider no contradiction when the output aligns with or doesn't mention context statements\n5. Empty outputs should be handled as having no contradictions\n6. Focus on factual inconsistencies, not omissions\n7. Never use prior knowledge in judgments\n8. Speculative language (may, might, possibly) should not be considered contradictions";
507
+ export declare const HALLUCINATION_AGENT_INSTRUCTIONS = "You are a precise and thorough hallucination evaluator. Your job is to determine if an LLM's output contains information not supported by or contradicts the provided context.\n\nKey Principles:\n1. First extract all claims from the output (both factual and speculative)\n2. Then verify each extracted claim against the provided context\n3. Consider it a hallucination if a claim contradicts the context\n4. Consider it a hallucination if a claim makes assertions not supported by context\n5. Empty outputs should be handled as having no hallucinations\n6. Speculative language (may, might, possibly) about facts IN the context is NOT a hallucination\n7. Speculative language about facts NOT in the context IS a hallucination\n8. Never use prior knowledge in judgments - only use what's explicitly stated in context\n9. The following are NOT hallucinations:\n - Using less precise dates (e.g., year when context gives month)\n - Reasonable numerical approximations\n - Omitting additional details while maintaining factual accuracy\n10. Subjective claims (\"made history\", \"pioneering\", \"leading\") are hallucinations unless explicitly stated in context";
508
508
 
509
509
  export declare class HallucinationJudge extends MastraAgentJudge {
510
510
  constructor(model: LanguageModel);
@@ -355,9 +355,9 @@ export declare function generateEvaluatePrompt_alias_6({ claims, context }: {
355
355
  context: string[];
356
356
  }): string;
357
357
 
358
- export declare function generateEvaluatePrompt_alias_7({ context, output }: {
358
+ export declare function generateEvaluatePrompt_alias_7({ context, claims }: {
359
359
  context: string[];
360
- output: string;
360
+ claims: string[];
361
361
  }): string;
362
362
 
363
363
  export declare function generateEvaluatePrompt_alias_8({ instructions, input, output, }: {
@@ -504,7 +504,7 @@ declare function globalSetup(): Promise<void>;
504
504
  export { globalSetup }
505
505
  export { globalSetup as globalSetup_alias_1 }
506
506
 
507
- export declare const HALLUCINATION_AGENT_INSTRUCTIONS = "You are a precise and thorough hallucination evaluator. Your job is to determine if an LLM's output contradicts the provided context, focusing on identifying factual inconsistencies.\n\nKey Principles:\n1. Treat each context piece as a statement to verify\n2. Verify if the output contradicts any of these statements\n3. Consider a contradiction when the output directly conflicts with context statements\n4. Consider no contradiction when the output aligns with or doesn't mention context statements\n5. Empty outputs should be handled as having no contradictions\n6. Focus on factual inconsistencies, not omissions\n7. Never use prior knowledge in judgments\n8. Speculative language (may, might, possibly) should not be considered contradictions";
507
+ export declare const HALLUCINATION_AGENT_INSTRUCTIONS = "You are a precise and thorough hallucination evaluator. Your job is to determine if an LLM's output contains information not supported by or contradicts the provided context.\n\nKey Principles:\n1. First extract all claims from the output (both factual and speculative)\n2. Then verify each extracted claim against the provided context\n3. Consider it a hallucination if a claim contradicts the context\n4. Consider it a hallucination if a claim makes assertions not supported by context\n5. Empty outputs should be handled as having no hallucinations\n6. Speculative language (may, might, possibly) about facts IN the context is NOT a hallucination\n7. Speculative language about facts NOT in the context IS a hallucination\n8. Never use prior knowledge in judgments - only use what's explicitly stated in context\n9. The following are NOT hallucinations:\n - Using less precise dates (e.g., year when context gives month)\n - Reasonable numerical approximations\n - Omitting additional details while maintaining factual accuracy\n10. Subjective claims (\"made history\", \"pioneering\", \"leading\") are hallucinations unless explicitly stated in context";
508
508
 
509
509
  export declare class HallucinationJudge extends MastraAgentJudge {
510
510
  constructor(model: LanguageModel);
@@ -0,0 +1,17 @@
1
+ 'use strict';
2
+
3
+ var agent = require('@mastra/core/agent');
4
+
5
+ // src/metrics/judge/index.ts
6
+ var MastraAgentJudge = class {
7
+ agent;
8
+ constructor(name, instructions, model) {
9
+ this.agent = new agent.Agent({
10
+ name: `Mastra Eval Judge ${name}`,
11
+ instructions,
12
+ model
13
+ });
14
+ }
15
+ };
16
+
17
+ exports.MastraAgentJudge = MastraAgentJudge;
@@ -0,0 +1,42 @@
1
+ 'use strict';
2
+
3
+ var __create = Object.create;
4
+ var __defProp = Object.defineProperty;
5
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
6
+ var __getOwnPropNames = Object.getOwnPropertyNames;
7
+ var __getProtoOf = Object.getPrototypeOf;
8
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
9
+ var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
10
+ get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
11
+ }) : x)(function(x) {
12
+ if (typeof require !== "undefined") return require.apply(this, arguments);
13
+ throw Error('Dynamic require of "' + x + '" is not supported');
14
+ });
15
+ var __commonJS = (cb, mod) => function __require2() {
16
+ return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
17
+ };
18
+ var __export = (target, all) => {
19
+ for (var name in all)
20
+ __defProp(target, name, { get: all[name], enumerable: true });
21
+ };
22
+ var __copyProps = (to, from, except, desc) => {
23
+ if (from && typeof from === "object" || typeof from === "function") {
24
+ for (let key of __getOwnPropNames(from))
25
+ if (!__hasOwnProp.call(to, key) && key !== except)
26
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
27
+ }
28
+ return to;
29
+ };
30
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
31
+ // If the importer is in node compatibility mode or this is not an ESM
32
+ // file that has been converted to a CommonJS file using a Babel-
33
+ // compatible transform (i.e. "__esModule" has not been set), then set
34
+ // "default" to the CommonJS "module.exports" for node compatibility.
35
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
36
+ mod
37
+ ));
38
+
39
+ exports.__commonJS = __commonJS;
40
+ exports.__export = __export;
41
+ exports.__require = __require;
42
+ exports.__toESM = __toESM;