npm - @mastra/evals - Versions diffs - 0.1.8-alpha.8 → 0.1.8 - Mend

@mastra/evals 0.1.8-alpha.8 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87) hide show

package/dist/_tsup-dts-rollup.d.cts +3 -3
package/dist/_tsup-dts-rollup.d.ts +3 -3
package/dist/chunk-COBCYVZ7.cjs +17 -0
package/dist/chunk-CVPOREIE.cjs +42 -0
package/dist/dist-26T2FI3G.cjs +17189 -0
package/dist/{dist-EOJDANYG.js → dist-JUX55UQU.js} +101 -885
package/dist/index.cjs +3 -19589
package/dist/index.js +1 -2
package/dist/magic-string.es-5UDOWOAZ.js +0 -2
package/dist/magic-string.es-IL2775P6.cjs +1300 -0
package/dist/metrics/judge/index.cjs +6 -13
package/dist/metrics/judge/index.js +0 -1
package/dist/metrics/llm/index.cjs +87 -86
package/dist/metrics/llm/index.js +75 -63
package/dist/metrics/nlp/index.cjs +0 -1
package/dist/metrics/nlp/index.js +0 -1
package/package.json +16 -13
package/.turbo/turbo-build.log +0 -37
package/CHANGELOG.md +0 -921
package/eslint.config.js +0 -6
package/src/attachListeners.ts +0 -40
package/src/constants.ts +0 -1
package/src/evaluation.test.ts +0 -29
package/src/evaluation.ts +0 -58
package/src/index.ts +0 -2
package/src/metrics/index.ts +0 -3
package/src/metrics/judge/index.ts +0 -14
package/src/metrics/llm/answer-relevancy/index.test.ts +0 -156
package/src/metrics/llm/answer-relevancy/index.ts +0 -58
package/src/metrics/llm/answer-relevancy/metricJudge.ts +0 -56
package/src/metrics/llm/answer-relevancy/prompts.ts +0 -214
package/src/metrics/llm/bias/index.test.ts +0 -152
package/src/metrics/llm/bias/index.ts +0 -52
package/src/metrics/llm/bias/metricJudge.ts +0 -53
package/src/metrics/llm/bias/prompts.ts +0 -109
package/src/metrics/llm/context-position/index.test.ts +0 -275
package/src/metrics/llm/context-position/index.ts +0 -69
package/src/metrics/llm/context-position/metricJudge.ts +0 -55
package/src/metrics/llm/context-position/prompts.ts +0 -135
package/src/metrics/llm/context-precision/index.test.ts +0 -213
package/src/metrics/llm/context-precision/index.ts +0 -68
package/src/metrics/llm/context-precision/metricJudge.ts +0 -55
package/src/metrics/llm/context-precision/prompts.ts +0 -139
package/src/metrics/llm/context-relevancy/index.test.ts +0 -162
package/src/metrics/llm/context-relevancy/index.ts +0 -59
package/src/metrics/llm/context-relevancy/metricJudge.ts +0 -51
package/src/metrics/llm/context-relevancy/prompts.ts +0 -111
package/src/metrics/llm/contextual-recall/index.test.ts +0 -90
package/src/metrics/llm/contextual-recall/index.ts +0 -56
package/src/metrics/llm/contextual-recall/metricJudge.ts +0 -52
package/src/metrics/llm/contextual-recall/prompts.ts +0 -82
package/src/metrics/llm/faithfulness/index.test.ts +0 -254
package/src/metrics/llm/faithfulness/index.ts +0 -59
package/src/metrics/llm/faithfulness/metricJudge.ts +0 -62
package/src/metrics/llm/faithfulness/prompts.ts +0 -166
package/src/metrics/llm/hallucination/index.test.ts +0 -214
package/src/metrics/llm/hallucination/index.ts +0 -59
package/src/metrics/llm/hallucination/metricJudge.ts +0 -44
package/src/metrics/llm/hallucination/prompts.ts +0 -143
package/src/metrics/llm/index.ts +0 -11
package/src/metrics/llm/prompt-alignment/index.test.ts +0 -335
package/src/metrics/llm/prompt-alignment/index.ts +0 -116
package/src/metrics/llm/prompt-alignment/metricJudge.ts +0 -43
package/src/metrics/llm/prompt-alignment/prompts.ts +0 -224
package/src/metrics/llm/summarization/index.test.ts +0 -274
package/src/metrics/llm/summarization/index.ts +0 -72
package/src/metrics/llm/summarization/metricJudge.ts +0 -101
package/src/metrics/llm/summarization/prompts.ts +0 -252
package/src/metrics/llm/toxicity/index.test.ts +0 -84
package/src/metrics/llm/toxicity/index.ts +0 -54
package/src/metrics/llm/toxicity/metricJudge.ts +0 -39
package/src/metrics/llm/toxicity/prompts.ts +0 -87
package/src/metrics/llm/types.ts +0 -7
package/src/metrics/llm/utils.ts +0 -20
package/src/metrics/nlp/completeness/index.test.ts +0 -98
package/src/metrics/nlp/completeness/index.ts +0 -121
package/src/metrics/nlp/content-similarity/index.test.ts +0 -76
package/src/metrics/nlp/content-similarity/index.ts +0 -49
package/src/metrics/nlp/index.ts +0 -5
package/src/metrics/nlp/keyword-coverage/index.test.ts +0 -85
package/src/metrics/nlp/keyword-coverage/index.ts +0 -49
package/src/metrics/nlp/textual-difference/index.test.ts +0 -88
package/src/metrics/nlp/textual-difference/index.ts +0 -38
package/src/metrics/nlp/tone/index.test.ts +0 -100
package/src/metrics/nlp/tone/index.ts +0 -55
package/tsconfig.json +0 -5
package/vitest.config.ts +0 -12

package/dist/_tsup-dts-rollup.d.cts CHANGED Viewed

@@ -355,9 +355,9 @@ export declare function generateEvaluatePrompt_alias_6({ claims, context }: {
     context: string[];
 }): string;
-export declare function generateEvaluatePrompt_alias_7({ context, output }: {
+export declare function generateEvaluatePrompt_alias_7({ context, claims }: {
     context: string[];
-    output: string;
+    claims: string[];
 }): string;
 export declare function generateEvaluatePrompt_alias_8({ instructions, input, output, }: {
@@ -504,7 +504,7 @@ declare function globalSetup(): Promise<void>;
 export { globalSetup }
 export { globalSetup as globalSetup_alias_1 }
-export declare const HALLUCINATION_AGENT_INSTRUCTIONS = "You are a precise and thorough hallucination evaluator. Your job is to determine if an LLM's output contradicts the provided context, focusing on identifying factual inconsistencies.\n\nKey Principles:\n1. Treat each context piece as a statement to verify\n2. Verify if the output contradicts any of these statements\n3. Consider a contradiction when the output directly conflicts with context statements\n4. Consider no contradiction when the output aligns with or doesn't mention context statements\n5. Empty outputs should be handled as having no contradictions\n6. Focus on factual inconsistencies, not omissions\n7. Never use prior knowledge in judgments\n8. Speculative language (may, might, possibly) should not be considered contradictions";
+export declare const HALLUCINATION_AGENT_INSTRUCTIONS = "You are a precise and thorough hallucination evaluator. Your job is to determine if an LLM's output contains information not supported by or contradicts the provided context.\n\nKey Principles:\n1. First extract all claims from the output (both factual and speculative)\n2. Then verify each extracted claim against the provided context\n3. Consider it a hallucination if a claim contradicts the context\n4. Consider it a hallucination if a claim makes assertions not supported by context\n5. Empty outputs should be handled as having no hallucinations\n6. Speculative language (may, might, possibly) about facts IN the context is NOT a hallucination\n7. Speculative language about facts NOT in the context IS a hallucination\n8. Never use prior knowledge in judgments - only use what's explicitly stated in context\n9. The following are NOT hallucinations:\n   - Using less precise dates (e.g., year when context gives month)\n   - Reasonable numerical approximations\n   - Omitting additional details while maintaining factual accuracy\n10. Subjective claims (\"made history\", \"pioneering\", \"leading\") are hallucinations unless explicitly stated in context";
 export declare class HallucinationJudge extends MastraAgentJudge {
     constructor(model: LanguageModel);

package/dist/_tsup-dts-rollup.d.ts CHANGED Viewed

@@ -355,9 +355,9 @@ export declare function generateEvaluatePrompt_alias_6({ claims, context }: {
     context: string[];
 }): string;
-export declare function generateEvaluatePrompt_alias_7({ context, output }: {
+export declare function generateEvaluatePrompt_alias_7({ context, claims }: {
     context: string[];
-    output: string;
+    claims: string[];
 }): string;
 export declare function generateEvaluatePrompt_alias_8({ instructions, input, output, }: {
@@ -504,7 +504,7 @@ declare function globalSetup(): Promise<void>;
 export { globalSetup }
 export { globalSetup as globalSetup_alias_1 }
-export declare const HALLUCINATION_AGENT_INSTRUCTIONS = "You are a precise and thorough hallucination evaluator. Your job is to determine if an LLM's output contradicts the provided context, focusing on identifying factual inconsistencies.\n\nKey Principles:\n1. Treat each context piece as a statement to verify\n2. Verify if the output contradicts any of these statements\n3. Consider a contradiction when the output directly conflicts with context statements\n4. Consider no contradiction when the output aligns with or doesn't mention context statements\n5. Empty outputs should be handled as having no contradictions\n6. Focus on factual inconsistencies, not omissions\n7. Never use prior knowledge in judgments\n8. Speculative language (may, might, possibly) should not be considered contradictions";
+export declare const HALLUCINATION_AGENT_INSTRUCTIONS = "You are a precise and thorough hallucination evaluator. Your job is to determine if an LLM's output contains information not supported by or contradicts the provided context.\n\nKey Principles:\n1. First extract all claims from the output (both factual and speculative)\n2. Then verify each extracted claim against the provided context\n3. Consider it a hallucination if a claim contradicts the context\n4. Consider it a hallucination if a claim makes assertions not supported by context\n5. Empty outputs should be handled as having no hallucinations\n6. Speculative language (may, might, possibly) about facts IN the context is NOT a hallucination\n7. Speculative language about facts NOT in the context IS a hallucination\n8. Never use prior knowledge in judgments - only use what's explicitly stated in context\n9. The following are NOT hallucinations:\n   - Using less precise dates (e.g., year when context gives month)\n   - Reasonable numerical approximations\n   - Omitting additional details while maintaining factual accuracy\n10. Subjective claims (\"made history\", \"pioneering\", \"leading\") are hallucinations unless explicitly stated in context";
 export declare class HallucinationJudge extends MastraAgentJudge {
     constructor(model: LanguageModel);

package/dist/chunk-COBCYVZ7.cjs ADDED Viewed

@@ -0,0 +1,17 @@
+'use strict';
+var agent = require('@mastra/core/agent');
+// src/metrics/judge/index.ts
+var MastraAgentJudge = class {
+  agent;
+  constructor(name, instructions, model) {
+    this.agent = new agent.Agent({
+      name: `Mastra Eval Judge ${name}`,
+      instructions,
+      model
+    });
+  }
+};
+exports.MastraAgentJudge = MastraAgentJudge;

package/dist/chunk-CVPOREIE.cjs ADDED Viewed

@@ -0,0 +1,42 @@
+'use strict';
+var __create = Object.create;
+var __defProp = Object.defineProperty;
+var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+var __getOwnPropNames = Object.getOwnPropertyNames;
+var __getProtoOf = Object.getPrototypeOf;
+var __hasOwnProp = Object.prototype.hasOwnProperty;
+var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
+  get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
+}) : x)(function(x) {
+  if (typeof require !== "undefined") return require.apply(this, arguments);
+  throw Error('Dynamic require of "' + x + '" is not supported');
+});
+var __commonJS = (cb, mod) => function __require2() {
+  return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
+};
+var __export = (target, all) => {
+  for (var name in all)
+    __defProp(target, name, { get: all[name], enumerable: true });
+};
+var __copyProps = (to, from, except, desc) => {
+  if (from && typeof from === "object" || typeof from === "function") {
+    for (let key of __getOwnPropNames(from))
+      if (!__hasOwnProp.call(to, key) && key !== except)
+        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+  }
+  return to;
+};
+var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
+  // If the importer is in node compatibility mode or this is not an ESM
+  // file that has been converted to a CommonJS file using a Babel-
+  // compatible transform (i.e. "__esModule" has not been set), then set
+  // "default" to the CommonJS "module.exports" for node compatibility.
+  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
+  mod
+));
+exports.__commonJS = __commonJS;
+exports.__export = __export;
+exports.__require = __require;
+exports.__toESM = __toESM;