@mastra/evals 0.1.8-alpha.8 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_tsup-dts-rollup.d.cts +3 -3
- package/dist/_tsup-dts-rollup.d.ts +3 -3
- package/dist/chunk-COBCYVZ7.cjs +17 -0
- package/dist/chunk-CVPOREIE.cjs +42 -0
- package/dist/dist-26T2FI3G.cjs +17189 -0
- package/dist/{dist-EOJDANYG.js → dist-JUX55UQU.js} +101 -885
- package/dist/index.cjs +3 -19589
- package/dist/index.js +1 -2
- package/dist/magic-string.es-5UDOWOAZ.js +0 -2
- package/dist/magic-string.es-IL2775P6.cjs +1300 -0
- package/dist/metrics/judge/index.cjs +6 -13
- package/dist/metrics/judge/index.js +0 -1
- package/dist/metrics/llm/index.cjs +87 -86
- package/dist/metrics/llm/index.js +75 -63
- package/dist/metrics/nlp/index.cjs +0 -1
- package/dist/metrics/nlp/index.js +0 -1
- package/package.json +16 -13
- package/.turbo/turbo-build.log +0 -37
- package/CHANGELOG.md +0 -921
- package/eslint.config.js +0 -6
- package/src/attachListeners.ts +0 -40
- package/src/constants.ts +0 -1
- package/src/evaluation.test.ts +0 -29
- package/src/evaluation.ts +0 -58
- package/src/index.ts +0 -2
- package/src/metrics/index.ts +0 -3
- package/src/metrics/judge/index.ts +0 -14
- package/src/metrics/llm/answer-relevancy/index.test.ts +0 -156
- package/src/metrics/llm/answer-relevancy/index.ts +0 -58
- package/src/metrics/llm/answer-relevancy/metricJudge.ts +0 -56
- package/src/metrics/llm/answer-relevancy/prompts.ts +0 -214
- package/src/metrics/llm/bias/index.test.ts +0 -152
- package/src/metrics/llm/bias/index.ts +0 -52
- package/src/metrics/llm/bias/metricJudge.ts +0 -53
- package/src/metrics/llm/bias/prompts.ts +0 -109
- package/src/metrics/llm/context-position/index.test.ts +0 -275
- package/src/metrics/llm/context-position/index.ts +0 -69
- package/src/metrics/llm/context-position/metricJudge.ts +0 -55
- package/src/metrics/llm/context-position/prompts.ts +0 -135
- package/src/metrics/llm/context-precision/index.test.ts +0 -213
- package/src/metrics/llm/context-precision/index.ts +0 -68
- package/src/metrics/llm/context-precision/metricJudge.ts +0 -55
- package/src/metrics/llm/context-precision/prompts.ts +0 -139
- package/src/metrics/llm/context-relevancy/index.test.ts +0 -162
- package/src/metrics/llm/context-relevancy/index.ts +0 -59
- package/src/metrics/llm/context-relevancy/metricJudge.ts +0 -51
- package/src/metrics/llm/context-relevancy/prompts.ts +0 -111
- package/src/metrics/llm/contextual-recall/index.test.ts +0 -90
- package/src/metrics/llm/contextual-recall/index.ts +0 -56
- package/src/metrics/llm/contextual-recall/metricJudge.ts +0 -52
- package/src/metrics/llm/contextual-recall/prompts.ts +0 -82
- package/src/metrics/llm/faithfulness/index.test.ts +0 -254
- package/src/metrics/llm/faithfulness/index.ts +0 -59
- package/src/metrics/llm/faithfulness/metricJudge.ts +0 -62
- package/src/metrics/llm/faithfulness/prompts.ts +0 -166
- package/src/metrics/llm/hallucination/index.test.ts +0 -214
- package/src/metrics/llm/hallucination/index.ts +0 -59
- package/src/metrics/llm/hallucination/metricJudge.ts +0 -44
- package/src/metrics/llm/hallucination/prompts.ts +0 -143
- package/src/metrics/llm/index.ts +0 -11
- package/src/metrics/llm/prompt-alignment/index.test.ts +0 -335
- package/src/metrics/llm/prompt-alignment/index.ts +0 -116
- package/src/metrics/llm/prompt-alignment/metricJudge.ts +0 -43
- package/src/metrics/llm/prompt-alignment/prompts.ts +0 -224
- package/src/metrics/llm/summarization/index.test.ts +0 -274
- package/src/metrics/llm/summarization/index.ts +0 -72
- package/src/metrics/llm/summarization/metricJudge.ts +0 -101
- package/src/metrics/llm/summarization/prompts.ts +0 -252
- package/src/metrics/llm/toxicity/index.test.ts +0 -84
- package/src/metrics/llm/toxicity/index.ts +0 -54
- package/src/metrics/llm/toxicity/metricJudge.ts +0 -39
- package/src/metrics/llm/toxicity/prompts.ts +0 -87
- package/src/metrics/llm/types.ts +0 -7
- package/src/metrics/llm/utils.ts +0 -20
- package/src/metrics/nlp/completeness/index.test.ts +0 -98
- package/src/metrics/nlp/completeness/index.ts +0 -121
- package/src/metrics/nlp/content-similarity/index.test.ts +0 -76
- package/src/metrics/nlp/content-similarity/index.ts +0 -49
- package/src/metrics/nlp/index.ts +0 -5
- package/src/metrics/nlp/keyword-coverage/index.test.ts +0 -85
- package/src/metrics/nlp/keyword-coverage/index.ts +0 -49
- package/src/metrics/nlp/textual-difference/index.test.ts +0 -88
- package/src/metrics/nlp/textual-difference/index.ts +0 -38
- package/src/metrics/nlp/tone/index.test.ts +0 -100
- package/src/metrics/nlp/tone/index.ts +0 -55
- package/tsconfig.json +0 -5
- package/vitest.config.ts +0 -12
|
@@ -355,9 +355,9 @@ export declare function generateEvaluatePrompt_alias_6({ claims, context }: {
|
|
|
355
355
|
context: string[];
|
|
356
356
|
}): string;
|
|
357
357
|
|
|
358
|
-
export declare function generateEvaluatePrompt_alias_7({ context,
|
|
358
|
+
export declare function generateEvaluatePrompt_alias_7({ context, claims }: {
|
|
359
359
|
context: string[];
|
|
360
|
-
|
|
360
|
+
claims: string[];
|
|
361
361
|
}): string;
|
|
362
362
|
|
|
363
363
|
export declare function generateEvaluatePrompt_alias_8({ instructions, input, output, }: {
|
|
@@ -504,7 +504,7 @@ declare function globalSetup(): Promise<void>;
|
|
|
504
504
|
export { globalSetup }
|
|
505
505
|
export { globalSetup as globalSetup_alias_1 }
|
|
506
506
|
|
|
507
|
-
export declare const HALLUCINATION_AGENT_INSTRUCTIONS = "You are a precise and thorough hallucination evaluator. Your job is to determine if an LLM's output
|
|
507
|
+
export declare const HALLUCINATION_AGENT_INSTRUCTIONS = "You are a precise and thorough hallucination evaluator. Your job is to determine if an LLM's output contains information not supported by or contradicts the provided context.\n\nKey Principles:\n1. First extract all claims from the output (both factual and speculative)\n2. Then verify each extracted claim against the provided context\n3. Consider it a hallucination if a claim contradicts the context\n4. Consider it a hallucination if a claim makes assertions not supported by context\n5. Empty outputs should be handled as having no hallucinations\n6. Speculative language (may, might, possibly) about facts IN the context is NOT a hallucination\n7. Speculative language about facts NOT in the context IS a hallucination\n8. Never use prior knowledge in judgments - only use what's explicitly stated in context\n9. The following are NOT hallucinations:\n - Using less precise dates (e.g., year when context gives month)\n - Reasonable numerical approximations\n - Omitting additional details while maintaining factual accuracy\n10. Subjective claims (\"made history\", \"pioneering\", \"leading\") are hallucinations unless explicitly stated in context";
|
|
508
508
|
|
|
509
509
|
export declare class HallucinationJudge extends MastraAgentJudge {
|
|
510
510
|
constructor(model: LanguageModel);
|
|
@@ -355,9 +355,9 @@ export declare function generateEvaluatePrompt_alias_6({ claims, context }: {
|
|
|
355
355
|
context: string[];
|
|
356
356
|
}): string;
|
|
357
357
|
|
|
358
|
-
export declare function generateEvaluatePrompt_alias_7({ context,
|
|
358
|
+
export declare function generateEvaluatePrompt_alias_7({ context, claims }: {
|
|
359
359
|
context: string[];
|
|
360
|
-
|
|
360
|
+
claims: string[];
|
|
361
361
|
}): string;
|
|
362
362
|
|
|
363
363
|
export declare function generateEvaluatePrompt_alias_8({ instructions, input, output, }: {
|
|
@@ -504,7 +504,7 @@ declare function globalSetup(): Promise<void>;
|
|
|
504
504
|
export { globalSetup }
|
|
505
505
|
export { globalSetup as globalSetup_alias_1 }
|
|
506
506
|
|
|
507
|
-
export declare const HALLUCINATION_AGENT_INSTRUCTIONS = "You are a precise and thorough hallucination evaluator. Your job is to determine if an LLM's output
|
|
507
|
+
export declare const HALLUCINATION_AGENT_INSTRUCTIONS = "You are a precise and thorough hallucination evaluator. Your job is to determine if an LLM's output contains information not supported by or contradicts the provided context.\n\nKey Principles:\n1. First extract all claims from the output (both factual and speculative)\n2. Then verify each extracted claim against the provided context\n3. Consider it a hallucination if a claim contradicts the context\n4. Consider it a hallucination if a claim makes assertions not supported by context\n5. Empty outputs should be handled as having no hallucinations\n6. Speculative language (may, might, possibly) about facts IN the context is NOT a hallucination\n7. Speculative language about facts NOT in the context IS a hallucination\n8. Never use prior knowledge in judgments - only use what's explicitly stated in context\n9. The following are NOT hallucinations:\n - Using less precise dates (e.g., year when context gives month)\n - Reasonable numerical approximations\n - Omitting additional details while maintaining factual accuracy\n10. Subjective claims (\"made history\", \"pioneering\", \"leading\") are hallucinations unless explicitly stated in context";
|
|
508
508
|
|
|
509
509
|
export declare class HallucinationJudge extends MastraAgentJudge {
|
|
510
510
|
constructor(model: LanguageModel);
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var agent = require('@mastra/core/agent');
|
|
4
|
+
|
|
5
|
+
// src/metrics/judge/index.ts
|
|
6
|
+
var MastraAgentJudge = class {
|
|
7
|
+
agent;
|
|
8
|
+
constructor(name, instructions, model) {
|
|
9
|
+
this.agent = new agent.Agent({
|
|
10
|
+
name: `Mastra Eval Judge ${name}`,
|
|
11
|
+
instructions,
|
|
12
|
+
model
|
|
13
|
+
});
|
|
14
|
+
}
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
exports.MastraAgentJudge = MastraAgentJudge;
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var __create = Object.create;
|
|
4
|
+
var __defProp = Object.defineProperty;
|
|
5
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
6
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
7
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
8
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
9
|
+
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
10
|
+
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
11
|
+
}) : x)(function(x) {
|
|
12
|
+
if (typeof require !== "undefined") return require.apply(this, arguments);
|
|
13
|
+
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
14
|
+
});
|
|
15
|
+
var __commonJS = (cb, mod) => function __require2() {
|
|
16
|
+
return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
|
|
17
|
+
};
|
|
18
|
+
var __export = (target, all) => {
|
|
19
|
+
for (var name in all)
|
|
20
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
21
|
+
};
|
|
22
|
+
var __copyProps = (to, from, except, desc) => {
|
|
23
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
24
|
+
for (let key of __getOwnPropNames(from))
|
|
25
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
26
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
27
|
+
}
|
|
28
|
+
return to;
|
|
29
|
+
};
|
|
30
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
31
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
32
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
33
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
34
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
35
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
36
|
+
mod
|
|
37
|
+
));
|
|
38
|
+
|
|
39
|
+
exports.__commonJS = __commonJS;
|
|
40
|
+
exports.__export = __export;
|
|
41
|
+
exports.__require = __require;
|
|
42
|
+
exports.__toESM = __toESM;
|