npm - @mastra/evals - Versions diffs - 0.1.0-alpha.33 → 0.1.0-alpha.5 - Mend

@mastra/evals 0.1.0-alpha.33 → 0.1.0-alpha.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (75)

package/CHANGELOG.md +0 -224
package/jest.config.ts +21 -0
package/package.json +26 -10
package/src/evaluation.test.ts +16 -17
package/src/evaluation.ts +11 -46
package/src/index.ts +0 -1
package/src/metrics/judge/index.ts +4 -5
package/src/metrics/llm/answer-relevancy/index.test.ts +72 -42
package/src/metrics/llm/answer-relevancy/index.ts +6 -9
package/src/metrics/llm/answer-relevancy/metricJudge.ts +4 -5
package/src/metrics/llm/answer-relevancy/prompts.ts +28 -26
package/src/metrics/llm/bias/index.test.ts +33 -17
package/src/metrics/llm/bias/index.ts +4 -13
package/src/metrics/llm/bias/metricJudge.ts +4 -20
package/src/metrics/llm/bias/prompts.ts +0 -27
package/src/metrics/llm/context-position/index.test.ts +107 -72
package/src/metrics/llm/context-position/index.ts +14 -14
package/src/metrics/llm/context-position/metricJudge.ts +3 -3
package/src/metrics/llm/context-position/prompts.ts +36 -31
package/src/metrics/llm/context-precision/index.test.ts +91 -62
package/src/metrics/llm/context-precision/index.ts +14 -14
package/src/metrics/llm/context-precision/metricJudge.ts +3 -3
package/src/metrics/llm/context-relevancy/index.test.ts +36 -27
package/src/metrics/llm/context-relevancy/index.ts +13 -23
package/src/metrics/llm/context-relevancy/metricJudge.ts +5 -19
package/src/metrics/llm/context-relevancy/prompts.ts +0 -37
package/src/metrics/llm/contextual-recall/index.test.ts +37 -29
package/src/metrics/llm/contextual-recall/index.ts +13 -20
package/src/metrics/llm/contextual-recall/metricJudge.ts +4 -19
package/src/metrics/llm/contextual-recall/prompts.ts +1 -42
package/src/metrics/llm/faithfulness/index.test.ts +107 -72
package/src/metrics/llm/faithfulness/index.ts +15 -22
package/src/metrics/llm/faithfulness/metricJudge.ts +13 -13
package/src/metrics/llm/hallucination/index.test.ts +101 -67
package/src/metrics/llm/hallucination/index.ts +15 -22
package/src/metrics/llm/hallucination/metricJudge.ts +16 -14
package/src/metrics/llm/hallucination/prompts.ts +35 -28
package/src/metrics/llm/index.ts +0 -1
package/src/metrics/llm/prompt-alignment/index.test.ts +71 -55
package/src/metrics/llm/prompt-alignment/index.ts +7 -16
package/src/metrics/llm/prompt-alignment/metricJudge.ts +17 -13
package/src/metrics/llm/summarization/index.test.ts +69 -25
package/src/metrics/llm/summarization/index.ts +10 -19
package/src/metrics/llm/summarization/metricJudge.ts +28 -15
package/src/metrics/llm/summarization/prompts.ts +14 -52
package/src/metrics/llm/toxicity/index.test.ts +29 -23
package/src/metrics/llm/toxicity/index.ts +7 -10
package/src/metrics/llm/toxicity/metricJudge.ts +7 -8
package/src/metrics/llm/toxicity/prompts.ts +12 -5
package/src/metrics/nlp/completeness/index.test.ts +20 -20
package/src/metrics/nlp/completeness/index.ts +6 -14
package/src/metrics/nlp/content-similarity/index.test.ts +48 -17
package/src/metrics/nlp/content-similarity/index.ts +8 -15
package/src/metrics/nlp/keyword-coverage/index.test.ts +60 -31
package/src/metrics/nlp/keyword-coverage/index.ts +9 -10
package/src/metrics/nlp/textual-difference/index.test.ts +62 -34
package/src/metrics/nlp/textual-difference/index.ts +6 -12
package/src/metrics/nlp/tone/index.test.ts +72 -49
package/src/metrics/nlp/tone/index.ts +9 -16
package/src/metrics/nlp/types.ts +13 -0
package/tsconfig.json +10 -1
package/README.md +0 -186
package/dist/chunk-4VNS5WPM.js +0 -37
package/dist/dist-XPBCCWOM.js +0 -17575
package/dist/index.d.ts +0 -9
package/dist/index.js +0 -73
package/dist/magic-string.es-5UDOWOAZ.js +0 -1296
package/dist/metrics/llm/index.d.ts +0 -139
package/dist/metrics/llm/index.js +0 -2121
package/dist/metrics/nlp/index.d.ts +0 -73
package/dist/metrics/nlp/index.js +0 -189
package/src/attachListeners.ts +0 -26
package/src/constants.ts +0 -1
package/src/metrics/llm/types.ts +0 -7
package/vitest.config.ts +0 -11

package/CHANGELOG.md CHANGED Viewed

@@ -1,229 +1,5 @@
 # @mastra/evals
-## 0.1.0-alpha.33
-### Patch Changes
-- Updated dependencies [d7d465a]
-- Updated dependencies [d7d465a]
-- Updated dependencies [2017553]
-- Updated dependencies [a10b7a3]
-- Updated dependencies [16e5b04]
-  - @mastra/core@0.2.0-alpha.91
-## 0.1.0-alpha.32
-### Patch Changes
-- Updated dependencies [8151f44]
-- Updated dependencies [e897f1c]
-- Updated dependencies [3700be1]
-  - @mastra/core@0.2.0-alpha.90
-## 0.1.0-alpha.31
-### Patch Changes
-- Updated dependencies [27275c9]
-  - @mastra/core@0.2.0-alpha.89
-## 0.1.0-alpha.30
-### Patch Changes
-- Updated dependencies [ccbc581]
-  - @mastra/core@0.2.0-alpha.88
-## 0.1.0-alpha.29
-### Patch Changes
-- Updated dependencies [7365b6c]
-  - @mastra/core@0.2.0-alpha.87
-## 0.1.0-alpha.28
-### Minor Changes
-- 5916f9d: Update deps from fixed to ^
-### Patch Changes
-- Updated dependencies [6fa4bd2]
-- Updated dependencies [e2e76de]
-- Updated dependencies [7f24c29]
-- Updated dependencies [67637ba]
-- Updated dependencies [04f3171]
-  - @mastra/core@0.2.0-alpha.86
-## 0.1.0-alpha.27
-### Patch Changes
-- Updated dependencies [e9d1b47]
-  - @mastra/core@0.2.0-alpha.85
-## 0.1.0-alpha.26
-### Patch Changes
-- Updated dependencies [2f17a5f]
-- Updated dependencies [cb290ee]
-- Updated dependencies [b4d7416]
-- Updated dependencies [38b7f66]
-  - @mastra/core@0.2.0-alpha.84
-## 0.1.0-alpha.25
-### Patch Changes
-- 9625602: Use mastra core splitted bundles in other packages
-- 8769a62: Split core into seperate entry fils
-- Updated dependencies [30322ce]
-- Updated dependencies [78eec7c]
-- Updated dependencies [9625602]
-- Updated dependencies [8769a62]
-  - @mastra/core@0.2.0-alpha.83
-## 0.1.0-alpha.24
-### Patch Changes
-- Updated dependencies [73d112c]
-  - @mastra/core@0.1.27-alpha.82
-## 0.1.0-alpha.23
-### Patch Changes
-- Updated dependencies [9fb3039]
-  - @mastra/core@0.1.27-alpha.81
-## 0.1.0-alpha.22
-### Patch Changes
-- cb2e997: Bundle evals package with tsup
-## 0.1.0-alpha.21
-### Patch Changes
-- Updated dependencies [327ece7]
-  - @mastra/core@0.1.27-alpha.80
-## 0.1.0-alpha.20
-### Patch Changes
-- Updated dependencies [21fe536]
-  - @mastra/core@0.1.27-alpha.79
-## 0.1.0-alpha.19
-### Patch Changes
-- Updated dependencies [685108a]
-- Updated dependencies [685108a]
-  - @mastra/core@0.1.27-alpha.78
-## 0.1.0-alpha.18
-### Patch Changes
-- Updated dependencies [8105fae]
-  - @mastra/core@0.1.27-alpha.77
-## 0.1.0-alpha.17
-### Patch Changes
-- Updated dependencies [ae7bf94]
-- Updated dependencies [ae7bf94]
-  - @mastra/core@0.1.27-alpha.76
-## 0.1.0-alpha.16
-### Patch Changes
-- Updated dependencies [23dcb23]
-  - @mastra/core@0.1.27-alpha.75
-## 0.1.0-alpha.15
-### Patch Changes
-- Updated dependencies [7b87567]
-  - @mastra/core@0.1.27-alpha.74
-## 0.1.0-alpha.14
-### Patch Changes
-- Updated dependencies [3427b95]
-  - @mastra/core@0.1.27-alpha.73
-## 0.1.0-alpha.13
-### Patch Changes
-- 06b2c0a: Update summarization prompt and fix eval input
-- Updated dependencies [e4d4ede]
-- Updated dependencies [06b2c0a]
-  - @mastra/core@0.1.27-alpha.72
-## 0.1.0-alpha.12
-### Patch Changes
-- Updated dependencies [d9c8dd0]
-  - @mastra/core@0.1.27-alpha.71
-## 0.1.0-alpha.11
-### Patch Changes
-- bdaf834: publish packages
-## 0.1.0-alpha.10
-### Patch Changes
-- Updated dependencies [dd6d87f]
-- Updated dependencies [04434b6]
-  - @mastra/core@0.1.27-alpha.70
-## 0.1.0-alpha.9
-### Patch Changes
-- 1944807: Unified logger and major step in better logs
-- 9ade36e: Changed measure for evals, added endpoints, attached metrics to agent, added ui for evals in playground, and updated docs
-- Updated dependencies [1944807]
-- Updated dependencies [9ade36e]
-  - @mastra/core@0.1.27-alpha.69
-## 0.1.0-alpha.8
-### Patch Changes
-- Updated dependencies [0be7181]
-- Updated dependencies [0be7181]
-  - @mastra/core@0.1.27-alpha.68
-## 0.1.0-alpha.7
-### Patch Changes
-- Updated dependencies [c8ff2f5]
-  - @mastra/core@0.1.27-alpha.67
-## 0.1.0-alpha.6
-### Patch Changes
-- aea3c13: Fix evals export for llm and nlp
 ## 0.1.0-alpha.5
 ### Minor Changes

package/jest.config.ts ADDED Viewed

@@ -0,0 +1,21 @@
+import { config } from 'dotenv';
+config();
+export default {
+  maxWorkers: 1,
+  preset: 'ts-jest',
+  extensionsToTreatAsEsm: ['.ts'],
+  moduleNameMapper: {
+    '^(\\.{1,2}/.*)\\.js$': '$1',
+  },
+  transform: {
+    '^.+\\.tsx?$': [
+      'ts-jest',
+      {
+        useESM: true,
+        isolatedModules: true,
+      },
+    ],
+  },
+};

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mastra/evals",
-  "version": "0.1.0-alpha.33",
+  "version": "0.1.0-alpha.5",
   "description": "",
   "type": "module",
   "main": "dist/index.js",
@@ -11,18 +11,30 @@
       "import": {
         "types": "./dist/index.d.ts",
         "default": "./dist/evals.esm.js"
+      },
+      "require": {
+        "types": "./dist/index.d.ts",
+        "default": "./dist/index.js"
       }
     },
     "./nlp": {
       "import": {
         "types": "./dist/metrics/nlp/index.d.ts",
-        "default": "./dist/nlp.esm.js"
+        "default": "./dist/evals.esm.js"
+      },
+      "require": {
+        "types": "./dist/metrics/nlp/index.d.ts",
+        "default": "./dist/index.js"
       }
     },
     "./llm": {
       "import": {
         "types": "./dist/metrics/llm/index.d.ts",
-        "default": "./dist/llm.esm.js"
+        "default": "./dist/evals.esm.js"
+      },
+      "require": {
+        "types": "./dist/metrics/llm/index.d.ts",
+        "default": "./dist/index.js"
       }
     },
     "./package.json": "./package.json"
@@ -38,21 +50,25 @@
     "sentiment": "^5.0.2",
     "string-similarity": "^4.0.4",
     "zod": "^3.24.1",
-    "@mastra/core": "^0.2.0-alpha.91"
+    "@mastra/core": "0.1.27-alpha.66"
   },
   "devDependencies": {
+    "@babel/preset-env": "^7.26.0",
+    "@babel/preset-typescript": "^7.26.0",
+    "@jest/globals": "^29.7.0",
     "@tsconfig/recommended": "^1.0.7",
     "@types/difflib": "^0.2.7",
     "@types/fs-extra": "^11.0.4",
+    "@types/jest": "^29.5.12",
     "@types/sentiment": "^5.0.4",
     "@types/string-similarity": "^4.0.2",
-    "tsup": "^8.0.1",
-    "vitest": "^3.0.4"
+    "dts-cli": "^2.0.5",
+    "jest": "^29.7.0",
+    "ts-jest": "^29.2.5"
   },
   "scripts": {
-    "check": "tsc --noEmit",
-    "build": "pnpm check && tsup src/index.ts src/metrics/llm/index.ts src/metrics/nlp/index.ts --format esm --dts --clean --treeshake",
-    "dev": "tsup src/index.ts src/metrics/llm/index.ts src/metrics/nlp/index.ts --format esm --dts --watch",
-    "test": "vitest"
+    "build": "dts build",
+    "build:dev": "dts watch",
+    "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js"
   }
 }

package/src/evaluation.test.ts CHANGED Viewed

@@ -1,7 +1,6 @@
-import { Agent } from '@mastra/core/agent';
-import { Metric } from '@mastra/core/eval';
-import { OpenAI } from '@mastra/core/llm/openai';
-import { describe, expect, it } from 'vitest';
+import { Agent } from '@mastra/core';
+import { ModelConfig } from '@mastra/core';
+import { Metric } from '@mastra/core';
 import { evaluate } from './evaluation';
@@ -14,20 +13,20 @@ class TestMetric extends Metric {
   }
 }
-const llm = new OpenAI({
+const modelConfig: ModelConfig = {
+  provider: 'OPEN_AI',
   name: 'gpt-4o',
-});
+  toolChoice: 'auto',
+};
-describe('evaluate', () => {
-  it('should get a text response from the agent', async () => {
-    const electionAgent = new Agent({
-      name: 'US Election agent',
-      instructions: 'You know about the past US elections',
-      llm,
-    });
+it.skip('should get a text response from the agent', async () => {
+  const electionAgent = new Agent({
+    name: 'US Election agent',
+    instructions: 'You know about the past US elections',
+    model: modelConfig,
+  });
-    const result = await evaluate(electionAgent, 'Who won the 2016 US presidential election?', new TestMetric());
+  const result = await evaluate(electionAgent, 'Who won the 2016 US presidential election?', new TestMetric());
-    expect(result.score).toBe(1);
-  }, 10000);
-});
+  expect(result.score).toBe(1);
+}, 10000);

package/src/evaluation.ts CHANGED Viewed

@@ -1,55 +1,20 @@
-import { type Agent, type Metric, evaluate as coreEvaluate } from '@mastra/core';
-import { GLOBAL_RUN_ID_ENV_KEY } from './constants';
+import { AvailableHooks, executeHook } from '@mastra/core';
+import { type Agent, type Metric } from '@mastra/core';
 export async function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric) {
-  const testInfo = await getCurrentTestInfo();
-  let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];
-  const runId = crypto.randomUUID();
-  const agentOutput = await agent.generate(input, {
-    runId,
-  });
+  const agentOutput = await agent.generate(input);
-  if (!globalRunId) {
-    globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();
-    console.warn('Global run id not set, you should run "globalSetup" from "@mastra/evals" before evaluating.');
-  }
+  const metricResult = await metric.measure({
+    input: input.toString(),
+    output: agentOutput.text,
+  });
-  const metricResult = await coreEvaluate({
-    agentName: agent.name,
-    input,
-    metric,
+  // capture infomration about the evaluation
+  executeHook(AvailableHooks.ON_EVALUATION, {
+    input: input.toString(),
     output: agentOutput.text,
-    globalRunId,
-    runId,
-    testInfo,
+    result: metricResult,
   });
   return metricResult;
 }
-export const getCurrentTestInfo = async () => {
-  // Jest
-  // @ts-ignore
-  if (typeof expect !== 'undefined' && expect.getState) {
-    // @ts-ignore
-    const state = expect.getState();
-    return {
-      testName: state.currentTestName,
-      testPath: state.testPath,
-    };
-  }
-  try {
-    const vitest = await import('vitest');
-    if (typeof vitest !== 'undefined' && vitest.expect?.getState) {
-      const state = vitest.expect.getState();
-      return {
-        testName: state.currentTestName,
-        testPath: state.testPath,
-      };
-    }
-  } catch {}
-  return null;
-};

package/src/index.ts CHANGED Viewed

@@ -1,2 +1 @@
 export { evaluate } from './evaluation';
-export { attachListeners, globalSetup } from './attachListeners';

package/src/metrics/judge/index.ts CHANGED Viewed

@@ -1,14 +1,13 @@
-import { Agent } from '@mastra/core/agent';
-import { type MastraLLMBase } from '@mastra/core/llm';
+import { Agent, ModelConfig } from '@mastra/core';
 export abstract class MastraAgentJudge {
   protected readonly agent: Agent;
-  constructor(name: string, instructions: string, llm: MastraLLMBase) {
+  constructor(name: string, instructions: string, model: ModelConfig) {
     this.agent = new Agent({
-      name: `Mastra Eval Judge ${llm.name} ${name}`,
+      name: `Mastra Eval Judge ${model.provider} ${name}`,
       instructions: instructions,
-      llm,
+      model,
     });
   }
 }

package/src/metrics/llm/answer-relevancy/index.test.ts CHANGED Viewed

@@ -1,5 +1,5 @@
-import { OpenAI } from '@mastra/core/llm/openai';
-import { describe, it, expect } from 'vitest';
+import { describe, it, expect, jest } from '@jest/globals';
+import { type ModelConfig } from '@mastra/core';
 import { TestCase } from '../utils';
@@ -91,65 +91,95 @@ const testCases: TestCase[] = [
 ];
 const SECONDS = 10000;
+jest.setTimeout(15 * SECONDS);
-const llm = new OpenAI({
+const modelConfig: ModelConfig = {
+  provider: 'OPEN_AI',
   name: 'gpt-4o',
-});
+  toolChoice: 'auto',
+  apiKey: process.env.OPENAI_API_KEY,
+};
-describe(
-  'AnswerRelevancyMetric',
-  () => {
-    const metric = new AnswerRelevancyMetric(llm);
+describe('AnswerRelevancyMetric', () => {
+  const metric = new AnswerRelevancyMetric(modelConfig);
-    it('should be able to measure a prompt with perfect relevancy', async () => {
-      const result = await metric.measure(testCases[0].input, testCases[0].output);
-      expect(result.score).toBeCloseTo(testCases[0].expectedResult.score, 1);
+  it('should be able to measure a prompt with perfect relevancy', async () => {
+    const result = await metric.measure({
+      input: testCases[0].input,
+      output: testCases[0].output,
     });
-    it('should be able to measure a prompt with mostly relevant information', async () => {
-      const result = await metric.measure(testCases[1].input, testCases[1].output);
-      const expectedScore = testCases[1].expectedResult.score;
-      const difference = Math.abs(result.score - expectedScore);
+    expect(result.score).toBeCloseTo(testCases[0].expectedResult.score, 1);
+  });
-      expect(Math.round(difference * 10) / 10).toBeLessThanOrEqual(0.1);
+  it('should be able to measure a prompt with mostly relevant information', async () => {
+    const result = await metric.measure({
+      input: testCases[1].input,
+      output: testCases[1].output,
     });
-    it('should be able to measure a prompt with partial relevance', async () => {
-      const result = await metric.measure(testCases[2].input, testCases[2].output);
-      expect(result.score).toBeCloseTo(testCases[2].expectedResult.score, 1);
+    expect(result.score).toBeCloseTo(testCases[1].expectedResult.score, 1);
+  });
+  it('should be able to measure a prompt with partial relevance', async () => {
+    const result = await metric.measure({
+      input: testCases[2].input,
+      output: testCases[2].output,
     });
-    it('should be able to measure a prompt with low relevance', async () => {
-      const result = await metric.measure(testCases[3].input, testCases[3].output);
-      expect(result.score).toBeCloseTo(testCases[3].expectedResult.score, 1);
+    expect(result.score).toBeCloseTo(testCases[2].expectedResult.score, 1);
+  });
+  it('should be able to measure a prompt with low relevance', async () => {
+    const result = await metric.measure({
+      input: testCases[3].input,
+      output: testCases[3].output,
     });
-    it('should be able to measure a prompt with empty output', async () => {
-      const result = await metric.measure(testCases[5].input, testCases[5].output);
-      expect(result.score).toBeCloseTo(testCases[5].expectedResult.score, 1);
+    expect(result.score).toBeCloseTo(testCases[3].expectedResult.score, 1);
+  });
+  it('should be able to measure a prompt with empty output', async () => {
+    const result = await metric.measure({
+      input: testCases[5].input,
+      output: testCases[5].output,
     });
-    it('should be able to measure a prompt with incorrect but relevant answer', async () => {
-      const result = await metric.measure(testCases[6].input, testCases[6].output);
-      expect(result.score).toBeCloseTo(testCases[6].expectedResult.score, 1);
+    expect(result.score).toBeCloseTo(testCases[5].expectedResult.score, 1);
+  });
+  it('should be able to measure a prompt with incorrect but relevant answer', async () => {
+    const result = await metric.measure({
+      input: testCases[6].input,
+      output: testCases[6].output,
     });
+    expect(result.score).toBeCloseTo(testCases[6].expectedResult.score, 1);
+  });
-    it('should be able to measure a prompt with a single word correct answer', async () => {
-      const result = await metric.measure(testCases[7].input, testCases[7].output);
-      expect(result.score).toBeCloseTo(testCases[7].expectedResult.score, 1);
+  it('should be able to measure a prompt with a single word correct answer', async () => {
+    const result = await metric.measure({
+      input: testCases[7].input,
+      output: testCases[7].output,
     });
-    it('should be able to measure a prompt with multiple questions', async () => {
-      const result = await metric.measure(testCases[8].input, testCases[8].output);
-      expect(result.score).toBeCloseTo(testCases[8].expectedResult.score, 1);
+    expect(result.score).toBeCloseTo(testCases[7].expectedResult.score, 1);
+  });
+  it('should be able to measure a prompt with multiple questions', async () => {
+    const result = await metric.measure({
+      input: testCases[8].input,
+      output: testCases[8].output,
     });
-    it('should be able to measure a prompt with technical gibberish', async () => {
-      const result = await metric.measure(testCases[9].input, testCases[9].output);
-      expect(result.score).toBeCloseTo(testCases[9].expectedResult.score, 1);
+    expect(result.score).toBeCloseTo(testCases[8].expectedResult.score, 1);
+  });
+  it('should be able to measure a prompt with technical gibberish', async () => {
+    const result = await metric.measure({
+      input: testCases[9].input,
+      output: testCases[9].output,
     });
-  },
-  {
-    timeout: 15 * SECONDS,
-  },
-);
+    expect(result.score).toBeCloseTo(testCases[9].expectedResult.score, 1);
+  });
+});

package/src/metrics/llm/answer-relevancy/index.ts CHANGED Viewed

@@ -1,7 +1,5 @@
-import { Metric } from '@mastra/core/eval';
-import { type MastraLLMBase } from '@mastra/core/llm';
+import { Metric, MetricResult, ModelConfig } from '@mastra/core';
-import { type MetricResultWithReason } from '../types';
 import { roundToTwoDecimals } from '../utils';
 import { AnswerRelevancyJudge } from './metricJudge';
@@ -16,24 +14,23 @@ export class AnswerRelevancyMetric extends Metric {
   private uncertaintyWeight: number;
   private scale: number;
-  constructor(llm: MastraLLMBase, { uncertaintyWeight = 0.3, scale = 1 }: AnswerRelevancyMetricOptions = {}) {
+  constructor(model: ModelConfig, { uncertaintyWeight = 0.3, scale = 1 }: AnswerRelevancyMetricOptions = {}) {
     super();
     this.uncertaintyWeight = uncertaintyWeight;
-    this.judge = new AnswerRelevancyJudge(llm);
+    this.judge = new AnswerRelevancyJudge(model);
     this.scale = scale;
   }
-  async measure(input: string, output: string): Promise<MetricResultWithReason> {
+  async measure({ input, output }: { input: string; output: string }): Promise<MetricResult> {
     const verdicts = await this.judge.evaluate(input, output);
     const score = this.calculateScore(verdicts);
     const reason = await this.judge.getReason(input, output, score, this.scale, verdicts);
     return {
       score,
-      info: {
-        reason,
-      },
+      reason,
     };
   }

package/src/metrics/llm/answer-relevancy/metricJudge.ts CHANGED Viewed

@@ -1,18 +1,18 @@
-import { type MastraLLMBase } from '@mastra/core/llm';
+import { ModelConfig } from '@mastra/core';
 import { z } from 'zod';
 import { MastraAgentJudge } from '../../judge';
 import {
   generateEvaluatePrompt,
+  generateReasonPrompt,
   ANSWER_RELEVANCY_AGENT_INSTRUCTIONS,
   generateEvaluationStatementsPrompt,
-  generateReasonPrompt,
 } from './prompts';
 export class AnswerRelevancyJudge extends MastraAgentJudge {
-  constructor(llm: MastraLLMBase) {
-    super('Answer Relevancy', ANSWER_RELEVANCY_AGENT_INSTRUCTIONS, llm);
+  constructor(model: ModelConfig) {
+    super('Answer Relevancy', ANSWER_RELEVANCY_AGENT_INSTRUCTIONS, model);
   }
   async evaluate(input: string, actualOutput: string): Promise<{ verdict: string; reason: string }[]> {
@@ -50,7 +50,6 @@ export class AnswerRelevancyJudge extends MastraAgentJudge {
         reason: z.string(),
       }),
     });
     return result.object.reason;
   }
 }