npm - @mastra/evals - Versions diffs - 0.10.6 → 0.10.8-alpha.0 - Mend

@mastra/evals 0.10.6 → 0.10.8-alpha.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (9)

package/dist/{dist-IVAARSAW.cjs → dist-JD6MNRVB.cjs} +8 -8
package/dist/{dist-5JXLPLM2.js → dist-ZXFGMR47.js} +8 -8
package/dist/index.cjs +1 -1
package/dist/index.js +1 -1
package/dist/{magic-string.es-LD4FLE5J.js → magic-string.es-MNZ6ZGOL.js} +1 -1
package/dist/{magic-string.es-66FD77JZ.cjs → magic-string.es-T2QO2IBJ.cjs} +1 -1
package/dist/scorers/llm/index.cjs +27 -22
package/dist/scorers/llm/index.js +27 -22
package/package.json +4 -4

package/dist/{dist-IVAARSAW.cjs → dist-JD6MNRVB.cjs} RENAMED Viewed

@@ -11988,7 +11988,7 @@ function createTestHook(name, handler) {
   };
 }
-// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/utils.XdZDrNZV.js
+// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/utils.XdZDrNZV.js
 var NAME_WORKER_STATE = "__vitest_worker__";
 function getWorkerState() {
   const workerState = globalThis[NAME_WORKER_STATE];
@@ -12036,7 +12036,7 @@ async function waitForImportsToResolve() {
   await waitForImportsToResolve();
 }
-// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/_commonjsHelpers.BFTU3MAI.js
+// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/_commonjsHelpers.BFTU3MAI.js
 var commonjsGlobal = typeof globalThis !== "undefined" ? globalThis : typeof window !== "undefined" ? window : typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : {};
 function getDefaultExportFromCjs3(x) {
   return x && x.__esModule && Object.prototype.hasOwnProperty.call(x, "default") ? x["default"] : x;
@@ -12889,7 +12889,7 @@ function offsetToLineNumber(source, offset) {
   return line + 1;
 }
 async function saveInlineSnapshots(environment, snapshots) {
-  const MagicString = (await import('./magic-string.es-66FD77JZ.cjs')).default;
+  const MagicString = (await import('./magic-string.es-T2QO2IBJ.cjs')).default;
   const files = new Set(snapshots.map((i) => i.file));
   await Promise.all(Array.from(files).map(async (file) => {
     const snaps = snapshots.filter((i) => i.file === file);
@@ -13666,7 +13666,7 @@ var SnapshotClient = class {
   }
 };
-// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/date.Bq6ZW5rf.js
+// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/date.Bq6ZW5rf.js
 var RealDate = Date;
 var now2 = null;
 var MockDate = class _MockDate extends RealDate {
@@ -13714,7 +13714,7 @@ function resetDate() {
   globalThis.Date = RealDate;
 }
-// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/vi.bdSIJ99Y.js
+// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/vi.bdSIJ99Y.js
 var unsupported = [
   "matchSnapshot",
   "toMatchSnapshot",
@@ -16400,7 +16400,7 @@ function getImporter(name) {
   return stack?.file || "";
 }
-// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/benchmark.CYdenmiT.js
+// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/benchmark.CYdenmiT.js
 var benchFns = /* @__PURE__ */ new WeakMap();
 var benchOptsMap = /* @__PURE__ */ new WeakMap();
 var bench = createBenchmark(function(name, fn2 = noop, options = {}) {
@@ -16426,12 +16426,12 @@ function formatName2(name) {
   return typeof name === "string" ? name : typeof name === "function" ? name.name || "<anonymous>" : String(name);
 }
-// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/index.CdQS2e2Q.js
+// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/index.CdQS2e2Q.js
 chunkIS3BZTWE_cjs.__toESM(require_dist(), 1);
 var assertType = function assertType2() {
 };
-// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/index.js
+// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/index.js
 var import_expect_type2 = chunkIS3BZTWE_cjs.__toESM(require_dist(), 1);
 var export_expectTypeOf = import_expect_type2.expectTypeOf;
 /*! Bundled license information:

package/dist/{dist-5JXLPLM2.js → dist-ZXFGMR47.js} RENAMED Viewed

@@ -11986,7 +11986,7 @@ function createTestHook(name, handler) {
   };
 }
-// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/utils.XdZDrNZV.js
+// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/utils.XdZDrNZV.js
 var NAME_WORKER_STATE = "__vitest_worker__";
 function getWorkerState() {
   const workerState = globalThis[NAME_WORKER_STATE];
@@ -12034,7 +12034,7 @@ async function waitForImportsToResolve() {
   await waitForImportsToResolve();
 }
-// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/_commonjsHelpers.BFTU3MAI.js
+// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/_commonjsHelpers.BFTU3MAI.js
 var commonjsGlobal = typeof globalThis !== "undefined" ? globalThis : typeof window !== "undefined" ? window : typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : {};
 function getDefaultExportFromCjs3(x) {
   return x && x.__esModule && Object.prototype.hasOwnProperty.call(x, "default") ? x["default"] : x;
@@ -12887,7 +12887,7 @@ function offsetToLineNumber(source, offset) {
   return line + 1;
 }
 async function saveInlineSnapshots(environment, snapshots) {
-  const MagicString = (await import('./magic-string.es-LD4FLE5J.js')).default;
+  const MagicString = (await import('./magic-string.es-MNZ6ZGOL.js')).default;
   const files = new Set(snapshots.map((i) => i.file));
   await Promise.all(Array.from(files).map(async (file) => {
     const snaps = snapshots.filter((i) => i.file === file);
@@ -13664,7 +13664,7 @@ var SnapshotClient = class {
   }
 };
-// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/date.Bq6ZW5rf.js
+// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/date.Bq6ZW5rf.js
 var RealDate = Date;
 var now2 = null;
 var MockDate = class _MockDate extends RealDate {
@@ -13712,7 +13712,7 @@ function resetDate() {
   globalThis.Date = RealDate;
 }
-// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/vi.bdSIJ99Y.js
+// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/vi.bdSIJ99Y.js
 var unsupported = [
   "matchSnapshot",
   "toMatchSnapshot",
@@ -16398,7 +16398,7 @@ function getImporter(name) {
   return stack?.file || "";
 }
-// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/benchmark.CYdenmiT.js
+// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/benchmark.CYdenmiT.js
 var benchFns = /* @__PURE__ */ new WeakMap();
 var benchOptsMap = /* @__PURE__ */ new WeakMap();
 var bench = createBenchmark(function(name, fn2 = noop, options = {}) {
@@ -16424,12 +16424,12 @@ function formatName2(name) {
   return typeof name === "string" ? name : typeof name === "function" ? name.name || "<anonymous>" : String(name);
 }
-// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/index.CdQS2e2Q.js
+// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/index.CdQS2e2Q.js
 __toESM(require_dist(), 1);
 var assertType = function assertType2() {
 };
-// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/index.js
+// ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/index.js
 var import_expect_type2 = __toESM(require_dist(), 1);
 var export_expectTypeOf = import_expect_type2.expectTypeOf;
 /*! Bundled license information:

package/dist/index.cjs CHANGED Viewed

@@ -41,7 +41,7 @@ var getCurrentTestInfo = async () => {
     };
   }
   try {
-    const vitest = await import('./dist-IVAARSAW.cjs');
+    const vitest = await import('./dist-JD6MNRVB.cjs');
     if (typeof vitest !== "undefined" && vitest.expect?.getState) {
       const state = vitest.expect.getState();
       return {

package/dist/index.js CHANGED Viewed

@@ -39,7 +39,7 @@ var getCurrentTestInfo = async () => {
     };
   }
   try {
-    const vitest = await import('./dist-5JXLPLM2.js');
+    const vitest = await import('./dist-ZXFGMR47.js');
     if (typeof vitest !== "undefined" && vitest.expect?.getState) {
       const state = vitest.expect.getState();
       return {

package/dist/{magic-string.es-LD4FLE5J.js → magic-string.es-MNZ6ZGOL.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-// ../../node_modules/.pnpm/@jridgewell+sourcemap-codec@1.5.3/node_modules/@jridgewell/sourcemap-codec/dist/sourcemap-codec.mjs
+// ../../node_modules/.pnpm/@jridgewell+sourcemap-codec@1.5.4/node_modules/@jridgewell/sourcemap-codec/dist/sourcemap-codec.mjs
 var comma = ",".charCodeAt(0);
 var semicolon = ";".charCodeAt(0);
 var chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

package/dist/{magic-string.es-66FD77JZ.cjs → magic-string.es-T2QO2IBJ.cjs} RENAMED Viewed

@@ -2,7 +2,7 @@
 Object.defineProperty(exports, '__esModule', { value: true });
-// ../../node_modules/.pnpm/@jridgewell+sourcemap-codec@1.5.3/node_modules/@jridgewell/sourcemap-codec/dist/sourcemap-codec.mjs
+// ../../node_modules/.pnpm/@jridgewell+sourcemap-codec@1.5.4/node_modules/@jridgewell/sourcemap-codec/dist/sourcemap-codec.mjs
 var comma = ",".charCodeAt(0);
 var semicolon = ";".charCodeAt(0);
 var chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

package/dist/scorers/llm/index.cjs CHANGED Viewed

@@ -236,7 +236,7 @@ function createAnswerRelevancyScorer({
     },
     analyze: {
       description: "Score the relevance of the statements to the input",
-      outputSchema: zod.z.array(zod.z.object({ result: zod.z.string(), reason: zod.z.string() })),
+      outputSchema: zod.z.object({ results: zod.z.array(zod.z.object({ result: zod.z.string(), reason: zod.z.string() })) }),
       createPrompt: ({ run }) => createScorePrompt(JSON.stringify(run.input), run.extractStepResult?.statements || [])
     },
     reason: {
@@ -246,18 +246,18 @@ function createAnswerRelevancyScorer({
           input: run.input.map((input) => input.content).join(", "),
           output: run.output.text,
           score: run.score,
-          results: run.analyzeStepResult,
+          results: run.analyzeStepResult.results,
           scale: options.scale
         });
       }
     },
     calculateScore: ({ run }) => {
-      if (!run.analyzeStepResult || run.analyzeStepResult.length === 0) {
+      if (!run.analyzeStepResult || run.analyzeStepResult.results.length === 0) {
         return 0;
       }
-      const numberOfResults = run.analyzeStepResult.length;
+      const numberOfResults = run.analyzeStepResult.results.length;
       let relevancyCount = 0;
-      for (const { result } of run.analyzeStepResult) {
+      for (const { result } of run.analyzeStepResult.results) {
         if (result.trim().toLowerCase() === "yes") {
           relevancyCount++;
         } else if (result.trim().toLowerCase() === "unsure") {
@@ -455,7 +455,7 @@ function createFaithfulnessScorer({
     },
     analyze: {
       description: "Score the relevance of the statements to the input",
-      outputSchema: zod.z.array(zod.z.object({ verdict: zod.z.string(), reason: zod.z.string() })),
+      outputSchema: zod.z.object({ verdicts: zod.z.array(zod.z.object({ verdict: zod.z.string(), reason: zod.z.string() })) }),
       createPrompt: ({ run }) => {
         const prompt = createFaithfulnessAnalyzePrompt({
           claims: run.extractStepResult || [],
@@ -465,8 +465,8 @@ function createFaithfulnessScorer({
       }
     },
     calculateScore: ({ run }) => {
-      const totalClaims = run.analyzeStepResult.length;
-      const supportedClaims = run.analyzeStepResult.filter((v) => v.verdict === "yes").length;
+      const totalClaims = run.analyzeStepResult.verdicts.length;
+      const supportedClaims = run.analyzeStepResult.verdicts.filter((v) => v.verdict === "yes").length;
       if (totalClaims === 0) {
         return 0;
       }
@@ -482,7 +482,7 @@ function createFaithfulnessScorer({
           context: options?.context || [],
           score: run.score,
           scale: options?.scale || 1,
-          verdicts: run.analyzeStepResult || []
+          verdicts: run.analyzeStepResult?.verdicts || []
         });
         return prompt;
       }
@@ -617,7 +617,7 @@ function createBiasScorer({ model, options }) {
     },
     analyze: {
       description: "Score the relevance of the statements to the input",
-      outputSchema: zod.z.array(zod.z.object({ result: zod.z.string(), reason: zod.z.string() })),
+      outputSchema: zod.z.object({ results: zod.z.array(zod.z.object({ result: zod.z.string(), reason: zod.z.string() })) }),
       createPrompt: ({ run }) => {
         const prompt = createBiasAnalyzePrompt({
           output: run.output.text,
@@ -627,17 +627,20 @@ function createBiasScorer({ model, options }) {
       }
     },
     calculateScore: ({ run }) => {
-      if (!run.analyzeStepResult || run.analyzeStepResult.length === 0) {
+      if (!run.analyzeStepResult || run.analyzeStepResult.results.length === 0) {
         return 0;
       }
-      const biasedVerdicts = run.analyzeStepResult.filter((v) => v.result.toLowerCase() === "yes");
-      const score = biasedVerdicts.length / run.analyzeStepResult.length;
+      const biasedVerdicts = run.analyzeStepResult.results.filter((v) => v.result.toLowerCase() === "yes");
+      const score = biasedVerdicts.length / run.analyzeStepResult.results.length;
       return roundToTwoDecimals2(score * (options?.scale || 1));
     },
     reason: {
       description: "Reason about the results",
       createPrompt: ({ run }) => {
-        return createBiasReasonPrompt({ score: run.score, biases: run.analyzeStepResult?.map((v) => v.reason) || [] });
+        return createBiasReasonPrompt({
+          score: run.score,
+          biases: run.analyzeStepResult?.results.map((v) => v.reason) || []
+        });
       }
     }
   });
@@ -858,7 +861,9 @@ function createHallucinationScorer({
     },
     analyze: {
       description: "Score the relevance of the statements to the input",
-      outputSchema: zod.z.array(zod.z.object({ statement: zod.z.string(), verdict: zod.z.string(), reason: zod.z.string() })),
+      outputSchema: zod.z.object({
+        verdicts: zod.z.array(zod.z.object({ statement: zod.z.string(), verdict: zod.z.string(), reason: zod.z.string() }))
+      }),
       createPrompt: ({ run }) => {
         const prompt = createHallucinationAnalyzePrompt({
           claims: run.extractStepResult.claims,
@@ -868,8 +873,8 @@ function createHallucinationScorer({
       }
     },
     calculateScore: ({ run }) => {
-      const totalStatements = run.analyzeStepResult.length;
-      const contradictedStatements = run.analyzeStepResult.filter((v) => v.verdict === "yes").length;
+      const totalStatements = run.analyzeStepResult.verdicts.length;
+      const contradictedStatements = run.analyzeStepResult.verdicts.filter((v) => v.verdict === "yes").length;
       if (totalStatements === 0) {
         return 0;
       }
@@ -885,7 +890,7 @@ function createHallucinationScorer({
           context: run?.additionalContext?.context || [],
           score: run.score,
           scale: options?.scale || 1,
-          verdicts: run.analyzeStepResult || []
+          verdicts: run.analyzeStepResult?.verdicts || []
         });
         return prompt;
       }
@@ -991,7 +996,7 @@ function createToxicityScorer({ model, options }) {
     },
     analyze: {
       description: "Score the relevance of the statements to the input",
-      outputSchema: zod.z.array(zod.z.object({ verdict: zod.z.string(), reason: zod.z.string() })),
+      outputSchema: zod.z.object({ verdicts: zod.z.array(zod.z.object({ verdict: zod.z.string(), reason: zod.z.string() })) }),
       createPrompt: ({ run }) => {
         const prompt = createToxicityAnalyzePrompt({
           input: run.input.map((input) => input.content).join(", "),
@@ -1001,12 +1006,12 @@ function createToxicityScorer({ model, options }) {
       }
     },
     calculateScore: ({ run }) => {
-      const numberOfVerdicts = run.analyzeStepResult?.length || 0;
+      const numberOfVerdicts = run.analyzeStepResult?.verdicts.length || 0;
       if (numberOfVerdicts === 0) {
         return 1;
       }
       let toxicityCount = 0;
-      for (const { verdict } of run.analyzeStepResult) {
+      for (const { verdict } of run.analyzeStepResult.verdicts) {
         if (verdict.trim().toLowerCase() === "yes") {
           toxicityCount++;
         }
@@ -1019,7 +1024,7 @@ function createToxicityScorer({ model, options }) {
       createPrompt: ({ run }) => {
         const prompt = createToxicityReasonPrompt({
           score: run.score,
-          toxics: run.analyzeStepResult?.map((v) => v.reason) || []
+          toxics: run.analyzeStepResult?.verdicts.map((v) => v.reason) || []
         });
         return prompt;
       }

package/dist/scorers/llm/index.js CHANGED Viewed

@@ -234,7 +234,7 @@ function createAnswerRelevancyScorer({
     },
     analyze: {
       description: "Score the relevance of the statements to the input",
-      outputSchema: z.array(z.object({ result: z.string(), reason: z.string() })),
+      outputSchema: z.object({ results: z.array(z.object({ result: z.string(), reason: z.string() })) }),
       createPrompt: ({ run }) => createScorePrompt(JSON.stringify(run.input), run.extractStepResult?.statements || [])
     },
     reason: {
@@ -244,18 +244,18 @@ function createAnswerRelevancyScorer({
           input: run.input.map((input) => input.content).join(", "),
           output: run.output.text,
           score: run.score,
-          results: run.analyzeStepResult,
+          results: run.analyzeStepResult.results,
           scale: options.scale
         });
       }
     },
     calculateScore: ({ run }) => {
-      if (!run.analyzeStepResult || run.analyzeStepResult.length === 0) {
+      if (!run.analyzeStepResult || run.analyzeStepResult.results.length === 0) {
         return 0;
       }
-      const numberOfResults = run.analyzeStepResult.length;
+      const numberOfResults = run.analyzeStepResult.results.length;
       let relevancyCount = 0;
-      for (const { result } of run.analyzeStepResult) {
+      for (const { result } of run.analyzeStepResult.results) {
         if (result.trim().toLowerCase() === "yes") {
           relevancyCount++;
         } else if (result.trim().toLowerCase() === "unsure") {
@@ -453,7 +453,7 @@ function createFaithfulnessScorer({
     },
     analyze: {
       description: "Score the relevance of the statements to the input",
-      outputSchema: z.array(z.object({ verdict: z.string(), reason: z.string() })),
+      outputSchema: z.object({ verdicts: z.array(z.object({ verdict: z.string(), reason: z.string() })) }),
       createPrompt: ({ run }) => {
         const prompt = createFaithfulnessAnalyzePrompt({
           claims: run.extractStepResult || [],
@@ -463,8 +463,8 @@ function createFaithfulnessScorer({
       }
     },
     calculateScore: ({ run }) => {
-      const totalClaims = run.analyzeStepResult.length;
-      const supportedClaims = run.analyzeStepResult.filter((v) => v.verdict === "yes").length;
+      const totalClaims = run.analyzeStepResult.verdicts.length;
+      const supportedClaims = run.analyzeStepResult.verdicts.filter((v) => v.verdict === "yes").length;
       if (totalClaims === 0) {
         return 0;
       }
@@ -480,7 +480,7 @@ function createFaithfulnessScorer({
           context: options?.context || [],
           score: run.score,
           scale: options?.scale || 1,
-          verdicts: run.analyzeStepResult || []
+          verdicts: run.analyzeStepResult?.verdicts || []
         });
         return prompt;
       }
@@ -615,7 +615,7 @@ function createBiasScorer({ model, options }) {
     },
     analyze: {
       description: "Score the relevance of the statements to the input",
-      outputSchema: z.array(z.object({ result: z.string(), reason: z.string() })),
+      outputSchema: z.object({ results: z.array(z.object({ result: z.string(), reason: z.string() })) }),
       createPrompt: ({ run }) => {
         const prompt = createBiasAnalyzePrompt({
           output: run.output.text,
@@ -625,17 +625,20 @@ function createBiasScorer({ model, options }) {
       }
     },
     calculateScore: ({ run }) => {
-      if (!run.analyzeStepResult || run.analyzeStepResult.length === 0) {
+      if (!run.analyzeStepResult || run.analyzeStepResult.results.length === 0) {
         return 0;
       }
-      const biasedVerdicts = run.analyzeStepResult.filter((v) => v.result.toLowerCase() === "yes");
-      const score = biasedVerdicts.length / run.analyzeStepResult.length;
+      const biasedVerdicts = run.analyzeStepResult.results.filter((v) => v.result.toLowerCase() === "yes");
+      const score = biasedVerdicts.length / run.analyzeStepResult.results.length;
       return roundToTwoDecimals2(score * (options?.scale || 1));
     },
     reason: {
       description: "Reason about the results",
       createPrompt: ({ run }) => {
-        return createBiasReasonPrompt({ score: run.score, biases: run.analyzeStepResult?.map((v) => v.reason) || [] });
+        return createBiasReasonPrompt({
+          score: run.score,
+          biases: run.analyzeStepResult?.results.map((v) => v.reason) || []
+        });
       }
     }
   });
@@ -856,7 +859,9 @@ function createHallucinationScorer({
     },
     analyze: {
       description: "Score the relevance of the statements to the input",
-      outputSchema: z.array(z.object({ statement: z.string(), verdict: z.string(), reason: z.string() })),
+      outputSchema: z.object({
+        verdicts: z.array(z.object({ statement: z.string(), verdict: z.string(), reason: z.string() }))
+      }),
       createPrompt: ({ run }) => {
         const prompt = createHallucinationAnalyzePrompt({
           claims: run.extractStepResult.claims,
@@ -866,8 +871,8 @@ function createHallucinationScorer({
       }
     },
     calculateScore: ({ run }) => {
-      const totalStatements = run.analyzeStepResult.length;
-      const contradictedStatements = run.analyzeStepResult.filter((v) => v.verdict === "yes").length;
+      const totalStatements = run.analyzeStepResult.verdicts.length;
+      const contradictedStatements = run.analyzeStepResult.verdicts.filter((v) => v.verdict === "yes").length;
       if (totalStatements === 0) {
         return 0;
       }
@@ -883,7 +888,7 @@ function createHallucinationScorer({
           context: run?.additionalContext?.context || [],
           score: run.score,
           scale: options?.scale || 1,
-          verdicts: run.analyzeStepResult || []
+          verdicts: run.analyzeStepResult?.verdicts || []
         });
         return prompt;
       }
@@ -989,7 +994,7 @@ function createToxicityScorer({ model, options }) {
     },
     analyze: {
       description: "Score the relevance of the statements to the input",
-      outputSchema: z.array(z.object({ verdict: z.string(), reason: z.string() })),
+      outputSchema: z.object({ verdicts: z.array(z.object({ verdict: z.string(), reason: z.string() })) }),
       createPrompt: ({ run }) => {
         const prompt = createToxicityAnalyzePrompt({
           input: run.input.map((input) => input.content).join(", "),
@@ -999,12 +1004,12 @@ function createToxicityScorer({ model, options }) {
       }
     },
     calculateScore: ({ run }) => {
-      const numberOfVerdicts = run.analyzeStepResult?.length || 0;
+      const numberOfVerdicts = run.analyzeStepResult?.verdicts.length || 0;
       if (numberOfVerdicts === 0) {
         return 1;
       }
       let toxicityCount = 0;
-      for (const { verdict } of run.analyzeStepResult) {
+      for (const { verdict } of run.analyzeStepResult.verdicts) {
         if (verdict.trim().toLowerCase() === "yes") {
           toxicityCount++;
         }
@@ -1017,7 +1022,7 @@ function createToxicityScorer({ model, options }) {
       createPrompt: ({ run }) => {
         const prompt = createToxicityReasonPrompt({
           score: run.score,
-          toxics: run.analyzeStepResult?.map((v) => v.reason) || []
+          toxics: run.analyzeStepResult?.verdicts.map((v) => v.reason) || []
         });
         return prompt;
       }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mastra/evals",
-  "version": "0.10.6",
+  "version": "0.10.8-alpha.0",
   "description": "",
   "type": "module",
   "files": [
@@ -84,7 +84,7 @@
     "zod": "^3.25.67"
   },
   "peerDependencies": {
-    "@mastra/core": "^0.10.1-alpha.0",
+    "@mastra/core": ">=0.11.0-0 <0.12.0-0",
     "ai": "^4.0.0"
   },
   "devDependencies": {
@@ -100,8 +100,8 @@
     "tsup": "^8.5.0",
     "typescript": "^5.8.3",
     "vitest": "^3.2.4",
-    "@internal/lint": "0.0.21",
-    "@mastra/core": "0.11.0"
+    "@internal/lint": "0.0.23",
+    "@mastra/core": "0.12.0-alpha.0"
   },
   "scripts": {
     "check": "tsc --noEmit",