@mastra/evals 0.10.7 → 0.10.8-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11988,7 +11988,7 @@ function createTestHook(name, handler) {
11988
11988
  };
11989
11989
  }
11990
11990
 
11991
- // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/utils.XdZDrNZV.js
11991
+ // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/utils.XdZDrNZV.js
11992
11992
  var NAME_WORKER_STATE = "__vitest_worker__";
11993
11993
  function getWorkerState() {
11994
11994
  const workerState = globalThis[NAME_WORKER_STATE];
@@ -12036,7 +12036,7 @@ async function waitForImportsToResolve() {
12036
12036
  await waitForImportsToResolve();
12037
12037
  }
12038
12038
 
12039
- // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/_commonjsHelpers.BFTU3MAI.js
12039
+ // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/_commonjsHelpers.BFTU3MAI.js
12040
12040
  var commonjsGlobal = typeof globalThis !== "undefined" ? globalThis : typeof window !== "undefined" ? window : typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : {};
12041
12041
  function getDefaultExportFromCjs3(x) {
12042
12042
  return x && x.__esModule && Object.prototype.hasOwnProperty.call(x, "default") ? x["default"] : x;
@@ -12889,7 +12889,7 @@ function offsetToLineNumber(source, offset) {
12889
12889
  return line + 1;
12890
12890
  }
12891
12891
  async function saveInlineSnapshots(environment, snapshots) {
12892
- const MagicString = (await import('./magic-string.es-66FD77JZ.cjs')).default;
12892
+ const MagicString = (await import('./magic-string.es-T2QO2IBJ.cjs')).default;
12893
12893
  const files = new Set(snapshots.map((i) => i.file));
12894
12894
  await Promise.all(Array.from(files).map(async (file) => {
12895
12895
  const snaps = snapshots.filter((i) => i.file === file);
@@ -13666,7 +13666,7 @@ var SnapshotClient = class {
13666
13666
  }
13667
13667
  };
13668
13668
 
13669
- // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/date.Bq6ZW5rf.js
13669
+ // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/date.Bq6ZW5rf.js
13670
13670
  var RealDate = Date;
13671
13671
  var now2 = null;
13672
13672
  var MockDate = class _MockDate extends RealDate {
@@ -13714,7 +13714,7 @@ function resetDate() {
13714
13714
  globalThis.Date = RealDate;
13715
13715
  }
13716
13716
 
13717
- // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/vi.bdSIJ99Y.js
13717
+ // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/vi.bdSIJ99Y.js
13718
13718
  var unsupported = [
13719
13719
  "matchSnapshot",
13720
13720
  "toMatchSnapshot",
@@ -16400,7 +16400,7 @@ function getImporter(name) {
16400
16400
  return stack?.file || "";
16401
16401
  }
16402
16402
 
16403
- // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/benchmark.CYdenmiT.js
16403
+ // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/benchmark.CYdenmiT.js
16404
16404
  var benchFns = /* @__PURE__ */ new WeakMap();
16405
16405
  var benchOptsMap = /* @__PURE__ */ new WeakMap();
16406
16406
  var bench = createBenchmark(function(name, fn2 = noop, options = {}) {
@@ -16426,12 +16426,12 @@ function formatName2(name) {
16426
16426
  return typeof name === "string" ? name : typeof name === "function" ? name.name || "<anonymous>" : String(name);
16427
16427
  }
16428
16428
 
16429
- // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/index.CdQS2e2Q.js
16429
+ // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/index.CdQS2e2Q.js
16430
16430
  chunkIS3BZTWE_cjs.__toESM(require_dist(), 1);
16431
16431
  var assertType = function assertType2() {
16432
16432
  };
16433
16433
 
16434
- // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/index.js
16434
+ // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/index.js
16435
16435
  var import_expect_type2 = chunkIS3BZTWE_cjs.__toESM(require_dist(), 1);
16436
16436
  var export_expectTypeOf = import_expect_type2.expectTypeOf;
16437
16437
  /*! Bundled license information:
@@ -11986,7 +11986,7 @@ function createTestHook(name, handler) {
11986
11986
  };
11987
11987
  }
11988
11988
 
11989
- // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/utils.XdZDrNZV.js
11989
+ // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/utils.XdZDrNZV.js
11990
11990
  var NAME_WORKER_STATE = "__vitest_worker__";
11991
11991
  function getWorkerState() {
11992
11992
  const workerState = globalThis[NAME_WORKER_STATE];
@@ -12034,7 +12034,7 @@ async function waitForImportsToResolve() {
12034
12034
  await waitForImportsToResolve();
12035
12035
  }
12036
12036
 
12037
- // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/_commonjsHelpers.BFTU3MAI.js
12037
+ // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/_commonjsHelpers.BFTU3MAI.js
12038
12038
  var commonjsGlobal = typeof globalThis !== "undefined" ? globalThis : typeof window !== "undefined" ? window : typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : {};
12039
12039
  function getDefaultExportFromCjs3(x) {
12040
12040
  return x && x.__esModule && Object.prototype.hasOwnProperty.call(x, "default") ? x["default"] : x;
@@ -12887,7 +12887,7 @@ function offsetToLineNumber(source, offset) {
12887
12887
  return line + 1;
12888
12888
  }
12889
12889
  async function saveInlineSnapshots(environment, snapshots) {
12890
- const MagicString = (await import('./magic-string.es-LD4FLE5J.js')).default;
12890
+ const MagicString = (await import('./magic-string.es-MNZ6ZGOL.js')).default;
12891
12891
  const files = new Set(snapshots.map((i) => i.file));
12892
12892
  await Promise.all(Array.from(files).map(async (file) => {
12893
12893
  const snaps = snapshots.filter((i) => i.file === file);
@@ -13664,7 +13664,7 @@ var SnapshotClient = class {
13664
13664
  }
13665
13665
  };
13666
13666
 
13667
- // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/date.Bq6ZW5rf.js
13667
+ // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/date.Bq6ZW5rf.js
13668
13668
  var RealDate = Date;
13669
13669
  var now2 = null;
13670
13670
  var MockDate = class _MockDate extends RealDate {
@@ -13712,7 +13712,7 @@ function resetDate() {
13712
13712
  globalThis.Date = RealDate;
13713
13713
  }
13714
13714
 
13715
- // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/vi.bdSIJ99Y.js
13715
+ // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/vi.bdSIJ99Y.js
13716
13716
  var unsupported = [
13717
13717
  "matchSnapshot",
13718
13718
  "toMatchSnapshot",
@@ -16398,7 +16398,7 @@ function getImporter(name) {
16398
16398
  return stack?.file || "";
16399
16399
  }
16400
16400
 
16401
- // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/benchmark.CYdenmiT.js
16401
+ // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/benchmark.CYdenmiT.js
16402
16402
  var benchFns = /* @__PURE__ */ new WeakMap();
16403
16403
  var benchOptsMap = /* @__PURE__ */ new WeakMap();
16404
16404
  var bench = createBenchmark(function(name, fn2 = noop, options = {}) {
@@ -16424,12 +16424,12 @@ function formatName2(name) {
16424
16424
  return typeof name === "string" ? name : typeof name === "function" ? name.name || "<anonymous>" : String(name);
16425
16425
  }
16426
16426
 
16427
- // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/chunks/index.CdQS2e2Q.js
16427
+ // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/chunks/index.CdQS2e2Q.js
16428
16428
  __toESM(require_dist(), 1);
16429
16429
  var assertType = function assertType2() {
16430
16430
  };
16431
16431
 
16432
- // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.2_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_bcb3b08cf6e48f0a4d90e52d1b155bc5/node_modules/vitest/dist/index.js
16432
+ // ../../node_modules/.pnpm/vitest@3.2.4_@types+debug@4.1.12_@types+node@20.19.9_@vitest+ui@3.2.3_jiti@2.4.2_jsdom@_0090e69ea15e68f4eaa34b37eb448faf/node_modules/vitest/dist/index.js
16433
16433
  var import_expect_type2 = __toESM(require_dist(), 1);
16434
16434
  var export_expectTypeOf = import_expect_type2.expectTypeOf;
16435
16435
  /*! Bundled license information:
package/dist/index.cjs CHANGED
@@ -41,7 +41,7 @@ var getCurrentTestInfo = async () => {
41
41
  };
42
42
  }
43
43
  try {
44
- const vitest = await import('./dist-IVAARSAW.cjs');
44
+ const vitest = await import('./dist-JD6MNRVB.cjs');
45
45
  if (typeof vitest !== "undefined" && vitest.expect?.getState) {
46
46
  const state = vitest.expect.getState();
47
47
  return {
package/dist/index.js CHANGED
@@ -39,7 +39,7 @@ var getCurrentTestInfo = async () => {
39
39
  };
40
40
  }
41
41
  try {
42
- const vitest = await import('./dist-5JXLPLM2.js');
42
+ const vitest = await import('./dist-ZXFGMR47.js');
43
43
  if (typeof vitest !== "undefined" && vitest.expect?.getState) {
44
44
  const state = vitest.expect.getState();
45
45
  return {
@@ -1,4 +1,4 @@
1
- // ../../node_modules/.pnpm/@jridgewell+sourcemap-codec@1.5.3/node_modules/@jridgewell/sourcemap-codec/dist/sourcemap-codec.mjs
1
+ // ../../node_modules/.pnpm/@jridgewell+sourcemap-codec@1.5.4/node_modules/@jridgewell/sourcemap-codec/dist/sourcemap-codec.mjs
2
2
  var comma = ",".charCodeAt(0);
3
3
  var semicolon = ";".charCodeAt(0);
4
4
  var chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
@@ -2,7 +2,7 @@
2
2
 
3
3
  Object.defineProperty(exports, '__esModule', { value: true });
4
4
 
5
- // ../../node_modules/.pnpm/@jridgewell+sourcemap-codec@1.5.3/node_modules/@jridgewell/sourcemap-codec/dist/sourcemap-codec.mjs
5
+ // ../../node_modules/.pnpm/@jridgewell+sourcemap-codec@1.5.4/node_modules/@jridgewell/sourcemap-codec/dist/sourcemap-codec.mjs
6
6
  var comma = ",".charCodeAt(0);
7
7
  var semicolon = ";".charCodeAt(0);
8
8
  var chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
@@ -236,7 +236,7 @@ function createAnswerRelevancyScorer({
236
236
  },
237
237
  analyze: {
238
238
  description: "Score the relevance of the statements to the input",
239
- outputSchema: zod.z.array(zod.z.object({ result: zod.z.string(), reason: zod.z.string() })),
239
+ outputSchema: zod.z.object({ results: zod.z.array(zod.z.object({ result: zod.z.string(), reason: zod.z.string() })) }),
240
240
  createPrompt: ({ run }) => createScorePrompt(JSON.stringify(run.input), run.extractStepResult?.statements || [])
241
241
  },
242
242
  reason: {
@@ -246,18 +246,18 @@ function createAnswerRelevancyScorer({
246
246
  input: run.input.map((input) => input.content).join(", "),
247
247
  output: run.output.text,
248
248
  score: run.score,
249
- results: run.analyzeStepResult,
249
+ results: run.analyzeStepResult.results,
250
250
  scale: options.scale
251
251
  });
252
252
  }
253
253
  },
254
254
  calculateScore: ({ run }) => {
255
- if (!run.analyzeStepResult || run.analyzeStepResult.length === 0) {
255
+ if (!run.analyzeStepResult || run.analyzeStepResult.results.length === 0) {
256
256
  return 0;
257
257
  }
258
- const numberOfResults = run.analyzeStepResult.length;
258
+ const numberOfResults = run.analyzeStepResult.results.length;
259
259
  let relevancyCount = 0;
260
- for (const { result } of run.analyzeStepResult) {
260
+ for (const { result } of run.analyzeStepResult.results) {
261
261
  if (result.trim().toLowerCase() === "yes") {
262
262
  relevancyCount++;
263
263
  } else if (result.trim().toLowerCase() === "unsure") {
@@ -455,7 +455,7 @@ function createFaithfulnessScorer({
455
455
  },
456
456
  analyze: {
457
457
  description: "Score the relevance of the statements to the input",
458
- outputSchema: zod.z.array(zod.z.object({ verdict: zod.z.string(), reason: zod.z.string() })),
458
+ outputSchema: zod.z.object({ verdicts: zod.z.array(zod.z.object({ verdict: zod.z.string(), reason: zod.z.string() })) }),
459
459
  createPrompt: ({ run }) => {
460
460
  const prompt = createFaithfulnessAnalyzePrompt({
461
461
  claims: run.extractStepResult || [],
@@ -465,8 +465,8 @@ function createFaithfulnessScorer({
465
465
  }
466
466
  },
467
467
  calculateScore: ({ run }) => {
468
- const totalClaims = run.analyzeStepResult.length;
469
- const supportedClaims = run.analyzeStepResult.filter((v) => v.verdict === "yes").length;
468
+ const totalClaims = run.analyzeStepResult.verdicts.length;
469
+ const supportedClaims = run.analyzeStepResult.verdicts.filter((v) => v.verdict === "yes").length;
470
470
  if (totalClaims === 0) {
471
471
  return 0;
472
472
  }
@@ -482,7 +482,7 @@ function createFaithfulnessScorer({
482
482
  context: options?.context || [],
483
483
  score: run.score,
484
484
  scale: options?.scale || 1,
485
- verdicts: run.analyzeStepResult || []
485
+ verdicts: run.analyzeStepResult?.verdicts || []
486
486
  });
487
487
  return prompt;
488
488
  }
@@ -617,7 +617,7 @@ function createBiasScorer({ model, options }) {
617
617
  },
618
618
  analyze: {
619
619
  description: "Score the relevance of the statements to the input",
620
- outputSchema: zod.z.array(zod.z.object({ result: zod.z.string(), reason: zod.z.string() })),
620
+ outputSchema: zod.z.object({ results: zod.z.array(zod.z.object({ result: zod.z.string(), reason: zod.z.string() })) }),
621
621
  createPrompt: ({ run }) => {
622
622
  const prompt = createBiasAnalyzePrompt({
623
623
  output: run.output.text,
@@ -627,17 +627,20 @@ function createBiasScorer({ model, options }) {
627
627
  }
628
628
  },
629
629
  calculateScore: ({ run }) => {
630
- if (!run.analyzeStepResult || run.analyzeStepResult.length === 0) {
630
+ if (!run.analyzeStepResult || run.analyzeStepResult.results.length === 0) {
631
631
  return 0;
632
632
  }
633
- const biasedVerdicts = run.analyzeStepResult.filter((v) => v.result.toLowerCase() === "yes");
634
- const score = biasedVerdicts.length / run.analyzeStepResult.length;
633
+ const biasedVerdicts = run.analyzeStepResult.results.filter((v) => v.result.toLowerCase() === "yes");
634
+ const score = biasedVerdicts.length / run.analyzeStepResult.results.length;
635
635
  return roundToTwoDecimals2(score * (options?.scale || 1));
636
636
  },
637
637
  reason: {
638
638
  description: "Reason about the results",
639
639
  createPrompt: ({ run }) => {
640
- return createBiasReasonPrompt({ score: run.score, biases: run.analyzeStepResult?.map((v) => v.reason) || [] });
640
+ return createBiasReasonPrompt({
641
+ score: run.score,
642
+ biases: run.analyzeStepResult?.results.map((v) => v.reason) || []
643
+ });
641
644
  }
642
645
  }
643
646
  });
@@ -858,7 +861,9 @@ function createHallucinationScorer({
858
861
  },
859
862
  analyze: {
860
863
  description: "Score the relevance of the statements to the input",
861
- outputSchema: zod.z.array(zod.z.object({ statement: zod.z.string(), verdict: zod.z.string(), reason: zod.z.string() })),
864
+ outputSchema: zod.z.object({
865
+ verdicts: zod.z.array(zod.z.object({ statement: zod.z.string(), verdict: zod.z.string(), reason: zod.z.string() }))
866
+ }),
862
867
  createPrompt: ({ run }) => {
863
868
  const prompt = createHallucinationAnalyzePrompt({
864
869
  claims: run.extractStepResult.claims,
@@ -868,8 +873,8 @@ function createHallucinationScorer({
868
873
  }
869
874
  },
870
875
  calculateScore: ({ run }) => {
871
- const totalStatements = run.analyzeStepResult.length;
872
- const contradictedStatements = run.analyzeStepResult.filter((v) => v.verdict === "yes").length;
876
+ const totalStatements = run.analyzeStepResult.verdicts.length;
877
+ const contradictedStatements = run.analyzeStepResult.verdicts.filter((v) => v.verdict === "yes").length;
873
878
  if (totalStatements === 0) {
874
879
  return 0;
875
880
  }
@@ -885,7 +890,7 @@ function createHallucinationScorer({
885
890
  context: run?.additionalContext?.context || [],
886
891
  score: run.score,
887
892
  scale: options?.scale || 1,
888
- verdicts: run.analyzeStepResult || []
893
+ verdicts: run.analyzeStepResult?.verdicts || []
889
894
  });
890
895
  return prompt;
891
896
  }
@@ -991,7 +996,7 @@ function createToxicityScorer({ model, options }) {
991
996
  },
992
997
  analyze: {
993
998
  description: "Score the relevance of the statements to the input",
994
- outputSchema: zod.z.array(zod.z.object({ verdict: zod.z.string(), reason: zod.z.string() })),
999
+ outputSchema: zod.z.object({ verdicts: zod.z.array(zod.z.object({ verdict: zod.z.string(), reason: zod.z.string() })) }),
995
1000
  createPrompt: ({ run }) => {
996
1001
  const prompt = createToxicityAnalyzePrompt({
997
1002
  input: run.input.map((input) => input.content).join(", "),
@@ -1001,12 +1006,12 @@ function createToxicityScorer({ model, options }) {
1001
1006
  }
1002
1007
  },
1003
1008
  calculateScore: ({ run }) => {
1004
- const numberOfVerdicts = run.analyzeStepResult?.length || 0;
1009
+ const numberOfVerdicts = run.analyzeStepResult?.verdicts.length || 0;
1005
1010
  if (numberOfVerdicts === 0) {
1006
1011
  return 1;
1007
1012
  }
1008
1013
  let toxicityCount = 0;
1009
- for (const { verdict } of run.analyzeStepResult) {
1014
+ for (const { verdict } of run.analyzeStepResult.verdicts) {
1010
1015
  if (verdict.trim().toLowerCase() === "yes") {
1011
1016
  toxicityCount++;
1012
1017
  }
@@ -1019,7 +1024,7 @@ function createToxicityScorer({ model, options }) {
1019
1024
  createPrompt: ({ run }) => {
1020
1025
  const prompt = createToxicityReasonPrompt({
1021
1026
  score: run.score,
1022
- toxics: run.analyzeStepResult?.map((v) => v.reason) || []
1027
+ toxics: run.analyzeStepResult?.verdicts.map((v) => v.reason) || []
1023
1028
  });
1024
1029
  return prompt;
1025
1030
  }
@@ -234,7 +234,7 @@ function createAnswerRelevancyScorer({
234
234
  },
235
235
  analyze: {
236
236
  description: "Score the relevance of the statements to the input",
237
- outputSchema: z.array(z.object({ result: z.string(), reason: z.string() })),
237
+ outputSchema: z.object({ results: z.array(z.object({ result: z.string(), reason: z.string() })) }),
238
238
  createPrompt: ({ run }) => createScorePrompt(JSON.stringify(run.input), run.extractStepResult?.statements || [])
239
239
  },
240
240
  reason: {
@@ -244,18 +244,18 @@ function createAnswerRelevancyScorer({
244
244
  input: run.input.map((input) => input.content).join(", "),
245
245
  output: run.output.text,
246
246
  score: run.score,
247
- results: run.analyzeStepResult,
247
+ results: run.analyzeStepResult.results,
248
248
  scale: options.scale
249
249
  });
250
250
  }
251
251
  },
252
252
  calculateScore: ({ run }) => {
253
- if (!run.analyzeStepResult || run.analyzeStepResult.length === 0) {
253
+ if (!run.analyzeStepResult || run.analyzeStepResult.results.length === 0) {
254
254
  return 0;
255
255
  }
256
- const numberOfResults = run.analyzeStepResult.length;
256
+ const numberOfResults = run.analyzeStepResult.results.length;
257
257
  let relevancyCount = 0;
258
- for (const { result } of run.analyzeStepResult) {
258
+ for (const { result } of run.analyzeStepResult.results) {
259
259
  if (result.trim().toLowerCase() === "yes") {
260
260
  relevancyCount++;
261
261
  } else if (result.trim().toLowerCase() === "unsure") {
@@ -453,7 +453,7 @@ function createFaithfulnessScorer({
453
453
  },
454
454
  analyze: {
455
455
  description: "Score the relevance of the statements to the input",
456
- outputSchema: z.array(z.object({ verdict: z.string(), reason: z.string() })),
456
+ outputSchema: z.object({ verdicts: z.array(z.object({ verdict: z.string(), reason: z.string() })) }),
457
457
  createPrompt: ({ run }) => {
458
458
  const prompt = createFaithfulnessAnalyzePrompt({
459
459
  claims: run.extractStepResult || [],
@@ -463,8 +463,8 @@ function createFaithfulnessScorer({
463
463
  }
464
464
  },
465
465
  calculateScore: ({ run }) => {
466
- const totalClaims = run.analyzeStepResult.length;
467
- const supportedClaims = run.analyzeStepResult.filter((v) => v.verdict === "yes").length;
466
+ const totalClaims = run.analyzeStepResult.verdicts.length;
467
+ const supportedClaims = run.analyzeStepResult.verdicts.filter((v) => v.verdict === "yes").length;
468
468
  if (totalClaims === 0) {
469
469
  return 0;
470
470
  }
@@ -480,7 +480,7 @@ function createFaithfulnessScorer({
480
480
  context: options?.context || [],
481
481
  score: run.score,
482
482
  scale: options?.scale || 1,
483
- verdicts: run.analyzeStepResult || []
483
+ verdicts: run.analyzeStepResult?.verdicts || []
484
484
  });
485
485
  return prompt;
486
486
  }
@@ -615,7 +615,7 @@ function createBiasScorer({ model, options }) {
615
615
  },
616
616
  analyze: {
617
617
  description: "Score the relevance of the statements to the input",
618
- outputSchema: z.array(z.object({ result: z.string(), reason: z.string() })),
618
+ outputSchema: z.object({ results: z.array(z.object({ result: z.string(), reason: z.string() })) }),
619
619
  createPrompt: ({ run }) => {
620
620
  const prompt = createBiasAnalyzePrompt({
621
621
  output: run.output.text,
@@ -625,17 +625,20 @@ function createBiasScorer({ model, options }) {
625
625
  }
626
626
  },
627
627
  calculateScore: ({ run }) => {
628
- if (!run.analyzeStepResult || run.analyzeStepResult.length === 0) {
628
+ if (!run.analyzeStepResult || run.analyzeStepResult.results.length === 0) {
629
629
  return 0;
630
630
  }
631
- const biasedVerdicts = run.analyzeStepResult.filter((v) => v.result.toLowerCase() === "yes");
632
- const score = biasedVerdicts.length / run.analyzeStepResult.length;
631
+ const biasedVerdicts = run.analyzeStepResult.results.filter((v) => v.result.toLowerCase() === "yes");
632
+ const score = biasedVerdicts.length / run.analyzeStepResult.results.length;
633
633
  return roundToTwoDecimals2(score * (options?.scale || 1));
634
634
  },
635
635
  reason: {
636
636
  description: "Reason about the results",
637
637
  createPrompt: ({ run }) => {
638
- return createBiasReasonPrompt({ score: run.score, biases: run.analyzeStepResult?.map((v) => v.reason) || [] });
638
+ return createBiasReasonPrompt({
639
+ score: run.score,
640
+ biases: run.analyzeStepResult?.results.map((v) => v.reason) || []
641
+ });
639
642
  }
640
643
  }
641
644
  });
@@ -856,7 +859,9 @@ function createHallucinationScorer({
856
859
  },
857
860
  analyze: {
858
861
  description: "Score the relevance of the statements to the input",
859
- outputSchema: z.array(z.object({ statement: z.string(), verdict: z.string(), reason: z.string() })),
862
+ outputSchema: z.object({
863
+ verdicts: z.array(z.object({ statement: z.string(), verdict: z.string(), reason: z.string() }))
864
+ }),
860
865
  createPrompt: ({ run }) => {
861
866
  const prompt = createHallucinationAnalyzePrompt({
862
867
  claims: run.extractStepResult.claims,
@@ -866,8 +871,8 @@ function createHallucinationScorer({
866
871
  }
867
872
  },
868
873
  calculateScore: ({ run }) => {
869
- const totalStatements = run.analyzeStepResult.length;
870
- const contradictedStatements = run.analyzeStepResult.filter((v) => v.verdict === "yes").length;
874
+ const totalStatements = run.analyzeStepResult.verdicts.length;
875
+ const contradictedStatements = run.analyzeStepResult.verdicts.filter((v) => v.verdict === "yes").length;
871
876
  if (totalStatements === 0) {
872
877
  return 0;
873
878
  }
@@ -883,7 +888,7 @@ function createHallucinationScorer({
883
888
  context: run?.additionalContext?.context || [],
884
889
  score: run.score,
885
890
  scale: options?.scale || 1,
886
- verdicts: run.analyzeStepResult || []
891
+ verdicts: run.analyzeStepResult?.verdicts || []
887
892
  });
888
893
  return prompt;
889
894
  }
@@ -989,7 +994,7 @@ function createToxicityScorer({ model, options }) {
989
994
  },
990
995
  analyze: {
991
996
  description: "Score the relevance of the statements to the input",
992
- outputSchema: z.array(z.object({ verdict: z.string(), reason: z.string() })),
997
+ outputSchema: z.object({ verdicts: z.array(z.object({ verdict: z.string(), reason: z.string() })) }),
993
998
  createPrompt: ({ run }) => {
994
999
  const prompt = createToxicityAnalyzePrompt({
995
1000
  input: run.input.map((input) => input.content).join(", "),
@@ -999,12 +1004,12 @@ function createToxicityScorer({ model, options }) {
999
1004
  }
1000
1005
  },
1001
1006
  calculateScore: ({ run }) => {
1002
- const numberOfVerdicts = run.analyzeStepResult?.length || 0;
1007
+ const numberOfVerdicts = run.analyzeStepResult?.verdicts.length || 0;
1003
1008
  if (numberOfVerdicts === 0) {
1004
1009
  return 1;
1005
1010
  }
1006
1011
  let toxicityCount = 0;
1007
- for (const { verdict } of run.analyzeStepResult) {
1012
+ for (const { verdict } of run.analyzeStepResult.verdicts) {
1008
1013
  if (verdict.trim().toLowerCase() === "yes") {
1009
1014
  toxicityCount++;
1010
1015
  }
@@ -1017,7 +1022,7 @@ function createToxicityScorer({ model, options }) {
1017
1022
  createPrompt: ({ run }) => {
1018
1023
  const prompt = createToxicityReasonPrompt({
1019
1024
  score: run.score,
1020
- toxics: run.analyzeStepResult?.map((v) => v.reason) || []
1025
+ toxics: run.analyzeStepResult?.verdicts.map((v) => v.reason) || []
1021
1026
  });
1022
1027
  return prompt;
1023
1028
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mastra/evals",
3
- "version": "0.10.7",
3
+ "version": "0.10.8-alpha.0",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "files": [
@@ -100,8 +100,8 @@
100
100
  "tsup": "^8.5.0",
101
101
  "typescript": "^5.8.3",
102
102
  "vitest": "^3.2.4",
103
- "@internal/lint": "0.0.22",
104
- "@mastra/core": "0.11.1"
103
+ "@internal/lint": "0.0.23",
104
+ "@mastra/core": "0.12.0-alpha.0"
105
105
  },
106
106
  "scripts": {
107
107
  "check": "tsc --noEmit",