npm - @mastra/evals - Versions diffs - 0.1.0-alpha.23 → 0.1.0-alpha.25 - Mend

@mastra/evals 0.1.0-alpha.23 → 0.1.0-alpha.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

package/CHANGELOG.md +19 -0
package/dist/index.js +2 -1
package/dist/metrics/nlp/index.d.ts +1 -1
package/dist/metrics/nlp/index.js +1 -1
package/package.json +2 -4
package/src/attachListeners.ts +1 -1
package/src/evaluation.test.ts +3 -3
package/src/metrics/judge/index.ts +1 -1
package/src/metrics/llm/answer-relevancy/metricJudge.ts +1 -1
package/src/metrics/llm/bias/index.test.ts +10 -7
package/src/metrics/llm/bias/metricJudge.ts +1 -1
package/src/metrics/llm/context-position/metricJudge.ts +1 -1
package/src/metrics/llm/context-precision/metricJudge.ts +1 -1
package/src/metrics/llm/context-relevancy/metricJudge.ts +1 -1
package/src/metrics/llm/contextual-recall/metricJudge.ts +1 -1
package/src/metrics/llm/faithfulness/metricJudge.ts +1 -1
package/src/metrics/llm/hallucination/metricJudge.ts +1 -1
package/src/metrics/llm/prompt-alignment/metricJudge.ts +1 -1
package/src/metrics/llm/summarization/index.test.ts +84 -86
package/src/metrics/llm/summarization/metricJudge.ts +1 -1
package/src/metrics/llm/toxicity/metricJudge.ts +1 -1
package/src/metrics/nlp/completeness/index.ts +1 -1
package/src/metrics/nlp/content-similarity/index.ts +1 -1
package/src/metrics/nlp/keyword-coverage/index.ts +1 -1
package/src/metrics/nlp/textual-difference/index.ts +1 -1
package/src/metrics/nlp/tone/index.ts +1 -1
package/vitest.config.ts +2 -0

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,24 @@
 # @mastra/evals
+## 0.1.0-alpha.25
+### Patch Changes
+- 9625602: Use mastra core split bundles in other packages
+- 8769a62: Split core into separate entry files
+- Updated dependencies [30322ce]
+- Updated dependencies [78eec7c]
+- Updated dependencies [9625602]
+- Updated dependencies [8769a62]
+  - @mastra/core@0.2.0-alpha.83
+## 0.1.0-alpha.24
+### Patch Changes
+- Updated dependencies [73d112c]
+  - @mastra/core@0.1.27-alpha.82
 ## 0.1.0-alpha.23
 ### Patch Changes

package/dist/index.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import './chunk-4VNS5WPM.js';
-import { evaluate as evaluate$1, registerHook, AvailableHooks } from '@mastra/core';
+import { evaluate as evaluate$1 } from '@mastra/core';
+import { registerHook, AvailableHooks } from '@mastra/core/hooks';
 import { mkdirSync, appendFile } from 'fs';
 import { join } from 'path';

package/dist/metrics/nlp/index.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { Metric, MetricResult } from '@mastra/core';
+import { Metric, MetricResult } from '@mastra/core/eval';
 interface CompletenessMetricResult extends MetricResult {
     info: {

package/dist/metrics/nlp/index.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import '../../chunk-4VNS5WPM.js';
-import { Metric } from '@mastra/core';
+import { Metric } from '@mastra/core/eval';
 import nlp from 'compromise';
 import stringSimilarity from 'string-similarity';
 import { SequenceMatcher } from 'difflib';

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mastra/evals",
-  "version": "0.1.0-alpha.23",
+  "version": "0.1.0-alpha.25",
   "description": "",
   "type": "module",
   "main": "dist/index.js",
@@ -38,11 +38,9 @@
     "sentiment": "^5.0.2",
     "string-similarity": "^4.0.4",
     "zod": "^3.24.1",
-    "@mastra/core": "0.1.27-alpha.81"
+    "@mastra/core": "0.2.0-alpha.83"
   },
   "devDependencies": {
-    "@babel/preset-env": "^7.26.0",
-    "@babel/preset-typescript": "^7.26.0",
     "@tsconfig/recommended": "^1.0.7",
     "@types/difflib": "^0.2.7",
     "@types/fs-extra": "^11.0.4",

package/src/attachListeners.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { AvailableHooks, registerHook } from '@mastra/core';
+import { AvailableHooks, registerHook } from '@mastra/core/hooks';
 import { mkdirSync, appendFile } from 'fs';
 import { join } from 'path';

package/src/evaluation.test.ts CHANGED Viewed

@@ -1,6 +1,6 @@
-import { Agent } from '@mastra/core';
-import { ModelConfig } from '@mastra/core';
-import { Metric } from '@mastra/core';
+import { type ModelConfig } from '@mastra/core';
+import { Agent } from '@mastra/core/agent';
+import { Metric } from '@mastra/core/eval';
 import { describe, expect, it } from 'vitest';
 import { evaluate } from './evaluation';

package/src/metrics/judge/index.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { Agent, ModelConfig } from '@mastra/core';
+import { Agent, type ModelConfig } from '@mastra/core';
 export abstract class MastraAgentJudge {
   protected readonly agent: Agent;

package/src/metrics/llm/answer-relevancy/metricJudge.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { ModelConfig } from '@mastra/core';
+import { type ModelConfig } from '@mastra/core';
 import { z } from 'zod';
 import { MastraAgentJudge } from '../../judge';

package/src/metrics/llm/bias/index.test.ts CHANGED Viewed

@@ -1,7 +1,6 @@
 import { type ModelConfig } from '@mastra/core';
-import { describe, it, expect } from 'vitest';
+import { describe, it, expect, vi } from 'vitest';
-import { isCloserTo } from '../utils';
 import { TestCase } from '../utils';
 import { BiasMetric } from './index';
@@ -41,7 +40,11 @@ const testCases: TestCase[] = [
   },
 ];
-const SECONDS = 10000;
+const SECONDS = 1000;
+vi.setConfig({
+  testTimeout: 20 * SECONDS,
+});
 const modelConfig: ModelConfig = {
   provider: 'OPEN_AI',
@@ -56,20 +59,20 @@ describe('BiasMetric', () => {
   it('should be able to measure a prompt that is biased', async () => {
     const result = await metric.measure(testCases[0].input, testCases[0].output);
     expect(result.score).toBeCloseTo(testCases[0].expectedResult.score, 1);
-  }, 10000);
+  });
   it('should be able to measure a prompt that is almost not biased', async () => {
     const result = await metric.measure(testCases[1].input, testCases[1].output);
     expect(result.score).toBeLessThan(0.5);
-  }, 10000);
+  });
   it('should be able to measure a prompt that is mildly biased but actually not', async () => {
     const result = await metric.measure(testCases[2].input, testCases[2].output);
     expect(result.score).toBe(0);
-  }, 10000);
+  });
   it('should be able to measure a prompt that is mildly biased', async () => {
     const result = await metric.measure(testCases[3].input, testCases[3].output);
     expect(result.score).toBeLessThan(0.8);
-  }, 10000);
+  });
 });

package/src/metrics/llm/bias/metricJudge.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { ModelConfig } from '@mastra/core';
+import { type ModelConfig } from '@mastra/core';
 import { z } from 'zod';
 import { MastraAgentJudge } from '../../judge';

package/src/metrics/llm/context-position/metricJudge.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { ModelConfig } from '@mastra/core';
+import { type ModelConfig } from '@mastra/core';
 import { z } from 'zod';
 import { MastraAgentJudge } from '../../judge';

package/src/metrics/llm/context-precision/metricJudge.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { ModelConfig } from '@mastra/core';
+import { type ModelConfig } from '@mastra/core';
 import { z } from 'zod';
 import { MastraAgentJudge } from '../../judge';

package/src/metrics/llm/context-relevancy/metricJudge.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { ModelConfig } from '@mastra/core';
+import { type ModelConfig } from '@mastra/core';
 import { z } from 'zod';
 import { MastraAgentJudge } from '../../judge';

package/src/metrics/llm/contextual-recall/metricJudge.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { ModelConfig } from '@mastra/core';
+import { type ModelConfig } from '@mastra/core';
 import { z } from 'zod';
 import { MastraAgentJudge } from '../../judge';

package/src/metrics/llm/faithfulness/metricJudge.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { ModelConfig } from '@mastra/core';
+import { type ModelConfig } from '@mastra/core';
 import { z } from 'zod';
 import { MastraAgentJudge } from '../../judge';

package/src/metrics/llm/hallucination/metricJudge.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { ModelConfig } from '@mastra/core';
+import { type ModelConfig } from '@mastra/core';
 import { z } from 'zod';
 import { MastraAgentJudge } from '../../judge';

package/src/metrics/llm/prompt-alignment/metricJudge.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { ModelConfig } from '@mastra/core';
+import { type ModelConfig } from '@mastra/core';
 import { z } from 'zod';
 import { MastraAgentJudge } from '../../judge';

package/src/metrics/llm/summarization/index.test.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { type ModelConfig } from '@mastra/core';
-import { describe, it, expect } from 'vitest';
+import { describe, it, expect, vi } from 'vitest';
 import { TestCase } from '../utils';
@@ -165,7 +165,7 @@ const testCases: TestCase[] = [
   },
 ];
-const SECONDS = 10000;
+const SECONDS = 1000;
 const modelConfig: ModelConfig = {
   provider: 'OPEN_AI',
@@ -174,102 +174,100 @@ const modelConfig: ModelConfig = {
   apiKey: process.env.OPENAI_API_KEY,
 };
-describe(
-  'SummarizationMetric',
-  () => {
-    const metric = new SummarizationMetric(modelConfig);
+vi.setConfig({
+  testTimeout: 20 * SECONDS,
+});
-    it('should handle perfect summarization', async () => {
-      const testCase = testCases[0]!;
-      const result = await metric.measure(testCase.input, testCase.output);
-      expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
-    });
+describe('SummarizationMetric', () => {
+  const metric = new SummarizationMetric(modelConfig);
-    it('should handle mixed accuracy with contradictions', async () => {
-      const testCase = testCases[1]!;
-      const result = await metric.measure(testCase.input, testCase.output);
-      expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
-    });
+  it('should handle perfect summarization', async () => {
+    const testCase = testCases[0]!;
+    const result = await metric.measure(testCase.input, testCase.output);
+    expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
+  });
-    it('should handle missing key information', async () => {
-      const testCase = testCases[2]!;
-      const result = await metric.measure(testCase.input, testCase.output);
-      expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
-    });
+  it('should handle mixed accuracy with contradictions', async () => {
+    const testCase = testCases[1]!;
+    const result = await metric.measure(testCase.input, testCase.output);
+    expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
+  });
-    it('should handle empty output', async () => {
-      const testCase = testCases[3]!;
-      const result = await metric.measure(testCase.input, testCase.output);
-      expect(result.score).toBe(testCase.expectedResult.score);
-    });
+  it('should handle missing key information', async () => {
+    const testCase = testCases[2]!;
+    const result = await metric.measure(testCase.input, testCase.output);
+    expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
+  });
-    it('should handle speculative additions', async () => {
-      const testCase = testCases[4]!;
-      const result = await metric.measure(testCase.input, testCase.output);
-      expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
-    });
+  it('should handle empty output', async () => {
+    const testCase = testCases[3]!;
+    const result = await metric.measure(testCase.input, testCase.output);
+    expect(result.score).toBe(testCase.expectedResult.score);
+  });
-    it('should handle incorrect emphasis', async () => {
-      const testCase = testCases[5]!;
-      const result = await metric.measure(testCase.input, testCase.output);
-      expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
-    });
+  it('should handle speculative additions', async () => {
+    const testCase = testCases[4]!;
+    const result = await metric.measure(testCase.input, testCase.output);
+    expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
+  });
-    it('should handle technical accuracy with missing context', async () => {
-      const testCase = testCases[6]!;
-      const result = await metric.measure(testCase.input, testCase.output);
-      expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
-    });
+  it('should handle incorrect emphasis', async () => {
+    const testCase = testCases[5]!;
+    const result = await metric.measure(testCase.input, testCase.output);
+    expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
+  });
-    it('should handle numerical approximation', async () => {
-      const testCase = testCases[7]!;
-      const result = await metric.measure(testCase.input, testCase.output);
-      expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
-    });
+  it('should handle technical accuracy with missing context', async () => {
+    const testCase = testCases[6]!;
+    const result = await metric.measure(testCase.input, testCase.output);
+    expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
+  });
-    it('should handle mixed tenses', async () => {
-      const testCase = testCases[8]!;
-      const result = await metric.measure(testCase.input, testCase.output);
-      expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
-    });
+  it('should handle numerical approximation', async () => {
+    const testCase = testCases[7]!;
+    const result = await metric.measure(testCase.input, testCase.output);
+    expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
+  });
-    it('should handle subjective interpretation', async () => {
-      const testCase = testCases[9]!;
-      const result = await metric.measure(testCase.input, testCase.output);
-      expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
-    });
+  it('should handle mixed tenses', async () => {
+    const testCase = testCases[8]!;
+    const result = await metric.measure(testCase.input, testCase.output);
+    expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
+  });
-    it('should handle high alignment with low coverage', async () => {
-      const testCase = testCases[10]!;
-      const result = await metric.measure(testCase.input, testCase.output);
-      expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
-    });
+  it('should handle subjective interpretation', async () => {
+    const testCase = testCases[9]!;
+    const result = await metric.measure(testCase.input, testCase.output);
+    expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
+  });
-    it('should handle low alignment with high coverage', async () => {
-      const testCase = testCases[11]!;
-      const result = await metric.measure(testCase.input, testCase.output);
-      expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
-    });
+  it('should handle high alignment with low coverage', async () => {
+    const testCase = testCases[10]!;
+    const result = await metric.measure(testCase.input, testCase.output);
+    expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
+  });
-    it('should handle single word summary', async () => {
-      const testCase = testCases[12]!;
-      const result = await metric.measure(testCase.input, testCase.output);
-      expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
-    });
+  it('should handle low alignment with high coverage', async () => {
+    const testCase = testCases[11]!;
+    const result = await metric.measure(testCase.input, testCase.output);
+    expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
+  });
-    it('should handle repetitive summary', async () => {
-      const testCase = testCases[13]!;
-      const result = await metric.measure(testCase.input, testCase.output);
-      expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
-    });
+  it('should handle single word summary', async () => {
+    const testCase = testCases[12]!;
+    const result = await metric.measure(testCase.input, testCase.output);
+    expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
+  });
-    it('should handle overly verbose summary', async () => {
-      const testCase = testCases[14]!;
-      const result = await metric.measure(testCase.input, testCase.output);
-      expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
-    });
-  },
-  {
-    timeout: 15 * SECONDS,
-  },
-);
+  it('should handle repetitive summary', async () => {
+    const testCase = testCases[13]!;
+    const result = await metric.measure(testCase.input, testCase.output);
+    expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
+  });
+  it('should handle overly verbose summary', async () => {
+    const testCase = testCases[14]!;
+    const result = await metric.measure(testCase.input, testCase.output);
+    expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
+  });
+});

package/src/metrics/llm/summarization/metricJudge.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { ModelConfig } from '@mastra/core';
+import { type ModelConfig } from '@mastra/core';
 import { z } from 'zod';
 import { MastraAgentJudge } from '../../judge';

package/src/metrics/llm/toxicity/metricJudge.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { ModelConfig } from '@mastra/core';
+import { type ModelConfig } from '@mastra/core';
 import { z } from 'zod';
 import { MastraAgentJudge } from '../../judge';

package/src/metrics/nlp/completeness/index.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { Metric, type MetricResult } from '@mastra/core';
+import { Metric, type MetricResult } from '@mastra/core/eval';
 import nlp from 'compromise';
 interface CompletenessMetricResult extends MetricResult {

package/src/metrics/nlp/content-similarity/index.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { Metric, type MetricResult } from '@mastra/core';
+import { Metric, type MetricResult } from '@mastra/core/eval';
 import stringSimilarity from 'string-similarity';
 interface ContentSimilarityResult extends MetricResult {

package/src/metrics/nlp/keyword-coverage/index.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { Metric, type MetricResult } from '@mastra/core';
+import { Metric, type MetricResult } from '@mastra/core/eval';
 import keyword_extractor from 'keyword-extractor';
 interface KeywordCoverageResult extends MetricResult {

package/src/metrics/nlp/textual-difference/index.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { Metric, type MetricResult } from '@mastra/core';
+import { Metric, type MetricResult } from '@mastra/core/eval';
 import { SequenceMatcher } from 'difflib';
 interface TextualDifferenceResult extends MetricResult {

package/src/metrics/nlp/tone/index.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { Metric, type MetricResult } from '@mastra/core';
+import { Metric, type MetricResult } from '@mastra/core/eval';
 import Sentiment from 'sentiment';
 interface ToneConsitencyResult extends MetricResult {

package/vitest.config.ts CHANGED Viewed

@@ -5,5 +5,7 @@ export default defineConfig({
     environment: 'node',
     include: ['src/**/*.test.ts'],
     exclude: ['**/node_modules/**', '**/dist/**'],
+    maxConcurrency: 1,
+    fileParallelism: false,
   },
 });