@assay-ai/core 0.2.1-beta → 1.3.1-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -84
- package/dist/index.cjs +576 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +69 -1
- package/dist/index.d.ts +69 -1
- package/dist/index.js +568 -0
- package/dist/index.js.map +1 -1
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -1,16 +1,23 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
1
3
|
# @assay-ai/core
|
|
2
4
|
|
|
3
|
-
The
|
|
5
|
+
*The evaluation engine powering Assay -- 18 metrics, 5 providers, zero `any`*
|
|
6
|
+
|
|
7
|
+
[npm version](https://www.npmjs.com/package/@assay-ai/core)
|
|
8
|
+
[npm downloads](https://www.npmjs.com/package/@assay-ai/core)
|
|
9
|
+
[License](https://github.com/assay-ai/assay/blob/main/LICENSE)
|
|
4
10
|
|
|
5
|
-
[
|
|
6
|
-
|
|
11
|
+
[Documentation](https://assay.js.org) · [Metrics](https://assay.js.org/metrics/) · [API Reference](https://assay.js.org/api/)
|
|
12
|
+
|
|
13
|
+
</div>
|
|
7
14
|
|
|
8
15
|
## Installation
|
|
9
16
|
|
|
10
17
|
```bash
|
|
11
|
-
|
|
12
|
-
#
|
|
13
|
-
|
|
18
|
+
pnpm add @assay-ai/core # pnpm
|
|
19
|
+
npm install @assay-ai/core # npm
|
|
20
|
+
yarn add @assay-ai/core # Yarn
|
|
14
21
|
```
|
|
15
22
|
|
|
16
23
|
## Quick Start
|
|
@@ -26,14 +33,17 @@ import {
|
|
|
26
33
|
const results = await evaluate(
|
|
27
34
|
[
|
|
28
35
|
{
|
|
29
|
-
input: "What is the
|
|
30
|
-
actualOutput: "
|
|
31
|
-
|
|
36
|
+
input: "What is the refund policy?",
|
|
37
|
+
actualOutput: "You can request a full refund within 30 days.",
|
|
38
|
+
retrievalContext: [
|
|
39
|
+
"Refund Policy: Full refund within 30 days of purchase.",
|
|
40
|
+
],
|
|
41
|
+
context: ["Our refund policy allows returns within 30 days."],
|
|
32
42
|
},
|
|
33
43
|
],
|
|
34
44
|
[
|
|
35
|
-
new FaithfulnessMetric({ threshold: 0.7 }),
|
|
36
45
|
new AnswerRelevancyMetric({ threshold: 0.7 }),
|
|
46
|
+
new FaithfulnessMetric({ threshold: 0.7 }),
|
|
37
47
|
new HallucinationMetric({ threshold: 0.3 }),
|
|
38
48
|
],
|
|
39
49
|
);
|
|
@@ -41,78 +51,11 @@ const results = await evaluate(
|
|
|
41
51
|
console.log(`Pass rate: ${results.summary.passRate.toFixed(1)}%`);
|
|
42
52
|
```
|
|
43
53
|
|
|
44
|
-
##
|
|
45
|
-
|
|
46
|
-
Assay ships with 12 evaluation metrics out of the box:
|
|
47
|
-
|
|
48
|
-
| Metric | Description | Required Fields |
|
|
49
|
-
|--------|-------------|-----------------|
|
|
50
|
-
| `AnswerRelevancyMetric` | Measures how relevant the output is to the input | `input`, `actualOutput` |
|
|
51
|
-
| `FaithfulnessMetric` | Measures whether the output is grounded in context | `input`, `actualOutput`, `retrievalContext` |
|
|
52
|
-
| `HallucinationMetric` | Detects claims not supported by context | `input`, `actualOutput`, `context` |
|
|
53
|
-
| `ContextualPrecisionMetric` | Measures whether relevant context items are ranked higher | `input`, `expectedOutput`, `retrievalContext` |
|
|
54
|
-
| `ContextualRecallMetric` | Measures whether all relevant information is retrieved | `input`, `expectedOutput`, `retrievalContext` |
|
|
55
|
-
| `ContextualRelevancyMetric` | Measures whether retrieved context is relevant | `input`, `actualOutput`, `retrievalContext` |
|
|
56
|
-
| `BiasMetric` | Detects demographic or ideological bias | `input`, `actualOutput` |
|
|
57
|
-
| `ToxicityMetric` | Detects toxic or harmful content | `input`, `actualOutput` |
|
|
58
|
-
| `GEval` | Custom LLM-as-judge with user-defined criteria | `input`, `actualOutput` |
|
|
59
|
-
| `SummarizationMetric` | Evaluates summary quality | `input`, `actualOutput` |
|
|
60
|
-
| `ExactMatchMetric` | Exact string comparison (no LLM needed) | `actualOutput`, `expectedOutput` |
|
|
61
|
-
| `JsonCorrectnessMetric` | Validates JSON structure (no LLM needed) | `actualOutput` |
|
|
62
|
-
|
|
63
|
-
## Configuration
|
|
64
|
-
|
|
65
|
-
### Provider
|
|
66
|
-
|
|
67
|
-
Assay auto-detects your LLM provider from environment variables:
|
|
68
|
-
|
|
69
|
-
```bash
|
|
70
|
-
# OpenAI (default)
|
|
71
|
-
export OPENAI_API_KEY="sk-..."
|
|
72
|
-
|
|
73
|
-
# Anthropic
|
|
74
|
-
export ANTHROPIC_API_KEY="sk-ant-..."
|
|
75
|
-
```
|
|
76
|
-
|
|
77
|
-
### Metric Options
|
|
78
|
-
|
|
79
|
-
Every metric accepts optional configuration:
|
|
80
|
-
|
|
81
|
-
```typescript
|
|
82
|
-
new FaithfulnessMetric({
|
|
83
|
-
threshold: 0.7, // Minimum score to pass (default: 0.5)
|
|
84
|
-
model: "gpt-4o-mini", // LLM model for evaluation
|
|
85
|
-
verbose: true, // Log detailed reasoning
|
|
86
|
-
});
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
### Custom Metrics with GEval
|
|
90
|
-
|
|
91
|
-
Define any evaluation criteria in plain English:
|
|
92
|
-
|
|
93
|
-
```typescript
|
|
94
|
-
import { GEval } from "@assay-ai/core";
|
|
95
|
-
|
|
96
|
-
const politeness = new GEval({
|
|
97
|
-
name: "Politeness",
|
|
98
|
-
criteria: "The response should be polite and professional.",
|
|
99
|
-
evaluationSteps: [
|
|
100
|
-
"Check if the response uses polite phrases",
|
|
101
|
-
"Verify the tone is respectful",
|
|
102
|
-
],
|
|
103
|
-
});
|
|
104
|
-
```
|
|
105
|
-
|
|
106
|
-
## Exports
|
|
107
|
-
|
|
108
|
-
This package exports:
|
|
109
|
-
|
|
110
|
-
- **Metrics**: `AnswerRelevancyMetric`, `FaithfulnessMetric`, `HallucinationMetric`, `ContextualPrecisionMetric`, `ContextualRecallMetric`, `ContextualRelevancyMetric`, `BiasMetric`, `ToxicityMetric`, `GEval`, `SummarizationMetric`, `ExactMatchMetric`, `JsonCorrectnessMetric`
|
|
111
|
-
- **Evaluation**: `evaluate`, `assertEval`
|
|
112
|
-
- **Providers**: `BaseLLMProvider`, `OpenAIProvider`, `AnthropicProvider`, `OllamaProvider`, `resolveProvider`
|
|
113
|
-
- **Utilities**: `parseJson`, `tryParseJson`, `createLimiter`, `ConsoleReporter`
|
|
114
|
-
- **Types**: `LLMTestCase`, `MetricResult`, `MetricConfig`, `EvaluateConfig`, `EvaluateResult`, `EvaluationDataset`
|
|
115
|
-
|
|
116
|
-
## License
|
|
54
|
+
## Part of the [Assay](https://github.com/assay-ai/assay) monorepo
|
|
117
55
|
|
|
118
|
-
|
|
56
|
+
<p align="center">
|
|
57
|
+
<a href="https://assay.js.org"><img src="https://img.shields.io/badge/Documentation-6366f1?style=for-the-badge&logo=readthedocs&logoColor=white" alt="Documentation" /></a>
|
|
58
|
+
<a href="https://www.npmjs.com/package/@assay-ai/core"><img src="https://img.shields.io/badge/npm-cb3837?style=for-the-badge&logo=npm&logoColor=white" alt="npm" /></a>
|
|
59
|
+
<a href="https://github.com/assay-ai/assay"><img src="https://img.shields.io/badge/GitHub-181717?style=for-the-badge&logo=github&logoColor=white" alt="GitHub" /></a>
|
|
60
|
+
<a href="https://github.com/assay-ai/assay/issues"><img src="https://img.shields.io/badge/Issues-6366f1?style=for-the-badge&logo=github&logoColor=white" alt="Issues" /></a>
|
|
61
|
+
</p>
|