@eva-llm/eva-judge 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -3
- package/dst/config.d.ts +1 -14
- package/dst/config.js.map +1 -1
- package/dst/types.d.ts +13 -0
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -14,7 +14,7 @@ npm install @eva-llm/eva-judge
|
|
|
14
14
|
```
|
|
15
15
|
|
|
16
16
|
```ts
|
|
17
|
-
import { llmRubric, gEval, bEval } from '@eva-llm/eva-judge'
|
|
17
|
+
import { llmRubric, gEval, bEval } from '@eva-llm/eva-judge';
|
|
18
18
|
|
|
19
19
|
const prompt = 'Hello! How are you?';
|
|
20
20
|
const answer = 'Hi! I am fine. And you?';
|
|
@@ -45,7 +45,6 @@ const result = await llmRubric(
|
|
|
45
45
|
// result: { reason: string, pass: boolean, score: number }
|
|
46
46
|
```
|
|
47
47
|
|
|
48
|
-
|
|
49
48
|
### gEval
|
|
50
49
|
|
|
51
50
|
Evaluates a reply against criteria and derived steps using an LLM. Returns a reason and normalized score (0.0-1.0).
|
|
@@ -98,7 +97,7 @@ Specify the provider name and model name in `llmRubric`, `gEval`, or `bEval`.
|
|
|
98
97
|
> **Note:** Each provider integration is based on its respective ai-sdk package. Be sure to follow the provider's documentation for setup and authentication. Most providers require you to export an API key or token as an environment variable (e.g., `export OPENAI_API_KEY=...`).
|
|
99
98
|
|
|
100
99
|
## Enterprise
|
|
101
|
-
### Hooks
|
|
100
|
+
### LLM Judge Hooks
|
|
102
101
|
|
|
103
102
|
You can provide hooks to receive notifications about evaluation events (success or error) for logging, monitoring, or custom handling. Hooks can also be used to integrate with observability tools such as OpenTelemetry for tracing and metrics. Set these in the config:
|
|
104
103
|
|
|
@@ -115,6 +114,19 @@ Config.hooks = {
|
|
|
115
114
|
};
|
|
116
115
|
```
|
|
117
116
|
|
|
117
|
+
### Configuring
|
|
118
|
+
|
|
119
|
+
```ts
|
|
120
|
+
import Config from '@eva-llm/eva-judge';
|
|
121
|
+
|
|
122
|
+
Config.restartModelCache(500); // cache 500 (default 100) models by provider:model with LRU Cache
|
|
123
|
+
Config.restartStepsCache(1000); // cache 1000 (default 500) Evaluation Steps by criteria with LRU Cache
|
|
124
|
+
Config.enableModelCache();
|
|
125
|
+
Config.disableModelCache();
|
|
126
|
+
Config.enableStepsCache();
|
|
127
|
+
Config.disableStepsCache();
|
|
128
|
+
```
|
|
129
|
+
|
|
118
130
|
### G-Eval/B-Eval Evaluation Steps Persistent Storage
|
|
119
131
|
|
|
120
132
|
For advanced use, you can implement your own cache storage for evaluation steps (e.g., using Redis or another backend) by providing a custom cache via `setStepsCache()`:
|
package/dst/config.d.ts
CHANGED
|
@@ -1,19 +1,6 @@
|
|
|
1
1
|
import { LRUCache } from 'lru-cache';
|
|
2
2
|
import { type LanguageModel } from 'ai';
|
|
3
|
-
import { type
|
|
4
|
-
export interface EvaHooks {
|
|
5
|
-
onSuccess?: (data: {
|
|
6
|
-
method: EvalMethod;
|
|
7
|
-
params: any;
|
|
8
|
-
result: any;
|
|
9
|
-
duration: number;
|
|
10
|
-
}) => void;
|
|
11
|
-
onError?: (data: {
|
|
12
|
-
method: EvalMethod;
|
|
13
|
-
error: any;
|
|
14
|
-
duration: number;
|
|
15
|
-
}) => void;
|
|
16
|
-
}
|
|
3
|
+
import { type EvaHooks, type IStepsCache } from './types';
|
|
17
4
|
declare const _default: {
|
|
18
5
|
gevalMaxScore: number;
|
|
19
6
|
isModelCached: boolean;
|
package/dst/config.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":";;AAAA,yCAAqC;AASrC,MAAM,kBAAkB;IACd,KAAK,CAA6B;IAM1C,YAAY,IAAY;QACtB,IAAI,CAAC,KAAK,GAAG,IAAI,oBAAQ,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IAC3C,CAAC;IAMD,KAAK,CAAC,GAAG,CAAC,GAAW,EAAE,KAAe;QACpC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAC7B,CAAC;IAMD,KAAK,CAAC,GAAG,CAAC,GAAW;QACnB,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;CACF;
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":";;AAAA,yCAAqC;AASrC,MAAM,kBAAkB;IACd,KAAK,CAA6B;IAM1C,YAAY,IAAY;QACtB,IAAI,CAAC,KAAK,GAAG,IAAI,oBAAQ,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IAC3C,CAAC;IAMD,KAAK,CAAC,GAAG,CAAC,GAAW,EAAE,KAAe;QACpC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAC7B,CAAC;IAMD,KAAK,CAAC,GAAG,CAAC,GAAW;QACnB,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;CACF;AAOD,kBAAe;IAIb,aAAa,EAAE,EAAE;IAIjB,aAAa,EAAE,IAAI;IAInB,aAAa,EAAE,IAAI;IAInB,UAAU,EAAE,IAAI,oBAAQ,CAAwB,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;IAI7D,UAAU,EAAE,IAAI,kBAAkB,CAAC,GAAG,CAAgB;IAKtD,iBAAiB,CAAC,OAAe,GAAG;QAClC,IAAI,CAAC,UAAU,GAAG,IAAI,oBAAQ,CAAwB,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IACvE,CAAC;IAKD,iBAAiB,CAAC,OAAe,GAAG;QAClC,IAAI,CAAC,UAAU,GAAG,IAAI,kBAAkB,CAAC,IAAI,CAAgB,CAAC;IAChE,CAAC;IAKD,aAAa,CAAC,KAAkB;QAC9B,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC;IAC1B,CAAC;IAID,gBAAgB;QACd,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;IAC5B,CAAC;IAID,iBAAiB;QACf,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC;IAC7B,CAAC;IAID,gBAAgB;QACd,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;IAC5B,CAAC;IAID,iBAAiB;QACf,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC;IAC7B,CAAC;IAID,KAAK,EAAE,EAAc;IAKrB,QAAQ,CAAC,KAAe;QACtB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACrB,CAAC;CACF,CAAC"}
|
package/dst/types.d.ts
CHANGED
|
@@ -7,3 +7,16 @@ export interface EvalOptions {
|
|
|
7
7
|
temperature?: number;
|
|
8
8
|
providerOptions?: Record<string, any>;
|
|
9
9
|
}
|
|
10
|
+
export interface EvaHooks {
|
|
11
|
+
onSuccess?: (data: {
|
|
12
|
+
method: EvalMethod;
|
|
13
|
+
params: any;
|
|
14
|
+
result: any;
|
|
15
|
+
duration: number;
|
|
16
|
+
}) => void;
|
|
17
|
+
onError?: (data: {
|
|
18
|
+
method: EvalMethod;
|
|
19
|
+
error: any;
|
|
20
|
+
duration: number;
|
|
21
|
+
}) => void;
|
|
22
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@eva-llm/eva-judge",
|
|
3
|
-
"version": "1.0.0",
|
|
3
|
+
"version": "1.0.1",
|
|
4
4
|
"description": "LLM-as-a-Judge abstraction layer using ai-sdk and plugins",
|
|
5
5
|
"main": "dst/index.js",
|
|
6
6
|
"types": "dst/index.d.ts",
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
"bugs": {
|
|
20
20
|
"url": "https://github.com/eva-llm/eva-judge/issues"
|
|
21
21
|
},
|
|
22
|
-
"homepage": "https://
|
|
22
|
+
"homepage": "https://eva-llm.github.io/eva-judge",
|
|
23
23
|
"devDependencies": {
|
|
24
24
|
"@types/jest": "^30.0.0",
|
|
25
25
|
"@types/mustache": "^4.2.6",
|