@eva-llm/eva-judge 0.1.3 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -38
- package/dst/config.d.ts +1 -18
- package/dst/config.js.map +1 -1
- package/dst/index.d.ts +2 -4
- package/dst/index.js +1 -0
- package/dst/index.js.map +1 -1
- package/dst/types.d.ts +21 -0
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -1,41 +1,35 @@
|
|
|
1
|
-
# Project Inspiration & Attribution
|
|
2
|
-
|
|
3
|
-
This project is inspired by [promptfoo](https://github.com/promptfoo/promptfoo), including author's work on the G-Eval framework there. The LLM-as-a-Judge prompts are copied from promptfoo and adapted for project-specific issues.
|
|
4
|
-
|
|
5
1
|
# eva-judge
|
|
6
2
|
|
|
7
|
-
A TypeScript/Node.js
|
|
3
|
+
A TypeScript/Node.js library for automated text evaluation with AI analysis through **LLM-Rubric**, **G-Eval**, or **B-Eval** (Binary G-Eval).
|
|
8
4
|
|
|
9
|
-
##
|
|
10
|
-
- Configuration management for evaluation workflows
|
|
11
|
-
- Prompt handling and manipulation
|
|
12
|
-
- Registry for test cases and evaluation items
|
|
13
|
-
- Designed for integration with Jest and other test runners
|
|
5
|
+
## Project Inspiration & Attribution
|
|
14
6
|
|
|
15
|
-
|
|
7
|
+
This project is inspired by [promptfoo](https://github.com/promptfoo/promptfoo), including [author's work](https://github.com/promptfoo/promptfoo/issues?q=state%3Aclosed%20is%3Apr%20author%3A%40schipiga) on the [G-Eval](https://www.promptfoo.dev/docs/configuration/expected-outputs/model-graded/g-eval/) framework there.<br />
|
|
8
|
+
The LLM-as-a-Judge prompts are copied from promptfoo and adapted to this project's specific needs.
|
|
16
9
|
|
|
17
|
-
|
|
10
|
+
## Quick Start
|
|
18
11
|
|
|
19
12
|
```bash
|
|
20
13
|
npm install @eva-llm/eva-judge
|
|
21
|
-
# or
|
|
22
|
-
pnpm add @eva-llm/eva-judge
|
|
23
14
|
```
|
|
24
15
|
|
|
25
|
-
|
|
16
|
+
```ts
|
|
17
|
+
import { llmRubric, gEval, bEval } from '@eva-llm/eva-judge';
|
|
26
18
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
```
|
|
19
|
+
const prompt = 'Hello! How are you?';
|
|
20
|
+
const answer = 'Hi! I am fine. And you?';
|
|
30
21
|
|
|
31
|
-
|
|
22
|
+
await llmRubric(answer, 'answer is polite', 'openai', 'gpt-4.1-mini');
|
|
23
|
+
// { pass: true, score: 1, reason: "The answer is definitely polite and sympathetic" }
|
|
32
24
|
|
|
33
|
-
|
|
25
|
+
await gEval(prompt, answer, 'answer is relevant to question', 'openai', 'gpt-4.1-mini');
|
|
26
|
+
// { score: 0.9, reason: 'The answer is highly relevant to the question' }
|
|
34
27
|
|
|
35
|
-
|
|
36
|
-
|
|
28
|
+
await bEval(prompt, answer, 'answer is coherent to question', 'openai', 'gpt-4.1-mini');
|
|
29
|
+
// { score: 1, reason: 'The answer is definitely coherent to the question' }
|
|
37
30
|
```
|
|
38
31
|
|
|
32
|
+
## API
|
|
39
33
|
### llmRubric
|
|
40
34
|
|
|
41
35
|
Evaluates an output against a rubric using an LLM. Returns a reason, pass/fail, and normalized score.
|
|
@@ -51,10 +45,9 @@ const result = await llmRubric(
|
|
|
51
45
|
// result: { reason: string, pass: boolean, score: number }
|
|
52
46
|
```
|
|
53
47
|
|
|
54
|
-
|
|
55
48
|
### gEval
|
|
56
49
|
|
|
57
|
-
Evaluates a reply against criteria and derived steps using an LLM. Returns a reason and normalized score (0.0
|
|
50
|
+
Evaluates a reply against criteria and derived steps using an LLM. Returns a reason and normalized score (0.0-1.0).
|
|
58
51
|
|
|
59
52
|
```typescript
|
|
60
53
|
const result = await gEval(
|
|
@@ -84,17 +77,8 @@ const result = await bEval(
|
|
|
84
77
|
// result: { reason: string, score: number } // score will be 0 or 1
|
|
85
78
|
```
|
|
86
79
|
|
|
87
|
-
## Development
|
|
88
|
-
- Source code is in `src/`
|
|
89
|
-
- Tests are in `tests/`
|
|
90
|
-
- Uses TypeScript and Jest for testing
|
|
91
|
-
|
|
92
|
-
## License
|
|
93
|
-
MIT
|
|
94
|
-
|
|
95
80
|
## Supported Providers
|
|
96
81
|
|
|
97
|
-
|
|
98
82
|
The following LLM providers are supported (via [Vercel ai-sdk](https://github.com/vercel/ai)):
|
|
99
83
|
|
|
100
84
|
- OpenAI (`openai`)
|
|
@@ -108,11 +92,12 @@ The following LLM providers are supported (via [Vercel ai-sdk](https://github.co
|
|
|
108
92
|
- Perplexity (`perplexity`)
|
|
109
93
|
- xAI (`xai`)
|
|
110
94
|
|
|
111
|
-
Specify the provider name and model name in `llmRubric` or `
|
|
95
|
+
Specify the provider name and model name in `llmRubric`, `gEval`, or `bEval`.
|
|
112
96
|
|
|
113
97
|
> **Note:** Each provider integration is based on its respective ai-sdk package. Be sure to follow the provider's documentation for setup and authentication. Most providers require you to export an API key or token as an environment variable (e.g., `export OPENAI_API_KEY=...`).
|
|
114
98
|
|
|
115
|
-
##
|
|
99
|
+
## Enterprise
|
|
100
|
+
### LLM Judge Hooks
|
|
116
101
|
|
|
117
102
|
You can provide hooks to receive notifications about evaluation events (success or error) for logging, monitoring, or custom handling. Hooks can also be used to integrate with observability tools such as OpenTelemetry for tracing and metrics. Set these in the config:
|
|
118
103
|
|
|
@@ -129,12 +114,33 @@ Config.hooks = {
|
|
|
129
114
|
};
|
|
130
115
|
```
|
|
131
116
|
|
|
117
|
+
### Configuring
|
|
118
|
+
|
|
119
|
+
```ts
|
|
120
|
+
import Config from '@eva-llm/eva-judge';
|
|
121
|
+
|
|
122
|
+
Config.restartModelCache(500); // cache 500 (default 100) models by provider:model with LRU Cache
|
|
123
|
+
Config.restartStepsCache(1000); // cache 1000 (default 500) sets of evaluation steps, keyed by criteria, with LRU Cache
|
|
124
|
+
Config.enableModelCache();
|
|
125
|
+
Config.disableModelCache();
|
|
126
|
+
Config.enableStepsCache();
|
|
127
|
+
Config.disableStepsCache();
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### G-Eval/B-Eval Evaluation Steps Persistent Storage
|
|
131
|
+
|
|
132
132
|
For advanced use, you can implement your own cache storage for evaluation steps (e.g., using Redis or another backend) by providing a custom cache via `setStepsCache()`:
|
|
133
133
|
|
|
134
134
|
```typescript
|
|
135
|
-
import Config from '@eva-llm/eva-judge';
|
|
135
|
+
import Config, { type IStepsCache } from '@eva-llm/eva-judge';
|
|
136
|
+
|
|
137
|
+
class RedisCache implements IStepsCache {
|
|
138
|
+
...
|
|
139
|
+
};
|
|
136
140
|
|
|
137
|
-
Config.setStepsCache(RedisCache);
|
|
141
|
+
Config.setStepsCache(new RedisCache());
|
|
138
142
|
```
|
|
139
143
|
|
|
140
|
-
|
|
144
|
+
## License
|
|
145
|
+
|
|
146
|
+
MIT
|
package/dst/config.d.ts
CHANGED
|
@@ -1,23 +1,6 @@
|
|
|
1
1
|
import { LRUCache } from 'lru-cache';
|
|
2
2
|
import { type LanguageModel } from 'ai';
|
|
3
|
-
import { type
|
|
4
|
-
export interface IStepsCache {
|
|
5
|
-
set(key: string, value: string[]): Promise<void>;
|
|
6
|
-
get(key: string): Promise<string[] | undefined>;
|
|
7
|
-
}
|
|
8
|
-
export interface EvaHooks {
|
|
9
|
-
onSuccess?: (data: {
|
|
10
|
-
method: EvalMethod;
|
|
11
|
-
params: any;
|
|
12
|
-
result: any;
|
|
13
|
-
duration: number;
|
|
14
|
-
}) => void;
|
|
15
|
-
onError?: (data: {
|
|
16
|
-
method: EvalMethod;
|
|
17
|
-
error: any;
|
|
18
|
-
duration: number;
|
|
19
|
-
}) => void;
|
|
20
|
-
}
|
|
3
|
+
import { type EvaHooks, type IStepsCache } from './types';
|
|
21
4
|
declare const _default: {
|
|
22
5
|
gevalMaxScore: number;
|
|
23
6
|
isModelCached: boolean;
|
package/dst/config.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":";;AAAA,yCAAqC;
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":";;AAAA,yCAAqC;AASrC,MAAM,kBAAkB;IACd,KAAK,CAA6B;IAM1C,YAAY,IAAY;QACtB,IAAI,CAAC,KAAK,GAAG,IAAI,oBAAQ,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IAC3C,CAAC;IAMD,KAAK,CAAC,GAAG,CAAC,GAAW,EAAE,KAAe;QACpC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAC7B,CAAC;IAMD,KAAK,CAAC,GAAG,CAAC,GAAW;QACnB,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;CACF;AAOD,kBAAe;IAIb,aAAa,EAAE,EAAE;IAIjB,aAAa,EAAE,IAAI;IAInB,aAAa,EAAE,IAAI;IAInB,UAAU,EAAE,IAAI,oBAAQ,CAAwB,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;IAI7D,UAAU,EAAE,IAAI,kBAAkB,CAAC,GAAG,CAAgB;IAKtD,iBAAiB,CAAC,OAAe,GAAG;QAClC,IAAI,CAAC,UAAU,GAAG,IAAI,oBAAQ,CAAwB,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IACvE,CAAC;IAKD,iBAAiB,CAAC,OAAe,GAAG;QAClC,IAAI,CAAC,UAAU,GAAG,IAAI,kBAAkB,CAAC,IAAI,CAAgB,CAAC;IAChE,CAAC;IAKD,aAAa,CAAC,KAAkB;QAC9B,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC;IAC1B,CAAC;IAID,gBAAgB;QACd,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;IAC5B,CAAC;IAID,iBAAiB;QACf,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC;IAC7B,CAAC;IAID,gBAAgB;QACd,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;IAC5B,CAAC;IAID,iBAAiB;QACf,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC;IAC7B,CAAC;IAID,KAAK,EAAE,EAAc;IAKrB,QAAQ,CAAC,KAAe;QACtB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACrB,CAAC;CACF,CAAC"}
|
package/dst/index.d.ts
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
import z from 'zod';
|
|
2
|
+
import { type EvalOptions } from './types';
|
|
2
3
|
export * from './config';
|
|
3
4
|
export { default } from './config';
|
|
4
|
-
export
|
|
5
|
-
temperature?: number;
|
|
6
|
-
providerOptions?: Record<string, any>;
|
|
7
|
-
}
|
|
5
|
+
export * from './types';
|
|
8
6
|
export declare const RubricResultSchema: z.ZodObject<{
|
|
9
7
|
reason: z.ZodString;
|
|
10
8
|
pass: z.ZodBoolean;
|
package/dst/index.js
CHANGED
|
@@ -49,6 +49,7 @@ const config_1 = __importDefault(require("./config"));
|
|
|
49
49
|
__exportStar(require("./config"), exports);
|
|
50
50
|
var config_2 = require("./config");
|
|
51
51
|
Object.defineProperty(exports, "default", { enumerable: true, get: function () { return __importDefault(config_2).default; } });
|
|
52
|
+
__exportStar(require("./types"), exports);
|
|
52
53
|
exports.RubricResultSchema = zod_1.default.object({
|
|
53
54
|
reason: zod_1.default.string().describe('Detailed explanation of the score based on the rubric'),
|
|
54
55
|
pass: zod_1.default.boolean().describe('Whether the output satisfies the minimum requirements'),
|
package/dst/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,2BAA0C;AAC1C,mDAAqC;AACrC,8CAAoB;AAEpB,qCAKkB;AAClB,yCAA0D;AAC1D,sDAA4B;AAG5B,2CAAyB;AACzB,mCAAmC;AAA1B,kHAAA,OAAO,OAAA;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,2BAA0C;AAC1C,mDAAqC;AACrC,8CAAoB;AAEpB,qCAKkB;AAClB,yCAA0D;AAC1D,sDAA4B;AAG5B,2CAAyB;AACzB,mCAAmC;AAA1B,kHAAA,OAAO,OAAA;AAChB,0CAAwB;AAMX,QAAA,kBAAkB,GAAG,aAAC,CAAC,MAAM,CAAC;IAEzC,MAAM,EAAE,aAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;IAEpF,IAAI,EAAE,aAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;IAEnF,KAAK,EAAE,aAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,mCAAmC,CAAC;CAC9E,CAAC,CAAC;AAUU,QAAA,sBAAsB,GAAG,aAAC,CAAC,MAAM,CAAC;IAE7C,KAAK,EAAE,aAAC,CAAC,KAAK,CAAC,aAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,CAAC,4DAA4D,CAAC;CAClG,CAAC,CAAC;AAWU,QAAA,yBAAyB,GAAG,aAAC,CAAC,MAAM,CAAC;IAEhD,MAAM,EAAE,aAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;IAEpF,KAAK,EAAE,aAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,mCAAmC,CAAC;CACvE,CAAC,CAAC;AAgBI,MAAM,SAAS,GAAG,KAAK,EAC5B,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAiB,EACjB,UAAuB,EAAE,EACF,EAAE;IACzB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,QAAQ,CAAC,MAAM,CAAC,+BAAsB,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;QAE/E,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAA,iBAAY,EAAC;YAC5C,KAAK,EAAE,IAAA,mBAAQ,EAAC,YAAY,EAAE,SAAS,CAAC;YACxC,MAAM,EAAE,iCAAwB;YAChC,MAAM,EAAE,UAAU;YAClB,MAAM,EAAE,WAAM,CAAC,MAAM,CAAC;gBACpB,MAAM,EAAE,0BAAkB;aAC3B,CAAC;YACF,GAAG,OAAO;SACX,CAAC,CAAC;QAEH,gBAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACrB,MAAM,EAAE,WAAW;YACnB,MAAM,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,SAAS,EAAE,OAAO,EAAE;YAC5D,MAAM;YACN,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAEf,gBAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,EAAE,WAAW;YACnB,KAAK;YACL,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC,CAAA;AAvCY,QAAA,SAAS,aAuCrB;AAED,MAAM,MAAM,GAAG,KAAK,EAClB,MAAc,EACd,MAAc,EACd,QAAgB,EAChB,YAAoB,EACpB,SAAiB,EACjB,QAAgB,EAChB,U
AAsB,EACtB,UAAuB,EAAE,EACK,EAAE;IAChC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEzB,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,IAAA,mBAAQ,EAAC,YAAY,EAAE,SAAS,CAAC,CAAC;QAChD,IAAI,KAAK,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,CAAC,CAAC;QAErC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,WAAW,GAAG,QAAQ,CAAC,MAAM,CAAC,2BAAkB,EAAE,EAAE,QAAQ,EAAE,CAAC,CAAC;YAEtE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,IAAA,iBAAY,EAAC;gBACjD,KAAK;gBACL,MAAM,EAAE,WAAW;gBACnB,MAAM,EAAE,WAAM,CAAC,MAAM,CAAC;oBACpB,MAAM,EAAE,8BAAsB;iBAC/B,CAAC;gBACF,GAAG,OAAO;aACX,CAAC,CAAC;YAEH,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC;YAE1B,IAAA,mBAAQ,EAAC,QAAQ,EAAE,WAAW,CAAC,KAAK,CAAC,CAAC;QACxC,CAAC;QAED,MAAM,gBAAgB,GAAG,QAAQ,CAAC,MAAM,CAAC,8BAAqB,EAAE;YAC9D,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC;YACzB,KAAK,EAAE,MAAM;YACb,MAAM,EAAE,MAAM;YACd,QAAQ;SACT,CAAC,CAAC;QAEH,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,IAAA,iBAAY,EAAC;YAChD,KAAK;YACL,MAAM,EAAE,gBAAgB;YACxB,MAAM,EAAE,WAAM,CAAC,MAAM,CAAC;gBACpB,MAAM,EAAE,iCAAyB;aAClC,CAAC;YACF,GAAG,OAAO;SACX,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG;YACb,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,KAAK,EAAE,UAAU,CAAC,KAAK,GAAG,QAAQ;SACnC,CAAC;QAEF,gBAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACrB,MAAM,EAAE,UAAU;YAClB,MAAM,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,SAAS,EAAE,OAAO,EAAE;YACtE,MAAM;YACN,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAEf,gBAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,EAAE,UAAU;YAClB,KAAK;YACL,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC,CAAA;AAaM,MAAM,KAAK,GAAG,KAAK,EACxB,MAAc,EACd,MAAc,EACd,QAAgB,EAChB,YAAoB,EACpB,SAAiB,EACjB,UAAuB,EAAE,EACK,EAAE,CAAC,MAAM,CACvC,MAAM,EACN,MAAM,EACN,QAAQ,EACR,YAAY,EACZ,SAAS,EACT,gBAAI,CAAC,aAAa,EAClB,OAAO,EACP,OAAO,CACR,CAAC;AAhBW,QAAA,KAAK,SAgBhB;AAaK,MAAM,KAAK,GAAG,KAAK,EACxB,MAAc,EACd,MAAc,EACd,QAAgB,EAChB,YAAoB,EACpB,SAAiB,EACjB,UAAuB,EAAE,EACK,EAAE,CAAC,MAAM,CACvC,MAAM,EACN,MAAM,EACN,QAAQ,EACR,YAAY,EACZ,SAAS,EACT,CAAC,EACD,OAAO,EACP,OAAO,CACR,CAAC;AA
hBW,QAAA,KAAK,SAgBhB"}
|
package/dst/types.d.ts
CHANGED
|
@@ -1 +1,22 @@
|
|
|
1
1
|
export type EvalMethod = 'bEval' | 'gEval' | 'llmRubric';
|
|
2
|
+
export interface IStepsCache {
|
|
3
|
+
set(key: string, value: string[]): Promise<void>;
|
|
4
|
+
get(key: string): Promise<string[] | undefined>;
|
|
5
|
+
}
|
|
6
|
+
export interface EvalOptions {
|
|
7
|
+
temperature?: number;
|
|
8
|
+
providerOptions?: Record<string, any>;
|
|
9
|
+
}
|
|
10
|
+
export interface EvaHooks {
|
|
11
|
+
onSuccess?: (data: {
|
|
12
|
+
method: EvalMethod;
|
|
13
|
+
params: any;
|
|
14
|
+
result: any;
|
|
15
|
+
duration: number;
|
|
16
|
+
}) => void;
|
|
17
|
+
onError?: (data: {
|
|
18
|
+
method: EvalMethod;
|
|
19
|
+
error: any;
|
|
20
|
+
duration: number;
|
|
21
|
+
}) => void;
|
|
22
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@eva-llm/eva-judge",
|
|
3
|
-
"version": "0.1
|
|
3
|
+
"version": "1.0.1",
|
|
4
4
|
"description": "LLM-as-a-Judge abstraction layer using ai-sdk and plugins",
|
|
5
5
|
"main": "dst/index.js",
|
|
6
6
|
"types": "dst/index.d.ts",
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
"bugs": {
|
|
20
20
|
"url": "https://github.com/eva-llm/eva-judge/issues"
|
|
21
21
|
},
|
|
22
|
-
"homepage": "https://
|
|
22
|
+
"homepage": "https://eva-llm.github.io/eva-judge",
|
|
23
23
|
"devDependencies": {
|
|
24
24
|
"@types/jest": "^30.0.0",
|
|
25
25
|
"@types/mustache": "^4.2.6",
|