@eva-llm/eva-judge 1.0.7 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dst/config.d.ts +1 -1
- package/dst/config.js +5 -7
- package/dst/config.js.map +1 -1
- package/dst/index.d.ts +4 -4
- package/dst/index.js +36 -60
- package/dst/index.js.map +1 -1
- package/dst/prompt.js +6 -9
- package/dst/prompt.js.map +1 -1
- package/dst/registry.js +31 -73
- package/dst/registry.js.map +1 -1
- package/dst/types.js +10 -13
- package/dst/types.js.map +1 -1
- package/package.json +2 -1
- package/dst/src/config.d.ts +0 -20
- package/dst/src/config.js +0 -48
- package/dst/src/config.js.map +0 -1
- package/dst/src/index.d.ts +0 -7
- package/dst/src/index.js +0 -157
- package/dst/src/index.js.map +0 -1
- package/dst/src/prompt.d.ts +0 -10
- package/dst/src/prompt.js +0 -124
- package/dst/src/prompt.js.map +0 -1
- package/dst/src/registry.d.ts +0 -4
- package/dst/src/registry.js +0 -94
- package/dst/src/registry.js.map +0 -1
- package/dst/src/types.d.ts +0 -39
- package/dst/src/types.js +0 -17
- package/dst/src/types.js.map +0 -1
package/dst/config.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { LRUCache } from 'lru-cache';
|
|
2
2
|
import { type LanguageModel } from 'ai';
|
|
3
|
-
import { type IJudgeHooks, type IStepsCache } from './types';
|
|
3
|
+
import { type IJudgeHooks, type IStepsCache } from './types.js';
|
|
4
4
|
declare const _default: {
|
|
5
5
|
gevalMaxScore: number;
|
|
6
6
|
isModelCached: boolean;
|
package/dst/config.js
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
|
-
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
const lru_cache_1 = require("lru-cache");
|
|
1
|
+
import { LRUCache, } from 'lru-cache';
|
|
4
2
|
class StepsMemoryAdapter {
|
|
5
3
|
cache;
|
|
6
4
|
constructor(size) {
|
|
7
|
-
this.cache = new
|
|
5
|
+
this.cache = new LRUCache({ max: size });
|
|
8
6
|
}
|
|
9
7
|
async set(key, value) {
|
|
10
8
|
this.cache.set(key, value);
|
|
@@ -13,14 +11,14 @@ class StepsMemoryAdapter {
|
|
|
13
11
|
return this.cache.get(key);
|
|
14
12
|
}
|
|
15
13
|
}
|
|
16
|
-
|
|
14
|
+
export default {
|
|
17
15
|
gevalMaxScore: 10,
|
|
18
16
|
isModelCached: true,
|
|
19
17
|
isStepsCached: true,
|
|
20
|
-
modelCache: new
|
|
18
|
+
modelCache: new LRUCache({ max: 100 }),
|
|
21
19
|
stepsCache: new StepsMemoryAdapter(500),
|
|
22
20
|
restartModelCache(size = 100) {
|
|
23
|
-
this.modelCache = new
|
|
21
|
+
this.modelCache = new LRUCache({ max: size });
|
|
24
22
|
},
|
|
25
23
|
restartStepsCache(size = 500) {
|
|
26
24
|
this.stepsCache = new StepsMemoryAdapter(size);
|
package/dst/config.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,GACT,MAAM,WAAW,CAAC;AAcnB,MAAM,kBAAkB;IACd,KAAK,CAA6B;IAM1C,YAAY,IAAY;QACtB,IAAI,CAAC,KAAK,GAAG,IAAI,QAAQ,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IAC3C,CAAC;IAMD,KAAK,CAAC,GAAG,CAAC,GAAW,EAAE,KAAe;QACpC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAC7B,CAAC;IAMD,KAAK,CAAC,GAAG,CAAC,GAAW;QACnB,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;CACF;AAOD,eAAe;IAIb,aAAa,EAAE,EAAE;IAIjB,aAAa,EAAE,IAAI;IAInB,aAAa,EAAE,IAAI;IAInB,UAAU,EAAE,IAAI,QAAQ,CAAwB,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;IAI7D,UAAU,EAAE,IAAI,kBAAkB,CAAC,GAAG,CAAgB;IAKtD,iBAAiB,CAAC,OAAe,GAAG;QAClC,IAAI,CAAC,UAAU,GAAG,IAAI,QAAQ,CAAwB,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IACvE,CAAC;IAKD,iBAAiB,CAAC,OAAe,GAAG;QAClC,IAAI,CAAC,UAAU,GAAG,IAAI,kBAAkB,CAAC,IAAI,CAAgB,CAAC;IAChE,CAAC;IAKD,aAAa,CAAC,KAAkB;QAC9B,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC;IAC1B,CAAC;IAID,gBAAgB;QACd,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;IAC5B,CAAC;IAID,iBAAiB;QACf,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC;IAC7B,CAAC;IAID,gBAAgB;QACd,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;IAC5B,CAAC;IAID,iBAAiB;QACf,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC;IAC7B,CAAC;IAID,KAAK,EAAE,EAAiB;IAKxB,QAAQ,CAAC,KAAkB;QACzB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACrB,CAAC;CACF,CAAC"}
|
package/dst/index.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { type TVercelOptions, type TGevalInput, type TRubricResult, type TGevalEvaluateResult } from './types';
|
|
2
|
-
export * from './config';
|
|
3
|
-
export { default } from './config';
|
|
4
|
-
export * from './types';
|
|
1
|
+
import { type TVercelOptions, type TGevalInput, type TRubricResult, type TGevalEvaluateResult } from './types.js';
|
|
2
|
+
export * from './config.js';
|
|
3
|
+
export { default } from './config.js';
|
|
4
|
+
export * from './types.js';
|
|
5
5
|
export declare const llmRubric: (output: string, rubric: string, providerName: string, modelName: string, options?: TVercelOptions) => Promise<TRubricResult>;
|
|
6
6
|
export declare const gEval: (input: TGevalInput, criteria: string, providerName: string, modelName: string, options?: TVercelOptions) => Promise<TGevalEvaluateResult>;
|
|
7
7
|
export declare const bEval: (input: TGevalInput, criteria: string, providerName: string, modelName: string, options?: TVercelOptions) => Promise<TGevalEvaluateResult>;
|
package/dst/index.js
CHANGED
|
@@ -1,51 +1,30 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
})
|
|
13
|
-
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
14
|
-
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
|
-
};
|
|
16
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
17
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
18
|
-
};
|
|
19
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
20
|
-
exports.bEval = exports.gEval = exports.llmRubric = exports.default = void 0;
|
|
21
|
-
const node_crypto_1 = __importDefault(require("node:crypto"));
|
|
22
|
-
const mustache_1 = __importDefault(require("mustache"));
|
|
23
|
-
const ai_1 = require("ai");
|
|
24
|
-
const config_1 = __importDefault(require("./config"));
|
|
25
|
-
const prompt_1 = require("./prompt");
|
|
26
|
-
const registry_1 = require("./registry");
|
|
27
|
-
const types_1 = require("./types");
|
|
28
|
-
__exportStar(require("./config"), exports);
|
|
29
|
-
var config_2 = require("./config");
|
|
30
|
-
Object.defineProperty(exports, "default", { enumerable: true, get: function () { return __importDefault(config_2).default; } });
|
|
31
|
-
__exportStar(require("./types"), exports);
|
|
32
|
-
const getHashId = () => node_crypto_1.default.randomBytes(16).toString('hex');
|
|
33
|
-
const llmRubric = async (output, rubric, providerName, modelName, options = {}) => {
|
|
1
|
+
import crypto from 'node:crypto';
|
|
2
|
+
import Mustache from 'mustache';
|
|
3
|
+
import { generateText, Output, } from 'ai';
|
|
4
|
+
import CONF from './config.js';
|
|
5
|
+
import { GEVAL_EVALUATE_PROMPT, GEVAL_EVALUATE_REPLY_PROMPT, GEVAL_STEPS_PROMPT, GEVAL_SYSTEM_PROMPT, LLM_RUBRIC_SYSTEM_PROMPT, LLM_RUBRIC_USER_PROMPT, } from './prompt.js';
|
|
6
|
+
import { getModel, getSteps, setSteps, } from './registry.js';
|
|
7
|
+
import { RubricResultSchema, GevalStepsResultSchema, GevalEvaluateResultSchema, } from './types.js';
|
|
8
|
+
export * from './config.js';
|
|
9
|
+
export { default } from './config.js';
|
|
10
|
+
export * from './types.js';
|
|
11
|
+
const getHashId = () => crypto.randomBytes(16).toString('hex');
|
|
12
|
+
export const llmRubric = async (output, rubric, providerName, modelName, options = {}) => {
|
|
34
13
|
const start = Date.now();
|
|
35
14
|
try {
|
|
36
|
-
const userPrompt =
|
|
37
|
-
const { output: result } = await
|
|
15
|
+
const userPrompt = Mustache.render(LLM_RUBRIC_USER_PROMPT, { output, rubric });
|
|
16
|
+
const { output: result } = await generateText({
|
|
38
17
|
...options,
|
|
39
18
|
messages: undefined,
|
|
40
19
|
tools: undefined,
|
|
41
|
-
model:
|
|
42
|
-
system:
|
|
20
|
+
model: getModel(providerName, modelName),
|
|
21
|
+
system: Mustache.render(LLM_RUBRIC_SYSTEM_PROMPT, { hash_id: getHashId() }),
|
|
43
22
|
prompt: userPrompt,
|
|
44
|
-
output:
|
|
45
|
-
schema:
|
|
23
|
+
output: Output.object({
|
|
24
|
+
schema: RubricResultSchema,
|
|
46
25
|
}),
|
|
47
26
|
});
|
|
48
|
-
|
|
27
|
+
CONF.hooks.onSuccess?.({
|
|
49
28
|
method: 'llmRubric',
|
|
50
29
|
params: { output, rubric, providerName, modelName, options },
|
|
51
30
|
result,
|
|
@@ -54,7 +33,7 @@ const llmRubric = async (output, rubric, providerName, modelName, options = {})
|
|
|
54
33
|
return result;
|
|
55
34
|
}
|
|
56
35
|
catch (error) {
|
|
57
|
-
|
|
36
|
+
CONF.hooks.onError?.({
|
|
58
37
|
method: 'llmRubric',
|
|
59
38
|
error,
|
|
60
39
|
duration: Date.now() - start,
|
|
@@ -62,7 +41,6 @@ const llmRubric = async (output, rubric, providerName, modelName, options = {})
|
|
|
62
41
|
throw error;
|
|
63
42
|
}
|
|
64
43
|
};
|
|
65
|
-
exports.llmRubric = llmRubric;
|
|
66
44
|
const _gEval = async (input, criteria, providerName, modelName, maxScore, methodName, options = {}) => {
|
|
67
45
|
if (typeof input === 'string') {
|
|
68
46
|
input = { query: '', answer: input };
|
|
@@ -70,40 +48,40 @@ const _gEval = async (input, criteria, providerName, modelName, maxScore, method
|
|
|
70
48
|
const { query, answer } = input;
|
|
71
49
|
const start = Date.now();
|
|
72
50
|
try {
|
|
73
|
-
const model =
|
|
74
|
-
let steps = await
|
|
51
|
+
const model = getModel(providerName, modelName);
|
|
52
|
+
let steps = await getSteps(criteria);
|
|
75
53
|
if (!steps) {
|
|
76
|
-
const stepsPrompt =
|
|
77
|
-
const { output: stepsResult } = await
|
|
54
|
+
const stepsPrompt = Mustache.render(GEVAL_STEPS_PROMPT, { criteria });
|
|
55
|
+
const { output: stepsResult } = await generateText({
|
|
78
56
|
...options,
|
|
79
57
|
system: undefined,
|
|
80
58
|
messages: undefined,
|
|
81
59
|
tools: undefined,
|
|
82
60
|
model,
|
|
83
61
|
prompt: stepsPrompt,
|
|
84
|
-
output:
|
|
85
|
-
schema:
|
|
62
|
+
output: Output.object({
|
|
63
|
+
schema: GevalStepsResultSchema,
|
|
86
64
|
}),
|
|
87
65
|
});
|
|
88
66
|
steps = stepsResult.steps;
|
|
89
|
-
|
|
67
|
+
setSteps(criteria, stepsResult.steps);
|
|
90
68
|
}
|
|
91
|
-
const evaluationPrompt =
|
|
69
|
+
const evaluationPrompt = Mustache.render(query ? GEVAL_EVALUATE_PROMPT : GEVAL_EVALUATE_REPLY_PROMPT, {
|
|
92
70
|
criteria,
|
|
93
71
|
steps: steps.join('\n- '),
|
|
94
72
|
input: query,
|
|
95
73
|
output: answer,
|
|
96
74
|
maxScore,
|
|
97
75
|
});
|
|
98
|
-
const { output: evalResult } = await
|
|
76
|
+
const { output: evalResult } = await generateText({
|
|
99
77
|
...options,
|
|
100
78
|
messages: undefined,
|
|
101
79
|
tools: undefined,
|
|
102
80
|
model,
|
|
103
|
-
system:
|
|
81
|
+
system: Mustache.render(GEVAL_SYSTEM_PROMPT, { hash_id: getHashId() }),
|
|
104
82
|
prompt: evaluationPrompt,
|
|
105
|
-
output:
|
|
106
|
-
schema:
|
|
83
|
+
output: Output.object({
|
|
84
|
+
schema: GevalEvaluateResultSchema,
|
|
107
85
|
}),
|
|
108
86
|
...options,
|
|
109
87
|
});
|
|
@@ -111,7 +89,7 @@ const _gEval = async (input, criteria, providerName, modelName, maxScore, method
|
|
|
111
89
|
reason: evalResult.reason,
|
|
112
90
|
score: evalResult.score / maxScore,
|
|
113
91
|
};
|
|
114
|
-
|
|
92
|
+
CONF.hooks.onSuccess?.({
|
|
115
93
|
method: methodName,
|
|
116
94
|
params: { query, answer, criteria, providerName, modelName, options },
|
|
117
95
|
result,
|
|
@@ -120,7 +98,7 @@ const _gEval = async (input, criteria, providerName, modelName, maxScore, method
|
|
|
120
98
|
return result;
|
|
121
99
|
}
|
|
122
100
|
catch (error) {
|
|
123
|
-
|
|
101
|
+
CONF.hooks.onError?.({
|
|
124
102
|
method: methodName,
|
|
125
103
|
error,
|
|
126
104
|
duration: Date.now() - start,
|
|
@@ -128,8 +106,6 @@ const _gEval = async (input, criteria, providerName, modelName, maxScore, method
|
|
|
128
106
|
throw error;
|
|
129
107
|
}
|
|
130
108
|
};
|
|
131
|
-
const gEval = async (input, criteria, providerName, modelName, options = {}) => _gEval(input, criteria, providerName, modelName,
|
|
132
|
-
|
|
133
|
-
const bEval = async (input, criteria, providerName, modelName, options = {}) => _gEval(input, criteria, providerName, modelName, 1, 'bEval', options);
|
|
134
|
-
exports.bEval = bEval;
|
|
109
|
+
export const gEval = async (input, criteria, providerName, modelName, options = {}) => _gEval(input, criteria, providerName, modelName, CONF.gevalMaxScore, 'gEval', options);
|
|
110
|
+
export const bEval = async (input, criteria, providerName, modelName, options = {}) => _gEval(input, criteria, providerName, modelName, 1, 'bEval', options);
|
|
135
111
|
//# sourceMappingURL=index.js.map
|
package/dst/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,aAAa,CAAC;AACjC,OAAO,QAAQ,MAAM,UAAU,CAAC;AAChC,OAAO,EACL,YAAY,EACZ,MAAM,GACP,MAAM,IAAI,CAAC;AAEZ,OAAO,IAAI,MAAM,aAAa,CAAC;AAC/B,OAAO,EACL,qBAAqB,EACrB,2BAA2B,EAC3B,kBAAkB,EAClB,mBAAmB,EACnB,wBAAwB,EACxB,sBAAsB,GACvB,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,QAAQ,EACR,QAAQ,EACR,QAAQ,GACT,MAAM,eAAe,CAAC;AACvB,OAAO,EAML,kBAAkB,EAClB,sBAAsB,EACtB,yBAAyB,GAC1B,MAAM,YAAY,CAAC;AAEpB,cAAc,aAAa,CAAC;AAC5B,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AACtC,cAAc,YAAY,CAAC;AAE3B,MAAM,SAAS,GAAG,GAAG,EAAE,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AAY/D,MAAM,CAAC,MAAM,SAAS,GAAG,KAAK,EAC5B,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAiB,EACjB,UAA0B,EAAE,EACJ,EAAE;IAC1B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,QAAQ,CAAC,MAAM,CAAC,sBAAsB,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;QAE/E,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,YAAY,CAAC;YAC5C,GAAG,OAAO;YACV,QAAQ,EAAE,SAAS;YACnB,KAAK,EAAE,SAAS;YAChB,KAAK,EAAE,QAAQ,CAAC,YAAY,EAAE,SAAS,CAAC;YACxC,MAAM,EAAE,QAAQ,CAAC,MAAM,CAAC,wBAAwB,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE,EAAE,CAAC;YAC3E,MAAM,EAAE,UAAU;YAClB,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC;gBACpB,MAAM,EAAE,kBAAkB;aAC3B,CAAC;SACJ,CAAC,CAAC;QAEF,IAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACrB,MAAM,EAAE,WAAW;YACnB,MAAM,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,SAAS,EAAE,OAAO,EAAE;YAC5D,MAAM;YACN,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAEf,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,EAAE,WAAW;YACnB,KAAK;YACL,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC,CAAA;AAED,MAAM,MAAM,GAAG,KAAK,EAClB,KAAkB,EAClB,QAAgB,EAChB,YAAoB,EACpB,SAAiB,EACjB,QAAgB,EAChB,UAAwB,EACxB,UAA0B,EAAE,EACG,EAAE;IACjC,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,KAAK,GAAG,EAAE,KAAK,EAAE,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;IACvC,CAAC;IACD,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,KAAK,CAAC;IAEhC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEzB,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,QAAQ,CAAC,YAAY,EAAE,SAAS,CAAC,CAAC;QAChD,IAAI,KAAK,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAErC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,WAAW,GAAG,QAAQ,CAAC,MAAM,CAAC,kBAAkB,EAAE,EAAE,QAAQ,EAAE,CAAC,CAAC;YAEtE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,YAAY,CAAC;gBACjD,GAAG,OAAO;gBACV,MAAM,EAAE,SAAS;gBACjB,QAAQ,EAAE,SAAS;gBACnB,KAAK,EAAE,SAAS;gBAChB,KAAK;gBACL,MAAM,EAAE,WAAW;gBACnB,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC;oBACpB,MAAM,EAAE,sBAAsB;iBAC/B,CAAC;aACH,CAAC,CAAC;YAEH,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC;YAE1B,QAAQ,CAAC,QAAQ,EAAE,WAAW,CAAC,KAAK,CAAC,CAAC;QACxC,CAAC;QAED,MAAM,gBAAgB,GAAG,QAAQ,CAAC,MAAM,CACtC,KAAK,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,2BAA2B,EAC3D;YACE,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC;YACzB,KAAK,EAAE,KAAK;YACZ,MAAM,EAAE,MAAM;YACd,QAAQ;SACT,CAAC,CAAC;QAEL,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,YAAY,CAAC;YAChD,GAAG,OAAO;YACV,QAAQ,EAAE,SAAS;YACnB,KAAK,EAAE,SAAS;YAChB,KAAK;YACL,MAAM,EAAE,QAAQ,CAAC,MAAM,CAAC,mBAAmB,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE,EAAE,CAAC;YACtE,MAAM,EAAE,gBAAgB;YACxB,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC;gBACpB,MAAM,EAAE,yBAAyB;aAClC,CAAC;YACF,GAAG,OAAO;SACX,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG;YACb,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,KAAK,EAAE,UAAU,CAAC,KAAK,GAAG,QAAQ;SACnC,CAAC;QAEF,IAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACrB,MAAM,EAAE,UAAU;YAClB,MAAM,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,SAAS,EAAE,OAAO,EAAE;YACrE,MAAM;YACN,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAEf,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,EAAE,UAAU;YAClB,KAAK;YACL,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC,CAAA;AAYD,MAAM,CAAC,MAAM,KAAK,GAAG,KAAK,EACxB,KAAkB,EAClB,QAAgB,EAChB,YAAoB,EACpB,SAAiB,EACjB,UAA0B,EAAE,EACG,EAAE,CAAC,MAAM,CACxC,KAAK,EACL,QAAQ,EACR,YAAY,EACZ,SAAS,EACT,IAAI,CAAC,aAAa,EAClB,OAAO,EACP,OAAO,CACR,CAAC;AAYF,MAAM,CAAC,MAAM,KAAK,GAAG,KAAK,EACxB,KAAkB,EAClB,QAAgB,EAChB,YAAoB,EACpB,SAAiB,EACjB,UAA0B,EAAE,EACG,EAAE,CAAC,MAAM,CACxC,KAAK,EACL,QAAQ,EACR,YAAY,EACZ,SAAS,EACT,CAAC,EACD,OAAO,EACP,OAAO,CACR,CAAC"}
|
package/dst/prompt.js
CHANGED
|
@@ -1,11 +1,8 @@
|
|
|
1
|
-
"use strict";
|
|
2
1
|
/*!
|
|
3
2
|
* Portions of this code are based on Promptfoo (MIT License)
|
|
4
3
|
* Copyright (c) 2025 Promptfoo
|
|
5
4
|
*/
|
|
6
|
-
|
|
7
|
-
exports.GEVAL_EVALUATE_PROMPT = exports.GEVAL_EVALUATE_REPLY_PROMPT = exports.GEVAL_STEPS_PROMPT = exports.GEVAL_SYSTEM_PROMPT = exports.LLM_RUBRIC_USER_PROMPT = exports.LLM_RUBRIC_SYSTEM_PROMPT = void 0;
|
|
8
|
-
exports.LLM_RUBRIC_SYSTEM_PROMPT = `
|
|
5
|
+
export const LLM_RUBRIC_SYSTEM_PROMPT = `
|
|
9
6
|
Instruction #{{hash_id}}.
|
|
10
7
|
|
|
11
8
|
You are grading output according to a user-specified rubric. If the statement in the rubric is true, then the output passes the test. And score 1.0 indicates full compliance with the rubric, but 0.0 indicates no compliance at all. You respond with a JSON object with this structure: {reason: string, pass: boolean, score: number}
|
|
@@ -20,13 +17,13 @@ Examples:
|
|
|
20
17
|
<Rubric>Does not speak like a pirate</Rubric>
|
|
21
18
|
{"reason": "'avast ye' is a common pirate term", "pass": false, "score": 0.0}
|
|
22
19
|
`;
|
|
23
|
-
|
|
24
|
-
|
|
20
|
+
export const LLM_RUBRIC_USER_PROMPT = '<Output>\n{{output}}\n</Output>\n<Rubric>\n{{rubric}}\n</Rubric>';
|
|
21
|
+
export const GEVAL_SYSTEM_PROMPT = `
|
|
25
22
|
Instruction #{{hash_id}}.
|
|
26
23
|
|
|
27
24
|
You are an impartial Judge. Your role is to perform an independent audit according to provided criteria.
|
|
28
25
|
`;
|
|
29
|
-
|
|
26
|
+
export const GEVAL_STEPS_PROMPT = `
|
|
30
27
|
Given an evaluation criteria which outlines how you should judge a piece of text, generate 3-4 concise evaluation steps applicable to any text based on the criteria below and designed to confirm the criteria.
|
|
31
28
|
|
|
32
29
|
**EVALUATION CRITERIA**
|
|
@@ -50,7 +47,7 @@ Example:
|
|
|
50
47
|
Here are the 3-4 concise evaluation steps, formatted as required in a minified JSON:
|
|
51
48
|
JSON:
|
|
52
49
|
`;
|
|
53
|
-
|
|
50
|
+
export const GEVAL_EVALUATE_REPLY_PROMPT = `
|
|
54
51
|
You will be given one Reply below. Your task is to rate the Reply on one metric.
|
|
55
52
|
Please make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.
|
|
56
53
|
|
|
@@ -83,7 +80,7 @@ Example JSON:
|
|
|
83
80
|
Here is the final evaluation in the required minified JSON format:
|
|
84
81
|
JSON:
|
|
85
82
|
`;
|
|
86
|
-
|
|
83
|
+
export const GEVAL_EVALUATE_PROMPT = `
|
|
87
84
|
You will be given one Reply for a Prompt below. Your task is to rate the Reply on one metric.
|
|
88
85
|
Please make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.
|
|
89
86
|
|
package/dst/prompt.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"prompt.js","sourceRoot":"","sources":["../src/prompt.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"prompt.js","sourceRoot":"","sources":["../src/prompt.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH,MAAM,CAAC,MAAM,wBAAwB,GAAG;;;;;;;;;;;;;;CAcvC,CAAC;AAKF,MAAM,CAAC,MAAM,sBAAsB,GAAG,kEAAkE,CAAC;AAEzG,MAAM,CAAC,MAAM,mBAAmB,GAAG;;;;CAIlC,CAAC;AAKF,MAAM,CAAC,MAAM,kBAAkB,GAAG;;;;;;;;;;;;;;;;;;;;;;;CAuBjC,CAAC;AAKF,MAAM,CAAC,MAAM,2BAA2B,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAgC1C,CAAC;AAKF,MAAM,CAAC,MAAM,qBAAqB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAqCpC,CAAC"}
|
package/dst/registry.js
CHANGED
|
@@ -1,94 +1,52 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
-
}) : function(o, v) {
|
|
16
|
-
o["default"] = v;
|
|
17
|
-
});
|
|
18
|
-
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
-
var ownKeys = function(o) {
|
|
20
|
-
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
-
var ar = [];
|
|
22
|
-
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
-
return ar;
|
|
24
|
-
};
|
|
25
|
-
return ownKeys(o);
|
|
26
|
-
};
|
|
27
|
-
return function (mod) {
|
|
28
|
-
if (mod && mod.__esModule) return mod;
|
|
29
|
-
var result = {};
|
|
30
|
-
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
-
__setModuleDefault(result, mod);
|
|
32
|
-
return result;
|
|
33
|
-
};
|
|
34
|
-
})();
|
|
35
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
36
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
37
|
-
};
|
|
38
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
-
exports.setSteps = exports.getSteps = exports.getModel = void 0;
|
|
40
|
-
const crypto = __importStar(require("node:crypto"));
|
|
41
|
-
const openai_1 = require("@ai-sdk/openai");
|
|
42
|
-
const anthropic_1 = require("@ai-sdk/anthropic");
|
|
43
|
-
const google_1 = require("@ai-sdk/google");
|
|
44
|
-
const mistral_1 = require("@ai-sdk/mistral");
|
|
45
|
-
const amazon_bedrock_1 = require("@ai-sdk/amazon-bedrock");
|
|
46
|
-
const azure_1 = require("@ai-sdk/azure");
|
|
47
|
-
const deepseek_1 = require("@ai-sdk/deepseek");
|
|
48
|
-
const groq_1 = require("@ai-sdk/groq");
|
|
49
|
-
const perplexity_1 = require("@ai-sdk/perplexity");
|
|
50
|
-
const xai_1 = require("@ai-sdk/xai");
|
|
51
|
-
const config_1 = __importDefault(require("./config"));
|
|
1
|
+
import * as crypto from 'node:crypto';
|
|
2
|
+
import { openai, } from '@ai-sdk/openai';
|
|
3
|
+
import { anthropic, } from '@ai-sdk/anthropic';
|
|
4
|
+
import { google, } from '@ai-sdk/google';
|
|
5
|
+
import { mistral, } from '@ai-sdk/mistral';
|
|
6
|
+
import { bedrock, } from '@ai-sdk/amazon-bedrock';
|
|
7
|
+
import { azure, } from '@ai-sdk/azure';
|
|
8
|
+
import { deepseek, } from '@ai-sdk/deepseek';
|
|
9
|
+
import { groq, } from '@ai-sdk/groq';
|
|
10
|
+
import { perplexity, } from '@ai-sdk/perplexity';
|
|
11
|
+
import { xai, } from '@ai-sdk/xai';
|
|
12
|
+
import CONF from './config.js';
|
|
52
13
|
const PROVIDERS = {
|
|
53
|
-
openai
|
|
54
|
-
anthropic
|
|
55
|
-
google
|
|
56
|
-
mistral
|
|
57
|
-
bedrock
|
|
58
|
-
azure
|
|
59
|
-
deepseek
|
|
60
|
-
groq
|
|
61
|
-
perplexity
|
|
62
|
-
xai
|
|
14
|
+
openai,
|
|
15
|
+
anthropic,
|
|
16
|
+
google,
|
|
17
|
+
mistral,
|
|
18
|
+
bedrock,
|
|
19
|
+
azure,
|
|
20
|
+
deepseek,
|
|
21
|
+
groq,
|
|
22
|
+
perplexity,
|
|
23
|
+
xai,
|
|
63
24
|
};
|
|
64
|
-
const getModel = (providerName, modelName) => {
|
|
25
|
+
export const getModel = (providerName, modelName) => {
|
|
65
26
|
const cacheKey = `${providerName}:${modelName}`;
|
|
66
|
-
let model =
|
|
27
|
+
let model = CONF.isModelCached ? CONF.modelCache.get(cacheKey) : undefined;
|
|
67
28
|
if (!model) {
|
|
68
29
|
const provider = PROVIDERS[providerName];
|
|
69
30
|
if (!provider) {
|
|
70
31
|
throw new Error(`Unknown provider: "${providerName}". Available providers: ${Object.keys(PROVIDERS).join(', ')}`);
|
|
71
32
|
}
|
|
72
33
|
model = provider(modelName);
|
|
73
|
-
if (
|
|
74
|
-
|
|
34
|
+
if (CONF.isModelCached) {
|
|
35
|
+
CONF.modelCache.set(cacheKey, model);
|
|
75
36
|
}
|
|
76
37
|
}
|
|
77
38
|
return model;
|
|
78
39
|
};
|
|
79
|
-
exports.getModel = getModel;
|
|
80
40
|
const md5 = (str) => {
|
|
81
41
|
return crypto.createHash('md5').update(str).digest('hex');
|
|
82
42
|
};
|
|
83
|
-
const getSteps = (criteria) => {
|
|
84
|
-
return
|
|
43
|
+
export const getSteps = (criteria) => {
|
|
44
|
+
return CONF.isStepsCached ? CONF.stepsCache.get(md5(criteria)) : Promise.resolve(undefined);
|
|
85
45
|
};
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
return config_1.default.stepsCache.set(md5(criteria), steps);
|
|
46
|
+
export const setSteps = (criteria, steps) => {
|
|
47
|
+
if (CONF.isStepsCached) {
|
|
48
|
+
return CONF.stepsCache.set(md5(criteria), steps);
|
|
90
49
|
}
|
|
91
50
|
return Promise.resolve();
|
|
92
51
|
};
|
|
93
|
-
exports.setSteps = setSteps;
|
|
94
52
|
//# sourceMappingURL=registry.js.map
|
package/dst/registry.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"registry.js","sourceRoot":"","sources":["../src/registry.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"registry.js","sourceRoot":"","sources":["../src/registry.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,MAAM,aAAa,CAAC;AAItC,OAAO,EACL,MAAM,GACP,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACL,SAAS,GACV,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EACL,MAAM,GACP,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACL,OAAO,GACR,MAAM,iBAAiB,CAAC;AACzB,OAAO,EACL,OAAO,GACR,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACL,KAAK,GACN,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,QAAQ,GACT,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EACL,IAAI,GACL,MAAM,cAAc,CAAC;AACtB,OAAO,EACL,UAAU,GACX,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EACL,GAAG,GACJ,MAAM,aAAa,CAAC;AAErB,OAAO,IAAI,MAAM,aAAa,CAAC;AAM/B,MAAM,SAAS,GAA6B;IAC1C,MAAM;IACN,SAAS;IACT,MAAM;IACN,OAAO;IACP,OAAO;IACP,KAAK;IACL,QAAQ;IACR,IAAI;IACJ,UAAU;IACV,GAAG;CACJ,CAAC;AAQF,MAAM,CAAC,MAAM,QAAQ,GAAG,CAAC,YAAoB,EAAE,SAAiB,EAAiB,EAAE;IACjF,MAAM,QAAQ,GAAG,GAAG,YAAY,IAAI,SAAS,EAAE,CAAC;IAEhD,IAAI,KAAK,GAAG,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAE3E,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,QAAQ,GAAG,SAAS,CAAC,YAAY,CAAC,CAAC;QAEzC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,IAAI,KAAK,CAAC,sBAAsB,YAAY,2BAA2B,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACpH,CAAC;QAED,KAAK,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC;QAE5B,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACvB,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QACvC,CAAC;IACH,CAAC;IAED,OAAO,KAAM,CAAC;AAChB,CAAC,CAAA;AAOD,MAAM,GAAG,GAAG,CAAC,GAAW,EAAU,EAAE;IAClC,OAAO,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC5D,CAAC,CAAA;AAOD,MAAM,CAAC,MAAM,QAAQ,GAAG,CAAC,QAAgB,EAAiC,EAAE;IAC1E,OAAO,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;AAC9F,CAAC,CAAA;AAQD,MAAM,CAAC,MAAM,QAAQ,GAAG,CAAC,QAAgB,EAAE,KAAe,EAAiB,EAAE;IAC3E,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;QACvB,OAAO,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,KAAK,CAAC,CAAC;IACnD,CAAC;IAED,OAAO,OAAO,CAAC,OAAO,EAAE,CAAC;AAC3B,CAAC,CAAA"}
|
package/dst/types.js
CHANGED
|
@@ -1,17 +1,14 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
reason: zod_1.z.string().describe('Detailed explanation of the score based on the rubric'),
|
|
7
|
-
pass: zod_1.z.boolean().describe('Whether the output satisfies the minimum requirements'),
|
|
8
|
-
score: zod_1.z.number().min(0).max(1).describe('Numeric representation of quality'),
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
export const RubricResultSchema = z.object({
|
|
3
|
+
reason: z.string().describe('Detailed explanation of the score based on the rubric'),
|
|
4
|
+
pass: z.boolean().describe('Whether the output satisfies the minimum requirements'),
|
|
5
|
+
score: z.number().min(0).max(1).describe('Numeric representation of quality'),
|
|
9
6
|
});
|
|
10
|
-
|
|
11
|
-
steps:
|
|
7
|
+
export const GevalStepsResultSchema = z.object({
|
|
8
|
+
steps: z.array(z.string()).describe('List of concise evaluation steps derived from the criteria'),
|
|
12
9
|
});
|
|
13
|
-
|
|
14
|
-
reason:
|
|
15
|
-
score:
|
|
10
|
+
export const GevalEvaluateResultSchema = z.object({
|
|
11
|
+
reason: z.string().describe('Detailed explanation of the score based on the rubric'),
|
|
12
|
+
score: z.number().min(0).describe('Numeric representation of quality'),
|
|
16
13
|
});
|
|
17
14
|
//# sourceMappingURL=types.js.map
|
package/dst/types.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AA+DxB,MAAM,CAAC,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAEzC,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;IAEpF,IAAI,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;IAEnF,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,mCAAmC,CAAC;CAC9E,CAAC,CAAC;AAUH,MAAM,CAAC,MAAM,sBAAsB,GAAG,CAAC,CAAC,MAAM,CAAC;IAE7C,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,CAAC,4DAA4D,CAAC;CAClG,CAAC,CAAC;AAUH,MAAM,CAAC,MAAM,yBAAyB,GAAG,CAAC,CAAC,MAAM,CAAC;IAEhD,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;IAEpF,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,mCAAmC,CAAC;CACvE,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@eva-llm/eva-judge",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.8",
|
|
4
4
|
"description": "LLM-as-a-Judge abstraction layer using ai-sdk and plugins",
|
|
5
5
|
"main": "dst/index.js",
|
|
6
6
|
"types": "dst/index.d.ts",
|
|
7
|
+
"type": "module",
|
|
7
8
|
"engines": {
|
|
8
9
|
"node": ">=22"
|
|
9
10
|
},
|
package/dst/src/config.d.ts
DELETED
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
import { LRUCache } from 'lru-cache';
|
|
2
|
-
import { type LanguageModel } from 'ai';
|
|
3
|
-
import { type IJudgeHooks, type IStepsCache } from './types';
|
|
4
|
-
declare const _default: {
|
|
5
|
-
gevalMaxScore: number;
|
|
6
|
-
isModelCached: boolean;
|
|
7
|
-
isStepsCached: boolean;
|
|
8
|
-
modelCache: LRUCache<string, LanguageModel, unknown>;
|
|
9
|
-
stepsCache: IStepsCache;
|
|
10
|
-
restartModelCache(size?: number): void;
|
|
11
|
-
restartStepsCache(size?: number): void;
|
|
12
|
-
setStepsCache(cache: IStepsCache): void;
|
|
13
|
-
enableModelCache(): void;
|
|
14
|
-
disableModelCache(): void;
|
|
15
|
-
enableStepsCache(): void;
|
|
16
|
-
disableStepsCache(): void;
|
|
17
|
-
hooks: IJudgeHooks;
|
|
18
|
-
setHooks(hooks: IJudgeHooks): void;
|
|
19
|
-
};
|
|
20
|
-
export default _default;
|
package/dst/src/config.js
DELETED
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
const lru_cache_1 = require("lru-cache");
|
|
4
|
-
class StepsMemoryAdapter {
|
|
5
|
-
cache;
|
|
6
|
-
constructor(size) {
|
|
7
|
-
this.cache = new lru_cache_1.LRUCache({ max: size });
|
|
8
|
-
}
|
|
9
|
-
async set(key, value) {
|
|
10
|
-
this.cache.set(key, value);
|
|
11
|
-
}
|
|
12
|
-
async get(key) {
|
|
13
|
-
return this.cache.get(key);
|
|
14
|
-
}
|
|
15
|
-
}
|
|
16
|
-
exports.default = {
|
|
17
|
-
gevalMaxScore: 10,
|
|
18
|
-
isModelCached: true,
|
|
19
|
-
isStepsCached: true,
|
|
20
|
-
modelCache: new lru_cache_1.LRUCache({ max: 100 }),
|
|
21
|
-
stepsCache: new StepsMemoryAdapter(500),
|
|
22
|
-
restartModelCache(size = 100) {
|
|
23
|
-
this.modelCache = new lru_cache_1.LRUCache({ max: size });
|
|
24
|
-
},
|
|
25
|
-
restartStepsCache(size = 500) {
|
|
26
|
-
this.stepsCache = new StepsMemoryAdapter(size);
|
|
27
|
-
},
|
|
28
|
-
setStepsCache(cache) {
|
|
29
|
-
this.stepsCache = cache;
|
|
30
|
-
},
|
|
31
|
-
enableModelCache() {
|
|
32
|
-
this.isModelCached = true;
|
|
33
|
-
},
|
|
34
|
-
disableModelCache() {
|
|
35
|
-
this.isModelCached = false;
|
|
36
|
-
},
|
|
37
|
-
enableStepsCache() {
|
|
38
|
-
this.isStepsCached = true;
|
|
39
|
-
},
|
|
40
|
-
disableStepsCache() {
|
|
41
|
-
this.isStepsCached = false;
|
|
42
|
-
},
|
|
43
|
-
hooks: {},
|
|
44
|
-
setHooks(hooks) {
|
|
45
|
-
this.hooks = hooks;
|
|
46
|
-
}
|
|
47
|
-
};
|
|
48
|
-
//# sourceMappingURL=config.js.map
|
package/dst/src/config.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"config.js","sourceRoot":"","sources":["../../src/config.ts"],"names":[],"mappings":";;AAAA,yCAEmB;AAcnB,MAAM,kBAAkB;IACd,KAAK,CAA6B;IAM1C,YAAY,IAAY;QACtB,IAAI,CAAC,KAAK,GAAG,IAAI,oBAAQ,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IAC3C,CAAC;IAMD,KAAK,CAAC,GAAG,CAAC,GAAW,EAAE,KAAe;QACpC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAC7B,CAAC;IAMD,KAAK,CAAC,GAAG,CAAC,GAAW;QACnB,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;CACF;AAOD,kBAAe;IAIb,aAAa,EAAE,EAAE;IAIjB,aAAa,EAAE,IAAI;IAInB,aAAa,EAAE,IAAI;IAInB,UAAU,EAAE,IAAI,oBAAQ,CAAwB,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;IAI7D,UAAU,EAAE,IAAI,kBAAkB,CAAC,GAAG,CAAgB;IAKtD,iBAAiB,CAAC,OAAe,GAAG;QAClC,IAAI,CAAC,UAAU,GAAG,IAAI,oBAAQ,CAAwB,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IACvE,CAAC;IAKD,iBAAiB,CAAC,OAAe,GAAG;QAClC,IAAI,CAAC,UAAU,GAAG,IAAI,kBAAkB,CAAC,IAAI,CAAgB,CAAC;IAChE,CAAC;IAKD,aAAa,CAAC,KAAkB;QAC9B,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC;IAC1B,CAAC;IAID,gBAAgB;QACd,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;IAC5B,CAAC;IAID,iBAAiB;QACf,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC;IAC7B,CAAC;IAID,gBAAgB;QACd,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;IAC5B,CAAC;IAID,iBAAiB;QACf,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC;IAC7B,CAAC;IAID,KAAK,EAAE,EAAiB;IAKxB,QAAQ,CAAC,KAAkB;QACzB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACrB,CAAC;CACF,CAAC"}
|
package/dst/src/index.d.ts
DELETED
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
import { type TVercelOptions, type TGevalInput, type TRubricResult, type TGevalEvaluateResult } from './types';
|
|
2
|
-
export * from './config';
|
|
3
|
-
export { default } from './config';
|
|
4
|
-
export * from './types';
|
|
5
|
-
export declare const llmRubric: (output: string, rubric: string, providerName: string, modelName: string, options?: TVercelOptions) => Promise<TRubricResult>;
|
|
6
|
-
export declare const gEval: (input: TGevalInput, criteria: string, providerName: string, modelName: string, options?: TVercelOptions) => Promise<TGevalEvaluateResult>;
|
|
7
|
-
export declare const bEval: (input: TGevalInput, criteria: string, providerName: string, modelName: string, options?: TVercelOptions) => Promise<TGevalEvaluateResult>;
|
package/dst/src/index.js
DELETED
|
@@ -1,157 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
-
if (k2 === undefined) k2 = k;
|
|
4
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
-
}
|
|
8
|
-
Object.defineProperty(o, k2, desc);
|
|
9
|
-
}) : (function(o, m, k, k2) {
|
|
10
|
-
if (k2 === undefined) k2 = k;
|
|
11
|
-
o[k2] = m[k];
|
|
12
|
-
}));
|
|
13
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
-
}) : function(o, v) {
|
|
16
|
-
o["default"] = v;
|
|
17
|
-
});
|
|
18
|
-
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
-
var ownKeys = function(o) {
|
|
20
|
-
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
-
var ar = [];
|
|
22
|
-
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
-
return ar;
|
|
24
|
-
};
|
|
25
|
-
return ownKeys(o);
|
|
26
|
-
};
|
|
27
|
-
return function (mod) {
|
|
28
|
-
if (mod && mod.__esModule) return mod;
|
|
29
|
-
var result = {};
|
|
30
|
-
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
-
__setModuleDefault(result, mod);
|
|
32
|
-
return result;
|
|
33
|
-
};
|
|
34
|
-
})();
|
|
35
|
-
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
36
|
-
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
37
|
-
};
|
|
38
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
39
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
40
|
-
};
|
|
41
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
42
|
-
exports.bEval = exports.gEval = exports.llmRubric = exports.default = void 0;
|
|
43
|
-
const node_crypto_1 = __importDefault(require("node:crypto"));
|
|
44
|
-
const Mustache = __importStar(require("mustache"));
|
|
45
|
-
const ai_1 = require("ai");
|
|
46
|
-
const config_1 = __importDefault(require("./config"));
|
|
47
|
-
const prompt_1 = require("./prompt");
|
|
48
|
-
const registry_1 = require("./registry");
|
|
49
|
-
const types_1 = require("./types");
|
|
50
|
-
__exportStar(require("./config"), exports);
|
|
51
|
-
var config_2 = require("./config");
|
|
52
|
-
Object.defineProperty(exports, "default", { enumerable: true, get: function () { return __importDefault(config_2).default; } });
|
|
53
|
-
__exportStar(require("./types"), exports);
|
|
54
|
-
const getHashId = () => node_crypto_1.default.randomBytes(16).toString('hex');
|
|
55
|
-
const llmRubric = async (output, rubric, providerName, modelName, options = {}) => {
|
|
56
|
-
const start = Date.now();
|
|
57
|
-
try {
|
|
58
|
-
const userPrompt = Mustache.render(prompt_1.LLM_RUBRIC_USER_PROMPT, { output, rubric });
|
|
59
|
-
const { output: result } = await (0, ai_1.generateText)({
|
|
60
|
-
...options,
|
|
61
|
-
messages: undefined,
|
|
62
|
-
tools: undefined,
|
|
63
|
-
model: (0, registry_1.getModel)(providerName, modelName),
|
|
64
|
-
system: Mustache.render(prompt_1.LLM_RUBRIC_SYSTEM_PROMPT, { hash_id: getHashId() }),
|
|
65
|
-
prompt: userPrompt,
|
|
66
|
-
output: ai_1.Output.object({
|
|
67
|
-
schema: types_1.RubricResultSchema,
|
|
68
|
-
}),
|
|
69
|
-
});
|
|
70
|
-
config_1.default.hooks.onSuccess?.({
|
|
71
|
-
method: 'llmRubric',
|
|
72
|
-
params: { output, rubric, providerName, modelName, options },
|
|
73
|
-
result,
|
|
74
|
-
duration: Date.now() - start,
|
|
75
|
-
});
|
|
76
|
-
return result;
|
|
77
|
-
}
|
|
78
|
-
catch (error) {
|
|
79
|
-
config_1.default.hooks.onError?.({
|
|
80
|
-
method: 'llmRubric',
|
|
81
|
-
error,
|
|
82
|
-
duration: Date.now() - start,
|
|
83
|
-
});
|
|
84
|
-
throw error;
|
|
85
|
-
}
|
|
86
|
-
};
|
|
87
|
-
exports.llmRubric = llmRubric;
|
|
88
|
-
const _gEval = async (input, criteria, providerName, modelName, maxScore, methodName, options = {}) => {
|
|
89
|
-
if (typeof input === 'string') {
|
|
90
|
-
input = { query: '', answer: input };
|
|
91
|
-
}
|
|
92
|
-
const { query, answer } = input;
|
|
93
|
-
const start = Date.now();
|
|
94
|
-
try {
|
|
95
|
-
const model = (0, registry_1.getModel)(providerName, modelName);
|
|
96
|
-
let steps = await (0, registry_1.getSteps)(criteria);
|
|
97
|
-
if (!steps) {
|
|
98
|
-
const stepsPrompt = Mustache.render(prompt_1.GEVAL_STEPS_PROMPT, { criteria });
|
|
99
|
-
const { output: stepsResult } = await (0, ai_1.generateText)({
|
|
100
|
-
...options,
|
|
101
|
-
system: undefined,
|
|
102
|
-
messages: undefined,
|
|
103
|
-
tools: undefined,
|
|
104
|
-
model,
|
|
105
|
-
prompt: stepsPrompt,
|
|
106
|
-
output: ai_1.Output.object({
|
|
107
|
-
schema: types_1.GevalStepsResultSchema,
|
|
108
|
-
}),
|
|
109
|
-
});
|
|
110
|
-
steps = stepsResult.steps;
|
|
111
|
-
(0, registry_1.setSteps)(criteria, stepsResult.steps);
|
|
112
|
-
}
|
|
113
|
-
const evaluationPrompt = Mustache.render(query ? prompt_1.GEVAL_EVALUATE_PROMPT : prompt_1.GEVAL_EVALUATE_REPLY_PROMPT, {
|
|
114
|
-
criteria,
|
|
115
|
-
steps: steps.join('\n- '),
|
|
116
|
-
input: query,
|
|
117
|
-
output: answer,
|
|
118
|
-
maxScore,
|
|
119
|
-
});
|
|
120
|
-
const { output: evalResult } = await (0, ai_1.generateText)({
|
|
121
|
-
...options,
|
|
122
|
-
messages: undefined,
|
|
123
|
-
tools: undefined,
|
|
124
|
-
model,
|
|
125
|
-
system: Mustache.render(prompt_1.GEVAL_SYSTEM_PROMPT, { hash_id: getHashId() }),
|
|
126
|
-
prompt: evaluationPrompt,
|
|
127
|
-
output: ai_1.Output.object({
|
|
128
|
-
schema: types_1.GevalEvaluateResultSchema,
|
|
129
|
-
}),
|
|
130
|
-
...options,
|
|
131
|
-
});
|
|
132
|
-
const result = {
|
|
133
|
-
reason: evalResult.reason,
|
|
134
|
-
score: evalResult.score / maxScore,
|
|
135
|
-
};
|
|
136
|
-
config_1.default.hooks.onSuccess?.({
|
|
137
|
-
method: methodName,
|
|
138
|
-
params: { query, answer, criteria, providerName, modelName, options },
|
|
139
|
-
result,
|
|
140
|
-
duration: Date.now() - start,
|
|
141
|
-
});
|
|
142
|
-
return result;
|
|
143
|
-
}
|
|
144
|
-
catch (error) {
|
|
145
|
-
config_1.default.hooks.onError?.({
|
|
146
|
-
method: methodName,
|
|
147
|
-
error,
|
|
148
|
-
duration: Date.now() - start,
|
|
149
|
-
});
|
|
150
|
-
throw error;
|
|
151
|
-
}
|
|
152
|
-
};
|
|
153
|
-
const gEval = async (input, criteria, providerName, modelName, options = {}) => _gEval(input, criteria, providerName, modelName, config_1.default.gevalMaxScore, 'gEval', options);
|
|
154
|
-
exports.gEval = gEval;
|
|
155
|
-
const bEval = async (input, criteria, providerName, modelName, options = {}) => _gEval(input, criteria, providerName, modelName, 1, 'bEval', options);
|
|
156
|
-
exports.bEval = bEval;
|
|
157
|
-
//# sourceMappingURL=index.js.map
|
package/dst/src/index.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,8DAAiC;AACjC,mDAAqC;AACrC,2BAGY;AAEZ,sDAA4B;AAC5B,qCAOkB;AAClB,yCAIoB;AACpB,mCASiB;AAEjB,2CAAyB;AACzB,mCAAmC;AAA1B,kHAAA,OAAO,OAAA;AAChB,0CAAwB;AAExB,MAAM,SAAS,GAAG,GAAG,EAAE,CAAC,qBAAM,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AAYxD,MAAM,SAAS,GAAG,KAAK,EAC5B,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAiB,EACjB,UAA0B,EAAE,EACJ,EAAE;IAC1B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,QAAQ,CAAC,MAAM,CAAC,+BAAsB,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;QAE/E,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAA,iBAAY,EAAC;YAC5C,GAAG,OAAO;YACV,QAAQ,EAAE,SAAS;YACnB,KAAK,EAAE,SAAS;YAChB,KAAK,EAAE,IAAA,mBAAQ,EAAC,YAAY,EAAE,SAAS,CAAC;YACxC,MAAM,EAAE,QAAQ,CAAC,MAAM,CAAC,iCAAwB,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE,EAAE,CAAC;YAC3E,MAAM,EAAE,UAAU;YAClB,MAAM,EAAE,WAAM,CAAC,MAAM,CAAC;gBACpB,MAAM,EAAE,0BAAkB;aAC3B,CAAC;SACJ,CAAC,CAAC;QAEF,gBAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACrB,MAAM,EAAE,WAAW;YACnB,MAAM,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,SAAS,EAAE,OAAO,EAAE;YAC5D,MAAM;YACN,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAEf,gBAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,EAAE,WAAW;YACnB,KAAK;YACL,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC,CAAA;AAzCY,QAAA,SAAS,aAyCrB;AAED,MAAM,MAAM,GAAG,KAAK,EAClB,KAAkB,EAClB,QAAgB,EAChB,YAAoB,EACpB,SAAiB,EACjB,QAAgB,EAChB,UAAwB,EACxB,UAA0B,EAAE,EACG,EAAE;IACjC,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,KAAK,GAAG,EAAE,KAAK,EAAE,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;IACvC,CAAC;IACD,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,KAAK,CAAC;IAEhC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEzB,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,IAAA,mBAAQ,EAAC,YAAY,EAAE,SAAS,CAAC,CAAC;QAChD,IAAI,KAAK,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,CAAC,CAAC;QAErC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,WAAW,GAAG,QAAQ,CAAC,MAAM,CAAC,2BAAkB,EAAE,EAAE,QAAQ,EAAE,CAAC,CAAC;YAEtE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,IAAA,iBAAY,EAAC;gBACjD,GAAG,OAAO;gBACV,MAAM,EAAE,SAAS;gBACjB,QAAQ,EAAE,SAAS;gBACnB,KAAK,EAAE,SAAS;gBAChB,KAAK;gBACL,MAAM,EAAE,WAAW;gBACnB,MAAM,EAAE,WAAM,CAAC,MAAM,CAAC;oBACpB,MAAM,EAAE,8BAAsB;iBAC/B,CAAC;aACH,CAAC,CAAC;YAEH,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC;YAE1B,IAAA,mBAAQ,EAAC,QAAQ,EAAE,WAAW,CAAC,KAAK,CAAC,CAAC;QACxC,CAAC;QAED,MAAM,gBAAgB,GAAG,QAAQ,CAAC,MAAM,CACtC,KAAK,CAAC,CAAC,CAAC,8BAAqB,CAAC,CAAC,CAAC,oCAA2B,EAC3D;YACE,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC;YACzB,KAAK,EAAE,KAAK;YACZ,MAAM,EAAE,MAAM;YACd,QAAQ;SACT,CAAC,CAAC;QAEL,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,IAAA,iBAAY,EAAC;YAChD,GAAG,OAAO;YACV,QAAQ,EAAE,SAAS;YACnB,KAAK,EAAE,SAAS;YAChB,KAAK;YACL,MAAM,EAAE,QAAQ,CAAC,MAAM,CAAC,4BAAmB,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE,EAAE,CAAC;YACtE,MAAM,EAAE,gBAAgB;YACxB,MAAM,EAAE,WAAM,CAAC,MAAM,CAAC;gBACpB,MAAM,EAAE,iCAAyB;aAClC,CAAC;YACF,GAAG,OAAO;SACX,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG;YACb,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,KAAK,EAAE,UAAU,CAAC,KAAK,GAAG,QAAQ;SACnC,CAAC;QAEF,gBAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACrB,MAAM,EAAE,UAAU;YAClB,MAAM,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,SAAS,EAAE,OAAO,EAAE;YACrE,MAAM;YACN,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAEf,gBAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,EAAE,UAAU;YAClB,KAAK;YACL,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC,CAAA;AAYM,MAAM,KAAK,GAAG,KAAK,EACxB,KAAkB,EAClB,QAAgB,EAChB,YAAoB,EACpB,SAAiB,EACjB,UAA0B,EAAE,EACG,EAAE,CAAC,MAAM,CACxC,KAAK,EACL,QAAQ,EACR,YAAY,EACZ,SAAS,EACT,gBAAI,CAAC,aAAa,EAClB,OAAO,EACP,OAAO,CACR,CAAC;AAdW,QAAA,KAAK,SAchB;AAYK,MAAM,KAAK,GAAG,KAAK,EACxB,KAAkB,EAClB,QAAgB,EAChB,YAAoB,EACpB,SAAiB,EACjB,UAA0B,EAAE,EACG,EAAE,CAAC,MAAM,CACxC,KAAK,EACL,QAAQ,EACR,YAAY,EACZ,SAAS,EACT,CAAC,EACD,OAAO,EACP,OAAO,CACR,CAAC;AAdW,QAAA,KAAK,SAchB"}
|
package/dst/src/prompt.d.ts
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
/*!
|
|
2
|
-
* Portions of this code are based on Promptfoo (MIT License)
|
|
3
|
-
* Copyright (c) 2025 Promptfoo
|
|
4
|
-
*/
|
|
5
|
-
export declare const LLM_RUBRIC_SYSTEM_PROMPT = "\nInstruction #{{hash_id}}.\n\nYou are grading output according to a user-specified rubric. If the statement in the rubric is true, then the output passes the test. And score 1.0 indicates full compliance with the rubric, but 0.0 indicates no compliance at all. You respond with a JSON object with this structure: {reason: string, pass: boolean, score: number}\n\nExamples:\n\n<Output>Hello world</Output>\n<Rubric>Content contains a greeting</Rubric>\n{\"reason\": \"the content contains the word 'Hello'\", \"pass\": true, \"score\": 1.0}\n\n<Output>Avast ye swabs, repel the invaders!</Output>\n<Rubric>Does not speak like a pirate</Rubric>\n{\"reason\": \"'avast ye' is a common pirate term\", \"pass\": false, \"score\": 0.0}\n";
|
|
6
|
-
export declare const LLM_RUBRIC_USER_PROMPT = "<Output>\n{{output}}\n</Output>\n<Rubric>\n{{rubric}}\n</Rubric>";
|
|
7
|
-
export declare const GEVAL_SYSTEM_PROMPT = "\nInstruction #{{hash_id}}.\n\nYou are an impartial Judge. Your role is to perform an independent audit according to provided criteria.\n";
|
|
8
|
-
export declare const GEVAL_STEPS_PROMPT = "\nGiven an evaluation criteria which outlines how you should judge a piece of text, generate 3-4 concise evaluation steps applicable to any text based on the criteria below and designed to confirm the criteria.\n\n**EVALUATION CRITERIA**\n<Criteria>\n{{criteria}}\n</Criteria>\n\n**OUTPUT FORMAT**\nIMPORTANT:\n- Return output ONLY as a minified JSON object (no code fences).\n- The JSON object must contain a single key, \"steps\", whose value is a list of strings.\n- Each string must represent one evaluation step.\n- Do NOT include any explanations, commentary, extra text, or additional formatting.\n\nFormat:\n{\"steps\": <list_of_strings>}\n\nExample:\n{\"steps\":[\"<Evaluation Step 1>\",\"<Evaluation Step 2>\",\"<Evaluation Step 3>\",\"<Evaluation Step 4>\"]}\n\nHere are the 3-4 concise evaluation steps, formatted as required in a minified JSON:\nJSON:\n";
|
|
9
|
-
export declare const GEVAL_EVALUATE_REPLY_PROMPT = "\nYou will be given one Reply below. Your task is to rate the Reply on one metric.\nPlease make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.\n\n**Evaluation Criteria**\n<Criteria>\n{{criteria}}\n</Criteria>\n\n**Evaluation Steps**\n- {{steps}}\nGiven the evaluation steps, return a JSON with two keys: \n 1) a \"score\" key that MUST be an integer from 0 to {{maxScore}}, where {{maxScore}} indicates that the Evaluation Criteria is fully and clearly present in the Reply according to the Evaluation Steps, and 0 indicates the total absence of the Evaluation Criteria;\n 2) a \"reason\" key, a reason for the given score, but DO NOT QUOTE THE SCORE in your reason. Please mention specific information from Reply in your reason, but be very concise with it!\n\n**Reply**\n<Reply>\n{{output}}\n</Reply>\n\n**OUTPUT FORMAT**\nIMPORTANT: \n- Return output ONLY as a minified JSON object (no code fences).\n- The JSON object must contain exactly two keys: \"score\" and \"reason\".\n- No additional words, explanations, or formatting are needed.\n- Absolutely no additional text, explanations, line breaks, or formatting outside the JSON object are allowed.\n\nExample JSON:\n{\"score\":0,\"reason\":\"The text of Reply does not follow the evaluation criteria provided.\"}\n\nHere is the final evaluation in the required minified JSON format:\nJSON:\n";
|
|
10
|
-
export declare const GEVAL_EVALUATE_PROMPT = "\nYou will be given one Reply for a Prompt below. Your task is to rate the Reply on one metric.\nPlease make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.\n\n**Evaluation Criteria**\n<Criteria>\n{{criteria}}\n</Criteria>\n\n**Evaluation Steps**\n- {{steps}}\nGiven the evaluation steps, return a JSON with two keys: \n 1) a \"score\" key that MUST be an integer from 0 to {{maxScore}}, where {{maxScore}} indicates that the Evaluation Criteria is fully and clearly present in the Reply according to the Evaluation Steps, and 0 indicates the total absence of the Evaluation Criteria;\n 2) a \"reason\" key, a reason for the given score, but DO NOT QUOTE THE SCORE in your reason. Please mention specific information from Prompt and Reply in your reason, but be very concise with it!\n\n**Prompt**\n<Prompt>\n{{input}}\n</Prompt>\n\n**Reply**\n<Reply>\n{{output}}\n</Reply>\n\n**OUTPUT FORMAT**\nIMPORTANT: \n- Return output ONLY as a minified JSON object (no code fences).\n- The JSON object must contain exactly two keys: \"score\" and \"reason\".\n- No additional words, explanations, or formatting are needed.\n- Absolutely no additional text, explanations, line breaks, or formatting outside the JSON object are allowed.\n\nExample JSON:\n{\"score\":0,\"reason\":\"The text of Reply does not follow the evaluation criteria provided.\"}\n\nHere is the final evaluation in the required minified JSON format:\nJSON:\n";
|
package/dst/src/prompt.js
DELETED
|
@@ -1,124 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
/*!
|
|
3
|
-
* Portions of this code are based on Promptfoo (MIT License)
|
|
4
|
-
* Copyright (c) 2025 Promptfoo
|
|
5
|
-
*/
|
|
6
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
-
exports.GEVAL_EVALUATE_PROMPT = exports.GEVAL_EVALUATE_REPLY_PROMPT = exports.GEVAL_STEPS_PROMPT = exports.GEVAL_SYSTEM_PROMPT = exports.LLM_RUBRIC_USER_PROMPT = exports.LLM_RUBRIC_SYSTEM_PROMPT = void 0;
|
|
8
|
-
exports.LLM_RUBRIC_SYSTEM_PROMPT = `
|
|
9
|
-
Instruction #{{hash_id}}.
|
|
10
|
-
|
|
11
|
-
You are grading output according to a user-specified rubric. If the statement in the rubric is true, then the output passes the test. And score 1.0 indicates full compliance with the rubric, but 0.0 indicates no compliance at all. You respond with a JSON object with this structure: {reason: string, pass: boolean, score: number}
|
|
12
|
-
|
|
13
|
-
Examples:
|
|
14
|
-
|
|
15
|
-
<Output>Hello world</Output>
|
|
16
|
-
<Rubric>Content contains a greeting</Rubric>
|
|
17
|
-
{"reason": "the content contains the word 'Hello'", "pass": true, "score": 1.0}
|
|
18
|
-
|
|
19
|
-
<Output>Avast ye swabs, repel the invaders!</Output>
|
|
20
|
-
<Rubric>Does not speak like a pirate</Rubric>
|
|
21
|
-
{"reason": "'avast ye' is a common pirate term", "pass": false, "score": 0.0}
|
|
22
|
-
`;
|
|
23
|
-
exports.LLM_RUBRIC_USER_PROMPT = '<Output>\n{{output}}\n</Output>\n<Rubric>\n{{rubric}}\n</Rubric>';
|
|
24
|
-
exports.GEVAL_SYSTEM_PROMPT = `
|
|
25
|
-
Instruction #{{hash_id}}.
|
|
26
|
-
|
|
27
|
-
You are an impartial Judge. Your role is to perform an independent audit according to provided criteria.
|
|
28
|
-
`;
|
|
29
|
-
exports.GEVAL_STEPS_PROMPT = `
|
|
30
|
-
Given an evaluation criteria which outlines how you should judge a piece of text, generate 3-4 concise evaluation steps applicable to any text based on the criteria below and designed to confirm the criteria.
|
|
31
|
-
|
|
32
|
-
**EVALUATION CRITERIA**
|
|
33
|
-
<Criteria>
|
|
34
|
-
{{criteria}}
|
|
35
|
-
</Criteria>
|
|
36
|
-
|
|
37
|
-
**OUTPUT FORMAT**
|
|
38
|
-
IMPORTANT:
|
|
39
|
-
- Return output ONLY as a minified JSON object (no code fences).
|
|
40
|
-
- The JSON object must contain a single key, "steps", whose value is a list of strings.
|
|
41
|
-
- Each string must represent one evaluation step.
|
|
42
|
-
- Do NOT include any explanations, commentary, extra text, or additional formatting.
|
|
43
|
-
|
|
44
|
-
Format:
|
|
45
|
-
{"steps": <list_of_strings>}
|
|
46
|
-
|
|
47
|
-
Example:
|
|
48
|
-
{"steps":["<Evaluation Step 1>","<Evaluation Step 2>","<Evaluation Step 3>","<Evaluation Step 4>"]}
|
|
49
|
-
|
|
50
|
-
Here are the 3-4 concise evaluation steps, formatted as required in a minified JSON:
|
|
51
|
-
JSON:
|
|
52
|
-
`;
|
|
53
|
-
exports.GEVAL_EVALUATE_REPLY_PROMPT = `
|
|
54
|
-
You will be given one Reply below. Your task is to rate the Reply on one metric.
|
|
55
|
-
Please make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.
|
|
56
|
-
|
|
57
|
-
**Evaluation Criteria**
|
|
58
|
-
<Criteria>
|
|
59
|
-
{{criteria}}
|
|
60
|
-
</Criteria>
|
|
61
|
-
|
|
62
|
-
**Evaluation Steps**
|
|
63
|
-
- {{steps}}
|
|
64
|
-
Given the evaluation steps, return a JSON with two keys:
|
|
65
|
-
1) a "score" key that MUST be an integer from 0 to {{maxScore}}, where {{maxScore}} indicates that the Evaluation Criteria is fully and clearly present in the Reply according to the Evaluation Steps, and 0 indicates the total absence of the Evaluation Criteria;
|
|
66
|
-
2) a "reason" key, a reason for the given score, but DO NOT QUOTE THE SCORE in your reason. Please mention specific information from Reply in your reason, but be very concise with it!
|
|
67
|
-
|
|
68
|
-
**Reply**
|
|
69
|
-
<Reply>
|
|
70
|
-
{{output}}
|
|
71
|
-
</Reply>
|
|
72
|
-
|
|
73
|
-
**OUTPUT FORMAT**
|
|
74
|
-
IMPORTANT:
|
|
75
|
-
- Return output ONLY as a minified JSON object (no code fences).
|
|
76
|
-
- The JSON object must contain exactly two keys: "score" and "reason".
|
|
77
|
-
- No additional words, explanations, or formatting are needed.
|
|
78
|
-
- Absolutely no additional text, explanations, line breaks, or formatting outside the JSON object are allowed.
|
|
79
|
-
|
|
80
|
-
Example JSON:
|
|
81
|
-
{"score":0,"reason":"The text of Reply does not follow the evaluation criteria provided."}
|
|
82
|
-
|
|
83
|
-
Here is the final evaluation in the required minified JSON format:
|
|
84
|
-
JSON:
|
|
85
|
-
`;
|
|
86
|
-
exports.GEVAL_EVALUATE_PROMPT = `
|
|
87
|
-
You will be given one Reply for a Prompt below. Your task is to rate the Reply on one metric.
|
|
88
|
-
Please make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.
|
|
89
|
-
|
|
90
|
-
**Evaluation Criteria**
|
|
91
|
-
<Criteria>
|
|
92
|
-
{{criteria}}
|
|
93
|
-
</Criteria>
|
|
94
|
-
|
|
95
|
-
**Evaluation Steps**
|
|
96
|
-
- {{steps}}
|
|
97
|
-
Given the evaluation steps, return a JSON with two keys:
|
|
98
|
-
1) a "score" key that MUST be an integer from 0 to {{maxScore}}, where {{maxScore}} indicates that the Evaluation Criteria is fully and clearly present in the Reply according to the Evaluation Steps, and 0 indicates the total absence of the Evaluation Criteria;
|
|
99
|
-
2) a "reason" key, a reason for the given score, but DO NOT QUOTE THE SCORE in your reason. Please mention specific information from Prompt and Reply in your reason, but be very concise with it!
|
|
100
|
-
|
|
101
|
-
**Prompt**
|
|
102
|
-
<Prompt>
|
|
103
|
-
{{input}}
|
|
104
|
-
</Prompt>
|
|
105
|
-
|
|
106
|
-
**Reply**
|
|
107
|
-
<Reply>
|
|
108
|
-
{{output}}
|
|
109
|
-
</Reply>
|
|
110
|
-
|
|
111
|
-
**OUTPUT FORMAT**
|
|
112
|
-
IMPORTANT:
|
|
113
|
-
- Return output ONLY as a minified JSON object (no code fences).
|
|
114
|
-
- The JSON object must contain exactly two keys: "score" and "reason".
|
|
115
|
-
- No additional words, explanations, or formatting are needed.
|
|
116
|
-
- Absolutely no additional text, explanations, line breaks, or formatting outside the JSON object are allowed.
|
|
117
|
-
|
|
118
|
-
Example JSON:
|
|
119
|
-
{"score":0,"reason":"The text of Reply does not follow the evaluation criteria provided."}
|
|
120
|
-
|
|
121
|
-
Here is the final evaluation in the required minified JSON format:
|
|
122
|
-
JSON:
|
|
123
|
-
`;
|
|
124
|
-
//# sourceMappingURL=prompt.js.map
|
package/dst/src/prompt.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"prompt.js","sourceRoot":"","sources":["../../src/prompt.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAKU,QAAA,wBAAwB,GAAG;;;;;;;;;;;;;;CAcvC,CAAC;AAKW,QAAA,sBAAsB,GAAG,kEAAkE,CAAC;AAE5F,QAAA,mBAAmB,GAAG;;;;CAIlC,CAAC;AAKW,QAAA,kBAAkB,GAAG;;;;;;;;;;;;;;;;;;;;;;;CAuBjC,CAAC;AAKW,QAAA,2BAA2B,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAgC1C,CAAC;AAKW,QAAA,qBAAqB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAqCpC,CAAC"}
|
package/dst/src/registry.d.ts
DELETED
|
@@ -1,4 +0,0 @@
|
|
|
1
|
-
import { type LanguageModel } from 'ai';
|
|
2
|
-
export declare const getModel: (providerName: string, modelName: string) => LanguageModel;
|
|
3
|
-
export declare const getSteps: (criteria: string) => Promise<string[] | undefined>;
|
|
4
|
-
export declare const setSteps: (criteria: string, steps: string[]) => Promise<void>;
|
package/dst/src/registry.js
DELETED
|
@@ -1,94 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
-
if (k2 === undefined) k2 = k;
|
|
4
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
-
}
|
|
8
|
-
Object.defineProperty(o, k2, desc);
|
|
9
|
-
}) : (function(o, m, k, k2) {
|
|
10
|
-
if (k2 === undefined) k2 = k;
|
|
11
|
-
o[k2] = m[k];
|
|
12
|
-
}));
|
|
13
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
-
}) : function(o, v) {
|
|
16
|
-
o["default"] = v;
|
|
17
|
-
});
|
|
18
|
-
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
-
var ownKeys = function(o) {
|
|
20
|
-
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
-
var ar = [];
|
|
22
|
-
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
-
return ar;
|
|
24
|
-
};
|
|
25
|
-
return ownKeys(o);
|
|
26
|
-
};
|
|
27
|
-
return function (mod) {
|
|
28
|
-
if (mod && mod.__esModule) return mod;
|
|
29
|
-
var result = {};
|
|
30
|
-
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
-
__setModuleDefault(result, mod);
|
|
32
|
-
return result;
|
|
33
|
-
};
|
|
34
|
-
})();
|
|
35
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
36
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
37
|
-
};
|
|
38
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
-
exports.setSteps = exports.getSteps = exports.getModel = void 0;
|
|
40
|
-
const crypto = __importStar(require("node:crypto"));
|
|
41
|
-
const openai_1 = require("@ai-sdk/openai");
|
|
42
|
-
const anthropic_1 = require("@ai-sdk/anthropic");
|
|
43
|
-
const google_1 = require("@ai-sdk/google");
|
|
44
|
-
const mistral_1 = require("@ai-sdk/mistral");
|
|
45
|
-
const amazon_bedrock_1 = require("@ai-sdk/amazon-bedrock");
|
|
46
|
-
const azure_1 = require("@ai-sdk/azure");
|
|
47
|
-
const deepseek_1 = require("@ai-sdk/deepseek");
|
|
48
|
-
const groq_1 = require("@ai-sdk/groq");
|
|
49
|
-
const perplexity_1 = require("@ai-sdk/perplexity");
|
|
50
|
-
const xai_1 = require("@ai-sdk/xai");
|
|
51
|
-
const config_1 = __importDefault(require("./config"));
|
|
52
|
-
const PROVIDERS = {
|
|
53
|
-
openai: openai_1.openai,
|
|
54
|
-
anthropic: anthropic_1.anthropic,
|
|
55
|
-
google: google_1.google,
|
|
56
|
-
mistral: mistral_1.mistral,
|
|
57
|
-
bedrock: amazon_bedrock_1.bedrock,
|
|
58
|
-
azure: azure_1.azure,
|
|
59
|
-
deepseek: deepseek_1.deepseek,
|
|
60
|
-
groq: groq_1.groq,
|
|
61
|
-
perplexity: perplexity_1.perplexity,
|
|
62
|
-
xai: xai_1.xai,
|
|
63
|
-
};
|
|
64
|
-
const getModel = (providerName, modelName) => {
|
|
65
|
-
const cacheKey = `${providerName}:${modelName}`;
|
|
66
|
-
let model = config_1.default.isModelCached ? config_1.default.modelCache.get(cacheKey) : undefined;
|
|
67
|
-
if (!model) {
|
|
68
|
-
const provider = PROVIDERS[providerName];
|
|
69
|
-
if (!provider) {
|
|
70
|
-
throw new Error(`Unknown provider: "${providerName}". Available providers: ${Object.keys(PROVIDERS).join(', ')}`);
|
|
71
|
-
}
|
|
72
|
-
model = provider(modelName);
|
|
73
|
-
if (config_1.default.isModelCached) {
|
|
74
|
-
config_1.default.modelCache.set(cacheKey, model);
|
|
75
|
-
}
|
|
76
|
-
}
|
|
77
|
-
return model;
|
|
78
|
-
};
|
|
79
|
-
exports.getModel = getModel;
|
|
80
|
-
const md5 = (str) => {
|
|
81
|
-
return crypto.createHash('md5').update(str).digest('hex');
|
|
82
|
-
};
|
|
83
|
-
const getSteps = (criteria) => {
|
|
84
|
-
return config_1.default.isStepsCached ? config_1.default.stepsCache.get(md5(criteria)) : Promise.resolve(undefined);
|
|
85
|
-
};
|
|
86
|
-
exports.getSteps = getSteps;
|
|
87
|
-
const setSteps = (criteria, steps) => {
|
|
88
|
-
if (config_1.default.isStepsCached) {
|
|
89
|
-
return config_1.default.stepsCache.set(md5(criteria), steps);
|
|
90
|
-
}
|
|
91
|
-
return Promise.resolve();
|
|
92
|
-
};
|
|
93
|
-
exports.setSteps = setSteps;
|
|
94
|
-
//# sourceMappingURL=registry.js.map
|
package/dst/src/registry.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"registry.js","sourceRoot":"","sources":["../../src/registry.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,oDAAsC;AAItC,2CAEwB;AACxB,iDAE2B;AAC3B,2CAEwB;AACxB,6CAEyB;AACzB,2DAEgC;AAChC,yCAEuB;AACvB,+CAE0B;AAC1B,uCAEsB;AACtB,mDAE4B;AAC5B,qCAEqB;AAErB,sDAA4B;AAM5B,MAAM,SAAS,GAA6B;IAC1C,MAAM,EAAN,eAAM;IACN,SAAS,EAAT,qBAAS;IACT,MAAM,EAAN,eAAM;IACN,OAAO,EAAP,iBAAO;IACP,OAAO,EAAP,wBAAO;IACP,KAAK,EAAL,aAAK;IACL,QAAQ,EAAR,mBAAQ;IACR,IAAI,EAAJ,WAAI;IACJ,UAAU,EAAV,uBAAU;IACV,GAAG,EAAH,SAAG;CACJ,CAAC;AAQK,MAAM,QAAQ,GAAG,CAAC,YAAoB,EAAE,SAAiB,EAAiB,EAAE;IACjF,MAAM,QAAQ,GAAG,GAAG,YAAY,IAAI,SAAS,EAAE,CAAC;IAEhD,IAAI,KAAK,GAAG,gBAAI,CAAC,aAAa,CAAC,CAAC,CAAC,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAE3E,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,QAAQ,GAAG,SAAS,CAAC,YAAY,CAAC,CAAC;QAEzC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,IAAI,KAAK,CAAC,sBAAsB,YAAY,2BAA2B,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACpH,CAAC;QAED,KAAK,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC;QAE5B,IAAI,gBAAI,CAAC,aAAa,EAAE,CAAC;YACvB,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QACvC,CAAC;IACH,CAAC;IAED,OAAO,KAAM,CAAC;AAChB,CAAC,CAAA;AApBY,QAAA,QAAQ,YAoBpB;AAOD,MAAM,GAAG,GAAG,CAAC,GAAW,EAAU,EAAE;IAClC,OAAO,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC5D,CAAC,CAAA;AAOM,MAAM,QAAQ,GAAG,CAAC,QAAgB,EAAiC,EAAE;IAC1E,OAAO,gBAAI,CAAC,aAAa,CAAC,CAAC,CAAC,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;AAC9F,CAAC,CAAA;AAFY,QAAA,QAAQ,YAEpB;AAQM,MAAM,QAAQ,GAAG,CAAC,QAAgB,EAAE,KAAe,EAAiB,EAAE;IAC3E,IAAI,gBAAI,CAAC,aAAa,EAAE,CAAC;QACvB,OAAO,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,KAAK,CAAC,CAAC;IACnD,CAAC;IAED,OAAO,OAAO,CAAC,OAAO,EAAE,CAAC;AAC3B,CAAC,CAAA;AANY,QAAA,QAAQ,YAMpB"}
|
package/dst/src/types.d.ts
DELETED
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
import { z } from 'zod';
|
|
2
|
-
export type TJudgeMethod = 'bEval' | 'gEval' | 'llmRubric';
|
|
3
|
-
export type TGevalInput = string | {
|
|
4
|
-
query: string;
|
|
5
|
-
answer: string;
|
|
6
|
-
};
|
|
7
|
-
export interface IStepsCache {
|
|
8
|
-
set(key: string, value: string[]): Promise<void>;
|
|
9
|
-
get(key: string): Promise<string[] | undefined>;
|
|
10
|
-
}
|
|
11
|
-
export type TVercelOptions = Record<string, any>;
|
|
12
|
-
export interface IJudgeHooks {
|
|
13
|
-
onSuccess?: (data: {
|
|
14
|
-
method: TJudgeMethod;
|
|
15
|
-
params: any;
|
|
16
|
-
result: any;
|
|
17
|
-
duration: number;
|
|
18
|
-
}) => void;
|
|
19
|
-
onError?: (data: {
|
|
20
|
-
method: TJudgeMethod;
|
|
21
|
-
error: any;
|
|
22
|
-
duration: number;
|
|
23
|
-
}) => void;
|
|
24
|
-
}
|
|
25
|
-
export declare const RubricResultSchema: z.ZodObject<{
|
|
26
|
-
reason: z.ZodString;
|
|
27
|
-
pass: z.ZodBoolean;
|
|
28
|
-
score: z.ZodNumber;
|
|
29
|
-
}, z.core.$strip>;
|
|
30
|
-
export type TRubricResult = z.infer<typeof RubricResultSchema>;
|
|
31
|
-
export declare const GevalStepsResultSchema: z.ZodObject<{
|
|
32
|
-
steps: z.ZodArray<z.ZodString>;
|
|
33
|
-
}, z.core.$strip>;
|
|
34
|
-
export type TGevalStepsResult = z.infer<typeof GevalStepsResultSchema>;
|
|
35
|
-
export declare const GevalEvaluateResultSchema: z.ZodObject<{
|
|
36
|
-
reason: z.ZodString;
|
|
37
|
-
score: z.ZodNumber;
|
|
38
|
-
}, z.core.$strip>;
|
|
39
|
-
export type TGevalEvaluateResult = z.infer<typeof GevalEvaluateResultSchema>;
|
package/dst/src/types.js
DELETED
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.GevalEvaluateResultSchema = exports.GevalStepsResultSchema = exports.RubricResultSchema = void 0;
|
|
4
|
-
const zod_1 = require("zod");
|
|
5
|
-
exports.RubricResultSchema = zod_1.z.object({
|
|
6
|
-
reason: zod_1.z.string().describe('Detailed explanation of the score based on the rubric'),
|
|
7
|
-
pass: zod_1.z.boolean().describe('Whether the output satisfies the minimum requirements'),
|
|
8
|
-
score: zod_1.z.number().min(0).max(1).describe('Numeric representation of quality'),
|
|
9
|
-
});
|
|
10
|
-
exports.GevalStepsResultSchema = zod_1.z.object({
|
|
11
|
-
steps: zod_1.z.array(zod_1.z.string()).describe('List of concise evaluation steps derived from the criteria'),
|
|
12
|
-
});
|
|
13
|
-
exports.GevalEvaluateResultSchema = zod_1.z.object({
|
|
14
|
-
reason: zod_1.z.string().describe('Detailed explanation of the score based on the rubric'),
|
|
15
|
-
score: zod_1.z.number().min(0).describe('Numeric representation of quality'),
|
|
16
|
-
});
|
|
17
|
-
//# sourceMappingURL=types.js.map
|
package/dst/src/types.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/types.ts"],"names":[],"mappings":";;;AAAA,6BAAwB;AA+DX,QAAA,kBAAkB,GAAG,OAAC,CAAC,MAAM,CAAC;IAEzC,MAAM,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;IAEpF,IAAI,EAAE,OAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;IAEnF,KAAK,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,mCAAmC,CAAC;CAC9E,CAAC,CAAC;AAUU,QAAA,sBAAsB,GAAG,OAAC,CAAC,MAAM,CAAC;IAE7C,KAAK,EAAE,OAAC,CAAC,KAAK,CAAC,OAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,CAAC,4DAA4D,CAAC;CAClG,CAAC,CAAC;AAUU,QAAA,yBAAyB,GAAG,OAAC,CAAC,MAAM,CAAC;IAEhD,MAAM,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;IAEpF,KAAK,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,mCAAmC,CAAC;CACvE,CAAC,CAAC"}
|