@eva-llm/eva-judge 1.0.6 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dst/config.js.map +1 -1
- package/dst/index.js +6 -28
- package/dst/index.js.map +1 -1
- package/dst/registry.js.map +1 -1
- package/dst/src/config.d.ts +20 -0
- package/dst/src/config.js +48 -0
- package/dst/src/config.js.map +1 -0
- package/dst/src/index.d.ts +7 -0
- package/dst/src/index.js +157 -0
- package/dst/src/index.js.map +1 -0
- package/dst/src/prompt.d.ts +10 -0
- package/dst/src/prompt.js +124 -0
- package/dst/src/prompt.js.map +1 -0
- package/dst/src/registry.d.ts +4 -0
- package/dst/src/registry.js +94 -0
- package/dst/src/registry.js.map +1 -0
- package/dst/src/types.d.ts +39 -0
- package/dst/src/types.js +17 -0
- package/dst/src/types.js.map +1 -0
- package/dst/types.js.map +1 -1
- package/package.json +24 -18
package/dst/config.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":";;AAAA,
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":";;AAAA,yCAEmB;AAcnB,MAAM,kBAAkB;IACd,KAAK,CAA6B;IAM1C,YAAY,IAAY;QACtB,IAAI,CAAC,KAAK,GAAG,IAAI,oBAAQ,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IAC3C,CAAC;IAMD,KAAK,CAAC,GAAG,CAAC,GAAW,EAAE,KAAe;QACpC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAC7B,CAAC;IAMD,KAAK,CAAC,GAAG,CAAC,GAAW;QACnB,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;CACF;AAOD,kBAAe;IAIb,aAAa,EAAE,EAAE;IAIjB,aAAa,EAAE,IAAI;IAInB,aAAa,EAAE,IAAI;IAInB,UAAU,EAAE,IAAI,oBAAQ,CAAwB,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;IAI7D,UAAU,EAAE,IAAI,kBAAkB,CAAC,GAAG,CAAgB;IAKtD,iBAAiB,CAAC,OAAe,GAAG;QAClC,IAAI,CAAC,UAAU,GAAG,IAAI,oBAAQ,CAAwB,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IACvE,CAAC;IAKD,iBAAiB,CAAC,OAAe,GAAG;QAClC,IAAI,CAAC,UAAU,GAAG,IAAI,kBAAkB,CAAC,IAAI,CAAgB,CAAC;IAChE,CAAC;IAKD,aAAa,CAAC,KAAkB;QAC9B,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC;IAC1B,CAAC;IAID,gBAAgB;QACd,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;IAC5B,CAAC;IAID,iBAAiB;QACf,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC;IAC7B,CAAC;IAID,gBAAgB;QACd,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;IAC5B,CAAC;IAID,iBAAiB;QACf,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC;IAC7B,CAAC;IAID,KAAK,EAAE,EAAiB;IAKxB,QAAQ,CAAC,KAAkB;QACzB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACrB,CAAC;CACF,CAAC"}
|
package/dst/index.js
CHANGED
|
@@ -10,28 +10,6 @@ var __createBinding = (this && this.__createBinding) || (Object.create ? (functi
|
|
|
10
10
|
if (k2 === undefined) k2 = k;
|
|
11
11
|
o[k2] = m[k];
|
|
12
12
|
}));
|
|
13
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
-
}) : function(o, v) {
|
|
16
|
-
o["default"] = v;
|
|
17
|
-
});
|
|
18
|
-
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
-
var ownKeys = function(o) {
|
|
20
|
-
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
-
var ar = [];
|
|
22
|
-
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
-
return ar;
|
|
24
|
-
};
|
|
25
|
-
return ownKeys(o);
|
|
26
|
-
};
|
|
27
|
-
return function (mod) {
|
|
28
|
-
if (mod && mod.__esModule) return mod;
|
|
29
|
-
var result = {};
|
|
30
|
-
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
-
__setModuleDefault(result, mod);
|
|
32
|
-
return result;
|
|
33
|
-
};
|
|
34
|
-
})();
|
|
35
13
|
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
36
14
|
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
37
15
|
};
|
|
@@ -41,7 +19,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
41
19
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
42
20
|
exports.bEval = exports.gEval = exports.llmRubric = exports.default = void 0;
|
|
43
21
|
const node_crypto_1 = __importDefault(require("node:crypto"));
|
|
44
|
-
const
|
|
22
|
+
const mustache_1 = __importDefault(require("mustache"));
|
|
45
23
|
const ai_1 = require("ai");
|
|
46
24
|
const config_1 = __importDefault(require("./config"));
|
|
47
25
|
const prompt_1 = require("./prompt");
|
|
@@ -55,13 +33,13 @@ const getHashId = () => node_crypto_1.default.randomBytes(16).toString('hex');
|
|
|
55
33
|
const llmRubric = async (output, rubric, providerName, modelName, options = {}) => {
|
|
56
34
|
const start = Date.now();
|
|
57
35
|
try {
|
|
58
|
-
const userPrompt =
|
|
36
|
+
const userPrompt = mustache_1.default.render(prompt_1.LLM_RUBRIC_USER_PROMPT, { output, rubric });
|
|
59
37
|
const { output: result } = await (0, ai_1.generateText)({
|
|
60
38
|
...options,
|
|
61
39
|
messages: undefined,
|
|
62
40
|
tools: undefined,
|
|
63
41
|
model: (0, registry_1.getModel)(providerName, modelName),
|
|
64
|
-
system:
|
|
42
|
+
system: mustache_1.default.render(prompt_1.LLM_RUBRIC_SYSTEM_PROMPT, { hash_id: getHashId() }),
|
|
65
43
|
prompt: userPrompt,
|
|
66
44
|
output: ai_1.Output.object({
|
|
67
45
|
schema: types_1.RubricResultSchema,
|
|
@@ -95,7 +73,7 @@ const _gEval = async (input, criteria, providerName, modelName, maxScore, method
|
|
|
95
73
|
const model = (0, registry_1.getModel)(providerName, modelName);
|
|
96
74
|
let steps = await (0, registry_1.getSteps)(criteria);
|
|
97
75
|
if (!steps) {
|
|
98
|
-
const stepsPrompt =
|
|
76
|
+
const stepsPrompt = mustache_1.default.render(prompt_1.GEVAL_STEPS_PROMPT, { criteria });
|
|
99
77
|
const { output: stepsResult } = await (0, ai_1.generateText)({
|
|
100
78
|
...options,
|
|
101
79
|
system: undefined,
|
|
@@ -110,7 +88,7 @@ const _gEval = async (input, criteria, providerName, modelName, maxScore, method
|
|
|
110
88
|
steps = stepsResult.steps;
|
|
111
89
|
(0, registry_1.setSteps)(criteria, stepsResult.steps);
|
|
112
90
|
}
|
|
113
|
-
const evaluationPrompt =
|
|
91
|
+
const evaluationPrompt = mustache_1.default.render(query ? prompt_1.GEVAL_EVALUATE_PROMPT : prompt_1.GEVAL_EVALUATE_REPLY_PROMPT, {
|
|
114
92
|
criteria,
|
|
115
93
|
steps: steps.join('\n- '),
|
|
116
94
|
input: query,
|
|
@@ -122,7 +100,7 @@ const _gEval = async (input, criteria, providerName, modelName, maxScore, method
|
|
|
122
100
|
messages: undefined,
|
|
123
101
|
tools: undefined,
|
|
124
102
|
model,
|
|
125
|
-
system:
|
|
103
|
+
system: mustache_1.default.render(prompt_1.GEVAL_SYSTEM_PROMPT, { hash_id: getHashId() }),
|
|
126
104
|
prompt: evaluationPrompt,
|
|
127
105
|
output: ai_1.Output.object({
|
|
128
106
|
schema: types_1.GevalEvaluateResultSchema,
|
package/dst/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA,8DAAiC;AACjC,wDAAgC;AAChC,2BAGY;AAEZ,sDAA4B;AAC5B,qCAOkB;AAClB,yCAIoB;AACpB,mCASiB;AAEjB,2CAAyB;AACzB,mCAAmC;AAA1B,kHAAA,OAAO,OAAA;AAChB,0CAAwB;AAExB,MAAM,SAAS,GAAG,GAAG,EAAE,CAAC,qBAAM,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AAYxD,MAAM,SAAS,GAAG,KAAK,EAC5B,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAiB,EACjB,UAA0B,EAAE,EACJ,EAAE;IAC1B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,kBAAQ,CAAC,MAAM,CAAC,+BAAsB,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;QAE/E,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAA,iBAAY,EAAC;YAC5C,GAAG,OAAO;YACV,QAAQ,EAAE,SAAS;YACnB,KAAK,EAAE,SAAS;YAChB,KAAK,EAAE,IAAA,mBAAQ,EAAC,YAAY,EAAE,SAAS,CAAC;YACxC,MAAM,EAAE,kBAAQ,CAAC,MAAM,CAAC,iCAAwB,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE,EAAE,CAAC;YAC3E,MAAM,EAAE,UAAU;YAClB,MAAM,EAAE,WAAM,CAAC,MAAM,CAAC;gBACpB,MAAM,EAAE,0BAAkB;aAC3B,CAAC;SACJ,CAAC,CAAC;QAEF,gBAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACrB,MAAM,EAAE,WAAW;YACnB,MAAM,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,SAAS,EAAE,OAAO,EAAE;YAC5D,MAAM;YACN,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAEf,gBAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,EAAE,WAAW;YACnB,KAAK;YACL,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC,CAAA;AAzCY,QAAA,SAAS,aAyCrB;AAED,MAAM,MAAM,GAAG,KAAK,EAClB,KAAkB,EAClB,QAAgB,EAChB,YAAoB,EACpB,SAAiB,EACjB,QAAgB,EAChB,UAAwB,EACxB,UAA0B,EAAE,EACG,EAAE;IACjC,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,KAAK,GAAG,EAAE,KAAK,EAAE,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;IACvC,CAAC;IACD,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,KAAK,CAAC;IAEhC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEzB,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,IAAA,mBAAQ,EAAC,YAAY,EAAE,SAAS,CAAC,CAAC;QAChD,IAAI,KAAK,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,CAAC,CAAC;QAErC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,WAAW,GAAG,kBAAQ,CAAC,MAAM,CAAC,2BAAkB,EAAE,EAAE,Q
AAQ,EAAE,CAAC,CAAC;YAEtE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,IAAA,iBAAY,EAAC;gBACjD,GAAG,OAAO;gBACV,MAAM,EAAE,SAAS;gBACjB,QAAQ,EAAE,SAAS;gBACnB,KAAK,EAAE,SAAS;gBAChB,KAAK;gBACL,MAAM,EAAE,WAAW;gBACnB,MAAM,EAAE,WAAM,CAAC,MAAM,CAAC;oBACpB,MAAM,EAAE,8BAAsB;iBAC/B,CAAC;aACH,CAAC,CAAC;YAEH,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC;YAE1B,IAAA,mBAAQ,EAAC,QAAQ,EAAE,WAAW,CAAC,KAAK,CAAC,CAAC;QACxC,CAAC;QAED,MAAM,gBAAgB,GAAG,kBAAQ,CAAC,MAAM,CACtC,KAAK,CAAC,CAAC,CAAC,8BAAqB,CAAC,CAAC,CAAC,oCAA2B,EAC3D;YACE,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC;YACzB,KAAK,EAAE,KAAK;YACZ,MAAM,EAAE,MAAM;YACd,QAAQ;SACT,CAAC,CAAC;QAEL,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,IAAA,iBAAY,EAAC;YAChD,GAAG,OAAO;YACV,QAAQ,EAAE,SAAS;YACnB,KAAK,EAAE,SAAS;YAChB,KAAK;YACL,MAAM,EAAE,kBAAQ,CAAC,MAAM,CAAC,4BAAmB,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE,EAAE,CAAC;YACtE,MAAM,EAAE,gBAAgB;YACxB,MAAM,EAAE,WAAM,CAAC,MAAM,CAAC;gBACpB,MAAM,EAAE,iCAAyB;aAClC,CAAC;YACF,GAAG,OAAO;SACX,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG;YACb,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,KAAK,EAAE,UAAU,CAAC,KAAK,GAAG,QAAQ;SACnC,CAAC;QAEF,gBAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACrB,MAAM,EAAE,UAAU;YAClB,MAAM,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,SAAS,EAAE,OAAO,EAAE;YACrE,MAAM;YACN,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAEf,gBAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,EAAE,UAAU;YAClB,KAAK;YACL,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC,CAAA;AAYM,MAAM,KAAK,GAAG,KAAK,EACxB,KAAkB,EAClB,QAAgB,EAChB,YAAoB,EACpB,SAAiB,EACjB,UAA0B,EAAE,EACG,EAAE,CAAC,MAAM,CACxC,KAAK,EACL,QAAQ,EACR,YAAY,EACZ,SAAS,EACT,gBAAI,CAAC,aAAa,EAClB,OAAO,EACP,OAAO,CACR,CAAC;AAdW,QAAA,KAAK,SAchB;AAYK,MAAM,KAAK,GAAG,KAAK,EACxB,KAAkB,EAClB,QAAgB,EAChB,YAAoB,EACpB,SAAiB,EACjB,UAA0B,EAAE,EACG,EAAE,CAAC,MAAM,CACxC,KAAK,EACL,QAAQ,EACR,YAAY,EACZ,SAAS,EACT,CAAC,EACD,OAAO,EACP,OAAO,CACR,CAAC;AAdW,QAAA,KAAK,SAchB"}
|
package/dst/registry.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"registry.js","sourceRoot":"","sources":["../src/registry.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,oDAAsC;
|
|
1
|
+
{"version":3,"file":"registry.js","sourceRoot":"","sources":["../src/registry.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,oDAAsC;AAItC,2CAEwB;AACxB,iDAE2B;AAC3B,2CAEwB;AACxB,6CAEyB;AACzB,2DAEgC;AAChC,yCAEuB;AACvB,+CAE0B;AAC1B,uCAEsB;AACtB,mDAE4B;AAC5B,qCAEqB;AAErB,sDAA4B;AAM5B,MAAM,SAAS,GAA6B;IAC1C,MAAM,EAAN,eAAM;IACN,SAAS,EAAT,qBAAS;IACT,MAAM,EAAN,eAAM;IACN,OAAO,EAAP,iBAAO;IACP,OAAO,EAAP,wBAAO;IACP,KAAK,EAAL,aAAK;IACL,QAAQ,EAAR,mBAAQ;IACR,IAAI,EAAJ,WAAI;IACJ,UAAU,EAAV,uBAAU;IACV,GAAG,EAAH,SAAG;CACJ,CAAC;AAQK,MAAM,QAAQ,GAAG,CAAC,YAAoB,EAAE,SAAiB,EAAiB,EAAE;IACjF,MAAM,QAAQ,GAAG,GAAG,YAAY,IAAI,SAAS,EAAE,CAAC;IAEhD,IAAI,KAAK,GAAG,gBAAI,CAAC,aAAa,CAAC,CAAC,CAAC,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAE3E,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,QAAQ,GAAG,SAAS,CAAC,YAAY,CAAC,CAAC;QAEzC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,IAAI,KAAK,CAAC,sBAAsB,YAAY,2BAA2B,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACpH,CAAC;QAED,KAAK,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC;QAE5B,IAAI,gBAAI,CAAC,aAAa,EAAE,CAAC;YACvB,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QACvC,CAAC;IACH,CAAC;IAED,OAAO,KAAM,CAAC;AAChB,CAAC,CAAA;AApBY,QAAA,QAAQ,YAoBpB;AAOD,MAAM,GAAG,GAAG,CAAC,GAAW,EAAU,EAAE;IAClC,OAAO,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC5D,CAAC,CAAA;AAOM,MAAM,QAAQ,GAAG,CAAC,QAAgB,EAAiC,EAAE;IAC1E,OAAO,gBAAI,CAAC,aAAa,CAAC,CAAC,CAAC,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;AAC9F,CAAC,CAAA;AAFY,QAAA,QAAQ,YAEpB;AAQM,MAAM,QAAQ,GAAG,CAAC,QAAgB,EAAE,KAAe,EAAiB,EAAE;IAC3E,IAAI,gBAAI,CAAC,aAAa,EAAE,CAAC;QACvB,OAAO,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,KAAK,CAAC,CAAC;IACnD,CAAC;IAED,OAAO,OAAO,CAAC,OAAO,EAAE,CAAC;AAC3B,CAAC,CAAA;AANY,QAAA,QAAQ,YAMpB"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { LRUCache } from 'lru-cache';
|
|
2
|
+
import { type LanguageModel } from 'ai';
|
|
3
|
+
import { type IJudgeHooks, type IStepsCache } from './types';
|
|
4
|
+
/**
 * Type of the module's default export: one mutable configuration object
 * carrying the score scale, the cache switches, the cache instances and
 * the lifecycle hooks.
 */
declare const _default: {
    /** Top of the raw score scale requested from the model by `gEval`. */
    gevalMaxScore: number;
    /** Switch set by `enableModelCache()` / `disableModelCache()`. */
    isModelCached: boolean;
    /** Switch set by `enableStepsCache()` / `disableStepsCache()`. */
    isStepsCached: boolean;
    /** LRU store of language-model instances addressed by a string key. */
    modelCache: LRUCache<string, LanguageModel, unknown>;
    /** Store for generated evaluation steps. */
    stepsCache: IStepsCache;
    /** Replaces `modelCache` with a fresh LRU cache; `size` is its capacity. */
    restartModelCache(size?: number): void;
    /** Replaces `stepsCache` with a fresh built-in store; `size` is its capacity. */
    restartStepsCache(size?: number): void;
    /** Installs a caller-provided steps store. */
    setStepsCache(cache: IStepsCache): void;
    /** Sets `isModelCached` to `true`. */
    enableModelCache(): void;
    /** Sets `isModelCached` to `false`. */
    disableModelCache(): void;
    /** Sets `isStepsCached` to `true`. */
    enableStepsCache(): void;
    /** Sets `isStepsCached` to `false`. */
    disableStepsCache(): void;
    /** Callbacks invoked by the judge functions on success and on error. */
    hooks: IJudgeHooks;
    /** Replaces the whole `hooks` object. */
    setHooks(hooks: IJudgeHooks): void;
};
|
|
20
|
+
export default _default;
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const lru_cache_1 = require("lru-cache");
|
|
4
|
+
/**
 * Steps store kept in process memory on top of an LRU cache.
 *
 * Both accessors are declared `async`, so callers always receive a promise
 * even though the underlying LRU calls are made synchronously.
 */
class StepsMemoryAdapter {
    cache;
    /**
     * @param {number} size - forwarded as the `max` option of the LRU cache.
     */
    constructor(size) {
        const lruOptions = { max: size };
        this.cache = new lru_cache_1.LRUCache(lruOptions);
    }
    /**
     * Stores `value` under `key`; the returned promise resolves to `undefined`.
     */
    async set(key, value) {
        this.cache.set(key, value);
    }
    /**
     * Resolves to whatever the LRU cache returns for `key`.
     */
    async get(key) {
        const cached = this.cache.get(key);
        return cached;
    }
}
|
|
16
|
+
exports.default = {
|
|
17
|
+
gevalMaxScore: 10,
|
|
18
|
+
isModelCached: true,
|
|
19
|
+
isStepsCached: true,
|
|
20
|
+
modelCache: new lru_cache_1.LRUCache({ max: 100 }),
|
|
21
|
+
stepsCache: new StepsMemoryAdapter(500),
|
|
22
|
+
restartModelCache(size = 100) {
|
|
23
|
+
this.modelCache = new lru_cache_1.LRUCache({ max: size });
|
|
24
|
+
},
|
|
25
|
+
restartStepsCache(size = 500) {
|
|
26
|
+
this.stepsCache = new StepsMemoryAdapter(size);
|
|
27
|
+
},
|
|
28
|
+
setStepsCache(cache) {
|
|
29
|
+
this.stepsCache = cache;
|
|
30
|
+
},
|
|
31
|
+
enableModelCache() {
|
|
32
|
+
this.isModelCached = true;
|
|
33
|
+
},
|
|
34
|
+
disableModelCache() {
|
|
35
|
+
this.isModelCached = false;
|
|
36
|
+
},
|
|
37
|
+
enableStepsCache() {
|
|
38
|
+
this.isStepsCached = true;
|
|
39
|
+
},
|
|
40
|
+
disableStepsCache() {
|
|
41
|
+
this.isStepsCached = false;
|
|
42
|
+
},
|
|
43
|
+
hooks: {},
|
|
44
|
+
setHooks(hooks) {
|
|
45
|
+
this.hooks = hooks;
|
|
46
|
+
}
|
|
47
|
+
};
|
|
48
|
+
//# sourceMappingURL=config.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../../src/config.ts"],"names":[],"mappings":";;AAAA,yCAEmB;AAcnB,MAAM,kBAAkB;IACd,KAAK,CAA6B;IAM1C,YAAY,IAAY;QACtB,IAAI,CAAC,KAAK,GAAG,IAAI,oBAAQ,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IAC3C,CAAC;IAMD,KAAK,CAAC,GAAG,CAAC,GAAW,EAAE,KAAe;QACpC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAC7B,CAAC;IAMD,KAAK,CAAC,GAAG,CAAC,GAAW;QACnB,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;CACF;AAOD,kBAAe;IAIb,aAAa,EAAE,EAAE;IAIjB,aAAa,EAAE,IAAI;IAInB,aAAa,EAAE,IAAI;IAInB,UAAU,EAAE,IAAI,oBAAQ,CAAwB,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;IAI7D,UAAU,EAAE,IAAI,kBAAkB,CAAC,GAAG,CAAgB;IAKtD,iBAAiB,CAAC,OAAe,GAAG;QAClC,IAAI,CAAC,UAAU,GAAG,IAAI,oBAAQ,CAAwB,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IACvE,CAAC;IAKD,iBAAiB,CAAC,OAAe,GAAG;QAClC,IAAI,CAAC,UAAU,GAAG,IAAI,kBAAkB,CAAC,IAAI,CAAgB,CAAC;IAChE,CAAC;IAKD,aAAa,CAAC,KAAkB;QAC9B,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC;IAC1B,CAAC;IAID,gBAAgB;QACd,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;IAC5B,CAAC;IAID,iBAAiB;QACf,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC;IAC7B,CAAC;IAID,gBAAgB;QACd,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;IAC5B,CAAC;IAID,iBAAiB;QACf,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC;IAC7B,CAAC;IAID,KAAK,EAAE,EAAiB;IAKxB,QAAQ,CAAC,KAAkB;QACzB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACrB,CAAC;CACF,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { type TVercelOptions, type TGevalInput, type TRubricResult, type TGevalEvaluateResult } from './types';
|
|
2
|
+
export * from './config';
|
|
3
|
+
export { default } from './config';
|
|
4
|
+
export * from './types';
|
|
5
|
+
/**
 * Grades `output` against `rubric` with the model identified by
 * `providerName` / `modelName` and resolves to the structured rubric result.
 * `options` is spread into the underlying text-generation request.
 */
export declare const llmRubric: (
    output: string,
    rubric: string,
    providerName: string,
    modelName: string,
    options?: TVercelOptions,
) => Promise<TRubricResult>;
|
|
6
|
+
/**
 * Scores `input` against `criteria` using the configured maximum score
 * (`gevalMaxScore`); the resolved `score` is the model's raw score divided
 * by that maximum.
 */
export declare const gEval: (
    input: TGevalInput,
    criteria: string,
    providerName: string,
    modelName: string,
    options?: TVercelOptions,
) => Promise<TGevalEvaluateResult>;
|
|
7
|
+
/**
 * Same evaluation as `gEval`, but with the maximum score fixed at 1.
 */
export declare const bEval: (
    input: TGevalInput,
    criteria: string,
    providerName: string,
    modelName: string,
    options?: TVercelOptions,
) => Promise<TGevalEvaluateResult>;
|
package/dst/src/index.js
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Compiler-emitted re-export helper: exposes source[key] on target under
// `alias` (or `key` when no alias is given). A pre-existing helper on `this`
// wins; otherwise the descriptor-based variant is used when Object.create
// exists and plain assignment is the fallback.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        var exportedAs = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsLiveGetter;
        if (!descriptor) {
            needsLiveGetter = true;
        }
        else if ("get" in descriptor) {
            // Accessors are reused as-is only when they come from an ES module.
            needsLiveGetter = !source.__esModule;
        }
        else {
            // Data properties that can still change must be read lazily.
            needsLiveGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsLiveGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportedAs, descriptor);
    }
    : function (target, source, key, alias) {
        var exportedAs = alias === undefined ? key : alias;
        target[exportedAs] = source[key];
    });
|
|
13
|
+
// Compiler-emitted helper: attaches `value` as the "default" member of a
// synthesized namespace object. A pre-existing helper on `this` wins.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (namespace, value) {
        var defaultDescriptor = { enumerable: true, value: value };
        Object.defineProperty(namespace, "default", defaultDescriptor);
    }
    : function (namespace, value) {
        namespace["default"] = value;
    });
|
|
18
|
+
// Compiler-emitted `import * as x` helper: ES modules are returned untouched;
// anything else is wrapped in a namespace object that re-exposes every own
// key except "default" and carries the module itself as `default`.
var __importStar = (this && this.__importStar) || (function () {
    // Chooses the key-listing strategy on first call and rebinds itself to it.
    var listOwnKeys = function (subject) {
        listOwnKeys = Object.getOwnPropertyNames || function (obj) {
            var names = [];
            for (var name in obj) {
                if (Object.prototype.hasOwnProperty.call(obj, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(subject);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var idx = 0; idx < keys.length; idx++) {
                if (keys[idx] === "default") {
                    continue;
                }
                __createBinding(namespace, mod, keys[idx]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
35
|
+
// Compiler-emitted `export * from` helper: re-exposes every enumerable key of
// `m` on `exports`, skipping "default" and anything `exports` already owns.
var __exportStar = (this && this.__exportStar) || function (m, exports) {
    for (var name in m) {
        if (name === "default") {
            continue;
        }
        if (Object.prototype.hasOwnProperty.call(exports, name)) {
            continue;
        }
        __createBinding(exports, m, name);
    }
};
|
|
38
|
+
// Compiler-emitted default-import helper: ES modules pass through unchanged,
// every other value is wrapped as `{ default: value }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
41
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
42
|
+
exports.bEval = exports.gEval = exports.llmRubric = exports.default = void 0;
|
|
43
|
+
const node_crypto_1 = __importDefault(require("node:crypto"));
|
|
44
|
+
const Mustache = __importStar(require("mustache"));
|
|
45
|
+
const ai_1 = require("ai");
|
|
46
|
+
const config_1 = __importDefault(require("./config"));
|
|
47
|
+
const prompt_1 = require("./prompt");
|
|
48
|
+
const registry_1 = require("./registry");
|
|
49
|
+
const types_1 = require("./types");
|
|
50
|
+
__exportStar(require("./config"), exports);
|
|
51
|
+
var config_2 = require("./config");
|
|
52
|
+
Object.defineProperty(exports, "default", { enumerable: true, get: function () { return __importDefault(config_2).default; } });
|
|
53
|
+
__exportStar(require("./types"), exports);
|
|
54
|
+
/**
 * Produces a fresh identifier: 16 random bytes rendered as 32 lowercase
 * hexadecimal characters.
 */
const getHashId = () => {
    const entropy = node_crypto_1.default.randomBytes(16);
    return entropy.toString('hex');
};
|
|
55
|
+
/**
 * Grades `output` against `rubric` with a single structured generation call.
 *
 * The user prompt is rendered from the rubric template; the system prompt is
 * rendered with a freshly generated hash id. Caller `options` are spread into
 * the request first, so `messages`, `tools`, `model`, `system`, `prompt` and
 * `output` are always the values set here.
 *
 * On success the `onSuccess` hook (if any) receives the method name, the call
 * parameters, the result and the elapsed milliseconds. Any error thrown inside
 * the `try` block is reported to the `onError` hook (if any) and then rethrown
 * unchanged.
 *
 * @param {string} output - text being graded.
 * @param {string} rubric - statement the text is graded against.
 * @param {string} providerName - provider key handed to the model registry.
 * @param {string} modelName - model id handed to the model registry.
 * @param {object} [options] - extra settings spread into the generation request.
 * @returns {Promise<object>} the structured output of the generation call.
 */
const llmRubric = async (output, rubric, providerName, modelName, options = {}) => {
    const startedAt = Date.now();
    try {
        const renderedUserPrompt = Mustache.render(prompt_1.LLM_RUBRIC_USER_PROMPT, { output, rubric });
        const request = {
            ...options,
            messages: undefined,
            tools: undefined,
            model: (0, registry_1.getModel)(providerName, modelName),
            system: Mustache.render(prompt_1.LLM_RUBRIC_SYSTEM_PROMPT, { hash_id: getHashId() }),
            prompt: renderedUserPrompt,
            output: ai_1.Output.object({ schema: types_1.RubricResultSchema }),
        };
        const { output: result } = await (0, ai_1.generateText)(request);
        const successEvent = {
            method: 'llmRubric',
            params: { output, rubric, providerName, modelName, options },
            result,
            duration: Date.now() - startedAt,
        };
        config_1.default.hooks.onSuccess?.(successEvent);
        return result;
    }
    catch (error) {
        const failureEvent = {
            method: 'llmRubric',
            error,
            duration: Date.now() - startedAt,
        };
        config_1.default.hooks.onError?.(failureEvent);
        throw error;
    }
};
|
|
87
|
+
exports.llmRubric = llmRubric;
|
|
88
|
+
/**
 * Shared implementation behind `gEval` and `bEval`.
 *
 * Flow:
 *  1. A plain-string `input` is treated as an answer with an empty query.
 *  2. Evaluation steps for `criteria` are looked up through the registry; when
 *     none are returned they are generated with one model call and handed back
 *     to the registry for storage.
 *  3. The evaluation prompt is rendered (the prompt-and-reply template when a
 *     query is present, the reply-only template otherwise) and sent to the model.
 *  4. The raw score is divided by `maxScore` and returned with the reason.
 *
 * Caller `options` are spread into each request first, so the fields set
 * explicitly afterwards always win.
 *
 * On success the `onSuccess` hook (if any) receives the method name, the call
 * parameters, the result and the elapsed milliseconds. Any error thrown inside
 * the `try` block is reported to the `onError` hook (if any) and then rethrown
 * unchanged.
 *
 * @param {string|{query: string, answer: string}} input - text under evaluation.
 * @param {string} criteria - evaluation criteria; also the steps lookup key.
 * @param {string} providerName - provider key handed to the model registry.
 * @param {string} modelName - model id handed to the model registry.
 * @param {number} maxScore - top of the raw score scale and the divisor used
 *   to normalize the returned score.
 * @param {string} methodName - public method name reported to the hooks.
 * @param {object} [options] - extra settings spread into the generation requests.
 * @returns {Promise<{reason: string, score: number}>} reason and normalized score.
 */
const _gEval = async (input, criteria, providerName, modelName, maxScore, methodName, options = {}) => {
    // Normalize without reassigning the caller-visible parameter.
    const { query, answer } = typeof input === 'string'
        ? { query: '', answer: input }
        : input;
    const start = Date.now();
    try {
        const model = (0, registry_1.getModel)(providerName, modelName);
        let steps = await (0, registry_1.getSteps)(criteria);
        if (!steps) {
            const stepsPrompt = Mustache.render(prompt_1.GEVAL_STEPS_PROMPT, { criteria });
            const { output: stepsResult } = await (0, ai_1.generateText)({
                ...options,
                system: undefined,
                messages: undefined,
                tools: undefined,
                model,
                prompt: stepsPrompt,
                output: ai_1.Output.object({
                    schema: types_1.GevalStepsResultSchema,
                }),
            });
            steps = stepsResult.steps;
            // Awaited so that a failing store reaches the catch/onError path below
            // instead of becoming an unhandled rejection.
            // NOTE(review): assumes setSteps returns a promise (the bundled steps
            // adapter's `set` is async) — confirm against the registry module.
            await (0, registry_1.setSteps)(criteria, stepsResult.steps);
        }
        // NOTE(review): mustache's double-brace tags HTML-escape values by default,
        // so characters such as `<`, `&` and quotes inside criteria/steps/input/output
        // reach the model as entities — confirm this is intended.
        const evaluationPrompt = Mustache.render(query ? prompt_1.GEVAL_EVALUATE_PROMPT : prompt_1.GEVAL_EVALUATE_REPLY_PROMPT, {
            criteria,
            steps: steps.join('\n- '),
            input: query,
            output: answer,
            maxScore,
        });
        // `options` is spread once, first: spreading it again at the end would let
        // caller-supplied model/system/prompt/output/messages/tools replace the
        // judge's own values.
        const { output: evalResult } = await (0, ai_1.generateText)({
            ...options,
            messages: undefined,
            tools: undefined,
            model,
            system: Mustache.render(prompt_1.GEVAL_SYSTEM_PROMPT, { hash_id: getHashId() }),
            prompt: evaluationPrompt,
            output: ai_1.Output.object({
                schema: types_1.GevalEvaluateResultSchema,
            }),
        });
        const result = {
            reason: evalResult.reason,
            score: evalResult.score / maxScore,
        };
        config_1.default.hooks.onSuccess?.({
            method: methodName,
            params: { query, answer, criteria, providerName, modelName, options },
            result,
            duration: Date.now() - start,
        });
        return result;
    }
    catch (error) {
        config_1.default.hooks.onError?.({
            method: methodName,
            error,
            duration: Date.now() - start,
        });
        throw error;
    }
};
|
|
153
|
+
/**
 * Evaluates `input` against `criteria` on the configured score scale
 * (`gevalMaxScore`), reporting to the hooks under the name 'gEval'.
 */
const gEval = async (input, criteria, providerName, modelName, options = {}) => {
    return _gEval(input, criteria, providerName, modelName, config_1.default.gevalMaxScore, 'gEval', options);
};
|
|
154
|
+
exports.gEval = gEval;
|
|
155
|
+
/**
 * Evaluates `input` against `criteria` with the maximum score fixed at 1,
 * reporting to the hooks under the name 'bEval'.
 */
const bEval = async (input, criteria, providerName, modelName, options = {}) => {
    return _gEval(input, criteria, providerName, modelName, 1, 'bEval', options);
};
|
|
156
|
+
exports.bEval = bEval;
|
|
157
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,8DAAiC;AACjC,mDAAqC;AACrC,2BAGY;AAEZ,sDAA4B;AAC5B,qCAOkB;AAClB,yCAIoB;AACpB,mCASiB;AAEjB,2CAAyB;AACzB,mCAAmC;AAA1B,kHAAA,OAAO,OAAA;AAChB,0CAAwB;AAExB,MAAM,SAAS,GAAG,GAAG,EAAE,CAAC,qBAAM,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AAYxD,MAAM,SAAS,GAAG,KAAK,EAC5B,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAiB,EACjB,UAA0B,EAAE,EACJ,EAAE;IAC1B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,QAAQ,CAAC,MAAM,CAAC,+BAAsB,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;QAE/E,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAA,iBAAY,EAAC;YAC5C,GAAG,OAAO;YACV,QAAQ,EAAE,SAAS;YACnB,KAAK,EAAE,SAAS;YAChB,KAAK,EAAE,IAAA,mBAAQ,EAAC,YAAY,EAAE,SAAS,CAAC;YACxC,MAAM,EAAE,QAAQ,CAAC,MAAM,CAAC,iCAAwB,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE,EAAE,CAAC;YAC3E,MAAM,EAAE,UAAU;YAClB,MAAM,EAAE,WAAM,CAAC,MAAM,CAAC;gBACpB,MAAM,EAAE,0BAAkB;aAC3B,CAAC;SACJ,CAAC,CAAC;QAEF,gBAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACrB,MAAM,EAAE,WAAW;YACnB,MAAM,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,SAAS,EAAE,OAAO,EAAE;YAC5D,MAAM;YACN,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAEf,gBAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,EAAE,WAAW;YACnB,KAAK;YACL,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC,CAAA;AAzCY,QAAA,SAAS,aAyCrB;AAED,MAAM,MAAM,GAAG,KAAK,EAClB,KAAkB,EAClB,QAAgB,EAChB,YAAoB,EACpB,SAAiB,EACjB,QAAgB,EAChB,UAAwB,EACxB,UAA0B,EAAE,EACG,EAAE;IACjC,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,KAAK,GAAG,EAAE,KAAK,EAAE,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;IACvC,CAAC;IACD,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,KAAK,CAAC;IAEhC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEzB,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,IAAA,mBAAQ,EAAC,YAAY,EAAE,SAAS,CAAC,CAAC;QAChD,IAAI,KAAK,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,CAAC,CAAC;QAErC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,WAAW,GAAG,QAAQ,CAAC,MAAM,C
AAC,2BAAkB,EAAE,EAAE,QAAQ,EAAE,CAAC,CAAC;YAEtE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,IAAA,iBAAY,EAAC;gBACjD,GAAG,OAAO;gBACV,MAAM,EAAE,SAAS;gBACjB,QAAQ,EAAE,SAAS;gBACnB,KAAK,EAAE,SAAS;gBAChB,KAAK;gBACL,MAAM,EAAE,WAAW;gBACnB,MAAM,EAAE,WAAM,CAAC,MAAM,CAAC;oBACpB,MAAM,EAAE,8BAAsB;iBAC/B,CAAC;aACH,CAAC,CAAC;YAEH,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC;YAE1B,IAAA,mBAAQ,EAAC,QAAQ,EAAE,WAAW,CAAC,KAAK,CAAC,CAAC;QACxC,CAAC;QAED,MAAM,gBAAgB,GAAG,QAAQ,CAAC,MAAM,CACtC,KAAK,CAAC,CAAC,CAAC,8BAAqB,CAAC,CAAC,CAAC,oCAA2B,EAC3D;YACE,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC;YACzB,KAAK,EAAE,KAAK;YACZ,MAAM,EAAE,MAAM;YACd,QAAQ;SACT,CAAC,CAAC;QAEL,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,IAAA,iBAAY,EAAC;YAChD,GAAG,OAAO;YACV,QAAQ,EAAE,SAAS;YACnB,KAAK,EAAE,SAAS;YAChB,KAAK;YACL,MAAM,EAAE,QAAQ,CAAC,MAAM,CAAC,4BAAmB,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE,EAAE,CAAC;YACtE,MAAM,EAAE,gBAAgB;YACxB,MAAM,EAAE,WAAM,CAAC,MAAM,CAAC;gBACpB,MAAM,EAAE,iCAAyB;aAClC,CAAC;YACF,GAAG,OAAO;SACX,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG;YACb,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,KAAK,EAAE,UAAU,CAAC,KAAK,GAAG,QAAQ;SACnC,CAAC;QAEF,gBAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACrB,MAAM,EAAE,UAAU;YAClB,MAAM,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,SAAS,EAAE,OAAO,EAAE;YACrE,MAAM;YACN,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAEf,gBAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,EAAE,UAAU;YAClB,KAAK;YACL,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC,CAAA;AAYM,MAAM,KAAK,GAAG,KAAK,EACxB,KAAkB,EAClB,QAAgB,EAChB,YAAoB,EACpB,SAAiB,EACjB,UAA0B,EAAE,EACG,EAAE,CAAC,MAAM,CACxC,KAAK,EACL,QAAQ,EACR,YAAY,EACZ,SAAS,EACT,gBAAI,CAAC,aAAa,EAClB,OAAO,EACP,OAAO,CACR,CAAC;AAdW,QAAA,KAAK,SAchB;AAYK,MAAM,KAAK,GAAG,KAAK,EACxB,KAAkB,EAClB,QAAgB,EAChB,YAAoB,EACpB,SAAiB,EACjB,UAA0B,EAAE,EACG,EAAE,CAAC,MAAM,CACxC,KAAK,EACL,QAAQ,EACR,YAAY,EACZ,SAAS,EACT,CAAC,EACD,OAAO,EACP,OAAO,CACR,CAAC;AAdW,QAAA,KAAK,SAchB"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/*!
|
|
2
|
+
* Portions of this code are based on Promptfoo (MIT License)
|
|
3
|
+
* Copyright (c) 2025 Promptfoo
|
|
4
|
+
*/
|
|
5
|
+
export declare const LLM_RUBRIC_SYSTEM_PROMPT = "\nInstruction #{{hash_id}}.\n\nYou are grading output according to a user-specified rubric. If the statement in the rubric is true, then the output passes the test. And score 1.0 indicates full compliance with the rubric, but 0.0 indicates no compliance at all. You respond with a JSON object with this structure: {reason: string, pass: boolean, score: number}\n\nExamples:\n\n<Output>Hello world</Output>\n<Rubric>Content contains a greeting</Rubric>\n{\"reason\": \"the content contains the word 'Hello'\", \"pass\": true, \"score\": 1.0}\n\n<Output>Avast ye swabs, repel the invaders!</Output>\n<Rubric>Does not speak like a pirate</Rubric>\n{\"reason\": \"'avast ye' is a common pirate term\", \"pass\": false, \"score\": 0.0}\n";
|
|
6
|
+
export declare const LLM_RUBRIC_USER_PROMPT = "<Output>\n{{output}}\n</Output>\n<Rubric>\n{{rubric}}\n</Rubric>";
|
|
7
|
+
export declare const GEVAL_SYSTEM_PROMPT = "\nInstruction #{{hash_id}}.\n\nYou are an impartial Judge. Your role is to perform an independent audit according to provided criteria.\n";
|
|
8
|
+
export declare const GEVAL_STEPS_PROMPT = "\nGiven an evaluation criteria which outlines how you should judge a piece of text, generate 3-4 concise evaluation steps applicable to any text based on the criteria below and designed to confirm the criteria.\n\n**EVALUATION CRITERIA**\n<Criteria>\n{{criteria}}\n</Criteria>\n\n**OUTPUT FORMAT**\nIMPORTANT:\n- Return output ONLY as a minified JSON object (no code fences).\n- The JSON object must contain a single key, \"steps\", whose value is a list of strings.\n- Each string must represent one evaluation step.\n- Do NOT include any explanations, commentary, extra text, or additional formatting.\n\nFormat:\n{\"steps\": <list_of_strings>}\n\nExample:\n{\"steps\":[\"<Evaluation Step 1>\",\"<Evaluation Step 2>\",\"<Evaluation Step 3>\",\"<Evaluation Step 4>\"]}\n\nHere are the 3-4 concise evaluation steps, formatted as required in a minified JSON:\nJSON:\n";
|
|
9
|
+
export declare const GEVAL_EVALUATE_REPLY_PROMPT = "\nYou will be given one Reply below. Your task is to rate the Reply on one metric.\nPlease make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.\n\n**Evaluation Criteria**\n<Criteria>\n{{criteria}}\n</Criteria>\n\n**Evaluation Steps**\n- {{steps}}\nGiven the evaluation steps, return a JSON with two keys: \n 1) a \"score\" key that MUST be an integer from 0 to {{maxScore}}, where {{maxScore}} indicates that the Evaluation Criteria is fully and clearly present in the Reply according to the Evaluation Steps, and 0 indicates the total absence of the Evaluation Criteria;\n 2) a \"reason\" key, a reason for the given score, but DO NOT QUOTE THE SCORE in your reason. Please mention specific information from Reply in your reason, but be very concise with it!\n\n**Reply**\n<Reply>\n{{output}}\n</Reply>\n\n**OUTPUT FORMAT**\nIMPORTANT: \n- Return output ONLY as a minified JSON object (no code fences).\n- The JSON object must contain exactly two keys: \"score\" and \"reason\".\n- No additional words, explanations, or formatting are needed.\n- Absolutely no additional text, explanations, line breaks, or formatting outside the JSON object are allowed.\n\nExample JSON:\n{\"score\":0,\"reason\":\"The text of Reply does not follow the evaluation criteria provided.\"}\n\nHere is the final evaluation in the required minified JSON format:\nJSON:\n";
|
|
10
|
+
export declare const GEVAL_EVALUATE_PROMPT = "\nYou will be given one Reply for a Prompt below. Your task is to rate the Reply on one metric.\nPlease make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.\n\n**Evaluation Criteria**\n<Criteria>\n{{criteria}}\n</Criteria>\n\n**Evaluation Steps**\n- {{steps}}\nGiven the evaluation steps, return a JSON with two keys: \n 1) a \"score\" key that MUST be an integer from 0 to {{maxScore}}, where {{maxScore}} indicates that the Evaluation Criteria is fully and clearly present in the Reply according to the Evaluation Steps, and 0 indicates the total absence of the Evaluation Criteria;\n 2) a \"reason\" key, a reason for the given score, but DO NOT QUOTE THE SCORE in your reason. Please mention specific information from Prompt and Reply in your reason, but be very concise with it!\n\n**Prompt**\n<Prompt>\n{{input}}\n</Prompt>\n\n**Reply**\n<Reply>\n{{output}}\n</Reply>\n\n**OUTPUT FORMAT**\nIMPORTANT: \n- Return output ONLY as a minified JSON object (no code fences).\n- The JSON object must contain exactly two keys: \"score\" and \"reason\".\n- No additional words, explanations, or formatting are needed.\n- Absolutely no additional text, explanations, line breaks, or formatting outside the JSON object are allowed.\n\nExample JSON:\n{\"score\":0,\"reason\":\"The text of Reply does not follow the evaluation criteria provided.\"}\n\nHere is the final evaluation in the required minified JSON format:\nJSON:\n";
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/*!
|
|
3
|
+
* Portions of this code are based on Promptfoo (MIT License)
|
|
4
|
+
* Copyright (c) 2025 Promptfoo
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.GEVAL_EVALUATE_PROMPT = exports.GEVAL_EVALUATE_REPLY_PROMPT = exports.GEVAL_STEPS_PROMPT = exports.GEVAL_SYSTEM_PROMPT = exports.LLM_RUBRIC_USER_PROMPT = exports.LLM_RUBRIC_SYSTEM_PROMPT = void 0;
|
|
8
|
+
exports.LLM_RUBRIC_SYSTEM_PROMPT = `
|
|
9
|
+
Instruction #{{hash_id}}.
|
|
10
|
+
|
|
11
|
+
You are grading output according to a user-specified rubric. If the statement in the rubric is true, then the output passes the test. And score 1.0 indicates full compliance with the rubric, but 0.0 indicates no compliance at all. You respond with a JSON object with this structure: {reason: string, pass: boolean, score: number}
|
|
12
|
+
|
|
13
|
+
Examples:
|
|
14
|
+
|
|
15
|
+
<Output>Hello world</Output>
|
|
16
|
+
<Rubric>Content contains a greeting</Rubric>
|
|
17
|
+
{"reason": "the content contains the word 'Hello'", "pass": true, "score": 1.0}
|
|
18
|
+
|
|
19
|
+
<Output>Avast ye swabs, repel the invaders!</Output>
|
|
20
|
+
<Rubric>Does not speak like a pirate</Rubric>
|
|
21
|
+
{"reason": "'avast ye' is a common pirate term", "pass": false, "score": 0.0}
|
|
22
|
+
`;
|
|
23
|
+
// Template pairing the judged text with the rubric; `{{output}}` and `{{rubric}}` are placeholders
// filled in by the caller.
// NOTE(review): `mustache` is a declared dependency; if these tags are rendered with Mustache's
// defaults, double-brace values are HTML-escaped (`<` becomes `&lt;`, etc.), which would alter the
// text being judged — confirm the renderer disables escaping.
exports.LLM_RUBRIC_USER_PROMPT = '<Output>\n{{output}}\n</Output>\n<Rubric>\n{{rubric}}\n</Rubric>';
|
|
24
|
+
exports.GEVAL_SYSTEM_PROMPT = `
|
|
25
|
+
Instruction #{{hash_id}}.
|
|
26
|
+
|
|
27
|
+
You are an impartial Judge. Your role is to perform an independent audit according to provided criteria.
|
|
28
|
+
`;
|
|
29
|
+
exports.GEVAL_STEPS_PROMPT = `
|
|
30
|
+
Given an evaluation criteria which outlines how you should judge a piece of text, generate 3-4 concise evaluation steps applicable to any text based on the criteria below and designed to confirm the criteria.
|
|
31
|
+
|
|
32
|
+
**EVALUATION CRITERIA**
|
|
33
|
+
<Criteria>
|
|
34
|
+
{{criteria}}
|
|
35
|
+
</Criteria>
|
|
36
|
+
|
|
37
|
+
**OUTPUT FORMAT**
|
|
38
|
+
IMPORTANT:
|
|
39
|
+
- Return output ONLY as a minified JSON object (no code fences).
|
|
40
|
+
- The JSON object must contain a single key, "steps", whose value is a list of strings.
|
|
41
|
+
- Each string must represent one evaluation step.
|
|
42
|
+
- Do NOT include any explanations, commentary, extra text, or additional formatting.
|
|
43
|
+
|
|
44
|
+
Format:
|
|
45
|
+
{"steps": <list_of_strings>}
|
|
46
|
+
|
|
47
|
+
Example:
|
|
48
|
+
{"steps":["<Evaluation Step 1>","<Evaluation Step 2>","<Evaluation Step 3>","<Evaluation Step 4>"]}
|
|
49
|
+
|
|
50
|
+
Here are the 3-4 concise evaluation steps, formatted as required in a minified JSON:
|
|
51
|
+
JSON:
|
|
52
|
+
`;
|
|
53
|
+
exports.GEVAL_EVALUATE_REPLY_PROMPT = `
|
|
54
|
+
You will be given one Reply below. Your task is to rate the Reply on one metric.
|
|
55
|
+
Please make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.
|
|
56
|
+
|
|
57
|
+
**Evaluation Criteria**
|
|
58
|
+
<Criteria>
|
|
59
|
+
{{criteria}}
|
|
60
|
+
</Criteria>
|
|
61
|
+
|
|
62
|
+
**Evaluation Steps**
|
|
63
|
+
- {{steps}}
|
|
64
|
+
Given the evaluation steps, return a JSON with two keys:
|
|
65
|
+
1) a "score" key that MUST be an integer from 0 to {{maxScore}}, where {{maxScore}} indicates that the Evaluation Criteria is fully and clearly present in the Reply according to the Evaluation Steps, and 0 indicates the total absence of the Evaluation Criteria;
|
|
66
|
+
2) a "reason" key, a reason for the given score, but DO NOT QUOTE THE SCORE in your reason. Please mention specific information from Reply in your reason, but be very concise with it!
|
|
67
|
+
|
|
68
|
+
**Reply**
|
|
69
|
+
<Reply>
|
|
70
|
+
{{output}}
|
|
71
|
+
</Reply>
|
|
72
|
+
|
|
73
|
+
**OUTPUT FORMAT**
|
|
74
|
+
IMPORTANT:
|
|
75
|
+
- Return output ONLY as a minified JSON object (no code fences).
|
|
76
|
+
- The JSON object must contain exactly two keys: "score" and "reason".
|
|
77
|
+
- No additional words, explanations, or formatting are needed.
|
|
78
|
+
- Absolutely no additional text, explanations, line breaks, or formatting outside the JSON object are allowed.
|
|
79
|
+
|
|
80
|
+
Example JSON:
|
|
81
|
+
{"score":0,"reason":"The text of Reply does not follow the evaluation criteria provided."}
|
|
82
|
+
|
|
83
|
+
Here is the final evaluation in the required minified JSON format:
|
|
84
|
+
JSON:
|
|
85
|
+
`;
|
|
86
|
+
exports.GEVAL_EVALUATE_PROMPT = `
|
|
87
|
+
You will be given one Reply for a Prompt below. Your task is to rate the Reply on one metric.
|
|
88
|
+
Please make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.
|
|
89
|
+
|
|
90
|
+
**Evaluation Criteria**
|
|
91
|
+
<Criteria>
|
|
92
|
+
{{criteria}}
|
|
93
|
+
</Criteria>
|
|
94
|
+
|
|
95
|
+
**Evaluation Steps**
|
|
96
|
+
- {{steps}}
|
|
97
|
+
Given the evaluation steps, return a JSON with two keys:
|
|
98
|
+
1) a "score" key that MUST be an integer from 0 to {{maxScore}}, where {{maxScore}} indicates that the Evaluation Criteria is fully and clearly present in the Reply according to the Evaluation Steps, and 0 indicates the total absence of the Evaluation Criteria;
|
|
99
|
+
2) a "reason" key, a reason for the given score, but DO NOT QUOTE THE SCORE in your reason. Please mention specific information from Prompt and Reply in your reason, but be very concise with it!
|
|
100
|
+
|
|
101
|
+
**Prompt**
|
|
102
|
+
<Prompt>
|
|
103
|
+
{{input}}
|
|
104
|
+
</Prompt>
|
|
105
|
+
|
|
106
|
+
**Reply**
|
|
107
|
+
<Reply>
|
|
108
|
+
{{output}}
|
|
109
|
+
</Reply>
|
|
110
|
+
|
|
111
|
+
**OUTPUT FORMAT**
|
|
112
|
+
IMPORTANT:
|
|
113
|
+
- Return output ONLY as a minified JSON object (no code fences).
|
|
114
|
+
- The JSON object must contain exactly two keys: "score" and "reason".
|
|
115
|
+
- No additional words, explanations, or formatting are needed.
|
|
116
|
+
- Absolutely no additional text, explanations, line breaks, or formatting outside the JSON object are allowed.
|
|
117
|
+
|
|
118
|
+
Example JSON:
|
|
119
|
+
{"score":0,"reason":"The text of Reply does not follow the evaluation criteria provided."}
|
|
120
|
+
|
|
121
|
+
Here is the final evaluation in the required minified JSON format:
|
|
122
|
+
JSON:
|
|
123
|
+
`;
|
|
124
|
+
//# sourceMappingURL=prompt.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"prompt.js","sourceRoot":"","sources":["../../src/prompt.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAKU,QAAA,wBAAwB,GAAG;;;;;;;;;;;;;;CAcvC,CAAC;AAKW,QAAA,sBAAsB,GAAG,kEAAkE,CAAC;AAE5F,QAAA,mBAAmB,GAAG;;;;CAIlC,CAAC;AAKW,QAAA,kBAAkB,GAAG;;;;;;;;;;;;;;;;;;;;;;;CAuBjC,CAAC;AAKW,QAAA,2BAA2B,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAgC1C,CAAC;AAKW,QAAA,qBAAqB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAqCpC,CAAC"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import { type LanguageModel } from 'ai';
|
|
2
|
+
export declare const getModel: (providerName: string, modelName: string) => LanguageModel;
|
|
3
|
+
export declare const getSteps: (criteria: string) => Promise<string[] | undefined>;
|
|
4
|
+
export declare const setSteps: (criteria: string, steps: string[]) => Promise<void>;
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Compiler-emitted interop helper: exposes `m[k]` on `o` under the name `k2` (defaults to `k`).
// Reuses a helper already present on `this`, if any. When `Object.create` exists the property is
// defined from `m`'s own descriptor, or from a live enumerable getter when there is no descriptor,
// when an accessor comes from a non-`__esModule` object, or when a data property is writable or
// configurable. Without `Object.create` the current value is simply copied.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
// Compiler-emitted interop helper: sets `o.default` to `v`. Uses an enumerable data property via
// `Object.defineProperty` when `Object.create` exists, otherwise a plain assignment. Reuses a
// helper already present on `this`, if any.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
// Compiler-emitted interop helper for namespace imports. A module flagged `__esModule` is returned
// unchanged. Otherwise a fresh object is built: every own property name of `mod` except "default"
// is bound through __createBinding, and "default" is set to `mod` itself via __setModuleDefault.
// `ownKeys` replaces itself on first call with `Object.getOwnPropertyNames`, or with a
// `for...in` + `hasOwnProperty` fallback when that function is unavailable.
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
// Compiler-emitted interop helper for default imports: a module flagged `__esModule` is returned
// as-is; anything else is wrapped as `{ default: mod }` so callers can always read `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
36
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
37
|
+
};
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.setSteps = exports.getSteps = exports.getModel = void 0;
|
|
40
|
+
const crypto = __importStar(require("node:crypto"));
|
|
41
|
+
const openai_1 = require("@ai-sdk/openai");
|
|
42
|
+
const anthropic_1 = require("@ai-sdk/anthropic");
|
|
43
|
+
const google_1 = require("@ai-sdk/google");
|
|
44
|
+
const mistral_1 = require("@ai-sdk/mistral");
|
|
45
|
+
const amazon_bedrock_1 = require("@ai-sdk/amazon-bedrock");
|
|
46
|
+
const azure_1 = require("@ai-sdk/azure");
|
|
47
|
+
const deepseek_1 = require("@ai-sdk/deepseek");
|
|
48
|
+
const groq_1 = require("@ai-sdk/groq");
|
|
49
|
+
const perplexity_1 = require("@ai-sdk/perplexity");
|
|
50
|
+
const xai_1 = require("@ai-sdk/xai");
|
|
51
|
+
const config_1 = __importDefault(require("./config"));
|
|
52
|
+
// Lookup table from provider name to the provider export of the matching `@ai-sdk/*` package.
// getModel indexes this table by name and calls the entry with a model name; the key list is
// also what getModel prints in its "Unknown provider" error.
const PROVIDERS = {
|
|
53
|
+
openai: openai_1.openai,
|
|
54
|
+
anthropic: anthropic_1.anthropic,
|
|
55
|
+
google: google_1.google,
|
|
56
|
+
mistral: mistral_1.mistral,
|
|
57
|
+
bedrock: amazon_bedrock_1.bedrock,
|
|
58
|
+
azure: azure_1.azure,
|
|
59
|
+
deepseek: deepseek_1.deepseek,
|
|
60
|
+
groq: groq_1.groq,
|
|
61
|
+
perplexity: perplexity_1.perplexity,
|
|
62
|
+
xai: xai_1.xai,
|
|
63
|
+
};
|
|
64
|
+
/**
 * Resolve a provider/model pair to a model instance.
 *
 * When `config.isModelCached` is truthy, the instance is looked up in and stored to
 * `config.modelCache` under the key `"<providerName>:<modelName>"`; otherwise the provider
 * is called on every invocation.
 *
 * @param {string} providerName - Name of a provider registered in `PROVIDERS`.
 * @param {string} modelName - Model identifier handed to the provider.
 * @returns The value produced by the provider for `modelName` (or the cached one).
 * @throws {Error} When `providerName` is not an own key of `PROVIDERS`.
 */
const getModel = (providerName, modelName) => {
    const cacheKey = `${providerName}:${modelName}`;
    let model = config_1.default.isModelCached ? config_1.default.modelCache.get(cacheKey) : undefined;
    if (!model) {
        // Only own keys count as registered providers. A bare `PROVIDERS[providerName]` also
        // resolves names inherited from Object.prototype ("constructor", "toString", "__proto__"),
        // which would either return a bogus "model" or fail with an unrelated TypeError.
        const provider = Object.prototype.hasOwnProperty.call(PROVIDERS, providerName)
            ? PROVIDERS[providerName]
            : undefined;
        if (!provider) {
            throw new Error(`Unknown provider: "${providerName}". Available providers: ${Object.keys(PROVIDERS).join(', ')}`);
        }
        model = provider(modelName);
        if (config_1.default.isModelCached) {
            config_1.default.modelCache.set(cacheKey, model);
        }
    }
    return model;
};
|
|
79
|
+
exports.getModel = getModel;
|
|
80
|
+
// Returns the hex-encoded MD5 digest of `str`. Within this module it is only used by
// getSteps/setSteps to turn criteria text into a steps-cache key.
const md5 = (str) => {
|
|
81
|
+
return crypto.createHash('md5').update(str).digest('hex');
|
|
82
|
+
};
|
|
83
|
+
// Looks up previously stored evaluation steps for `criteria`.
// With `config.isStepsCached` truthy, returns whatever `config.stepsCache.get` yields for the
// MD5 of the criteria text; otherwise returns a promise resolved with `undefined` without
// touching the cache.
const getSteps = (criteria) => {
|
|
84
|
+
return config_1.default.isStepsCached ? config_1.default.stepsCache.get(md5(criteria)) : Promise.resolve(undefined);
|
|
85
|
+
};
|
|
86
|
+
exports.getSteps = getSteps;
|
|
87
|
+
// Stores `steps` for `criteria` under the MD5 of the criteria text.
// With `config.isStepsCached` truthy, returns the result of `config.stepsCache.set`; otherwise
// nothing is stored and an already-resolved promise is returned.
const setSteps = (criteria, steps) => {
|
|
88
|
+
if (config_1.default.isStepsCached) {
|
|
89
|
+
return config_1.default.stepsCache.set(md5(criteria), steps);
|
|
90
|
+
}
|
|
91
|
+
return Promise.resolve();
|
|
92
|
+
};
|
|
93
|
+
exports.setSteps = setSteps;
|
|
94
|
+
//# sourceMappingURL=registry.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"registry.js","sourceRoot":"","sources":["../../src/registry.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,oDAAsC;AAItC,2CAEwB;AACxB,iDAE2B;AAC3B,2CAEwB;AACxB,6CAEyB;AACzB,2DAEgC;AAChC,yCAEuB;AACvB,+CAE0B;AAC1B,uCAEsB;AACtB,mDAE4B;AAC5B,qCAEqB;AAErB,sDAA4B;AAM5B,MAAM,SAAS,GAA6B;IAC1C,MAAM,EAAN,eAAM;IACN,SAAS,EAAT,qBAAS;IACT,MAAM,EAAN,eAAM;IACN,OAAO,EAAP,iBAAO;IACP,OAAO,EAAP,wBAAO;IACP,KAAK,EAAL,aAAK;IACL,QAAQ,EAAR,mBAAQ;IACR,IAAI,EAAJ,WAAI;IACJ,UAAU,EAAV,uBAAU;IACV,GAAG,EAAH,SAAG;CACJ,CAAC;AAQK,MAAM,QAAQ,GAAG,CAAC,YAAoB,EAAE,SAAiB,EAAiB,EAAE;IACjF,MAAM,QAAQ,GAAG,GAAG,YAAY,IAAI,SAAS,EAAE,CAAC;IAEhD,IAAI,KAAK,GAAG,gBAAI,CAAC,aAAa,CAAC,CAAC,CAAC,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAE3E,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,QAAQ,GAAG,SAAS,CAAC,YAAY,CAAC,CAAC;QAEzC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,IAAI,KAAK,CAAC,sBAAsB,YAAY,2BAA2B,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACpH,CAAC;QAED,KAAK,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC;QAE5B,IAAI,gBAAI,CAAC,aAAa,EAAE,CAAC;YACvB,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QACvC,CAAC;IACH,CAAC;IAED,OAAO,KAAM,CAAC;AAChB,CAAC,CAAA;AApBY,QAAA,QAAQ,YAoBpB;AAOD,MAAM,GAAG,GAAG,CAAC,GAAW,EAAU,EAAE;IAClC,OAAO,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC5D,CAAC,CAAA;AAOM,MAAM,QAAQ,GAAG,CAAC,QAAgB,EAAiC,EAAE;IAC1E,OAAO,gBAAI,CAAC,aAAa,CAAC,CAAC,CAAC,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;AAC9F,CAAC,CAAA;AAFY,QAAA,QAAQ,YAEpB;AAQM,MAAM,QAAQ,GAAG,CAAC,QAAgB,EAAE,KAAe,EAAiB,EAAE;IAC3E,IAAI,gBAAI,CAAC,aAAa,EAAE,CAAC;QACvB,OAAO,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,KAAK,CAAC,CAAC;IACnD,CAAC;IAED,OAAO,OAAO,CAAC,OAAO,EAAE,CAAC;AAC3B,CAAC,CAAA;AANY,QAAA,QAAQ,YAMpB"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
export type TJudgeMethod = 'bEval' | 'gEval' | 'llmRubric';
|
|
3
|
+
export type TGevalInput = string | {
|
|
4
|
+
query: string;
|
|
5
|
+
answer: string;
|
|
6
|
+
};
|
|
7
|
+
export interface IStepsCache {
|
|
8
|
+
set(key: string, value: string[]): Promise<void>;
|
|
9
|
+
get(key: string): Promise<string[] | undefined>;
|
|
10
|
+
}
|
|
11
|
+
export type TVercelOptions = Record<string, any>;
|
|
12
|
+
export interface IJudgeHooks {
|
|
13
|
+
onSuccess?: (data: {
|
|
14
|
+
method: TJudgeMethod;
|
|
15
|
+
params: any;
|
|
16
|
+
result: any;
|
|
17
|
+
duration: number;
|
|
18
|
+
}) => void;
|
|
19
|
+
onError?: (data: {
|
|
20
|
+
method: TJudgeMethod;
|
|
21
|
+
error: any;
|
|
22
|
+
duration: number;
|
|
23
|
+
}) => void;
|
|
24
|
+
}
|
|
25
|
+
export declare const RubricResultSchema: z.ZodObject<{
|
|
26
|
+
reason: z.ZodString;
|
|
27
|
+
pass: z.ZodBoolean;
|
|
28
|
+
score: z.ZodNumber;
|
|
29
|
+
}, z.core.$strip>;
|
|
30
|
+
export type TRubricResult = z.infer<typeof RubricResultSchema>;
|
|
31
|
+
export declare const GevalStepsResultSchema: z.ZodObject<{
|
|
32
|
+
steps: z.ZodArray<z.ZodString>;
|
|
33
|
+
}, z.core.$strip>;
|
|
34
|
+
export type TGevalStepsResult = z.infer<typeof GevalStepsResultSchema>;
|
|
35
|
+
export declare const GevalEvaluateResultSchema: z.ZodObject<{
|
|
36
|
+
reason: z.ZodString;
|
|
37
|
+
score: z.ZodNumber;
|
|
38
|
+
}, z.core.$strip>;
|
|
39
|
+
export type TGevalEvaluateResult = z.infer<typeof GevalEvaluateResultSchema>;
|
package/dst/src/types.js
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.GevalEvaluateResultSchema = exports.GevalStepsResultSchema = exports.RubricResultSchema = void 0;
|
|
4
|
+
const zod_1 = require("zod");
|
|
5
|
+
// Zod schema for a rubric verdict: a `reason` string, a `pass` boolean and a numeric `score`
// constrained to the closed range 0..1.
exports.RubricResultSchema = zod_1.z.object({
|
|
6
|
+
reason: zod_1.z.string().describe('Detailed explanation of the score based on the rubric'),
|
|
7
|
+
pass: zod_1.z.boolean().describe('Whether the output satisfies the minimum requirements'),
|
|
8
|
+
score: zod_1.z.number().min(0).max(1).describe('Numeric representation of quality'),
|
|
9
|
+
});
|
|
10
|
+
// Zod schema for the step-generation reply: an object with a single `steps` array of strings.
// The array length is not constrained here.
exports.GevalStepsResultSchema = zod_1.z.object({
|
|
11
|
+
steps: zod_1.z.array(zod_1.z.string()).describe('List of concise evaluation steps derived from the criteria'),
|
|
12
|
+
});
|
|
13
|
+
// Zod schema for a G-Eval verdict: a `reason` string and a non-negative numeric `score`.
// NOTE(review): the G-Eval prompts ask for an integer between 0 and {{maxScore}}, but this
// schema enforces neither integrality nor an upper bound — presumably the caller range-checks
// or normalizes the score; confirm.
exports.GevalEvaluateResultSchema = zod_1.z.object({
|
|
14
|
+
reason: zod_1.z.string().describe('Detailed explanation of the score based on the rubric'),
|
|
15
|
+
score: zod_1.z.number().min(0).describe('Numeric representation of quality'),
|
|
16
|
+
});
|
|
17
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/types.ts"],"names":[],"mappings":";;;AAAA,6BAAwB;AA+DX,QAAA,kBAAkB,GAAG,OAAC,CAAC,MAAM,CAAC;IAEzC,MAAM,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;IAEpF,IAAI,EAAE,OAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;IAEnF,KAAK,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,mCAAmC,CAAC;CAC9E,CAAC,CAAC;AAUU,QAAA,sBAAsB,GAAG,OAAC,CAAC,MAAM,CAAC;IAE7C,KAAK,EAAE,OAAC,CAAC,KAAK,CAAC,OAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,CAAC,4DAA4D,CAAC;CAClG,CAAC,CAAC;AAUU,QAAA,yBAAyB,GAAG,OAAC,CAAC,MAAM,CAAC;IAEhD,MAAM,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;IAEpF,KAAK,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,mCAAmC,CAAC;CACvE,CAAC,CAAC"}
|
package/dst/types.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":";;;AAAA,6BAAwB;
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":";;;AAAA,6BAAwB;AA+DX,QAAA,kBAAkB,GAAG,OAAC,CAAC,MAAM,CAAC;IAEzC,MAAM,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;IAEpF,IAAI,EAAE,OAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;IAEnF,KAAK,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,mCAAmC,CAAC;CAC9E,CAAC,CAAC;AAUU,QAAA,sBAAsB,GAAG,OAAC,CAAC,MAAM,CAAC;IAE7C,KAAK,EAAE,OAAC,CAAC,KAAK,CAAC,OAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,CAAC,4DAA4D,CAAC;CAClG,CAAC,CAAC;AAUU,QAAA,yBAAyB,GAAG,OAAC,CAAC,MAAM,CAAC;IAEhD,MAAM,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;IAEpF,KAAK,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,mCAAmC,CAAC;CACvE,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@eva-llm/eva-judge",
|
|
3
|
-
"version": "1.0.6",
|
|
3
|
+
"version": "1.0.7",
|
|
4
4
|
"description": "LLM-as-a-Judge abstraction layer using ai-sdk and plugins",
|
|
5
5
|
"main": "dst/index.js",
|
|
6
6
|
"types": "dst/index.d.ts",
|
|
@@ -21,33 +21,39 @@
|
|
|
21
21
|
},
|
|
22
22
|
"homepage": "https://eva-llm.github.io/eva-judge",
|
|
23
23
|
"devDependencies": {
|
|
24
|
+
"@eslint/js": "^10.0.1",
|
|
24
25
|
"@types/jest": "^30.0.0",
|
|
25
26
|
"@types/mustache": "^4.2.6",
|
|
26
|
-
"@types/node": "^
|
|
27
|
-
"
|
|
28
|
-
"
|
|
27
|
+
"@types/node": "^26.0.1",
|
|
28
|
+
"eslint": "^10.5.0",
|
|
29
|
+
"husky": "^9.1.7",
|
|
30
|
+
"jest": "^30.4.2",
|
|
31
|
+
"ts-jest": "^29.4.11",
|
|
29
32
|
"ts-node": "^10.9.2",
|
|
30
|
-
"typescript": "^
|
|
33
|
+
"typescript": "^6.0.3",
|
|
34
|
+
"typescript-eslint": "^8.62.0"
|
|
31
35
|
},
|
|
32
36
|
"dependencies": {
|
|
33
|
-
"@ai-sdk/amazon-bedrock": "^
|
|
34
|
-
"@ai-sdk/anthropic": "^
|
|
35
|
-
"@ai-sdk/azure": "^
|
|
36
|
-
"@ai-sdk/deepseek": "^
|
|
37
|
-
"@ai-sdk/google": "^
|
|
38
|
-
"@ai-sdk/groq": "^
|
|
39
|
-
"@ai-sdk/mistral": "^
|
|
40
|
-
"@ai-sdk/openai": "^
|
|
41
|
-
"@ai-sdk/perplexity": "^
|
|
42
|
-
"@ai-sdk/xai": "^
|
|
43
|
-
"ai": "^
|
|
44
|
-
"lru-cache": "^11.
|
|
37
|
+
"@ai-sdk/amazon-bedrock": "^5.0.0",
|
|
38
|
+
"@ai-sdk/anthropic": "^4.0.0",
|
|
39
|
+
"@ai-sdk/azure": "^4.0.0",
|
|
40
|
+
"@ai-sdk/deepseek": "^3.0.0",
|
|
41
|
+
"@ai-sdk/google": "^4.0.0",
|
|
42
|
+
"@ai-sdk/groq": "^4.0.0",
|
|
43
|
+
"@ai-sdk/mistral": "^4.0.0",
|
|
44
|
+
"@ai-sdk/openai": "^4.0.0",
|
|
45
|
+
"@ai-sdk/perplexity": "^4.0.0",
|
|
46
|
+
"@ai-sdk/xai": "^4.0.0",
|
|
47
|
+
"ai": "^7.0.0",
|
|
48
|
+
"lru-cache": "^11.5.1",
|
|
45
49
|
"mustache": "^4.2.0",
|
|
46
|
-
"zod": "^4.3
|
|
50
|
+
"zod": "^4.4.3"
|
|
47
51
|
},
|
|
48
52
|
"scripts": {
|
|
49
53
|
"build": "tsc",
|
|
50
54
|
"example": "ts-node scripts/example.ts",
|
|
55
|
+
"lint": "eslint src",
|
|
56
|
+
"lint:fix": "eslint src --fix",
|
|
51
57
|
"test": "jest",
|
|
52
58
|
"test:coverage": "jest --coverage"
|
|
53
59
|
}
|