judgeval 0.1.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +202 -0
- package/README.md +340 -0
- package/dist/clients.d.ts +7 -0
- package/dist/clients.js +78 -0
- package/dist/clients.js.map +1 -0
- package/dist/common/integrations/langgraph.d.ts +40 -0
- package/dist/common/integrations/langgraph.js +444 -0
- package/dist/common/integrations/langgraph.js.map +1 -0
- package/dist/common/logger-instance.d.ts +3 -0
- package/dist/common/logger-instance.js +64 -0
- package/dist/common/logger-instance.js.map +1 -0
- package/dist/common/logger.d.ts +54 -0
- package/dist/common/logger.js +221 -0
- package/dist/common/logger.js.map +1 -0
- package/dist/common/tracer.d.ts +205 -0
- package/dist/common/tracer.js +1035 -0
- package/dist/common/tracer.js.map +1 -0
- package/dist/constants.d.ts +51 -0
- package/dist/constants.js +344 -0
- package/dist/constants.js.map +1 -0
- package/dist/data/example.d.ts +70 -0
- package/dist/data/example.js +125 -0
- package/dist/data/example.js.map +1 -0
- package/dist/data/result.d.ts +51 -0
- package/dist/data/result.js +83 -0
- package/dist/data/result.js.map +1 -0
- package/dist/evaluation-run.d.ts +44 -0
- package/dist/evaluation-run.js +136 -0
- package/dist/evaluation-run.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.js +73 -0
- package/dist/index.js.map +1 -0
- package/dist/judgment-client.d.ts +179 -0
- package/dist/judgment-client.js +1038 -0
- package/dist/judgment-client.js.map +1 -0
- package/dist/rules.d.ts +120 -0
- package/dist/rules.js +322 -0
- package/dist/rules.js.map +1 -0
- package/dist/run-evaluation.d.ts +78 -0
- package/dist/run-evaluation.js +618 -0
- package/dist/run-evaluation.js.map +1 -0
- package/dist/scorers/api-scorer.d.ts +79 -0
- package/dist/scorers/api-scorer.js +291 -0
- package/dist/scorers/api-scorer.js.map +1 -0
- package/dist/scorers/base-scorer.d.ts +100 -0
- package/dist/scorers/base-scorer.js +190 -0
- package/dist/scorers/base-scorer.js.map +1 -0
- package/dist/scorers/exact-match-scorer.d.ts +10 -0
- package/dist/scorers/exact-match-scorer.js +84 -0
- package/dist/scorers/exact-match-scorer.js.map +1 -0
- package/package.json +88 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { APIJudgmentScorer } from './base-scorer';
|
|
2
|
+
import { Example } from '../data/example';
|
|
3
|
+
import { ScorerData } from '../data/result';
|
|
4
|
+
/**
|
|
5
|
+
* Implementation of API-based scorers
|
|
6
|
+
*/
|
|
7
|
+
/**
 * API scorer for the `answer_correctness` score type.
 */
export declare class AnswerCorrectnessScorer extends APIJudgmentScorer {
    /**
     * @param threshold Optional threshold; the implementation defaults it to 0.7.
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Optional verbosity flag; the implementation defaults it to false.
     */
    constructor(
        threshold?: number,
        additional_metadata?: Record<string, any>,
        verbose?: boolean
    );
    /** The shipped implementation always rejects: API scorers are evaluated on the server side. */
    a_score_example(example: Example): Promise<ScorerData>;
}
|
|
11
|
+
/**
 * API scorer for the `answer_relevancy` score type.
 */
export declare class AnswerRelevancyScorer extends APIJudgmentScorer {
    /**
     * @param threshold Optional threshold; the implementation defaults it to 0.7.
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Optional verbosity flag; the implementation defaults it to false.
     */
    constructor(
        threshold?: number,
        additional_metadata?: Record<string, any>,
        verbose?: boolean
    );
    /** The shipped implementation always rejects: API scorers are evaluated on the server side. */
    a_score_example(example: Example): Promise<ScorerData>;
}
|
|
15
|
+
/**
 * API scorer for the `comparison` score type. Carries the comparison
 * criteria and a description alongside the common scorer fields.
 */
export declare class ComparisonScorer extends APIJudgmentScorer {
    /** Criteria the comparison is based on. */
    criteria: string[];
    /** Free-text description of the comparison. */
    description: string;
    /**
     * @param threshold Optional threshold; the implementation defaults it to 0.5
     *   and throws when it is below 0.
     * @param criteria Optional criteria list; the implementation supplies a default list.
     * @param description Optional description; the implementation supplies a default text.
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Optional verbosity flag; the implementation defaults it to false.
     */
    constructor(
        threshold?: number,
        criteria?: string[],
        description?: string,
        additional_metadata?: Record<string, any>,
        verbose?: boolean
    );
    /** Plain-object form including `criteria` and `description`. */
    toJSON(): Record<string, any>;
    /** The shipped implementation always rejects: API scorers are evaluated on the server side. */
    a_score_example(example: Example): Promise<ScorerData>;
}
|
|
22
|
+
/**
 * API scorer for the `contextual_precision` score type.
 */
export declare class ContextualPrecisionScorer extends APIJudgmentScorer {
    /**
     * @param threshold Optional threshold; the implementation defaults it to 0.7.
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Optional verbosity flag; the implementation defaults it to false.
     */
    constructor(
        threshold?: number,
        additional_metadata?: Record<string, any>,
        verbose?: boolean
    );
    /** The shipped implementation always rejects: API scorers are evaluated on the server side. */
    a_score_example(example: Example): Promise<ScorerData>;
}
|
|
26
|
+
/**
 * API scorer for the `contextual_recall` score type.
 */
export declare class ContextualRecallScorer extends APIJudgmentScorer {
    /**
     * @param threshold Optional threshold; the implementation defaults it to 0.7.
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Optional verbosity flag; the implementation defaults it to false.
     */
    constructor(
        threshold?: number,
        additional_metadata?: Record<string, any>,
        verbose?: boolean
    );
    /** The shipped implementation always rejects: API scorers are evaluated on the server side. */
    a_score_example(example: Example): Promise<ScorerData>;
}
|
|
30
|
+
/**
 * API scorer for the `contextual_relevancy` score type.
 */
export declare class ContextualRelevancyScorer extends APIJudgmentScorer {
    /**
     * @param threshold Optional threshold; the implementation defaults it to 0.7.
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Optional verbosity flag; the implementation defaults it to false.
     */
    constructor(
        threshold?: number,
        additional_metadata?: Record<string, any>,
        verbose?: boolean
    );
    /** The shipped implementation always rejects: API scorers are evaluated on the server side. */
    a_score_example(example: Example): Promise<ScorerData>;
}
|
|
34
|
+
/**
 * API scorer for the `execution_order` score type. Carries a strict-mode
 * flag and an optional list of expected tools alongside the common fields.
 */
export declare class ExecutionOrderScorer extends APIJudgmentScorer {
    /** Strict-mode flag, serialized as `strict_mode`. */
    strictMode: boolean;
    /** Expected tool names, serialized as `expected_tools`. */
    expectedTools?: string[];
    /**
     * @param threshold Optional threshold; the implementation defaults it to 1.0.
     * @param strictMode Optional strict-mode flag; the implementation defaults it to true.
     * @param expectedTools Optional list of expected tool names.
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Optional verbosity flag; the implementation defaults it to false.
     */
    constructor(
        threshold?: number,
        strictMode?: boolean,
        expectedTools?: string[],
        additional_metadata?: Record<string, any>,
        verbose?: boolean
    );
    /** Plain-object form including `strict_mode` and `expected_tools`. */
    toJSON(): Record<string, any>;
    /** The shipped implementation always rejects: API scorers are evaluated on the server side. */
    a_score_example(example: Example): Promise<ScorerData>;
}
|
|
41
|
+
/**
 * API scorer for the `faithfulness` score type.
 */
export declare class FaithfulnessScorer extends APIJudgmentScorer {
    /**
     * @param threshold Optional threshold; the implementation defaults it to 0.7.
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Optional verbosity flag; the implementation defaults it to false.
     */
    constructor(
        threshold?: number,
        additional_metadata?: Record<string, any>,
        verbose?: boolean
    );
    /** The shipped implementation always rejects: API scorers are evaluated on the server side. */
    a_score_example(example: Example): Promise<ScorerData>;
}
|
|
45
|
+
/**
 * API scorer for the `groundedness` score type.
 */
export declare class GroundednessScorer extends APIJudgmentScorer {
    /**
     * @param threshold Optional threshold; the implementation defaults it to 0.7.
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Optional verbosity flag; the implementation defaults it to false.
     */
    constructor(
        threshold?: number,
        additional_metadata?: Record<string, any>,
        verbose?: boolean
    );
    /** The shipped implementation always rejects: API scorers are evaluated on the server side. */
    a_score_example(example: Example): Promise<ScorerData>;
}
|
|
49
|
+
/**
 * API scorer for the `hallucination` score type.
 */
export declare class HallucinationScorer extends APIJudgmentScorer {
    /**
     * @param threshold Optional threshold; the implementation defaults it to 0.7.
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Optional verbosity flag; the implementation defaults it to false.
     */
    constructor(
        threshold?: number,
        additional_metadata?: Record<string, any>,
        verbose?: boolean
    );
    /** The shipped implementation always rejects: API scorers are evaluated on the server side. */
    a_score_example(example: Example): Promise<ScorerData>;
}
|
|
53
|
+
/**
 * API scorer for the `instruction_adherence` score type.
 */
export declare class InstructionAdherenceScorer extends APIJudgmentScorer {
    /**
     * @param threshold Optional threshold; the implementation defaults it to 0.7.
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Optional verbosity flag; the implementation defaults it to false.
     */
    constructor(
        threshold?: number,
        additional_metadata?: Record<string, any>,
        verbose?: boolean
    );
    /** The shipped implementation always rejects: API scorers are evaluated on the server side. */
    a_score_example(example: Example): Promise<ScorerData>;
}
|
|
57
|
+
/**
 * API scorer for the `json_correctness` score type. Carries an optional
 * JSON schema alongside the common scorer fields.
 */
export declare class JsonCorrectnessScorer extends APIJudgmentScorer {
    /** Optional schema object, serialized as `json_schema`. */
    jsonSchema?: Record<string, any>;
    /**
     * @param threshold Optional threshold; the implementation defaults it to 0.7.
     * @param jsonSchema Optional schema object stored on the scorer.
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Optional verbosity flag; the implementation defaults it to false.
     */
    constructor(
        threshold?: number,
        jsonSchema?: Record<string, any>,
        additional_metadata?: Record<string, any>,
        verbose?: boolean
    );
    /** Plain-object form including `json_schema`. */
    toJSON(): Record<string, any>;
    /** The shipped implementation always rejects: API scorers are evaluated on the server side. */
    a_score_example(example: Example): Promise<ScorerData>;
}
|
|
63
|
+
/**
 * API scorer for the `summarization` score type.
 */
export declare class SummarizationScorer extends APIJudgmentScorer {
    /**
     * @param threshold Optional threshold; the implementation defaults it to 0.7.
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Optional verbosity flag; the implementation defaults it to false.
     */
    constructor(
        threshold?: number,
        additional_metadata?: Record<string, any>,
        verbose?: boolean
    );
    /** The shipped implementation always rejects: API scorers are evaluated on the server side. */
    a_score_example(example: Example): Promise<ScorerData>;
}
|
|
67
|
+
/**
 * API scorer for the `text2sql` score type.
 */
export declare class Text2SQLScorer extends APIJudgmentScorer {
    /**
     * @param threshold Optional threshold; the implementation defaults it to 0.7.
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Optional verbosity flag; the implementation defaults it to false.
     */
    constructor(
        threshold?: number,
        additional_metadata?: Record<string, any>,
        verbose?: boolean
    );
    /** The shipped implementation always rejects: API scorers are evaluated on the server side. */
    a_score_example(example: Example): Promise<ScorerData>;
}
|
|
71
|
+
/**
 * Wrapper around a single API scorer. Mirrors the wrapped scorer's score
 * type, threshold, metadata and JSON form, and hosts the factory that
 * builds a scorer from a type string.
 */
export declare class ScorerWrapper {
    private scorer;
    /** @param scorer The scorer instance to wrap. */
    constructor(scorer: APIJudgmentScorer);
    /** Score type reported by the wrapped scorer. */
    get scoreType(): string;
    /** Threshold of the wrapped scorer. */
    get threshold(): number;
    /** Additional metadata of the wrapped scorer, if any. */
    get additional_metadata(): Record<string, any> | undefined;
    /** JSON form produced by the wrapped scorer. */
    toJSON(): Record<string, any>;
    /**
     * Build the scorer registered for `type`; the implementation lower-cases
     * `type` before matching.
     *
     * @param type Score type name, e.g. `faithfulness`.
     * @param threshold Threshold passed to the scorer constructor.
     * @param additional_metadata Optional metadata passed to the scorer constructor.
     * @param verbose Optional verbosity flag; the implementation defaults it to false.
     * @throws Error when `type` does not name a known scorer.
     */
    static fromType(
        type: string,
        threshold: number,
        additional_metadata?: Record<string, any>,
        verbose?: boolean
    ): APIJudgmentScorer;
}
|
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Compiler-emitted async helper: drives a generator as a promise chain.
// Reuses an existing `this.__awaiter` when one is present.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Fall back to the global Promise when no promise constructor is supplied.
    if (!P) {
        P = Promise;
    }
    // Wrap a yielded value in a P instance unless it already is one.
    const adopt = (value) => (value instanceof P ? value : new P((resolve) => { resolve(value); }));
    return new P((resolve, reject) => {
        // Settle on completion, otherwise wait for the yielded value and continue.
        const step = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            adopt(result.value).then(onFulfilled, onRejected);
        };
        // Feed a fulfilled value back into the generator.
        const onFulfilled = (value) => {
            try {
                step(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        // Throw a rejection reason into the generator so it can catch it.
        const onRejected = (value) => {
            try {
                step(generator["throw"](value));
            }
            catch (e) {
                reject(e);
            }
        };
        // Instantiate the generator and run it up to its first yield.
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.ScorerWrapper = exports.Text2SQLScorer = exports.SummarizationScorer = exports.JsonCorrectnessScorer = exports.InstructionAdherenceScorer = exports.HallucinationScorer = exports.GroundednessScorer = exports.FaithfulnessScorer = exports.ExecutionOrderScorer = exports.ContextualRelevancyScorer = exports.ContextualRecallScorer = exports.ContextualPrecisionScorer = exports.ComparisonScorer = exports.AnswerRelevancyScorer = exports.AnswerCorrectnessScorer = void 0;
|
|
13
|
+
const base_scorer_1 = require("./base-scorer");
|
|
14
|
+
/**
|
|
15
|
+
* Implementation of API-based scorers
|
|
16
|
+
*/
|
|
17
|
+
/**
 * API scorer registered under the 'answer_correctness' score type.
 */
class AnswerCorrectnessScorer extends base_scorer_1.APIJudgmentScorer {
    /**
     * @param threshold Threshold forwarded to the base scorer (default 0.7).
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Verbosity flag forwarded to the base scorer (default false).
     */
    constructor(threshold = 0.7, additional_metadata, verbose = false) {
        super('answer_correctness', threshold, additional_metadata, verbose);
        // Threshold checking is delegated to the inherited validator.
        this.validateThreshold();
    }
    /**
     * Local scoring entry point. Always returns a rejected promise because
     * this scorer is evaluated on the server side.
     */
    a_score_example(example) {
        return Promise.reject(new Error('API scorers are evaluated on the server side'));
    }
}
|
|
28
|
+
exports.AnswerCorrectnessScorer = AnswerCorrectnessScorer;
|
|
29
|
+
/**
 * API scorer registered under the 'answer_relevancy' score type.
 */
class AnswerRelevancyScorer extends base_scorer_1.APIJudgmentScorer {
    /**
     * @param threshold Threshold forwarded to the base scorer (default 0.7).
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Verbosity flag forwarded to the base scorer (default false).
     */
    constructor(threshold = 0.7, additional_metadata, verbose = false) {
        super('answer_relevancy', threshold, additional_metadata, verbose);
        // Threshold checking is delegated to the inherited validator.
        this.validateThreshold();
    }
    /**
     * Local scoring entry point. Always returns a rejected promise because
     * this scorer is evaluated on the server side.
     */
    a_score_example(example) {
        return Promise.reject(new Error('API scorers are evaluated on the server side'));
    }
}
|
|
40
|
+
exports.AnswerRelevancyScorer = AnswerRelevancyScorer;
|
|
41
|
+
/**
 * API scorer registered under the 'comparison' score type. Stores the
 * comparison criteria and a description next to the common scorer fields.
 */
class ComparisonScorer extends base_scorer_1.APIJudgmentScorer {
    /**
     * @param threshold Threshold forwarded to the base scorer (default 0.5); must not be negative.
     * @param criteria Criteria list stored on the scorer.
     * @param description Description text stored on the scorer.
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Verbosity flag forwarded to the base scorer (default false).
     * @throws Error when threshold is below 0.
     */
    constructor(threshold = 0.5, criteria = ['Accuracy', 'Helpfulness', 'Relevance'], description = 'Compare the outputs based on the given criteria', additional_metadata, verbose = false) {
        super('comparison', threshold, additional_metadata, verbose);
        this.criteria = criteria;
        this.description = description;
        // Comparison is unbounded, so the inherited validator is not used;
        // only the lower bound is enforced here.
        const belowZero = threshold < 0;
        if (belowZero) {
            throw new Error(`Threshold for comparison must be greater than or equal to 0, got: ${threshold}`);
        }
    }
    /**
     * Plain-object form of the scorer, including criteria and description.
     */
    toJSON() {
        const { threshold, criteria, description, additional_metadata, verbose } = this;
        return {
            score_type: 'comparison',
            threshold,
            criteria,
            description,
            additional_metadata,
            verbose
        };
    }
    /**
     * Local scoring entry point. Always returns a rejected promise because
     * this scorer is evaluated on the server side.
     */
    a_score_example(example) {
        return Promise.reject(new Error('API scorers are evaluated on the server side'));
    }
}
|
|
67
|
+
exports.ComparisonScorer = ComparisonScorer;
|
|
68
|
+
/**
 * API scorer registered under the 'contextual_precision' score type.
 */
class ContextualPrecisionScorer extends base_scorer_1.APIJudgmentScorer {
    /**
     * @param threshold Threshold forwarded to the base scorer (default 0.7).
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Verbosity flag forwarded to the base scorer (default false).
     */
    constructor(threshold = 0.7, additional_metadata, verbose = false) {
        super('contextual_precision', threshold, additional_metadata, verbose);
        // Threshold checking is delegated to the inherited validator.
        this.validateThreshold();
    }
    /**
     * Local scoring entry point. Always returns a rejected promise because
     * this scorer is evaluated on the server side.
     */
    a_score_example(example) {
        return Promise.reject(new Error('API scorers are evaluated on the server side'));
    }
}
|
|
79
|
+
exports.ContextualPrecisionScorer = ContextualPrecisionScorer;
|
|
80
|
+
/**
 * API scorer registered under the 'contextual_recall' score type.
 */
class ContextualRecallScorer extends base_scorer_1.APIJudgmentScorer {
    /**
     * @param threshold Threshold forwarded to the base scorer (default 0.7).
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Verbosity flag forwarded to the base scorer (default false).
     */
    constructor(threshold = 0.7, additional_metadata, verbose = false) {
        super('contextual_recall', threshold, additional_metadata, verbose);
        // Threshold checking is delegated to the inherited validator.
        this.validateThreshold();
    }
    /**
     * Local scoring entry point. Always returns a rejected promise because
     * this scorer is evaluated on the server side.
     */
    a_score_example(example) {
        return Promise.reject(new Error('API scorers are evaluated on the server side'));
    }
}
|
|
91
|
+
exports.ContextualRecallScorer = ContextualRecallScorer;
|
|
92
|
+
/**
 * API scorer registered under the 'contextual_relevancy' score type.
 */
class ContextualRelevancyScorer extends base_scorer_1.APIJudgmentScorer {
    /**
     * @param threshold Threshold forwarded to the base scorer (default 0.7).
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Verbosity flag forwarded to the base scorer (default false).
     */
    constructor(threshold = 0.7, additional_metadata, verbose = false) {
        super('contextual_relevancy', threshold, additional_metadata, verbose);
        // Threshold checking is delegated to the inherited validator.
        this.validateThreshold();
    }
    /**
     * Local scoring entry point. Always returns a rejected promise because
     * this scorer is evaluated on the server side.
     */
    a_score_example(example) {
        return Promise.reject(new Error('API scorers are evaluated on the server side'));
    }
}
|
|
103
|
+
exports.ContextualRelevancyScorer = ContextualRelevancyScorer;
|
|
104
|
+
/**
 * API scorer registered under the 'execution_order' score type. Stores a
 * strict-mode flag and an optional expected-tools list next to the common
 * scorer fields.
 */
class ExecutionOrderScorer extends base_scorer_1.APIJudgmentScorer {
    /**
     * @param threshold Threshold forwarded to the base scorer (default 1.0).
     * @param strictMode Strict-mode flag stored on the scorer (default true).
     * @param expectedTools Optional expected-tools list stored on the scorer.
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Verbosity flag forwarded to the base scorer (default false).
     */
    constructor(threshold = 1.0, strictMode = true, expectedTools, additional_metadata, verbose = false) {
        super('execution_order', threshold, additional_metadata, verbose);
        this.strictMode = strictMode;
        this.expectedTools = expectedTools;
        // Threshold checking is delegated to the inherited validator.
        this.validateThreshold();
    }
    /**
     * Plain-object form of the scorer; the camelCase fields are emitted
     * under snake_case keys.
     */
    toJSON() {
        const { threshold, strictMode, expectedTools, additional_metadata, verbose } = this;
        return {
            score_type: 'execution_order',
            threshold,
            strict_mode: strictMode,
            expected_tools: expectedTools,
            additional_metadata,
            verbose
        };
    }
    /**
     * Local scoring entry point. Always returns a rejected promise because
     * this scorer is evaluated on the server side.
     */
    a_score_example(example) {
        return Promise.reject(new Error('API scorers are evaluated on the server side'));
    }
}
|
|
127
|
+
exports.ExecutionOrderScorer = ExecutionOrderScorer;
|
|
128
|
+
/**
 * API scorer registered under the 'faithfulness' score type.
 */
class FaithfulnessScorer extends base_scorer_1.APIJudgmentScorer {
    /**
     * @param threshold Threshold forwarded to the base scorer (default 0.7).
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Verbosity flag forwarded to the base scorer (default false).
     */
    constructor(threshold = 0.7, additional_metadata, verbose = false) {
        super('faithfulness', threshold, additional_metadata, verbose);
        // Threshold checking is delegated to the inherited validator.
        this.validateThreshold();
    }
    /**
     * Local scoring entry point. Always returns a rejected promise because
     * this scorer is evaluated on the server side.
     */
    a_score_example(example) {
        return Promise.reject(new Error('API scorers are evaluated on the server side'));
    }
}
|
|
139
|
+
exports.FaithfulnessScorer = FaithfulnessScorer;
|
|
140
|
+
/**
 * API scorer registered under the 'groundedness' score type.
 */
class GroundednessScorer extends base_scorer_1.APIJudgmentScorer {
    /**
     * @param threshold Threshold forwarded to the base scorer (default 0.7).
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Verbosity flag forwarded to the base scorer (default false).
     */
    constructor(threshold = 0.7, additional_metadata, verbose = false) {
        super('groundedness', threshold, additional_metadata, verbose);
        // Threshold checking is delegated to the inherited validator.
        this.validateThreshold();
    }
    /**
     * Local scoring entry point. Always returns a rejected promise because
     * this scorer is evaluated on the server side.
     */
    a_score_example(example) {
        return Promise.reject(new Error('API scorers are evaluated on the server side'));
    }
}
|
|
151
|
+
exports.GroundednessScorer = GroundednessScorer;
|
|
152
|
+
/**
 * API scorer registered under the 'hallucination' score type.
 */
class HallucinationScorer extends base_scorer_1.APIJudgmentScorer {
    /**
     * @param threshold Threshold forwarded to the base scorer (default 0.7).
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Verbosity flag forwarded to the base scorer (default false).
     */
    constructor(threshold = 0.7, additional_metadata, verbose = false) {
        super('hallucination', threshold, additional_metadata, verbose);
        // Threshold checking is delegated to the inherited validator.
        this.validateThreshold();
    }
    /**
     * Local scoring entry point. Always returns a rejected promise because
     * this scorer is evaluated on the server side.
     */
    a_score_example(example) {
        return Promise.reject(new Error('API scorers are evaluated on the server side'));
    }
}
|
|
163
|
+
exports.HallucinationScorer = HallucinationScorer;
|
|
164
|
+
/**
 * API scorer registered under the 'instruction_adherence' score type.
 */
class InstructionAdherenceScorer extends base_scorer_1.APIJudgmentScorer {
    /**
     * @param threshold Threshold forwarded to the base scorer (default 0.7).
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Verbosity flag forwarded to the base scorer (default false).
     */
    constructor(threshold = 0.7, additional_metadata, verbose = false) {
        super('instruction_adherence', threshold, additional_metadata, verbose);
        // Threshold checking is delegated to the inherited validator.
        this.validateThreshold();
    }
    /**
     * Local scoring entry point. Always returns a rejected promise because
     * this scorer is evaluated on the server side.
     */
    a_score_example(example) {
        return Promise.reject(new Error('API scorers are evaluated on the server side'));
    }
}
|
|
175
|
+
exports.InstructionAdherenceScorer = InstructionAdherenceScorer;
|
|
176
|
+
/**
 * API scorer registered under the 'json_correctness' score type. Stores an
 * optional JSON schema next to the common scorer fields.
 */
class JsonCorrectnessScorer extends base_scorer_1.APIJudgmentScorer {
    /**
     * @param threshold Threshold forwarded to the base scorer (default 0.7).
     * @param jsonSchema Optional schema object stored on the scorer.
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Verbosity flag forwarded to the base scorer (default false).
     */
    constructor(threshold = 0.7, jsonSchema, additional_metadata, verbose = false) {
        super('json_correctness', threshold, additional_metadata, verbose);
        this.jsonSchema = jsonSchema;
        // Threshold checking is delegated to the inherited validator.
        this.validateThreshold();
    }
    /**
     * Plain-object form of the scorer; the schema is emitted as `json_schema`.
     */
    toJSON() {
        const { threshold, jsonSchema, additional_metadata, verbose } = this;
        return {
            score_type: 'json_correctness',
            threshold,
            json_schema: jsonSchema,
            additional_metadata,
            verbose
        };
    }
    /**
     * Local scoring entry point. Always returns a rejected promise because
     * this scorer is evaluated on the server side.
     */
    a_score_example(example) {
        return Promise.reject(new Error('API scorers are evaluated on the server side'));
    }
}
|
|
197
|
+
exports.JsonCorrectnessScorer = JsonCorrectnessScorer;
|
|
198
|
+
/**
 * API scorer registered under the 'summarization' score type.
 */
class SummarizationScorer extends base_scorer_1.APIJudgmentScorer {
    /**
     * @param threshold Threshold forwarded to the base scorer (default 0.7).
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Verbosity flag forwarded to the base scorer (default false).
     */
    constructor(threshold = 0.7, additional_metadata, verbose = false) {
        super('summarization', threshold, additional_metadata, verbose);
        // Threshold checking is delegated to the inherited validator.
        this.validateThreshold();
    }
    /**
     * Local scoring entry point. Always returns a rejected promise because
     * this scorer is evaluated on the server side.
     */
    a_score_example(example) {
        return Promise.reject(new Error('API scorers are evaluated on the server side'));
    }
}
|
|
209
|
+
exports.SummarizationScorer = SummarizationScorer;
|
|
210
|
+
/**
 * API scorer registered under the 'text2sql' score type.
 */
class Text2SQLScorer extends base_scorer_1.APIJudgmentScorer {
    /**
     * @param threshold Threshold forwarded to the base scorer (default 0.7).
     * @param additional_metadata Optional metadata forwarded to the base scorer.
     * @param verbose Verbosity flag forwarded to the base scorer (default false).
     */
    constructor(threshold = 0.7, additional_metadata, verbose = false) {
        super('text2sql', threshold, additional_metadata, verbose);
        // Threshold checking is delegated to the inherited validator.
        this.validateThreshold();
    }
    /**
     * Local scoring entry point. Always returns a rejected promise because
     * this scorer is evaluated on the server side.
     */
    a_score_example(example) {
        return Promise.reject(new Error('API scorers are evaluated on the server side'));
    }
}
|
|
221
|
+
exports.Text2SQLScorer = Text2SQLScorer;
|
|
222
|
+
// Create a ScorerWrapper class to dynamically load the appropriate implementation
|
|
223
|
+
class ScorerWrapper {
|
|
224
|
+
constructor(scorer) {
|
|
225
|
+
this.scorer = scorer;
|
|
226
|
+
}
|
|
227
|
+
get scoreType() {
|
|
228
|
+
return this.scorer.scoreType;
|
|
229
|
+
}
|
|
230
|
+
get threshold() {
|
|
231
|
+
return this.scorer.threshold;
|
|
232
|
+
}
|
|
233
|
+
get additional_metadata() {
|
|
234
|
+
return this.scorer.additional_metadata;
|
|
235
|
+
}
|
|
236
|
+
toJSON() {
|
|
237
|
+
return this.scorer.toJSON();
|
|
238
|
+
}
|
|
239
|
+
static fromType(type, threshold, additional_metadata, verbose = false) {
|
|
240
|
+
switch (type.toLowerCase()) {
|
|
241
|
+
case 'answer_correctness':
|
|
242
|
+
return new AnswerCorrectnessScorer(threshold, additional_metadata, verbose);
|
|
243
|
+
case 'answer_relevancy':
|
|
244
|
+
return new AnswerRelevancyScorer(threshold, additional_metadata, verbose);
|
|
245
|
+
case 'comparison':
|
|
246
|
+
// For comparison, extract criteria and description from metadata if available
|
|
247
|
+
const criteria = (additional_metadata === null || additional_metadata === void 0 ? void 0 : additional_metadata.criteria) || ['Accuracy', 'Helpfulness', 'Relevance'];
|
|
248
|
+
const description = (additional_metadata === null || additional_metadata === void 0 ? void 0 : additional_metadata.description) || 'Compare the outputs based on the given criteria';
|
|
249
|
+
const comparisonMetadata = Object.assign({}, additional_metadata);
|
|
250
|
+
comparisonMetadata === null || comparisonMetadata === void 0 ? true : delete comparisonMetadata.criteria;
|
|
251
|
+
comparisonMetadata === null || comparisonMetadata === void 0 ? true : delete comparisonMetadata.description;
|
|
252
|
+
return new ComparisonScorer(threshold, criteria, description, comparisonMetadata, verbose);
|
|
253
|
+
case 'contextual_precision':
|
|
254
|
+
return new ContextualPrecisionScorer(threshold, additional_metadata, verbose);
|
|
255
|
+
case 'contextual_recall':
|
|
256
|
+
return new ContextualRecallScorer(threshold, additional_metadata, verbose);
|
|
257
|
+
case 'contextual_relevancy':
|
|
258
|
+
return new ContextualRelevancyScorer(threshold, additional_metadata, verbose);
|
|
259
|
+
case 'execution_order':
|
|
260
|
+
// For execution order, extract strict_mode and expected_tools from metadata if available
|
|
261
|
+
const strictMode = (additional_metadata === null || additional_metadata === void 0 ? void 0 : additional_metadata.strict_mode) || true;
|
|
262
|
+
const expectedTools = additional_metadata === null || additional_metadata === void 0 ? void 0 : additional_metadata.expected_tools;
|
|
263
|
+
const executionOrderMetadata = Object.assign({}, additional_metadata);
|
|
264
|
+
executionOrderMetadata === null || executionOrderMetadata === void 0 ? true : delete executionOrderMetadata.strict_mode;
|
|
265
|
+
executionOrderMetadata === null || executionOrderMetadata === void 0 ? true : delete executionOrderMetadata.expected_tools;
|
|
266
|
+
return new ExecutionOrderScorer(threshold, strictMode, expectedTools, executionOrderMetadata, verbose);
|
|
267
|
+
case 'faithfulness':
|
|
268
|
+
return new FaithfulnessScorer(threshold, additional_metadata, verbose);
|
|
269
|
+
case 'groundedness':
|
|
270
|
+
return new GroundednessScorer(threshold, additional_metadata, verbose);
|
|
271
|
+
case 'hallucination':
|
|
272
|
+
return new HallucinationScorer(threshold, additional_metadata, verbose);
|
|
273
|
+
case 'instruction_adherence':
|
|
274
|
+
return new InstructionAdherenceScorer(threshold, additional_metadata, verbose);
|
|
275
|
+
case 'json_correctness':
|
|
276
|
+
// For JSON correctness, extract json_schema from metadata if available
|
|
277
|
+
const jsonSchema = additional_metadata === null || additional_metadata === void 0 ? void 0 : additional_metadata.json_schema;
|
|
278
|
+
const jsonMetadata = Object.assign({}, additional_metadata);
|
|
279
|
+
jsonMetadata === null || jsonMetadata === void 0 ? true : delete jsonMetadata.json_schema;
|
|
280
|
+
return new JsonCorrectnessScorer(threshold, jsonSchema, jsonMetadata, verbose);
|
|
281
|
+
case 'summarization':
|
|
282
|
+
return new SummarizationScorer(threshold, additional_metadata, verbose);
|
|
283
|
+
case 'text2sql':
|
|
284
|
+
return new Text2SQLScorer(threshold, additional_metadata, verbose);
|
|
285
|
+
default:
|
|
286
|
+
throw new Error(`Unknown scorer type: ${type}`);
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
exports.ScorerWrapper = ScorerWrapper;
|
|
291
|
+
//# sourceMappingURL=api-scorer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"api-scorer.js","sourceRoot":"","sources":["../../src/scorers/api-scorer.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,+CAAkD;AAKlD;;GAEG;AACH,MAAa,uBAAwB,SAAQ,+BAAiB;IAC5D,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,UAAmB,KAAK;QACtG,KAAK,CAAC,oBAAoB,EAAE,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;QACrE,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AATD,0DASC;AAED,MAAa,qBAAsB,SAAQ,+BAAiB;IAC1D,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,UAAmB,KAAK;QACtG,KAAK,CAAC,kBAAkB,EAAE,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;QACnE,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AATD,sDASC;AAED,MAAa,gBAAiB,SAAQ,+BAAiB;IAIrD,YACE,YAAoB,GAAG,EACvB,WAAqB,CAAC,UAAU,EAAE,aAAa,EAAE,WAAW,CAAC,EAC7D,cAAsB,iDAAiD,EACvE,mBAAyC,EACzC,UAAmB,KAAK;QAExB,KAAK,CAAC,YAAY,EAAE,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;QAC7D,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,uEAAuE;QACvE,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YAClB,MAAM,IAAI,KAAK,CAAC,qEAAqE,SAAS,EAAE,CAAC,CAAC;QACpG,CAAC;IACH,CAAC;IAED,MAAM;QACJ,OAAO;YACL,UAAU,EAAE,YAAY;YACxB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,mBAAmB,EAAE,IAAI,CAAC,mBAAmB;YAC7C,OAAO,EAAE,IAAI,CAAC,OAAO;SACtB,CAAC;IACJ,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AAlCD,4CAkCC;AAED,MAAa,yBAA0B,SAAQ,+BAAiB;IAC9D,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,UAAmB,KAAK;QACtG,KAAK,CAAC,sBAAsB,EAAE,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;QACvE,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AATD,8DASC;AAED,MAAa,sBAAuB,SAAQ,+BAAiB;IAC3D,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,UAAmB,KAAK;QACtG,KAAK,CAAC,mBAAmB,EAAE,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;QACpE,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;
KAAA;CACF;AATD,wDASC;AAED,MAAa,yBAA0B,SAAQ,+BAAiB;IAC9D,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,UAAmB,KAAK;QACtG,KAAK,CAAC,sBAAsB,EAAE,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;QACvE,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AATD,8DASC;AAED,MAAa,oBAAqB,SAAQ,+BAAiB;IAIzD,YAAY,YAAoB,GAAG,EAAE,aAAsB,IAAI,EAAE,aAAwB,EAAE,mBAAyC,EAAE,UAAmB,KAAK;QAC5J,KAAK,CAAC,iBAAiB,EAAE,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;QAClE,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;QACnC,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAED,MAAM;QACJ,OAAO;YACL,UAAU,EAAE,iBAAiB;YAC7B,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,WAAW,EAAE,IAAI,CAAC,UAAU;YAC5B,cAAc,EAAE,IAAI,CAAC,aAAa;YAClC,mBAAmB,EAAE,IAAI,CAAC,mBAAmB;YAC7C,OAAO,EAAE,IAAI,CAAC,OAAO;SACtB,CAAC;IACJ,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AAzBD,oDAyBC;AAED,MAAa,kBAAmB,SAAQ,+BAAiB;IACvD,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,UAAmB,KAAK;QACtG,KAAK,CAAC,cAAc,EAAE,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;QAC/D,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AATD,gDASC;AAED,MAAa,kBAAmB,SAAQ,+BAAiB;IACvD,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,UAAmB,KAAK;QACtG,KAAK,CAAC,cAAc,EAAE,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;QAC/D,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AATD,gDASC;AAED,MAAa,mBAAoB,SAAQ,+BAAiB;IACxD,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,UAAmB,KAAK;QACtG,KAAK,CAAC,eAAe,EAAE,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;QAChE,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AATD,kDASC;AAED,MAAa,0BAA2B,SAAQ,+BAAiB;IAC/D,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,UAAmB,KAAK;QACtG,KAAK,CAAC,uBAAuB,EAAE,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;QACxE,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8
CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AATD,gEASC;AAED,MAAa,qBAAsB,SAAQ,+BAAiB;IAG1D,YACE,YAAoB,GAAG,EACvB,UAAgC,EAChC,mBAAyC,EACzC,UAAmB,KAAK;QAExB,KAAK,CAAC,kBAAkB,EAAE,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;QACnE,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAED,MAAM;QACJ,OAAO;YACL,UAAU,EAAE,kBAAkB;YAC9B,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,WAAW,EAAE,IAAI,CAAC,UAAU;YAC5B,mBAAmB,EAAE,IAAI,CAAC,mBAAmB;YAC7C,OAAO,EAAE,IAAI,CAAC,OAAO;SACtB,CAAC;IACJ,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AA3BD,sDA2BC;AAED,MAAa,mBAAoB,SAAQ,+BAAiB;IACxD,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,UAAmB,KAAK;QACtG,KAAK,CAAC,eAAe,EAAE,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;QAChE,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AATD,kDASC;AAED,MAAa,cAAe,SAAQ,+BAAiB;IACnD,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,UAAmB,KAAK;QACtG,KAAK,CAAC,UAAU,EAAE,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;QAC3D,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AATD,wCASC;AAED,kFAAkF;AAClF,MAAa,aAAa;IAGxB,YAAY,MAAyB;QACnC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED,IAAI,SAAS;QACX,OAAO,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;IAC/B,CAAC;IAED,IAAI,SAAS;QACX,OAAO,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;IAC/B,CAAC;IAED,IAAI,mBAAmB;QACrB,OAAO,IAAI,CAAC,MAAM,CAAC,mBAAmB,CAAC;IACzC,CAAC;IAED,MAAM;QACJ,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;IAC9B,CAAC;IAED,MAAM,CAAC,QAAQ,CAAC,IAAY,EAAE,SAAiB,EAAE,mBAAyC,EAAE,UAAmB,KAAK;QAClH,QAAQ,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;YAC3B,KAAK,oBAAoB;gBACvB,OAAO,IAAI,uBAAuB,CAAC,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;YAC9E,KAAK,kBAAkB;gBACrB,OAAO,IAAI,qBAAqB,CAAC,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;YAC5E,KAAK,YAAY;gBACf,8EAA8E;gBAC9E,MAAM,QAAQ,GAAG,CAAA,mBAAmB,aAAnB,mBAAmB,uBAAnB,mBAAmB,CAAE,QAAoB,KAAI,CAAC,UAAU,EAAE,aAAa,EAAE,WAAW,CAAC,CAAC;gBACvG,MAAM,WAAW,GAAG,CAAA,mBAAmB,aAAnB,mBAAmB,uBAAnB,mBAAmB,CAAE,WAAqB,KAA
I,iDAAiD,CAAC;gBACpH,MAAM,kBAAkB,qBAAQ,mBAAmB,CAAE,CAAC;gBAC/C,kBAAkB,aAAlB,kBAAkB,4BAAlB,kBAAkB,CAAE,QAAQ,CAAC;gBAC7B,kBAAkB,aAAlB,kBAAkB,4BAAlB,kBAAkB,CAAE,WAAW,CAAC;gBACvC,OAAO,IAAI,gBAAgB,CAAC,SAAS,EAAE,QAAQ,EAAE,WAAW,EAAE,kBAAkB,EAAE,OAAO,CAAC,CAAC;YAC7F,KAAK,sBAAsB;gBACzB,OAAO,IAAI,yBAAyB,CAAC,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;YAChF,KAAK,mBAAmB;gBACtB,OAAO,IAAI,sBAAsB,CAAC,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;YAC7E,KAAK,sBAAsB;gBACzB,OAAO,IAAI,yBAAyB,CAAC,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;YAChF,KAAK,iBAAiB;gBACpB,yFAAyF;gBACzF,MAAM,UAAU,GAAG,CAAA,mBAAmB,aAAnB,mBAAmB,uBAAnB,mBAAmB,CAAE,WAAsB,KAAI,IAAI,CAAC;gBACvE,MAAM,aAAa,GAAG,mBAAmB,aAAnB,mBAAmB,uBAAnB,mBAAmB,CAAE,cAA0B,CAAC;gBACtE,MAAM,sBAAsB,qBAAQ,mBAAmB,CAAE,CAAC;gBACnD,sBAAsB,aAAtB,sBAAsB,4BAAtB,sBAAsB,CAAE,WAAW,CAAC;gBACpC,sBAAsB,aAAtB,sBAAsB,4BAAtB,sBAAsB,CAAE,cAAc,CAAC;gBAC9C,OAAO,IAAI,oBAAoB,CAAC,SAAS,EAAE,UAAU,EAAE,aAAa,EAAE,sBAAsB,EAAE,OAAO,CAAC,CAAC;YACzG,KAAK,cAAc;gBACjB,OAAO,IAAI,kBAAkB,CAAC,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;YACzE,KAAK,cAAc;gBACjB,OAAO,IAAI,kBAAkB,CAAC,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;YACzE,KAAK,eAAe;gBAClB,OAAO,IAAI,mBAAmB,CAAC,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;YAC1E,KAAK,uBAAuB;gBAC1B,OAAO,IAAI,0BAA0B,CAAC,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;YACjF,KAAK,kBAAkB;gBACrB,uEAAuE;gBACvE,MAAM,UAAU,GAAG,mBAAmB,aAAnB,mBAAmB,uBAAnB,mBAAmB,CAAE,WAAW,CAAC;gBACpD,MAAM,YAAY,qBAAQ,mBAAmB,CAAE,CAAC;gBACzC,YAAY,aAAZ,YAAY,4BAAZ,YAAY,CAAE,WAAW,CAAC;gBACjC,OAAO,IAAI,qBAAqB,CAAC,SAAS,EAAE,UAAU,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;YACjF,KAAK,eAAe;gBAClB,OAAO,IAAI,mBAAmB,CAAC,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;YAC1E,KAAK,UAAU;gBACb,OAAO,IAAI,cAAc,CAAC,SAAS,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;YACrE;gBACE,MAAM,IAAI,KAAK,CAAC,wBAAwB,IAAI,EAAE,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;CACF;AAzED,sCAyEC"}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import { Example } from '../data/example';
|
|
2
|
+
import { ScorerData } from '../data/result';
|
|
3
|
+
/**
 * Contract shared by all judgment scorers.
 */
export interface Scorer {
    /** Scorer type name. */
    type: string;
    /** Score type name. */
    scoreType: string;
    /** Threshold the score is compared against. */
    threshold: number;
    /** Score value; optional. */
    score?: number;
    /** Optional free-form metadata attached to the scorer. */
    additional_metadata?: Record<string, any>;
    /** Verbosity flag. */
    verbose: boolean;

    /** Validate the threshold. */
    validateThreshold(): void;
    /** Convert the scorer to a plain object. */
    toJSON(): Record<string, any>;
    /** Report whether the scorer counts as successful. */
    successCheck(): boolean;
}
|
|
17
|
+
/**
 * Abstract base class for API judgment scorers.
 */
export declare abstract class APIJudgmentScorer implements Scorer {
    /** Scorer type name; read-only. */
    readonly type: string;
    /** Score type name, exposed through a getter. */
    get scoreType(): string;
    /** Threshold the score is compared against; read-only. */
    readonly threshold: number;
    /** Score value; optional. */
    score?: number;
    /** Optional free-form metadata attached to the scorer. */
    additional_metadata?: Record<string, any>;
    /** Verbosity flag. */
    verbose: boolean;
    /**
     * @param type Scorer type name.
     * @param threshold Threshold for the scorer.
     * @param additional_metadata Optional metadata.
     * @param verbose Optional verbosity flag.
     */
    constructor(
        type: string,
        threshold: number,
        additional_metadata?: Record<string, any>,
        verbose?: boolean
    );
    /**
     * Check if the score meets the threshold.
     */
    successCheck(): boolean;
    /**
     * Validate that the threshold is within the allowed range.
     */
    validateThreshold(): void;
    /**
     * Convert the scorer to a plain object.
     */
    toJSON(): Record<string, any>;
    /**
     * Score a single example.
     * @param example The example to score.
     */
    a_score_example(example: Example): Promise<ScorerData>;
}
|
|
42
|
+
/**
 * Base class for local judgment scorers.
 *
 * Subclasses supply `scoreExample`; the threshold validation, success check
 * and serialization helpers are declared here.
 */
export declare abstract class JudgevalScorer implements Scorer {
    /** Scorer type identifier. */
    type: string;
    /** Copy of the scorer type, retained for backward compatibility. */
    scoreType: string;
    /** Value the score is compared against by `successCheck`. */
    threshold: number;
    /** Verbose flag; the constructor defaults it to `false` when omitted. */
    verbose: boolean;
    /** Computed score, when one has been assigned. */
    score?: number;
    /** Optional free-form metadata attached to the scorer. */
    additional_metadata?: Record<string, any>;
    constructor(type: string, threshold: number, additional_metadata?: Record<string, any>, verbose?: boolean);
    /**
     * Score an example
     * @param example The example to score
     * @returns A ScorerData object with the score
     */
    abstract scoreExample(example: Example): Promise<ScorerData>;
    /**
     * Validate that the threshold is within the allowed range
     */
    validateThreshold(): void;
    /**
     * Check if the score meets the threshold
     */
    successCheck(): boolean;
    /**
     * Convert the scorer to a plain object
     */
    toJSON(): Record<string, any>;
}
|
|
72
|
+
/**
 * Wrapper for scorers to allow dynamic loading of implementations.
 *
 * The wrapper keeps a reference to the wrapped scorer and mirrors its
 * configuration and score fields at construction time.
 */
export declare class ScorerWrapper implements Scorer {
    /** The wrapped scorer, exactly as passed to the constructor. */
    scorer: any;
    /** Scorer type identifier copied from the wrapped scorer. */
    type: string;
    /** Legacy alias of the type (`scoreType` or `score_type` on the wrapped scorer). */
    scoreType: string;
    /** Value the score is compared against by `successCheck`. */
    threshold: number;
    /** Verbose flag copied from the wrapped scorer. */
    verbose: boolean;
    /** Score copied from the wrapped scorer, when it has one. */
    score?: number;
    /** Metadata copied from the wrapped scorer, when it has any. */
    additional_metadata?: Record<string, any>;
    constructor(scorer: any);
    /**
     * Load the appropriate implementation based on the use_judgment flag
     */
    loadImplementation(useJudgment?: boolean): APIJudgmentScorer | JudgevalScorer;
    /**
     * Validate that the threshold is within the allowed range
     */
    validateThreshold(): void;
    /**
     * Check if the score meets the threshold
     */
    successCheck(): boolean;
    /**
     * Convert the scorer to a plain object
     */
    toJSON(): Record<string, any>;
}
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Drives a generator function as if it were an async function: every yielded
// value is awaited and its settlement is fed back into the generator.
// NOTE(review): this looks like the standard helper the TypeScript compiler
// emits for downleveled async functions - confirm before hand-editing.
//
// thisArg    - `this` binding used when invoking the generator function
// _arguments - arguments forwarded to the generator function (may be omitted)
// P          - promise constructor to use; falls back to the global Promise
// generator  - the generator function to run
// Returns a promise settled with the generator's return value, or rejected
// with the first error the generator does not handle.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    if (!P) {
        P = Promise;
    }
    // Wrap plain values so that every yielded result exposes `.then`.
    const adopt = (value) => {
        if (value instanceof P) {
            return value;
        }
        return new P((resolve) => { resolve(value); });
    };
    return new P((resolve, reject) => {
        // Settle the outer promise when the generator finishes; otherwise wait
        // for the yielded value and resume the generator with its outcome.
        const step = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            adopt(result.value).then(onFulfilled, onRejected);
        };
        const onFulfilled = (value) => {
            try {
                step(generator.next(value));
            }
            catch (err) {
                reject(err);
            }
        };
        const onRejected = (reason) => {
            try {
                step(generator["throw"](reason));
            }
            catch (err) {
                reject(err);
            }
        };
        // From here on `generator` refers to the running generator object.
        generator = generator.apply(thisArg, _arguments || []);
        step(generator.next());
    });
};
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.ScorerWrapper = exports.JudgevalScorer = exports.APIJudgmentScorer = void 0;
|
|
13
|
+
const constants_1 = require("../constants");
|
|
14
|
+
/**
 * Base class for API judgment scorers.
 *
 * Holds the scorer configuration and provides threshold validation, the
 * success check and serialization. Scoring itself is not performed here:
 * `a_score_example` always rejects because API scorers are evaluated on the
 * server side.
 */
class APIJudgmentScorer {
    /**
     * @param type Scorer type identifier.
     * @param threshold Value the score is compared against.
     * @param additional_metadata Optional free-form metadata.
     * @param verbose Verbose flag; defaults to `false`.
     */
    constructor(type, threshold, additional_metadata, verbose = false) {
        this.type = type;
        this.threshold = threshold;
        this.additional_metadata = additional_metadata;
        this.verbose = verbose;
    }
    /** Alias of `type`, kept for backward compatibility. */
    get scoreType() {
        return this.type;
    }
    /**
     * Check if the score meets the threshold.
     * @returns `true` only when a score is present and is >= the threshold.
     */
    successCheck() {
        // `== null` matches both undefined and null: a null score must not be
        // coerced to 0 and then pass a threshold of 0.
        if (this.score == null) {
            return false;
        }
        return this.score >= this.threshold;
    }
    /**
     * Validate that the threshold is within the allowed range.
     *
     * Scorers listed in UNBOUNDED_SCORERS (compared case-insensitively) only
     * need a threshold >= 0; every other scorer needs one within [0, 1].
     * @throws {Error} When the threshold is out of range or not comparable
     * as a number (for example NaN or undefined).
     */
    validateThreshold() {
        // Check if this is an unbounded scorer
        const isUnbounded = Array.from(constants_1.UNBOUNDED_SCORERS).some(scorer => scorer.toLowerCase() === this.type.toLowerCase());
        // The range checks are written as negated positive comparisons so that
        // NaN and undefined (for which every comparison is false) are rejected
        // instead of slipping through.
        if (isUnbounded) {
            if (!(this.threshold >= 0)) {
                throw new Error(`Threshold for ${this.type} must be greater than or equal to 0, got: ${this.threshold}`);
            }
            return;
        }
        if (!(this.threshold >= 0 && this.threshold <= 1)) {
            throw new Error(`Threshold for ${this.type} must be between 0 and 1, got: ${this.threshold}`);
        }
    }
    /**
     * Convert the scorer to a plain object.
     * @returns An object with `score_type`, `threshold`, `score`,
     * `additional_metadata` and `verbose`.
     */
    toJSON() {
        return {
            score_type: this.type,
            threshold: this.threshold,
            score: this.score,
            additional_metadata: this.additional_metadata,
            verbose: this.verbose,
        };
    }
    /**
     * Always rejects: API scorers are evaluated on the server side.
     * @param example Unused.
     * @returns A promise rejected with an Error.
     */
    a_score_example(example) {
        return Promise.reject(new Error('API scorers are evaluated on the server side'));
    }
}
|
|
70
|
+
exports.APIJudgmentScorer = APIJudgmentScorer;
|
|
71
|
+
/**
 * Base class for local judgment scorers.
 *
 * Holds the scorer configuration and provides threshold validation, the
 * success check and serialization.
 */
class JudgevalScorer {
    /**
     * @param type Scorer type identifier.
     * @param threshold Value the score is compared against.
     * @param additional_metadata Optional free-form metadata.
     * @param verbose Verbose flag; defaults to `false`.
     */
    constructor(type, threshold, additional_metadata, verbose = false) {
        this.type = type;
        this.scoreType = type; // For backward compatibility
        this.threshold = threshold;
        this.additional_metadata = additional_metadata;
        this.verbose = verbose;
    }
    /**
     * Check if the score meets the threshold.
     * @returns `true` only when a score is present and is >= the threshold.
     */
    successCheck() {
        // `== null` matches both undefined and null: a null score must not be
        // coerced to 0 and then pass a threshold of 0.
        if (this.score == null) {
            return false;
        }
        return this.score >= this.threshold;
    }
    /**
     * Validate that the threshold is within the allowed range.
     *
     * Scorers listed in UNBOUNDED_SCORERS (compared case-insensitively) only
     * need a threshold >= 0; every other scorer needs one within [0, 1].
     * @throws {Error} When the threshold is out of range or not comparable
     * as a number (for example NaN or undefined).
     */
    validateThreshold() {
        // Check if this is an unbounded scorer
        const isUnbounded = Array.from(constants_1.UNBOUNDED_SCORERS).some(scorer => scorer.toLowerCase() === this.type.toLowerCase());
        // The range checks are written as negated positive comparisons so that
        // NaN and undefined (for which every comparison is false) are rejected
        // instead of slipping through.
        if (isUnbounded) {
            if (!(this.threshold >= 0)) {
                throw new Error(`Threshold for ${this.type} must be greater than or equal to 0, got: ${this.threshold}`);
            }
            return;
        }
        if (!(this.threshold >= 0 && this.threshold <= 1)) {
            throw new Error(`Threshold for ${this.type} must be between 0 and 1, got: ${this.threshold}`);
        }
    }
    /**
     * Convert the scorer to a plain object.
     * @returns An object with `score_type`, `threshold`, `score`,
     * `additional_metadata` and `verbose`.
     */
    toJSON() {
        return {
            score_type: this.type,
            threshold: this.threshold,
            score: this.score,
            additional_metadata: this.additional_metadata,
            verbose: this.verbose,
        };
    }
}
|
|
121
|
+
exports.JudgevalScorer = JudgevalScorer;
|
|
122
|
+
/**
 * Wrapper for scorers to allow dynamic loading of implementations.
 *
 * Keeps a reference to the wrapped scorer and copies its configuration and
 * score fields at construction time; later changes to the wrapped scorer are
 * not reflected on the wrapper.
 */
class ScorerWrapper {
    /**
     * @param scorer Scorer-like object to wrap. Its type may be given as
     * `type`, `scoreType` or the legacy snake_case `score_type`.
     */
    constructor(scorer) {
        // For backward compatibility, accept the camelCase and snake_case aliases.
        const legacyType = scorer.scoreType || scorer.score_type;
        this.scorer = scorer;
        // Fall back to the alias when `type` is absent, so that validation and
        // serialization work for scorers that only carry `score_type`.
        this.type = scorer.type != null ? scorer.type : legacyType;
        this.scoreType = legacyType;
        this.threshold = scorer.threshold;
        this.score = scorer.score;
        this.additional_metadata = scorer.additional_metadata;
        this.verbose = scorer.verbose;
    }
    /**
     * Check if the score meets the threshold.
     * @returns `true` only when a score is present and is >= the threshold.
     */
    successCheck() {
        // `== null` matches both undefined and null: a null score must not be
        // coerced to 0 and then pass a threshold of 0.
        if (this.score == null) {
            return false;
        }
        return this.score >= this.threshold;
    }
    /**
     * Load the appropriate implementation based on the use_judgment flag.
     *
     * Selection between API and local implementations is not implemented:
     * the wrapped scorer is returned as is, whatever the flag.
     * @param useJudgment Currently ignored; defaults to `true`.
     * @returns The wrapped scorer.
     */
    loadImplementation(useJudgment = true) {
        return this.scorer;
    }
    /**
     * Validate that the threshold is within the allowed range.
     *
     * Scorers listed in UNBOUNDED_SCORERS (compared case-insensitively) only
     * need a threshold >= 0; every other scorer needs one within [0, 1].
     * @throws {Error} When the threshold is out of range or not comparable
     * as a number (for example NaN or undefined).
     */
    validateThreshold() {
        // Check if this is an unbounded scorer
        const isUnbounded = Array.from(constants_1.UNBOUNDED_SCORERS).some(scorer => scorer.toLowerCase() === this.type.toLowerCase());
        // The range checks are written as negated positive comparisons so that
        // NaN and undefined (for which every comparison is false) are rejected
        // instead of slipping through.
        if (isUnbounded) {
            if (!(this.threshold >= 0)) {
                throw new Error(`Threshold for ${this.type} must be greater than or equal to 0, got: ${this.threshold}`);
            }
            return;
        }
        if (!(this.threshold >= 0 && this.threshold <= 1)) {
            throw new Error(`Threshold for ${this.type} must be between 0 and 1, got: ${this.threshold}`);
        }
    }
    /**
     * Convert the scorer to a plain object.
     * @returns An object with `score_type`, `threshold`, `score`,
     * `additional_metadata` and `verbose`.
     */
    toJSON() {
        return {
            score_type: this.type,
            threshold: this.threshold,
            score: this.score,
            additional_metadata: this.additional_metadata,
            verbose: this.verbose,
        };
    }
}
|
|
189
|
+
exports.ScorerWrapper = ScorerWrapper;
|
|
190
|
+
//# sourceMappingURL=base-scorer.js.map
|