judgeval 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -68
- package/dist/cjs/common/logger-instance.js +17 -19
- package/dist/cjs/common/logger-instance.js.map +1 -1
- package/dist/cjs/common/tracer.js +210 -126
- package/dist/cjs/common/tracer.js.map +1 -1
- package/dist/cjs/constants.js +3 -2
- package/dist/cjs/constants.js.map +1 -1
- package/dist/cjs/index.js +1 -3
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/judgment-client.js +20 -114
- package/dist/cjs/judgment-client.js.map +1 -1
- package/dist/cjs/scorers/api-scorer.js +56 -48
- package/dist/cjs/scorers/api-scorer.js.map +1 -1
- package/dist/cjs/scorers/base-scorer.js +66 -11
- package/dist/cjs/scorers/base-scorer.js.map +1 -1
- package/dist/esm/common/logger-instance.js +17 -19
- package/dist/esm/common/logger-instance.js.map +1 -1
- package/dist/esm/common/tracer.js +211 -127
- package/dist/esm/common/tracer.js.map +1 -1
- package/dist/esm/constants.js +2 -1
- package/dist/esm/constants.js.map +1 -1
- package/dist/esm/index.js +0 -1
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/judgment-client.js +20 -114
- package/dist/esm/judgment-client.js.map +1 -1
- package/dist/esm/scorers/api-scorer.js +56 -48
- package/dist/esm/scorers/api-scorer.js.map +1 -1
- package/dist/esm/scorers/base-scorer.js +66 -11
- package/dist/esm/scorers/base-scorer.js.map +1 -1
- package/dist/types/common/tracer.d.ts +27 -13
- package/dist/types/constants.d.ts +2 -1
- package/dist/types/index.d.ts +0 -1
- package/dist/types/judgment-client.d.ts +0 -22
- package/dist/types/scorers/api-scorer.d.ts +15 -15
- package/dist/types/scorers/base-scorer.d.ts +53 -10
- package/package.json +10 -3
- package/dist/cjs/scorers/exact-match-scorer.js +0 -84
- package/dist/cjs/scorers/exact-match-scorer.js.map +0 -1
- package/dist/esm/scorers/exact-match-scorer.js +0 -80
- package/dist/esm/scorers/exact-match-scorer.js.map +0 -1
- package/dist/types/scorers/exact-match-scorer.d.ts +0 -10
|
@@ -12,8 +12,8 @@ import { APIJudgmentScorer } from './base-scorer.js';
|
|
|
12
12
|
* Implementation of API-based scorers
|
|
13
13
|
*/
|
|
14
14
|
export class AnswerCorrectnessScorer extends APIJudgmentScorer {
|
|
15
|
-
constructor(threshold = 0.7, additional_metadata,
|
|
16
|
-
super('answer_correctness', threshold, additional_metadata,
|
|
15
|
+
constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
|
|
16
|
+
super('answer_correctness', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
17
17
|
this.validateThreshold();
|
|
18
18
|
}
|
|
19
19
|
a_score_example(example) {
|
|
@@ -23,8 +23,8 @@ export class AnswerCorrectnessScorer extends APIJudgmentScorer {
|
|
|
23
23
|
}
|
|
24
24
|
}
|
|
25
25
|
export class AnswerRelevancyScorer extends APIJudgmentScorer {
|
|
26
|
-
constructor(threshold = 0.7, additional_metadata,
|
|
27
|
-
super('answer_relevancy', threshold, additional_metadata,
|
|
26
|
+
constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
|
|
27
|
+
super('answer_relevancy', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
28
28
|
this.validateThreshold();
|
|
29
29
|
}
|
|
30
30
|
a_score_example(example) {
|
|
@@ -34,8 +34,8 @@ export class AnswerRelevancyScorer extends APIJudgmentScorer {
|
|
|
34
34
|
}
|
|
35
35
|
}
|
|
36
36
|
export class ComparisonScorer extends APIJudgmentScorer {
|
|
37
|
-
constructor(threshold = 0.5, criteria = ['Accuracy', 'Helpfulness', 'Relevance'], description = 'Compare the outputs based on the given criteria', additional_metadata,
|
|
38
|
-
super('comparison', threshold, additional_metadata,
|
|
37
|
+
constructor(threshold = 0.5, criteria = ['Accuracy', 'Helpfulness', 'Relevance'], description = 'Compare the outputs based on the given criteria', additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
|
|
38
|
+
super('comparison', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
39
39
|
this.criteria = criteria;
|
|
40
40
|
this.description = description;
|
|
41
41
|
// Comparison is an unbounded scorer, only validate that threshold >= 0
|
|
@@ -50,7 +50,10 @@ export class ComparisonScorer extends APIJudgmentScorer {
|
|
|
50
50
|
criteria: this.criteria,
|
|
51
51
|
description: this.description,
|
|
52
52
|
additional_metadata: this.additional_metadata,
|
|
53
|
-
|
|
53
|
+
strict_mode: this.strict_mode,
|
|
54
|
+
async_mode: this.async_mode,
|
|
55
|
+
verbose_mode: this.verbose_mode,
|
|
56
|
+
include_reason: this.include_reason
|
|
54
57
|
};
|
|
55
58
|
}
|
|
56
59
|
a_score_example(example) {
|
|
@@ -60,8 +63,8 @@ export class ComparisonScorer extends APIJudgmentScorer {
|
|
|
60
63
|
}
|
|
61
64
|
}
|
|
62
65
|
export class ContextualPrecisionScorer extends APIJudgmentScorer {
|
|
63
|
-
constructor(threshold = 0.7, additional_metadata,
|
|
64
|
-
super('contextual_precision', threshold, additional_metadata,
|
|
66
|
+
constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
|
|
67
|
+
super('contextual_precision', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
65
68
|
this.validateThreshold();
|
|
66
69
|
}
|
|
67
70
|
a_score_example(example) {
|
|
@@ -71,8 +74,8 @@ export class ContextualPrecisionScorer extends APIJudgmentScorer {
|
|
|
71
74
|
}
|
|
72
75
|
}
|
|
73
76
|
export class ContextualRecallScorer extends APIJudgmentScorer {
|
|
74
|
-
constructor(threshold = 0.7, additional_metadata,
|
|
75
|
-
super('contextual_recall', threshold, additional_metadata,
|
|
77
|
+
constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
|
|
78
|
+
super('contextual_recall', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
76
79
|
this.validateThreshold();
|
|
77
80
|
}
|
|
78
81
|
a_score_example(example) {
|
|
@@ -82,8 +85,8 @@ export class ContextualRecallScorer extends APIJudgmentScorer {
|
|
|
82
85
|
}
|
|
83
86
|
}
|
|
84
87
|
export class ContextualRelevancyScorer extends APIJudgmentScorer {
|
|
85
|
-
constructor(threshold = 0.7, additional_metadata,
|
|
86
|
-
super('contextual_relevancy', threshold, additional_metadata,
|
|
88
|
+
constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
|
|
89
|
+
super('contextual_relevancy', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
87
90
|
this.validateThreshold();
|
|
88
91
|
}
|
|
89
92
|
a_score_example(example) {
|
|
@@ -93,9 +96,9 @@ export class ContextualRelevancyScorer extends APIJudgmentScorer {
|
|
|
93
96
|
}
|
|
94
97
|
}
|
|
95
98
|
export class ExecutionOrderScorer extends APIJudgmentScorer {
|
|
96
|
-
constructor(threshold = 1.0,
|
|
97
|
-
super('execution_order', threshold, additional_metadata,
|
|
98
|
-
this.strictMode =
|
|
99
|
+
constructor(threshold = 1.0, expectedTools, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
|
|
100
|
+
super('execution_order', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
101
|
+
this.strictMode = strict_mode;
|
|
99
102
|
this.expectedTools = expectedTools;
|
|
100
103
|
this.validateThreshold();
|
|
101
104
|
}
|
|
@@ -106,7 +109,9 @@ export class ExecutionOrderScorer extends APIJudgmentScorer {
|
|
|
106
109
|
strict_mode: this.strictMode,
|
|
107
110
|
expected_tools: this.expectedTools,
|
|
108
111
|
additional_metadata: this.additional_metadata,
|
|
109
|
-
|
|
112
|
+
async_mode: this.async_mode,
|
|
113
|
+
verbose_mode: this.verbose_mode,
|
|
114
|
+
include_reason: this.include_reason
|
|
110
115
|
};
|
|
111
116
|
}
|
|
112
117
|
a_score_example(example) {
|
|
@@ -116,8 +121,8 @@ export class ExecutionOrderScorer extends APIJudgmentScorer {
|
|
|
116
121
|
}
|
|
117
122
|
}
|
|
118
123
|
export class FaithfulnessScorer extends APIJudgmentScorer {
|
|
119
|
-
constructor(threshold = 0.7, additional_metadata,
|
|
120
|
-
super('faithfulness', threshold, additional_metadata,
|
|
124
|
+
constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
|
|
125
|
+
super('faithfulness', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
121
126
|
this.validateThreshold();
|
|
122
127
|
}
|
|
123
128
|
a_score_example(example) {
|
|
@@ -127,8 +132,8 @@ export class FaithfulnessScorer extends APIJudgmentScorer {
|
|
|
127
132
|
}
|
|
128
133
|
}
|
|
129
134
|
export class GroundednessScorer extends APIJudgmentScorer {
|
|
130
|
-
constructor(threshold = 0.7, additional_metadata,
|
|
131
|
-
super('groundedness', threshold, additional_metadata,
|
|
135
|
+
constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
|
|
136
|
+
super('groundedness', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
132
137
|
this.validateThreshold();
|
|
133
138
|
}
|
|
134
139
|
a_score_example(example) {
|
|
@@ -138,8 +143,8 @@ export class GroundednessScorer extends APIJudgmentScorer {
|
|
|
138
143
|
}
|
|
139
144
|
}
|
|
140
145
|
export class HallucinationScorer extends APIJudgmentScorer {
|
|
141
|
-
constructor(threshold = 0.7, additional_metadata,
|
|
142
|
-
super('hallucination', threshold, additional_metadata,
|
|
146
|
+
constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
|
|
147
|
+
super('hallucination', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
143
148
|
this.validateThreshold();
|
|
144
149
|
}
|
|
145
150
|
a_score_example(example) {
|
|
@@ -149,8 +154,8 @@ export class HallucinationScorer extends APIJudgmentScorer {
|
|
|
149
154
|
}
|
|
150
155
|
}
|
|
151
156
|
export class InstructionAdherenceScorer extends APIJudgmentScorer {
|
|
152
|
-
constructor(threshold = 0.7, additional_metadata,
|
|
153
|
-
super('instruction_adherence', threshold, additional_metadata,
|
|
157
|
+
constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
|
|
158
|
+
super('instruction_adherence', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
154
159
|
this.validateThreshold();
|
|
155
160
|
}
|
|
156
161
|
a_score_example(example) {
|
|
@@ -160,8 +165,8 @@ export class InstructionAdherenceScorer extends APIJudgmentScorer {
|
|
|
160
165
|
}
|
|
161
166
|
}
|
|
162
167
|
export class JsonCorrectnessScorer extends APIJudgmentScorer {
|
|
163
|
-
constructor(threshold = 0.7, jsonSchema, additional_metadata,
|
|
164
|
-
super('json_correctness', threshold, additional_metadata,
|
|
168
|
+
constructor(threshold = 0.7, jsonSchema, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
|
|
169
|
+
super('json_correctness', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
165
170
|
this.jsonSchema = jsonSchema;
|
|
166
171
|
this.validateThreshold();
|
|
167
172
|
}
|
|
@@ -171,7 +176,10 @@ export class JsonCorrectnessScorer extends APIJudgmentScorer {
|
|
|
171
176
|
threshold: this.threshold,
|
|
172
177
|
json_schema: this.jsonSchema,
|
|
173
178
|
additional_metadata: this.additional_metadata,
|
|
174
|
-
|
|
179
|
+
strict_mode: this.strict_mode,
|
|
180
|
+
async_mode: this.async_mode,
|
|
181
|
+
verbose_mode: this.verbose_mode,
|
|
182
|
+
include_reason: this.include_reason
|
|
175
183
|
};
|
|
176
184
|
}
|
|
177
185
|
a_score_example(example) {
|
|
@@ -181,8 +189,8 @@ export class JsonCorrectnessScorer extends APIJudgmentScorer {
|
|
|
181
189
|
}
|
|
182
190
|
}
|
|
183
191
|
export class SummarizationScorer extends APIJudgmentScorer {
|
|
184
|
-
constructor(threshold = 0.7, additional_metadata,
|
|
185
|
-
super('summarization', threshold, additional_metadata,
|
|
192
|
+
constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
|
|
193
|
+
super('summarization', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
186
194
|
this.validateThreshold();
|
|
187
195
|
}
|
|
188
196
|
a_score_example(example) {
|
|
@@ -192,8 +200,8 @@ export class SummarizationScorer extends APIJudgmentScorer {
|
|
|
192
200
|
}
|
|
193
201
|
}
|
|
194
202
|
export class Text2SQLScorer extends APIJudgmentScorer {
|
|
195
|
-
constructor(threshold = 0.7, additional_metadata,
|
|
196
|
-
super('text2sql', threshold, additional_metadata,
|
|
203
|
+
constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
|
|
204
|
+
super('text2sql', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
197
205
|
this.validateThreshold();
|
|
198
206
|
}
|
|
199
207
|
a_score_example(example) {
|
|
@@ -219,12 +227,12 @@ export class ScorerWrapper {
|
|
|
219
227
|
toJSON() {
|
|
220
228
|
return this.scorer.toJSON();
|
|
221
229
|
}
|
|
222
|
-
static fromType(type, threshold, additional_metadata,
|
|
230
|
+
static fromType(type, threshold, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
|
|
223
231
|
switch (type.toLowerCase()) {
|
|
224
232
|
case 'answer_correctness':
|
|
225
|
-
return new AnswerCorrectnessScorer(threshold, additional_metadata,
|
|
233
|
+
return new AnswerCorrectnessScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
226
234
|
case 'answer_relevancy':
|
|
227
|
-
return new AnswerRelevancyScorer(threshold, additional_metadata,
|
|
235
|
+
return new AnswerRelevancyScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
228
236
|
case 'comparison':
|
|
229
237
|
// For comparison, extract criteria and description from metadata if available
|
|
230
238
|
const criteria = (additional_metadata === null || additional_metadata === void 0 ? void 0 : additional_metadata.criteria) || ['Accuracy', 'Helpfulness', 'Relevance'];
|
|
@@ -232,39 +240,39 @@ export class ScorerWrapper {
|
|
|
232
240
|
const comparisonMetadata = Object.assign({}, additional_metadata);
|
|
233
241
|
comparisonMetadata === null || comparisonMetadata === void 0 ? true : delete comparisonMetadata.criteria;
|
|
234
242
|
comparisonMetadata === null || comparisonMetadata === void 0 ? true : delete comparisonMetadata.description;
|
|
235
|
-
return new ComparisonScorer(threshold, criteria, description, comparisonMetadata,
|
|
243
|
+
return new ComparisonScorer(threshold, criteria, description, comparisonMetadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
236
244
|
case 'contextual_precision':
|
|
237
|
-
return new ContextualPrecisionScorer(threshold, additional_metadata,
|
|
245
|
+
return new ContextualPrecisionScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
238
246
|
case 'contextual_recall':
|
|
239
|
-
return new ContextualRecallScorer(threshold, additional_metadata,
|
|
247
|
+
return new ContextualRecallScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
240
248
|
case 'contextual_relevancy':
|
|
241
|
-
return new ContextualRelevancyScorer(threshold, additional_metadata,
|
|
249
|
+
return new ContextualRelevancyScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
242
250
|
case 'execution_order':
|
|
243
251
|
// For execution order, extract strict_mode and expected_tools from metadata if available
|
|
244
|
-
const strictMode = (additional_metadata === null || additional_metadata === void 0 ? void 0 : additional_metadata.strict_mode) ||
|
|
252
|
+
const strictMode = (additional_metadata === null || additional_metadata === void 0 ? void 0 : additional_metadata.strict_mode) || false;
|
|
245
253
|
const expectedTools = additional_metadata === null || additional_metadata === void 0 ? void 0 : additional_metadata.expected_tools;
|
|
246
254
|
const executionOrderMetadata = Object.assign({}, additional_metadata);
|
|
247
255
|
executionOrderMetadata === null || executionOrderMetadata === void 0 ? true : delete executionOrderMetadata.strict_mode;
|
|
248
256
|
executionOrderMetadata === null || executionOrderMetadata === void 0 ? true : delete executionOrderMetadata.expected_tools;
|
|
249
|
-
return new ExecutionOrderScorer(threshold,
|
|
257
|
+
return new ExecutionOrderScorer(threshold, expectedTools, executionOrderMetadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
250
258
|
case 'faithfulness':
|
|
251
|
-
return new FaithfulnessScorer(threshold, additional_metadata,
|
|
259
|
+
return new FaithfulnessScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
252
260
|
case 'groundedness':
|
|
253
|
-
return new GroundednessScorer(threshold, additional_metadata,
|
|
261
|
+
return new GroundednessScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
254
262
|
case 'hallucination':
|
|
255
|
-
return new HallucinationScorer(threshold, additional_metadata,
|
|
263
|
+
return new HallucinationScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
256
264
|
case 'instruction_adherence':
|
|
257
|
-
return new InstructionAdherenceScorer(threshold, additional_metadata,
|
|
265
|
+
return new InstructionAdherenceScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
258
266
|
case 'json_correctness':
|
|
259
267
|
// For JSON correctness, extract json_schema from metadata if available
|
|
260
268
|
const jsonSchema = additional_metadata === null || additional_metadata === void 0 ? void 0 : additional_metadata.json_schema;
|
|
261
269
|
const jsonMetadata = Object.assign({}, additional_metadata);
|
|
262
270
|
jsonMetadata === null || jsonMetadata === void 0 ? true : delete jsonMetadata.json_schema;
|
|
263
|
-
return new JsonCorrectnessScorer(threshold, jsonSchema, jsonMetadata,
|
|
271
|
+
return new JsonCorrectnessScorer(threshold, jsonSchema, jsonMetadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
264
272
|
case 'summarization':
|
|
265
|
-
return new SummarizationScorer(threshold, additional_metadata,
|
|
273
|
+
return new SummarizationScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
266
274
|
case 'text2sql':
|
|
267
|
-
return new Text2SQLScorer(threshold, additional_metadata,
|
|
275
|
+
return new Text2SQLScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
|
|
268
276
|
default:
|
|
269
277
|
throw new Error(`Unknown scorer type: ${type}`);
|
|
270
278
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"api-scorer.js","sourceRoot":"","sources":["../../../src/scorers/api-scorer.ts"],"names":[],"mappings":";;;;;;;;;AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAKrD;;GAEG;AACH,MAAM,OAAO,uBAAwB,SAAQ,iBAAiB;IAC5D,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,
|
|
1
|
+
{"version":3,"file":"api-scorer.js","sourceRoot":"","sources":["../../../src/scorers/api-scorer.ts"],"names":[],"mappings":";;;;;;;;;AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAKrD;;GAEG;AACH,MAAM,OAAO,uBAAwB,SAAQ,iBAAiB;IAC5D,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,cAAuB,KAAK,EAAE,aAAsB,IAAI,EAAE,eAAwB,IAAI,EAAE,iBAA0B,IAAI;QACpM,KAAK,CAAC,oBAAoB,EAAE,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;QACnH,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AAED,MAAM,OAAO,qBAAsB,SAAQ,iBAAiB;IAC1D,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,cAAuB,KAAK,EAAE,aAAsB,IAAI,EAAE,eAAwB,IAAI,EAAE,iBAA0B,IAAI;QACpM,KAAK,CAAC,kBAAkB,EAAE,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;QACjH,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AAED,MAAM,OAAO,gBAAiB,SAAQ,iBAAiB;IAIrD,YACE,YAAoB,GAAG,EACvB,WAAqB,CAAC,UAAU,EAAE,aAAa,EAAE,WAAW,CAAC,EAC7D,cAAsB,iDAAiD,EACvE,mBAAyC,EACzC,cAAuB,KAAK,EAC5B,aAAsB,IAAI,EAC1B,eAAwB,IAAI,EAC5B,iBAA0B,IAAI;QAE9B,KAAK,CAAC,YAAY,EAAE,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;QAC3G,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,uEAAuE;QACvE,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YAClB,MAAM,IAAI,KAAK,CAAC,qEAAqE,SAAS,EAAE,CAAC,CAAC;QACpG,CAAC;IACH,CAAC;IAED,MAAM;QACJ,OAAO;YACL,UAAU,EAAE,YAAY;YACxB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,mBAAmB,EAAE,IAAI,CAAC,mBAAmB;YAC7C,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,cAAc,EAAE,IAAI,CAAC,cAAc;SACpC,CAAC;IACJ,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AAED,MAAM,OAAO,yBAA0B,SAAQ,iBAAiB;IAC9D,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,cAAuB,KAAK,EAAE,aAAsB,IAAI,EAAE,eAAwB,IAAI,EAAE,iBAA0B,IAAI;QACpM,KAAK,CAAC,sBAAsB,EAAE,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;QACrH,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AAED,MAAM,OAAO,sBAAuB,SAAQ,iBAAiB;IAC3D,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,cAAuB,KAAK,EAAE,aAAsB,IAAI,EAAE,eAAwB,IAAI,EAAE,iBAA0B,IAAI;QACpM,KAAK,CAAC,mBAAmB,EAAE,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;QAClH,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AAED,MAAM,OAAO,yBAA0B,SAAQ,iBAAiB;IAC9D,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,cAAuB,KAAK,EAAE,aAAsB,IAAI,EAAE,eAAwB,IAAI,EAAE,iBAA0B,IAAI;QACpM,KAAK,CAAC,sBAAsB,EAAE,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;QACrH,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AAED,MAAM,OAAO,oBAAqB,SAAQ,iBAAiB;IAIzD,YAAY,YAAoB,GAAG,EAAE,aAAwB,EAAE,mBAAyC,EAAE,cAAuB,KAAK,EAAE,aAAsB,IAAI,EAAE,eAAwB,IAAI,EAAE,iBAA0B,IAAI;QAC9N,KAAK,CAAC,iBAAiB,EAAE,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;QAChH,IAAI,CAAC,UAAU,GAAG,WAAW,CAAC;QAC9B,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;QACnC,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAED,MAAM;QACJ,OAAO;YACL,UAAU,EAAE,iBAAiB;YAC7B,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,WAAW,EAAE,IAAI,CAAC,UAAU;YAC5B,cAAc,EAAE,IAAI,CAAC,aAAa;YAClC,mBAAmB,EAAE,IAAI,CAAC,mBAAmB;YAC7C,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,cAAc,EAAE,IAAI,CAAC,cAAc;SACpC,CAAC;IACJ,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AAED,MAAM,OAAO,kBAAmB,SAAQ,iBAAiB;IACvD,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,cAAuB,KAAK,EAAE,aAAsB,IAAI,EAAE,eAAwB,IAAI,EAAE,iBAA0B,IAAI;QACpM,KAAK,CAAC,cAAc,EAAE,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;QAC7G,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AAED,MAAM,OAAO,kBAAmB,SAAQ,iBAAiB;IACvD,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,cAAuB,KAAK,EAAE,aAAsB,IAAI,EAAE,eAAwB,IAAI,EAAE,iBAA0B,IAAI;QACpM,KAAK,CAAC,cAAc,EAAE,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;QAC7G,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AAED,MAAM,OAAO,mBAAoB,SAAQ,iBAAiB;IACxD,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,cAAuB,KAAK,EAAE,aAAsB,IAAI,EAAE,eAAwB,IAAI,EAAE,iBAA0B,IAAI;QACpM,KAAK,CAAC,eAAe,EAAE,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;QAC9G,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AAED,MAAM,OAAO,0BAA2B,SAAQ,iBAAiB;IAC/D,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,cAAuB,KAAK,EAAE,aAAsB,IAAI,EAAE,eAAwB,IAAI,EAAE,iBAA0B,IAAI;QACpM,KAAK,CAAC,uBAAuB,EAAE,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;QACtH,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AAED,MAAM,OAAO,qBAAsB,SAAQ,iBAAiB;IAG1D,YACE,YAAoB,GAAG,EACvB,UAAgC,EAChC,mBAAyC,EACzC,cAAuB,KAAK,EAC5B,aAAsB,IAAI,EAC1B,eAAwB,IAAI,EAC5B,iBAA0B,IAAI;QAE9B,KAAK,CAAC,kBAAkB,EAAE,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;QACjH,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAED,MAAM;QACJ,OAAO;YACL,UAAU,EAAE,kBAAkB;YAC9B,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,WAAW,EAAE,IAAI,CAAC,UAAU;YAC5B,mBAAmB,EAAE,IAAI,CAAC,mBAAmB;YAC7C,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,cAAc,EAAE,IAAI,CAAC,cAAc;SACpC,CAAC;IACJ,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AAED,MAAM,OAAO,mBAAoB,SAAQ,iBAAiB;IACxD,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,cAAuB,KAAK,EAAE,aAAsB,IAAI,EAAE,eAAwB,IAAI,EAAE,iBAA0B,IAAI;QACpM,KAAK,CAAC,eAAe,EAAE,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;QAC9G,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AAED,MAAM,OAAO,cAAe,SAAQ,iBAAiB;IACnD,YAAY,YAAoB,GAAG,EAAE,mBAAyC,EAAE,cAAuB,KAAK,EAAE,aAAsB,IAAI,EAAE,eAAwB,IAAI,EAAE,iBAA0B,IAAI;QACpM,KAAK,CAAC,UAAU,EAAE,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;QACzG,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AAED,kFAAkF;AAClF,MAAM,OAAO,aAAa;IAGxB,YAAY,MAAyB;QACnC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED,IAAI,SAAS;QACX,OAAO,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;IAC/B,CAAC;IAED,IAAI,SAAS;QACX,OAAO,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;IAC/B,CAAC;IAED,IAAI,mBAAmB;QACrB,OAAO,IAAI,CAAC,MAAM,CAAC,mBAAmB,CAAC;IACzC,CAAC;IAED,MAAM;QACJ,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;IAC9B,CAAC;IAED,MAAM,CAAC,QAAQ,CAAC,IAAY,EAAE,SAAiB,EAAE,mBAAyC,EAAE,cAAuB,KAAK,EAAE,aAAsB,IAAI,EAAE,eAAwB,IAAI,EAAE,iBAA0B,IAAI;QAChN,QAAQ,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;YAC3B,KAAK,oBAAoB;gBACvB,OAAO,IAAI,uBAAuB,CAAC,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;YAC5H,KAAK,kBAAkB;gBACrB,OAAO,IAAI,qBAAqB,CAAC,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;YAC1H,KAAK,YAAY;gBACf,8EAA8E;gBAC9E,MAAM,QAAQ,GAAG,CAAA,mBAAmB,aAAnB,mBAAmB,uBAAnB,mBAAmB,CAAE,QAAoB,KAAI,CAAC,UAAU,EAAE,aAAa,EAAE,WAAW,CAAC,CAAC;gBACvG,MAAM,WAAW,GAAG,CAAA,mBAAmB,aAAnB,mBAAmB,uBAAnB,mBAAmB,CAAE,WAAqB,KAAI,iDAAiD,CAAC;gBACpH,MAAM,kBAAkB,qBAAQ,mBAAmB,CAAE,CAAC;gBAC/C,kBAAkB,aAAlB,kBAAkB,4BAAlB,kBAAkB,CAAE,QAAQ,CAAC;gBAC7B,kBAAkB,aAAlB,kBAAkB,4BAAlB,kBAAkB,CAAE,WAAW,CAAC;gBACvC,OAAO,IAAI,gBAAgB,CAAC,SAAS,EAAE,QAAQ,EAAE,WAAW,EAAE,kBAAkB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;YAC3I,KAAK,sBAAsB;gBACzB,OAAO,IAAI,yBAAyB,CAAC,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;YAC9H,KAAK,mBAAmB;gBACtB,OAAO,IAAI,sBAAsB,CAAC,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;YAC3H,KAAK,sBAAsB;gBACzB,OAAO,IAAI,yBAAyB,CAAC,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;YAC9H,KAAK,iBAAiB;gBACpB,yFAAyF;gBACzF,MAAM,UAAU,GAAG,CAAA,mBAAmB,aAAnB,mBAAmB,uBAAnB,mBAAmB,CAAE,WAAsB,KAAI,KAAK,CAAC;gBACxE,MAAM,aAAa,GAAG,mBAAmB,aAAnB,mBAAmB,uBAAnB,mBAAmB,CAAE,cAA0B,CAAC;gBACtE,MAAM,sBAAsB,qBAAQ,mBAAmB,CAAE,CAAC;gBACnD,sBAAsB,aAAtB,sBAAsB,4BAAtB,sBAAsB,CAAE,WAAW,CAAC;gBACpC,sBAAsB,aAAtB,sBAAsB,4BAAtB,sBAAsB,CAAE,cAAc,CAAC;gBAC9C,OAAO,IAAI,oBAAoB,CAAC,SAAS,EAAE,aAAa,EAAE,sBAAsB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;YAC3I,KAAK,cAAc;gBACjB,OAAO,IAAI,kBAAkB,CAAC,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;YACvH,KAAK,cAAc;gBACjB,OAAO,IAAI,kBAAkB,CAAC,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;YACvH,KAAK,eAAe;gBAClB,OAAO,IAAI,mBAAmB,CAAC,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;YACxH,KAAK,uBAAuB;gBAC1B,OAAO,IAAI,0BAA0B,CAAC,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;YAC/H,KAAK,kBAAkB;gBACrB,uEAAuE;gBACvE,MAAM,UAAU,GAAG,mBAAmB,aAAnB,mBAAmB,uBAAnB,mBAAmB,CAAE,WAAW,CAAC;gBACpD,MAAM,YAAY,qBAAQ,mBAAmB,CAAE,CAAC;gBACzC,YAAY,aAAZ,YAAY,4BAAZ,YAAY,CAAE,WAAW,CAAC;gBACjC,OAAO,IAAI,qBAAqB,CAAC,SAAS,EAAE,UAAU,EAAE,YAAY,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;YAC/H,KAAK,eAAe;gBAClB,OAAO,IAAI,mBAAmB,CAAC,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;YACxH,KAAK,UAAU;gBACb,OAAO,IAAI,cAAc,CAAC,SAAS,EAAE,mBAAmB,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;YACnH;gBACE,MAAM,IAAI,KAAK,CAAC,wBAAwB,IAAI,EAAE,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;CACF"}
|
|
@@ -13,11 +13,14 @@ import { UNBOUNDED_SCORERS } from '../constants.js';
|
|
|
13
13
|
*/
|
|
14
14
|
export class APIJudgmentScorer {
|
|
15
15
|
get scoreType() { return this.type; } // For backward compatibility
|
|
16
|
-
constructor(type, threshold, additional_metadata,
|
|
16
|
+
constructor(type, threshold, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
|
|
17
17
|
this.type = type;
|
|
18
18
|
this.threshold = threshold;
|
|
19
19
|
this.additional_metadata = additional_metadata;
|
|
20
|
-
this.
|
|
20
|
+
this.strict_mode = strict_mode;
|
|
21
|
+
this.async_mode = async_mode;
|
|
22
|
+
this.verbose_mode = verbose_mode;
|
|
23
|
+
this.include_reason = include_reason;
|
|
21
24
|
}
|
|
22
25
|
/**
|
|
23
26
|
* Check if the score meets the threshold
|
|
@@ -53,8 +56,12 @@ export class APIJudgmentScorer {
|
|
|
53
56
|
score_type: this.type,
|
|
54
57
|
threshold: this.threshold,
|
|
55
58
|
score: this.score,
|
|
59
|
+
score_breakdown: this.score_breakdown,
|
|
56
60
|
additional_metadata: this.additional_metadata,
|
|
57
|
-
|
|
61
|
+
strict_mode: this.strict_mode,
|
|
62
|
+
async_mode: this.async_mode,
|
|
63
|
+
verbose_mode: this.verbose_mode,
|
|
64
|
+
include_reason: this.include_reason,
|
|
58
65
|
};
|
|
59
66
|
return result;
|
|
60
67
|
}
|
|
@@ -68,21 +75,32 @@ export class APIJudgmentScorer {
|
|
|
68
75
|
* Base class for local judgment scorers
|
|
69
76
|
*/
|
|
70
77
|
export class JudgevalScorer {
|
|
71
|
-
constructor(type, threshold, additional_metadata,
|
|
78
|
+
constructor(type, threshold, additional_metadata, include_reason = true, async_mode = true, strict_mode = false, verbose_mode = true) {
|
|
72
79
|
this.type = type;
|
|
73
80
|
this.scoreType = type; // For backward compatibility
|
|
74
|
-
this.threshold = threshold;
|
|
81
|
+
this.threshold = strict_mode ? 1.0 : threshold;
|
|
82
|
+
this.strict_mode = strict_mode;
|
|
83
|
+
this.async_mode = async_mode;
|
|
84
|
+
this.verbose_mode = verbose_mode;
|
|
85
|
+
this.include_reason = include_reason;
|
|
75
86
|
this.additional_metadata = additional_metadata;
|
|
76
|
-
this.
|
|
87
|
+
this.validateThreshold();
|
|
77
88
|
}
|
|
78
89
|
/**
|
|
79
90
|
* Check if the score meets the threshold
|
|
80
91
|
*/
|
|
81
92
|
successCheck() {
|
|
82
|
-
if (this.
|
|
93
|
+
if (this.error !== undefined) {
|
|
83
94
|
return false;
|
|
84
95
|
}
|
|
85
|
-
return this.score >= this.threshold;
|
|
96
|
+
return this.score !== undefined && this.score >= this.threshold;
|
|
97
|
+
}
|
|
98
|
+
/**
|
|
99
|
+
* Internal method to check success
|
|
100
|
+
* This is equivalent to Python's _success_check method
|
|
101
|
+
*/
|
|
102
|
+
_successCheck() {
|
|
103
|
+
return this.successCheck();
|
|
86
104
|
}
|
|
87
105
|
/**
|
|
88
106
|
* Validate that the threshold is within the allowed range
|
|
@@ -109,10 +127,27 @@ export class JudgevalScorer {
|
|
|
109
127
|
score_type: this.type,
|
|
110
128
|
threshold: this.threshold,
|
|
111
129
|
score: this.score,
|
|
130
|
+
score_breakdown: this.score_breakdown,
|
|
131
|
+
reason: this.reason,
|
|
132
|
+
success: this.success,
|
|
133
|
+
evaluation_model: this.evaluation_model,
|
|
134
|
+
strict_mode: this.strict_mode,
|
|
135
|
+
async_mode: this.async_mode,
|
|
136
|
+
verbose_mode: this.verbose_mode,
|
|
137
|
+
include_reason: this.include_reason,
|
|
138
|
+
error: this.error,
|
|
139
|
+
evaluation_cost: this.evaluation_cost,
|
|
140
|
+
verbose_logs: this.verbose_logs,
|
|
112
141
|
additional_metadata: this.additional_metadata,
|
|
113
|
-
verbose: this.verbose,
|
|
114
142
|
};
|
|
115
143
|
}
|
|
144
|
+
/**
|
|
145
|
+
* Get the name of the scorer
|
|
146
|
+
* This is equivalent to Python's __name__ property
|
|
147
|
+
*/
|
|
148
|
+
get name() {
|
|
149
|
+
return this.type;
|
|
150
|
+
}
|
|
116
151
|
}
|
|
117
152
|
/**
|
|
118
153
|
* Wrapper for scorers to allow dynamic loading of implementations
|
|
@@ -124,8 +159,18 @@ export class ScorerWrapper {
|
|
|
124
159
|
this.scoreType = scorer.scoreType || scorer.score_type; // For backward compatibility
|
|
125
160
|
this.threshold = scorer.threshold;
|
|
126
161
|
this.score = scorer.score;
|
|
162
|
+
this.score_breakdown = scorer.score_breakdown;
|
|
163
|
+
this.reason = scorer.reason;
|
|
164
|
+
this.success = scorer.success;
|
|
165
|
+
this.evaluation_model = scorer.evaluation_model;
|
|
166
|
+
this.strict_mode = scorer.strict_mode;
|
|
167
|
+
this.async_mode = scorer.async_mode;
|
|
168
|
+
this.verbose_mode = scorer.verbose_mode;
|
|
169
|
+
this.include_reason = scorer.include_reason;
|
|
170
|
+
this.error = scorer.error;
|
|
171
|
+
this.evaluation_cost = scorer.evaluation_cost;
|
|
172
|
+
this.verbose_logs = scorer.verbose_logs;
|
|
127
173
|
this.additional_metadata = scorer.additional_metadata;
|
|
128
|
-
this.verbose = scorer.verbose;
|
|
129
174
|
}
|
|
130
175
|
/**
|
|
131
176
|
* Check if the score meets the threshold
|
|
@@ -176,8 +221,18 @@ export class ScorerWrapper {
|
|
|
176
221
|
score_type: this.type,
|
|
177
222
|
threshold: this.threshold,
|
|
178
223
|
score: this.score,
|
|
224
|
+
score_breakdown: this.score_breakdown,
|
|
225
|
+
reason: this.reason,
|
|
226
|
+
success: this.success,
|
|
227
|
+
evaluation_model: this.evaluation_model,
|
|
228
|
+
strict_mode: this.strict_mode,
|
|
229
|
+
async_mode: this.async_mode,
|
|
230
|
+
verbose_mode: this.verbose_mode,
|
|
231
|
+
include_reason: this.include_reason,
|
|
232
|
+
error: this.error,
|
|
233
|
+
evaluation_cost: this.evaluation_cost,
|
|
234
|
+
verbose_logs: this.verbose_logs,
|
|
179
235
|
additional_metadata: this.additional_metadata,
|
|
180
|
-
verbose: this.verbose,
|
|
181
236
|
};
|
|
182
237
|
}
|
|
183
238
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"base-scorer.js","sourceRoot":"","sources":["../../../src/scorers/base-scorer.ts"],"names":[],"mappings":";;;;;;;;;AAEA,OAAO,EAAa,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"base-scorer.js","sourceRoot":"","sources":["../../../src/scorers/base-scorer.ts"],"names":[],"mappings":";;;;;;;;;AAEA,OAAO,EAAa,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AA2B/D;;GAEG;AACH,MAAM,OAAgB,iBAAiB;IAErC,IAAI,SAAS,KAAa,OAAO,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,6BAA6B;IAU3E,YAAY,IAAY,EAAE,SAAiB,EAAE,mBAAyC,EAAE,cAAuB,KAAK,EAAE,aAAsB,IAAI,EAAE,eAAwB,IAAI,EAAE,iBAA0B,IAAI;QAC5M,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACjB,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,mBAAmB,GAAG,mBAAmB,CAAC;QAC/C,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACjC,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;IACvC,CAAC;IAED;;OAEG;IACH,YAAY;QACV,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;YAC7B,OAAO,KAAK,CAAC;QACf,CAAC;QACD,OAAO,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,SAAS,CAAC;IACtC,CAAC;IAED;;OAEG;IACH,iBAAiB;QACf,uCAAuC;QACvC,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC,IAAI,CACpD,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,WAAW,EAAE,KAAK,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAC3D,CAAC;QAEF,IAAI,WAAW,EAAE,CAAC;YAChB,IAAI,IAAI,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC;gBACvB,MAAM,IAAI,KAAK,CAAC,iBAAiB,IAAI,CAAC,IAAI,6CAA6C,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;YAC3G,CAAC;QACH,CAAC;aAAM,CAAC;YACN,IAAI,IAAI,CAAC,SAAS,GAAG,CAAC,IAAI,IAAI,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC;gBAC7C,MAAM,IAAI,KAAK,CAAC,iBAAiB,IAAI,CAAC,IAAI,kCAAkC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;YAChG,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,MAAM;QACJ,MAAM,MAAM,GAAwB;YAClC,UAAU,EAAE,IAAI,CAAC,IAAI;YACrB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,mBAAmB,EAAE,IAAI,CAAC,mBAAmB;YAC7C,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,cAAc,EAAE,IAAI,CAAC,cAAc;SACpC,CAAC;QAEF,OAAO,MAAM,CAAC;IAChB,CAAC;IAEK,eAAe,CAAC,OAAgB;;YACpC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;KAAA;CACF;AAED;;GAEG;AACH,MAAM,OAAgB,cAAc;IAkBlC,YACE,IAAY,EACZ,SAAiB,EACjB,mBAAyC,EACzC,iBAA0B,IAAI,EAC9B,aAAsB,IAAI,EAC1B,cAAuB,KAAK,EAC5B,eAAwB,IAAI;QAE5B,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACjB,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,CAAC,6BAA6B;QACpD,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC;QAC/C,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACjC,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;QACrC,IAAI,CAAC,mBAAmB,GAAG,mBAAmB,CAAC;QAC/C,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,YAAY;QACV,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;YAC7B,OAAO,KAAK,CAAC;QACf,CAAC;QACD,OAAO,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,SAAS,CAAC;IAClE,CAAC;IAED;;;OAGG;IACO,aAAa;QACrB,OAAO,IAAI,CAAC,YAAY,EAAE,CAAC;IAC7B,CAAC;IAED;;OAEG;IACH,iBAAiB;QACf,uCAAuC;QACvC,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC,IAAI,CACpD,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,WAAW,EAAE,KAAK,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAC3D,CAAC;QAEF,IAAI,WAAW,EAAE,CAAC;YAChB,IAAI,IAAI,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC;gBACvB,MAAM,IAAI,KAAK,CAAC,iBAAiB,IAAI,CAAC,IAAI,6CAA6C,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;YAC3G,CAAC;QACH,CAAC;aAAM,CAAC;YACN,IAAI,IAAI,CAAC,SAAS,GAAG,CAAC,IAAI,IAAI,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC;gBAC7C,MAAM,IAAI,KAAK,CAAC,iBAAiB,IAAI,CAAC,IAAI,kCAAkC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;YAChG,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,MAAM;QACJ,OAAO;YACL,UAAU,EAAE,IAAI,CAAC,IAAI;YACrB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;YACvC,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,mBAAmB,EAAE,IAAI,CAAC,mBAAmB;SAC9C,CAAC;IACJ,CAAC;IAQD;;;OAGG;IACH,IAAI,IAAI;QACN,OAAO,IAAI,CAAC,IAAI,CAAC;IACnB,CAAC;CACF;AAED;;GAEG;AACH,MAAM,OAAO,aAAa;IAmBxB,YAAY,MAAW;QACrB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC;QACxB,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,MAAM,CAAC,UAAU,CAAC,CAAC,6BAA6B;QACrF,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;QAClC,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC;QAC1B,IAAI,CAAC,eAAe,GAAG,MAAM,CAAC,eAAe,CAAC;QAC9C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC;QAC9B,IAAI,CAAC,gBAAgB,GAAG,MAAM,CAAC,gBAAgB,CAAC;QAChD,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW,CAAC;QACtC,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;QACpC,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,YAAY,CAAC;QACxC,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,cAAc,CAAC;QAC5C,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC;QAC1B,IAAI,CAAC,eAAe,GAAG,MAAM,CAAC,eAAe,CAAC;QAC9C,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,YAAY,CAAC;QACxC,IAAI,CAAC,mBAAmB,GAAG,MAAM,CAAC,mBAAmB,CAAC;IACxD,CAAC;IAED;;OAEG;IACH,YAAY;QACV,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;YAC7B,OAAO,KAAK,CAAC;QACf,CAAC;QACD,OAAO,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,SAAS,CAAC;IACtC,CAAC;IAED;;OAEG;IACH,kBAAkB,CAAC,cAAuB,IAAI;QAC5C,+DAA+D;QAC/D,8CAA8C;QAC9C,IAAI,WAAW,EAAE,CAAC;YAChB,4BAA4B;YAC5B,OAAO,IAAI,CAAC,MAA2B,CAAC;QAC1C,CAAC;aAAM,CAAC;YACN,8BAA8B;YAC9B,OAAO,IAAI,CAAC,MAAwB,CAAC;QACvC,CAAC;IACH,CAAC;IAED;;OAEG;IACH,iBAAiB;QACf,uCAAuC;QACvC,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC,IAAI,CACpD,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,WAAW,EAAE,KAAK,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAC3D,CAAC;QAEF,IAAI,WAAW,EAAE,CAAC;YAChB,IAAI,IAAI,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC;gBACvB,MAAM,IAAI,KAAK,CAAC,iBAAiB,IAAI,CAAC,IAAI,6CAA6C,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;YAC3G,CAAC;QACH,CAAC;aAAM,CAAC;YACN,IAAI,IAAI,CAAC,SAAS,GAAG,CAAC,IAAI,IAAI,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC;gBAC7C,MAAM,IAAI,KAAK,CAAC,iBAAiB,IAAI,CAAC,IAAI,kCAAkC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;YAChG,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,MAAM;QACJ,OAAO;YACL,UAAU,EAAE,IAAI,CAAC,IAAI;YACrB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;YACvC,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,mBAAmB,EAAE,IAAI,CAAC,mBAAmB;SAC9C,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -26,13 +26,15 @@ interface TraceEntry {
|
|
|
26
26
|
function: string;
|
|
27
27
|
span_id: string;
|
|
28
28
|
depth: number;
|
|
29
|
-
|
|
29
|
+
created_at: number;
|
|
30
30
|
duration?: number;
|
|
31
31
|
output?: any;
|
|
32
32
|
inputs?: Record<string, any>;
|
|
33
33
|
span_type: SpanType;
|
|
34
34
|
parent_span_id?: string;
|
|
35
35
|
evaluation_runs?: any[];
|
|
36
|
+
trace_id?: string;
|
|
37
|
+
message?: string;
|
|
36
38
|
}
|
|
37
39
|
interface TraceSavePayload {
|
|
38
40
|
trace_id: string;
|
|
@@ -49,8 +51,7 @@ interface TraceSavePayload {
|
|
|
49
51
|
total_cost_usd: number;
|
|
50
52
|
};
|
|
51
53
|
entries: CondensedSpanEntry[];
|
|
52
|
-
|
|
53
|
-
empty_save: boolean;
|
|
54
|
+
evaluation_runs: any[];
|
|
54
55
|
overwrite: boolean;
|
|
55
56
|
parent_trace_id?: string | null;
|
|
56
57
|
parent_name?: string | null;
|
|
@@ -59,15 +60,25 @@ interface CondensedSpanEntry {
|
|
|
59
60
|
span_id: string;
|
|
60
61
|
function: string;
|
|
61
62
|
depth: number;
|
|
62
|
-
|
|
63
|
+
created_at: string;
|
|
63
64
|
parent_span_id?: string | null;
|
|
64
65
|
span_type: SpanType;
|
|
65
66
|
inputs: Record<string, any> | null;
|
|
66
67
|
output: any | null;
|
|
67
68
|
evaluation_runs: any[];
|
|
68
69
|
duration: number | null;
|
|
70
|
+
trace_id?: string;
|
|
69
71
|
children?: CondensedSpanEntry[];
|
|
70
72
|
}
|
|
73
|
+
interface TokenCostResponse {
|
|
74
|
+
model: string;
|
|
75
|
+
prompt_tokens: number;
|
|
76
|
+
completion_tokens: number;
|
|
77
|
+
total_tokens: number;
|
|
78
|
+
prompt_tokens_cost_usd: number;
|
|
79
|
+
completion_tokens_cost_usd: number;
|
|
80
|
+
total_cost_usd: number;
|
|
81
|
+
}
|
|
71
82
|
/**
|
|
72
83
|
* Client for interacting with Judgment trace API endpoints.
|
|
73
84
|
*/
|
|
@@ -77,10 +88,20 @@ declare class TraceManagerClient {
|
|
|
77
88
|
constructor(apiKey: string, organizationId: string);
|
|
78
89
|
private _fetch;
|
|
79
90
|
fetchTrace(traceId: string): Promise<any>;
|
|
80
|
-
saveTrace(traceData: TraceSavePayload
|
|
91
|
+
saveTrace(traceData: TraceSavePayload): Promise<any>;
|
|
81
92
|
deleteTrace(traceId: string): Promise<any>;
|
|
82
93
|
deleteTraces(traceIds: string[]): Promise<any>;
|
|
83
94
|
addTraceToEvalQueue(traceData: TraceSavePayload): Promise<any>;
|
|
95
|
+
/**
|
|
96
|
+
* Calculate token costs directly using the API endpoint.
|
|
97
|
+
* This is more accurate than client-side calculation as it uses the most up-to-date pricing.
|
|
98
|
+
*
|
|
99
|
+
* @param model The model name (e.g. 'gpt-4', 'claude-3-opus-20240229')
|
|
100
|
+
* @param promptTokens Number of tokens in the prompt/input
|
|
101
|
+
* @param completionTokens Number of tokens in the completion/output
|
|
102
|
+
* @returns Object containing token counts and calculated costs in USD
|
|
103
|
+
*/
|
|
104
|
+
calculateTokenCosts(model: string, promptTokens: number, completionTokens: number): Promise<TokenCostResponse | null>;
|
|
84
105
|
}
|
|
85
106
|
/**
|
|
86
107
|
* Represents an ongoing trace context.
|
|
@@ -100,6 +121,7 @@ declare class TraceClient {
|
|
|
100
121
|
private apiKey;
|
|
101
122
|
private organizationId;
|
|
102
123
|
private originalName;
|
|
124
|
+
private _spanDepths;
|
|
103
125
|
constructor(config: {
|
|
104
126
|
tracer: Tracer;
|
|
105
127
|
traceId?: string;
|
|
@@ -154,14 +176,6 @@ declare class TraceClient {
|
|
|
154
176
|
model?: string;
|
|
155
177
|
logResults?: boolean;
|
|
156
178
|
}): Promise<void>;
|
|
157
|
-
/**
|
|
158
|
-
* Private helper to add an evaluation entry to the trace.
|
|
159
|
-
* This mirrors the structure of Python's add_eval_run.
|
|
160
|
-
*
|
|
161
|
-
* @param evalRunPayload The constructed payload for the evaluation.
|
|
162
|
-
* @param startTime The start time (in seconds) of the evaluation process.
|
|
163
|
-
*/
|
|
164
|
-
private _addEvalRun;
|
|
165
179
|
getOriginalName(): string;
|
|
166
180
|
}
|
|
167
181
|
/**
|
|
@@ -37,7 +37,8 @@ export declare const JUDGMENT_PROJECT_CREATE_API_URL = "https://api.judgmentlabs
|
|
|
37
37
|
export declare const JUDGMENT_TRACES_FETCH_API_URL = "https://api.judgmentlabs.ai/traces/fetch/";
|
|
38
38
|
export declare const JUDGMENT_TRACES_SAVE_API_URL = "https://api.judgmentlabs.ai/traces/save/";
|
|
39
39
|
export declare const JUDGMENT_TRACES_DELETE_API_URL = "https://api.judgmentlabs.ai/traces/delete/";
|
|
40
|
-
export declare const JUDGMENT_TRACES_ADD_TO_EVAL_QUEUE_API_URL = "https://api.judgmentlabs.ai/traces/
|
|
40
|
+
export declare const JUDGMENT_TRACES_ADD_TO_EVAL_QUEUE_API_URL = "https://api.judgmentlabs.ai/traces/add_to_eval_queue/";
|
|
41
|
+
export declare const JUDGMENT_CALCULATE_TOKEN_COSTS_API_URL = "https://api.judgmentlabs.ai/calculate-token-costs";
|
|
41
42
|
export declare const JUDGMENT_WEBSOCKET_URL: string;
|
|
42
43
|
export declare const JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL = "https://api.judgmentlabs.ai/add_to_run_eval_queue/";
|
|
43
44
|
export declare const RABBITMQ_HOST: string;
|
package/dist/types/index.d.ts
CHANGED
|
@@ -3,7 +3,6 @@ export { ScoringResult, ScoringResultBuilder, ScorerData, ScoringResultOptions }
|
|
|
3
3
|
export { Tracer, SpanType, wrap, TraceClient } from './common/tracer.js';
|
|
4
4
|
export { Scorer, APIJudgmentScorer, JudgevalScorer, ScorerWrapper } from './scorers/base-scorer.js';
|
|
5
5
|
export { AnswerCorrectnessScorer, AnswerRelevancyScorer, ComparisonScorer, ContextualPrecisionScorer, ContextualRecallScorer, ContextualRelevancyScorer, ExecutionOrderScorer, FaithfulnessScorer, GroundednessScorer, HallucinationScorer, InstructionAdherenceScorer, JsonCorrectnessScorer, SummarizationScorer } from './scorers/api-scorer.js';
|
|
6
|
-
export { ExactMatchScorer } from './scorers/exact-match-scorer.js';
|
|
7
6
|
export { AlertStatus, Condition, NotificationConfig, Rule, AlertResult, RulesEngine } from './rules.js';
|
|
8
7
|
export { EvaluationRun, EvaluationRunOptions } from './evaluation-run.js';
|
|
9
8
|
export { runEval, assertTest, JudgmentAPIError, sendToRabbitMQ, executeApiEval, mergeResults, checkMissingScorerData, checkEvalRunNameExists, logEvaluationResults, checkExamples } from './run-evaluation.js';
|
|
@@ -83,28 +83,6 @@ export declare class JudgmentClient {
|
|
|
83
83
|
* @returns Array containing one object with 'id' and 'results' (list of ScoringResult)
|
|
84
84
|
*/
|
|
85
85
|
pullEval(projectName: string, evalRunName: string): Promise<Array<Record<string, any | ScoringResult[]>>>;
|
|
86
|
-
/**
|
|
87
|
-
* Get evaluation run results (alias for pullEval with a more intuitive name)
|
|
88
|
-
* @param projectName Name of the project
|
|
89
|
-
* @param evalRunName Name of the evaluation run
|
|
90
|
-
* @returns Array containing one object with 'id' and 'results' (list of ScoringResult)
|
|
91
|
-
*/
|
|
92
|
-
getEvalRun(projectName: string, evalRunName: string): Promise<Array<Record<string, any | ScoringResult[]>>>;
|
|
93
|
-
/**
|
|
94
|
-
* List all evaluation runs for a project
|
|
95
|
-
* @param projectName Name of the project
|
|
96
|
-
* @param limit Maximum number of evaluation runs to return (default: 100)
|
|
97
|
-
* @param offset Offset for pagination (default: 0)
|
|
98
|
-
* @returns List of evaluation run metadata
|
|
99
|
-
*/
|
|
100
|
-
listEvalRuns(projectName: string, limit?: number, offset?: number): Promise<Array<Record<string, any>>>;
|
|
101
|
-
/**
|
|
102
|
-
* Get evaluation run statistics
|
|
103
|
-
* @param projectName Name of the project
|
|
104
|
-
* @param evalRunName Name of the evaluation run
|
|
105
|
-
* @returns Statistics for the evaluation run
|
|
106
|
-
*/
|
|
107
|
-
getEvalRunStats(projectName: string, evalRunName: string): Promise<Record<string, any>>;
|
|
108
86
|
/**
|
|
109
87
|
* Export evaluation results to a file format
|
|
110
88
|
* @param projectName Name of the project
|