scorecard-ai 1.0.0-alpha.9 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +37 -0
- package/README.md +48 -19
- package/client.d.mts +4 -4
- package/client.d.mts.map +1 -1
- package/client.d.ts +4 -4
- package/client.d.ts.map +1 -1
- package/client.js +7 -5
- package/client.js.map +1 -1
- package/client.mjs +7 -5
- package/client.mjs.map +1 -1
- package/internal/tslib.js +6 -6
- package/lib/runAndEvaluate.d.mts +20 -8
- package/lib/runAndEvaluate.d.mts.map +1 -1
- package/lib/runAndEvaluate.d.ts +20 -8
- package/lib/runAndEvaluate.d.ts.map +1 -1
- package/lib/runAndEvaluate.js +31 -21
- package/lib/runAndEvaluate.js.map +1 -1
- package/lib/runAndEvaluate.mjs +31 -21
- package/lib/runAndEvaluate.mjs.map +1 -1
- package/package.json +1 -1
- package/resources/index.d.mts +2 -2
- package/resources/index.d.mts.map +1 -1
- package/resources/index.d.ts +2 -2
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js +4 -4
- package/resources/index.js.map +1 -1
- package/resources/index.mjs +2 -2
- package/resources/index.mjs.map +1 -1
- package/resources/metrics.d.mts +611 -0
- package/resources/metrics.d.mts.map +1 -0
- package/resources/metrics.d.ts +611 -0
- package/resources/metrics.d.ts.map +1 -0
- package/resources/metrics.js +52 -0
- package/resources/metrics.js.map +1 -0
- package/resources/metrics.mjs +48 -0
- package/resources/metrics.mjs.map +1 -0
- package/resources/runs.d.mts +5 -5
- package/resources/runs.d.mts.map +1 -1
- package/resources/runs.d.ts +5 -5
- package/resources/runs.d.ts.map +1 -1
- package/resources/runs.js +1 -1
- package/resources/runs.mjs +1 -1
- package/resources/systems/index.d.mts +3 -0
- package/resources/systems/index.d.mts.map +1 -0
- package/resources/systems/index.d.ts +3 -0
- package/resources/systems/index.d.ts.map +1 -0
- package/resources/systems/index.js +9 -0
- package/resources/systems/index.js.map +1 -0
- package/resources/systems/index.mjs +4 -0
- package/resources/systems/index.mjs.map +1 -0
- package/resources/systems/systems.d.mts +229 -0
- package/resources/systems/systems.d.mts.map +1 -0
- package/resources/systems/systems.d.ts +229 -0
- package/resources/systems/systems.d.ts.map +1 -0
- package/resources/systems/systems.js +151 -0
- package/resources/systems/systems.js.map +1 -0
- package/resources/systems/systems.mjs +146 -0
- package/resources/systems/systems.mjs.map +1 -0
- package/resources/systems/versions.d.mts +132 -0
- package/resources/systems/versions.d.mts.map +1 -0
- package/resources/systems/versions.d.ts +132 -0
- package/resources/systems/versions.d.ts.map +1 -0
- package/resources/{system-configs.js → systems/versions.js} +26 -25
- package/resources/systems/versions.js.map +1 -0
- package/resources/{system-configs.mjs → systems/versions.mjs} +24 -23
- package/resources/systems/versions.mjs.map +1 -0
- package/resources/systems.d.mts +1 -224
- package/resources/systems.d.mts.map +1 -1
- package/resources/systems.d.ts +1 -224
- package/resources/systems.d.ts.map +1 -1
- package/resources/systems.js +2 -139
- package/resources/systems.js.map +1 -1
- package/resources/systems.mjs +1 -137
- package/resources/systems.mjs.map +1 -1
- package/resources/testsets.d.mts +1 -1
- package/resources/testsets.d.ts +1 -1
- package/resources/testsets.js +1 -1
- package/resources/testsets.mjs +1 -1
- package/src/client.ts +22 -27
- package/src/lib/runAndEvaluate.ts +52 -27
- package/src/resources/index.ts +2 -8
- package/src/resources/metrics.ts +768 -0
- package/src/resources/runs.ts +5 -5
- package/src/resources/systems/index.ts +18 -0
- package/src/resources/systems/systems.ts +299 -0
- package/src/resources/systems/versions.ts +166 -0
- package/src/resources/systems.ts +1 -277
- package/src/resources/testsets.ts +1 -1
- package/src/version.ts +1 -1
- package/version.d.mts +1 -1
- package/version.d.mts.map +1 -1
- package/version.d.ts +1 -1
- package/version.d.ts.map +1 -1
- package/version.js +1 -1
- package/version.js.map +1 -1
- package/version.mjs +1 -1
- package/version.mjs.map +1 -1
- package/resources/system-configs.d.mts +0 -148
- package/resources/system-configs.d.mts.map +0 -1
- package/resources/system-configs.d.ts +0 -148
- package/resources/system-configs.d.ts.map +0 -1
- package/resources/system-configs.js.map +0 -1
- package/resources/system-configs.mjs.map +0 -1
- package/src/resources/system-configs.ts +0 -189
|
@@ -0,0 +1,768 @@
|
|
|
1
|
+
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
import { APIResource } from '../core/resource';
|
|
4
|
+
import { APIPromise } from '../core/api-promise';
|
|
5
|
+
import { RequestOptions } from '../internal/request-options';
|
|
6
|
+
import { path } from '../internal/utils/path';
|
|
7
|
+
|
|
8
|
+
export class Metrics extends APIResource {
  /**
   * Create a new Metric for evaluating system outputs. The structure of a metric
   * depends on the evalType and outputType of the metric.
   *
   * Calls `this._client.post` on `/projects/{projectID}/metrics`.
   *
   * @param projectID - Project identifier interpolated into the request path.
   * @param body - Metric definition; one of the `MetricCreateParams` variants.
   * @param options - Optional per-request options. They are spread after `body`
   *   into the object handed to the client.
   * @returns An `APIPromise` typed as `Metric`.
   *
   * @example
   * ```ts
   * const metric = await client.metrics.create('314', {
   *   evalType: 'ai',
   *   name: 'Response Accuracy',
   *   outputType: 'boolean',
   *   promptTemplate:
   *     'Please evaluate if the following response is factually accurate: {{outputs.response}}',
   *   description:
   *     'Evaluates if the response is factually accurate',
   *   evalModelName: 'gpt-4o',
   *   guidelines:
   *     'Check if the response contains factually correct information',
   *   temperature: 0.1,
   * });
   * ```
   */
  create(projectID: string, body: MetricCreateParams, options?: RequestOptions): APIPromise<Metric> {
    return this._client.post(path`/projects/${projectID}/metrics`, { body, ...options });
  }

  /**
   * Update an existing Metric. You must specify the evalType and outputType of the
   * metric. The structure of a metric depends on the evalType and outputType of the
   * metric.
   *
   * Calls `this._client.patch` on `/metrics/{metricID}`.
   *
   * @param metricID - Metric identifier interpolated into the request path.
   * @param body - Fields to change; one of the `MetricUpdateParams` variants.
   *   `evalType` and `outputType` are required in every variant.
   * @param options - Optional per-request options. They are spread after `body`
   *   into the object handed to the client.
   * @returns An `APIPromise` typed as `Metric`.
   *
   * @example
   * ```ts
   * const metric = await client.metrics.update('321', {
   *   evalType: 'ai',
   *   outputType: 'boolean',
   *   promptTemplate:
   *     'Using the following guidelines, evaluate the response: {{ guidelines }}\n\nResponse: {{ outputs.response }}\n\nIdeal answer: {{ expected.idealResponse }}',
   * });
   * ```
   */
  update(metricID: string, body: MetricUpdateParams, options?: RequestOptions): APIPromise<Metric> {
    return this._client.patch(path`/metrics/${metricID}`, { body, ...options });
  }
}
|
|
53
|
+
|
|
54
|
+
/**
 * A Metric defines how to evaluate system outputs against expected results.
 *
 * This is a union of six shapes, one per combination of `evalType`
 * (`'ai'`, `'human'`, `'heuristic'`) and `outputType` (`'int'`, `'boolean'`).
 * Both fields are string-literal typed in every member, so checking them
 * narrows the union. Only the `'int'` variants carry `passingThreshold`, and only
 * the `'ai'` variants carry `evalModelName`, `promptTemplate` and `temperature`.
 */
export type Metric =
  | Metric.AIIntMetric
  | Metric.HumanIntMetric
  | Metric.HeuristicIntMetric
  | Metric.AIBooleanMetric
  | Metric.HumanBooleanMetric
  | Metric.HeuristicBooleanMetric;

export namespace Metric {
  /**
   * A Metric with AI evaluation and integer output.
   */
  export interface AIIntMetric {
    /**
     * The ID of the Metric.
     */
    id: string;

    /**
     * The description of the Metric.
     */
    description: string | null;

    /**
     * The AI model to use for evaluation.
     */
    evalModelName: string;

    /**
     * AI-based evaluation type.
     */
    evalType: 'ai';

    /**
     * Guidelines for AI evaluation on how to score the metric.
     */
    guidelines: string | null;

    /**
     * The name of the Metric.
     */
    name: string;

    /**
     * Integer output type.
     */
    outputType: 'int';

    /**
     * The threshold for determining pass/fail from integer scores (1-5).
     */
    passingThreshold: number;

    /**
     * The complete prompt template for AI evaluation. Should include placeholders for
     * dynamic content.
     */
    promptTemplate: string;

    /**
     * The temperature for AI evaluation (0-2).
     */
    temperature: number;
  }

  /**
   * A Metric with human evaluation and integer output.
   */
  export interface HumanIntMetric {
    /**
     * The ID of the Metric.
     */
    id: string;

    /**
     * The description of the Metric.
     */
    description: string | null;

    /**
     * Human-based evaluation type.
     */
    evalType: 'human';

    /**
     * The name of the Metric.
     */
    name: string;

    /**
     * Integer output type.
     */
    outputType: 'int';

    /**
     * The threshold for determining pass/fail from integer scores (1-5).
     */
    passingThreshold: number;

    /**
     * Guidelines for human evaluators.
     */
    guidelines?: string;
  }

  /**
   * A Metric with heuristic evaluation and integer output.
   */
  export interface HeuristicIntMetric {
    /**
     * The ID of the Metric.
     */
    id: string;

    /**
     * The description of the Metric.
     */
    description: string | null;

    /**
     * Heuristic-based evaluation type.
     */
    evalType: 'heuristic';

    /**
     * The name of the Metric.
     */
    name: string;

    /**
     * Integer output type.
     */
    outputType: 'int';

    /**
     * The threshold for determining pass/fail from integer scores (1-5).
     */
    passingThreshold: number;

    /**
     * Optional guidelines for heuristic evaluation logic.
     */
    guidelines?: string;
  }

  /**
   * A Metric with AI evaluation and boolean output.
   */
  export interface AIBooleanMetric {
    /**
     * The ID of the Metric.
     */
    id: string;

    /**
     * The description of the Metric.
     */
    description: string | null;

    /**
     * The AI model to use for evaluation.
     */
    evalModelName: string;

    /**
     * AI-based evaluation type.
     */
    evalType: 'ai';

    /**
     * Guidelines for AI evaluation on how to score the metric.
     */
    guidelines: string | null;

    /**
     * The name of the Metric.
     */
    name: string;

    /**
     * Boolean output type.
     */
    outputType: 'boolean';

    /**
     * The complete prompt template for AI evaluation. Should include placeholders for
     * dynamic content.
     */
    promptTemplate: string;

    /**
     * The temperature for AI evaluation (0-2).
     */
    temperature: number;
  }

  /**
   * A Metric with human evaluation and boolean output.
   */
  export interface HumanBooleanMetric {
    /**
     * The ID of the Metric.
     */
    id: string;

    /**
     * The description of the Metric.
     */
    description: string | null;

    /**
     * Human-based evaluation type.
     */
    evalType: 'human';

    /**
     * The name of the Metric.
     */
    name: string;

    /**
     * Boolean output type.
     */
    outputType: 'boolean';

    /**
     * Guidelines for human evaluators.
     */
    guidelines?: string;
  }

  /**
   * A Metric with heuristic evaluation and boolean output.
   */
  export interface HeuristicBooleanMetric {
    /**
     * The ID of the Metric.
     */
    id: string;

    /**
     * The description of the Metric.
     */
    description: string | null;

    /**
     * Heuristic-based evaluation type.
     */
    evalType: 'heuristic';

    /**
     * The name of the Metric.
     */
    name: string;

    /**
     * Boolean output type.
     */
    outputType: 'boolean';

    /**
     * Optional guidelines for heuristic evaluation logic.
     */
    guidelines?: string;
  }
}
|
|
323
|
+
|
|
324
|
+
/**
 * Request body accepted by `Metrics.create`.
 *
 * This is a union of six shapes, one per combination of `evalType` and `outputType`.
 * Every variant requires `evalType`, `name` and `outputType`; the `'ai'`
 * variants additionally require `promptTemplate`. All other fields are optional.
 */
export type MetricCreateParams =
  | MetricCreateParams.AIIntMetric
  | MetricCreateParams.HumanIntMetric
  | MetricCreateParams.HeuristicIntMetric
  | MetricCreateParams.AIBooleanMetric
  | MetricCreateParams.HumanBooleanMetric
  | MetricCreateParams.HeuristicBooleanMetric;

export declare namespace MetricCreateParams {
  /**
   * Create parameters for a Metric with AI evaluation and integer output.
   */
  export interface AIIntMetric {
    /**
     * AI-based evaluation type.
     */
    evalType: 'ai';

    /**
     * The name of the Metric.
     */
    name: string;

    /**
     * Integer output type.
     */
    outputType: 'int';

    /**
     * The complete prompt template for AI evaluation. Should include placeholders for
     * dynamic content.
     */
    promptTemplate: string;

    /**
     * The description of the Metric.
     */
    description?: string | null;

    /**
     * The AI model to use for evaluation.
     */
    evalModelName?: string;

    /**
     * Guidelines for AI evaluation on how to score the metric.
     */
    guidelines?: string | null;

    /**
     * The threshold for determining pass/fail from integer scores (1-5).
     */
    passingThreshold?: number;

    /**
     * The temperature for AI evaluation (0-2).
     */
    temperature?: number;
  }

  /**
   * Create parameters for a Metric with human evaluation and integer output.
   */
  export interface HumanIntMetric {
    /**
     * Human-based evaluation type.
     */
    evalType: 'human';

    /**
     * The name of the Metric.
     */
    name: string;

    /**
     * Integer output type.
     */
    outputType: 'int';

    /**
     * The description of the Metric.
     */
    description?: string | null;

    /**
     * Guidelines for human evaluators.
     */
    guidelines?: string;

    /**
     * The threshold for determining pass/fail from integer scores (1-5).
     */
    passingThreshold?: number;
  }

  /**
   * Create parameters for a Metric with heuristic evaluation and integer output.
   */
  export interface HeuristicIntMetric {
    /**
     * Heuristic-based evaluation type.
     */
    evalType: 'heuristic';

    /**
     * The name of the Metric.
     */
    name: string;

    /**
     * Integer output type.
     */
    outputType: 'int';

    /**
     * The description of the Metric.
     */
    description?: string | null;

    /**
     * Optional guidelines for heuristic evaluation logic.
     */
    guidelines?: string;

    /**
     * The threshold for determining pass/fail from integer scores (1-5).
     */
    passingThreshold?: number;
  }

  /**
   * Create parameters for a Metric with AI evaluation and boolean output.
   */
  export interface AIBooleanMetric {
    /**
     * AI-based evaluation type.
     */
    evalType: 'ai';

    /**
     * The name of the Metric.
     */
    name: string;

    /**
     * Boolean output type.
     */
    outputType: 'boolean';

    /**
     * The complete prompt template for AI evaluation. Should include placeholders for
     * dynamic content.
     */
    promptTemplate: string;

    /**
     * The description of the Metric.
     */
    description?: string | null;

    /**
     * The AI model to use for evaluation.
     */
    evalModelName?: string;

    /**
     * Guidelines for AI evaluation on how to score the metric.
     */
    guidelines?: string | null;

    /**
     * The temperature for AI evaluation (0-2).
     */
    temperature?: number;
  }

  /**
   * Create parameters for a Metric with human evaluation and boolean output.
   */
  export interface HumanBooleanMetric {
    /**
     * Human-based evaluation type.
     */
    evalType: 'human';

    /**
     * The name of the Metric.
     */
    name: string;

    /**
     * Boolean output type.
     */
    outputType: 'boolean';

    /**
     * The description of the Metric.
     */
    description?: string | null;

    /**
     * Guidelines for human evaluators.
     */
    guidelines?: string;
  }

  /**
   * Create parameters for a Metric with heuristic evaluation and boolean output.
   */
  export interface HeuristicBooleanMetric {
    /**
     * Heuristic-based evaluation type.
     */
    evalType: 'heuristic';

    /**
     * The name of the Metric.
     */
    name: string;

    /**
     * Boolean output type.
     */
    outputType: 'boolean';

    /**
     * The description of the Metric.
     */
    description?: string | null;

    /**
     * Optional guidelines for heuristic evaluation logic.
     */
    guidelines?: string;
  }
}
|
|
542
|
+
|
|
543
|
+
/**
 * Request body accepted by `Metrics.update`.
 *
 * This is a union of six shapes, one per combination of `evalType` and `outputType`.
 * Those two fields are the only required ones in every variant; every other
 * field is optional.
 */
export type MetricUpdateParams =
  | MetricUpdateParams.AIIntMetric
  | MetricUpdateParams.HumanIntMetric
  | MetricUpdateParams.HeuristicIntMetric
  | MetricUpdateParams.AIBooleanMetric
  | MetricUpdateParams.HumanBooleanMetric
  | MetricUpdateParams.HeuristicBooleanMetric;

export declare namespace MetricUpdateParams {
  /**
   * Update parameters for a Metric with AI evaluation and integer output.
   */
  export interface AIIntMetric {
    /**
     * AI-based evaluation type.
     */
    evalType: 'ai';

    /**
     * Integer output type.
     */
    outputType: 'int';

    /**
     * The description of the Metric.
     */
    description?: string | null;

    /**
     * The AI model to use for evaluation.
     */
    evalModelName?: string;

    /**
     * Guidelines for AI evaluation on how to score the metric.
     */
    guidelines?: string | null;

    /**
     * The name of the Metric.
     */
    name?: string;

    /**
     * The threshold for determining pass/fail from integer scores (1-5).
     */
    passingThreshold?: number;

    /**
     * The complete prompt template for AI evaluation. Should include placeholders for
     * dynamic content.
     */
    promptTemplate?: string;

    /**
     * The temperature for AI evaluation (0-2).
     */
    temperature?: number;
  }

  /**
   * Update parameters for a Metric with human evaluation and integer output.
   */
  export interface HumanIntMetric {
    /**
     * Human-based evaluation type.
     */
    evalType: 'human';

    /**
     * Integer output type.
     */
    outputType: 'int';

    /**
     * The description of the Metric.
     */
    description?: string | null;

    /**
     * Guidelines for human evaluators.
     */
    guidelines?: string;

    /**
     * The name of the Metric.
     */
    name?: string;

    /**
     * The threshold for determining pass/fail from integer scores (1-5).
     */
    passingThreshold?: number;
  }

  /**
   * Update parameters for a Metric with heuristic evaluation and integer output.
   */
  export interface HeuristicIntMetric {
    /**
     * Heuristic-based evaluation type.
     */
    evalType: 'heuristic';

    /**
     * Integer output type.
     */
    outputType: 'int';

    /**
     * The description of the Metric.
     */
    description?: string | null;

    /**
     * Optional guidelines for heuristic evaluation logic.
     */
    guidelines?: string;

    /**
     * The name of the Metric.
     */
    name?: string;

    /**
     * The threshold for determining pass/fail from integer scores (1-5).
     */
    passingThreshold?: number;
  }

  /**
   * Update parameters for a Metric with AI evaluation and boolean output.
   */
  export interface AIBooleanMetric {
    /**
     * AI-based evaluation type.
     */
    evalType: 'ai';

    /**
     * Boolean output type.
     */
    outputType: 'boolean';

    /**
     * The description of the Metric.
     */
    description?: string | null;

    /**
     * The AI model to use for evaluation.
     */
    evalModelName?: string;

    /**
     * Guidelines for AI evaluation on how to score the metric.
     */
    guidelines?: string | null;

    /**
     * The name of the Metric.
     */
    name?: string;

    /**
     * The complete prompt template for AI evaluation. Should include placeholders for
     * dynamic content.
     */
    promptTemplate?: string;

    /**
     * The temperature for AI evaluation (0-2).
     */
    temperature?: number;
  }

  /**
   * Update parameters for a Metric with human evaluation and boolean output.
   */
  export interface HumanBooleanMetric {
    /**
     * Human-based evaluation type.
     */
    evalType: 'human';

    /**
     * Boolean output type.
     */
    outputType: 'boolean';

    /**
     * The description of the Metric.
     */
    description?: string | null;

    /**
     * Guidelines for human evaluators.
     */
    guidelines?: string;

    /**
     * The name of the Metric.
     */
    name?: string;
  }

  /**
   * Update parameters for a Metric with heuristic evaluation and boolean output.
   */
  export interface HeuristicBooleanMetric {
    /**
     * Heuristic-based evaluation type.
     */
    evalType: 'heuristic';

    /**
     * Boolean output type.
     */
    outputType: 'boolean';

    /**
     * The description of the Metric.
     */
    description?: string | null;

    /**
     * Optional guidelines for heuristic evaluation logic.
     */
    guidelines?: string;

    /**
     * The name of the Metric.
     */
    name?: string;
  }
}
|
|
761
|
+
|
|
762
|
+
/**
 * Type-only namespace sharing the `Metrics` name. It re-exports this module's types
 * so they can be referenced as `Metrics.Metric`, `Metrics.MetricCreateParams` and
 * `Metrics.MetricUpdateParams`.
 */
export declare namespace Metrics {
  export {
    type Metric as Metric,
    type MetricCreateParams as MetricCreateParams,
    type MetricUpdateParams as MetricUpdateParams,
  };
}
|