scorecard-ai 1.0.0-alpha.8 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/README.md +25 -37
- package/client.d.mts +5 -4
- package/client.d.mts.map +1 -1
- package/client.d.ts +5 -4
- package/client.d.ts.map +1 -1
- package/client.js +19 -4
- package/client.js.map +1 -1
- package/client.mjs +19 -4
- package/client.mjs.map +1 -1
- package/core/pagination.d.mts +1 -1
- package/core/pagination.d.mts.map +1 -1
- package/core/pagination.d.ts +1 -1
- package/core/pagination.d.ts.map +1 -1
- package/index.d.mts +1 -0
- package/index.d.mts.map +1 -1
- package/index.d.ts +1 -0
- package/index.d.ts.map +1 -1
- package/index.js +3 -1
- package/index.js.map +1 -1
- package/index.mjs +1 -0
- package/index.mjs.map +1 -1
- package/internal/detect-platform.js +3 -3
- package/internal/detect-platform.js.map +1 -1
- package/internal/detect-platform.mjs +3 -3
- package/internal/detect-platform.mjs.map +1 -1
- package/internal/shim-types.d.mts +11 -22
- package/internal/shim-types.d.mts.map +1 -0
- package/internal/shim-types.d.ts +11 -22
- package/internal/shim-types.d.ts.map +1 -0
- package/internal/shim-types.js +4 -0
- package/internal/shim-types.js.map +1 -0
- package/internal/shim-types.mjs +3 -0
- package/internal/shim-types.mjs.map +1 -0
- package/internal/shims.d.mts +2 -2
- package/internal/shims.d.mts.map +1 -1
- package/internal/shims.d.ts +2 -2
- package/internal/shims.d.ts.map +1 -1
- package/internal/tslib.js +6 -6
- package/internal/uploads.js.map +1 -1
- package/internal/uploads.mjs.map +1 -1
- package/lib/runAndEvaluate.d.mts +62 -10
- package/lib/runAndEvaluate.d.mts.map +1 -1
- package/lib/runAndEvaluate.d.ts +62 -10
- package/lib/runAndEvaluate.d.ts.map +1 -1
- package/lib/runAndEvaluate.js +72 -19
- package/lib/runAndEvaluate.js.map +1 -1
- package/lib/runAndEvaluate.mjs +72 -19
- package/lib/runAndEvaluate.mjs.map +1 -1
- package/package.json +1 -4
- package/resources/index.d.mts +2 -2
- package/resources/index.d.mts.map +1 -1
- package/resources/index.d.ts +2 -2
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js +4 -4
- package/resources/index.js.map +1 -1
- package/resources/index.mjs +2 -2
- package/resources/index.mjs.map +1 -1
- package/resources/metrics.d.mts +421 -0
- package/resources/metrics.d.mts.map +1 -0
- package/resources/metrics.d.ts +421 -0
- package/resources/metrics.d.ts.map +1 -0
- package/resources/metrics.js +33 -0
- package/resources/metrics.js.map +1 -0
- package/resources/metrics.mjs +29 -0
- package/resources/metrics.mjs.map +1 -0
- package/resources/runs.d.mts +8 -8
- package/resources/runs.d.mts.map +1 -1
- package/resources/runs.d.ts +8 -8
- package/resources/runs.d.ts.map +1 -1
- package/resources/runs.js +1 -1
- package/resources/runs.mjs +1 -1
- package/resources/systems/index.d.mts +3 -0
- package/resources/systems/index.d.mts.map +1 -0
- package/resources/systems/index.d.ts +3 -0
- package/resources/systems/index.d.ts.map +1 -0
- package/resources/systems/index.js +9 -0
- package/resources/systems/index.js.map +1 -0
- package/resources/systems/index.mjs +4 -0
- package/resources/systems/index.mjs.map +1 -0
- package/resources/systems/systems.d.mts +229 -0
- package/resources/systems/systems.d.mts.map +1 -0
- package/resources/systems/systems.d.ts +229 -0
- package/resources/systems/systems.d.ts.map +1 -0
- package/resources/systems/systems.js +151 -0
- package/resources/systems/systems.js.map +1 -0
- package/resources/systems/systems.mjs +146 -0
- package/resources/systems/systems.mjs.map +1 -0
- package/resources/systems/versions.d.mts +132 -0
- package/resources/systems/versions.d.mts.map +1 -0
- package/resources/systems/versions.d.ts +132 -0
- package/resources/systems/versions.d.ts.map +1 -0
- package/resources/systems/versions.js +82 -0
- package/resources/systems/versions.js.map +1 -0
- package/resources/systems/versions.mjs +78 -0
- package/resources/systems/versions.mjs.map +1 -0
- package/resources/systems.d.mts +1 -224
- package/resources/systems.d.mts.map +1 -1
- package/resources/systems.d.ts +1 -224
- package/resources/systems.d.ts.map +1 -1
- package/resources/systems.js +2 -139
- package/resources/systems.js.map +1 -1
- package/resources/systems.mjs +1 -137
- package/resources/systems.mjs.map +1 -1
- package/resources/testsets.d.mts +1 -1
- package/resources/testsets.d.ts +1 -1
- package/resources/testsets.js +1 -1
- package/resources/testsets.mjs +1 -1
- package/src/client.ts +28 -28
- package/src/core/pagination.ts +1 -1
- package/src/index.ts +2 -0
- package/src/internal/detect-platform.ts +3 -3
- package/src/internal/shim-types.ts +26 -0
- package/src/internal/shims.ts +2 -2
- package/src/internal/uploads.ts +1 -1
- package/src/lib/runAndEvaluate.ts +159 -31
- package/src/resources/index.ts +2 -9
- package/src/resources/metrics.ts +525 -0
- package/src/resources/runs.ts +8 -8
- package/src/resources/systems/index.ts +18 -0
- package/src/resources/systems/systems.ts +299 -0
- package/src/resources/systems/versions.ts +166 -0
- package/src/resources/systems.ts +1 -277
- package/src/resources/testsets.ts +1 -1
- package/src/version.ts +1 -1
- package/version.d.mts +1 -1
- package/version.d.mts.map +1 -1
- package/version.d.ts +1 -1
- package/version.d.ts.map +1 -1
- package/version.js +1 -1
- package/version.js.map +1 -1
- package/version.mjs +1 -1
- package/version.mjs.map +1 -1
- package/resources/system-configs.d.mts +0 -155
- package/resources/system-configs.d.mts.map +0 -1
- package/resources/system-configs.d.ts +0 -155
- package/resources/system-configs.d.ts.map +0 -1
- package/resources/system-configs.js +0 -83
- package/resources/system-configs.js.map +0 -1
- package/resources/system-configs.mjs +0 -79
- package/resources/system-configs.mjs.map +0 -1
- package/src/internal/shim-types.d.ts +0 -28
- package/src/resources/system-configs.ts +0 -203
|
@@ -0,0 +1,525 @@
|
|
|
1
|
+
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
import { APIResource } from '../core/resource';
|
|
4
|
+
import { APIPromise } from '../core/api-promise';
|
|
5
|
+
import { RequestOptions } from '../internal/request-options';
|
|
6
|
+
import { path } from '../internal/utils/path';
|
|
7
|
+
|
|
8
|
+
export class Metrics extends APIResource {
|
|
9
|
+
/**
|
|
10
|
+
* Create a new Metric for evaluating system outputs.
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* ```ts
|
|
14
|
+
* const metric = await client.metrics.create('314', {
|
|
15
|
+
* evalType: 'ai',
|
|
16
|
+
* name: 'Response Accuracy',
|
|
17
|
+
* outputType: 'boolean',
|
|
18
|
+
* promptTemplate:
|
|
19
|
+
* 'Please evaluate if the following response is factually accurate: {{outputs.response}}',
|
|
20
|
+
* description:
|
|
21
|
+
* 'Evaluates if the response is factually accurate',
|
|
22
|
+
* evalModelName: 'gpt-4o',
|
|
23
|
+
* guidelines:
|
|
24
|
+
* 'Check if the response contains factually correct information',
|
|
25
|
+
* temperature: 0.1,
|
|
26
|
+
* });
|
|
27
|
+
* ```
|
|
28
|
+
*/
|
|
29
|
+
create(projectID: string, body: MetricCreateParams, options?: RequestOptions): APIPromise<Metric> {
|
|
30
|
+
return this._client.post(path`/projects/${projectID}/metrics`, { body, ...options });
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* A Metric defines how to evaluate system outputs against expected results.
|
|
36
|
+
*/
|
|
37
|
+
export type Metric =
|
|
38
|
+
| Metric.AIIntMetric
|
|
39
|
+
| Metric.HumanIntMetric
|
|
40
|
+
| Metric.HeuristicIntMetric
|
|
41
|
+
| Metric.AIBooleanMetric
|
|
42
|
+
| Metric.HumanBooleanMetric
|
|
43
|
+
| Metric.HeuristicBooleanMetric;
|
|
44
|
+
|
|
45
|
+
export namespace Metric {
|
|
46
|
+
/**
|
|
47
|
+
* A Metric with AI evaluation and integer output.
|
|
48
|
+
*/
|
|
49
|
+
export interface AIIntMetric {
|
|
50
|
+
/**
|
|
51
|
+
* The ID of the Metric.
|
|
52
|
+
*/
|
|
53
|
+
id: string;
|
|
54
|
+
|
|
55
|
+
/**
|
|
56
|
+
* The description of the Metric.
|
|
57
|
+
*/
|
|
58
|
+
description: string | null;
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* The AI model to use for evaluation.
|
|
62
|
+
*/
|
|
63
|
+
evalModelName: string;
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* AI-based evaluation type.
|
|
67
|
+
*/
|
|
68
|
+
evalType: 'ai';
|
|
69
|
+
|
|
70
|
+
/**
|
|
71
|
+
* Guidelines for AI evaluation on how to score the metric.
|
|
72
|
+
*/
|
|
73
|
+
guidelines: string | null;
|
|
74
|
+
|
|
75
|
+
/**
|
|
76
|
+
* The name of the Metric.
|
|
77
|
+
*/
|
|
78
|
+
name: string;
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Integer output type.
|
|
82
|
+
*/
|
|
83
|
+
outputType: 'int';
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* The threshold for determining pass/fail from integer scores (1-5).
|
|
87
|
+
*/
|
|
88
|
+
passingThreshold: number;
|
|
89
|
+
|
|
90
|
+
/**
|
|
91
|
+
* The complete prompt template for AI evaluation. Should include placeholders for
|
|
92
|
+
* dynamic content.
|
|
93
|
+
*/
|
|
94
|
+
promptTemplate: string;
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* The temperature for AI evaluation (0-2).
|
|
98
|
+
*/
|
|
99
|
+
temperature: number;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
/**
|
|
103
|
+
* A Metric with human evaluation and integer output.
|
|
104
|
+
*/
|
|
105
|
+
export interface HumanIntMetric {
|
|
106
|
+
/**
|
|
107
|
+
* The ID of the Metric.
|
|
108
|
+
*/
|
|
109
|
+
id: string;
|
|
110
|
+
|
|
111
|
+
/**
|
|
112
|
+
* The description of the Metric.
|
|
113
|
+
*/
|
|
114
|
+
description: string | null;
|
|
115
|
+
|
|
116
|
+
/**
|
|
117
|
+
* Human-based evaluation type.
|
|
118
|
+
*/
|
|
119
|
+
evalType: 'human';
|
|
120
|
+
|
|
121
|
+
/**
|
|
122
|
+
* The name of the Metric.
|
|
123
|
+
*/
|
|
124
|
+
name: string;
|
|
125
|
+
|
|
126
|
+
/**
|
|
127
|
+
* Integer output type.
|
|
128
|
+
*/
|
|
129
|
+
outputType: 'int';
|
|
130
|
+
|
|
131
|
+
/**
|
|
132
|
+
* The threshold for determining pass/fail from integer scores (1-5).
|
|
133
|
+
*/
|
|
134
|
+
passingThreshold: number;
|
|
135
|
+
|
|
136
|
+
/**
|
|
137
|
+
* Guidelines for human evaluators.
|
|
138
|
+
*/
|
|
139
|
+
guidelines?: string;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/**
|
|
143
|
+
* A Metric with heuristic evaluation and integer output.
|
|
144
|
+
*/
|
|
145
|
+
export interface HeuristicIntMetric {
|
|
146
|
+
/**
|
|
147
|
+
* The ID of the Metric.
|
|
148
|
+
*/
|
|
149
|
+
id: string;
|
|
150
|
+
|
|
151
|
+
/**
|
|
152
|
+
* The description of the Metric.
|
|
153
|
+
*/
|
|
154
|
+
description: string | null;
|
|
155
|
+
|
|
156
|
+
/**
|
|
157
|
+
* Heuristic-based evaluation type.
|
|
158
|
+
*/
|
|
159
|
+
evalType: 'heuristic';
|
|
160
|
+
|
|
161
|
+
/**
|
|
162
|
+
* The name of the Metric.
|
|
163
|
+
*/
|
|
164
|
+
name: string;
|
|
165
|
+
|
|
166
|
+
/**
|
|
167
|
+
* Integer output type.
|
|
168
|
+
*/
|
|
169
|
+
outputType: 'int';
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* The threshold for determining pass/fail from integer scores (1-5).
|
|
173
|
+
*/
|
|
174
|
+
passingThreshold: number;
|
|
175
|
+
|
|
176
|
+
/**
|
|
177
|
+
* Optional guidelines for heuristic evaluation logic.
|
|
178
|
+
*/
|
|
179
|
+
guidelines?: string;
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
/**
|
|
183
|
+
* A Metric with AI evaluation and boolean output.
|
|
184
|
+
*/
|
|
185
|
+
export interface AIBooleanMetric {
|
|
186
|
+
/**
|
|
187
|
+
* The ID of the Metric.
|
|
188
|
+
*/
|
|
189
|
+
id: string;
|
|
190
|
+
|
|
191
|
+
/**
|
|
192
|
+
* The description of the Metric.
|
|
193
|
+
*/
|
|
194
|
+
description: string | null;
|
|
195
|
+
|
|
196
|
+
/**
|
|
197
|
+
* The AI model to use for evaluation.
|
|
198
|
+
*/
|
|
199
|
+
evalModelName: string;
|
|
200
|
+
|
|
201
|
+
/**
|
|
202
|
+
* AI-based evaluation type.
|
|
203
|
+
*/
|
|
204
|
+
evalType: 'ai';
|
|
205
|
+
|
|
206
|
+
/**
|
|
207
|
+
* Guidelines for AI evaluation on how to score the metric.
|
|
208
|
+
*/
|
|
209
|
+
guidelines: string | null;
|
|
210
|
+
|
|
211
|
+
/**
|
|
212
|
+
* The name of the Metric.
|
|
213
|
+
*/
|
|
214
|
+
name: string;
|
|
215
|
+
|
|
216
|
+
/**
|
|
217
|
+
* Boolean output type.
|
|
218
|
+
*/
|
|
219
|
+
outputType: 'boolean';
|
|
220
|
+
|
|
221
|
+
/**
|
|
222
|
+
* The complete prompt template for AI evaluation. Should include placeholders for
|
|
223
|
+
* dynamic content.
|
|
224
|
+
*/
|
|
225
|
+
promptTemplate: string;
|
|
226
|
+
|
|
227
|
+
/**
|
|
228
|
+
* The temperature for AI evaluation (0-2).
|
|
229
|
+
*/
|
|
230
|
+
temperature: number;
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
/**
|
|
234
|
+
* A Metric with human evaluation and boolean output.
|
|
235
|
+
*/
|
|
236
|
+
export interface HumanBooleanMetric {
|
|
237
|
+
/**
|
|
238
|
+
* The ID of the Metric.
|
|
239
|
+
*/
|
|
240
|
+
id: string;
|
|
241
|
+
|
|
242
|
+
/**
|
|
243
|
+
* The description of the Metric.
|
|
244
|
+
*/
|
|
245
|
+
description: string | null;
|
|
246
|
+
|
|
247
|
+
/**
|
|
248
|
+
* Human-based evaluation type.
|
|
249
|
+
*/
|
|
250
|
+
evalType: 'human';
|
|
251
|
+
|
|
252
|
+
/**
|
|
253
|
+
* The name of the Metric.
|
|
254
|
+
*/
|
|
255
|
+
name: string;
|
|
256
|
+
|
|
257
|
+
/**
|
|
258
|
+
* Boolean output type.
|
|
259
|
+
*/
|
|
260
|
+
outputType: 'boolean';
|
|
261
|
+
|
|
262
|
+
/**
|
|
263
|
+
* Guidelines for human evaluators.
|
|
264
|
+
*/
|
|
265
|
+
guidelines?: string;
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
/**
|
|
269
|
+
* A Metric with heuristic evaluation and boolean output.
|
|
270
|
+
*/
|
|
271
|
+
export interface HeuristicBooleanMetric {
|
|
272
|
+
/**
|
|
273
|
+
* The ID of the Metric.
|
|
274
|
+
*/
|
|
275
|
+
id: string;
|
|
276
|
+
|
|
277
|
+
/**
|
|
278
|
+
* The description of the Metric.
|
|
279
|
+
*/
|
|
280
|
+
description: string | null;
|
|
281
|
+
|
|
282
|
+
/**
|
|
283
|
+
* Heuristic-based evaluation type.
|
|
284
|
+
*/
|
|
285
|
+
evalType: 'heuristic';
|
|
286
|
+
|
|
287
|
+
/**
|
|
288
|
+
* The name of the Metric.
|
|
289
|
+
*/
|
|
290
|
+
name: string;
|
|
291
|
+
|
|
292
|
+
/**
|
|
293
|
+
* Boolean output type.
|
|
294
|
+
*/
|
|
295
|
+
outputType: 'boolean';
|
|
296
|
+
|
|
297
|
+
/**
|
|
298
|
+
* Optional guidelines for heuristic evaluation logic.
|
|
299
|
+
*/
|
|
300
|
+
guidelines?: string;
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
export type MetricCreateParams =
|
|
305
|
+
| MetricCreateParams.AIIntMetric
|
|
306
|
+
| MetricCreateParams.HumanIntMetric
|
|
307
|
+
| MetricCreateParams.HeuristicIntMetric
|
|
308
|
+
| MetricCreateParams.AIBooleanMetric
|
|
309
|
+
| MetricCreateParams.HumanBooleanMetric
|
|
310
|
+
| MetricCreateParams.HeuristicBooleanMetric;
|
|
311
|
+
|
|
312
|
+
export declare namespace MetricCreateParams {
|
|
313
|
+
export interface AIIntMetric {
|
|
314
|
+
/**
|
|
315
|
+
* AI-based evaluation type.
|
|
316
|
+
*/
|
|
317
|
+
evalType: 'ai';
|
|
318
|
+
|
|
319
|
+
/**
|
|
320
|
+
* The name of the Metric.
|
|
321
|
+
*/
|
|
322
|
+
name: string;
|
|
323
|
+
|
|
324
|
+
/**
|
|
325
|
+
* Integer output type.
|
|
326
|
+
*/
|
|
327
|
+
outputType: 'int';
|
|
328
|
+
|
|
329
|
+
/**
|
|
330
|
+
* The complete prompt template for AI evaluation. Should include placeholders for
|
|
331
|
+
* dynamic content.
|
|
332
|
+
*/
|
|
333
|
+
promptTemplate: string;
|
|
334
|
+
|
|
335
|
+
/**
|
|
336
|
+
* The description of the Metric.
|
|
337
|
+
*/
|
|
338
|
+
description?: string | null;
|
|
339
|
+
|
|
340
|
+
/**
|
|
341
|
+
* The AI model to use for evaluation.
|
|
342
|
+
*/
|
|
343
|
+
evalModelName?: string;
|
|
344
|
+
|
|
345
|
+
/**
|
|
346
|
+
* Guidelines for AI evaluation on how to score the metric.
|
|
347
|
+
*/
|
|
348
|
+
guidelines?: string | null;
|
|
349
|
+
|
|
350
|
+
/**
|
|
351
|
+
* The threshold for determining pass/fail from integer scores (1-5).
|
|
352
|
+
*/
|
|
353
|
+
passingThreshold?: number;
|
|
354
|
+
|
|
355
|
+
/**
|
|
356
|
+
* The temperature for AI evaluation (0-2).
|
|
357
|
+
*/
|
|
358
|
+
temperature?: number;
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
export interface HumanIntMetric {
|
|
362
|
+
/**
|
|
363
|
+
* Human-based evaluation type.
|
|
364
|
+
*/
|
|
365
|
+
evalType: 'human';
|
|
366
|
+
|
|
367
|
+
/**
|
|
368
|
+
* The name of the Metric.
|
|
369
|
+
*/
|
|
370
|
+
name: string;
|
|
371
|
+
|
|
372
|
+
/**
|
|
373
|
+
* Integer output type.
|
|
374
|
+
*/
|
|
375
|
+
outputType: 'int';
|
|
376
|
+
|
|
377
|
+
/**
|
|
378
|
+
* The description of the Metric.
|
|
379
|
+
*/
|
|
380
|
+
description?: string | null;
|
|
381
|
+
|
|
382
|
+
/**
|
|
383
|
+
* Guidelines for human evaluators.
|
|
384
|
+
*/
|
|
385
|
+
guidelines?: string;
|
|
386
|
+
|
|
387
|
+
/**
|
|
388
|
+
* The threshold for determining pass/fail from integer scores (1-5).
|
|
389
|
+
*/
|
|
390
|
+
passingThreshold?: number;
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
export interface HeuristicIntMetric {
|
|
394
|
+
/**
|
|
395
|
+
* Heuristic-based evaluation type.
|
|
396
|
+
*/
|
|
397
|
+
evalType: 'heuristic';
|
|
398
|
+
|
|
399
|
+
/**
|
|
400
|
+
* The name of the Metric.
|
|
401
|
+
*/
|
|
402
|
+
name: string;
|
|
403
|
+
|
|
404
|
+
/**
|
|
405
|
+
* Integer output type.
|
|
406
|
+
*/
|
|
407
|
+
outputType: 'int';
|
|
408
|
+
|
|
409
|
+
/**
|
|
410
|
+
* The description of the Metric.
|
|
411
|
+
*/
|
|
412
|
+
description?: string | null;
|
|
413
|
+
|
|
414
|
+
/**
|
|
415
|
+
* Optional guidelines for heuristic evaluation logic.
|
|
416
|
+
*/
|
|
417
|
+
guidelines?: string;
|
|
418
|
+
|
|
419
|
+
/**
|
|
420
|
+
* The threshold for determining pass/fail from integer scores (1-5).
|
|
421
|
+
*/
|
|
422
|
+
passingThreshold?: number;
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
export interface AIBooleanMetric {
|
|
426
|
+
/**
|
|
427
|
+
* AI-based evaluation type.
|
|
428
|
+
*/
|
|
429
|
+
evalType: 'ai';
|
|
430
|
+
|
|
431
|
+
/**
|
|
432
|
+
* The name of the Metric.
|
|
433
|
+
*/
|
|
434
|
+
name: string;
|
|
435
|
+
|
|
436
|
+
/**
|
|
437
|
+
* Boolean output type.
|
|
438
|
+
*/
|
|
439
|
+
outputType: 'boolean';
|
|
440
|
+
|
|
441
|
+
/**
|
|
442
|
+
* The complete prompt template for AI evaluation. Should include placeholders for
|
|
443
|
+
* dynamic content.
|
|
444
|
+
*/
|
|
445
|
+
promptTemplate: string;
|
|
446
|
+
|
|
447
|
+
/**
|
|
448
|
+
* The description of the Metric.
|
|
449
|
+
*/
|
|
450
|
+
description?: string | null;
|
|
451
|
+
|
|
452
|
+
/**
|
|
453
|
+
* The AI model to use for evaluation.
|
|
454
|
+
*/
|
|
455
|
+
evalModelName?: string;
|
|
456
|
+
|
|
457
|
+
/**
|
|
458
|
+
* Guidelines for AI evaluation on how to score the metric.
|
|
459
|
+
*/
|
|
460
|
+
guidelines?: string | null;
|
|
461
|
+
|
|
462
|
+
/**
|
|
463
|
+
* The temperature for AI evaluation (0-2).
|
|
464
|
+
*/
|
|
465
|
+
temperature?: number;
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
export interface HumanBooleanMetric {
|
|
469
|
+
/**
|
|
470
|
+
* Human-based evaluation type.
|
|
471
|
+
*/
|
|
472
|
+
evalType: 'human';
|
|
473
|
+
|
|
474
|
+
/**
|
|
475
|
+
* The name of the Metric.
|
|
476
|
+
*/
|
|
477
|
+
name: string;
|
|
478
|
+
|
|
479
|
+
/**
|
|
480
|
+
* Boolean output type.
|
|
481
|
+
*/
|
|
482
|
+
outputType: 'boolean';
|
|
483
|
+
|
|
484
|
+
/**
|
|
485
|
+
* The description of the Metric.
|
|
486
|
+
*/
|
|
487
|
+
description?: string | null;
|
|
488
|
+
|
|
489
|
+
/**
|
|
490
|
+
* Guidelines for human evaluators.
|
|
491
|
+
*/
|
|
492
|
+
guidelines?: string;
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
export interface HeuristicBooleanMetric {
|
|
496
|
+
/**
|
|
497
|
+
* Heuristic-based evaluation type.
|
|
498
|
+
*/
|
|
499
|
+
evalType: 'heuristic';
|
|
500
|
+
|
|
501
|
+
/**
|
|
502
|
+
* The name of the Metric.
|
|
503
|
+
*/
|
|
504
|
+
name: string;
|
|
505
|
+
|
|
506
|
+
/**
|
|
507
|
+
* Boolean output type.
|
|
508
|
+
*/
|
|
509
|
+
outputType: 'boolean';
|
|
510
|
+
|
|
511
|
+
/**
|
|
512
|
+
* The description of the Metric.
|
|
513
|
+
*/
|
|
514
|
+
description?: string | null;
|
|
515
|
+
|
|
516
|
+
/**
|
|
517
|
+
* Optional guidelines for heuristic evaluation logic.
|
|
518
|
+
*/
|
|
519
|
+
guidelines?: string;
|
|
520
|
+
}
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
export declare namespace Metrics {
|
|
524
|
+
export { type Metric as Metric, type MetricCreateParams as MetricCreateParams };
|
|
525
|
+
}
|
package/src/resources/runs.ts
CHANGED
|
@@ -13,8 +13,8 @@ export class Runs extends APIResource {
|
|
|
13
13
|
* ```ts
|
|
14
14
|
* const run = await client.runs.create('314', {
|
|
15
15
|
* metricIds: ['789', '101'],
|
|
16
|
+
* systemVersionId: '87654321-4d3b-4ae4-8c7a-4b6e2a19ccf0',
|
|
16
17
|
* testsetId: '246',
|
|
17
|
-
* systemConfigId: '87654321-4d3b-4ae4-8c7a-4b6e2a19ccf0',
|
|
18
18
|
* });
|
|
19
19
|
* ```
|
|
20
20
|
*/
|
|
@@ -52,12 +52,12 @@ export interface Run {
|
|
|
52
52
|
/**
|
|
53
53
|
* The ID of the Testset this Run is testing.
|
|
54
54
|
*/
|
|
55
|
-
testsetId: string;
|
|
55
|
+
testsetId: string | null;
|
|
56
56
|
|
|
57
57
|
/**
|
|
58
|
-
* The ID of the system
|
|
58
|
+
* The ID of the system version this Run is using.
|
|
59
59
|
*/
|
|
60
|
-
|
|
60
|
+
systemVersionId?: string;
|
|
61
61
|
}
|
|
62
62
|
|
|
63
63
|
export interface RunCreateParams {
|
|
@@ -67,14 +67,14 @@ export interface RunCreateParams {
|
|
|
67
67
|
metricIds: Array<string>;
|
|
68
68
|
|
|
69
69
|
/**
|
|
70
|
-
* The ID of the
|
|
70
|
+
* The ID of the system version this Run is using.
|
|
71
71
|
*/
|
|
72
|
-
|
|
72
|
+
systemVersionId?: string;
|
|
73
73
|
|
|
74
74
|
/**
|
|
75
|
-
* The ID of the
|
|
75
|
+
* The ID of the Testset this Run is testing.
|
|
76
76
|
*/
|
|
77
|
-
|
|
77
|
+
testsetId?: string | null;
|
|
78
78
|
}
|
|
79
79
|
|
|
80
80
|
export declare namespace Runs {
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
export {
|
|
4
|
+
Systems,
|
|
5
|
+
type System,
|
|
6
|
+
type SystemDeleteResponse,
|
|
7
|
+
type SystemCreateParams,
|
|
8
|
+
type SystemUpdateParams,
|
|
9
|
+
type SystemListParams,
|
|
10
|
+
type SystemsPaginatedResponse,
|
|
11
|
+
} from './systems';
|
|
12
|
+
export {
|
|
13
|
+
Versions,
|
|
14
|
+
type SystemVersion,
|
|
15
|
+
type VersionCreateParams,
|
|
16
|
+
type VersionListParams,
|
|
17
|
+
type SystemVersionsPaginatedResponse,
|
|
18
|
+
} from './versions';
|