scorecard-ai 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +58 -0
- package/client.d.mts +6 -6
- package/client.d.mts.map +1 -1
- package/client.d.ts +6 -6
- package/client.d.ts.map +1 -1
- package/client.js.map +1 -1
- package/client.mjs +2 -2
- package/client.mjs.map +1 -1
- package/internal/to-file.d.mts +1 -1
- package/internal/to-file.d.ts +1 -1
- package/internal/to-file.js +1 -1
- package/internal/to-file.mjs +1 -1
- package/package.json +1 -1
- package/resources/index.d.mts +3 -3
- package/resources/index.d.mts.map +1 -1
- package/resources/index.d.ts +3 -3
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js.map +1 -1
- package/resources/index.mjs +2 -2
- package/resources/index.mjs.map +1 -1
- package/resources/metrics.d.mts +348 -28
- package/resources/metrics.d.mts.map +1 -1
- package/resources/metrics.d.ts +348 -28
- package/resources/metrics.d.ts.map +1 -1
- package/resources/metrics.js +30 -0
- package/resources/metrics.js.map +1 -1
- package/resources/metrics.mjs +30 -0
- package/resources/metrics.mjs.map +1 -1
- package/resources/records.d.mts +30 -1
- package/resources/records.d.mts.map +1 -1
- package/resources/records.d.ts +30 -1
- package/resources/records.d.ts.map +1 -1
- package/resources/records.js +21 -0
- package/resources/records.js.map +1 -1
- package/resources/records.mjs +21 -0
- package/resources/records.mjs.map +1 -1
- package/resources/runs.d.mts +52 -5
- package/resources/runs.d.mts.map +1 -1
- package/resources/runs.d.ts +52 -5
- package/resources/runs.d.ts.map +1 -1
- package/resources/runs.js +30 -0
- package/resources/runs.js.map +1 -1
- package/resources/runs.mjs +30 -0
- package/resources/runs.mjs.map +1 -1
- package/src/client.ts +34 -5
- package/src/internal/to-file.ts +1 -1
- package/src/resources/index.ts +17 -3
- package/src/resources/metrics.ts +438 -27
- package/src/resources/records.ts +48 -1
- package/src/resources/runs.ts +76 -5
- package/src/version.ts +1 -1
- package/version.d.mts +1 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/version.mjs +1 -1
package/src/resources/metrics.ts
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import { APIResource } from '../core/resource';
|
|
4
4
|
import { APIPromise } from '../core/api-promise';
|
|
5
|
+
import { PagePromise, PaginatedResponse, type PaginatedResponseParams } from '../core/pagination';
|
|
5
6
|
import { RequestOptions } from '../internal/request-options';
|
|
6
7
|
import { path } from '../internal/utils/path';
|
|
7
8
|
|
|
@@ -49,8 +50,45 @@ export class Metrics extends APIResource {
|
|
|
49
50
|
update(metricID: string, body: MetricUpdateParams, options?: RequestOptions): APIPromise<Metric> {
|
|
50
51
|
return this._client.patch(path`/metrics/${metricID}`, { body, ...options });
|
|
51
52
|
}
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* List Metrics configured for the specified Project. Metrics are returned in
|
|
56
|
+
* reverse chronological order.
|
|
57
|
+
*
|
|
58
|
+
* @example
|
|
59
|
+
* ```ts
|
|
60
|
+
* // Automatically fetches more pages as needed.
|
|
61
|
+
* for await (const metric of client.metrics.list('314')) {
|
|
62
|
+
* // ...
|
|
63
|
+
* }
|
|
64
|
+
* ```
|
|
65
|
+
*/
|
|
66
|
+
list(
|
|
67
|
+
projectID: string,
|
|
68
|
+
query: MetricListParams | null | undefined = {},
|
|
69
|
+
options?: RequestOptions,
|
|
70
|
+
): PagePromise<MetricsPaginatedResponse, Metric> {
|
|
71
|
+
return this._client.getAPIList(path`/projects/${projectID}/metrics`, PaginatedResponse<Metric>, {
|
|
72
|
+
query,
|
|
73
|
+
...options,
|
|
74
|
+
});
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* Retrieve a specific Metric by ID.
|
|
79
|
+
*
|
|
80
|
+
* @example
|
|
81
|
+
* ```ts
|
|
82
|
+
* const metric = await client.metrics.get('321');
|
|
83
|
+
* ```
|
|
84
|
+
*/
|
|
85
|
+
get(metricID: string, options?: RequestOptions): APIPromise<Metric> {
|
|
86
|
+
return this._client.get(path`/metrics/${metricID}`, options);
|
|
87
|
+
}
|
|
52
88
|
}
|
|
53
89
|
|
|
90
|
+
export type MetricsPaginatedResponse = PaginatedResponse<Metric>;
|
|
91
|
+
|
|
54
92
|
/**
|
|
55
93
|
* A Metric defines how to evaluate system outputs against expected results.
|
|
56
94
|
*/
|
|
@@ -58,6 +96,9 @@ export type Metric =
|
|
|
58
96
|
| Metric.AIIntMetric
|
|
59
97
|
| Metric.HumanIntMetric
|
|
60
98
|
| Metric.HeuristicIntMetric
|
|
99
|
+
| Metric.AIFloatMetric
|
|
100
|
+
| Metric.HumanFloatMetric
|
|
101
|
+
| Metric.HeuristicFloatMetric
|
|
61
102
|
| Metric.AIBooleanMetric
|
|
62
103
|
| Metric.HumanBooleanMetric
|
|
63
104
|
| Metric.HeuristicBooleanMetric;
|
|
@@ -90,7 +131,7 @@ export namespace Metric {
|
|
|
90
131
|
/**
|
|
91
132
|
* Guidelines for AI evaluation on how to score the metric.
|
|
92
133
|
*/
|
|
93
|
-
guidelines: string
|
|
134
|
+
guidelines: string;
|
|
94
135
|
|
|
95
136
|
/**
|
|
96
137
|
* The name of the Metric.
|
|
@@ -138,6 +179,11 @@ export namespace Metric {
|
|
|
138
179
|
*/
|
|
139
180
|
evalType: 'human';
|
|
140
181
|
|
|
182
|
+
/**
|
|
183
|
+
* Guidelines for human evaluators.
|
|
184
|
+
*/
|
|
185
|
+
guidelines: string;
|
|
186
|
+
|
|
141
187
|
/**
|
|
142
188
|
* The name of the Metric.
|
|
143
189
|
*/
|
|
@@ -152,11 +198,6 @@ export namespace Metric {
|
|
|
152
198
|
* The threshold for determining pass/fail from integer scores (1-5).
|
|
153
199
|
*/
|
|
154
200
|
passingThreshold: number;
|
|
155
|
-
|
|
156
|
-
/**
|
|
157
|
-
* Guidelines for human evaluators.
|
|
158
|
-
*/
|
|
159
|
-
guidelines?: string;
|
|
160
201
|
}
|
|
161
202
|
|
|
162
203
|
/**
|
|
@@ -178,6 +219,11 @@ export namespace Metric {
|
|
|
178
219
|
*/
|
|
179
220
|
evalType: 'heuristic';
|
|
180
221
|
|
|
222
|
+
/**
|
|
223
|
+
* Guidelines for heuristic evaluation logic.
|
|
224
|
+
*/
|
|
225
|
+
guidelines: string;
|
|
226
|
+
|
|
181
227
|
/**
|
|
182
228
|
* The name of the Metric.
|
|
183
229
|
*/
|
|
@@ -192,11 +238,142 @@ export namespace Metric {
|
|
|
192
238
|
* The threshold for determining pass/fail from integer scores (1-5).
|
|
193
239
|
*/
|
|
194
240
|
passingThreshold: number;
|
|
241
|
+
}
|
|
195
242
|
|
|
243
|
+
/**
|
|
244
|
+
* A Metric with AI evaluation and float output.
|
|
245
|
+
*/
|
|
246
|
+
export interface AIFloatMetric {
|
|
196
247
|
/**
|
|
197
|
-
*
|
|
248
|
+
* The ID of the Metric.
|
|
198
249
|
*/
|
|
199
|
-
|
|
250
|
+
id: string;
|
|
251
|
+
|
|
252
|
+
/**
|
|
253
|
+
* The description of the Metric.
|
|
254
|
+
*/
|
|
255
|
+
description: string | null;
|
|
256
|
+
|
|
257
|
+
/**
|
|
258
|
+
* The AI model to use for evaluation.
|
|
259
|
+
*/
|
|
260
|
+
evalModelName: string;
|
|
261
|
+
|
|
262
|
+
/**
|
|
263
|
+
* AI-based evaluation type.
|
|
264
|
+
*/
|
|
265
|
+
evalType: 'ai';
|
|
266
|
+
|
|
267
|
+
/**
|
|
268
|
+
* Guidelines for AI evaluation on how to score the metric.
|
|
269
|
+
*/
|
|
270
|
+
guidelines: string;
|
|
271
|
+
|
|
272
|
+
/**
|
|
273
|
+
* The name of the Metric.
|
|
274
|
+
*/
|
|
275
|
+
name: string;
|
|
276
|
+
|
|
277
|
+
/**
|
|
278
|
+
* Float output type (0-1).
|
|
279
|
+
*/
|
|
280
|
+
outputType: 'float';
|
|
281
|
+
|
|
282
|
+
/**
|
|
283
|
+
* Threshold for determining pass/fail from float scores (0.0-1.0).
|
|
284
|
+
*/
|
|
285
|
+
passingThreshold: number;
|
|
286
|
+
|
|
287
|
+
/**
|
|
288
|
+
* The complete prompt template for AI evaluation. Should include placeholders for
|
|
289
|
+
* dynamic content.
|
|
290
|
+
*/
|
|
291
|
+
promptTemplate: string;
|
|
292
|
+
|
|
293
|
+
/**
|
|
294
|
+
* The temperature for AI evaluation (0-2).
|
|
295
|
+
*/
|
|
296
|
+
temperature: number;
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
/**
|
|
300
|
+
* A Metric with human evaluation and float output.
|
|
301
|
+
*/
|
|
302
|
+
export interface HumanFloatMetric {
|
|
303
|
+
/**
|
|
304
|
+
* The ID of the Metric.
|
|
305
|
+
*/
|
|
306
|
+
id: string;
|
|
307
|
+
|
|
308
|
+
/**
|
|
309
|
+
* The description of the Metric.
|
|
310
|
+
*/
|
|
311
|
+
description: string | null;
|
|
312
|
+
|
|
313
|
+
/**
|
|
314
|
+
* Human-based evaluation type.
|
|
315
|
+
*/
|
|
316
|
+
evalType: 'human';
|
|
317
|
+
|
|
318
|
+
/**
|
|
319
|
+
* Guidelines for human evaluators.
|
|
320
|
+
*/
|
|
321
|
+
guidelines: string;
|
|
322
|
+
|
|
323
|
+
/**
|
|
324
|
+
* The name of the Metric.
|
|
325
|
+
*/
|
|
326
|
+
name: string;
|
|
327
|
+
|
|
328
|
+
/**
|
|
329
|
+
* Float output type (0-1).
|
|
330
|
+
*/
|
|
331
|
+
outputType: 'float';
|
|
332
|
+
|
|
333
|
+
/**
|
|
334
|
+
* Threshold for determining pass/fail from float scores (0.0-1.0).
|
|
335
|
+
*/
|
|
336
|
+
passingThreshold: number;
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
/**
|
|
340
|
+
* A Metric with heuristic evaluation and float output.
|
|
341
|
+
*/
|
|
342
|
+
export interface HeuristicFloatMetric {
|
|
343
|
+
/**
|
|
344
|
+
* The ID of the Metric.
|
|
345
|
+
*/
|
|
346
|
+
id: string;
|
|
347
|
+
|
|
348
|
+
/**
|
|
349
|
+
* The description of the Metric.
|
|
350
|
+
*/
|
|
351
|
+
description: string | null;
|
|
352
|
+
|
|
353
|
+
/**
|
|
354
|
+
* Heuristic-based evaluation type.
|
|
355
|
+
*/
|
|
356
|
+
evalType: 'heuristic';
|
|
357
|
+
|
|
358
|
+
/**
|
|
359
|
+
* Guidelines for heuristic evaluation logic.
|
|
360
|
+
*/
|
|
361
|
+
guidelines: string;
|
|
362
|
+
|
|
363
|
+
/**
|
|
364
|
+
* The name of the Metric.
|
|
365
|
+
*/
|
|
366
|
+
name: string;
|
|
367
|
+
|
|
368
|
+
/**
|
|
369
|
+
* Float output type (0-1).
|
|
370
|
+
*/
|
|
371
|
+
outputType: 'float';
|
|
372
|
+
|
|
373
|
+
/**
|
|
374
|
+
* Threshold for determining pass/fail from float scores (0.0-1.0).
|
|
375
|
+
*/
|
|
376
|
+
passingThreshold: number;
|
|
200
377
|
}
|
|
201
378
|
|
|
202
379
|
/**
|
|
@@ -226,7 +403,7 @@ export namespace Metric {
|
|
|
226
403
|
/**
|
|
227
404
|
* Guidelines for AI evaluation on how to score the metric.
|
|
228
405
|
*/
|
|
229
|
-
guidelines: string
|
|
406
|
+
guidelines: string;
|
|
230
407
|
|
|
231
408
|
/**
|
|
232
409
|
* The name of the Metric.
|
|
@@ -269,6 +446,11 @@ export namespace Metric {
|
|
|
269
446
|
*/
|
|
270
447
|
evalType: 'human';
|
|
271
448
|
|
|
449
|
+
/**
|
|
450
|
+
* Guidelines for human evaluators.
|
|
451
|
+
*/
|
|
452
|
+
guidelines: string;
|
|
453
|
+
|
|
272
454
|
/**
|
|
273
455
|
* The name of the Metric.
|
|
274
456
|
*/
|
|
@@ -278,11 +460,6 @@ export namespace Metric {
|
|
|
278
460
|
* Boolean output type.
|
|
279
461
|
*/
|
|
280
462
|
outputType: 'boolean';
|
|
281
|
-
|
|
282
|
-
/**
|
|
283
|
-
* Guidelines for human evaluators.
|
|
284
|
-
*/
|
|
285
|
-
guidelines?: string;
|
|
286
463
|
}
|
|
287
464
|
|
|
288
465
|
/**
|
|
@@ -304,6 +481,11 @@ export namespace Metric {
|
|
|
304
481
|
*/
|
|
305
482
|
evalType: 'heuristic';
|
|
306
483
|
|
|
484
|
+
/**
|
|
485
|
+
* Guidelines for heuristic evaluation logic.
|
|
486
|
+
*/
|
|
487
|
+
guidelines: string;
|
|
488
|
+
|
|
307
489
|
/**
|
|
308
490
|
* The name of the Metric.
|
|
309
491
|
*/
|
|
@@ -313,11 +495,6 @@ export namespace Metric {
|
|
|
313
495
|
* Boolean output type.
|
|
314
496
|
*/
|
|
315
497
|
outputType: 'boolean';
|
|
316
|
-
|
|
317
|
-
/**
|
|
318
|
-
* Optional guidelines for heuristic evaluation logic.
|
|
319
|
-
*/
|
|
320
|
-
guidelines?: string;
|
|
321
498
|
}
|
|
322
499
|
}
|
|
323
500
|
|
|
@@ -325,6 +502,9 @@ export type MetricCreateParams =
|
|
|
325
502
|
| MetricCreateParams.AIIntMetric
|
|
326
503
|
| MetricCreateParams.HumanIntMetric
|
|
327
504
|
| MetricCreateParams.HeuristicIntMetric
|
|
505
|
+
| MetricCreateParams.AIFloatMetric
|
|
506
|
+
| MetricCreateParams.HumanFloatMetric
|
|
507
|
+
| MetricCreateParams.HeuristicFloatMetric
|
|
328
508
|
| MetricCreateParams.AIBooleanMetric
|
|
329
509
|
| MetricCreateParams.HumanBooleanMetric
|
|
330
510
|
| MetricCreateParams.HeuristicBooleanMetric;
|
|
@@ -365,7 +545,7 @@ export declare namespace MetricCreateParams {
|
|
|
365
545
|
/**
|
|
366
546
|
* Guidelines for AI evaluation on how to score the metric.
|
|
367
547
|
*/
|
|
368
|
-
guidelines?: string
|
|
548
|
+
guidelines?: string;
|
|
369
549
|
|
|
370
550
|
/**
|
|
371
551
|
* The threshold for determining pass/fail from integer scores (1-5).
|
|
@@ -432,7 +612,7 @@ export declare namespace MetricCreateParams {
|
|
|
432
612
|
description?: string | null;
|
|
433
613
|
|
|
434
614
|
/**
|
|
435
|
-
*
|
|
615
|
+
* Guidelines for heuristic evaluation logic.
|
|
436
616
|
*/
|
|
437
617
|
guidelines?: string;
|
|
438
618
|
|
|
@@ -442,6 +622,118 @@ export declare namespace MetricCreateParams {
|
|
|
442
622
|
passingThreshold?: number;
|
|
443
623
|
}
|
|
444
624
|
|
|
625
|
+
export interface AIFloatMetric {
|
|
626
|
+
/**
|
|
627
|
+
* AI-based evaluation type.
|
|
628
|
+
*/
|
|
629
|
+
evalType: 'ai';
|
|
630
|
+
|
|
631
|
+
/**
|
|
632
|
+
* The name of the Metric.
|
|
633
|
+
*/
|
|
634
|
+
name: string;
|
|
635
|
+
|
|
636
|
+
/**
|
|
637
|
+
* Float output type (0-1).
|
|
638
|
+
*/
|
|
639
|
+
outputType: 'float';
|
|
640
|
+
|
|
641
|
+
/**
|
|
642
|
+
* The complete prompt template for AI evaluation. Should include placeholders for
|
|
643
|
+
* dynamic content.
|
|
644
|
+
*/
|
|
645
|
+
promptTemplate: string;
|
|
646
|
+
|
|
647
|
+
/**
|
|
648
|
+
* The description of the Metric.
|
|
649
|
+
*/
|
|
650
|
+
description?: string | null;
|
|
651
|
+
|
|
652
|
+
/**
|
|
653
|
+
* The AI model to use for evaluation.
|
|
654
|
+
*/
|
|
655
|
+
evalModelName?: string;
|
|
656
|
+
|
|
657
|
+
/**
|
|
658
|
+
* Guidelines for AI evaluation on how to score the metric.
|
|
659
|
+
*/
|
|
660
|
+
guidelines?: string;
|
|
661
|
+
|
|
662
|
+
/**
|
|
663
|
+
* Threshold for determining pass/fail from float scores (0.0-1.0).
|
|
664
|
+
*/
|
|
665
|
+
passingThreshold?: number;
|
|
666
|
+
|
|
667
|
+
/**
|
|
668
|
+
* The temperature for AI evaluation (0-2).
|
|
669
|
+
*/
|
|
670
|
+
temperature?: number;
|
|
671
|
+
}
|
|
672
|
+
|
|
673
|
+
export interface HumanFloatMetric {
|
|
674
|
+
/**
|
|
675
|
+
* Human-based evaluation type.
|
|
676
|
+
*/
|
|
677
|
+
evalType: 'human';
|
|
678
|
+
|
|
679
|
+
/**
|
|
680
|
+
* The name of the Metric.
|
|
681
|
+
*/
|
|
682
|
+
name: string;
|
|
683
|
+
|
|
684
|
+
/**
|
|
685
|
+
* Float output type (0-1).
|
|
686
|
+
*/
|
|
687
|
+
outputType: 'float';
|
|
688
|
+
|
|
689
|
+
/**
|
|
690
|
+
* The description of the Metric.
|
|
691
|
+
*/
|
|
692
|
+
description?: string | null;
|
|
693
|
+
|
|
694
|
+
/**
|
|
695
|
+
* Guidelines for human evaluators.
|
|
696
|
+
*/
|
|
697
|
+
guidelines?: string;
|
|
698
|
+
|
|
699
|
+
/**
|
|
700
|
+
* Threshold for determining pass/fail from float scores (0.0-1.0).
|
|
701
|
+
*/
|
|
702
|
+
passingThreshold?: number;
|
|
703
|
+
}
|
|
704
|
+
|
|
705
|
+
export interface HeuristicFloatMetric {
|
|
706
|
+
/**
|
|
707
|
+
* Heuristic-based evaluation type.
|
|
708
|
+
*/
|
|
709
|
+
evalType: 'heuristic';
|
|
710
|
+
|
|
711
|
+
/**
|
|
712
|
+
* The name of the Metric.
|
|
713
|
+
*/
|
|
714
|
+
name: string;
|
|
715
|
+
|
|
716
|
+
/**
|
|
717
|
+
* Float output type (0-1).
|
|
718
|
+
*/
|
|
719
|
+
outputType: 'float';
|
|
720
|
+
|
|
721
|
+
/**
|
|
722
|
+
* The description of the Metric.
|
|
723
|
+
*/
|
|
724
|
+
description?: string | null;
|
|
725
|
+
|
|
726
|
+
/**
|
|
727
|
+
* Guidelines for heuristic evaluation logic.
|
|
728
|
+
*/
|
|
729
|
+
guidelines?: string;
|
|
730
|
+
|
|
731
|
+
/**
|
|
732
|
+
* Threshold for determining pass/fail from float scores (0.0-1.0).
|
|
733
|
+
*/
|
|
734
|
+
passingThreshold?: number;
|
|
735
|
+
}
|
|
736
|
+
|
|
445
737
|
export interface AIBooleanMetric {
|
|
446
738
|
/**
|
|
447
739
|
* AI-based evaluation type.
|
|
@@ -477,7 +769,7 @@ export declare namespace MetricCreateParams {
|
|
|
477
769
|
/**
|
|
478
770
|
* Guidelines for AI evaluation on how to score the metric.
|
|
479
771
|
*/
|
|
480
|
-
guidelines?: string
|
|
772
|
+
guidelines?: string;
|
|
481
773
|
|
|
482
774
|
/**
|
|
483
775
|
* The temperature for AI evaluation (0-2).
|
|
@@ -534,7 +826,7 @@ export declare namespace MetricCreateParams {
|
|
|
534
826
|
description?: string | null;
|
|
535
827
|
|
|
536
828
|
/**
|
|
537
|
-
*
|
|
829
|
+
* Guidelines for heuristic evaluation logic.
|
|
538
830
|
*/
|
|
539
831
|
guidelines?: string;
|
|
540
832
|
}
|
|
@@ -544,6 +836,9 @@ export type MetricUpdateParams =
|
|
|
544
836
|
| MetricUpdateParams.AIIntMetric
|
|
545
837
|
| MetricUpdateParams.HumanIntMetric
|
|
546
838
|
| MetricUpdateParams.HeuristicIntMetric
|
|
839
|
+
| MetricUpdateParams.AIFloatMetric
|
|
840
|
+
| MetricUpdateParams.HumanFloatMetric
|
|
841
|
+
| MetricUpdateParams.HeuristicFloatMetric
|
|
547
842
|
| MetricUpdateParams.AIBooleanMetric
|
|
548
843
|
| MetricUpdateParams.HumanBooleanMetric
|
|
549
844
|
| MetricUpdateParams.HeuristicBooleanMetric;
|
|
@@ -573,7 +868,7 @@ export declare namespace MetricUpdateParams {
|
|
|
573
868
|
/**
|
|
574
869
|
* Guidelines for AI evaluation on how to score the metric.
|
|
575
870
|
*/
|
|
576
|
-
guidelines?: string
|
|
871
|
+
guidelines?: string;
|
|
577
872
|
|
|
578
873
|
/**
|
|
579
874
|
* The name of the Metric.
|
|
@@ -646,7 +941,7 @@ export declare namespace MetricUpdateParams {
|
|
|
646
941
|
description?: string | null;
|
|
647
942
|
|
|
648
943
|
/**
|
|
649
|
-
*
|
|
944
|
+
* Guidelines for heuristic evaluation logic.
|
|
650
945
|
*/
|
|
651
946
|
guidelines?: string;
|
|
652
947
|
|
|
@@ -661,6 +956,118 @@ export declare namespace MetricUpdateParams {
|
|
|
661
956
|
passingThreshold?: number;
|
|
662
957
|
}
|
|
663
958
|
|
|
959
|
+
export interface AIFloatMetric {
|
|
960
|
+
/**
|
|
961
|
+
* AI-based evaluation type.
|
|
962
|
+
*/
|
|
963
|
+
evalType: 'ai';
|
|
964
|
+
|
|
965
|
+
/**
|
|
966
|
+
* Float output type (0-1).
|
|
967
|
+
*/
|
|
968
|
+
outputType: 'float';
|
|
969
|
+
|
|
970
|
+
/**
|
|
971
|
+
* The description of the Metric.
|
|
972
|
+
*/
|
|
973
|
+
description?: string | null;
|
|
974
|
+
|
|
975
|
+
/**
|
|
976
|
+
* The AI model to use for evaluation.
|
|
977
|
+
*/
|
|
978
|
+
evalModelName?: string;
|
|
979
|
+
|
|
980
|
+
/**
|
|
981
|
+
* Guidelines for AI evaluation on how to score the metric.
|
|
982
|
+
*/
|
|
983
|
+
guidelines?: string;
|
|
984
|
+
|
|
985
|
+
/**
|
|
986
|
+
* The name of the Metric.
|
|
987
|
+
*/
|
|
988
|
+
name?: string;
|
|
989
|
+
|
|
990
|
+
/**
|
|
991
|
+
* Threshold for determining pass/fail from float scores (0.0-1.0).
|
|
992
|
+
*/
|
|
993
|
+
passingThreshold?: number;
|
|
994
|
+
|
|
995
|
+
/**
|
|
996
|
+
* The complete prompt template for AI evaluation. Should include placeholders for
|
|
997
|
+
* dynamic content.
|
|
998
|
+
*/
|
|
999
|
+
promptTemplate?: string;
|
|
1000
|
+
|
|
1001
|
+
/**
|
|
1002
|
+
* The temperature for AI evaluation (0-2).
|
|
1003
|
+
*/
|
|
1004
|
+
temperature?: number;
|
|
1005
|
+
}
|
|
1006
|
+
|
|
1007
|
+
export interface HumanFloatMetric {
|
|
1008
|
+
/**
|
|
1009
|
+
* Human-based evaluation type.
|
|
1010
|
+
*/
|
|
1011
|
+
evalType: 'human';
|
|
1012
|
+
|
|
1013
|
+
/**
|
|
1014
|
+
* Float output type (0-1).
|
|
1015
|
+
*/
|
|
1016
|
+
outputType: 'float';
|
|
1017
|
+
|
|
1018
|
+
/**
|
|
1019
|
+
* The description of the Metric.
|
|
1020
|
+
*/
|
|
1021
|
+
description?: string | null;
|
|
1022
|
+
|
|
1023
|
+
/**
|
|
1024
|
+
* Guidelines for human evaluators.
|
|
1025
|
+
*/
|
|
1026
|
+
guidelines?: string;
|
|
1027
|
+
|
|
1028
|
+
/**
|
|
1029
|
+
* The name of the Metric.
|
|
1030
|
+
*/
|
|
1031
|
+
name?: string;
|
|
1032
|
+
|
|
1033
|
+
/**
|
|
1034
|
+
* Threshold for determining pass/fail from float scores (0.0-1.0).
|
|
1035
|
+
*/
|
|
1036
|
+
passingThreshold?: number;
|
|
1037
|
+
}
|
|
1038
|
+
|
|
1039
|
+
export interface HeuristicFloatMetric {
|
|
1040
|
+
/**
|
|
1041
|
+
* Heuristic-based evaluation type.
|
|
1042
|
+
*/
|
|
1043
|
+
evalType: 'heuristic';
|
|
1044
|
+
|
|
1045
|
+
/**
|
|
1046
|
+
* Float output type (0-1).
|
|
1047
|
+
*/
|
|
1048
|
+
outputType: 'float';
|
|
1049
|
+
|
|
1050
|
+
/**
|
|
1051
|
+
* The description of the Metric.
|
|
1052
|
+
*/
|
|
1053
|
+
description?: string | null;
|
|
1054
|
+
|
|
1055
|
+
/**
|
|
1056
|
+
* Guidelines for heuristic evaluation logic.
|
|
1057
|
+
*/
|
|
1058
|
+
guidelines?: string;
|
|
1059
|
+
|
|
1060
|
+
/**
|
|
1061
|
+
* The name of the Metric.
|
|
1062
|
+
*/
|
|
1063
|
+
name?: string;
|
|
1064
|
+
|
|
1065
|
+
/**
|
|
1066
|
+
* Threshold for determining pass/fail from float scores (0.0-1.0).
|
|
1067
|
+
*/
|
|
1068
|
+
passingThreshold?: number;
|
|
1069
|
+
}
|
|
1070
|
+
|
|
664
1071
|
export interface AIBooleanMetric {
|
|
665
1072
|
/**
|
|
666
1073
|
* AI-based evaluation type.
|
|
@@ -685,7 +1092,7 @@ export declare namespace MetricUpdateParams {
|
|
|
685
1092
|
/**
|
|
686
1093
|
* Guidelines for AI evaluation on how to score the metric.
|
|
687
1094
|
*/
|
|
688
|
-
guidelines?: string
|
|
1095
|
+
guidelines?: string;
|
|
689
1096
|
|
|
690
1097
|
/**
|
|
691
1098
|
* The name of the Metric.
|
|
@@ -748,7 +1155,7 @@ export declare namespace MetricUpdateParams {
|
|
|
748
1155
|
description?: string | null;
|
|
749
1156
|
|
|
750
1157
|
/**
|
|
751
|
-
*
|
|
1158
|
+
* Guidelines for heuristic evaluation logic.
|
|
752
1159
|
*/
|
|
753
1160
|
guidelines?: string;
|
|
754
1161
|
|
|
@@ -759,10 +1166,14 @@ export declare namespace MetricUpdateParams {
|
|
|
759
1166
|
}
|
|
760
1167
|
}
|
|
761
1168
|
|
|
1169
|
+
export interface MetricListParams extends PaginatedResponseParams {}
|
|
1170
|
+
|
|
762
1171
|
export declare namespace Metrics {
|
|
763
1172
|
export {
|
|
764
1173
|
type Metric as Metric,
|
|
1174
|
+
type MetricsPaginatedResponse as MetricsPaginatedResponse,
|
|
765
1175
|
type MetricCreateParams as MetricCreateParams,
|
|
766
1176
|
type MetricUpdateParams as MetricUpdateParams,
|
|
1177
|
+
type MetricListParams as MetricListParams,
|
|
767
1178
|
};
|
|
768
1179
|
}
|
package/src/resources/records.ts
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
2
|
|
|
3
3
|
import { APIResource } from '../core/resource';
|
|
4
|
+
import * as ScoresAPI from './scores';
|
|
4
5
|
import { APIPromise } from '../core/api-promise';
|
|
6
|
+
import { PagePromise, PaginatedResponse, type PaginatedResponseParams } from '../core/pagination';
|
|
5
7
|
import { RequestOptions } from '../internal/request-options';
|
|
6
8
|
import { path } from '../internal/utils/path';
|
|
7
9
|
|
|
@@ -24,8 +26,35 @@ export class Records extends APIResource {
|
|
|
24
26
|
create(runID: string, body: RecordCreateParams, options?: RequestOptions): APIPromise<Record> {
|
|
25
27
|
return this._client.post(path`/runs/${runID}/records`, { body, ...options });
|
|
26
28
|
}
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Retrieve a paginated list of Records for a Run, including all scores for each
|
|
32
|
+
* record.
|
|
33
|
+
*
|
|
34
|
+
* @example
|
|
35
|
+
* ```ts
|
|
36
|
+
* // Automatically fetches more pages as needed.
|
|
37
|
+
* for await (const recordListResponse of client.records.list(
|
|
38
|
+
* '135',
|
|
39
|
+
* )) {
|
|
40
|
+
* // ...
|
|
41
|
+
* }
|
|
42
|
+
* ```
|
|
43
|
+
*/
|
|
44
|
+
list(
|
|
45
|
+
runID: string,
|
|
46
|
+
query: RecordListParams | null | undefined = {},
|
|
47
|
+
options?: RequestOptions,
|
|
48
|
+
): PagePromise<RecordListResponsesPaginatedResponse, RecordListResponse> {
|
|
49
|
+
return this._client.getAPIList(path`/runs/${runID}/records`, PaginatedResponse<RecordListResponse>, {
|
|
50
|
+
query,
|
|
51
|
+
...options,
|
|
52
|
+
});
|
|
53
|
+
}
|
|
27
54
|
}
|
|
28
55
|
|
|
56
|
+
export type RecordListResponsesPaginatedResponse = PaginatedResponse<RecordListResponse>;
|
|
57
|
+
|
|
29
58
|
/**
|
|
30
59
|
* A record of a system execution in the Scorecard system.
|
|
31
60
|
*/
|
|
@@ -62,6 +91,16 @@ export interface Record {
|
|
|
62
91
|
testcaseId?: string;
|
|
63
92
|
}
|
|
64
93
|
|
|
94
|
+
/**
|
|
95
|
+
* A record with all its associated scores.
|
|
96
|
+
*/
|
|
97
|
+
export interface RecordListResponse extends Record {
|
|
98
|
+
/**
|
|
99
|
+
* All scores associated with this record.
|
|
100
|
+
*/
|
|
101
|
+
scores: Array<ScoresAPI.Score>;
|
|
102
|
+
}
|
|
103
|
+
|
|
65
104
|
export interface RecordCreateParams {
|
|
66
105
|
/**
|
|
67
106
|
* The expected outputs for the Testcase.
|
|
@@ -85,6 +124,14 @@ export interface RecordCreateParams {
|
|
|
85
124
|
testcaseId?: string;
|
|
86
125
|
}
|
|
87
126
|
|
|
127
|
+
export interface RecordListParams extends PaginatedResponseParams {}
|
|
128
|
+
|
|
88
129
|
export declare namespace Records {
|
|
89
|
-
export {
|
|
130
|
+
export {
|
|
131
|
+
type Record as Record,
|
|
132
|
+
type RecordListResponse as RecordListResponse,
|
|
133
|
+
type RecordListResponsesPaginatedResponse as RecordListResponsesPaginatedResponse,
|
|
134
|
+
type RecordCreateParams as RecordCreateParams,
|
|
135
|
+
type RecordListParams as RecordListParams,
|
|
136
|
+
};
|
|
90
137
|
}
|