scorecard-ai 2.1.1 → 2.3.0

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (58)
  1. package/CHANGELOG.md +57 -0
  2. package/client.d.mts +6 -6
  3. package/client.d.mts.map +1 -1
  4. package/client.d.ts +6 -6
  5. package/client.d.ts.map +1 -1
  6. package/client.js.map +1 -1
  7. package/client.mjs +2 -2
  8. package/client.mjs.map +1 -1
  9. package/internal/to-file.d.mts +1 -1
  10. package/internal/to-file.d.ts +1 -1
  11. package/internal/to-file.js +1 -1
  12. package/internal/to-file.mjs +1 -1
  13. package/package.json +1 -1
  14. package/resources/index.d.mts +3 -3
  15. package/resources/index.d.mts.map +1 -1
  16. package/resources/index.d.ts +3 -3
  17. package/resources/index.d.ts.map +1 -1
  18. package/resources/index.js.map +1 -1
  19. package/resources/index.mjs +2 -2
  20. package/resources/index.mjs.map +1 -1
  21. package/resources/metrics.d.mts +348 -28
  22. package/resources/metrics.d.mts.map +1 -1
  23. package/resources/metrics.d.ts +348 -28
  24. package/resources/metrics.d.ts.map +1 -1
  25. package/resources/metrics.js +30 -0
  26. package/resources/metrics.js.map +1 -1
  27. package/resources/metrics.mjs +30 -0
  28. package/resources/metrics.mjs.map +1 -1
  29. package/resources/records.d.mts +45 -1
  30. package/resources/records.d.mts.map +1 -1
  31. package/resources/records.d.ts +45 -1
  32. package/resources/records.d.ts.map +1 -1
  33. package/resources/records.js +32 -0
  34. package/resources/records.js.map +1 -1
  35. package/resources/records.mjs +32 -0
  36. package/resources/records.mjs.map +1 -1
  37. package/resources/runs.d.mts +52 -5
  38. package/resources/runs.d.mts.map +1 -1
  39. package/resources/runs.d.ts +52 -5
  40. package/resources/runs.d.ts.map +1 -1
  41. package/resources/runs.js +30 -0
  42. package/resources/runs.js.map +1 -1
  43. package/resources/runs.mjs +30 -0
  44. package/resources/runs.mjs.map +1 -1
  45. package/resources/scores.d.mts +1 -1
  46. package/resources/scores.d.ts +1 -1
  47. package/src/client.ts +36 -5
  48. package/src/internal/to-file.ts +1 -1
  49. package/src/resources/index.ts +18 -3
  50. package/src/resources/metrics.ts +438 -27
  51. package/src/resources/records.ts +68 -1
  52. package/src/resources/runs.ts +76 -5
  53. package/src/resources/scores.ts +1 -1
  54. package/src/version.ts +1 -1
  55. package/version.d.mts +1 -1
  56. package/version.d.ts +1 -1
  57. package/version.js +1 -1
  58. package/version.mjs +1 -1
@@ -2,6 +2,7 @@
2
2
 
3
3
  import { APIResource } from '../core/resource';
4
4
  import { APIPromise } from '../core/api-promise';
5
+ import { PagePromise, PaginatedResponse, type PaginatedResponseParams } from '../core/pagination';
5
6
  import { RequestOptions } from '../internal/request-options';
6
7
  import { path } from '../internal/utils/path';
7
8
 
@@ -49,8 +50,45 @@ export class Metrics extends APIResource {
49
50
  update(metricID: string, body: MetricUpdateParams, options?: RequestOptions): APIPromise<Metric> {
50
51
  return this._client.patch(path`/metrics/${metricID}`, { body, ...options });
51
52
  }
53
+
54
+ /**
55
+ * List Metrics configured for the specified Project. Metrics are returned in
56
+ * reverse chronological order.
57
+ *
58
+ * @example
59
+ * ```ts
60
+ * // Automatically fetches more pages as needed.
61
+ * for await (const metric of client.metrics.list('314')) {
62
+ * // ...
63
+ * }
64
+ * ```
65
+ */
66
+ list(
67
+ projectID: string,
68
+ query: MetricListParams | null | undefined = {},
69
+ options?: RequestOptions,
70
+ ): PagePromise<MetricsPaginatedResponse, Metric> {
71
+ return this._client.getAPIList(path`/projects/${projectID}/metrics`, PaginatedResponse<Metric>, {
72
+ query,
73
+ ...options,
74
+ });
75
+ }
76
+
77
+ /**
78
+ * Retrieve a specific Metric by ID.
79
+ *
80
+ * @example
81
+ * ```ts
82
+ * const metric = await client.metrics.get('321');
83
+ * ```
84
+ */
85
+ get(metricID: string, options?: RequestOptions): APIPromise<Metric> {
86
+ return this._client.get(path`/metrics/${metricID}`, options);
87
+ }
52
88
  }
53
89
 
90
+ export type MetricsPaginatedResponse = PaginatedResponse<Metric>;
91
+
54
92
  /**
55
93
  * A Metric defines how to evaluate system outputs against expected results.
56
94
  */
@@ -58,6 +96,9 @@ export type Metric =
58
96
  | Metric.AIIntMetric
59
97
  | Metric.HumanIntMetric
60
98
  | Metric.HeuristicIntMetric
99
+ | Metric.AIFloatMetric
100
+ | Metric.HumanFloatMetric
101
+ | Metric.HeuristicFloatMetric
61
102
  | Metric.AIBooleanMetric
62
103
  | Metric.HumanBooleanMetric
63
104
  | Metric.HeuristicBooleanMetric;
@@ -90,7 +131,7 @@ export namespace Metric {
90
131
  /**
91
132
  * Guidelines for AI evaluation on how to score the metric.
92
133
  */
93
- guidelines: string | null;
134
+ guidelines: string;
94
135
 
95
136
  /**
96
137
  * The name of the Metric.
@@ -138,6 +179,11 @@ export namespace Metric {
138
179
  */
139
180
  evalType: 'human';
140
181
 
182
+ /**
183
+ * Guidelines for human evaluators.
184
+ */
185
+ guidelines: string;
186
+
141
187
  /**
142
188
  * The name of the Metric.
143
189
  */
@@ -152,11 +198,6 @@ export namespace Metric {
152
198
  * The threshold for determining pass/fail from integer scores (1-5).
153
199
  */
154
200
  passingThreshold: number;
155
-
156
- /**
157
- * Guidelines for human evaluators.
158
- */
159
- guidelines?: string;
160
201
  }
161
202
 
162
203
  /**
@@ -178,6 +219,11 @@ export namespace Metric {
178
219
  */
179
220
  evalType: 'heuristic';
180
221
 
222
+ /**
223
+ * Guidelines for heuristic evaluation logic.
224
+ */
225
+ guidelines: string;
226
+
181
227
  /**
182
228
  * The name of the Metric.
183
229
  */
@@ -192,11 +238,142 @@ export namespace Metric {
192
238
  * The threshold for determining pass/fail from integer scores (1-5).
193
239
  */
194
240
  passingThreshold: number;
241
+ }
195
242
 
243
+ /**
244
+ * A Metric with AI evaluation and float output.
245
+ */
246
+ export interface AIFloatMetric {
196
247
  /**
197
- * Optional guidelines for heuristic evaluation logic.
248
+ * The ID of the Metric.
198
249
  */
199
- guidelines?: string;
250
+ id: string;
251
+
252
+ /**
253
+ * The description of the Metric.
254
+ */
255
+ description: string | null;
256
+
257
+ /**
258
+ * The AI model to use for evaluation.
259
+ */
260
+ evalModelName: string;
261
+
262
+ /**
263
+ * AI-based evaluation type.
264
+ */
265
+ evalType: 'ai';
266
+
267
+ /**
268
+ * Guidelines for AI evaluation on how to score the metric.
269
+ */
270
+ guidelines: string;
271
+
272
+ /**
273
+ * The name of the Metric.
274
+ */
275
+ name: string;
276
+
277
+ /**
278
+ * Float output type (0-1).
279
+ */
280
+ outputType: 'float';
281
+
282
+ /**
283
+ * Threshold for determining pass/fail from float scores (0.0-1.0).
284
+ */
285
+ passingThreshold: number;
286
+
287
+ /**
288
+ * The complete prompt template for AI evaluation. Should include placeholders for
289
+ * dynamic content.
290
+ */
291
+ promptTemplate: string;
292
+
293
+ /**
294
+ * The temperature for AI evaluation (0-2).
295
+ */
296
+ temperature: number;
297
+ }
298
+
299
+ /**
300
+ * A Metric with human evaluation and float output.
301
+ */
302
+ export interface HumanFloatMetric {
303
+ /**
304
+ * The ID of the Metric.
305
+ */
306
+ id: string;
307
+
308
+ /**
309
+ * The description of the Metric.
310
+ */
311
+ description: string | null;
312
+
313
+ /**
314
+ * Human-based evaluation type.
315
+ */
316
+ evalType: 'human';
317
+
318
+ /**
319
+ * Guidelines for human evaluators.
320
+ */
321
+ guidelines: string;
322
+
323
+ /**
324
+ * The name of the Metric.
325
+ */
326
+ name: string;
327
+
328
+ /**
329
+ * Float output type (0-1).
330
+ */
331
+ outputType: 'float';
332
+
333
+ /**
334
+ * Threshold for determining pass/fail from float scores (0.0-1.0).
335
+ */
336
+ passingThreshold: number;
337
+ }
338
+
339
+ /**
340
+ * A Metric with heuristic evaluation and float output.
341
+ */
342
+ export interface HeuristicFloatMetric {
343
+ /**
344
+ * The ID of the Metric.
345
+ */
346
+ id: string;
347
+
348
+ /**
349
+ * The description of the Metric.
350
+ */
351
+ description: string | null;
352
+
353
+ /**
354
+ * Heuristic-based evaluation type.
355
+ */
356
+ evalType: 'heuristic';
357
+
358
+ /**
359
+ * Guidelines for heuristic evaluation logic.
360
+ */
361
+ guidelines: string;
362
+
363
+ /**
364
+ * The name of the Metric.
365
+ */
366
+ name: string;
367
+
368
+ /**
369
+ * Float output type (0-1).
370
+ */
371
+ outputType: 'float';
372
+
373
+ /**
374
+ * Threshold for determining pass/fail from float scores (0.0-1.0).
375
+ */
376
+ passingThreshold: number;
200
377
  }
201
378
 
202
379
  /**
@@ -226,7 +403,7 @@ export namespace Metric {
226
403
  /**
227
404
  * Guidelines for AI evaluation on how to score the metric.
228
405
  */
229
- guidelines: string | null;
406
+ guidelines: string;
230
407
 
231
408
  /**
232
409
  * The name of the Metric.
@@ -269,6 +446,11 @@ export namespace Metric {
269
446
  */
270
447
  evalType: 'human';
271
448
 
449
+ /**
450
+ * Guidelines for human evaluators.
451
+ */
452
+ guidelines: string;
453
+
272
454
  /**
273
455
  * The name of the Metric.
274
456
  */
@@ -278,11 +460,6 @@ export namespace Metric {
278
460
  * Boolean output type.
279
461
  */
280
462
  outputType: 'boolean';
281
-
282
- /**
283
- * Guidelines for human evaluators.
284
- */
285
- guidelines?: string;
286
463
  }
287
464
 
288
465
  /**
@@ -304,6 +481,11 @@ export namespace Metric {
304
481
  */
305
482
  evalType: 'heuristic';
306
483
 
484
+ /**
485
+ * Guidelines for heuristic evaluation logic.
486
+ */
487
+ guidelines: string;
488
+
307
489
  /**
308
490
  * The name of the Metric.
309
491
  */
@@ -313,11 +495,6 @@ export namespace Metric {
313
495
  * Boolean output type.
314
496
  */
315
497
  outputType: 'boolean';
316
-
317
- /**
318
- * Optional guidelines for heuristic evaluation logic.
319
- */
320
- guidelines?: string;
321
498
  }
322
499
  }
323
500
 
@@ -325,6 +502,9 @@ export type MetricCreateParams =
325
502
  | MetricCreateParams.AIIntMetric
326
503
  | MetricCreateParams.HumanIntMetric
327
504
  | MetricCreateParams.HeuristicIntMetric
505
+ | MetricCreateParams.AIFloatMetric
506
+ | MetricCreateParams.HumanFloatMetric
507
+ | MetricCreateParams.HeuristicFloatMetric
328
508
  | MetricCreateParams.AIBooleanMetric
329
509
  | MetricCreateParams.HumanBooleanMetric
330
510
  | MetricCreateParams.HeuristicBooleanMetric;
@@ -365,7 +545,7 @@ export declare namespace MetricCreateParams {
365
545
  /**
366
546
  * Guidelines for AI evaluation on how to score the metric.
367
547
  */
368
- guidelines?: string | null;
548
+ guidelines?: string;
369
549
 
370
550
  /**
371
551
  * The threshold for determining pass/fail from integer scores (1-5).
@@ -432,7 +612,7 @@ export declare namespace MetricCreateParams {
432
612
  description?: string | null;
433
613
 
434
614
  /**
435
- * Optional guidelines for heuristic evaluation logic.
615
+ * Guidelines for heuristic evaluation logic.
436
616
  */
437
617
  guidelines?: string;
438
618
 
@@ -442,6 +622,118 @@ export declare namespace MetricCreateParams {
442
622
  passingThreshold?: number;
443
623
  }
444
624
 
625
+ export interface AIFloatMetric {
626
+ /**
627
+ * AI-based evaluation type.
628
+ */
629
+ evalType: 'ai';
630
+
631
+ /**
632
+ * The name of the Metric.
633
+ */
634
+ name: string;
635
+
636
+ /**
637
+ * Float output type (0-1).
638
+ */
639
+ outputType: 'float';
640
+
641
+ /**
642
+ * The complete prompt template for AI evaluation. Should include placeholders for
643
+ * dynamic content.
644
+ */
645
+ promptTemplate: string;
646
+
647
+ /**
648
+ * The description of the Metric.
649
+ */
650
+ description?: string | null;
651
+
652
+ /**
653
+ * The AI model to use for evaluation.
654
+ */
655
+ evalModelName?: string;
656
+
657
+ /**
658
+ * Guidelines for AI evaluation on how to score the metric.
659
+ */
660
+ guidelines?: string;
661
+
662
+ /**
663
+ * Threshold for determining pass/fail from float scores (0.0-1.0).
664
+ */
665
+ passingThreshold?: number;
666
+
667
+ /**
668
+ * The temperature for AI evaluation (0-2).
669
+ */
670
+ temperature?: number;
671
+ }
672
+
673
+ export interface HumanFloatMetric {
674
+ /**
675
+ * Human-based evaluation type.
676
+ */
677
+ evalType: 'human';
678
+
679
+ /**
680
+ * The name of the Metric.
681
+ */
682
+ name: string;
683
+
684
+ /**
685
+ * Float output type (0-1).
686
+ */
687
+ outputType: 'float';
688
+
689
+ /**
690
+ * The description of the Metric.
691
+ */
692
+ description?: string | null;
693
+
694
+ /**
695
+ * Guidelines for human evaluators.
696
+ */
697
+ guidelines?: string;
698
+
699
+ /**
700
+ * Threshold for determining pass/fail from float scores (0.0-1.0).
701
+ */
702
+ passingThreshold?: number;
703
+ }
704
+
705
+ export interface HeuristicFloatMetric {
706
+ /**
707
+ * Heuristic-based evaluation type.
708
+ */
709
+ evalType: 'heuristic';
710
+
711
+ /**
712
+ * The name of the Metric.
713
+ */
714
+ name: string;
715
+
716
+ /**
717
+ * Float output type (0-1).
718
+ */
719
+ outputType: 'float';
720
+
721
+ /**
722
+ * The description of the Metric.
723
+ */
724
+ description?: string | null;
725
+
726
+ /**
727
+ * Guidelines for heuristic evaluation logic.
728
+ */
729
+ guidelines?: string;
730
+
731
+ /**
732
+ * Threshold for determining pass/fail from float scores (0.0-1.0).
733
+ */
734
+ passingThreshold?: number;
735
+ }
736
+
445
737
  export interface AIBooleanMetric {
446
738
  /**
447
739
  * AI-based evaluation type.
@@ -477,7 +769,7 @@ export declare namespace MetricCreateParams {
477
769
  /**
478
770
  * Guidelines for AI evaluation on how to score the metric.
479
771
  */
480
- guidelines?: string | null;
772
+ guidelines?: string;
481
773
 
482
774
  /**
483
775
  * The temperature for AI evaluation (0-2).
@@ -534,7 +826,7 @@ export declare namespace MetricCreateParams {
534
826
  description?: string | null;
535
827
 
536
828
  /**
537
- * Optional guidelines for heuristic evaluation logic.
829
+ * Guidelines for heuristic evaluation logic.
538
830
  */
539
831
  guidelines?: string;
540
832
  }
@@ -544,6 +836,9 @@ export type MetricUpdateParams =
544
836
  | MetricUpdateParams.AIIntMetric
545
837
  | MetricUpdateParams.HumanIntMetric
546
838
  | MetricUpdateParams.HeuristicIntMetric
839
+ | MetricUpdateParams.AIFloatMetric
840
+ | MetricUpdateParams.HumanFloatMetric
841
+ | MetricUpdateParams.HeuristicFloatMetric
547
842
  | MetricUpdateParams.AIBooleanMetric
548
843
  | MetricUpdateParams.HumanBooleanMetric
549
844
  | MetricUpdateParams.HeuristicBooleanMetric;
@@ -573,7 +868,7 @@ export declare namespace MetricUpdateParams {
573
868
  /**
574
869
  * Guidelines for AI evaluation on how to score the metric.
575
870
  */
576
- guidelines?: string | null;
871
+ guidelines?: string;
577
872
 
578
873
  /**
579
874
  * The name of the Metric.
@@ -646,7 +941,7 @@ export declare namespace MetricUpdateParams {
646
941
  description?: string | null;
647
942
 
648
943
  /**
649
- * Optional guidelines for heuristic evaluation logic.
944
+ * Guidelines for heuristic evaluation logic.
650
945
  */
651
946
  guidelines?: string;
652
947
 
@@ -661,6 +956,118 @@ export declare namespace MetricUpdateParams {
661
956
  passingThreshold?: number;
662
957
  }
663
958
 
959
+ export interface AIFloatMetric {
960
+ /**
961
+ * AI-based evaluation type.
962
+ */
963
+ evalType: 'ai';
964
+
965
+ /**
966
+ * Float output type (0-1).
967
+ */
968
+ outputType: 'float';
969
+
970
+ /**
971
+ * The description of the Metric.
972
+ */
973
+ description?: string | null;
974
+
975
+ /**
976
+ * The AI model to use for evaluation.
977
+ */
978
+ evalModelName?: string;
979
+
980
+ /**
981
+ * Guidelines for AI evaluation on how to score the metric.
982
+ */
983
+ guidelines?: string;
984
+
985
+ /**
986
+ * The name of the Metric.
987
+ */
988
+ name?: string;
989
+
990
+ /**
991
+ * Threshold for determining pass/fail from float scores (0.0-1.0).
992
+ */
993
+ passingThreshold?: number;
994
+
995
+ /**
996
+ * The complete prompt template for AI evaluation. Should include placeholders for
997
+ * dynamic content.
998
+ */
999
+ promptTemplate?: string;
1000
+
1001
+ /**
1002
+ * The temperature for AI evaluation (0-2).
1003
+ */
1004
+ temperature?: number;
1005
+ }
1006
+
1007
+ export interface HumanFloatMetric {
1008
+ /**
1009
+ * Human-based evaluation type.
1010
+ */
1011
+ evalType: 'human';
1012
+
1013
+ /**
1014
+ * Float output type (0-1).
1015
+ */
1016
+ outputType: 'float';
1017
+
1018
+ /**
1019
+ * The description of the Metric.
1020
+ */
1021
+ description?: string | null;
1022
+
1023
+ /**
1024
+ * Guidelines for human evaluators.
1025
+ */
1026
+ guidelines?: string;
1027
+
1028
+ /**
1029
+ * The name of the Metric.
1030
+ */
1031
+ name?: string;
1032
+
1033
+ /**
1034
+ * Threshold for determining pass/fail from float scores (0.0-1.0).
1035
+ */
1036
+ passingThreshold?: number;
1037
+ }
1038
+
1039
+ export interface HeuristicFloatMetric {
1040
+ /**
1041
+ * Heuristic-based evaluation type.
1042
+ */
1043
+ evalType: 'heuristic';
1044
+
1045
+ /**
1046
+ * Float output type (0-1).
1047
+ */
1048
+ outputType: 'float';
1049
+
1050
+ /**
1051
+ * The description of the Metric.
1052
+ */
1053
+ description?: string | null;
1054
+
1055
+ /**
1056
+ * Guidelines for heuristic evaluation logic.
1057
+ */
1058
+ guidelines?: string;
1059
+
1060
+ /**
1061
+ * The name of the Metric.
1062
+ */
1063
+ name?: string;
1064
+
1065
+ /**
1066
+ * Threshold for determining pass/fail from float scores (0.0-1.0).
1067
+ */
1068
+ passingThreshold?: number;
1069
+ }
1070
+
664
1071
  export interface AIBooleanMetric {
665
1072
  /**
666
1073
  * AI-based evaluation type.
@@ -685,7 +1092,7 @@ export declare namespace MetricUpdateParams {
685
1092
  /**
686
1093
  * Guidelines for AI evaluation on how to score the metric.
687
1094
  */
688
- guidelines?: string | null;
1095
+ guidelines?: string;
689
1096
 
690
1097
  /**
691
1098
  * The name of the Metric.
@@ -748,7 +1155,7 @@ export declare namespace MetricUpdateParams {
748
1155
  description?: string | null;
749
1156
 
750
1157
  /**
751
- * Optional guidelines for heuristic evaluation logic.
1158
+ * Guidelines for heuristic evaluation logic.
752
1159
  */
753
1160
  guidelines?: string;
754
1161
 
@@ -759,10 +1166,14 @@ export declare namespace MetricUpdateParams {
759
1166
  }
760
1167
  }
761
1168
 
1169
+ export interface MetricListParams extends PaginatedResponseParams {}
1170
+
762
1171
  export declare namespace Metrics {
763
1172
  export {
764
1173
  type Metric as Metric,
1174
+ type MetricsPaginatedResponse as MetricsPaginatedResponse,
765
1175
  type MetricCreateParams as MetricCreateParams,
766
1176
  type MetricUpdateParams as MetricUpdateParams,
1177
+ type MetricListParams as MetricListParams,
767
1178
  };
768
1179
  }