scorecard-ai 1.0.0-alpha.9 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. package/CHANGELOG.md +37 -0
  2. package/README.md +48 -19
  3. package/client.d.mts +4 -4
  4. package/client.d.mts.map +1 -1
  5. package/client.d.ts +4 -4
  6. package/client.d.ts.map +1 -1
  7. package/client.js +7 -5
  8. package/client.js.map +1 -1
  9. package/client.mjs +7 -5
  10. package/client.mjs.map +1 -1
  11. package/internal/tslib.js +6 -6
  12. package/lib/runAndEvaluate.d.mts +20 -8
  13. package/lib/runAndEvaluate.d.mts.map +1 -1
  14. package/lib/runAndEvaluate.d.ts +20 -8
  15. package/lib/runAndEvaluate.d.ts.map +1 -1
  16. package/lib/runAndEvaluate.js +31 -21
  17. package/lib/runAndEvaluate.js.map +1 -1
  18. package/lib/runAndEvaluate.mjs +31 -21
  19. package/lib/runAndEvaluate.mjs.map +1 -1
  20. package/package.json +1 -1
  21. package/resources/index.d.mts +2 -2
  22. package/resources/index.d.mts.map +1 -1
  23. package/resources/index.d.ts +2 -2
  24. package/resources/index.d.ts.map +1 -1
  25. package/resources/index.js +4 -4
  26. package/resources/index.js.map +1 -1
  27. package/resources/index.mjs +2 -2
  28. package/resources/index.mjs.map +1 -1
  29. package/resources/metrics.d.mts +611 -0
  30. package/resources/metrics.d.mts.map +1 -0
  31. package/resources/metrics.d.ts +611 -0
  32. package/resources/metrics.d.ts.map +1 -0
  33. package/resources/metrics.js +52 -0
  34. package/resources/metrics.js.map +1 -0
  35. package/resources/metrics.mjs +48 -0
  36. package/resources/metrics.mjs.map +1 -0
  37. package/resources/runs.d.mts +5 -5
  38. package/resources/runs.d.mts.map +1 -1
  39. package/resources/runs.d.ts +5 -5
  40. package/resources/runs.d.ts.map +1 -1
  41. package/resources/runs.js +1 -1
  42. package/resources/runs.mjs +1 -1
  43. package/resources/systems/index.d.mts +3 -0
  44. package/resources/systems/index.d.mts.map +1 -0
  45. package/resources/systems/index.d.ts +3 -0
  46. package/resources/systems/index.d.ts.map +1 -0
  47. package/resources/systems/index.js +9 -0
  48. package/resources/systems/index.js.map +1 -0
  49. package/resources/systems/index.mjs +4 -0
  50. package/resources/systems/index.mjs.map +1 -0
  51. package/resources/systems/systems.d.mts +229 -0
  52. package/resources/systems/systems.d.mts.map +1 -0
  53. package/resources/systems/systems.d.ts +229 -0
  54. package/resources/systems/systems.d.ts.map +1 -0
  55. package/resources/systems/systems.js +151 -0
  56. package/resources/systems/systems.js.map +1 -0
  57. package/resources/systems/systems.mjs +146 -0
  58. package/resources/systems/systems.mjs.map +1 -0
  59. package/resources/systems/versions.d.mts +132 -0
  60. package/resources/systems/versions.d.mts.map +1 -0
  61. package/resources/systems/versions.d.ts +132 -0
  62. package/resources/systems/versions.d.ts.map +1 -0
  63. package/resources/{system-configs.js → systems/versions.js} +26 -25
  64. package/resources/systems/versions.js.map +1 -0
  65. package/resources/{system-configs.mjs → systems/versions.mjs} +24 -23
  66. package/resources/systems/versions.mjs.map +1 -0
  67. package/resources/systems.d.mts +1 -224
  68. package/resources/systems.d.mts.map +1 -1
  69. package/resources/systems.d.ts +1 -224
  70. package/resources/systems.d.ts.map +1 -1
  71. package/resources/systems.js +2 -139
  72. package/resources/systems.js.map +1 -1
  73. package/resources/systems.mjs +1 -137
  74. package/resources/systems.mjs.map +1 -1
  75. package/resources/testsets.d.mts +1 -1
  76. package/resources/testsets.d.ts +1 -1
  77. package/resources/testsets.js +1 -1
  78. package/resources/testsets.mjs +1 -1
  79. package/src/client.ts +22 -27
  80. package/src/lib/runAndEvaluate.ts +52 -27
  81. package/src/resources/index.ts +2 -8
  82. package/src/resources/metrics.ts +768 -0
  83. package/src/resources/runs.ts +5 -5
  84. package/src/resources/systems/index.ts +18 -0
  85. package/src/resources/systems/systems.ts +299 -0
  86. package/src/resources/systems/versions.ts +166 -0
  87. package/src/resources/systems.ts +1 -277
  88. package/src/resources/testsets.ts +1 -1
  89. package/src/version.ts +1 -1
  90. package/version.d.mts +1 -1
  91. package/version.d.mts.map +1 -1
  92. package/version.d.ts +1 -1
  93. package/version.d.ts.map +1 -1
  94. package/version.js +1 -1
  95. package/version.js.map +1 -1
  96. package/version.mjs +1 -1
  97. package/version.mjs.map +1 -1
  98. package/resources/system-configs.d.mts +0 -148
  99. package/resources/system-configs.d.mts.map +0 -1
  100. package/resources/system-configs.d.ts +0 -148
  101. package/resources/system-configs.d.ts.map +0 -1
  102. package/resources/system-configs.js.map +0 -1
  103. package/resources/system-configs.mjs.map +0 -1
  104. package/src/resources/system-configs.ts +0 -189
@@ -1,143 +1,6 @@
1
1
  "use strict";
2
2
  // File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
3
3
  Object.defineProperty(exports, "__esModule", { value: true });
4
- exports.Systems = void 0;
5
- const resource_1 = require("../core/resource.js");
6
- const pagination_1 = require("../core/pagination.js");
7
- const path_1 = require("../internal/utils/path.js");
8
- class Systems extends resource_1.APIResource {
9
- /**
10
- * Create a new system definition that specifies the interface contracts for a
11
- * component you want to evaluate.
12
- *
13
- * A system acts as a template that defines three key contracts through JSON
14
- * Schemas:
15
- *
16
- * 1. Input Schema: What data your system accepts (e.g., user queries, context
17
- * documents)
18
- * 2. Output Schema: What data your system produces (e.g., responses, confidence
19
- * scores)
20
- * 3. Config Schema: What parameters can be adjusted (e.g., model selection,
21
- * temperature)
22
- *
23
- * This separation lets you evaluate any system as a black box, focusing on its
24
- * interface rather than implementation details.
25
- *
26
- * @example
27
- * ```ts
28
- * const system = await client.systems.create('314', {
29
- * configSchema: {
30
- * type: 'object',
31
- * properties: {
32
- * temperature: { type: 'number' },
33
- * maxTokens: { type: 'integer' },
34
- * model: { type: 'string', enum: ['gpt-4', 'gpt-4-turbo'] },
35
- * },
36
- * required: ['model'],
37
- * },
38
- * description: 'Production chatbot powered by GPT-4',
39
- * inputSchema: {
40
- * type: 'object',
41
- * properties: {
42
- * messages: {
43
- * type: 'array',
44
- * items: {
45
- * type: 'object',
46
- * properties: {
47
- * role: { type: 'string', enum: ['system', 'user', 'assistant'] },
48
- * content: { type: 'string' },
49
- * },
50
- * required: ['role', 'content'],
51
- * },
52
- * },
53
- * },
54
- * required: ['messages'],
55
- * },
56
- * name: 'GPT-4 Chatbot',
57
- * outputSchema: {
58
- * type: 'object',
59
- * properties: { response: { type: 'string' } },
60
- * required: ['response'],
61
- * },
62
- * });
63
- * ```
64
- */
65
- create(projectID, body, options) {
66
- return this._client.post((0, path_1.path) `/projects/${projectID}/systems`, { body, ...options });
67
- }
68
- /**
69
- * Update an existing system definition. Only the fields provided in the request
70
- * body will be updated. If a field is provided, the new content will replace the
71
- * existing content. If a field is not provided, the existing content will remain
72
- * unchanged.
73
- *
74
- * When updating schemas:
75
- *
76
- * - The system will accept your changes regardless of compatibility with existing
77
- * configurations
78
- * - Schema updates won't invalidate existing evaluations or configurations
79
- * - For significant redesigns, creating a new system definition provides a cleaner
80
- * separation
81
- *
82
- * @example
83
- * ```ts
84
- * const system = await client.systems.update(
85
- * '12345678-0a8b-4f66-b6f3-2ddcfa097257',
86
- * {
87
- * description:
88
- * 'Updated production chatbot powered by GPT-4 Turbo',
89
- * name: 'GPT-4 Turbo Chatbot',
90
- * },
91
- * );
92
- * ```
93
- */
94
- update(systemID, body = {}, options) {
95
- return this._client.patch((0, path_1.path) `/systems/${systemID}`, { body, ...options });
96
- }
97
- /**
98
- * Retrieve a paginated list of all systems. Systems are ordered by creation date.
99
- *
100
- * @example
101
- * ```ts
102
- * // Automatically fetches more pages as needed.
103
- * for await (const system of client.systems.list('314')) {
104
- * // ...
105
- * }
106
- * ```
107
- */
108
- list(projectID, query = {}, options) {
109
- return this._client.getAPIList((0, path_1.path) `/projects/${projectID}/systems`, (pagination_1.PaginatedResponse), {
110
- query,
111
- ...options,
112
- });
113
- }
114
- /**
115
- * Delete a system definition by ID. This will not delete associated system
116
- * configurations.
117
- *
118
- * @example
119
- * ```ts
120
- * const system = await client.systems.delete(
121
- * '12345678-0a8b-4f66-b6f3-2ddcfa097257',
122
- * );
123
- * ```
124
- */
125
- delete(systemID, options) {
126
- return this._client.delete((0, path_1.path) `/systems/${systemID}`, options);
127
- }
128
- /**
129
- * Retrieve a specific system by ID.
130
- *
131
- * @example
132
- * ```ts
133
- * const system = await client.systems.get(
134
- * '12345678-0a8b-4f66-b6f3-2ddcfa097257',
135
- * );
136
- * ```
137
- */
138
- get(systemID, options) {
139
- return this._client.get((0, path_1.path) `/systems/${systemID}`, options);
140
- }
141
- }
142
- exports.Systems = Systems;
4
+ const tslib_1 = require("../internal/tslib.js");
5
+ tslib_1.__exportStar(require("./systems/index.js"), exports);
143
6
  //# sourceMappingURL=systems.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"systems.js","sourceRoot":"","sources":["../src/resources/systems.ts"],"names":[],"mappings":";AAAA,sFAAsF;;;AAEtF,kDAA+C;AAE/C,sDAAkG;AAElG,oDAA8C;AAE9C,MAAa,OAAQ,SAAQ,sBAAW;IACtC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OAuDG;IACH,MAAM,CAAC,SAAiB,EAAE,IAAwB,EAAE,OAAwB;QAC1E,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,IAAA,WAAI,EAAA,aAAa,SAAS,UAAU,EAAE,EAAE,IAAI,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;IACvF,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;;;;OAyBG;IACH,MAAM,CACJ,QAAgB,EAChB,OAA8C,EAAE,EAChD,OAAwB;QAExB,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,IAAA,WAAI,EAAA,YAAY,QAAQ,EAAE,EAAE,EAAE,IAAI,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;IAC9E,CAAC;IAED;;;;;;;;;;OAUG;IACH,IAAI,CACF,SAAiB,EACjB,QAA6C,EAAE,EAC/C,OAAwB;QAExB,OAAO,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,IAAA,WAAI,EAAA,aAAa,SAAS,UAAU,EAAE,CAAA,8BAAyB,CAAA,EAAE;YAC9F,KAAK;YACL,GAAG,OAAO;SACX,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;OAUG;IACH,MAAM,CAAC,QAAgB,EAAE,OAAwB;QAC/C,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,IAAA,WAAI,EAAA,YAAY,QAAQ,EAAE,EAAE,OAAO,CAAC,CAAC;IAClE,CAAC;IAED;;;;;;;;;OASG;IACH,GAAG,CAAC,QAAgB,EAAE,OAAwB;QAC5C,OAAO,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAA,WAAI,EAAA,YAAY,QAAQ,EAAE,EAAE,OAAO,CAAC,CAAC;IAC/D,CAAC;CACF;AAjJD,0BAiJC"}
1
+ {"version":3,"file":"systems.js","sourceRoot":"","sources":["../src/resources/systems.ts"],"names":[],"mappings":";AAAA,sFAAsF;;;AAEtF,6DAAgC"}
@@ -1,139 +1,3 @@
1
1
  // File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
- import { APIResource } from "../core/resource.mjs";
3
- import { PaginatedResponse } from "../core/pagination.mjs";
4
- import { path } from "../internal/utils/path.mjs";
5
- export class Systems extends APIResource {
6
- /**
7
- * Create a new system definition that specifies the interface contracts for a
8
- * component you want to evaluate.
9
- *
10
- * A system acts as a template that defines three key contracts through JSON
11
- * Schemas:
12
- *
13
- * 1. Input Schema: What data your system accepts (e.g., user queries, context
14
- * documents)
15
- * 2. Output Schema: What data your system produces (e.g., responses, confidence
16
- * scores)
17
- * 3. Config Schema: What parameters can be adjusted (e.g., model selection,
18
- * temperature)
19
- *
20
- * This separation lets you evaluate any system as a black box, focusing on its
21
- * interface rather than implementation details.
22
- *
23
- * @example
24
- * ```ts
25
- * const system = await client.systems.create('314', {
26
- * configSchema: {
27
- * type: 'object',
28
- * properties: {
29
- * temperature: { type: 'number' },
30
- * maxTokens: { type: 'integer' },
31
- * model: { type: 'string', enum: ['gpt-4', 'gpt-4-turbo'] },
32
- * },
33
- * required: ['model'],
34
- * },
35
- * description: 'Production chatbot powered by GPT-4',
36
- * inputSchema: {
37
- * type: 'object',
38
- * properties: {
39
- * messages: {
40
- * type: 'array',
41
- * items: {
42
- * type: 'object',
43
- * properties: {
44
- * role: { type: 'string', enum: ['system', 'user', 'assistant'] },
45
- * content: { type: 'string' },
46
- * },
47
- * required: ['role', 'content'],
48
- * },
49
- * },
50
- * },
51
- * required: ['messages'],
52
- * },
53
- * name: 'GPT-4 Chatbot',
54
- * outputSchema: {
55
- * type: 'object',
56
- * properties: { response: { type: 'string' } },
57
- * required: ['response'],
58
- * },
59
- * });
60
- * ```
61
- */
62
- create(projectID, body, options) {
63
- return this._client.post(path `/projects/${projectID}/systems`, { body, ...options });
64
- }
65
- /**
66
- * Update an existing system definition. Only the fields provided in the request
67
- * body will be updated. If a field is provided, the new content will replace the
68
- * existing content. If a field is not provided, the existing content will remain
69
- * unchanged.
70
- *
71
- * When updating schemas:
72
- *
73
- * - The system will accept your changes regardless of compatibility with existing
74
- * configurations
75
- * - Schema updates won't invalidate existing evaluations or configurations
76
- * - For significant redesigns, creating a new system definition provides a cleaner
77
- * separation
78
- *
79
- * @example
80
- * ```ts
81
- * const system = await client.systems.update(
82
- * '12345678-0a8b-4f66-b6f3-2ddcfa097257',
83
- * {
84
- * description:
85
- * 'Updated production chatbot powered by GPT-4 Turbo',
86
- * name: 'GPT-4 Turbo Chatbot',
87
- * },
88
- * );
89
- * ```
90
- */
91
- update(systemID, body = {}, options) {
92
- return this._client.patch(path `/systems/${systemID}`, { body, ...options });
93
- }
94
- /**
95
- * Retrieve a paginated list of all systems. Systems are ordered by creation date.
96
- *
97
- * @example
98
- * ```ts
99
- * // Automatically fetches more pages as needed.
100
- * for await (const system of client.systems.list('314')) {
101
- * // ...
102
- * }
103
- * ```
104
- */
105
- list(projectID, query = {}, options) {
106
- return this._client.getAPIList(path `/projects/${projectID}/systems`, (PaginatedResponse), {
107
- query,
108
- ...options,
109
- });
110
- }
111
- /**
112
- * Delete a system definition by ID. This will not delete associated system
113
- * configurations.
114
- *
115
- * @example
116
- * ```ts
117
- * const system = await client.systems.delete(
118
- * '12345678-0a8b-4f66-b6f3-2ddcfa097257',
119
- * );
120
- * ```
121
- */
122
- delete(systemID, options) {
123
- return this._client.delete(path `/systems/${systemID}`, options);
124
- }
125
- /**
126
- * Retrieve a specific system by ID.
127
- *
128
- * @example
129
- * ```ts
130
- * const system = await client.systems.get(
131
- * '12345678-0a8b-4f66-b6f3-2ddcfa097257',
132
- * );
133
- * ```
134
- */
135
- get(systemID, options) {
136
- return this._client.get(path `/systems/${systemID}`, options);
137
- }
138
- }
2
+ export * from "./systems/index.mjs";
139
3
  //# sourceMappingURL=systems.mjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"systems.mjs","sourceRoot":"","sources":["../src/resources/systems.ts"],"names":[],"mappings":"AAAA,sFAAsF;OAE/E,EAAE,WAAW,EAAE;OAEf,EAAe,iBAAiB,EAAgC;OAEhE,EAAE,IAAI,EAAE;AAEf,MAAM,OAAO,OAAQ,SAAQ,WAAW;IACtC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OAuDG;IACH,MAAM,CAAC,SAAiB,EAAE,IAAwB,EAAE,OAAwB;QAC1E,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAA,aAAa,SAAS,UAAU,EAAE,EAAE,IAAI,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;IACvF,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;;;;OAyBG;IACH,MAAM,CACJ,QAAgB,EAChB,OAA8C,EAAE,EAChD,OAAwB;QAExB,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAA,YAAY,QAAQ,EAAE,EAAE,EAAE,IAAI,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;IAC9E,CAAC;IAED;;;;;;;;;;OAUG;IACH,IAAI,CACF,SAAiB,EACjB,QAA6C,EAAE,EAC/C,OAAwB;QAExB,OAAO,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,IAAI,CAAA,aAAa,SAAS,UAAU,EAAE,CAAA,iBAAyB,CAAA,EAAE;YAC9F,KAAK;YACL,GAAG,OAAO;SACX,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;OAUG;IACH,MAAM,CAAC,QAAgB,EAAE,OAAwB;QAC/C,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAA,YAAY,QAAQ,EAAE,EAAE,OAAO,CAAC,CAAC;IAClE,CAAC;IAED;;;;;;;;;OASG;IACH,GAAG,CAAC,QAAgB,EAAE,OAAwB;QAC5C,OAAO,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAA,YAAY,QAAQ,EAAE,EAAE,OAAO,CAAC,CAAC;IAC/D,CAAC;CACF"}
1
+ {"version":3,"file":"systems.mjs","sourceRoot":"","sources":["../src/resources/systems.ts"],"names":[],"mappings":"AAAA,sFAAsF"}
@@ -75,7 +75,7 @@ export declare class Testsets extends APIResource {
75
75
  */
76
76
  delete(testsetID: string, options?: RequestOptions): APIPromise<TestsetDeleteResponse>;
77
77
  /**
78
- * Get Testset by ID
78
+ * Get Testset
79
79
  *
80
80
  * @example
81
81
  * ```ts
@@ -75,7 +75,7 @@ export declare class Testsets extends APIResource {
75
75
  */
76
76
  delete(testsetID: string, options?: RequestOptions): APIPromise<TestsetDeleteResponse>;
77
77
  /**
78
- * Get Testset by ID
78
+ * Get Testset
79
79
  *
80
80
  * @example
81
81
  * ```ts
@@ -89,7 +89,7 @@ class Testsets extends resource_1.APIResource {
89
89
  return this._client.delete((0, path_1.path) `/testsets/${testsetID}`, options);
90
90
  }
91
91
  /**
92
- * Get Testset by ID
92
+ * Get Testset
93
93
  *
94
94
  * @example
95
95
  * ```ts
@@ -86,7 +86,7 @@ export class Testsets extends APIResource {
86
86
  return this._client.delete(path `/testsets/${testsetID}`, options);
87
87
  }
88
88
  /**
89
- * Get Testset by ID
89
+ * Get Testset
90
90
  *
91
91
  * @example
92
92
  * ```ts
package/src/client.ts CHANGED
@@ -22,6 +22,7 @@ import { APIPromise } from './core/api-promise';
22
22
  import { type Fetch } from './internal/builtin-types';
23
23
  import { HeadersLike, NullableHeaders, buildHeaders } from './internal/headers';
24
24
  import { FinalRequestOptions, RequestOptions } from './internal/request-options';
25
+ import { Metric, MetricCreateParams, MetricUpdateParams, Metrics } from './resources/metrics';
25
26
  import {
26
27
  Project,
27
28
  ProjectCreateParams,
@@ -32,22 +33,6 @@ import {
32
33
  import { Record as RecordsAPIRecord, RecordCreateParams, Records } from './resources/records';
33
34
  import { Run, RunCreateParams, Runs } from './resources/runs';
34
35
  import { Score, ScoreUpsertParams, Scores } from './resources/scores';
35
- import {
36
- SystemConfig,
37
- SystemConfigCreateParams,
38
- SystemConfigListParams,
39
- SystemConfigs,
40
- SystemConfigsPaginatedResponse,
41
- } from './resources/system-configs';
42
- import {
43
- System,
44
- SystemCreateParams,
45
- SystemDeleteResponse,
46
- SystemListParams,
47
- SystemUpdateParams,
48
- Systems,
49
- SystemsPaginatedResponse,
50
- } from './resources/systems';
51
36
  import {
52
37
  Testcase,
53
38
  TestcaseCreateParams,
@@ -71,6 +56,15 @@ import {
71
56
  import { readEnv } from './internal/utils/env';
72
57
  import { formatRequestDetails, loggerFor } from './internal/utils/log';
73
58
  import { isEmptyObj } from './internal/utils/values';
59
+ import {
60
+ System,
61
+ SystemCreateParams,
62
+ SystemDeleteResponse,
63
+ SystemListParams,
64
+ SystemUpdateParams,
65
+ Systems,
66
+ SystemsPaginatedResponse,
67
+ } from './resources/systems/systems';
74
68
 
75
69
  const environments = {
76
70
  production: 'https://api2.scorecard.io/api/v2',
@@ -216,6 +210,8 @@ export class Scorecard {
216
210
  "The SCORECARD_API_KEY environment variable is missing or empty; either provide it, or instantiate the Scorecard client with an apiKey option, like new Scorecard({ apiKey: 'My API Key' }).",
217
211
  );
218
212
  }
213
+ // Support both API keys (which start with 'ak_') and legacy JWT bearer tokens
214
+ apiKey = !apiKey || apiKey.startsWith('ak_') ? apiKey : `Bearer ${apiKey}`;
219
215
 
220
216
  const options: ClientOptions = {
221
217
  apiKey,
@@ -278,7 +274,7 @@ export class Scorecard {
278
274
  }
279
275
 
280
276
  protected authHeaders(opts: FinalRequestOptions): NullableHeaders | undefined {
281
- return buildHeaders([{ Authorization: `Bearer ${this.apiKey}` }]);
277
+ return buildHeaders([{ Authorization: this.apiKey }]);
282
278
  }
283
279
 
284
280
  /**
@@ -801,19 +797,19 @@ export class Scorecard {
801
797
  testsets: API.Testsets = new API.Testsets(this);
802
798
  testcases: API.Testcases = new API.Testcases(this);
803
799
  runs: API.Runs = new API.Runs(this);
800
+ metrics: API.Metrics = new API.Metrics(this);
804
801
  records: API.Records = new API.Records(this);
805
802
  scores: API.Scores = new API.Scores(this);
806
803
  systems: API.Systems = new API.Systems(this);
807
- systemConfigs: API.SystemConfigs = new API.SystemConfigs(this);
808
804
  }
809
805
  Scorecard.Projects = Projects;
810
806
  Scorecard.Testsets = Testsets;
811
807
  Scorecard.Testcases = Testcases;
812
808
  Scorecard.Runs = Runs;
809
+ Scorecard.Metrics = Metrics;
813
810
  Scorecard.Records = Records;
814
811
  Scorecard.Scores = Scores;
815
812
  Scorecard.Systems = Systems;
816
- Scorecard.SystemConfigs = SystemConfigs;
817
813
  export declare namespace Scorecard {
818
814
  export type RequestOptions = Opts.RequestOptions;
819
815
 
@@ -855,6 +851,13 @@ export declare namespace Scorecard {
855
851
 
856
852
  export { Runs as Runs, type Run as Run, type RunCreateParams as RunCreateParams };
857
853
 
854
+ export {
855
+ Metrics as Metrics,
856
+ type Metric as Metric,
857
+ type MetricCreateParams as MetricCreateParams,
858
+ type MetricUpdateParams as MetricUpdateParams,
859
+ };
860
+
858
861
  export {
859
862
  Records as Records,
860
863
  type RecordsAPIRecord as Record,
@@ -873,13 +876,5 @@ export declare namespace Scorecard {
873
876
  type SystemListParams as SystemListParams,
874
877
  };
875
878
 
876
- export {
877
- SystemConfigs as SystemConfigs,
878
- type SystemConfig as SystemConfig,
879
- type SystemConfigsPaginatedResponse as SystemConfigsPaginatedResponse,
880
- type SystemConfigCreateParams as SystemConfigCreateParams,
881
- type SystemConfigListParams as SystemConfigListParams,
882
- };
883
-
884
879
  export type APIError = API.APIError;
885
880
  }
@@ -1,5 +1,7 @@
1
1
  import { Scorecard } from '../client';
2
- import { SystemConfig, Testcase } from '../resources';
2
+ import { Testcase } from '../resources';
3
+ import { ScorecardError } from '../error';
4
+ import { SystemVersion } from '../resources/systems';
3
5
 
4
6
  type RunAndEvaluateArgs<SystemInput extends Record<string, any>, SystemOutput extends Record<string, any>> =
5
7
  // Project and metrics are always required
@@ -14,17 +16,17 @@ type RunAndEvaluateArgs<SystemInput extends Record<string, any>, SystemOutput ex
14
16
  */
15
17
  metricIds: Array<string>;
16
18
  } & (
17
- | // If system config is provided, the system function receives a system config
19
+ | // If systemVersionId is provided, the system function receives a system version
18
20
  {
19
21
  /**
20
- * The ID of the System Configuration to use for the run.
22
+ * The ID of the SystemVersion to use for the run.
21
23
  */
22
- systemConfigId: string;
24
+ systemVersionId: string;
23
25
 
24
26
  /**
25
27
  * The system function to run on the Testset.
26
28
  */
27
- system: (testcaseInput: SystemInput, systemConfig: SystemConfig) => Promise<SystemOutput>;
29
+ system: (testcaseInput: SystemInput, systemVersion: SystemVersion) => Promise<SystemOutput>;
28
30
  }
29
31
  // Otherwise, the system function receives only the testcase input
30
32
  | {
@@ -68,17 +70,17 @@ async function* testcaseIterator<SystemInput extends Record<string, any>>(
68
70
  if ('testsetId' in args) {
69
71
  for await (const testcase of scorecard.testcases.list(args.testsetId)) {
70
72
  yield {
71
- ...testcase,
72
73
  testcaseId: testcase.id,
73
74
  inputs: testcase.inputs as SystemInput,
75
+ expected: testcase.expected,
74
76
  };
75
77
  }
76
78
  } else {
77
79
  for (const testcase of args.testcases) {
78
80
  yield {
79
- ...testcase,
80
81
  testcaseId: 'id' in testcase ? testcase.id : null,
81
82
  inputs: testcase.inputs as SystemInput,
83
+ expected: testcase.expected,
82
84
  };
83
85
  }
84
86
  }
@@ -92,9 +94,10 @@ async function* testcaseIterator<SystemInput extends Record<string, any>>(
92
94
  * @param args.testsetId The optional ID of the Testset to run the system on. Either this or `args.testcases` must be provided.
93
95
  * @param args.testcases The optional list of Testcases to run the system on. Either this or `args.testsetId` must be provided.
94
96
  * @param args.metricIds The IDs of the Metrics to use for evaluation.
95
- * @param args.systemConfigId The optional ID of the System Configuration to associate with the Run.
97
+ * @param args.systemVersionId The optional ID of the System Version to associate with the Run.
96
98
  * @param args.system The system to run on the Testset.
97
99
  * @param options.runInParallel Whether to call `args.system` in parallel. False (sequential) by default.
100
+ * @param options.trials The number of times to run the system on each Testcase. 1 by default.
98
101
  */
99
102
  export async function runAndEvaluate<
100
103
  SystemInput extends Record<string, any>,
@@ -103,44 +106,66 @@ export async function runAndEvaluate<
103
106
  scorecard: Scorecard,
104
107
  args: RunAndEvaluateArgs<SystemInput, SystemOutput>,
105
108
  options: {
106
- runInParallel: boolean;
109
+ /**
110
+ * Whether to call `args.system` in parallel. False (sequential) by default.
111
+ */
112
+ runInParallel?: boolean;
113
+ /**
114
+ * The number of times to run the system on each Testcase. 1 by default.
115
+ */
116
+ trials?: number;
107
117
  } = {
108
118
  runInParallel: false,
119
+ trials: 1,
109
120
  },
110
- ): Promise<Pick<Scorecard.Runs.Run, 'id'> & { url: string }> {
111
- const hasSystemConfig = 'systemConfigId' in args;
121
+ ): Promise<{
122
+ /** The ID of the Run. */
123
+ id: string;
124
+ /** The URL of the Run. */
125
+ url: string;
126
+ }> {
127
+ const runInParallel = options.runInParallel ?? false;
128
+ const trials = options.trials ?? 1;
129
+ if (!(Number.isInteger(trials) && trials >= 1)) {
130
+ throw new ScorecardError('trials must be a positive integer');
131
+ }
132
+
133
+ const hasSystemVersion = 'systemVersionId' in args;
112
134
  const hasTestset = 'testsetId' in args;
113
135
 
114
136
  const runPromise = scorecard.runs.create(args.projectId, {
115
137
  testsetId: hasTestset ? args.testsetId : null,
116
138
  metricIds: args.metricIds,
117
- ...(hasSystemConfig ?
139
+ ...(hasSystemVersion ?
118
140
  {
119
- systemConfigId: args.systemConfigId,
141
+ systemVersionId: args.systemVersionId,
120
142
  }
121
143
  : null),
122
144
  });
123
- const systemConfig = hasSystemConfig ? await scorecard.systemConfigs.get(args.systemConfigId) : null;
145
+ const systemVersion = hasSystemVersion ? await scorecard.systems.versions.get(args.systemVersionId) : null;
124
146
  const run = await runPromise;
125
147
 
126
148
  const recordPromises: Array<Promise<unknown>> = [];
127
149
 
128
150
  for await (const { testcaseId, inputs, expected } of testcaseIterator(scorecard, args)) {
129
- const modelResponsePromise = hasSystemConfig ? args.system(inputs, systemConfig!) : args.system(inputs);
151
+ for (let i = 0; i < trials; i++) {
152
+ const modelResponsePromise =
153
+ hasSystemVersion ? args.system(inputs, systemVersion!) : args.system(inputs);
130
154
 
131
- function createRecord(outputs: SystemOutput): Promise<unknown> {
132
- return scorecard.records.create(run.id, {
133
- inputs,
134
- expected,
135
- outputs,
136
- ...(testcaseId != null ? { testcaseId } : null),
137
- });
138
- }
155
+ function createRecord(outputs: SystemOutput): Promise<unknown> {
156
+ return scorecard.records.create(run.id, {
157
+ inputs,
158
+ expected,
159
+ outputs,
160
+ ...(testcaseId != null ? { testcaseId } : null),
161
+ });
162
+ }
139
163
 
140
- if (options.runInParallel) {
141
- recordPromises.push(modelResponsePromise.then(createRecord));
142
- } else {
143
- recordPromises.push(createRecord(await modelResponsePromise));
164
+ if (runInParallel) {
165
+ recordPromises.push(modelResponsePromise.then(createRecord));
166
+ } else {
167
+ recordPromises.push(createRecord(await modelResponsePromise));
168
+ }
144
169
  }
145
170
  }
146
171
  // Wait until all the Records are created
@@ -1,6 +1,7 @@
1
1
  // File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
2
 
3
3
  export * from './shared';
4
+ export { Metrics, type Metric, type MetricCreateParams, type MetricUpdateParams } from './metrics';
4
5
  export {
5
6
  Projects,
6
7
  type Project,
@@ -11,13 +12,6 @@ export {
11
12
  export { Records, type Record, type RecordCreateParams } from './records';
12
13
  export { Runs, type Run, type RunCreateParams } from './runs';
13
14
  export { Scores, type Score, type ScoreUpsertParams } from './scores';
14
- export {
15
- SystemConfigs,
16
- type SystemConfig,
17
- type SystemConfigCreateParams,
18
- type SystemConfigListParams,
19
- type SystemConfigsPaginatedResponse,
20
- } from './system-configs';
21
15
  export {
22
16
  Systems,
23
17
  type System,
@@ -26,7 +20,7 @@ export {
26
20
  type SystemUpdateParams,
27
21
  type SystemListParams,
28
22
  type SystemsPaginatedResponse,
29
- } from './systems';
23
+ } from './systems/systems';
30
24
  export {
31
25
  Testcases,
32
26
  type Testcase,