scorecard-ai 1.0.0-alpha.7 → 1.0.0-alpha.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/README.md +1 -1
- package/client.d.mts +2 -2
- package/client.d.mts.map +1 -1
- package/client.d.ts +2 -2
- package/client.d.ts.map +1 -1
- package/client.js.map +1 -1
- package/client.mjs.map +1 -1
- package/lib/runAndEvaluate.d.mts.map +1 -1
- package/lib/runAndEvaluate.d.ts.map +1 -1
- package/lib/runAndEvaluate.js +1 -5
- package/lib/runAndEvaluate.js.map +1 -1
- package/lib/runAndEvaluate.mjs +1 -5
- package/lib/runAndEvaluate.mjs.map +1 -1
- package/package.json +1 -1
- package/resources/index.d.mts +1 -1
- package/resources/index.d.mts.map +1 -1
- package/resources/index.d.ts +1 -1
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js.map +1 -1
- package/resources/index.mjs.map +1 -1
- package/resources/records.d.mts +11 -9
- package/resources/records.d.mts.map +1 -1
- package/resources/records.d.ts +11 -9
- package/resources/records.d.ts.map +1 -1
- package/resources/records.js +3 -1
- package/resources/records.js.map +1 -1
- package/resources/records.mjs +3 -1
- package/resources/records.mjs.map +1 -1
- package/resources/runs.d.mts +1 -28
- package/resources/runs.d.mts.map +1 -1
- package/resources/runs.d.ts +1 -28
- package/resources/runs.d.ts.map +1 -1
- package/resources/runs.js +0 -13
- package/resources/runs.js.map +1 -1
- package/resources/runs.mjs +0 -13
- package/resources/runs.mjs.map +1 -1
- package/resources/testcases.d.mts +10 -18
- package/resources/testcases.d.mts.map +1 -1
- package/resources/testcases.d.ts +10 -18
- package/resources/testcases.d.ts.map +1 -1
- package/resources/testsets.d.mts +31 -31
- package/resources/testsets.d.mts.map +1 -1
- package/resources/testsets.d.ts +31 -31
- package/resources/testsets.d.ts.map +1 -1
- package/resources/testsets.js +1 -1
- package/resources/testsets.mjs +1 -1
- package/src/client.ts +2 -8
- package/src/lib/runAndEvaluate.ts +1 -6
- package/src/resources/index.ts +1 -1
- package/src/resources/records.ts +13 -11
- package/src/resources/runs.ts +1 -53
- package/src/resources/testcases.ts +11 -19
- package/src/resources/testsets.ts +31 -31
- package/src/version.ts +1 -1
- package/version.d.mts +1 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/version.mjs +1 -1
package/resources/testsets.mjs
CHANGED
package/src/client.ts
CHANGED
|
@@ -30,7 +30,7 @@ import {
|
|
|
30
30
|
ProjectsPaginatedResponse,
|
|
31
31
|
} from './resources/projects';
|
|
32
32
|
import { Record as RecordsAPIRecord, RecordCreateParams, Records } from './resources/records';
|
|
33
|
-
import { Run, RunCreateParams, RunUpdateParams, RunUpdateResponse, Runs } from './resources/runs';
|
|
33
|
+
import { Run, RunCreateParams, Runs } from './resources/runs';
|
|
34
34
|
import { Score, ScoreUpsertParams, Scores } from './resources/scores';
|
|
35
35
|
import {
|
|
36
36
|
SystemConfig,
|
|
@@ -840,13 +840,7 @@ export declare namespace Scorecard {
|
|
|
840
840
|
type TestcaseDeleteParams as TestcaseDeleteParams,
|
|
841
841
|
};
|
|
842
842
|
|
|
843
|
-
export {
|
|
844
|
-
Runs as Runs,
|
|
845
|
-
type Run as Run,
|
|
846
|
-
type RunUpdateResponse as RunUpdateResponse,
|
|
847
|
-
type RunCreateParams as RunCreateParams,
|
|
848
|
-
type RunUpdateParams as RunUpdateParams,
|
|
849
|
-
};
|
|
843
|
+
export { Runs as Runs, type Run as Run, type RunCreateParams as RunCreateParams };
|
|
850
844
|
|
|
851
845
|
export {
|
|
852
846
|
Records as Records,
|
|
@@ -35,7 +35,7 @@ export async function runAndEvaluate<SystemInput extends Object, SystemOutput ex
|
|
|
35
35
|
const promise = scorecard.records.create(run.id, {
|
|
36
36
|
testcaseId: testcase.id,
|
|
37
37
|
inputs: testcase.inputs,
|
|
38
|
-
|
|
38
|
+
expected: testcase.expected,
|
|
39
39
|
outputs: modelResponse as Record<string, unknown>,
|
|
40
40
|
});
|
|
41
41
|
recordPromises.push(promise);
|
|
@@ -43,11 +43,6 @@ export async function runAndEvaluate<SystemInput extends Object, SystemOutput ex
|
|
|
43
43
|
// Wait until all the Records are created
|
|
44
44
|
await Promise.all(recordPromises);
|
|
45
45
|
|
|
46
|
-
// Mark the Run as done with execution and ready for scoring.
|
|
47
|
-
await scorecard.runs.update(run.id, {
|
|
48
|
-
status: 'awaiting_scoring',
|
|
49
|
-
});
|
|
50
|
-
|
|
51
46
|
const runUrl = `https://app.getscorecard.ai/projects/${projectId}/runs/grades/${run.id}`;
|
|
52
47
|
|
|
53
48
|
return { id: run.id, url: runUrl };
|
package/src/resources/index.ts
CHANGED
|
@@ -9,7 +9,7 @@ export {
|
|
|
9
9
|
type ProjectsPaginatedResponse,
|
|
10
10
|
} from './projects';
|
|
11
11
|
export { Records, type Record, type RecordCreateParams } from './records';
|
|
12
|
-
export { Runs, type Run, type RunUpdateResponse, type RunCreateParams, type RunUpdateParams } from './runs';
|
|
12
|
+
export { Runs, type Run, type RunCreateParams } from './runs';
|
|
13
13
|
export { Scores, type Score, type ScoreUpsertParams } from './scores';
|
|
14
14
|
export {
|
|
15
15
|
SystemConfigs,
|
package/src/resources/records.ts
CHANGED
|
@@ -13,8 +13,10 @@ export class Records extends APIResource {
|
|
|
13
13
|
* @example
|
|
14
14
|
* ```ts
|
|
15
15
|
* const record = await client.records.create('135', {
|
|
16
|
+
* expected: {
|
|
17
|
+
* idealAnswer: 'Paris is the capital of France',
|
|
18
|
+
* },
|
|
16
19
|
* inputs: { question: 'What is the capital of France?' },
|
|
17
|
-
* labels: { idealAnswer: 'Paris is the capital of France' },
|
|
18
20
|
* outputs: { response: 'The capital of France is Paris.' },
|
|
19
21
|
* testcaseId: '248',
|
|
20
22
|
* });
|
|
@@ -35,15 +37,15 @@ export interface Record {
|
|
|
35
37
|
id: string;
|
|
36
38
|
|
|
37
39
|
/**
|
|
38
|
-
* The
|
|
39
|
-
* schema.
|
|
40
|
+
* The expected outputs for the Testcase.
|
|
40
41
|
*/
|
|
41
|
-
|
|
42
|
+
expected: BuiltinRecord<string, unknown>;
|
|
42
43
|
|
|
43
44
|
/**
|
|
44
|
-
* The
|
|
45
|
+
* The actual inputs sent to the system, which should match the system's input
|
|
46
|
+
* schema.
|
|
45
47
|
*/
|
|
46
|
-
|
|
48
|
+
inputs: BuiltinRecord<string, unknown>;
|
|
47
49
|
|
|
48
50
|
/**
|
|
49
51
|
* The actual outputs from the system.
|
|
@@ -63,15 +65,15 @@ export interface Record {
|
|
|
63
65
|
|
|
64
66
|
export interface RecordCreateParams {
|
|
65
67
|
/**
|
|
66
|
-
* The
|
|
67
|
-
* schema.
|
|
68
|
+
* The expected outputs for the Testcase.
|
|
68
69
|
*/
|
|
69
|
-
|
|
70
|
+
expected: BuiltinRecord<string, unknown>;
|
|
70
71
|
|
|
71
72
|
/**
|
|
72
|
-
* The
|
|
73
|
+
* The actual inputs sent to the system, which should match the system's input
|
|
74
|
+
* schema.
|
|
73
75
|
*/
|
|
74
|
-
|
|
76
|
+
inputs: BuiltinRecord<string, unknown>;
|
|
75
77
|
|
|
76
78
|
/**
|
|
77
79
|
* The actual outputs from the system.
|
package/src/resources/runs.ts
CHANGED
|
@@ -21,20 +21,6 @@ export class Runs extends APIResource {
|
|
|
21
21
|
create(projectID: string, body: RunCreateParams, options?: RequestOptions): APIPromise<Run> {
|
|
22
22
|
return this._client.post(path`/projects/${projectID}/runs`, { body, ...options });
|
|
23
23
|
}
|
|
24
|
-
|
|
25
|
-
/**
|
|
26
|
-
* Update the status of a Run.
|
|
27
|
-
*
|
|
28
|
-
* @example
|
|
29
|
-
* ```ts
|
|
30
|
-
* const run = await client.runs.update('135', {
|
|
31
|
-
* status: 'awaiting_scoring',
|
|
32
|
-
* });
|
|
33
|
-
* ```
|
|
34
|
-
*/
|
|
35
|
-
update(runID: string, body: RunUpdateParams, options?: RequestOptions): APIPromise<RunUpdateResponse> {
|
|
36
|
-
return this._client.patch(path`/runs/${runID}`, { body, ...options });
|
|
37
|
-
}
|
|
38
24
|
}
|
|
39
25
|
|
|
40
26
|
/**
|
|
@@ -74,25 +60,6 @@ export interface Run {
|
|
|
74
60
|
systemConfigId?: string;
|
|
75
61
|
}
|
|
76
62
|
|
|
77
|
-
export interface RunUpdateResponse {
|
|
78
|
-
/**
|
|
79
|
-
* The ID of the Run.
|
|
80
|
-
*/
|
|
81
|
-
id: string;
|
|
82
|
-
|
|
83
|
-
/**
|
|
84
|
-
* The status of the Run.
|
|
85
|
-
*/
|
|
86
|
-
status:
|
|
87
|
-
| 'pending'
|
|
88
|
-
| 'awaiting_execution'
|
|
89
|
-
| 'running_execution'
|
|
90
|
-
| 'awaiting_scoring'
|
|
91
|
-
| 'running_scoring'
|
|
92
|
-
| 'awaiting_human_scoring'
|
|
93
|
-
| 'completed';
|
|
94
|
-
}
|
|
95
|
-
|
|
96
63
|
export interface RunCreateParams {
|
|
97
64
|
/**
|
|
98
65
|
* The IDs of the metrics this Run is using.
|
|
@@ -110,25 +77,6 @@ export interface RunCreateParams {
|
|
|
110
77
|
systemConfigId?: string;
|
|
111
78
|
}
|
|
112
79
|
|
|
113
|
-
export interface RunUpdateParams {
|
|
114
|
-
/**
|
|
115
|
-
* The status of the Run.
|
|
116
|
-
*/
|
|
117
|
-
status:
|
|
118
|
-
| 'pending'
|
|
119
|
-
| 'awaiting_execution'
|
|
120
|
-
| 'running_execution'
|
|
121
|
-
| 'awaiting_scoring'
|
|
122
|
-
| 'running_scoring'
|
|
123
|
-
| 'awaiting_human_scoring'
|
|
124
|
-
| 'completed';
|
|
125
|
-
}
|
|
126
|
-
|
|
127
80
|
export declare namespace Runs {
|
|
128
|
-
export {
|
|
129
|
-
type Run as Run,
|
|
130
|
-
type RunUpdateResponse as RunUpdateResponse,
|
|
131
|
-
type RunCreateParams as RunCreateParams,
|
|
132
|
-
type RunUpdateParams as RunUpdateParams,
|
|
133
|
-
};
|
|
81
|
+
export { type Run as Run, type RunCreateParams as RunCreateParams };
|
|
134
82
|
}
|
|
@@ -120,11 +120,11 @@ export type TestcasesPaginatedResponse = PaginatedResponse<Testcase>;
|
|
|
120
120
|
|
|
121
121
|
/**
|
|
122
122
|
* A test case in the Scorecard system. Contains JSON data that is validated
|
|
123
|
-
* against the schema defined by its Testset. The `inputs` and `labels` fields are
|
|
124
|
-
* derived from the `data` field based on the Testset's `fieldMapping`, and include
|
|
125
|
-
* all mapped fields, including those with validation errors. Testcases are stored
|
|
126
|
-
* regardless of validation results, with any validation errors included in the
|
|
127
|
-
* `validationErrors` field.
|
|
123
|
+
* against the schema defined by its Testset. The `inputs` and `expected` fields
|
|
124
|
+
* are derived from the `data` field based on the Testset's `fieldMapping`, and
|
|
125
|
+
* include all mapped fields, including those with validation errors. Testcases are
|
|
126
|
+
* stored regardless of validation results, with any validation errors included in
|
|
127
|
+
* the `validationErrors` field.
|
|
128
128
|
*/
|
|
129
129
|
export interface Testcase {
|
|
130
130
|
/**
|
|
@@ -132,6 +132,12 @@ export interface Testcase {
|
|
|
132
132
|
*/
|
|
133
133
|
id: string;
|
|
134
134
|
|
|
135
|
+
/**
|
|
136
|
+
* Derived from data based on the Testset's fieldMapping. Contains all fields
|
|
137
|
+
* marked as expected outputs, including those with validation errors.
|
|
138
|
+
*/
|
|
139
|
+
expected: Record<string, unknown>;
|
|
140
|
+
|
|
135
141
|
/**
|
|
136
142
|
* Derived from data based on the Testset's fieldMapping. Contains all fields
|
|
137
143
|
* marked as inputs, including those with validation errors.
|
|
@@ -143,12 +149,6 @@ export interface Testcase {
|
|
|
143
149
|
*/
|
|
144
150
|
jsonData: Record<string, unknown>;
|
|
145
151
|
|
|
146
|
-
/**
|
|
147
|
-
* Derived from data based on the Testset's fieldMapping. Contains all fields
|
|
148
|
-
* marked as labels, including those with validation errors.
|
|
149
|
-
*/
|
|
150
|
-
labels: Record<string, unknown>;
|
|
151
|
-
|
|
152
152
|
/**
|
|
153
153
|
* The ID of the Testset this Testcase belongs to.
|
|
154
154
|
*/
|
|
@@ -194,14 +194,6 @@ export interface TestcaseCreateParams {
|
|
|
194
194
|
}
|
|
195
195
|
|
|
196
196
|
export namespace TestcaseCreateParams {
|
|
197
|
-
/**
|
|
198
|
-
* A test case in the Scorecard system. Contains JSON data that is validated
|
|
199
|
-
* against the schema defined by its Testset. The `inputs` and `labels` fields are
|
|
200
|
-
* derived from the `data` field based on the Testset's `fieldMapping`, and include
|
|
201
|
-
* all mapped fields, including those with validation errors. Testcases are stored
|
|
202
|
-
* regardless of validation results, with any validation errors included in the
|
|
203
|
-
* `validationErrors` field.
|
|
204
|
-
*/
|
|
205
197
|
export interface Item {
|
|
206
198
|
/**
|
|
207
199
|
* The JSON data of the Testcase, which is validated against the Testset's schema.
|
|
@@ -17,7 +17,7 @@ export class Testsets extends APIResource {
|
|
|
17
17
|
* description: 'Testset for long context Q&A chatbot.',
|
|
18
18
|
* fieldMapping: {
|
|
19
19
|
* inputs: ['question'],
|
|
20
|
-
*
|
|
20
|
+
* expected: ['idealAnswer'],
|
|
21
21
|
* metadata: [],
|
|
22
22
|
* },
|
|
23
23
|
* jsonSchema: {
|
|
@@ -119,9 +119,9 @@ export type TestsetsPaginatedResponse = PaginatedResponse<Testset>;
|
|
|
119
119
|
/**
|
|
120
120
|
* A collection of Testcases that share the same schema. Each Testset defines the
|
|
121
121
|
* structure of its Testcases through a JSON schema. The `fieldMapping` object maps
|
|
122
|
-
* top-level keys of the Testcase schema to their roles (input/
|
|
123
|
-
* mentioned in the `fieldMapping` during creation or update are treated
|
|
124
|
-
* metadata.
|
|
122
|
+
* top-level keys of the Testcase schema to their roles (input/expected output).
|
|
123
|
+
* Fields not mentioned in the `fieldMapping` during creation or update are treated
|
|
124
|
+
* as metadata.
|
|
125
125
|
*
|
|
126
126
|
* ## JSON Schema validation constraints supported:
|
|
127
127
|
*
|
|
@@ -154,8 +154,8 @@ export interface Testset {
|
|
|
154
154
|
description: string;
|
|
155
155
|
|
|
156
156
|
/**
|
|
157
|
-
* Maps top-level keys of the Testcase schema to their roles (input/
|
|
158
|
-
* Unmapped fields are treated as metadata.
|
|
157
|
+
* Maps top-level keys of the Testcase schema to their roles (input/expected
|
|
158
|
+
* output). Unmapped fields are treated as metadata.
|
|
159
159
|
*/
|
|
160
160
|
fieldMapping: Testset.FieldMapping;
|
|
161
161
|
|
|
@@ -172,22 +172,22 @@ export interface Testset {
|
|
|
172
172
|
|
|
173
173
|
export namespace Testset {
|
|
174
174
|
/**
|
|
175
|
-
* Maps top-level keys of the Testcase schema to their roles (input/
|
|
176
|
-
* Unmapped fields are treated as metadata.
|
|
175
|
+
* Maps top-level keys of the Testcase schema to their roles (input/expected
|
|
176
|
+
* output). Unmapped fields are treated as metadata.
|
|
177
177
|
*/
|
|
178
178
|
export interface FieldMapping {
|
|
179
179
|
/**
|
|
180
|
-
* Fields that represent
|
|
180
|
+
* Fields that represent expected outputs.
|
|
181
181
|
*/
|
|
182
|
-
|
|
182
|
+
expected: Array<string>;
|
|
183
183
|
|
|
184
184
|
/**
|
|
185
|
-
* Fields that represent
|
|
185
|
+
* Fields that represent inputs to the AI system.
|
|
186
186
|
*/
|
|
187
|
-
|
|
187
|
+
inputs: Array<string>;
|
|
188
188
|
|
|
189
189
|
/**
|
|
190
|
-
* Fields that are not inputs or
|
|
190
|
+
* Fields that are not inputs or expected outputs.
|
|
191
191
|
*/
|
|
192
192
|
metadata: Array<string>;
|
|
193
193
|
}
|
|
@@ -207,8 +207,8 @@ export interface TestsetCreateParams {
|
|
|
207
207
|
description: string;
|
|
208
208
|
|
|
209
209
|
/**
|
|
210
|
-
* Maps top-level keys of the Testcase schema to their roles (input/
|
|
211
|
-
* Unmapped fields are treated as metadata.
|
|
210
|
+
* Maps top-level keys of the Testcase schema to their roles (input/expected
|
|
211
|
+
* output). Unmapped fields are treated as metadata.
|
|
212
212
|
*/
|
|
213
213
|
fieldMapping: TestsetCreateParams.FieldMapping;
|
|
214
214
|
|
|
@@ -225,22 +225,22 @@ export interface TestsetCreateParams {
|
|
|
225
225
|
|
|
226
226
|
export namespace TestsetCreateParams {
|
|
227
227
|
/**
|
|
228
|
-
* Maps top-level keys of the Testcase schema to their roles (input/
|
|
229
|
-
* Unmapped fields are treated as metadata.
|
|
228
|
+
* Maps top-level keys of the Testcase schema to their roles (input/expected
|
|
229
|
+
* output). Unmapped fields are treated as metadata.
|
|
230
230
|
*/
|
|
231
231
|
export interface FieldMapping {
|
|
232
232
|
/**
|
|
233
|
-
* Fields that represent
|
|
233
|
+
* Fields that represent expected outputs.
|
|
234
234
|
*/
|
|
235
|
-
|
|
235
|
+
expected: Array<string>;
|
|
236
236
|
|
|
237
237
|
/**
|
|
238
|
-
* Fields that represent
|
|
238
|
+
* Fields that represent inputs to the AI system.
|
|
239
239
|
*/
|
|
240
|
-
|
|
240
|
+
inputs: Array<string>;
|
|
241
241
|
|
|
242
242
|
/**
|
|
243
|
-
* Fields that are not inputs or
|
|
243
|
+
* Fields that are not inputs or expected outputs.
|
|
244
244
|
*/
|
|
245
245
|
metadata: Array<string>;
|
|
246
246
|
}
|
|
@@ -253,8 +253,8 @@ export interface TestsetUpdateParams {
|
|
|
253
253
|
description?: string;
|
|
254
254
|
|
|
255
255
|
/**
|
|
256
|
-
* Maps top-level keys of the Testcase schema to their roles (input/
|
|
257
|
-
* Unmapped fields are treated as metadata.
|
|
256
|
+
* Maps top-level keys of the Testcase schema to their roles (input/expected
|
|
257
|
+
* output). Unmapped fields are treated as metadata.
|
|
258
258
|
*/
|
|
259
259
|
fieldMapping?: TestsetUpdateParams.FieldMapping;
|
|
260
260
|
|
|
@@ -271,22 +271,22 @@ export interface TestsetUpdateParams {
|
|
|
271
271
|
|
|
272
272
|
export namespace TestsetUpdateParams {
|
|
273
273
|
/**
|
|
274
|
-
* Maps top-level keys of the Testcase schema to their roles (input/
|
|
275
|
-
* Unmapped fields are treated as metadata.
|
|
274
|
+
* Maps top-level keys of the Testcase schema to their roles (input/expected
|
|
275
|
+
* output). Unmapped fields are treated as metadata.
|
|
276
276
|
*/
|
|
277
277
|
export interface FieldMapping {
|
|
278
278
|
/**
|
|
279
|
-
* Fields that represent
|
|
279
|
+
* Fields that represent expected outputs.
|
|
280
280
|
*/
|
|
281
|
-
|
|
281
|
+
expected: Array<string>;
|
|
282
282
|
|
|
283
283
|
/**
|
|
284
|
-
* Fields that represent
|
|
284
|
+
* Fields that represent inputs to the AI system.
|
|
285
285
|
*/
|
|
286
|
-
|
|
286
|
+
inputs: Array<string>;
|
|
287
287
|
|
|
288
288
|
/**
|
|
289
|
-
* Fields that are not inputs or
|
|
289
|
+
* Fields that are not inputs or expected outputs.
|
|
290
290
|
*/
|
|
291
291
|
metadata: Array<string>;
|
|
292
292
|
}
|
package/src/version.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export const VERSION = '1.0.0-alpha.7'; // x-release-please-version
|
|
1
|
+
export const VERSION = '1.0.0-alpha.8'; // x-release-please-version
|
package/version.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const VERSION = "1.0.0-alpha.7";
|
|
1
|
+
export declare const VERSION = "1.0.0-alpha.8";
|
|
2
2
|
//# sourceMappingURL=version.d.mts.map
|
package/version.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const VERSION = "1.0.0-alpha.7";
|
|
1
|
+
export declare const VERSION = "1.0.0-alpha.8";
|
|
2
2
|
//# sourceMappingURL=version.d.ts.map
|
package/version.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.VERSION = void 0;
|
|
4
|
-
exports.VERSION = '1.0.0-alpha.7'; // x-release-please-version
|
|
4
|
+
exports.VERSION = '1.0.0-alpha.8'; // x-release-please-version
|
|
5
5
|
//# sourceMappingURL=version.js.map
|
package/version.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export const VERSION = '1.0.0-alpha.7'; // x-release-please-version
|
|
1
|
+
export const VERSION = '1.0.0-alpha.8'; // x-release-please-version
|
|
2
2
|
//# sourceMappingURL=version.mjs.map
|