scorecard-ai 1.0.0-alpha.9 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +37 -0
- package/README.md +48 -19
- package/client.d.mts +4 -4
- package/client.d.mts.map +1 -1
- package/client.d.ts +4 -4
- package/client.d.ts.map +1 -1
- package/client.js +7 -5
- package/client.js.map +1 -1
- package/client.mjs +7 -5
- package/client.mjs.map +1 -1
- package/internal/tslib.js +6 -6
- package/lib/runAndEvaluate.d.mts +20 -8
- package/lib/runAndEvaluate.d.mts.map +1 -1
- package/lib/runAndEvaluate.d.ts +20 -8
- package/lib/runAndEvaluate.d.ts.map +1 -1
- package/lib/runAndEvaluate.js +31 -21
- package/lib/runAndEvaluate.js.map +1 -1
- package/lib/runAndEvaluate.mjs +31 -21
- package/lib/runAndEvaluate.mjs.map +1 -1
- package/package.json +1 -1
- package/resources/index.d.mts +2 -2
- package/resources/index.d.mts.map +1 -1
- package/resources/index.d.ts +2 -2
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js +4 -4
- package/resources/index.js.map +1 -1
- package/resources/index.mjs +2 -2
- package/resources/index.mjs.map +1 -1
- package/resources/metrics.d.mts +611 -0
- package/resources/metrics.d.mts.map +1 -0
- package/resources/metrics.d.ts +611 -0
- package/resources/metrics.d.ts.map +1 -0
- package/resources/metrics.js +52 -0
- package/resources/metrics.js.map +1 -0
- package/resources/metrics.mjs +48 -0
- package/resources/metrics.mjs.map +1 -0
- package/resources/runs.d.mts +5 -5
- package/resources/runs.d.mts.map +1 -1
- package/resources/runs.d.ts +5 -5
- package/resources/runs.d.ts.map +1 -1
- package/resources/runs.js +1 -1
- package/resources/runs.mjs +1 -1
- package/resources/systems/index.d.mts +3 -0
- package/resources/systems/index.d.mts.map +1 -0
- package/resources/systems/index.d.ts +3 -0
- package/resources/systems/index.d.ts.map +1 -0
- package/resources/systems/index.js +9 -0
- package/resources/systems/index.js.map +1 -0
- package/resources/systems/index.mjs +4 -0
- package/resources/systems/index.mjs.map +1 -0
- package/resources/systems/systems.d.mts +229 -0
- package/resources/systems/systems.d.mts.map +1 -0
- package/resources/systems/systems.d.ts +229 -0
- package/resources/systems/systems.d.ts.map +1 -0
- package/resources/systems/systems.js +151 -0
- package/resources/systems/systems.js.map +1 -0
- package/resources/systems/systems.mjs +146 -0
- package/resources/systems/systems.mjs.map +1 -0
- package/resources/systems/versions.d.mts +132 -0
- package/resources/systems/versions.d.mts.map +1 -0
- package/resources/systems/versions.d.ts +132 -0
- package/resources/systems/versions.d.ts.map +1 -0
- package/resources/{system-configs.js → systems/versions.js} +26 -25
- package/resources/systems/versions.js.map +1 -0
- package/resources/{system-configs.mjs → systems/versions.mjs} +24 -23
- package/resources/systems/versions.mjs.map +1 -0
- package/resources/systems.d.mts +1 -224
- package/resources/systems.d.mts.map +1 -1
- package/resources/systems.d.ts +1 -224
- package/resources/systems.d.ts.map +1 -1
- package/resources/systems.js +2 -139
- package/resources/systems.js.map +1 -1
- package/resources/systems.mjs +1 -137
- package/resources/systems.mjs.map +1 -1
- package/resources/testsets.d.mts +1 -1
- package/resources/testsets.d.ts +1 -1
- package/resources/testsets.js +1 -1
- package/resources/testsets.mjs +1 -1
- package/src/client.ts +22 -27
- package/src/lib/runAndEvaluate.ts +52 -27
- package/src/resources/index.ts +2 -8
- package/src/resources/metrics.ts +768 -0
- package/src/resources/runs.ts +5 -5
- package/src/resources/systems/index.ts +18 -0
- package/src/resources/systems/systems.ts +299 -0
- package/src/resources/systems/versions.ts +166 -0
- package/src/resources/systems.ts +1 -277
- package/src/resources/testsets.ts +1 -1
- package/src/version.ts +1 -1
- package/version.d.mts +1 -1
- package/version.d.mts.map +1 -1
- package/version.d.ts +1 -1
- package/version.d.ts.map +1 -1
- package/version.js +1 -1
- package/version.js.map +1 -1
- package/version.mjs +1 -1
- package/version.mjs.map +1 -1
- package/resources/system-configs.d.mts +0 -148
- package/resources/system-configs.d.mts.map +0 -1
- package/resources/system-configs.d.ts +0 -148
- package/resources/system-configs.d.ts.map +0 -1
- package/resources/system-configs.js.map +0 -1
- package/resources/system-configs.mjs.map +0 -1
- package/src/resources/system-configs.ts +0 -189
package/resources/systems.js
CHANGED
|
@@ -1,143 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
3
3
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
const pagination_1 = require("../core/pagination.js");
|
|
7
|
-
const path_1 = require("../internal/utils/path.js");
|
|
8
|
-
class Systems extends resource_1.APIResource {
|
|
9
|
-
/**
|
|
10
|
-
* Create a new system definition that specifies the interface contracts for a
|
|
11
|
-
* component you want to evaluate.
|
|
12
|
-
*
|
|
13
|
-
* A system acts as a template that defines three key contracts through JSON
|
|
14
|
-
* Schemas:
|
|
15
|
-
*
|
|
16
|
-
* 1. Input Schema: What data your system accepts (e.g., user queries, context
|
|
17
|
-
* documents)
|
|
18
|
-
* 2. Output Schema: What data your system produces (e.g., responses, confidence
|
|
19
|
-
* scores)
|
|
20
|
-
* 3. Config Schema: What parameters can be adjusted (e.g., model selection,
|
|
21
|
-
* temperature)
|
|
22
|
-
*
|
|
23
|
-
* This separation lets you evaluate any system as a black box, focusing on its
|
|
24
|
-
* interface rather than implementation details.
|
|
25
|
-
*
|
|
26
|
-
* @example
|
|
27
|
-
* ```ts
|
|
28
|
-
* const system = await client.systems.create('314', {
|
|
29
|
-
* configSchema: {
|
|
30
|
-
* type: 'object',
|
|
31
|
-
* properties: {
|
|
32
|
-
* temperature: { type: 'number' },
|
|
33
|
-
* maxTokens: { type: 'integer' },
|
|
34
|
-
* model: { type: 'string', enum: ['gpt-4', 'gpt-4-turbo'] },
|
|
35
|
-
* },
|
|
36
|
-
* required: ['model'],
|
|
37
|
-
* },
|
|
38
|
-
* description: 'Production chatbot powered by GPT-4',
|
|
39
|
-
* inputSchema: {
|
|
40
|
-
* type: 'object',
|
|
41
|
-
* properties: {
|
|
42
|
-
* messages: {
|
|
43
|
-
* type: 'array',
|
|
44
|
-
* items: {
|
|
45
|
-
* type: 'object',
|
|
46
|
-
* properties: {
|
|
47
|
-
* role: { type: 'string', enum: ['system', 'user', 'assistant'] },
|
|
48
|
-
* content: { type: 'string' },
|
|
49
|
-
* },
|
|
50
|
-
* required: ['role', 'content'],
|
|
51
|
-
* },
|
|
52
|
-
* },
|
|
53
|
-
* },
|
|
54
|
-
* required: ['messages'],
|
|
55
|
-
* },
|
|
56
|
-
* name: 'GPT-4 Chatbot',
|
|
57
|
-
* outputSchema: {
|
|
58
|
-
* type: 'object',
|
|
59
|
-
* properties: { response: { type: 'string' } },
|
|
60
|
-
* required: ['response'],
|
|
61
|
-
* },
|
|
62
|
-
* });
|
|
63
|
-
* ```
|
|
64
|
-
*/
|
|
65
|
-
create(projectID, body, options) {
|
|
66
|
-
return this._client.post((0, path_1.path) `/projects/${projectID}/systems`, { body, ...options });
|
|
67
|
-
}
|
|
68
|
-
/**
|
|
69
|
-
* Update an existing system definition. Only the fields provided in the request
|
|
70
|
-
* body will be updated. If a field is provided, the new content will replace the
|
|
71
|
-
* existing content. If a field is not provided, the existing content will remain
|
|
72
|
-
* unchanged.
|
|
73
|
-
*
|
|
74
|
-
* When updating schemas:
|
|
75
|
-
*
|
|
76
|
-
* - The system will accept your changes regardless of compatibility with existing
|
|
77
|
-
* configurations
|
|
78
|
-
* - Schema updates won't invalidate existing evaluations or configurations
|
|
79
|
-
* - For significant redesigns, creating a new system definition provides a cleaner
|
|
80
|
-
* separation
|
|
81
|
-
*
|
|
82
|
-
* @example
|
|
83
|
-
* ```ts
|
|
84
|
-
* const system = await client.systems.update(
|
|
85
|
-
* '12345678-0a8b-4f66-b6f3-2ddcfa097257',
|
|
86
|
-
* {
|
|
87
|
-
* description:
|
|
88
|
-
* 'Updated production chatbot powered by GPT-4 Turbo',
|
|
89
|
-
* name: 'GPT-4 Turbo Chatbot',
|
|
90
|
-
* },
|
|
91
|
-
* );
|
|
92
|
-
* ```
|
|
93
|
-
*/
|
|
94
|
-
update(systemID, body = {}, options) {
|
|
95
|
-
return this._client.patch((0, path_1.path) `/systems/${systemID}`, { body, ...options });
|
|
96
|
-
}
|
|
97
|
-
/**
|
|
98
|
-
* Retrieve a paginated list of all systems. Systems are ordered by creation date.
|
|
99
|
-
*
|
|
100
|
-
* @example
|
|
101
|
-
* ```ts
|
|
102
|
-
* // Automatically fetches more pages as needed.
|
|
103
|
-
* for await (const system of client.systems.list('314')) {
|
|
104
|
-
* // ...
|
|
105
|
-
* }
|
|
106
|
-
* ```
|
|
107
|
-
*/
|
|
108
|
-
list(projectID, query = {}, options) {
|
|
109
|
-
return this._client.getAPIList((0, path_1.path) `/projects/${projectID}/systems`, (pagination_1.PaginatedResponse), {
|
|
110
|
-
query,
|
|
111
|
-
...options,
|
|
112
|
-
});
|
|
113
|
-
}
|
|
114
|
-
/**
|
|
115
|
-
* Delete a system definition by ID. This will not delete associated system
|
|
116
|
-
* configurations.
|
|
117
|
-
*
|
|
118
|
-
* @example
|
|
119
|
-
* ```ts
|
|
120
|
-
* const system = await client.systems.delete(
|
|
121
|
-
* '12345678-0a8b-4f66-b6f3-2ddcfa097257',
|
|
122
|
-
* );
|
|
123
|
-
* ```
|
|
124
|
-
*/
|
|
125
|
-
delete(systemID, options) {
|
|
126
|
-
return this._client.delete((0, path_1.path) `/systems/${systemID}`, options);
|
|
127
|
-
}
|
|
128
|
-
/**
|
|
129
|
-
* Retrieve a specific system by ID.
|
|
130
|
-
*
|
|
131
|
-
* @example
|
|
132
|
-
* ```ts
|
|
133
|
-
* const system = await client.systems.get(
|
|
134
|
-
* '12345678-0a8b-4f66-b6f3-2ddcfa097257',
|
|
135
|
-
* );
|
|
136
|
-
* ```
|
|
137
|
-
*/
|
|
138
|
-
get(systemID, options) {
|
|
139
|
-
return this._client.get((0, path_1.path) `/systems/${systemID}`, options);
|
|
140
|
-
}
|
|
141
|
-
}
|
|
142
|
-
exports.Systems = Systems;
|
|
4
|
+
const tslib_1 = require("../internal/tslib.js");
|
|
5
|
+
tslib_1.__exportStar(require("./systems/index.js"), exports);
|
|
143
6
|
//# sourceMappingURL=systems.js.map
|
package/resources/systems.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"systems.js","sourceRoot":"","sources":["../src/resources/systems.ts"],"names":[],"mappings":";AAAA,sFAAsF;;;AAEtF,
|
|
1
|
+
{"version":3,"file":"systems.js","sourceRoot":"","sources":["../src/resources/systems.ts"],"names":[],"mappings":";AAAA,sFAAsF;;;AAEtF,6DAAgC"}
|
package/resources/systems.mjs
CHANGED
|
@@ -1,139 +1,3 @@
|
|
|
1
1
|
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
-
|
|
3
|
-
import { PaginatedResponse } from "../core/pagination.mjs";
|
|
4
|
-
import { path } from "../internal/utils/path.mjs";
|
|
5
|
-
export class Systems extends APIResource {
|
|
6
|
-
/**
|
|
7
|
-
* Create a new system definition that specifies the interface contracts for a
|
|
8
|
-
* component you want to evaluate.
|
|
9
|
-
*
|
|
10
|
-
* A system acts as a template that defines three key contracts through JSON
|
|
11
|
-
* Schemas:
|
|
12
|
-
*
|
|
13
|
-
* 1. Input Schema: What data your system accepts (e.g., user queries, context
|
|
14
|
-
* documents)
|
|
15
|
-
* 2. Output Schema: What data your system produces (e.g., responses, confidence
|
|
16
|
-
* scores)
|
|
17
|
-
* 3. Config Schema: What parameters can be adjusted (e.g., model selection,
|
|
18
|
-
* temperature)
|
|
19
|
-
*
|
|
20
|
-
* This separation lets you evaluate any system as a black box, focusing on its
|
|
21
|
-
* interface rather than implementation details.
|
|
22
|
-
*
|
|
23
|
-
* @example
|
|
24
|
-
* ```ts
|
|
25
|
-
* const system = await client.systems.create('314', {
|
|
26
|
-
* configSchema: {
|
|
27
|
-
* type: 'object',
|
|
28
|
-
* properties: {
|
|
29
|
-
* temperature: { type: 'number' },
|
|
30
|
-
* maxTokens: { type: 'integer' },
|
|
31
|
-
* model: { type: 'string', enum: ['gpt-4', 'gpt-4-turbo'] },
|
|
32
|
-
* },
|
|
33
|
-
* required: ['model'],
|
|
34
|
-
* },
|
|
35
|
-
* description: 'Production chatbot powered by GPT-4',
|
|
36
|
-
* inputSchema: {
|
|
37
|
-
* type: 'object',
|
|
38
|
-
* properties: {
|
|
39
|
-
* messages: {
|
|
40
|
-
* type: 'array',
|
|
41
|
-
* items: {
|
|
42
|
-
* type: 'object',
|
|
43
|
-
* properties: {
|
|
44
|
-
* role: { type: 'string', enum: ['system', 'user', 'assistant'] },
|
|
45
|
-
* content: { type: 'string' },
|
|
46
|
-
* },
|
|
47
|
-
* required: ['role', 'content'],
|
|
48
|
-
* },
|
|
49
|
-
* },
|
|
50
|
-
* },
|
|
51
|
-
* required: ['messages'],
|
|
52
|
-
* },
|
|
53
|
-
* name: 'GPT-4 Chatbot',
|
|
54
|
-
* outputSchema: {
|
|
55
|
-
* type: 'object',
|
|
56
|
-
* properties: { response: { type: 'string' } },
|
|
57
|
-
* required: ['response'],
|
|
58
|
-
* },
|
|
59
|
-
* });
|
|
60
|
-
* ```
|
|
61
|
-
*/
|
|
62
|
-
create(projectID, body, options) {
|
|
63
|
-
return this._client.post(path `/projects/${projectID}/systems`, { body, ...options });
|
|
64
|
-
}
|
|
65
|
-
/**
|
|
66
|
-
* Update an existing system definition. Only the fields provided in the request
|
|
67
|
-
* body will be updated. If a field is provided, the new content will replace the
|
|
68
|
-
* existing content. If a field is not provided, the existing content will remain
|
|
69
|
-
* unchanged.
|
|
70
|
-
*
|
|
71
|
-
* When updating schemas:
|
|
72
|
-
*
|
|
73
|
-
* - The system will accept your changes regardless of compatibility with existing
|
|
74
|
-
* configurations
|
|
75
|
-
* - Schema updates won't invalidate existing evaluations or configurations
|
|
76
|
-
* - For significant redesigns, creating a new system definition provides a cleaner
|
|
77
|
-
* separation
|
|
78
|
-
*
|
|
79
|
-
* @example
|
|
80
|
-
* ```ts
|
|
81
|
-
* const system = await client.systems.update(
|
|
82
|
-
* '12345678-0a8b-4f66-b6f3-2ddcfa097257',
|
|
83
|
-
* {
|
|
84
|
-
* description:
|
|
85
|
-
* 'Updated production chatbot powered by GPT-4 Turbo',
|
|
86
|
-
* name: 'GPT-4 Turbo Chatbot',
|
|
87
|
-
* },
|
|
88
|
-
* );
|
|
89
|
-
* ```
|
|
90
|
-
*/
|
|
91
|
-
update(systemID, body = {}, options) {
|
|
92
|
-
return this._client.patch(path `/systems/${systemID}`, { body, ...options });
|
|
93
|
-
}
|
|
94
|
-
/**
|
|
95
|
-
* Retrieve a paginated list of all systems. Systems are ordered by creation date.
|
|
96
|
-
*
|
|
97
|
-
* @example
|
|
98
|
-
* ```ts
|
|
99
|
-
* // Automatically fetches more pages as needed.
|
|
100
|
-
* for await (const system of client.systems.list('314')) {
|
|
101
|
-
* // ...
|
|
102
|
-
* }
|
|
103
|
-
* ```
|
|
104
|
-
*/
|
|
105
|
-
list(projectID, query = {}, options) {
|
|
106
|
-
return this._client.getAPIList(path `/projects/${projectID}/systems`, (PaginatedResponse), {
|
|
107
|
-
query,
|
|
108
|
-
...options,
|
|
109
|
-
});
|
|
110
|
-
}
|
|
111
|
-
/**
|
|
112
|
-
* Delete a system definition by ID. This will not delete associated system
|
|
113
|
-
* configurations.
|
|
114
|
-
*
|
|
115
|
-
* @example
|
|
116
|
-
* ```ts
|
|
117
|
-
* const system = await client.systems.delete(
|
|
118
|
-
* '12345678-0a8b-4f66-b6f3-2ddcfa097257',
|
|
119
|
-
* );
|
|
120
|
-
* ```
|
|
121
|
-
*/
|
|
122
|
-
delete(systemID, options) {
|
|
123
|
-
return this._client.delete(path `/systems/${systemID}`, options);
|
|
124
|
-
}
|
|
125
|
-
/**
|
|
126
|
-
* Retrieve a specific system by ID.
|
|
127
|
-
*
|
|
128
|
-
* @example
|
|
129
|
-
* ```ts
|
|
130
|
-
* const system = await client.systems.get(
|
|
131
|
-
* '12345678-0a8b-4f66-b6f3-2ddcfa097257',
|
|
132
|
-
* );
|
|
133
|
-
* ```
|
|
134
|
-
*/
|
|
135
|
-
get(systemID, options) {
|
|
136
|
-
return this._client.get(path `/systems/${systemID}`, options);
|
|
137
|
-
}
|
|
138
|
-
}
|
|
2
|
+
export * from "./systems/index.mjs";
|
|
139
3
|
//# sourceMappingURL=systems.mjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"systems.mjs","sourceRoot":"","sources":["../src/resources/systems.ts"],"names":[],"mappings":"AAAA,sFAAsF
|
|
1
|
+
{"version":3,"file":"systems.mjs","sourceRoot":"","sources":["../src/resources/systems.ts"],"names":[],"mappings":"AAAA,sFAAsF"}
|
package/resources/testsets.d.mts
CHANGED
package/resources/testsets.d.ts
CHANGED
package/resources/testsets.js
CHANGED
package/resources/testsets.mjs
CHANGED
package/src/client.ts
CHANGED
|
@@ -22,6 +22,7 @@ import { APIPromise } from './core/api-promise';
|
|
|
22
22
|
import { type Fetch } from './internal/builtin-types';
|
|
23
23
|
import { HeadersLike, NullableHeaders, buildHeaders } from './internal/headers';
|
|
24
24
|
import { FinalRequestOptions, RequestOptions } from './internal/request-options';
|
|
25
|
+
import { Metric, MetricCreateParams, MetricUpdateParams, Metrics } from './resources/metrics';
|
|
25
26
|
import {
|
|
26
27
|
Project,
|
|
27
28
|
ProjectCreateParams,
|
|
@@ -32,22 +33,6 @@ import {
|
|
|
32
33
|
import { Record as RecordsAPIRecord, RecordCreateParams, Records } from './resources/records';
|
|
33
34
|
import { Run, RunCreateParams, Runs } from './resources/runs';
|
|
34
35
|
import { Score, ScoreUpsertParams, Scores } from './resources/scores';
|
|
35
|
-
import {
|
|
36
|
-
SystemConfig,
|
|
37
|
-
SystemConfigCreateParams,
|
|
38
|
-
SystemConfigListParams,
|
|
39
|
-
SystemConfigs,
|
|
40
|
-
SystemConfigsPaginatedResponse,
|
|
41
|
-
} from './resources/system-configs';
|
|
42
|
-
import {
|
|
43
|
-
System,
|
|
44
|
-
SystemCreateParams,
|
|
45
|
-
SystemDeleteResponse,
|
|
46
|
-
SystemListParams,
|
|
47
|
-
SystemUpdateParams,
|
|
48
|
-
Systems,
|
|
49
|
-
SystemsPaginatedResponse,
|
|
50
|
-
} from './resources/systems';
|
|
51
36
|
import {
|
|
52
37
|
Testcase,
|
|
53
38
|
TestcaseCreateParams,
|
|
@@ -71,6 +56,15 @@ import {
|
|
|
71
56
|
import { readEnv } from './internal/utils/env';
|
|
72
57
|
import { formatRequestDetails, loggerFor } from './internal/utils/log';
|
|
73
58
|
import { isEmptyObj } from './internal/utils/values';
|
|
59
|
+
import {
|
|
60
|
+
System,
|
|
61
|
+
SystemCreateParams,
|
|
62
|
+
SystemDeleteResponse,
|
|
63
|
+
SystemListParams,
|
|
64
|
+
SystemUpdateParams,
|
|
65
|
+
Systems,
|
|
66
|
+
SystemsPaginatedResponse,
|
|
67
|
+
} from './resources/systems/systems';
|
|
74
68
|
|
|
75
69
|
const environments = {
|
|
76
70
|
production: 'https://api2.scorecard.io/api/v2',
|
|
@@ -216,6 +210,8 @@ export class Scorecard {
|
|
|
216
210
|
"The SCORECARD_API_KEY environment variable is missing or empty; either provide it, or instantiate the Scorecard client with an apiKey option, like new Scorecard({ apiKey: 'My API Key' }).",
|
|
217
211
|
);
|
|
218
212
|
}
|
|
213
|
+
// Support both API keys (which start with 'ak_') and legacy JWT bearer tokens
|
|
214
|
+
apiKey = !apiKey || apiKey.startsWith('ak_') ? apiKey : `Bearer ${apiKey}`;
|
|
219
215
|
|
|
220
216
|
const options: ClientOptions = {
|
|
221
217
|
apiKey,
|
|
@@ -278,7 +274,7 @@ export class Scorecard {
|
|
|
278
274
|
}
|
|
279
275
|
|
|
280
276
|
protected authHeaders(opts: FinalRequestOptions): NullableHeaders | undefined {
|
|
281
|
-
return buildHeaders([{ Authorization:
|
|
277
|
+
return buildHeaders([{ Authorization: this.apiKey }]);
|
|
282
278
|
}
|
|
283
279
|
|
|
284
280
|
/**
|
|
@@ -801,19 +797,19 @@ export class Scorecard {
|
|
|
801
797
|
testsets: API.Testsets = new API.Testsets(this);
|
|
802
798
|
testcases: API.Testcases = new API.Testcases(this);
|
|
803
799
|
runs: API.Runs = new API.Runs(this);
|
|
800
|
+
metrics: API.Metrics = new API.Metrics(this);
|
|
804
801
|
records: API.Records = new API.Records(this);
|
|
805
802
|
scores: API.Scores = new API.Scores(this);
|
|
806
803
|
systems: API.Systems = new API.Systems(this);
|
|
807
|
-
systemConfigs: API.SystemConfigs = new API.SystemConfigs(this);
|
|
808
804
|
}
|
|
809
805
|
Scorecard.Projects = Projects;
|
|
810
806
|
Scorecard.Testsets = Testsets;
|
|
811
807
|
Scorecard.Testcases = Testcases;
|
|
812
808
|
Scorecard.Runs = Runs;
|
|
809
|
+
Scorecard.Metrics = Metrics;
|
|
813
810
|
Scorecard.Records = Records;
|
|
814
811
|
Scorecard.Scores = Scores;
|
|
815
812
|
Scorecard.Systems = Systems;
|
|
816
|
-
Scorecard.SystemConfigs = SystemConfigs;
|
|
817
813
|
export declare namespace Scorecard {
|
|
818
814
|
export type RequestOptions = Opts.RequestOptions;
|
|
819
815
|
|
|
@@ -855,6 +851,13 @@ export declare namespace Scorecard {
|
|
|
855
851
|
|
|
856
852
|
export { Runs as Runs, type Run as Run, type RunCreateParams as RunCreateParams };
|
|
857
853
|
|
|
854
|
+
export {
|
|
855
|
+
Metrics as Metrics,
|
|
856
|
+
type Metric as Metric,
|
|
857
|
+
type MetricCreateParams as MetricCreateParams,
|
|
858
|
+
type MetricUpdateParams as MetricUpdateParams,
|
|
859
|
+
};
|
|
860
|
+
|
|
858
861
|
export {
|
|
859
862
|
Records as Records,
|
|
860
863
|
type RecordsAPIRecord as Record,
|
|
@@ -873,13 +876,5 @@ export declare namespace Scorecard {
|
|
|
873
876
|
type SystemListParams as SystemListParams,
|
|
874
877
|
};
|
|
875
878
|
|
|
876
|
-
export {
|
|
877
|
-
SystemConfigs as SystemConfigs,
|
|
878
|
-
type SystemConfig as SystemConfig,
|
|
879
|
-
type SystemConfigsPaginatedResponse as SystemConfigsPaginatedResponse,
|
|
880
|
-
type SystemConfigCreateParams as SystemConfigCreateParams,
|
|
881
|
-
type SystemConfigListParams as SystemConfigListParams,
|
|
882
|
-
};
|
|
883
|
-
|
|
884
879
|
export type APIError = API.APIError;
|
|
885
880
|
}
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import { Scorecard } from '../client';
|
|
2
|
-
import {
|
|
2
|
+
import { Testcase } from '../resources';
|
|
3
|
+
import { ScorecardError } from '../error';
|
|
4
|
+
import { SystemVersion } from '../resources/systems';
|
|
3
5
|
|
|
4
6
|
type RunAndEvaluateArgs<SystemInput extends Record<string, any>, SystemOutput extends Record<string, any>> =
|
|
5
7
|
// Project and metrics are always required
|
|
@@ -14,17 +16,17 @@ type RunAndEvaluateArgs<SystemInput extends Record<string, any>, SystemOutput ex
|
|
|
14
16
|
*/
|
|
15
17
|
metricIds: Array<string>;
|
|
16
18
|
} & (
|
|
17
|
-
| // If
|
|
19
|
+
| // If systemVersionId is provided, the system function receives a system version
|
|
18
20
|
{
|
|
19
21
|
/**
|
|
20
|
-
* The ID of the
|
|
22
|
+
* The ID of the SystemVersion to use for the run.
|
|
21
23
|
*/
|
|
22
|
-
|
|
24
|
+
systemVersionId: string;
|
|
23
25
|
|
|
24
26
|
/**
|
|
25
27
|
* The system function to run on the Testset.
|
|
26
28
|
*/
|
|
27
|
-
system: (testcaseInput: SystemInput,
|
|
29
|
+
system: (testcaseInput: SystemInput, systemVersion: SystemVersion) => Promise<SystemOutput>;
|
|
28
30
|
}
|
|
29
31
|
// Otherwise, the system function receives only the testcase input
|
|
30
32
|
| {
|
|
@@ -68,17 +70,17 @@ async function* testcaseIterator<SystemInput extends Record<string, any>>(
|
|
|
68
70
|
if ('testsetId' in args) {
|
|
69
71
|
for await (const testcase of scorecard.testcases.list(args.testsetId)) {
|
|
70
72
|
yield {
|
|
71
|
-
...testcase,
|
|
72
73
|
testcaseId: testcase.id,
|
|
73
74
|
inputs: testcase.inputs as SystemInput,
|
|
75
|
+
expected: testcase.expected,
|
|
74
76
|
};
|
|
75
77
|
}
|
|
76
78
|
} else {
|
|
77
79
|
for (const testcase of args.testcases) {
|
|
78
80
|
yield {
|
|
79
|
-
...testcase,
|
|
80
81
|
testcaseId: 'id' in testcase ? testcase.id : null,
|
|
81
82
|
inputs: testcase.inputs as SystemInput,
|
|
83
|
+
expected: testcase.expected,
|
|
82
84
|
};
|
|
83
85
|
}
|
|
84
86
|
}
|
|
@@ -92,9 +94,10 @@ async function* testcaseIterator<SystemInput extends Record<string, any>>(
|
|
|
92
94
|
* @param args.testsetId The optional ID of the Testset to run the system on. Either this or `args.testcases` must be provided.
|
|
93
95
|
* @param args.testcases The optional list of Testcases to run the system on. Either this or `args.testsetId` must be provided.
|
|
94
96
|
* @param args.metricIds The IDs of the Metrics to use for evaluation.
|
|
95
|
-
* @param args.
|
|
97
|
+
* @param args.systemVersionId The optional ID of the System Version to associate with the Run.
|
|
96
98
|
* @param args.system The system to run on the Testset.
|
|
97
99
|
* @param options.runInParallel Whether to call `args.system` in parallel. False (sequential) by default.
|
|
100
|
+
* @param options.trials The number of times to run the system on each Testcase. 1 by default.
|
|
98
101
|
*/
|
|
99
102
|
export async function runAndEvaluate<
|
|
100
103
|
SystemInput extends Record<string, any>,
|
|
@@ -103,44 +106,66 @@ export async function runAndEvaluate<
|
|
|
103
106
|
scorecard: Scorecard,
|
|
104
107
|
args: RunAndEvaluateArgs<SystemInput, SystemOutput>,
|
|
105
108
|
options: {
|
|
106
|
-
|
|
109
|
+
/**
|
|
110
|
+
* Whether to call `args.system` in parallel. False (sequential) by default.
|
|
111
|
+
*/
|
|
112
|
+
runInParallel?: boolean;
|
|
113
|
+
/**
|
|
114
|
+
* The number of times to run the system on each Testcase. 1 by default.
|
|
115
|
+
*/
|
|
116
|
+
trials?: number;
|
|
107
117
|
} = {
|
|
108
118
|
runInParallel: false,
|
|
119
|
+
trials: 1,
|
|
109
120
|
},
|
|
110
|
-
): Promise<
|
|
111
|
-
|
|
121
|
+
): Promise<{
|
|
122
|
+
/** The ID of the Run. */
|
|
123
|
+
id: string;
|
|
124
|
+
/** The URL of the Run. */
|
|
125
|
+
url: string;
|
|
126
|
+
}> {
|
|
127
|
+
const runInParallel = options.runInParallel ?? false;
|
|
128
|
+
const trials = options.trials ?? 1;
|
|
129
|
+
if (!(Number.isInteger(trials) && trials >= 1)) {
|
|
130
|
+
throw new ScorecardError('trials must be a positive integer');
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
const hasSystemVersion = 'systemVersionId' in args;
|
|
112
134
|
const hasTestset = 'testsetId' in args;
|
|
113
135
|
|
|
114
136
|
const runPromise = scorecard.runs.create(args.projectId, {
|
|
115
137
|
testsetId: hasTestset ? args.testsetId : null,
|
|
116
138
|
metricIds: args.metricIds,
|
|
117
|
-
...(
|
|
139
|
+
...(hasSystemVersion ?
|
|
118
140
|
{
|
|
119
|
-
|
|
141
|
+
systemVersionId: args.systemVersionId,
|
|
120
142
|
}
|
|
121
143
|
: null),
|
|
122
144
|
});
|
|
123
|
-
const
|
|
145
|
+
const systemVersion = hasSystemVersion ? await scorecard.systems.versions.get(args.systemVersionId) : null;
|
|
124
146
|
const run = await runPromise;
|
|
125
147
|
|
|
126
148
|
const recordPromises: Array<Promise<unknown>> = [];
|
|
127
149
|
|
|
128
150
|
for await (const { testcaseId, inputs, expected } of testcaseIterator(scorecard, args)) {
|
|
129
|
-
|
|
151
|
+
for (let i = 0; i < trials; i++) {
|
|
152
|
+
const modelResponsePromise =
|
|
153
|
+
hasSystemVersion ? args.system(inputs, systemVersion!) : args.system(inputs);
|
|
130
154
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
155
|
+
function createRecord(outputs: SystemOutput): Promise<unknown> {
|
|
156
|
+
return scorecard.records.create(run.id, {
|
|
157
|
+
inputs,
|
|
158
|
+
expected,
|
|
159
|
+
outputs,
|
|
160
|
+
...(testcaseId != null ? { testcaseId } : null),
|
|
161
|
+
});
|
|
162
|
+
}
|
|
139
163
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
164
|
+
if (runInParallel) {
|
|
165
|
+
recordPromises.push(modelResponsePromise.then(createRecord));
|
|
166
|
+
} else {
|
|
167
|
+
recordPromises.push(createRecord(await modelResponsePromise));
|
|
168
|
+
}
|
|
144
169
|
}
|
|
145
170
|
}
|
|
146
171
|
// Wait until all the Records are created
|
package/src/resources/index.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
2
|
|
|
3
3
|
export * from './shared';
|
|
4
|
+
export { Metrics, type Metric, type MetricCreateParams, type MetricUpdateParams } from './metrics';
|
|
4
5
|
export {
|
|
5
6
|
Projects,
|
|
6
7
|
type Project,
|
|
@@ -11,13 +12,6 @@ export {
|
|
|
11
12
|
export { Records, type Record, type RecordCreateParams } from './records';
|
|
12
13
|
export { Runs, type Run, type RunCreateParams } from './runs';
|
|
13
14
|
export { Scores, type Score, type ScoreUpsertParams } from './scores';
|
|
14
|
-
export {
|
|
15
|
-
SystemConfigs,
|
|
16
|
-
type SystemConfig,
|
|
17
|
-
type SystemConfigCreateParams,
|
|
18
|
-
type SystemConfigListParams,
|
|
19
|
-
type SystemConfigsPaginatedResponse,
|
|
20
|
-
} from './system-configs';
|
|
21
15
|
export {
|
|
22
16
|
Systems,
|
|
23
17
|
type System,
|
|
@@ -26,7 +20,7 @@ export {
|
|
|
26
20
|
type SystemUpdateParams,
|
|
27
21
|
type SystemListParams,
|
|
28
22
|
type SystemsPaginatedResponse,
|
|
29
|
-
} from './systems';
|
|
23
|
+
} from './systems/systems';
|
|
30
24
|
export {
|
|
31
25
|
Testcases,
|
|
32
26
|
type Testcase,
|