scorecard-ai 1.0.0-alpha.8 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/README.md +25 -37
- package/client.d.mts +5 -4
- package/client.d.mts.map +1 -1
- package/client.d.ts +5 -4
- package/client.d.ts.map +1 -1
- package/client.js +19 -4
- package/client.js.map +1 -1
- package/client.mjs +19 -4
- package/client.mjs.map +1 -1
- package/core/pagination.d.mts +1 -1
- package/core/pagination.d.mts.map +1 -1
- package/core/pagination.d.ts +1 -1
- package/core/pagination.d.ts.map +1 -1
- package/index.d.mts +1 -0
- package/index.d.mts.map +1 -1
- package/index.d.ts +1 -0
- package/index.d.ts.map +1 -1
- package/index.js +3 -1
- package/index.js.map +1 -1
- package/index.mjs +1 -0
- package/index.mjs.map +1 -1
- package/internal/detect-platform.js +3 -3
- package/internal/detect-platform.js.map +1 -1
- package/internal/detect-platform.mjs +3 -3
- package/internal/detect-platform.mjs.map +1 -1
- package/internal/shim-types.d.mts +11 -22
- package/internal/shim-types.d.mts.map +1 -0
- package/internal/shim-types.d.ts +11 -22
- package/internal/shim-types.d.ts.map +1 -0
- package/internal/shim-types.js +4 -0
- package/internal/shim-types.js.map +1 -0
- package/internal/shim-types.mjs +3 -0
- package/internal/shim-types.mjs.map +1 -0
- package/internal/shims.d.mts +2 -2
- package/internal/shims.d.mts.map +1 -1
- package/internal/shims.d.ts +2 -2
- package/internal/shims.d.ts.map +1 -1
- package/internal/tslib.js +6 -6
- package/internal/uploads.js.map +1 -1
- package/internal/uploads.mjs.map +1 -1
- package/lib/runAndEvaluate.d.mts +62 -10
- package/lib/runAndEvaluate.d.mts.map +1 -1
- package/lib/runAndEvaluate.d.ts +62 -10
- package/lib/runAndEvaluate.d.ts.map +1 -1
- package/lib/runAndEvaluate.js +72 -19
- package/lib/runAndEvaluate.js.map +1 -1
- package/lib/runAndEvaluate.mjs +72 -19
- package/lib/runAndEvaluate.mjs.map +1 -1
- package/package.json +1 -4
- package/resources/index.d.mts +2 -2
- package/resources/index.d.mts.map +1 -1
- package/resources/index.d.ts +2 -2
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js +4 -4
- package/resources/index.js.map +1 -1
- package/resources/index.mjs +2 -2
- package/resources/index.mjs.map +1 -1
- package/resources/metrics.d.mts +421 -0
- package/resources/metrics.d.mts.map +1 -0
- package/resources/metrics.d.ts +421 -0
- package/resources/metrics.d.ts.map +1 -0
- package/resources/metrics.js +33 -0
- package/resources/metrics.js.map +1 -0
- package/resources/metrics.mjs +29 -0
- package/resources/metrics.mjs.map +1 -0
- package/resources/runs.d.mts +8 -8
- package/resources/runs.d.mts.map +1 -1
- package/resources/runs.d.ts +8 -8
- package/resources/runs.d.ts.map +1 -1
- package/resources/runs.js +1 -1
- package/resources/runs.mjs +1 -1
- package/resources/systems/index.d.mts +3 -0
- package/resources/systems/index.d.mts.map +1 -0
- package/resources/systems/index.d.ts +3 -0
- package/resources/systems/index.d.ts.map +1 -0
- package/resources/systems/index.js +9 -0
- package/resources/systems/index.js.map +1 -0
- package/resources/systems/index.mjs +4 -0
- package/resources/systems/index.mjs.map +1 -0
- package/resources/systems/systems.d.mts +229 -0
- package/resources/systems/systems.d.mts.map +1 -0
- package/resources/systems/systems.d.ts +229 -0
- package/resources/systems/systems.d.ts.map +1 -0
- package/resources/systems/systems.js +151 -0
- package/resources/systems/systems.js.map +1 -0
- package/resources/systems/systems.mjs +146 -0
- package/resources/systems/systems.mjs.map +1 -0
- package/resources/systems/versions.d.mts +132 -0
- package/resources/systems/versions.d.mts.map +1 -0
- package/resources/systems/versions.d.ts +132 -0
- package/resources/systems/versions.d.ts.map +1 -0
- package/resources/systems/versions.js +82 -0
- package/resources/systems/versions.js.map +1 -0
- package/resources/systems/versions.mjs +78 -0
- package/resources/systems/versions.mjs.map +1 -0
- package/resources/systems.d.mts +1 -224
- package/resources/systems.d.mts.map +1 -1
- package/resources/systems.d.ts +1 -224
- package/resources/systems.d.ts.map +1 -1
- package/resources/systems.js +2 -139
- package/resources/systems.js.map +1 -1
- package/resources/systems.mjs +1 -137
- package/resources/systems.mjs.map +1 -1
- package/resources/testsets.d.mts +1 -1
- package/resources/testsets.d.ts +1 -1
- package/resources/testsets.js +1 -1
- package/resources/testsets.mjs +1 -1
- package/src/client.ts +28 -28
- package/src/core/pagination.ts +1 -1
- package/src/index.ts +2 -0
- package/src/internal/detect-platform.ts +3 -3
- package/src/internal/shim-types.ts +26 -0
- package/src/internal/shims.ts +2 -2
- package/src/internal/uploads.ts +1 -1
- package/src/lib/runAndEvaluate.ts +159 -31
- package/src/resources/index.ts +2 -9
- package/src/resources/metrics.ts +525 -0
- package/src/resources/runs.ts +8 -8
- package/src/resources/systems/index.ts +18 -0
- package/src/resources/systems/systems.ts +299 -0
- package/src/resources/systems/versions.ts +166 -0
- package/src/resources/systems.ts +1 -277
- package/src/resources/testsets.ts +1 -1
- package/src/version.ts +1 -1
- package/version.d.mts +1 -1
- package/version.d.mts.map +1 -1
- package/version.d.ts +1 -1
- package/version.d.ts.map +1 -1
- package/version.js +1 -1
- package/version.js.map +1 -1
- package/version.mjs +1 -1
- package/version.mjs.map +1 -1
- package/resources/system-configs.d.mts +0 -155
- package/resources/system-configs.d.mts.map +0 -1
- package/resources/system-configs.d.ts +0 -155
- package/resources/system-configs.d.ts.map +0 -1
- package/resources/system-configs.js +0 -83
- package/resources/system-configs.js.map +0 -1
- package/resources/system-configs.mjs +0 -79
- package/resources/system-configs.mjs.map +0 -1
- package/src/internal/shim-types.d.ts +0 -28
- package/src/resources/system-configs.ts +0 -203
package/resources/systems.mjs
CHANGED
|
@@ -1,139 +1,3 @@
|
|
|
1
1
|
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
-
|
|
3
|
-
import { PaginatedResponse } from "../core/pagination.mjs";
|
|
4
|
-
import { path } from "../internal/utils/path.mjs";
|
|
5
|
-
export class Systems extends APIResource {
|
|
6
|
-
/**
|
|
7
|
-
* Create a new system definition that specifies the interface contracts for a
|
|
8
|
-
* component you want to evaluate.
|
|
9
|
-
*
|
|
10
|
-
* A system acts as a template that defines three key contracts through JSON
|
|
11
|
-
* Schemas:
|
|
12
|
-
*
|
|
13
|
-
* 1. Input Schema: What data your system accepts (e.g., user queries, context
|
|
14
|
-
* documents)
|
|
15
|
-
* 2. Output Schema: What data your system produces (e.g., responses, confidence
|
|
16
|
-
* scores)
|
|
17
|
-
* 3. Config Schema: What parameters can be adjusted (e.g., model selection,
|
|
18
|
-
* temperature)
|
|
19
|
-
*
|
|
20
|
-
* This separation lets you evaluate any system as a black box, focusing on its
|
|
21
|
-
* interface rather than implementation details.
|
|
22
|
-
*
|
|
23
|
-
* @example
|
|
24
|
-
* ```ts
|
|
25
|
-
* const system = await client.systems.create('314', {
|
|
26
|
-
* configSchema: {
|
|
27
|
-
* type: 'object',
|
|
28
|
-
* properties: {
|
|
29
|
-
* temperature: { type: 'number' },
|
|
30
|
-
* maxTokens: { type: 'integer' },
|
|
31
|
-
* model: { type: 'string', enum: ['gpt-4', 'gpt-4-turbo'] },
|
|
32
|
-
* },
|
|
33
|
-
* required: ['model'],
|
|
34
|
-
* },
|
|
35
|
-
* description: 'Production chatbot powered by GPT-4',
|
|
36
|
-
* inputSchema: {
|
|
37
|
-
* type: 'object',
|
|
38
|
-
* properties: {
|
|
39
|
-
* messages: {
|
|
40
|
-
* type: 'array',
|
|
41
|
-
* items: {
|
|
42
|
-
* type: 'object',
|
|
43
|
-
* properties: {
|
|
44
|
-
* role: { type: 'string', enum: ['system', 'user', 'assistant'] },
|
|
45
|
-
* content: { type: 'string' },
|
|
46
|
-
* },
|
|
47
|
-
* required: ['role', 'content'],
|
|
48
|
-
* },
|
|
49
|
-
* },
|
|
50
|
-
* },
|
|
51
|
-
* required: ['messages'],
|
|
52
|
-
* },
|
|
53
|
-
* name: 'GPT-4 Chatbot',
|
|
54
|
-
* outputSchema: {
|
|
55
|
-
* type: 'object',
|
|
56
|
-
* properties: { response: { type: 'string' } },
|
|
57
|
-
* required: ['response'],
|
|
58
|
-
* },
|
|
59
|
-
* });
|
|
60
|
-
* ```
|
|
61
|
-
*/
|
|
62
|
-
create(projectID, body, options) {
|
|
63
|
-
return this._client.post(path `/projects/${projectID}/systems`, { body, ...options });
|
|
64
|
-
}
|
|
65
|
-
/**
|
|
66
|
-
* Update an existing system definition. Only the fields provided in the request
|
|
67
|
-
* body will be updated. If a field is provided, the new content will replace the
|
|
68
|
-
* existing content. If a field is not provided, the existing content will remain
|
|
69
|
-
* unchanged.
|
|
70
|
-
*
|
|
71
|
-
* When updating schemas:
|
|
72
|
-
*
|
|
73
|
-
* - The system will accept your changes regardless of compatibility with existing
|
|
74
|
-
* configurations
|
|
75
|
-
* - Schema updates won't invalidate existing evaluations or configurations
|
|
76
|
-
* - For significant redesigns, creating a new system definition provides a cleaner
|
|
77
|
-
* separation
|
|
78
|
-
*
|
|
79
|
-
* @example
|
|
80
|
-
* ```ts
|
|
81
|
-
* const system = await client.systems.update(
|
|
82
|
-
* '12345678-0a8b-4f66-b6f3-2ddcfa097257',
|
|
83
|
-
* {
|
|
84
|
-
* description:
|
|
85
|
-
* 'Updated production chatbot powered by GPT-4 Turbo',
|
|
86
|
-
* name: 'GPT-4 Turbo Chatbot',
|
|
87
|
-
* },
|
|
88
|
-
* );
|
|
89
|
-
* ```
|
|
90
|
-
*/
|
|
91
|
-
update(systemID, body = {}, options) {
|
|
92
|
-
return this._client.patch(path `/systems/${systemID}`, { body, ...options });
|
|
93
|
-
}
|
|
94
|
-
/**
|
|
95
|
-
* Retrieve a paginated list of all systems. Systems are ordered by creation date.
|
|
96
|
-
*
|
|
97
|
-
* @example
|
|
98
|
-
* ```ts
|
|
99
|
-
* // Automatically fetches more pages as needed.
|
|
100
|
-
* for await (const system of client.systems.list('314')) {
|
|
101
|
-
* // ...
|
|
102
|
-
* }
|
|
103
|
-
* ```
|
|
104
|
-
*/
|
|
105
|
-
list(projectID, query = {}, options) {
|
|
106
|
-
return this._client.getAPIList(path `/projects/${projectID}/systems`, (PaginatedResponse), {
|
|
107
|
-
query,
|
|
108
|
-
...options,
|
|
109
|
-
});
|
|
110
|
-
}
|
|
111
|
-
/**
|
|
112
|
-
* Delete a system definition by ID. This will not delete associated system
|
|
113
|
-
* configurations.
|
|
114
|
-
*
|
|
115
|
-
* @example
|
|
116
|
-
* ```ts
|
|
117
|
-
* const system = await client.systems.delete(
|
|
118
|
-
* '12345678-0a8b-4f66-b6f3-2ddcfa097257',
|
|
119
|
-
* );
|
|
120
|
-
* ```
|
|
121
|
-
*/
|
|
122
|
-
delete(systemID, options) {
|
|
123
|
-
return this._client.delete(path `/systems/${systemID}`, options);
|
|
124
|
-
}
|
|
125
|
-
/**
|
|
126
|
-
* Retrieve a specific system by ID.
|
|
127
|
-
*
|
|
128
|
-
* @example
|
|
129
|
-
* ```ts
|
|
130
|
-
* const system = await client.systems.get(
|
|
131
|
-
* '12345678-0a8b-4f66-b6f3-2ddcfa097257',
|
|
132
|
-
* );
|
|
133
|
-
* ```
|
|
134
|
-
*/
|
|
135
|
-
get(systemID, options) {
|
|
136
|
-
return this._client.get(path `/systems/${systemID}`, options);
|
|
137
|
-
}
|
|
138
|
-
}
|
|
2
|
+
export * from "./systems/index.mjs";
|
|
139
3
|
//# sourceMappingURL=systems.mjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"systems.mjs","sourceRoot":"","sources":["../src/resources/systems.ts"],"names":[],"mappings":"AAAA,sFAAsF
|
|
1
|
+
{"version":3,"file":"systems.mjs","sourceRoot":"","sources":["../src/resources/systems.ts"],"names":[],"mappings":"AAAA,sFAAsF"}
|
package/resources/testsets.d.mts
CHANGED
package/resources/testsets.d.ts
CHANGED
package/resources/testsets.js
CHANGED
package/resources/testsets.mjs
CHANGED
package/src/client.ts
CHANGED
|
@@ -22,6 +22,7 @@ import { APIPromise } from './core/api-promise';
|
|
|
22
22
|
import { type Fetch } from './internal/builtin-types';
|
|
23
23
|
import { HeadersLike, NullableHeaders, buildHeaders } from './internal/headers';
|
|
24
24
|
import { FinalRequestOptions, RequestOptions } from './internal/request-options';
|
|
25
|
+
import { Metric, MetricCreateParams, Metrics } from './resources/metrics';
|
|
25
26
|
import {
|
|
26
27
|
Project,
|
|
27
28
|
ProjectCreateParams,
|
|
@@ -32,23 +33,6 @@ import {
|
|
|
32
33
|
import { Record as RecordsAPIRecord, RecordCreateParams, Records } from './resources/records';
|
|
33
34
|
import { Run, RunCreateParams, Runs } from './resources/runs';
|
|
34
35
|
import { Score, ScoreUpsertParams, Scores } from './resources/scores';
|
|
35
|
-
import {
|
|
36
|
-
SystemConfig,
|
|
37
|
-
SystemConfigCreateParams,
|
|
38
|
-
SystemConfigGetParams,
|
|
39
|
-
SystemConfigListParams,
|
|
40
|
-
SystemConfigs,
|
|
41
|
-
SystemConfigsPaginatedResponse,
|
|
42
|
-
} from './resources/system-configs';
|
|
43
|
-
import {
|
|
44
|
-
System,
|
|
45
|
-
SystemCreateParams,
|
|
46
|
-
SystemDeleteResponse,
|
|
47
|
-
SystemListParams,
|
|
48
|
-
SystemUpdateParams,
|
|
49
|
-
Systems,
|
|
50
|
-
SystemsPaginatedResponse,
|
|
51
|
-
} from './resources/systems';
|
|
52
36
|
import {
|
|
53
37
|
Testcase,
|
|
54
38
|
TestcaseCreateParams,
|
|
@@ -72,6 +56,15 @@ import {
|
|
|
72
56
|
import { readEnv } from './internal/utils/env';
|
|
73
57
|
import { formatRequestDetails, loggerFor } from './internal/utils/log';
|
|
74
58
|
import { isEmptyObj } from './internal/utils/values';
|
|
59
|
+
import {
|
|
60
|
+
System,
|
|
61
|
+
SystemCreateParams,
|
|
62
|
+
SystemDeleteResponse,
|
|
63
|
+
SystemListParams,
|
|
64
|
+
SystemUpdateParams,
|
|
65
|
+
Systems,
|
|
66
|
+
SystemsPaginatedResponse,
|
|
67
|
+
} from './resources/systems/systems';
|
|
75
68
|
|
|
76
69
|
const environments = {
|
|
77
70
|
production: 'https://api2.scorecard.io/api/v2',
|
|
@@ -80,6 +73,18 @@ const environments = {
|
|
|
80
73
|
};
|
|
81
74
|
type Environment = keyof typeof environments;
|
|
82
75
|
|
|
76
|
+
function baseApiUrlToBaseAppUrl(baseApiUrl: string): string {
|
|
77
|
+
if (baseApiUrl === environments.production) {
|
|
78
|
+
return 'https://app.scorecard.io';
|
|
79
|
+
} else if (baseApiUrl === environments.staging) {
|
|
80
|
+
return 'https://staging.app.getscorecard.ai';
|
|
81
|
+
} else if (baseApiUrl === environments.local) {
|
|
82
|
+
return 'http://localhost:3002';
|
|
83
|
+
} else {
|
|
84
|
+
return 'https://staging.app.getscorecard.ai';
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
83
88
|
export interface ClientOptions {
|
|
84
89
|
/**
|
|
85
90
|
* Defaults to process.env['SCORECARD_API_KEY'].
|
|
@@ -170,6 +175,7 @@ export class Scorecard {
|
|
|
170
175
|
apiKey: string;
|
|
171
176
|
|
|
172
177
|
baseURL: string;
|
|
178
|
+
baseAppURL: string;
|
|
173
179
|
maxRetries: number;
|
|
174
180
|
timeout: number;
|
|
175
181
|
logger: Logger | undefined;
|
|
@@ -219,6 +225,7 @@ export class Scorecard {
|
|
|
219
225
|
}
|
|
220
226
|
|
|
221
227
|
this.baseURL = options.baseURL || environments[options.environment || 'production'];
|
|
228
|
+
this.baseAppURL = baseApiUrlToBaseAppUrl(this.baseURL);
|
|
222
229
|
this.timeout = options.timeout ?? Scorecard.DEFAULT_TIMEOUT /* 1 minute */;
|
|
223
230
|
this.logger = options.logger ?? console;
|
|
224
231
|
const defaultLogLevel = 'warn';
|
|
@@ -788,19 +795,19 @@ export class Scorecard {
|
|
|
788
795
|
testsets: API.Testsets = new API.Testsets(this);
|
|
789
796
|
testcases: API.Testcases = new API.Testcases(this);
|
|
790
797
|
runs: API.Runs = new API.Runs(this);
|
|
798
|
+
metrics: API.Metrics = new API.Metrics(this);
|
|
791
799
|
records: API.Records = new API.Records(this);
|
|
792
800
|
scores: API.Scores = new API.Scores(this);
|
|
793
801
|
systems: API.Systems = new API.Systems(this);
|
|
794
|
-
systemConfigs: API.SystemConfigs = new API.SystemConfigs(this);
|
|
795
802
|
}
|
|
796
803
|
Scorecard.Projects = Projects;
|
|
797
804
|
Scorecard.Testsets = Testsets;
|
|
798
805
|
Scorecard.Testcases = Testcases;
|
|
799
806
|
Scorecard.Runs = Runs;
|
|
807
|
+
Scorecard.Metrics = Metrics;
|
|
800
808
|
Scorecard.Records = Records;
|
|
801
809
|
Scorecard.Scores = Scores;
|
|
802
810
|
Scorecard.Systems = Systems;
|
|
803
|
-
Scorecard.SystemConfigs = SystemConfigs;
|
|
804
811
|
export declare namespace Scorecard {
|
|
805
812
|
export type RequestOptions = Opts.RequestOptions;
|
|
806
813
|
|
|
@@ -842,6 +849,8 @@ export declare namespace Scorecard {
|
|
|
842
849
|
|
|
843
850
|
export { Runs as Runs, type Run as Run, type RunCreateParams as RunCreateParams };
|
|
844
851
|
|
|
852
|
+
export { Metrics as Metrics, type Metric as Metric, type MetricCreateParams as MetricCreateParams };
|
|
853
|
+
|
|
845
854
|
export {
|
|
846
855
|
Records as Records,
|
|
847
856
|
type RecordsAPIRecord as Record,
|
|
@@ -860,14 +869,5 @@ export declare namespace Scorecard {
|
|
|
860
869
|
type SystemListParams as SystemListParams,
|
|
861
870
|
};
|
|
862
871
|
|
|
863
|
-
export {
|
|
864
|
-
SystemConfigs as SystemConfigs,
|
|
865
|
-
type SystemConfig as SystemConfig,
|
|
866
|
-
type SystemConfigsPaginatedResponse as SystemConfigsPaginatedResponse,
|
|
867
|
-
type SystemConfigCreateParams as SystemConfigCreateParams,
|
|
868
|
-
type SystemConfigListParams as SystemConfigListParams,
|
|
869
|
-
type SystemConfigGetParams as SystemConfigGetParams,
|
|
870
|
-
};
|
|
871
|
-
|
|
872
872
|
export type APIError = API.APIError;
|
|
873
873
|
}
|
package/src/core/pagination.ts
CHANGED
package/src/index.ts
CHANGED
|
@@ -85,10 +85,10 @@ const getPlatformProperties = (): PlatformProperties => {
|
|
|
85
85
|
return {
|
|
86
86
|
'X-Stainless-Lang': 'js',
|
|
87
87
|
'X-Stainless-Package-Version': VERSION,
|
|
88
|
-
'X-Stainless-OS': normalizePlatform((globalThis as any).process.platform),
|
|
89
|
-
'X-Stainless-Arch': normalizeArch((globalThis as any).process.arch),
|
|
88
|
+
'X-Stainless-OS': normalizePlatform((globalThis as any).process.platform ?? 'unknown'),
|
|
89
|
+
'X-Stainless-Arch': normalizeArch((globalThis as any).process.arch ?? 'unknown'),
|
|
90
90
|
'X-Stainless-Runtime': 'node',
|
|
91
|
-
'X-Stainless-Runtime-Version': (globalThis as any).process.version,
|
|
91
|
+
'X-Stainless-Runtime-Version': (globalThis as any).process.version ?? 'unknown',
|
|
92
92
|
};
|
|
93
93
|
}
|
|
94
94
|
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Shims for types that we can't always rely on being available globally.
|
|
5
|
+
*
|
|
6
|
+
* Note: these only exist at the type-level, there is no corresponding runtime
|
|
7
|
+
* version for any of these symbols.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
type NeverToAny<T> = T extends never ? any : T;
|
|
11
|
+
|
|
12
|
+
/** @ts-ignore */
|
|
13
|
+
type _DOMReadableStream<R = any> = globalThis.ReadableStream<R>;
|
|
14
|
+
|
|
15
|
+
/** @ts-ignore */
|
|
16
|
+
type _NodeReadableStream<R = any> = import('stream/web').ReadableStream<R>;
|
|
17
|
+
|
|
18
|
+
type _ConditionalNodeReadableStream<R = any> =
|
|
19
|
+
typeof globalThis extends { ReadableStream: any } ? never : _NodeReadableStream<R>;
|
|
20
|
+
|
|
21
|
+
type _ReadableStream<R = any> = NeverToAny<
|
|
22
|
+
| ([0] extends [1 & _DOMReadableStream<R>] ? never : _DOMReadableStream<R>)
|
|
23
|
+
| ([0] extends [1 & _ConditionalNodeReadableStream<R>] ? never : _ConditionalNodeReadableStream<R>)
|
|
24
|
+
>;
|
|
25
|
+
|
|
26
|
+
export type { _ReadableStream as ReadableStream };
|
package/src/internal/shims.ts
CHANGED
|
@@ -7,8 +7,8 @@
|
|
|
7
7
|
* messages in cases where an environment isn't fully supported.
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
10
|
+
import type { Fetch } from './builtin-types';
|
|
11
|
+
import type { ReadableStream } from './shim-types';
|
|
12
12
|
|
|
13
13
|
export function getDefaultFetch(): Fetch {
|
|
14
14
|
if (typeof fetch !== 'undefined') {
|
package/src/internal/uploads.ts
CHANGED
|
@@ -138,7 +138,7 @@ export const createForm = async <T = Record<string, unknown>>(
|
|
|
138
138
|
|
|
139
139
|
// We check for Blob not File because Bun.File doesn't inherit from File,
|
|
140
140
|
// but they both inherit from Blob and have a `name` property at runtime.
|
|
141
|
-
const isNamedBlob = (value:
|
|
141
|
+
const isNamedBlob = (value: unknown) => value instanceof Blob && 'name' in value;
|
|
142
142
|
|
|
143
143
|
const isUploadable = (value: unknown) =>
|
|
144
144
|
typeof value === 'object' &&
|
|
@@ -1,49 +1,177 @@
|
|
|
1
1
|
import { Scorecard } from '../client';
|
|
2
|
+
import { Testcase } from '../resources';
|
|
3
|
+
import { ScorecardError } from '../error';
|
|
4
|
+
import { SystemVersion } from '../resources/systems';
|
|
5
|
+
|
|
6
|
+
type RunAndEvaluateArgs<SystemInput extends Record<string, any>, SystemOutput extends Record<string, any>> =
|
|
7
|
+
// Project and metrics are always required
|
|
8
|
+
{
|
|
9
|
+
/**
|
|
10
|
+
* The ID of the Project to run the system on.
|
|
11
|
+
*/
|
|
12
|
+
projectId: string;
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* The IDs of the Metrics to use for evaluation.
|
|
16
|
+
*/
|
|
17
|
+
metricIds: Array<string>;
|
|
18
|
+
} & (
|
|
19
|
+
| // If systemVersionId is provided, the system function receives a system version
|
|
20
|
+
{
|
|
21
|
+
/**
|
|
22
|
+
* The ID of the SystemVersion to use for the run.
|
|
23
|
+
*/
|
|
24
|
+
systemVersionId: string;
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* The system function to run on the Testset.
|
|
28
|
+
*/
|
|
29
|
+
system: (testcaseInput: SystemInput, systemVersion: SystemVersion) => Promise<SystemOutput>;
|
|
30
|
+
}
|
|
31
|
+
// Otherwise, the system function receives only the testcase input
|
|
32
|
+
| {
|
|
33
|
+
/**
|
|
34
|
+
* The system function to run on the Testset.
|
|
35
|
+
*/
|
|
36
|
+
system: (testcaseInput: SystemInput) => Promise<SystemOutput>;
|
|
37
|
+
}
|
|
38
|
+
) &
|
|
39
|
+
// If testset is not provided, you must pass in all the testcases manually
|
|
40
|
+
(| {
|
|
41
|
+
/**
|
|
42
|
+
* The ID of the Scorecard Testset to run the system on.
|
|
43
|
+
*/
|
|
44
|
+
testsetId: string;
|
|
45
|
+
}
|
|
46
|
+
| {
|
|
47
|
+
/**
|
|
48
|
+
* The list of test cases to run the system on. Can be a list of Scorecard Testcases or a list of inputs and expected outputs.
|
|
49
|
+
*/
|
|
50
|
+
testcases:
|
|
51
|
+
| Array<{
|
|
52
|
+
inputs: SystemInput;
|
|
53
|
+
expected: Record<string, unknown>;
|
|
54
|
+
}>
|
|
55
|
+
| Array<Testcase>;
|
|
56
|
+
}
|
|
57
|
+
);
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Returns an async generator over the given Testset or Testcases.
|
|
61
|
+
*/
|
|
62
|
+
async function* testcaseIterator<SystemInput extends Record<string, any>>(
|
|
63
|
+
scorecard: Scorecard,
|
|
64
|
+
args: RunAndEvaluateArgs<SystemInput, any>,
|
|
65
|
+
): AsyncGenerator<{
|
|
66
|
+
testcaseId: string | null;
|
|
67
|
+
inputs: SystemInput;
|
|
68
|
+
expected: Record<string, unknown>;
|
|
69
|
+
}> {
|
|
70
|
+
if ('testsetId' in args) {
|
|
71
|
+
for await (const testcase of scorecard.testcases.list(args.testsetId)) {
|
|
72
|
+
yield {
|
|
73
|
+
testcaseId: testcase.id,
|
|
74
|
+
inputs: testcase.inputs as SystemInput,
|
|
75
|
+
expected: testcase.expected,
|
|
76
|
+
};
|
|
77
|
+
}
|
|
78
|
+
} else {
|
|
79
|
+
for (const testcase of args.testcases) {
|
|
80
|
+
yield {
|
|
81
|
+
testcaseId: 'id' in testcase ? testcase.id : null,
|
|
82
|
+
inputs: testcase.inputs as SystemInput,
|
|
83
|
+
expected: testcase.expected,
|
|
84
|
+
};
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
}
|
|
2
88
|
|
|
3
89
|
/**
|
|
4
90
|
* Runs a system on a Testset and records the results in Scorecard.
|
|
5
91
|
*
|
|
6
92
|
* @param scorecard The Scorecard client
|
|
7
|
-
* @param projectId The ID of the Project to run the system on.
|
|
8
|
-
* @param testsetId The ID of the Testset to run the system on.
|
|
9
|
-
* @param
|
|
10
|
-
* @param
|
|
93
|
+
* @param args.projectId The ID of the Project to run the system on.
|
|
94
|
+
* @param args.testsetId The optional ID of the Testset to run the system on. Either this or `args.testcases` must be provided.
|
|
95
|
+
* @param args.testcases The optional list of Testcases to run the system on. Either this or `args.testsetId` must be provided.
|
|
96
|
+
* @param args.metricIds The IDs of the Metrics to use for evaluation.
|
|
97
|
+
* @param args.systemVersionId The optional ID of the System Version to associate with the Run.
|
|
98
|
+
* @param args.system The system to run on the Testset.
|
|
99
|
+
* @param options.runInParallel Whether to call `args.system` in parallel. False (sequential) by default.
|
|
100
|
+
* @param options.trials The number of times to run the system on each Testcase. 1 by default.
|
|
11
101
|
*/
|
|
12
|
-
export async function runAndEvaluate<
|
|
102
|
+
export async function runAndEvaluate<
|
|
103
|
+
SystemInput extends Record<string, any>,
|
|
104
|
+
SystemOutput extends Record<string, any>,
|
|
105
|
+
>(
|
|
13
106
|
scorecard: Scorecard,
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
107
|
+
args: RunAndEvaluateArgs<SystemInput, SystemOutput>,
|
|
108
|
+
options: {
|
|
109
|
+
/**
|
|
110
|
+
* Whether to call `args.system` in parallel. False (sequential) by default.
|
|
111
|
+
*/
|
|
112
|
+
runInParallel?: boolean;
|
|
113
|
+
/**
|
|
114
|
+
* The number of times to run the system on each Testcase. 1 by default.
|
|
115
|
+
*/
|
|
116
|
+
trials?: number;
|
|
117
|
+
} = {
|
|
118
|
+
runInParallel: false,
|
|
119
|
+
trials: 1,
|
|
24
120
|
},
|
|
25
|
-
): Promise<
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
121
|
+
): Promise<{
|
|
122
|
+
/** The ID of the Run. */
|
|
123
|
+
id: string;
|
|
124
|
+
/** The URL of the Run. */
|
|
125
|
+
url: string;
|
|
126
|
+
}> {
|
|
127
|
+
const runInParallel = options.runInParallel ?? false;
|
|
128
|
+
const trials = options.trials ?? 1;
|
|
129
|
+
if (!(Number.isInteger(trials) && trials >= 1)) {
|
|
130
|
+
throw new ScorecardError('trials must be a positive integer');
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
const hasSystemVersion = 'systemVersionId' in args;
|
|
134
|
+
const hasTestset = 'testsetId' in args;
|
|
135
|
+
|
|
136
|
+
const runPromise = scorecard.runs.create(args.projectId, {
|
|
137
|
+
testsetId: hasTestset ? args.testsetId : null,
|
|
138
|
+
metricIds: args.metricIds,
|
|
139
|
+
...(hasSystemVersion ?
|
|
140
|
+
{
|
|
141
|
+
systemVersionId: args.systemVersionId,
|
|
142
|
+
}
|
|
143
|
+
: null),
|
|
29
144
|
});
|
|
145
|
+
const systemVersion = hasSystemVersion ? await scorecard.systems.versions.get(args.systemVersionId) : null;
|
|
146
|
+
const run = await runPromise;
|
|
147
|
+
|
|
148
|
+
const recordPromises: Array<Promise<unknown>> = [];
|
|
149
|
+
|
|
150
|
+
for await (const { testcaseId, inputs, expected } of testcaseIterator(scorecard, args)) {
|
|
151
|
+
for (let i = 0; i < trials; i++) {
|
|
152
|
+
const modelResponsePromise =
|
|
153
|
+
hasSystemVersion ? args.system(inputs, systemVersion!) : args.system(inputs);
|
|
154
|
+
|
|
155
|
+
function createRecord(outputs: SystemOutput): Promise<unknown> {
|
|
156
|
+
return scorecard.records.create(run.id, {
|
|
157
|
+
inputs,
|
|
158
|
+
expected,
|
|
159
|
+
outputs,
|
|
160
|
+
...(testcaseId != null ? { testcaseId } : null),
|
|
161
|
+
});
|
|
162
|
+
}
|
|
30
163
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
inputs: testcase.inputs,
|
|
38
|
-
expected: testcase.expected,
|
|
39
|
-
outputs: modelResponse as Record<string, unknown>,
|
|
40
|
-
});
|
|
41
|
-
recordPromises.push(promise);
|
|
164
|
+
if (runInParallel) {
|
|
165
|
+
recordPromises.push(modelResponsePromise.then(createRecord));
|
|
166
|
+
} else {
|
|
167
|
+
recordPromises.push(createRecord(await modelResponsePromise));
|
|
168
|
+
}
|
|
169
|
+
}
|
|
42
170
|
}
|
|
43
171
|
// Wait until all the Records are created
|
|
44
172
|
await Promise.all(recordPromises);
|
|
45
173
|
|
|
46
|
-
const runUrl =
|
|
174
|
+
const runUrl = `${scorecard.baseAppURL}/projects/${args.projectId}/runs/${run.id}`;
|
|
47
175
|
|
|
48
176
|
return { id: run.id, url: runUrl };
|
|
49
177
|
}
|
package/src/resources/index.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
2
|
|
|
3
3
|
export * from './shared';
|
|
4
|
+
export { Metrics, type Metric, type MetricCreateParams } from './metrics';
|
|
4
5
|
export {
|
|
5
6
|
Projects,
|
|
6
7
|
type Project,
|
|
@@ -11,14 +12,6 @@ export {
|
|
|
11
12
|
export { Records, type Record, type RecordCreateParams } from './records';
|
|
12
13
|
export { Runs, type Run, type RunCreateParams } from './runs';
|
|
13
14
|
export { Scores, type Score, type ScoreUpsertParams } from './scores';
|
|
14
|
-
export {
|
|
15
|
-
SystemConfigs,
|
|
16
|
-
type SystemConfig,
|
|
17
|
-
type SystemConfigCreateParams,
|
|
18
|
-
type SystemConfigListParams,
|
|
19
|
-
type SystemConfigGetParams,
|
|
20
|
-
type SystemConfigsPaginatedResponse,
|
|
21
|
-
} from './system-configs';
|
|
22
15
|
export {
|
|
23
16
|
Systems,
|
|
24
17
|
type System,
|
|
@@ -27,7 +20,7 @@ export {
|
|
|
27
20
|
type SystemUpdateParams,
|
|
28
21
|
type SystemListParams,
|
|
29
22
|
type SystemsPaginatedResponse,
|
|
30
|
-
} from './systems';
|
|
23
|
+
} from './systems/systems';
|
|
31
24
|
export {
|
|
32
25
|
Testcases,
|
|
33
26
|
type Testcase,
|