scorecard-ai 1.0.0-alpha.8 → 1.0.0-alpha.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +30 -0
- package/README.md +25 -37
- package/client.d.mts +3 -2
- package/client.d.mts.map +1 -1
- package/client.d.ts +3 -2
- package/client.d.ts.map +1 -1
- package/client.js +15 -0
- package/client.js.map +1 -1
- package/client.mjs +15 -0
- package/client.mjs.map +1 -1
- package/core/pagination.d.mts +1 -1
- package/core/pagination.d.mts.map +1 -1
- package/core/pagination.d.ts +1 -1
- package/core/pagination.d.ts.map +1 -1
- package/index.d.mts +1 -0
- package/index.d.mts.map +1 -1
- package/index.d.ts +1 -0
- package/index.d.ts.map +1 -1
- package/index.js +3 -1
- package/index.js.map +1 -1
- package/index.mjs +1 -0
- package/index.mjs.map +1 -1
- package/internal/detect-platform.js +3 -3
- package/internal/detect-platform.js.map +1 -1
- package/internal/detect-platform.mjs +3 -3
- package/internal/detect-platform.mjs.map +1 -1
- package/internal/shim-types.d.mts +11 -22
- package/internal/shim-types.d.mts.map +1 -0
- package/internal/shim-types.d.ts +11 -22
- package/internal/shim-types.d.ts.map +1 -0
- package/internal/shim-types.js +4 -0
- package/internal/shim-types.js.map +1 -0
- package/internal/shim-types.mjs +3 -0
- package/internal/shim-types.mjs.map +1 -0
- package/internal/shims.d.mts +2 -2
- package/internal/shims.d.mts.map +1 -1
- package/internal/shims.d.ts +2 -2
- package/internal/shims.d.ts.map +1 -1
- package/internal/uploads.js.map +1 -1
- package/internal/uploads.mjs.map +1 -1
- package/lib/runAndEvaluate.d.mts +49 -9
- package/lib/runAndEvaluate.d.mts.map +1 -1
- package/lib/runAndEvaluate.d.ts +49 -9
- package/lib/runAndEvaluate.d.ts.map +1 -1
- package/lib/runAndEvaluate.js +62 -19
- package/lib/runAndEvaluate.js.map +1 -1
- package/lib/runAndEvaluate.mjs +62 -19
- package/lib/runAndEvaluate.mjs.map +1 -1
- package/package.json +1 -4
- package/resources/index.d.mts +1 -1
- package/resources/index.d.mts.map +1 -1
- package/resources/index.d.ts +1 -1
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js.map +1 -1
- package/resources/index.mjs.map +1 -1
- package/resources/runs.d.mts +6 -6
- package/resources/runs.d.mts.map +1 -1
- package/resources/runs.d.ts +6 -6
- package/resources/runs.d.ts.map +1 -1
- package/resources/runs.js +1 -1
- package/resources/runs.mjs +1 -1
- package/resources/system-configs.d.mts +2 -9
- package/resources/system-configs.d.mts.map +1 -1
- package/resources/system-configs.d.ts +2 -9
- package/resources/system-configs.d.ts.map +1 -1
- package/resources/system-configs.js +2 -4
- package/resources/system-configs.js.map +1 -1
- package/resources/system-configs.mjs +2 -4
- package/resources/system-configs.mjs.map +1 -1
- package/src/client.ts +14 -2
- package/src/core/pagination.ts +1 -1
- package/src/index.ts +2 -0
- package/src/internal/detect-platform.ts +3 -3
- package/src/internal/shim-types.ts +26 -0
- package/src/internal/shims.ts +2 -2
- package/src/internal/uploads.ts +1 -1
- package/src/lib/runAndEvaluate.ts +133 -30
- package/src/resources/index.ts +0 -1
- package/src/resources/runs.ts +6 -6
- package/src/resources/system-configs.ts +2 -16
- package/src/version.ts +1 -1
- package/version.d.mts +1 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/version.mjs +1 -1
- package/src/internal/shim-types.d.ts +0 -28
|
@@ -1,49 +1,152 @@
|
|
|
1
1
|
import { Scorecard } from '../client';
|
|
2
|
+
import { SystemConfig, Testcase } from '../resources';
|
|
3
|
+
|
|
4
|
+
type RunAndEvaluateArgs<SystemInput extends Record<string, any>, SystemOutput extends Record<string, any>> =
|
|
5
|
+
// Project and metrics are always required
|
|
6
|
+
{
|
|
7
|
+
/**
|
|
8
|
+
* The ID of the Project to run the system on.
|
|
9
|
+
*/
|
|
10
|
+
projectId: string;
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* The IDs of the Metrics to use for evaluation.
|
|
14
|
+
*/
|
|
15
|
+
metricIds: Array<string>;
|
|
16
|
+
} & (
|
|
17
|
+
| // If system config is provided, the system function receives a system config
|
|
18
|
+
{
|
|
19
|
+
/**
|
|
20
|
+
* The ID of the System Configuration to use for the run.
|
|
21
|
+
*/
|
|
22
|
+
systemConfigId: string;
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* The system function to run on the Testset.
|
|
26
|
+
*/
|
|
27
|
+
system: (testcaseInput: SystemInput, systemConfig: SystemConfig) => Promise<SystemOutput>;
|
|
28
|
+
}
|
|
29
|
+
// Otherwise, the system function receives only the testcase input
|
|
30
|
+
| {
|
|
31
|
+
/**
|
|
32
|
+
* The system function to run on the Testset.
|
|
33
|
+
*/
|
|
34
|
+
system: (testcaseInput: SystemInput) => Promise<SystemOutput>;
|
|
35
|
+
}
|
|
36
|
+
) &
|
|
37
|
+
// If testset is not provided, you must pass in all the testcases manually
|
|
38
|
+
(| {
|
|
39
|
+
/**
|
|
40
|
+
* The ID of the Scorecard Testset to run the system on.
|
|
41
|
+
*/
|
|
42
|
+
testsetId: string;
|
|
43
|
+
}
|
|
44
|
+
| {
|
|
45
|
+
/**
|
|
46
|
+
* The list of test cases to run the system on. Can be a list of Scorecard Testcases or a list of inputs and expected outputs.
|
|
47
|
+
*/
|
|
48
|
+
testcases:
|
|
49
|
+
| Array<{
|
|
50
|
+
inputs: SystemInput;
|
|
51
|
+
expected: Record<string, unknown>;
|
|
52
|
+
}>
|
|
53
|
+
| Array<Testcase>;
|
|
54
|
+
}
|
|
55
|
+
);
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* Returns an async generator over the given Testset or Testcases.
|
|
59
|
+
*/
|
|
60
|
+
async function* testcaseIterator<SystemInput extends Record<string, any>>(
|
|
61
|
+
scorecard: Scorecard,
|
|
62
|
+
args: RunAndEvaluateArgs<SystemInput, any>,
|
|
63
|
+
): AsyncGenerator<{
|
|
64
|
+
testcaseId: string | null;
|
|
65
|
+
inputs: SystemInput;
|
|
66
|
+
expected: Record<string, unknown>;
|
|
67
|
+
}> {
|
|
68
|
+
if ('testsetId' in args) {
|
|
69
|
+
for await (const testcase of scorecard.testcases.list(args.testsetId)) {
|
|
70
|
+
yield {
|
|
71
|
+
...testcase,
|
|
72
|
+
testcaseId: testcase.id,
|
|
73
|
+
inputs: testcase.inputs as SystemInput,
|
|
74
|
+
};
|
|
75
|
+
}
|
|
76
|
+
} else {
|
|
77
|
+
for (const testcase of args.testcases) {
|
|
78
|
+
yield {
|
|
79
|
+
...testcase,
|
|
80
|
+
testcaseId: 'id' in testcase ? testcase.id : null,
|
|
81
|
+
inputs: testcase.inputs as SystemInput,
|
|
82
|
+
};
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
}
|
|
2
86
|
|
|
3
87
|
/**
|
|
4
88
|
* Runs a system on a Testset and records the results in Scorecard.
|
|
5
89
|
*
|
|
6
90
|
* @param scorecard The Scorecard client
|
|
7
|
-
* @param projectId The ID of the Project to run the system on.
|
|
8
|
-
* @param testsetId The ID of the Testset to run the system on.
|
|
9
|
-
* @param metricIds The IDs of the Metrics to use for evaluation.
|
|
10
|
-
* @param system The system to run on the Testset.
|
|
91
|
+
* @param args.projectId The ID of the Project to run the system on.
|
|
92
|
+
* @param args.testsetId The optional ID of the Testset to run the system on. Either this or `args.testcases` must be provided.
|
|
93
|
+
* @param args.testcases The optional list of Testcases to run the system on. Either this or `args.testsetId` must be provided.
|
|
94
|
+
* @param args.metricIds The IDs of the Metrics to use for evaluation.
|
|
95
|
+
* @param args.systemConfigId The optional ID of the System Configuration to associate with the Run.
|
|
96
|
+
* @param args.system The system to run on the Testset.
|
|
97
|
+
* @param options.runInParallel Whether to call `args.system` in parallel. False (sequential) by default.
|
|
11
98
|
*/
|
|
12
|
-
export async function runAndEvaluate<
|
|
99
|
+
export async function runAndEvaluate<
|
|
100
|
+
SystemInput extends Record<string, any>,
|
|
101
|
+
SystemOutput extends Record<string, any>,
|
|
102
|
+
>(
|
|
13
103
|
scorecard: Scorecard,
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
}: {
|
|
20
|
-
projectId: string;
|
|
21
|
-
testsetId: string;
|
|
22
|
-
metricIds: Array<string>;
|
|
23
|
-
system: (testcaseInput: SystemInput) => Promise<SystemOutput>;
|
|
104
|
+
args: RunAndEvaluateArgs<SystemInput, SystemOutput>,
|
|
105
|
+
options: {
|
|
106
|
+
runInParallel: boolean;
|
|
107
|
+
} = {
|
|
108
|
+
runInParallel: false,
|
|
24
109
|
},
|
|
25
110
|
): Promise<Pick<Scorecard.Runs.Run, 'id'> & { url: string }> {
|
|
26
|
-
const
|
|
27
|
-
|
|
28
|
-
|
|
111
|
+
const hasSystemConfig = 'systemConfigId' in args;
|
|
112
|
+
const hasTestset = 'testsetId' in args;
|
|
113
|
+
|
|
114
|
+
const runPromise = scorecard.runs.create(args.projectId, {
|
|
115
|
+
testsetId: hasTestset ? args.testsetId : null,
|
|
116
|
+
metricIds: args.metricIds,
|
|
117
|
+
...(hasSystemConfig ?
|
|
118
|
+
{
|
|
119
|
+
systemConfigId: args.systemConfigId,
|
|
120
|
+
}
|
|
121
|
+
: null),
|
|
29
122
|
});
|
|
123
|
+
const systemConfig = hasSystemConfig ? await scorecard.systemConfigs.get(args.systemConfigId) : null;
|
|
124
|
+
const run = await runPromise;
|
|
125
|
+
|
|
126
|
+
const recordPromises: Array<Promise<unknown>> = [];
|
|
127
|
+
|
|
128
|
+
for await (const { testcaseId, inputs, expected } of testcaseIterator(scorecard, args)) {
|
|
129
|
+
const modelResponsePromise = hasSystemConfig ? args.system(inputs, systemConfig!) : args.system(inputs);
|
|
130
|
+
|
|
131
|
+
function createRecord(outputs: SystemOutput): Promise<unknown> {
|
|
132
|
+
return scorecard.records.create(run.id, {
|
|
133
|
+
inputs,
|
|
134
|
+
expected,
|
|
135
|
+
outputs,
|
|
136
|
+
...(testcaseId != null ? { testcaseId } : null),
|
|
137
|
+
});
|
|
138
|
+
}
|
|
30
139
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
testcaseId: testcase.id,
|
|
37
|
-
inputs: testcase.inputs,
|
|
38
|
-
expected: testcase.expected,
|
|
39
|
-
outputs: modelResponse as Record<string, unknown>,
|
|
40
|
-
});
|
|
41
|
-
recordPromises.push(promise);
|
|
140
|
+
if (options.runInParallel) {
|
|
141
|
+
recordPromises.push(modelResponsePromise.then(createRecord));
|
|
142
|
+
} else {
|
|
143
|
+
recordPromises.push(createRecord(await modelResponsePromise));
|
|
144
|
+
}
|
|
42
145
|
}
|
|
43
146
|
// Wait until all the Records are created
|
|
44
147
|
await Promise.all(recordPromises);
|
|
45
148
|
|
|
46
|
-
const runUrl =
|
|
149
|
+
const runUrl = `${scorecard.baseAppURL}/projects/${args.projectId}/runs/${run.id}`;
|
|
47
150
|
|
|
48
151
|
return { id: run.id, url: runUrl };
|
|
49
152
|
}
|
package/src/resources/index.ts
CHANGED
package/src/resources/runs.ts
CHANGED
|
@@ -13,8 +13,8 @@ export class Runs extends APIResource {
|
|
|
13
13
|
* ```ts
|
|
14
14
|
* const run = await client.runs.create('314', {
|
|
15
15
|
* metricIds: ['789', '101'],
|
|
16
|
-
* testsetId: '246',
|
|
17
16
|
* systemConfigId: '87654321-4d3b-4ae4-8c7a-4b6e2a19ccf0',
|
|
17
|
+
* testsetId: '246',
|
|
18
18
|
* });
|
|
19
19
|
* ```
|
|
20
20
|
*/
|
|
@@ -52,7 +52,7 @@ export interface Run {
|
|
|
52
52
|
/**
|
|
53
53
|
* The ID of the Testset this Run is testing.
|
|
54
54
|
*/
|
|
55
|
-
testsetId: string;
|
|
55
|
+
testsetId: string | null;
|
|
56
56
|
|
|
57
57
|
/**
|
|
58
58
|
* The ID of the system configuration this Run is using.
|
|
@@ -67,14 +67,14 @@ export interface RunCreateParams {
|
|
|
67
67
|
metricIds: Array<string>;
|
|
68
68
|
|
|
69
69
|
/**
|
|
70
|
-
* The ID of the
|
|
70
|
+
* The ID of the system configuration this Run is using.
|
|
71
71
|
*/
|
|
72
|
-
|
|
72
|
+
systemConfigId?: string;
|
|
73
73
|
|
|
74
74
|
/**
|
|
75
|
-
* The ID of the
|
|
75
|
+
* The ID of the Testset this Run is testing.
|
|
76
76
|
*/
|
|
77
|
-
|
|
77
|
+
testsetId?: string | null;
|
|
78
78
|
}
|
|
79
79
|
|
|
80
80
|
export declare namespace Runs {
|
|
@@ -81,17 +81,11 @@ export class SystemConfigs extends APIResource {
|
|
|
81
81
|
* ```ts
|
|
82
82
|
* const systemConfig = await client.systemConfigs.get(
|
|
83
83
|
* '87654321-4d3b-4ae4-8c7a-4b6e2a19ccf0',
|
|
84
|
-
* { systemId: '12345678-0a8b-4f66-b6f3-2ddcfa097257' },
|
|
85
84
|
* );
|
|
86
85
|
* ```
|
|
87
86
|
*/
|
|
88
|
-
get(
|
|
89
|
-
systemConfigID: string,
|
|
90
|
-
params: SystemConfigGetParams,
|
|
91
|
-
options?: RequestOptions,
|
|
92
|
-
): APIPromise<SystemConfig> {
|
|
93
|
-
const { systemId } = params;
|
|
94
|
-
return this._client.get(path`/systems/${systemId}/configs/${systemConfigID}`, options);
|
|
87
|
+
get(systemConfigID: string, options?: RequestOptions): APIPromise<SystemConfig> {
|
|
88
|
+
return this._client.get(path`/systems/configs/${systemConfigID}`, options);
|
|
95
89
|
}
|
|
96
90
|
}
|
|
97
91
|
|
|
@@ -185,19 +179,11 @@ export namespace SystemConfigCreateParams {
|
|
|
185
179
|
|
|
186
180
|
export interface SystemConfigListParams extends PaginatedResponseParams {}
|
|
187
181
|
|
|
188
|
-
export interface SystemConfigGetParams {
|
|
189
|
-
/**
|
|
190
|
-
* The ID of the system the configuration belongs to.
|
|
191
|
-
*/
|
|
192
|
-
systemId: string;
|
|
193
|
-
}
|
|
194
|
-
|
|
195
182
|
export declare namespace SystemConfigs {
|
|
196
183
|
export {
|
|
197
184
|
type SystemConfig as SystemConfig,
|
|
198
185
|
type SystemConfigsPaginatedResponse as SystemConfigsPaginatedResponse,
|
|
199
186
|
type SystemConfigCreateParams as SystemConfigCreateParams,
|
|
200
187
|
type SystemConfigListParams as SystemConfigListParams,
|
|
201
|
-
type SystemConfigGetParams as SystemConfigGetParams,
|
|
202
188
|
};
|
|
203
189
|
}
|
package/src/version.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export const VERSION = '1.0.0-alpha.8'; // x-release-please-version
|
|
1
|
+
export const VERSION = '1.0.0-alpha.9'; // x-release-please-version
|
package/version.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const VERSION = "1.0.0-alpha.8";
|
|
1
|
+
export declare const VERSION = "1.0.0-alpha.9";
|
|
2
2
|
//# sourceMappingURL=version.d.mts.map
|
package/version.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const VERSION = "1.0.0-alpha.8";
|
|
1
|
+
export declare const VERSION = "1.0.0-alpha.9";
|
|
2
2
|
//# sourceMappingURL=version.d.ts.map
|
package/version.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.VERSION = void 0;
|
|
4
|
-
exports.VERSION = '1.0.0-alpha.8'; // x-release-please-version
|
|
4
|
+
exports.VERSION = '1.0.0-alpha.9'; // x-release-please-version
|
|
5
5
|
//# sourceMappingURL=version.js.map
|
package/version.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export const VERSION = '1.0.0-alpha.8'; // x-release-please-version
|
|
1
|
+
export const VERSION = '1.0.0-alpha.9'; // x-release-please-version
|
|
2
2
|
//# sourceMappingURL=version.mjs.map
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* Shims for types that we can't always rely on being available globally.
|
|
5
|
-
*
|
|
6
|
-
* Note: these only exist at the type-level, there is no corresponding runtime
|
|
7
|
-
* version for any of these symbols.
|
|
8
|
-
*/
|
|
9
|
-
|
|
10
|
-
/**
|
|
11
|
-
* In order to properly access the global `NodeJS` type, if it's available, we
|
|
12
|
-
* need to make use of declaration shadowing. Without this, any checks for the
|
|
13
|
-
* presence of `NodeJS.ReadableStream` will fail.
|
|
14
|
-
*/
|
|
15
|
-
declare namespace NodeJS {
|
|
16
|
-
interface ReadableStream {}
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
type HasProperties<T> = keyof T extends never ? false : true;
|
|
20
|
-
|
|
21
|
-
// @ts-ignore
|
|
22
|
-
type _ReadableStream<R = any> =
|
|
23
|
-
// @ts-ignore
|
|
24
|
-
HasProperties<NodeJS.ReadableStream> extends true ? NodeJS.ReadableStream<R> : ReadableStream<R>;
|
|
25
|
-
|
|
26
|
-
// @ts-ignore
|
|
27
|
-
declare const _ReadableStream: unknown extends typeof ReadableStream ? never : typeof ReadableStream;
|
|
28
|
-
export { _ReadableStream as ReadableStream };
|