scorecard-ai 1.0.0-alpha.7 → 1.0.0-alpha.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +54 -0
- package/README.md +25 -37
- package/client.d.mts +5 -4
- package/client.d.mts.map +1 -1
- package/client.d.ts +5 -4
- package/client.d.ts.map +1 -1
- package/client.js +15 -0
- package/client.js.map +1 -1
- package/client.mjs +15 -0
- package/client.mjs.map +1 -1
- package/core/pagination.d.mts +1 -1
- package/core/pagination.d.mts.map +1 -1
- package/core/pagination.d.ts +1 -1
- package/core/pagination.d.ts.map +1 -1
- package/index.d.mts +1 -0
- package/index.d.mts.map +1 -1
- package/index.d.ts +1 -0
- package/index.d.ts.map +1 -1
- package/index.js +3 -1
- package/index.js.map +1 -1
- package/index.mjs +1 -0
- package/index.mjs.map +1 -1
- package/internal/detect-platform.js +3 -3
- package/internal/detect-platform.js.map +1 -1
- package/internal/detect-platform.mjs +3 -3
- package/internal/detect-platform.mjs.map +1 -1
- package/internal/shim-types.d.mts +11 -22
- package/internal/shim-types.d.mts.map +1 -0
- package/internal/shim-types.d.ts +11 -22
- package/internal/shim-types.d.ts.map +1 -0
- package/internal/shim-types.js +4 -0
- package/internal/shim-types.js.map +1 -0
- package/internal/shim-types.mjs +3 -0
- package/internal/shim-types.mjs.map +1 -0
- package/internal/shims.d.mts +2 -2
- package/internal/shims.d.mts.map +1 -1
- package/internal/shims.d.ts +2 -2
- package/internal/shims.d.ts.map +1 -1
- package/internal/uploads.js.map +1 -1
- package/internal/uploads.mjs.map +1 -1
- package/lib/runAndEvaluate.d.mts +49 -9
- package/lib/runAndEvaluate.d.mts.map +1 -1
- package/lib/runAndEvaluate.d.ts +49 -9
- package/lib/runAndEvaluate.d.ts.map +1 -1
- package/lib/runAndEvaluate.js +62 -23
- package/lib/runAndEvaluate.js.map +1 -1
- package/lib/runAndEvaluate.mjs +62 -23
- package/lib/runAndEvaluate.mjs.map +1 -1
- package/package.json +1 -4
- package/resources/index.d.mts +2 -2
- package/resources/index.d.mts.map +1 -1
- package/resources/index.d.ts +2 -2
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js.map +1 -1
- package/resources/index.mjs.map +1 -1
- package/resources/records.d.mts +11 -9
- package/resources/records.d.mts.map +1 -1
- package/resources/records.d.ts +11 -9
- package/resources/records.d.ts.map +1 -1
- package/resources/records.js +3 -1
- package/resources/records.js.map +1 -1
- package/resources/records.mjs +3 -1
- package/resources/records.mjs.map +1 -1
- package/resources/runs.d.mts +5 -32
- package/resources/runs.d.mts.map +1 -1
- package/resources/runs.d.ts +5 -32
- package/resources/runs.d.ts.map +1 -1
- package/resources/runs.js +1 -14
- package/resources/runs.js.map +1 -1
- package/resources/runs.mjs +1 -14
- package/resources/runs.mjs.map +1 -1
- package/resources/system-configs.d.mts +2 -9
- package/resources/system-configs.d.mts.map +1 -1
- package/resources/system-configs.d.ts +2 -9
- package/resources/system-configs.d.ts.map +1 -1
- package/resources/system-configs.js +2 -4
- package/resources/system-configs.js.map +1 -1
- package/resources/system-configs.mjs +2 -4
- package/resources/system-configs.mjs.map +1 -1
- package/resources/testcases.d.mts +10 -18
- package/resources/testcases.d.mts.map +1 -1
- package/resources/testcases.d.ts +10 -18
- package/resources/testcases.d.ts.map +1 -1
- package/resources/testsets.d.mts +31 -31
- package/resources/testsets.d.mts.map +1 -1
- package/resources/testsets.d.ts +31 -31
- package/resources/testsets.d.ts.map +1 -1
- package/resources/testsets.js +1 -1
- package/resources/testsets.mjs +1 -1
- package/src/client.ts +16 -10
- package/src/core/pagination.ts +1 -1
- package/src/index.ts +2 -0
- package/src/internal/detect-platform.ts +3 -3
- package/src/internal/shim-types.ts +26 -0
- package/src/internal/shims.ts +2 -2
- package/src/internal/uploads.ts +1 -1
- package/src/lib/runAndEvaluate.ts +133 -35
- package/src/resources/index.ts +1 -2
- package/src/resources/records.ts +13 -11
- package/src/resources/runs.ts +5 -57
- package/src/resources/system-configs.ts +2 -16
- package/src/resources/testcases.ts +11 -19
- package/src/resources/testsets.ts +31 -31
- package/src/version.ts +1 -1
- package/version.d.mts +1 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/version.mjs +1 -1
- package/src/internal/shim-types.d.ts +0 -28
|
@@ -85,10 +85,10 @@ const getPlatformProperties = (): PlatformProperties => {
|
|
|
85
85
|
return {
|
|
86
86
|
'X-Stainless-Lang': 'js',
|
|
87
87
|
'X-Stainless-Package-Version': VERSION,
|
|
88
|
-
'X-Stainless-OS': normalizePlatform((globalThis as any).process.platform),
|
|
89
|
-
'X-Stainless-Arch': normalizeArch((globalThis as any).process.arch),
|
|
88
|
+
'X-Stainless-OS': normalizePlatform((globalThis as any).process.platform ?? 'unknown'),
|
|
89
|
+
'X-Stainless-Arch': normalizeArch((globalThis as any).process.arch ?? 'unknown'),
|
|
90
90
|
'X-Stainless-Runtime': 'node',
|
|
91
|
-
'X-Stainless-Runtime-Version': (globalThis as any).process.version,
|
|
91
|
+
'X-Stainless-Runtime-Version': (globalThis as any).process.version ?? 'unknown',
|
|
92
92
|
};
|
|
93
93
|
}
|
|
94
94
|
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Shims for types that we can't always rely on being available globally.
|
|
5
|
+
*
|
|
6
|
+
* Note: these only exist at the type-level, there is no corresponding runtime
|
|
7
|
+
* version for any of these symbols.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
type NeverToAny<T> = T extends never ? any : T;
|
|
11
|
+
|
|
12
|
+
/** @ts-ignore */
|
|
13
|
+
type _DOMReadableStream<R = any> = globalThis.ReadableStream<R>;
|
|
14
|
+
|
|
15
|
+
/** @ts-ignore */
|
|
16
|
+
type _NodeReadableStream<R = any> = import('stream/web').ReadableStream<R>;
|
|
17
|
+
|
|
18
|
+
type _ConditionalNodeReadableStream<R = any> =
|
|
19
|
+
typeof globalThis extends { ReadableStream: any } ? never : _NodeReadableStream<R>;
|
|
20
|
+
|
|
21
|
+
type _ReadableStream<R = any> = NeverToAny<
|
|
22
|
+
| ([0] extends [1 & _DOMReadableStream<R>] ? never : _DOMReadableStream<R>)
|
|
23
|
+
| ([0] extends [1 & _ConditionalNodeReadableStream<R>] ? never : _ConditionalNodeReadableStream<R>)
|
|
24
|
+
>;
|
|
25
|
+
|
|
26
|
+
export type { _ReadableStream as ReadableStream };
|
package/src/internal/shims.ts
CHANGED
|
@@ -7,8 +7,8 @@
|
|
|
7
7
|
* messages in cases where an environment isn't fully supported.
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
10
|
+
import type { Fetch } from './builtin-types';
|
|
11
|
+
import type { ReadableStream } from './shim-types';
|
|
12
12
|
|
|
13
13
|
export function getDefaultFetch(): Fetch {
|
|
14
14
|
if (typeof fetch !== 'undefined') {
|
package/src/internal/uploads.ts
CHANGED
|
@@ -138,7 +138,7 @@ export const createForm = async <T = Record<string, unknown>>(
|
|
|
138
138
|
|
|
139
139
|
// We check for Blob not File because Bun.File doesn't inherit from File,
|
|
140
140
|
// but they both inherit from Blob and have a `name` property at runtime.
|
|
141
|
-
const isNamedBlob = (value:
|
|
141
|
+
const isNamedBlob = (value: unknown) => value instanceof Blob && 'name' in value;
|
|
142
142
|
|
|
143
143
|
const isUploadable = (value: unknown) =>
|
|
144
144
|
typeof value === 'object' &&
|
|
@@ -1,54 +1,152 @@
|
|
|
1
1
|
import { Scorecard } from '../client';
|
|
2
|
+
import { SystemConfig, Testcase } from '../resources';
|
|
3
|
+
|
|
4
|
+
type RunAndEvaluateArgs<SystemInput extends Record<string, any>, SystemOutput extends Record<string, any>> =
|
|
5
|
+
// Project and metrics are always required
|
|
6
|
+
{
|
|
7
|
+
/**
|
|
8
|
+
* The ID of the Project to run the system on.
|
|
9
|
+
*/
|
|
10
|
+
projectId: string;
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* The IDs of the Metrics to use for evaluation.
|
|
14
|
+
*/
|
|
15
|
+
metricIds: Array<string>;
|
|
16
|
+
} & (
|
|
17
|
+
| // If system config is provided, the system function receives a system config
|
|
18
|
+
{
|
|
19
|
+
/**
|
|
20
|
+
* The ID of the System Configuration to use for the run.
|
|
21
|
+
*/
|
|
22
|
+
systemConfigId: string;
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* The system function to run on the Testset.
|
|
26
|
+
*/
|
|
27
|
+
system: (testcaseInput: SystemInput, systemConfig: SystemConfig) => Promise<SystemOutput>;
|
|
28
|
+
}
|
|
29
|
+
// Otherwise, the system function receives only the testcase input
|
|
30
|
+
| {
|
|
31
|
+
/**
|
|
32
|
+
* The system function to run on the Testset.
|
|
33
|
+
*/
|
|
34
|
+
system: (testcaseInput: SystemInput) => Promise<SystemOutput>;
|
|
35
|
+
}
|
|
36
|
+
) &
|
|
37
|
+
// If testset is not provided, you must pass in all the testcases manually
|
|
38
|
+
(| {
|
|
39
|
+
/**
|
|
40
|
+
* The ID of the Scorecard Testset to run the system on.
|
|
41
|
+
*/
|
|
42
|
+
testsetId: string;
|
|
43
|
+
}
|
|
44
|
+
| {
|
|
45
|
+
/**
|
|
46
|
+
* The list of test cases to run the system on. Can be a list of Scorecard Testcases or a list of inputs and expected outputs.
|
|
47
|
+
*/
|
|
48
|
+
testcases:
|
|
49
|
+
| Array<{
|
|
50
|
+
inputs: SystemInput;
|
|
51
|
+
expected: Record<string, unknown>;
|
|
52
|
+
}>
|
|
53
|
+
| Array<Testcase>;
|
|
54
|
+
}
|
|
55
|
+
);
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* Returns an async generator over the given Testset or Testcases.
|
|
59
|
+
*/
|
|
60
|
+
async function* testcaseIterator<SystemInput extends Record<string, any>>(
|
|
61
|
+
scorecard: Scorecard,
|
|
62
|
+
args: RunAndEvaluateArgs<SystemInput, any>,
|
|
63
|
+
): AsyncGenerator<{
|
|
64
|
+
testcaseId: string | null;
|
|
65
|
+
inputs: SystemInput;
|
|
66
|
+
expected: Record<string, unknown>;
|
|
67
|
+
}> {
|
|
68
|
+
if ('testsetId' in args) {
|
|
69
|
+
for await (const testcase of scorecard.testcases.list(args.testsetId)) {
|
|
70
|
+
yield {
|
|
71
|
+
...testcase,
|
|
72
|
+
testcaseId: testcase.id,
|
|
73
|
+
inputs: testcase.inputs as SystemInput,
|
|
74
|
+
};
|
|
75
|
+
}
|
|
76
|
+
} else {
|
|
77
|
+
for (const testcase of args.testcases) {
|
|
78
|
+
yield {
|
|
79
|
+
...testcase,
|
|
80
|
+
testcaseId: 'id' in testcase ? testcase.id : null,
|
|
81
|
+
inputs: testcase.inputs as SystemInput,
|
|
82
|
+
};
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
}
|
|
2
86
|
|
|
3
87
|
/**
|
|
4
88
|
* Runs a system on a Testset and records the results in Scorecard.
|
|
5
89
|
*
|
|
6
90
|
* @param scorecard The Scorecard client
|
|
7
|
-
* @param projectId The ID of the Project to run the system on.
|
|
8
|
-
* @param testsetId The ID of the Testset to run the system on.
|
|
9
|
-
* @param
|
|
10
|
-
* @param
|
|
91
|
+
* @param args.projectId The ID of the Project to run the system on.
|
|
92
|
+
* @param args.testsetId The optional ID of the Testset to run the system on. Either this or `args.testcases` must be provided.
|
|
93
|
+
* @param args.testcases The optional list of Testcases to run the system on. Either this or `args.testsetId` must be provided.
|
|
94
|
+
* @param args.metricIds The IDs of the Metrics to use for evaluation.
|
|
95
|
+
* @param args.systemConfigId The optional ID of the System Configuration to associate with the Run.
|
|
96
|
+
* @param args.system The system to run on the Testset.
|
|
97
|
+
* @param options.runInParallel Whether to call `args.system` in parallel. False (sequential) by default.
|
|
11
98
|
*/
|
|
12
|
-
export async function runAndEvaluate<
|
|
99
|
+
export async function runAndEvaluate<
|
|
100
|
+
SystemInput extends Record<string, any>,
|
|
101
|
+
SystemOutput extends Record<string, any>,
|
|
102
|
+
>(
|
|
13
103
|
scorecard: Scorecard,
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
}: {
|
|
20
|
-
projectId: string;
|
|
21
|
-
testsetId: string;
|
|
22
|
-
metricIds: Array<string>;
|
|
23
|
-
system: (testcaseInput: SystemInput) => Promise<SystemOutput>;
|
|
104
|
+
args: RunAndEvaluateArgs<SystemInput, SystemOutput>,
|
|
105
|
+
options: {
|
|
106
|
+
runInParallel: boolean;
|
|
107
|
+
} = {
|
|
108
|
+
runInParallel: false,
|
|
24
109
|
},
|
|
25
110
|
): Promise<Pick<Scorecard.Runs.Run, 'id'> & { url: string }> {
|
|
26
|
-
const
|
|
27
|
-
|
|
28
|
-
|
|
111
|
+
const hasSystemConfig = 'systemConfigId' in args;
|
|
112
|
+
const hasTestset = 'testsetId' in args;
|
|
113
|
+
|
|
114
|
+
const runPromise = scorecard.runs.create(args.projectId, {
|
|
115
|
+
testsetId: hasTestset ? args.testsetId : null,
|
|
116
|
+
metricIds: args.metricIds,
|
|
117
|
+
...(hasSystemConfig ?
|
|
118
|
+
{
|
|
119
|
+
systemConfigId: args.systemConfigId,
|
|
120
|
+
}
|
|
121
|
+
: null),
|
|
29
122
|
});
|
|
123
|
+
const systemConfig = hasSystemConfig ? await scorecard.systemConfigs.get(args.systemConfigId) : null;
|
|
124
|
+
const run = await runPromise;
|
|
30
125
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
for await (const
|
|
34
|
-
const
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
126
|
+
const recordPromises: Array<Promise<unknown>> = [];
|
|
127
|
+
|
|
128
|
+
for await (const { testcaseId, inputs, expected } of testcaseIterator(scorecard, args)) {
|
|
129
|
+
const modelResponsePromise = hasSystemConfig ? args.system(inputs, systemConfig!) : args.system(inputs);
|
|
130
|
+
|
|
131
|
+
function createRecord(outputs: SystemOutput): Promise<unknown> {
|
|
132
|
+
return scorecard.records.create(run.id, {
|
|
133
|
+
inputs,
|
|
134
|
+
expected,
|
|
135
|
+
outputs,
|
|
136
|
+
...(testcaseId != null ? { testcaseId } : null),
|
|
137
|
+
});
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
if (options.runInParallel) {
|
|
141
|
+
recordPromises.push(modelResponsePromise.then(createRecord));
|
|
142
|
+
} else {
|
|
143
|
+
recordPromises.push(createRecord(await modelResponsePromise));
|
|
144
|
+
}
|
|
42
145
|
}
|
|
43
146
|
// Wait until all the Records are created
|
|
44
147
|
await Promise.all(recordPromises);
|
|
45
148
|
|
|
46
|
-
|
|
47
|
-
await scorecard.runs.update(run.id, {
|
|
48
|
-
status: 'awaiting_scoring',
|
|
49
|
-
});
|
|
50
|
-
|
|
51
|
-
const runUrl = `https://app.getscorecard.ai/projects/${projectId}/runs/grades/${run.id}`;
|
|
149
|
+
const runUrl = `${scorecard.baseAppURL}/projects/${args.projectId}/runs/${run.id}`;
|
|
52
150
|
|
|
53
151
|
return { id: run.id, url: runUrl };
|
|
54
152
|
}
|
package/src/resources/index.ts
CHANGED
|
@@ -9,14 +9,13 @@ export {
|
|
|
9
9
|
type ProjectsPaginatedResponse,
|
|
10
10
|
} from './projects';
|
|
11
11
|
export { Records, type Record, type RecordCreateParams } from './records';
|
|
12
|
-
export { Runs, type Run, type
|
|
12
|
+
export { Runs, type Run, type RunCreateParams } from './runs';
|
|
13
13
|
export { Scores, type Score, type ScoreUpsertParams } from './scores';
|
|
14
14
|
export {
|
|
15
15
|
SystemConfigs,
|
|
16
16
|
type SystemConfig,
|
|
17
17
|
type SystemConfigCreateParams,
|
|
18
18
|
type SystemConfigListParams,
|
|
19
|
-
type SystemConfigGetParams,
|
|
20
19
|
type SystemConfigsPaginatedResponse,
|
|
21
20
|
} from './system-configs';
|
|
22
21
|
export {
|
package/src/resources/records.ts
CHANGED
|
@@ -13,8 +13,10 @@ export class Records extends APIResource {
|
|
|
13
13
|
* @example
|
|
14
14
|
* ```ts
|
|
15
15
|
* const record = await client.records.create('135', {
|
|
16
|
+
* expected: {
|
|
17
|
+
* idealAnswer: 'Paris is the capital of France',
|
|
18
|
+
* },
|
|
16
19
|
* inputs: { question: 'What is the capital of France?' },
|
|
17
|
-
* labels: { idealAnswer: 'Paris is the capital of France' },
|
|
18
20
|
* outputs: { response: 'The capital of France is Paris.' },
|
|
19
21
|
* testcaseId: '248',
|
|
20
22
|
* });
|
|
@@ -35,15 +37,15 @@ export interface Record {
|
|
|
35
37
|
id: string;
|
|
36
38
|
|
|
37
39
|
/**
|
|
38
|
-
* The actual inputs sent to the system, which should match the system's input
|
|
39
|
-
* schema.
|
|
40
|
+
* The expected outputs for the Testcase.
|
|
40
41
|
*/
|
|
41
|
-
|
|
42
|
+
expected: BuiltinRecord<string, unknown>;
|
|
42
43
|
|
|
43
44
|
/**
|
|
44
|
-
* The
|
|
45
|
+
* The actual inputs sent to the system, which should match the system's input
|
|
46
|
+
* schema.
|
|
45
47
|
*/
|
|
46
|
-
|
|
48
|
+
inputs: BuiltinRecord<string, unknown>;
|
|
47
49
|
|
|
48
50
|
/**
|
|
49
51
|
* The actual outputs from the system.
|
|
@@ -63,15 +65,15 @@ export interface Record {
|
|
|
63
65
|
|
|
64
66
|
export interface RecordCreateParams {
|
|
65
67
|
/**
|
|
66
|
-
* The actual inputs sent to the system, which should match the system's input
|
|
67
|
-
* schema.
|
|
68
|
+
* The expected outputs for the Testcase.
|
|
68
69
|
*/
|
|
69
|
-
|
|
70
|
+
expected: BuiltinRecord<string, unknown>;
|
|
70
71
|
|
|
71
72
|
/**
|
|
72
|
-
* The
|
|
73
|
+
* The actual inputs sent to the system, which should match the system's input
|
|
74
|
+
* schema.
|
|
73
75
|
*/
|
|
74
|
-
|
|
76
|
+
inputs: BuiltinRecord<string, unknown>;
|
|
75
77
|
|
|
76
78
|
/**
|
|
77
79
|
* The actual outputs from the system.
|
package/src/resources/runs.ts
CHANGED
|
@@ -13,28 +13,14 @@ export class Runs extends APIResource {
|
|
|
13
13
|
* ```ts
|
|
14
14
|
* const run = await client.runs.create('314', {
|
|
15
15
|
* metricIds: ['789', '101'],
|
|
16
|
-
* testsetId: '246',
|
|
17
16
|
* systemConfigId: '87654321-4d3b-4ae4-8c7a-4b6e2a19ccf0',
|
|
17
|
+
* testsetId: '246',
|
|
18
18
|
* });
|
|
19
19
|
* ```
|
|
20
20
|
*/
|
|
21
21
|
create(projectID: string, body: RunCreateParams, options?: RequestOptions): APIPromise<Run> {
|
|
22
22
|
return this._client.post(path`/projects/${projectID}/runs`, { body, ...options });
|
|
23
23
|
}
|
|
24
|
-
|
|
25
|
-
/**
|
|
26
|
-
* Update the status of a Run.
|
|
27
|
-
*
|
|
28
|
-
* @example
|
|
29
|
-
* ```ts
|
|
30
|
-
* const run = await client.runs.update('135', {
|
|
31
|
-
* status: 'awaiting_scoring',
|
|
32
|
-
* });
|
|
33
|
-
* ```
|
|
34
|
-
*/
|
|
35
|
-
update(runID: string, body: RunUpdateParams, options?: RequestOptions): APIPromise<RunUpdateResponse> {
|
|
36
|
-
return this._client.patch(path`/runs/${runID}`, { body, ...options });
|
|
37
|
-
}
|
|
38
24
|
}
|
|
39
25
|
|
|
40
26
|
/**
|
|
@@ -66,7 +52,7 @@ export interface Run {
|
|
|
66
52
|
/**
|
|
67
53
|
* The ID of the Testset this Run is testing.
|
|
68
54
|
*/
|
|
69
|
-
testsetId: string;
|
|
55
|
+
testsetId: string | null;
|
|
70
56
|
|
|
71
57
|
/**
|
|
72
58
|
* The ID of the system configuration this Run is using.
|
|
@@ -74,61 +60,23 @@ export interface Run {
|
|
|
74
60
|
systemConfigId?: string;
|
|
75
61
|
}
|
|
76
62
|
|
|
77
|
-
export interface RunUpdateResponse {
|
|
78
|
-
/**
|
|
79
|
-
* The ID of the Run.
|
|
80
|
-
*/
|
|
81
|
-
id: string;
|
|
82
|
-
|
|
83
|
-
/**
|
|
84
|
-
* The status of the Run.
|
|
85
|
-
*/
|
|
86
|
-
status:
|
|
87
|
-
| 'pending'
|
|
88
|
-
| 'awaiting_execution'
|
|
89
|
-
| 'running_execution'
|
|
90
|
-
| 'awaiting_scoring'
|
|
91
|
-
| 'running_scoring'
|
|
92
|
-
| 'awaiting_human_scoring'
|
|
93
|
-
| 'completed';
|
|
94
|
-
}
|
|
95
|
-
|
|
96
63
|
export interface RunCreateParams {
|
|
97
64
|
/**
|
|
98
65
|
* The IDs of the metrics this Run is using.
|
|
99
66
|
*/
|
|
100
67
|
metricIds: Array<string>;
|
|
101
68
|
|
|
102
|
-
/**
|
|
103
|
-
* The ID of the Testset this Run is testing.
|
|
104
|
-
*/
|
|
105
|
-
testsetId: string;
|
|
106
|
-
|
|
107
69
|
/**
|
|
108
70
|
* The ID of the system configuration this Run is using.
|
|
109
71
|
*/
|
|
110
72
|
systemConfigId?: string;
|
|
111
|
-
}
|
|
112
73
|
|
|
113
|
-
export interface RunUpdateParams {
|
|
114
74
|
/**
|
|
115
|
-
* The
|
|
75
|
+
* The ID of the Testset this Run is testing.
|
|
116
76
|
*/
|
|
117
|
-
|
|
118
|
-
| 'pending'
|
|
119
|
-
| 'awaiting_execution'
|
|
120
|
-
| 'running_execution'
|
|
121
|
-
| 'awaiting_scoring'
|
|
122
|
-
| 'running_scoring'
|
|
123
|
-
| 'awaiting_human_scoring'
|
|
124
|
-
| 'completed';
|
|
77
|
+
testsetId?: string | null;
|
|
125
78
|
}
|
|
126
79
|
|
|
127
80
|
export declare namespace Runs {
|
|
128
|
-
export {
|
|
129
|
-
type Run as Run,
|
|
130
|
-
type RunUpdateResponse as RunUpdateResponse,
|
|
131
|
-
type RunCreateParams as RunCreateParams,
|
|
132
|
-
type RunUpdateParams as RunUpdateParams,
|
|
133
|
-
};
|
|
81
|
+
export { type Run as Run, type RunCreateParams as RunCreateParams };
|
|
134
82
|
}
|
|
@@ -81,17 +81,11 @@ export class SystemConfigs extends APIResource {
|
|
|
81
81
|
* ```ts
|
|
82
82
|
* const systemConfig = await client.systemConfigs.get(
|
|
83
83
|
* '87654321-4d3b-4ae4-8c7a-4b6e2a19ccf0',
|
|
84
|
-
* { systemId: '12345678-0a8b-4f66-b6f3-2ddcfa097257' },
|
|
85
84
|
* );
|
|
86
85
|
* ```
|
|
87
86
|
*/
|
|
88
|
-
get(
|
|
89
|
-
systemConfigID
|
|
90
|
-
params: SystemConfigGetParams,
|
|
91
|
-
options?: RequestOptions,
|
|
92
|
-
): APIPromise<SystemConfig> {
|
|
93
|
-
const { systemId } = params;
|
|
94
|
-
return this._client.get(path`/systems/${systemId}/configs/${systemConfigID}`, options);
|
|
87
|
+
get(systemConfigID: string, options?: RequestOptions): APIPromise<SystemConfig> {
|
|
88
|
+
return this._client.get(path`/systems/configs/${systemConfigID}`, options);
|
|
95
89
|
}
|
|
96
90
|
}
|
|
97
91
|
|
|
@@ -185,19 +179,11 @@ export namespace SystemConfigCreateParams {
|
|
|
185
179
|
|
|
186
180
|
export interface SystemConfigListParams extends PaginatedResponseParams {}
|
|
187
181
|
|
|
188
|
-
export interface SystemConfigGetParams {
|
|
189
|
-
/**
|
|
190
|
-
* The ID of the system the configuration belongs to.
|
|
191
|
-
*/
|
|
192
|
-
systemId: string;
|
|
193
|
-
}
|
|
194
|
-
|
|
195
182
|
export declare namespace SystemConfigs {
|
|
196
183
|
export {
|
|
197
184
|
type SystemConfig as SystemConfig,
|
|
198
185
|
type SystemConfigsPaginatedResponse as SystemConfigsPaginatedResponse,
|
|
199
186
|
type SystemConfigCreateParams as SystemConfigCreateParams,
|
|
200
187
|
type SystemConfigListParams as SystemConfigListParams,
|
|
201
|
-
type SystemConfigGetParams as SystemConfigGetParams,
|
|
202
188
|
};
|
|
203
189
|
}
|
|
@@ -120,11 +120,11 @@ export type TestcasesPaginatedResponse = PaginatedResponse<Testcase>;
|
|
|
120
120
|
|
|
121
121
|
/**
|
|
122
122
|
* A test case in the Scorecard system. Contains JSON data that is validated
|
|
123
|
-
* against the schema defined by its Testset. The `inputs` and `labels` fields are
|
|
124
|
-
* derived from the `data` field based on the Testset's `fieldMapping`, and include
|
|
125
|
-
* all mapped fields, including those with validation errors. Testcases are stored
|
|
126
|
-
* regardless of validation results, with any validation errors included in the
|
|
127
|
-
* `validationErrors` field.
|
|
123
|
+
* against the schema defined by its Testset. The `inputs` and `expected` fields
|
|
124
|
+
* are derived from the `data` field based on the Testset's `fieldMapping`, and
|
|
125
|
+
* include all mapped fields, including those with validation errors. Testcases are
|
|
126
|
+
* stored regardless of validation results, with any validation errors included in
|
|
127
|
+
* the `validationErrors` field.
|
|
128
128
|
*/
|
|
129
129
|
export interface Testcase {
|
|
130
130
|
/**
|
|
@@ -132,6 +132,12 @@ export interface Testcase {
|
|
|
132
132
|
*/
|
|
133
133
|
id: string;
|
|
134
134
|
|
|
135
|
+
/**
|
|
136
|
+
* Derived from data based on the Testset's fieldMapping. Contains all fields
|
|
137
|
+
* marked as expected outputs, including those with validation errors.
|
|
138
|
+
*/
|
|
139
|
+
expected: Record<string, unknown>;
|
|
140
|
+
|
|
135
141
|
/**
|
|
136
142
|
* Derived from data based on the Testset's fieldMapping. Contains all fields
|
|
137
143
|
* marked as inputs, including those with validation errors.
|
|
@@ -143,12 +149,6 @@ export interface Testcase {
|
|
|
143
149
|
*/
|
|
144
150
|
jsonData: Record<string, unknown>;
|
|
145
151
|
|
|
146
|
-
/**
|
|
147
|
-
* Derived from data based on the Testset's fieldMapping. Contains all fields
|
|
148
|
-
* marked as labels, including those with validation errors.
|
|
149
|
-
*/
|
|
150
|
-
labels: Record<string, unknown>;
|
|
151
|
-
|
|
152
152
|
/**
|
|
153
153
|
* The ID of the Testset this Testcase belongs to.
|
|
154
154
|
*/
|
|
@@ -194,14 +194,6 @@ export interface TestcaseCreateParams {
|
|
|
194
194
|
}
|
|
195
195
|
|
|
196
196
|
export namespace TestcaseCreateParams {
|
|
197
|
-
/**
|
|
198
|
-
* A test case in the Scorecard system. Contains JSON data that is validated
|
|
199
|
-
* against the schema defined by its Testset. The `inputs` and `labels` fields are
|
|
200
|
-
* derived from the `data` field based on the Testset's `fieldMapping`, and include
|
|
201
|
-
* all mapped fields, including those with validation errors. Testcases are stored
|
|
202
|
-
* regardless of validation results, with any validation errors included in the
|
|
203
|
-
* `validationErrors` field.
|
|
204
|
-
*/
|
|
205
197
|
export interface Item {
|
|
206
198
|
/**
|
|
207
199
|
* The JSON data of the Testcase, which is validated against the Testset's schema.
|
|
@@ -17,7 +17,7 @@ export class Testsets extends APIResource {
|
|
|
17
17
|
* description: 'Testset for long context Q&A chatbot.',
|
|
18
18
|
* fieldMapping: {
|
|
19
19
|
* inputs: ['question'],
|
|
20
|
-
*
|
|
20
|
+
* expected: ['idealAnswer'],
|
|
21
21
|
* metadata: [],
|
|
22
22
|
* },
|
|
23
23
|
* jsonSchema: {
|
|
@@ -119,9 +119,9 @@ export type TestsetsPaginatedResponse = PaginatedResponse<Testset>;
|
|
|
119
119
|
/**
|
|
120
120
|
* A collection of Testcases that share the same schema. Each Testset defines the
|
|
121
121
|
* structure of its Testcases through a JSON schema. The `fieldMapping` object maps
|
|
122
|
-
* top-level keys of the Testcase schema to their roles (input/
|
|
123
|
-
* mentioned in the `fieldMapping` during creation or update are treated
|
|
124
|
-
* metadata.
|
|
122
|
+
* top-level keys of the Testcase schema to their roles (input/expected output).
|
|
123
|
+
* Fields not mentioned in the `fieldMapping` during creation or update are treated
|
|
124
|
+
* as metadata.
|
|
125
125
|
*
|
|
126
126
|
* ## JSON Schema validation constraints supported:
|
|
127
127
|
*
|
|
@@ -154,8 +154,8 @@ export interface Testset {
|
|
|
154
154
|
description: string;
|
|
155
155
|
|
|
156
156
|
/**
|
|
157
|
-
* Maps top-level keys of the Testcase schema to their roles (input/
|
|
158
|
-
* Unmapped fields are treated as metadata.
|
|
157
|
+
* Maps top-level keys of the Testcase schema to their roles (input/expected
|
|
158
|
+
* output). Unmapped fields are treated as metadata.
|
|
159
159
|
*/
|
|
160
160
|
fieldMapping: Testset.FieldMapping;
|
|
161
161
|
|
|
@@ -172,22 +172,22 @@ export interface Testset {
|
|
|
172
172
|
|
|
173
173
|
export namespace Testset {
|
|
174
174
|
/**
|
|
175
|
-
* Maps top-level keys of the Testcase schema to their roles (input/
|
|
176
|
-
* Unmapped fields are treated as metadata.
|
|
175
|
+
* Maps top-level keys of the Testcase schema to their roles (input/expected
|
|
176
|
+
* output). Unmapped fields are treated as metadata.
|
|
177
177
|
*/
|
|
178
178
|
export interface FieldMapping {
|
|
179
179
|
/**
|
|
180
|
-
* Fields that represent
|
|
180
|
+
* Fields that represent expected outputs.
|
|
181
181
|
*/
|
|
182
|
-
|
|
182
|
+
expected: Array<string>;
|
|
183
183
|
|
|
184
184
|
/**
|
|
185
|
-
* Fields that represent
|
|
185
|
+
* Fields that represent inputs to the AI system.
|
|
186
186
|
*/
|
|
187
|
-
|
|
187
|
+
inputs: Array<string>;
|
|
188
188
|
|
|
189
189
|
/**
|
|
190
|
-
* Fields that are not inputs or
|
|
190
|
+
* Fields that are not inputs or expected outputs.
|
|
191
191
|
*/
|
|
192
192
|
metadata: Array<string>;
|
|
193
193
|
}
|
|
@@ -207,8 +207,8 @@ export interface TestsetCreateParams {
|
|
|
207
207
|
description: string;
|
|
208
208
|
|
|
209
209
|
/**
|
|
210
|
-
* Maps top-level keys of the Testcase schema to their roles (input/
|
|
211
|
-
* Unmapped fields are treated as metadata.
|
|
210
|
+
* Maps top-level keys of the Testcase schema to their roles (input/expected
|
|
211
|
+
* output). Unmapped fields are treated as metadata.
|
|
212
212
|
*/
|
|
213
213
|
fieldMapping: TestsetCreateParams.FieldMapping;
|
|
214
214
|
|
|
@@ -225,22 +225,22 @@ export interface TestsetCreateParams {
|
|
|
225
225
|
|
|
226
226
|
export namespace TestsetCreateParams {
|
|
227
227
|
/**
|
|
228
|
-
* Maps top-level keys of the Testcase schema to their roles (input/
|
|
229
|
-
* Unmapped fields are treated as metadata.
|
|
228
|
+
* Maps top-level keys of the Testcase schema to their roles (input/expected
|
|
229
|
+
* output). Unmapped fields are treated as metadata.
|
|
230
230
|
*/
|
|
231
231
|
export interface FieldMapping {
|
|
232
232
|
/**
|
|
233
|
-
* Fields that represent
|
|
233
|
+
* Fields that represent expected outputs.
|
|
234
234
|
*/
|
|
235
|
-
|
|
235
|
+
expected: Array<string>;
|
|
236
236
|
|
|
237
237
|
/**
|
|
238
|
-
* Fields that represent
|
|
238
|
+
* Fields that represent inputs to the AI system.
|
|
239
239
|
*/
|
|
240
|
-
|
|
240
|
+
inputs: Array<string>;
|
|
241
241
|
|
|
242
242
|
/**
|
|
243
|
-
* Fields that are not inputs or
|
|
243
|
+
* Fields that are not inputs or expected outputs.
|
|
244
244
|
*/
|
|
245
245
|
metadata: Array<string>;
|
|
246
246
|
}
|
|
@@ -253,8 +253,8 @@ export interface TestsetUpdateParams {
|
|
|
253
253
|
description?: string;
|
|
254
254
|
|
|
255
255
|
/**
|
|
256
|
-
* Maps top-level keys of the Testcase schema to their roles (input/
|
|
257
|
-
* Unmapped fields are treated as metadata.
|
|
256
|
+
* Maps top-level keys of the Testcase schema to their roles (input/expected
|
|
257
|
+
* output). Unmapped fields are treated as metadata.
|
|
258
258
|
*/
|
|
259
259
|
fieldMapping?: TestsetUpdateParams.FieldMapping;
|
|
260
260
|
|
|
@@ -271,22 +271,22 @@ export interface TestsetUpdateParams {
|
|
|
271
271
|
|
|
272
272
|
export namespace TestsetUpdateParams {
|
|
273
273
|
/**
|
|
274
|
-
* Maps top-level keys of the Testcase schema to their roles (input/
|
|
275
|
-
* Unmapped fields are treated as metadata.
|
|
274
|
+
* Maps top-level keys of the Testcase schema to their roles (input/expected
|
|
275
|
+
* output). Unmapped fields are treated as metadata.
|
|
276
276
|
*/
|
|
277
277
|
export interface FieldMapping {
|
|
278
278
|
/**
|
|
279
|
-
* Fields that represent
|
|
279
|
+
* Fields that represent expected outputs.
|
|
280
280
|
*/
|
|
281
|
-
|
|
281
|
+
expected: Array<string>;
|
|
282
282
|
|
|
283
283
|
/**
|
|
284
|
-
* Fields that represent
|
|
284
|
+
* Fields that represent inputs to the AI system.
|
|
285
285
|
*/
|
|
286
|
-
|
|
286
|
+
inputs: Array<string>;
|
|
287
287
|
|
|
288
288
|
/**
|
|
289
|
-
* Fields that are not inputs or
|
|
289
|
+
* Fields that are not inputs or expected outputs.
|
|
290
290
|
*/
|
|
291
291
|
metadata: Array<string>;
|
|
292
292
|
}
|
package/src/version.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export const VERSION = '1.0.0-alpha.7'; // x-release-please-version
|
|
1
|
+
export const VERSION = '1.0.0-alpha.9'; // x-release-please-version
|
package/version.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const VERSION = "1.0.0-alpha.7";
|
|
1
|
+
export declare const VERSION = "1.0.0-alpha.9";
|
|
2
2
|
//# sourceMappingURL=version.d.mts.map
|