scorecard-ai 1.0.0-alpha.7 → 1.0.0-alpha.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. package/CHANGELOG.md +54 -0
  2. package/README.md +25 -37
  3. package/client.d.mts +5 -4
  4. package/client.d.mts.map +1 -1
  5. package/client.d.ts +5 -4
  6. package/client.d.ts.map +1 -1
  7. package/client.js +15 -0
  8. package/client.js.map +1 -1
  9. package/client.mjs +15 -0
  10. package/client.mjs.map +1 -1
  11. package/core/pagination.d.mts +1 -1
  12. package/core/pagination.d.mts.map +1 -1
  13. package/core/pagination.d.ts +1 -1
  14. package/core/pagination.d.ts.map +1 -1
  15. package/index.d.mts +1 -0
  16. package/index.d.mts.map +1 -1
  17. package/index.d.ts +1 -0
  18. package/index.d.ts.map +1 -1
  19. package/index.js +3 -1
  20. package/index.js.map +1 -1
  21. package/index.mjs +1 -0
  22. package/index.mjs.map +1 -1
  23. package/internal/detect-platform.js +3 -3
  24. package/internal/detect-platform.js.map +1 -1
  25. package/internal/detect-platform.mjs +3 -3
  26. package/internal/detect-platform.mjs.map +1 -1
  27. package/internal/shim-types.d.mts +11 -22
  28. package/internal/shim-types.d.mts.map +1 -0
  29. package/internal/shim-types.d.ts +11 -22
  30. package/internal/shim-types.d.ts.map +1 -0
  31. package/internal/shim-types.js +4 -0
  32. package/internal/shim-types.js.map +1 -0
  33. package/internal/shim-types.mjs +3 -0
  34. package/internal/shim-types.mjs.map +1 -0
  35. package/internal/shims.d.mts +2 -2
  36. package/internal/shims.d.mts.map +1 -1
  37. package/internal/shims.d.ts +2 -2
  38. package/internal/shims.d.ts.map +1 -1
  39. package/internal/uploads.js.map +1 -1
  40. package/internal/uploads.mjs.map +1 -1
  41. package/lib/runAndEvaluate.d.mts +49 -9
  42. package/lib/runAndEvaluate.d.mts.map +1 -1
  43. package/lib/runAndEvaluate.d.ts +49 -9
  44. package/lib/runAndEvaluate.d.ts.map +1 -1
  45. package/lib/runAndEvaluate.js +62 -23
  46. package/lib/runAndEvaluate.js.map +1 -1
  47. package/lib/runAndEvaluate.mjs +62 -23
  48. package/lib/runAndEvaluate.mjs.map +1 -1
  49. package/package.json +1 -4
  50. package/resources/index.d.mts +2 -2
  51. package/resources/index.d.mts.map +1 -1
  52. package/resources/index.d.ts +2 -2
  53. package/resources/index.d.ts.map +1 -1
  54. package/resources/index.js.map +1 -1
  55. package/resources/index.mjs.map +1 -1
  56. package/resources/records.d.mts +11 -9
  57. package/resources/records.d.mts.map +1 -1
  58. package/resources/records.d.ts +11 -9
  59. package/resources/records.d.ts.map +1 -1
  60. package/resources/records.js +3 -1
  61. package/resources/records.js.map +1 -1
  62. package/resources/records.mjs +3 -1
  63. package/resources/records.mjs.map +1 -1
  64. package/resources/runs.d.mts +5 -32
  65. package/resources/runs.d.mts.map +1 -1
  66. package/resources/runs.d.ts +5 -32
  67. package/resources/runs.d.ts.map +1 -1
  68. package/resources/runs.js +1 -14
  69. package/resources/runs.js.map +1 -1
  70. package/resources/runs.mjs +1 -14
  71. package/resources/runs.mjs.map +1 -1
  72. package/resources/system-configs.d.mts +2 -9
  73. package/resources/system-configs.d.mts.map +1 -1
  74. package/resources/system-configs.d.ts +2 -9
  75. package/resources/system-configs.d.ts.map +1 -1
  76. package/resources/system-configs.js +2 -4
  77. package/resources/system-configs.js.map +1 -1
  78. package/resources/system-configs.mjs +2 -4
  79. package/resources/system-configs.mjs.map +1 -1
  80. package/resources/testcases.d.mts +10 -18
  81. package/resources/testcases.d.mts.map +1 -1
  82. package/resources/testcases.d.ts +10 -18
  83. package/resources/testcases.d.ts.map +1 -1
  84. package/resources/testsets.d.mts +31 -31
  85. package/resources/testsets.d.mts.map +1 -1
  86. package/resources/testsets.d.ts +31 -31
  87. package/resources/testsets.d.ts.map +1 -1
  88. package/resources/testsets.js +1 -1
  89. package/resources/testsets.mjs +1 -1
  90. package/src/client.ts +16 -10
  91. package/src/core/pagination.ts +1 -1
  92. package/src/index.ts +2 -0
  93. package/src/internal/detect-platform.ts +3 -3
  94. package/src/internal/shim-types.ts +26 -0
  95. package/src/internal/shims.ts +2 -2
  96. package/src/internal/uploads.ts +1 -1
  97. package/src/lib/runAndEvaluate.ts +133 -35
  98. package/src/resources/index.ts +1 -2
  99. package/src/resources/records.ts +13 -11
  100. package/src/resources/runs.ts +5 -57
  101. package/src/resources/system-configs.ts +2 -16
  102. package/src/resources/testcases.ts +11 -19
  103. package/src/resources/testsets.ts +31 -31
  104. package/src/version.ts +1 -1
  105. package/version.d.mts +1 -1
  106. package/version.d.ts +1 -1
  107. package/version.js +1 -1
  108. package/version.mjs +1 -1
  109. package/src/internal/shim-types.d.ts +0 -28
@@ -85,10 +85,10 @@ const getPlatformProperties = (): PlatformProperties => {
85
85
  return {
86
86
  'X-Stainless-Lang': 'js',
87
87
  'X-Stainless-Package-Version': VERSION,
88
- 'X-Stainless-OS': normalizePlatform((globalThis as any).process.platform),
89
- 'X-Stainless-Arch': normalizeArch((globalThis as any).process.arch),
88
+ 'X-Stainless-OS': normalizePlatform((globalThis as any).process.platform ?? 'unknown'),
89
+ 'X-Stainless-Arch': normalizeArch((globalThis as any).process.arch ?? 'unknown'),
90
90
  'X-Stainless-Runtime': 'node',
91
- 'X-Stainless-Runtime-Version': (globalThis as any).process.version,
91
+ 'X-Stainless-Runtime-Version': (globalThis as any).process.version ?? 'unknown',
92
92
  };
93
93
  }
94
94
 
@@ -0,0 +1,26 @@
1
+ // File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ /**
4
+ * Shims for types that we can't always rely on being available globally.
5
+ *
6
+ * Note: these only exist at the type-level, there is no corresponding runtime
7
+ * version for any of these symbols.
8
+ */
9
+
10
+ type NeverToAny<T> = T extends never ? any : T;
11
+
12
+ /** @ts-ignore */
13
+ type _DOMReadableStream<R = any> = globalThis.ReadableStream<R>;
14
+
15
+ /** @ts-ignore */
16
+ type _NodeReadableStream<R = any> = import('stream/web').ReadableStream<R>;
17
+
18
+ type _ConditionalNodeReadableStream<R = any> =
19
+ typeof globalThis extends { ReadableStream: any } ? never : _NodeReadableStream<R>;
20
+
21
+ type _ReadableStream<R = any> = NeverToAny<
22
+ | ([0] extends [1 & _DOMReadableStream<R>] ? never : _DOMReadableStream<R>)
23
+ | ([0] extends [1 & _ConditionalNodeReadableStream<R>] ? never : _ConditionalNodeReadableStream<R>)
24
+ >;
25
+
26
+ export type { _ReadableStream as ReadableStream };
@@ -7,8 +7,8 @@
7
7
  * messages in cases where an environment isn't fully supported.
8
8
  */
9
9
 
10
- import { type Fetch } from './builtin-types';
11
- import { type ReadableStream } from './shim-types';
10
+ import type { Fetch } from './builtin-types';
11
+ import type { ReadableStream } from './shim-types';
12
12
 
13
13
  export function getDefaultFetch(): Fetch {
14
14
  if (typeof fetch !== 'undefined') {
@@ -138,7 +138,7 @@ export const createForm = async <T = Record<string, unknown>>(
138
138
 
139
139
  // We check for Blob not File because Bun.File doesn't inherit from File,
140
140
  // but they both inherit from Blob and have a `name` property at runtime.
141
- const isNamedBlob = (value: object) => value instanceof Blob && 'name' in value;
141
+ const isNamedBlob = (value: unknown) => value instanceof Blob && 'name' in value;
142
142
 
143
143
  const isUploadable = (value: unknown) =>
144
144
  typeof value === 'object' &&
@@ -1,54 +1,152 @@
1
1
  import { Scorecard } from '../client';
2
+ import { SystemConfig, Testcase } from '../resources';
3
+
4
+ type RunAndEvaluateArgs<SystemInput extends Record<string, any>, SystemOutput extends Record<string, any>> =
5
+ // Project and metrics are always required
6
+ {
7
+ /**
8
+ * The ID of the Project to run the system on.
9
+ */
10
+ projectId: string;
11
+
12
+ /**
13
+ * The IDs of the Metrics to use for evaluation.
14
+ */
15
+ metricIds: Array<string>;
16
+ } & (
17
+ | // If system config is provided, the system function receives a system config
18
+ {
19
+ /**
20
+ * The ID of the System Configuration to use for the run.
21
+ */
22
+ systemConfigId: string;
23
+
24
+ /**
25
+ * The system function to run on the Testset.
26
+ */
27
+ system: (testcaseInput: SystemInput, systemConfig: SystemConfig) => Promise<SystemOutput>;
28
+ }
29
+ // Otherwise, the system function receives only the testcase input
30
+ | {
31
+ /**
32
+ * The system function to run on the Testset.
33
+ */
34
+ system: (testcaseInput: SystemInput) => Promise<SystemOutput>;
35
+ }
36
+ ) &
37
+ // If testset is not provided, you must pass in all the testcases manually
38
+ (| {
39
+ /**
40
+ * The ID of the Scorecard Testset to run the system on.
41
+ */
42
+ testsetId: string;
43
+ }
44
+ | {
45
+ /**
46
+ * The list of test cases to run the system on. Can be a list of Scorecard Testcases or a list of inputs and expected outputs.
47
+ */
48
+ testcases:
49
+ | Array<{
50
+ inputs: SystemInput;
51
+ expected: Record<string, unknown>;
52
+ }>
53
+ | Array<Testcase>;
54
+ }
55
+ );
56
+
57
+ /**
58
+ * Returns an async generator over the given Testset or Testcases.
59
+ */
60
+ async function* testcaseIterator<SystemInput extends Record<string, any>>(
61
+ scorecard: Scorecard,
62
+ args: RunAndEvaluateArgs<SystemInput, any>,
63
+ ): AsyncGenerator<{
64
+ testcaseId: string | null;
65
+ inputs: SystemInput;
66
+ expected: Record<string, unknown>;
67
+ }> {
68
+ if ('testsetId' in args) {
69
+ for await (const testcase of scorecard.testcases.list(args.testsetId)) {
70
+ yield {
71
+ ...testcase,
72
+ testcaseId: testcase.id,
73
+ inputs: testcase.inputs as SystemInput,
74
+ };
75
+ }
76
+ } else {
77
+ for (const testcase of args.testcases) {
78
+ yield {
79
+ ...testcase,
80
+ testcaseId: 'id' in testcase ? testcase.id : null,
81
+ inputs: testcase.inputs as SystemInput,
82
+ };
83
+ }
84
+ }
85
+ }
2
86
 
3
87
  /**
4
88
  * Runs a system on a Testset and records the results in Scorecard.
5
89
  *
6
90
  * @param scorecard The Scorecard client
7
- * @param projectId The ID of the Project to run the system on.
8
- * @param testsetId The ID of the Testset to run the system on.
9
- * @param metricIds The IDs of the Metrics to use for evaluation.
10
- * @param system The system to run on the Testset.
91
+ * @param args.projectId The ID of the Project to run the system on.
92
+ * @param args.testsetId The optional ID of the Testset to run the system on. Either this or `args.testcases` must be provided.
93
+ * @param args.testcases The optional list of Testcases to run the system on. Either this or `args.testsetId` must be provided.
94
+ * @param args.metricIds The IDs of the Metrics to use for evaluation.
95
+ * @param args.systemConfigId The optional ID of the System Configuration to associate with the Run.
96
+ * @param args.system The system to run on the Testset.
97
+ * @param options.runInParallel Whether to call `args.system` in parallel. False (sequential) by default.
11
98
  */
12
- export async function runAndEvaluate<SystemInput extends Object, SystemOutput extends Object>(
99
+ export async function runAndEvaluate<
100
+ SystemInput extends Record<string, any>,
101
+ SystemOutput extends Record<string, any>,
102
+ >(
13
103
  scorecard: Scorecard,
14
- {
15
- projectId,
16
- testsetId,
17
- metricIds,
18
- system,
19
- }: {
20
- projectId: string;
21
- testsetId: string;
22
- metricIds: Array<string>;
23
- system: (testcaseInput: SystemInput) => Promise<SystemOutput>;
104
+ args: RunAndEvaluateArgs<SystemInput, SystemOutput>,
105
+ options: {
106
+ runInParallel: boolean;
107
+ } = {
108
+ runInParallel: false,
24
109
  },
25
110
  ): Promise<Pick<Scorecard.Runs.Run, 'id'> & { url: string }> {
26
- const run = await scorecard.runs.create(projectId, {
27
- testsetId,
28
- metricIds,
111
+ const hasSystemConfig = 'systemConfigId' in args;
112
+ const hasTestset = 'testsetId' in args;
113
+
114
+ const runPromise = scorecard.runs.create(args.projectId, {
115
+ testsetId: hasTestset ? args.testsetId : null,
116
+ metricIds: args.metricIds,
117
+ ...(hasSystemConfig ?
118
+ {
119
+ systemConfigId: args.systemConfigId,
120
+ }
121
+ : null),
29
122
  });
123
+ const systemConfig = hasSystemConfig ? await scorecard.systemConfigs.get(args.systemConfigId) : null;
124
+ const run = await runPromise;
30
125
 
31
- // Run each Testcase sequentially
32
- const recordPromises: Array<Promise<any>> = [];
33
- for await (const testcase of scorecard.testcases.list(run.testsetId)) {
34
- const modelResponse = await system(testcase.inputs as SystemInput);
35
- const promise = scorecard.records.create(run.id, {
36
- testcaseId: testcase.id,
37
- inputs: testcase.inputs,
38
- labels: testcase.labels,
39
- outputs: modelResponse as Record<string, unknown>,
40
- });
41
- recordPromises.push(promise);
126
+ const recordPromises: Array<Promise<unknown>> = [];
127
+
128
+ for await (const { testcaseId, inputs, expected } of testcaseIterator(scorecard, args)) {
129
+ const modelResponsePromise = hasSystemConfig ? args.system(inputs, systemConfig!) : args.system(inputs);
130
+
131
+ function createRecord(outputs: SystemOutput): Promise<unknown> {
132
+ return scorecard.records.create(run.id, {
133
+ inputs,
134
+ expected,
135
+ outputs,
136
+ ...(testcaseId != null ? { testcaseId } : null),
137
+ });
138
+ }
139
+
140
+ if (options.runInParallel) {
141
+ recordPromises.push(modelResponsePromise.then(createRecord));
142
+ } else {
143
+ recordPromises.push(createRecord(await modelResponsePromise));
144
+ }
42
145
  }
43
146
  // Wait until all the Records are created
44
147
  await Promise.all(recordPromises);
45
148
 
46
- // Mark the Run as done with execution and ready for scoring.
47
- await scorecard.runs.update(run.id, {
48
- status: 'awaiting_scoring',
49
- });
50
-
51
- const runUrl = `https://app.getscorecard.ai/projects/${projectId}/runs/grades/${run.id}`;
149
+ const runUrl = `${scorecard.baseAppURL}/projects/${args.projectId}/runs/${run.id}`;
52
150
 
53
151
  return { id: run.id, url: runUrl };
54
152
  }
@@ -9,14 +9,13 @@ export {
9
9
  type ProjectsPaginatedResponse,
10
10
  } from './projects';
11
11
  export { Records, type Record, type RecordCreateParams } from './records';
12
- export { Runs, type Run, type RunUpdateResponse, type RunCreateParams, type RunUpdateParams } from './runs';
12
+ export { Runs, type Run, type RunCreateParams } from './runs';
13
13
  export { Scores, type Score, type ScoreUpsertParams } from './scores';
14
14
  export {
15
15
  SystemConfigs,
16
16
  type SystemConfig,
17
17
  type SystemConfigCreateParams,
18
18
  type SystemConfigListParams,
19
- type SystemConfigGetParams,
20
19
  type SystemConfigsPaginatedResponse,
21
20
  } from './system-configs';
22
21
  export {
@@ -13,8 +13,10 @@ export class Records extends APIResource {
13
13
  * @example
14
14
  * ```ts
15
15
  * const record = await client.records.create('135', {
16
+ * expected: {
17
+ * idealAnswer: 'Paris is the capital of France',
18
+ * },
16
19
  * inputs: { question: 'What is the capital of France?' },
17
- * labels: { idealAnswer: 'Paris is the capital of France' },
18
20
  * outputs: { response: 'The capital of France is Paris.' },
19
21
  * testcaseId: '248',
20
22
  * });
@@ -35,15 +37,15 @@ export interface Record {
35
37
  id: string;
36
38
 
37
39
  /**
38
- * The actual inputs sent to the system, which should match the system's input
39
- * schema.
40
+ * The expected outputs for the Testcase.
40
41
  */
41
- inputs: BuiltinRecord<string, unknown>;
42
+ expected: BuiltinRecord<string, unknown>;
42
43
 
43
44
  /**
44
- * The expected outputs for the Testcase.
45
+ * The actual inputs sent to the system, which should match the system's input
46
+ * schema.
45
47
  */
46
- labels: BuiltinRecord<string, unknown>;
48
+ inputs: BuiltinRecord<string, unknown>;
47
49
 
48
50
  /**
49
51
  * The actual outputs from the system.
@@ -63,15 +65,15 @@ export interface Record {
63
65
 
64
66
  export interface RecordCreateParams {
65
67
  /**
66
- * The actual inputs sent to the system, which should match the system's input
67
- * schema.
68
+ * The expected outputs for the Testcase.
68
69
  */
69
- inputs: BuiltinRecord<string, unknown>;
70
+ expected: BuiltinRecord<string, unknown>;
70
71
 
71
72
  /**
72
- * The expected outputs for the Testcase.
73
+ * The actual inputs sent to the system, which should match the system's input
74
+ * schema.
73
75
  */
74
- labels: BuiltinRecord<string, unknown>;
76
+ inputs: BuiltinRecord<string, unknown>;
75
77
 
76
78
  /**
77
79
  * The actual outputs from the system.
@@ -13,28 +13,14 @@ export class Runs extends APIResource {
13
13
  * ```ts
14
14
  * const run = await client.runs.create('314', {
15
15
  * metricIds: ['789', '101'],
16
- * testsetId: '246',
17
16
  * systemConfigId: '87654321-4d3b-4ae4-8c7a-4b6e2a19ccf0',
17
+ * testsetId: '246',
18
18
  * });
19
19
  * ```
20
20
  */
21
21
  create(projectID: string, body: RunCreateParams, options?: RequestOptions): APIPromise<Run> {
22
22
  return this._client.post(path`/projects/${projectID}/runs`, { body, ...options });
23
23
  }
24
-
25
- /**
26
- * Update the status of a Run.
27
- *
28
- * @example
29
- * ```ts
30
- * const run = await client.runs.update('135', {
31
- * status: 'awaiting_scoring',
32
- * });
33
- * ```
34
- */
35
- update(runID: string, body: RunUpdateParams, options?: RequestOptions): APIPromise<RunUpdateResponse> {
36
- return this._client.patch(path`/runs/${runID}`, { body, ...options });
37
- }
38
24
  }
39
25
 
40
26
  /**
@@ -66,7 +52,7 @@ export interface Run {
66
52
  /**
67
53
  * The ID of the Testset this Run is testing.
68
54
  */
69
- testsetId: string;
55
+ testsetId: string | null;
70
56
 
71
57
  /**
72
58
  * The ID of the system configuration this Run is using.
@@ -74,61 +60,23 @@ export interface Run {
74
60
  systemConfigId?: string;
75
61
  }
76
62
 
77
- export interface RunUpdateResponse {
78
- /**
79
- * The ID of the Run.
80
- */
81
- id: string;
82
-
83
- /**
84
- * The status of the Run.
85
- */
86
- status:
87
- | 'pending'
88
- | 'awaiting_execution'
89
- | 'running_execution'
90
- | 'awaiting_scoring'
91
- | 'running_scoring'
92
- | 'awaiting_human_scoring'
93
- | 'completed';
94
- }
95
-
96
63
  export interface RunCreateParams {
97
64
  /**
98
65
  * The IDs of the metrics this Run is using.
99
66
  */
100
67
  metricIds: Array<string>;
101
68
 
102
- /**
103
- * The ID of the Testset this Run is testing.
104
- */
105
- testsetId: string;
106
-
107
69
  /**
108
70
  * The ID of the system configuration this Run is using.
109
71
  */
110
72
  systemConfigId?: string;
111
- }
112
73
 
113
- export interface RunUpdateParams {
114
74
  /**
115
- * The status of the Run.
75
+ * The ID of the Testset this Run is testing.
116
76
  */
117
- status:
118
- | 'pending'
119
- | 'awaiting_execution'
120
- | 'running_execution'
121
- | 'awaiting_scoring'
122
- | 'running_scoring'
123
- | 'awaiting_human_scoring'
124
- | 'completed';
77
+ testsetId?: string | null;
125
78
  }
126
79
 
127
80
  export declare namespace Runs {
128
- export {
129
- type Run as Run,
130
- type RunUpdateResponse as RunUpdateResponse,
131
- type RunCreateParams as RunCreateParams,
132
- type RunUpdateParams as RunUpdateParams,
133
- };
81
+ export { type Run as Run, type RunCreateParams as RunCreateParams };
134
82
  }
@@ -81,17 +81,11 @@ export class SystemConfigs extends APIResource {
81
81
  * ```ts
82
82
  * const systemConfig = await client.systemConfigs.get(
83
83
  * '87654321-4d3b-4ae4-8c7a-4b6e2a19ccf0',
84
- * { systemId: '12345678-0a8b-4f66-b6f3-2ddcfa097257' },
85
84
  * );
86
85
  * ```
87
86
  */
88
- get(
89
- systemConfigID: string,
90
- params: SystemConfigGetParams,
91
- options?: RequestOptions,
92
- ): APIPromise<SystemConfig> {
93
- const { systemId } = params;
94
- return this._client.get(path`/systems/${systemId}/configs/${systemConfigID}`, options);
87
+ get(systemConfigID: string, options?: RequestOptions): APIPromise<SystemConfig> {
88
+ return this._client.get(path`/systems/configs/${systemConfigID}`, options);
95
89
  }
96
90
  }
97
91
 
@@ -185,19 +179,11 @@ export namespace SystemConfigCreateParams {
185
179
 
186
180
  export interface SystemConfigListParams extends PaginatedResponseParams {}
187
181
 
188
- export interface SystemConfigGetParams {
189
- /**
190
- * The ID of the system the configuration belongs to.
191
- */
192
- systemId: string;
193
- }
194
-
195
182
  export declare namespace SystemConfigs {
196
183
  export {
197
184
  type SystemConfig as SystemConfig,
198
185
  type SystemConfigsPaginatedResponse as SystemConfigsPaginatedResponse,
199
186
  type SystemConfigCreateParams as SystemConfigCreateParams,
200
187
  type SystemConfigListParams as SystemConfigListParams,
201
- type SystemConfigGetParams as SystemConfigGetParams,
202
188
  };
203
189
  }
@@ -120,11 +120,11 @@ export type TestcasesPaginatedResponse = PaginatedResponse<Testcase>;
120
120
 
121
121
  /**
122
122
  * A test case in the Scorecard system. Contains JSON data that is validated
123
- * against the schema defined by its Testset. The `inputs` and `labels` fields are
124
- * derived from the `data` field based on the Testset's `fieldMapping`, and include
125
- * all mapped fields, including those with validation errors. Testcases are stored
126
- * regardless of validation results, with any validation errors included in the
127
- * `validationErrors` field.
123
+ * against the schema defined by its Testset. The `inputs` and `expected` fields
124
+ * are derived from the `data` field based on the Testset's `fieldMapping`, and
125
+ * include all mapped fields, including those with validation errors. Testcases are
126
+ * stored regardless of validation results, with any validation errors included in
127
+ * the `validationErrors` field.
128
128
  */
129
129
  export interface Testcase {
130
130
  /**
@@ -132,6 +132,12 @@ export interface Testcase {
132
132
  */
133
133
  id: string;
134
134
 
135
+ /**
136
+ * Derived from data based on the Testset's fieldMapping. Contains all fields
137
+ * marked as expected outputs, including those with validation errors.
138
+ */
139
+ expected: Record<string, unknown>;
140
+
135
141
  /**
136
142
  * Derived from data based on the Testset's fieldMapping. Contains all fields
137
143
  * marked as inputs, including those with validation errors.
@@ -143,12 +149,6 @@ export interface Testcase {
143
149
  */
144
150
  jsonData: Record<string, unknown>;
145
151
 
146
- /**
147
- * Derived from data based on the Testset's fieldMapping. Contains all fields
148
- * marked as labels, including those with validation errors.
149
- */
150
- labels: Record<string, unknown>;
151
-
152
152
  /**
153
153
  * The ID of the Testset this Testcase belongs to.
154
154
  */
@@ -194,14 +194,6 @@ export interface TestcaseCreateParams {
194
194
  }
195
195
 
196
196
  export namespace TestcaseCreateParams {
197
- /**
198
- * A test case in the Scorecard system. Contains JSON data that is validated
199
- * against the schema defined by its Testset. The `inputs` and `labels` fields are
200
- * derived from the `data` field based on the Testset's `fieldMapping`, and include
201
- * all mapped fields, including those with validation errors. Testcases are stored
202
- * regardless of validation results, with any validation errors included in the
203
- * `validationErrors` field.
204
- */
205
197
  export interface Item {
206
198
  /**
207
199
  * The JSON data of the Testcase, which is validated against the Testset's schema.
@@ -17,7 +17,7 @@ export class Testsets extends APIResource {
17
17
  * description: 'Testset for long context Q&A chatbot.',
18
18
  * fieldMapping: {
19
19
  * inputs: ['question'],
20
- * labels: ['idealAnswer'],
20
+ * expected: ['idealAnswer'],
21
21
  * metadata: [],
22
22
  * },
23
23
  * jsonSchema: {
@@ -119,9 +119,9 @@ export type TestsetsPaginatedResponse = PaginatedResponse<Testset>;
119
119
  /**
120
120
  * A collection of Testcases that share the same schema. Each Testset defines the
121
121
  * structure of its Testcases through a JSON schema. The `fieldMapping` object maps
122
- * top-level keys of the Testcase schema to their roles (input/label). Fields not
123
- * mentioned in the `fieldMapping` during creation or update are treated as
124
- * metadata.
122
+ * top-level keys of the Testcase schema to their roles (input/expected output).
123
+ * Fields not mentioned in the `fieldMapping` during creation or update are treated
124
+ * as metadata.
125
125
  *
126
126
  * ## JSON Schema validation constraints supported:
127
127
  *
@@ -154,8 +154,8 @@ export interface Testset {
154
154
  description: string;
155
155
 
156
156
  /**
157
- * Maps top-level keys of the Testcase schema to their roles (input/label).
158
- * Unmapped fields are treated as metadata.
157
+ * Maps top-level keys of the Testcase schema to their roles (input/expected
158
+ * output). Unmapped fields are treated as metadata.
159
159
  */
160
160
  fieldMapping: Testset.FieldMapping;
161
161
 
@@ -172,22 +172,22 @@ export interface Testset {
172
172
 
173
173
  export namespace Testset {
174
174
  /**
175
- * Maps top-level keys of the Testcase schema to their roles (input/label).
176
- * Unmapped fields are treated as metadata.
175
+ * Maps top-level keys of the Testcase schema to their roles (input/expected
176
+ * output). Unmapped fields are treated as metadata.
177
177
  */
178
178
  export interface FieldMapping {
179
179
  /**
180
- * Fields that represent inputs to the AI system.
180
+ * Fields that represent expected outputs.
181
181
  */
182
- inputs: Array<string>;
182
+ expected: Array<string>;
183
183
 
184
184
  /**
185
- * Fields that represent expected outputs/labels.
185
+ * Fields that represent inputs to the AI system.
186
186
  */
187
- labels: Array<string>;
187
+ inputs: Array<string>;
188
188
 
189
189
  /**
190
- * Fields that are not inputs or labels.
190
+ * Fields that are not inputs or expected outputs.
191
191
  */
192
192
  metadata: Array<string>;
193
193
  }
@@ -207,8 +207,8 @@ export interface TestsetCreateParams {
207
207
  description: string;
208
208
 
209
209
  /**
210
- * Maps top-level keys of the Testcase schema to their roles (input/label).
211
- * Unmapped fields are treated as metadata.
210
+ * Maps top-level keys of the Testcase schema to their roles (input/expected
211
+ * output). Unmapped fields are treated as metadata.
212
212
  */
213
213
  fieldMapping: TestsetCreateParams.FieldMapping;
214
214
 
@@ -225,22 +225,22 @@ export interface TestsetCreateParams {
225
225
 
226
226
  export namespace TestsetCreateParams {
227
227
  /**
228
- * Maps top-level keys of the Testcase schema to their roles (input/label).
229
- * Unmapped fields are treated as metadata.
228
+ * Maps top-level keys of the Testcase schema to their roles (input/expected
229
+ * output). Unmapped fields are treated as metadata.
230
230
  */
231
231
  export interface FieldMapping {
232
232
  /**
233
- * Fields that represent inputs to the AI system.
233
+ * Fields that represent expected outputs.
234
234
  */
235
- inputs: Array<string>;
235
+ expected: Array<string>;
236
236
 
237
237
  /**
238
- * Fields that represent expected outputs/labels.
238
+ * Fields that represent inputs to the AI system.
239
239
  */
240
- labels: Array<string>;
240
+ inputs: Array<string>;
241
241
 
242
242
  /**
243
- * Fields that are not inputs or labels.
243
+ * Fields that are not inputs or expected outputs.
244
244
  */
245
245
  metadata: Array<string>;
246
246
  }
@@ -253,8 +253,8 @@ export interface TestsetUpdateParams {
253
253
  description?: string;
254
254
 
255
255
  /**
256
- * Maps top-level keys of the Testcase schema to their roles (input/label).
257
- * Unmapped fields are treated as metadata.
256
+ * Maps top-level keys of the Testcase schema to their roles (input/expected
257
+ * output). Unmapped fields are treated as metadata.
258
258
  */
259
259
  fieldMapping?: TestsetUpdateParams.FieldMapping;
260
260
 
@@ -271,22 +271,22 @@ export interface TestsetUpdateParams {
271
271
 
272
272
  export namespace TestsetUpdateParams {
273
273
  /**
274
- * Maps top-level keys of the Testcase schema to their roles (input/label).
275
- * Unmapped fields are treated as metadata.
274
+ * Maps top-level keys of the Testcase schema to their roles (input/expected
275
+ * output). Unmapped fields are treated as metadata.
276
276
  */
277
277
  export interface FieldMapping {
278
278
  /**
279
- * Fields that represent inputs to the AI system.
279
+ * Fields that represent expected outputs.
280
280
  */
281
- inputs: Array<string>;
281
+ expected: Array<string>;
282
282
 
283
283
  /**
284
- * Fields that represent expected outputs/labels.
284
+ * Fields that represent inputs to the AI system.
285
285
  */
286
- labels: Array<string>;
286
+ inputs: Array<string>;
287
287
 
288
288
  /**
289
- * Fields that are not inputs or labels.
289
+ * Fields that are not inputs or expected outputs.
290
290
  */
291
291
  metadata: Array<string>;
292
292
  }
package/src/version.ts CHANGED
@@ -1 +1 @@
1
- export const VERSION = '1.0.0-alpha.7'; // x-release-please-version
1
+ export const VERSION = '1.0.0-alpha.9'; // x-release-please-version
package/version.d.mts CHANGED
@@ -1,2 +1,2 @@
1
- export declare const VERSION = "1.0.0-alpha.7";
1
+ export declare const VERSION = "1.0.0-alpha.9";
2
2
  //# sourceMappingURL=version.d.mts.map