scorecard-ai 1.0.0-alpha.8 → 1.0.0-alpha.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/CHANGELOG.md +30 -0
  2. package/README.md +25 -37
  3. package/client.d.mts +3 -2
  4. package/client.d.mts.map +1 -1
  5. package/client.d.ts +3 -2
  6. package/client.d.ts.map +1 -1
  7. package/client.js +15 -0
  8. package/client.js.map +1 -1
  9. package/client.mjs +15 -0
  10. package/client.mjs.map +1 -1
  11. package/core/pagination.d.mts +1 -1
  12. package/core/pagination.d.mts.map +1 -1
  13. package/core/pagination.d.ts +1 -1
  14. package/core/pagination.d.ts.map +1 -1
  15. package/index.d.mts +1 -0
  16. package/index.d.mts.map +1 -1
  17. package/index.d.ts +1 -0
  18. package/index.d.ts.map +1 -1
  19. package/index.js +3 -1
  20. package/index.js.map +1 -1
  21. package/index.mjs +1 -0
  22. package/index.mjs.map +1 -1
  23. package/internal/detect-platform.js +3 -3
  24. package/internal/detect-platform.js.map +1 -1
  25. package/internal/detect-platform.mjs +3 -3
  26. package/internal/detect-platform.mjs.map +1 -1
  27. package/internal/shim-types.d.mts +11 -22
  28. package/internal/shim-types.d.mts.map +1 -0
  29. package/internal/shim-types.d.ts +11 -22
  30. package/internal/shim-types.d.ts.map +1 -0
  31. package/internal/shim-types.js +4 -0
  32. package/internal/shim-types.js.map +1 -0
  33. package/internal/shim-types.mjs +3 -0
  34. package/internal/shim-types.mjs.map +1 -0
  35. package/internal/shims.d.mts +2 -2
  36. package/internal/shims.d.mts.map +1 -1
  37. package/internal/shims.d.ts +2 -2
  38. package/internal/shims.d.ts.map +1 -1
  39. package/internal/uploads.js.map +1 -1
  40. package/internal/uploads.mjs.map +1 -1
  41. package/lib/runAndEvaluate.d.mts +49 -9
  42. package/lib/runAndEvaluate.d.mts.map +1 -1
  43. package/lib/runAndEvaluate.d.ts +49 -9
  44. package/lib/runAndEvaluate.d.ts.map +1 -1
  45. package/lib/runAndEvaluate.js +62 -19
  46. package/lib/runAndEvaluate.js.map +1 -1
  47. package/lib/runAndEvaluate.mjs +62 -19
  48. package/lib/runAndEvaluate.mjs.map +1 -1
  49. package/package.json +1 -4
  50. package/resources/index.d.mts +1 -1
  51. package/resources/index.d.mts.map +1 -1
  52. package/resources/index.d.ts +1 -1
  53. package/resources/index.d.ts.map +1 -1
  54. package/resources/index.js.map +1 -1
  55. package/resources/index.mjs.map +1 -1
  56. package/resources/runs.d.mts +6 -6
  57. package/resources/runs.d.mts.map +1 -1
  58. package/resources/runs.d.ts +6 -6
  59. package/resources/runs.d.ts.map +1 -1
  60. package/resources/runs.js +1 -1
  61. package/resources/runs.mjs +1 -1
  62. package/resources/system-configs.d.mts +2 -9
  63. package/resources/system-configs.d.mts.map +1 -1
  64. package/resources/system-configs.d.ts +2 -9
  65. package/resources/system-configs.d.ts.map +1 -1
  66. package/resources/system-configs.js +2 -4
  67. package/resources/system-configs.js.map +1 -1
  68. package/resources/system-configs.mjs +2 -4
  69. package/resources/system-configs.mjs.map +1 -1
  70. package/src/client.ts +14 -2
  71. package/src/core/pagination.ts +1 -1
  72. package/src/index.ts +2 -0
  73. package/src/internal/detect-platform.ts +3 -3
  74. package/src/internal/shim-types.ts +26 -0
  75. package/src/internal/shims.ts +2 -2
  76. package/src/internal/uploads.ts +1 -1
  77. package/src/lib/runAndEvaluate.ts +133 -30
  78. package/src/resources/index.ts +0 -1
  79. package/src/resources/runs.ts +6 -6
  80. package/src/resources/system-configs.ts +2 -16
  81. package/src/version.ts +1 -1
  82. package/version.d.mts +1 -1
  83. package/version.d.ts +1 -1
  84. package/version.js +1 -1
  85. package/version.mjs +1 -1
  86. package/src/internal/shim-types.d.ts +0 -28
@@ -1,49 +1,152 @@
1
1
  import { Scorecard } from '../client';
2
+ import { SystemConfig, Testcase } from '../resources';
3
+
4
+ type RunAndEvaluateArgs<SystemInput extends Record<string, any>, SystemOutput extends Record<string, any>> =
5
+ // Project and metrics are always required
6
+ {
7
+ /**
8
+ * The ID of the Project to run the system on.
9
+ */
10
+ projectId: string;
11
+
12
+ /**
13
+ * The IDs of the Metrics to use for evaluation.
14
+ */
15
+ metricIds: Array<string>;
16
+ } & (
17
+ | // If system config is provided, the system function receives a system config
18
+ {
19
+ /**
20
+ * The ID of the System Configuration to use for the run.
21
+ */
22
+ systemConfigId: string;
23
+
24
+ /**
25
+ * The system function to run on the Testset.
26
+ */
27
+ system: (testcaseInput: SystemInput, systemConfig: SystemConfig) => Promise<SystemOutput>;
28
+ }
29
+ // Otherwise, the system function receives only the testcase input
30
+ | {
31
+ /**
32
+ * The system function to run on the Testset.
33
+ */
34
+ system: (testcaseInput: SystemInput) => Promise<SystemOutput>;
35
+ }
36
+ ) &
37
+ // If testset is not provided, you must pass in all the testcases manually
38
+ (| {
39
+ /**
40
+ * The ID of the Scorecard Testset to run the system on.
41
+ */
42
+ testsetId: string;
43
+ }
44
+ | {
45
+ /**
46
+ * The list of test cases to run the system on. Can be a list of Scorecard Testcases or a list of inputs and expected outputs.
47
+ */
48
+ testcases:
49
+ | Array<{
50
+ inputs: SystemInput;
51
+ expected: Record<string, unknown>;
52
+ }>
53
+ | Array<Testcase>;
54
+ }
55
+ );
56
+
57
+ /**
58
+ * Returns an async generator over the given Testset or Testcases.
59
+ */
60
+ async function* testcaseIterator<SystemInput extends Record<string, any>>(
61
+ scorecard: Scorecard,
62
+ args: RunAndEvaluateArgs<SystemInput, any>,
63
+ ): AsyncGenerator<{
64
+ testcaseId: string | null;
65
+ inputs: SystemInput;
66
+ expected: Record<string, unknown>;
67
+ }> {
68
+ if ('testsetId' in args) {
69
+ for await (const testcase of scorecard.testcases.list(args.testsetId)) {
70
+ yield {
71
+ ...testcase,
72
+ testcaseId: testcase.id,
73
+ inputs: testcase.inputs as SystemInput,
74
+ };
75
+ }
76
+ } else {
77
+ for (const testcase of args.testcases) {
78
+ yield {
79
+ ...testcase,
80
+ testcaseId: 'id' in testcase ? testcase.id : null,
81
+ inputs: testcase.inputs as SystemInput,
82
+ };
83
+ }
84
+ }
85
+ }
2
86
 
3
87
  /**
4
88
  * Runs a system on a Testset and records the results in Scorecard.
5
89
  *
6
90
  * @param scorecard The Scorecard client
7
- * @param projectId The ID of the Project to run the system on.
8
- * @param testsetId The ID of the Testset to run the system on.
9
- * @param metricIds The IDs of the Metrics to use for evaluation.
10
- * @param system The system to run on the Testset.
91
+ * @param args.projectId The ID of the Project to run the system on.
92
+ * @param args.testsetId The optional ID of the Testset to run the system on. Either this or `args.testcases` must be provided.
93
+ * @param args.testcases The optional list of Testcases to run the system on. Either this or `args.testsetId` must be provided.
94
+ * @param args.metricIds The IDs of the Metrics to use for evaluation.
95
+ * @param args.systemConfigId The optional ID of the System Configuration to associate with the Run.
96
+ * @param args.system The system to run on the Testset.
97
+ * @param options.runInParallel Whether to call `args.system` in parallel. False (sequential) by default.
11
98
  */
12
- export async function runAndEvaluate<SystemInput extends Object, SystemOutput extends Object>(
99
+ export async function runAndEvaluate<
100
+ SystemInput extends Record<string, any>,
101
+ SystemOutput extends Record<string, any>,
102
+ >(
13
103
  scorecard: Scorecard,
14
- {
15
- projectId,
16
- testsetId,
17
- metricIds,
18
- system,
19
- }: {
20
- projectId: string;
21
- testsetId: string;
22
- metricIds: Array<string>;
23
- system: (testcaseInput: SystemInput) => Promise<SystemOutput>;
104
+ args: RunAndEvaluateArgs<SystemInput, SystemOutput>,
105
+ options: {
106
+ runInParallel: boolean;
107
+ } = {
108
+ runInParallel: false,
24
109
  },
25
110
  ): Promise<Pick<Scorecard.Runs.Run, 'id'> & { url: string }> {
26
- const run = await scorecard.runs.create(projectId, {
27
- testsetId,
28
- metricIds,
111
+ const hasSystemConfig = 'systemConfigId' in args;
112
+ const hasTestset = 'testsetId' in args;
113
+
114
+ const runPromise = scorecard.runs.create(args.projectId, {
115
+ testsetId: hasTestset ? args.testsetId : null,
116
+ metricIds: args.metricIds,
117
+ ...(hasSystemConfig ?
118
+ {
119
+ systemConfigId: args.systemConfigId,
120
+ }
121
+ : null),
29
122
  });
123
+ const systemConfig = hasSystemConfig ? await scorecard.systemConfigs.get(args.systemConfigId) : null;
124
+ const run = await runPromise;
125
+
126
+ const recordPromises: Array<Promise<unknown>> = [];
127
+
128
+ for await (const { testcaseId, inputs, expected } of testcaseIterator(scorecard, args)) {
129
+ const modelResponsePromise = hasSystemConfig ? args.system(inputs, systemConfig!) : args.system(inputs);
130
+
131
+ function createRecord(outputs: SystemOutput): Promise<unknown> {
132
+ return scorecard.records.create(run.id, {
133
+ inputs,
134
+ expected,
135
+ outputs,
136
+ ...(testcaseId != null ? { testcaseId } : null),
137
+ });
138
+ }
30
139
 
31
- // Run each Testcase sequentially
32
- const recordPromises: Array<Promise<any>> = [];
33
- for await (const testcase of scorecard.testcases.list(run.testsetId)) {
34
- const modelResponse = await system(testcase.inputs as SystemInput);
35
- const promise = scorecard.records.create(run.id, {
36
- testcaseId: testcase.id,
37
- inputs: testcase.inputs,
38
- expected: testcase.expected,
39
- outputs: modelResponse as Record<string, unknown>,
40
- });
41
- recordPromises.push(promise);
140
+ if (options.runInParallel) {
141
+ recordPromises.push(modelResponsePromise.then(createRecord));
142
+ } else {
143
+ recordPromises.push(createRecord(await modelResponsePromise));
144
+ }
42
145
  }
43
146
  // Wait until all the Records are created
44
147
  await Promise.all(recordPromises);
45
148
 
46
- const runUrl = `https://app.getscorecard.ai/projects/${projectId}/runs/grades/${run.id}`;
149
+ const runUrl = `${scorecard.baseAppURL}/projects/${args.projectId}/runs/${run.id}`;
47
150
 
48
151
  return { id: run.id, url: runUrl };
49
152
  }
@@ -16,7 +16,6 @@ export {
16
16
  type SystemConfig,
17
17
  type SystemConfigCreateParams,
18
18
  type SystemConfigListParams,
19
- type SystemConfigGetParams,
20
19
  type SystemConfigsPaginatedResponse,
21
20
  } from './system-configs';
22
21
  export {
@@ -13,8 +13,8 @@ export class Runs extends APIResource {
13
13
  * ```ts
14
14
  * const run = await client.runs.create('314', {
15
15
  * metricIds: ['789', '101'],
16
- * testsetId: '246',
17
16
  * systemConfigId: '87654321-4d3b-4ae4-8c7a-4b6e2a19ccf0',
17
+ * testsetId: '246',
18
18
  * });
19
19
  * ```
20
20
  */
@@ -52,7 +52,7 @@ export interface Run {
52
52
  /**
53
53
  * The ID of the Testset this Run is testing.
54
54
  */
55
- testsetId: string;
55
+ testsetId: string | null;
56
56
 
57
57
  /**
58
58
  * The ID of the system configuration this Run is using.
@@ -67,14 +67,14 @@ export interface RunCreateParams {
67
67
  metricIds: Array<string>;
68
68
 
69
69
  /**
70
- * The ID of the Testset this Run is testing.
70
+ * The ID of the system configuration this Run is using.
71
71
  */
72
- testsetId: string;
72
+ systemConfigId?: string;
73
73
 
74
74
  /**
75
- * The ID of the system configuration this Run is using.
75
+ * The ID of the Testset this Run is testing.
76
76
  */
77
- systemConfigId?: string;
77
+ testsetId?: string | null;
78
78
  }
79
79
 
80
80
  export declare namespace Runs {
@@ -81,17 +81,11 @@ export class SystemConfigs extends APIResource {
81
81
  * ```ts
82
82
  * const systemConfig = await client.systemConfigs.get(
83
83
  * '87654321-4d3b-4ae4-8c7a-4b6e2a19ccf0',
84
- * { systemId: '12345678-0a8b-4f66-b6f3-2ddcfa097257' },
85
84
  * );
86
85
  * ```
87
86
  */
88
- get(
89
- systemConfigID: string,
90
- params: SystemConfigGetParams,
91
- options?: RequestOptions,
92
- ): APIPromise<SystemConfig> {
93
- const { systemId } = params;
94
- return this._client.get(path`/systems/${systemId}/configs/${systemConfigID}`, options);
87
+ get(systemConfigID: string, options?: RequestOptions): APIPromise<SystemConfig> {
88
+ return this._client.get(path`/systems/configs/${systemConfigID}`, options);
95
89
  }
96
90
  }
97
91
 
@@ -185,19 +179,11 @@ export namespace SystemConfigCreateParams {
185
179
 
186
180
  export interface SystemConfigListParams extends PaginatedResponseParams {}
187
181
 
188
- export interface SystemConfigGetParams {
189
- /**
190
- * The ID of the system the configuration belongs to.
191
- */
192
- systemId: string;
193
- }
194
-
195
182
  export declare namespace SystemConfigs {
196
183
  export {
197
184
  type SystemConfig as SystemConfig,
198
185
  type SystemConfigsPaginatedResponse as SystemConfigsPaginatedResponse,
199
186
  type SystemConfigCreateParams as SystemConfigCreateParams,
200
187
  type SystemConfigListParams as SystemConfigListParams,
201
- type SystemConfigGetParams as SystemConfigGetParams,
202
188
  };
203
189
  }
package/src/version.ts CHANGED
@@ -1 +1 @@
1
- export const VERSION = '1.0.0-alpha.8'; // x-release-please-version
1
+ export const VERSION = '1.0.0-alpha.9'; // x-release-please-version
package/version.d.mts CHANGED
@@ -1,2 +1,2 @@
1
- export declare const VERSION = "1.0.0-alpha.8";
1
+ export declare const VERSION = "1.0.0-alpha.9";
2
2
  //# sourceMappingURL=version.d.mts.map
package/version.d.ts CHANGED
@@ -1,2 +1,2 @@
1
- export declare const VERSION = "1.0.0-alpha.8";
1
+ export declare const VERSION = "1.0.0-alpha.9";
2
2
  //# sourceMappingURL=version.d.ts.map
package/version.js CHANGED
@@ -1,5 +1,5 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.VERSION = void 0;
4
- exports.VERSION = '1.0.0-alpha.8'; // x-release-please-version
4
+ exports.VERSION = '1.0.0-alpha.9'; // x-release-please-version
5
5
  //# sourceMappingURL=version.js.map
package/version.mjs CHANGED
@@ -1,2 +1,2 @@
1
- export const VERSION = '1.0.0-alpha.8'; // x-release-please-version
1
+ export const VERSION = '1.0.0-alpha.9'; // x-release-please-version
2
2
  //# sourceMappingURL=version.mjs.map
@@ -1,28 +0,0 @@
1
- // File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
-
3
- /**
4
- * Shims for types that we can't always rely on being available globally.
5
- *
6
- * Note: these only exist at the type-level, there is no corresponding runtime
7
- * version for any of these symbols.
8
- */
9
-
10
- /**
11
- * In order to properly access the global `NodeJS` type, if it's available, we
12
- * need to make use of declaration shadowing. Without this, any checks for the
13
- * presence of `NodeJS.ReadableStream` will fail.
14
- */
15
- declare namespace NodeJS {
16
- interface ReadableStream {}
17
- }
18
-
19
- type HasProperties<T> = keyof T extends never ? false : true;
20
-
21
- // @ts-ignore
22
- type _ReadableStream<R = any> =
23
- // @ts-ignore
24
- HasProperties<NodeJS.ReadableStream> extends true ? NodeJS.ReadableStream<R> : ReadableStream<R>;
25
-
26
- // @ts-ignore
27
- declare const _ReadableStream: unknown extends typeof ReadableStream ? never : typeof ReadableStream;
28
- export { _ReadableStream as ReadableStream };