openai 4.96.2 → 4.98.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +38 -0
- package/README.md +1 -1
- package/index.d.mts +5 -2
- package/index.d.ts +5 -2
- package/index.d.ts.map +1 -1
- package/index.js +3 -0
- package/index.js.map +1 -1
- package/index.mjs +3 -0
- package/index.mjs.map +1 -1
- package/package.json +1 -1
- package/resources/audio/speech.d.ts +13 -1
- package/resources/audio/speech.d.ts.map +1 -1
- package/resources/audio/speech.js +12 -0
- package/resources/audio/speech.js.map +1 -1
- package/resources/audio/speech.mjs +12 -0
- package/resources/audio/speech.mjs.map +1 -1
- package/resources/audio/transcriptions.d.ts +9 -0
- package/resources/audio/transcriptions.d.ts.map +1 -1
- package/resources/audio/transcriptions.js.map +1 -1
- package/resources/audio/transcriptions.mjs.map +1 -1
- package/resources/audio/translations.d.ts +8 -0
- package/resources/audio/translations.d.ts.map +1 -1
- package/resources/audio/translations.js.map +1 -1
- package/resources/audio/translations.mjs.map +1 -1
- package/resources/beta/assistants.d.ts +36 -0
- package/resources/beta/assistants.d.ts.map +1 -1
- package/resources/beta/assistants.js +28 -0
- package/resources/beta/assistants.js.map +1 -1
- package/resources/beta/assistants.mjs +28 -0
- package/resources/beta/assistants.mjs.map +1 -1
- package/resources/beta/realtime/sessions.d.ts +6 -0
- package/resources/beta/realtime/sessions.d.ts.map +1 -1
- package/resources/beta/realtime/sessions.js +6 -0
- package/resources/beta/realtime/sessions.js.map +1 -1
- package/resources/beta/realtime/sessions.mjs +6 -0
- package/resources/beta/realtime/sessions.mjs.map +1 -1
- package/resources/beta/realtime/transcription-sessions.d.ts +6 -0
- package/resources/beta/realtime/transcription-sessions.d.ts.map +1 -1
- package/resources/beta/realtime/transcription-sessions.js +6 -0
- package/resources/beta/realtime/transcription-sessions.js.map +1 -1
- package/resources/beta/realtime/transcription-sessions.mjs +6 -0
- package/resources/beta/realtime/transcription-sessions.mjs.map +1 -1
- package/resources/beta/threads/messages.d.ts +43 -0
- package/resources/beta/threads/messages.d.ts.map +1 -1
- package/resources/beta/threads/messages.js +33 -0
- package/resources/beta/threads/messages.js.map +1 -1
- package/resources/beta/threads/messages.mjs +33 -0
- package/resources/beta/threads/messages.mjs.map +1 -1
- package/resources/beta/threads/runs/runs.d.ts +52 -0
- package/resources/beta/threads/runs/runs.d.ts.map +1 -1
- package/resources/beta/threads/runs/runs.js +24 -0
- package/resources/beta/threads/runs/runs.js.map +1 -1
- package/resources/beta/threads/runs/runs.mjs +24 -0
- package/resources/beta/threads/runs/runs.mjs.map +1 -1
- package/resources/beta/threads/runs/steps.d.ts +21 -0
- package/resources/beta/threads/runs/steps.d.ts.map +1 -1
- package/resources/beta/threads/runs/steps.js.map +1 -1
- package/resources/beta/threads/runs/steps.mjs.map +1 -1
- package/resources/beta/threads/threads.d.ts +33 -0
- package/resources/beta/threads/threads.d.ts.map +1 -1
- package/resources/beta/threads/threads.js +21 -0
- package/resources/beta/threads/threads.js.map +1 -1
- package/resources/beta/threads/threads.mjs +21 -0
- package/resources/beta/threads/threads.mjs.map +1 -1
- package/resources/chat/completions/completions.d.ts +42 -4
- package/resources/chat/completions/completions.d.ts.map +1 -1
- package/resources/chat/completions/completions.js +20 -0
- package/resources/chat/completions/completions.js.map +1 -1
- package/resources/chat/completions/completions.mjs +20 -0
- package/resources/chat/completions/completions.mjs.map +1 -1
- package/resources/chat/completions/messages.d.ts +10 -0
- package/resources/chat/completions/messages.d.ts.map +1 -1
- package/resources/chat/completions/messages.js.map +1 -1
- package/resources/chat/completions/messages.mjs.map +1 -1
- package/resources/completions.d.ts +8 -0
- package/resources/completions.d.ts.map +1 -1
- package/resources/completions.js.map +1 -1
- package/resources/completions.mjs.map +1 -1
- package/resources/embeddings.d.ts +9 -0
- package/resources/embeddings.d.ts.map +1 -1
- package/resources/embeddings.js +9 -0
- package/resources/embeddings.js.map +1 -1
- package/resources/embeddings.mjs +9 -0
- package/resources/embeddings.mjs.map +1 -1
- package/resources/evals/evals.d.ts +62 -533
- package/resources/evals/evals.d.ts.map +1 -1
- package/resources/evals/evals.js.map +1 -1
- package/resources/evals/evals.mjs.map +1 -1
- package/resources/evals/index.d.ts +1 -1
- package/resources/evals/index.d.ts.map +1 -1
- package/resources/evals/index.js.map +1 -1
- package/resources/evals/index.mjs.map +1 -1
- package/resources/fine-tuning/alpha/alpha.d.ts +10 -0
- package/resources/fine-tuning/alpha/alpha.d.ts.map +1 -0
- package/resources/fine-tuning/alpha/alpha.js +39 -0
- package/resources/fine-tuning/alpha/alpha.js.map +1 -0
- package/resources/fine-tuning/alpha/alpha.mjs +12 -0
- package/resources/fine-tuning/alpha/alpha.mjs.map +1 -0
- package/resources/fine-tuning/alpha/graders.d.ts +107 -0
- package/resources/fine-tuning/alpha/graders.d.ts.map +1 -0
- package/resources/fine-tuning/alpha/graders.js +50 -0
- package/resources/fine-tuning/alpha/graders.js.map +1 -0
- package/resources/fine-tuning/alpha/graders.mjs +46 -0
- package/resources/fine-tuning/alpha/graders.mjs.map +1 -0
- package/resources/fine-tuning/alpha/index.d.ts +3 -0
- package/resources/fine-tuning/alpha/index.d.ts.map +1 -0
- package/resources/fine-tuning/alpha/index.js +9 -0
- package/resources/fine-tuning/alpha/index.js.map +1 -0
- package/resources/fine-tuning/alpha/index.mjs +4 -0
- package/resources/fine-tuning/alpha/index.mjs.map +1 -0
- package/resources/fine-tuning/alpha.d.ts +2 -0
- package/resources/fine-tuning/alpha.d.ts.map +1 -0
- package/resources/fine-tuning/alpha.js +19 -0
- package/resources/fine-tuning/alpha.js.map +1 -0
- package/resources/fine-tuning/alpha.mjs +3 -0
- package/resources/fine-tuning/alpha.mjs.map +1 -0
- package/resources/fine-tuning/checkpoints/permissions.d.ts +28 -0
- package/resources/fine-tuning/checkpoints/permissions.d.ts.map +1 -1
- package/resources/fine-tuning/checkpoints/permissions.js +20 -0
- package/resources/fine-tuning/checkpoints/permissions.js.map +1 -1
- package/resources/fine-tuning/checkpoints/permissions.mjs +20 -0
- package/resources/fine-tuning/checkpoints/permissions.mjs.map +1 -1
- package/resources/fine-tuning/fine-tuning.d.ts +8 -0
- package/resources/fine-tuning/fine-tuning.d.ts.map +1 -1
- package/resources/fine-tuning/fine-tuning.js +8 -0
- package/resources/fine-tuning/fine-tuning.js.map +1 -1
- package/resources/fine-tuning/fine-tuning.mjs +8 -0
- package/resources/fine-tuning/fine-tuning.mjs.map +1 -1
- package/resources/fine-tuning/index.d.ts +2 -0
- package/resources/fine-tuning/index.d.ts.map +1 -1
- package/resources/fine-tuning/index.js +11 -7
- package/resources/fine-tuning/index.js.map +1 -1
- package/resources/fine-tuning/index.mjs +2 -0
- package/resources/fine-tuning/index.mjs.map +1 -1
- package/resources/fine-tuning/jobs/checkpoints.d.ts +10 -0
- package/resources/fine-tuning/jobs/checkpoints.d.ts.map +1 -1
- package/resources/fine-tuning/jobs/checkpoints.js.map +1 -1
- package/resources/fine-tuning/jobs/checkpoints.mjs.map +1 -1
- package/resources/fine-tuning/jobs/jobs.d.ts +80 -146
- package/resources/fine-tuning/jobs/jobs.d.ts.map +1 -1
- package/resources/fine-tuning/jobs/jobs.js +48 -0
- package/resources/fine-tuning/jobs/jobs.js.map +1 -1
- package/resources/fine-tuning/jobs/jobs.mjs +48 -0
- package/resources/fine-tuning/jobs/jobs.mjs.map +1 -1
- package/resources/fine-tuning/methods.d.ts +120 -0
- package/resources/fine-tuning/methods.d.ts.map +1 -0
- package/resources/fine-tuning/methods.js +9 -0
- package/resources/fine-tuning/methods.js.map +1 -0
- package/resources/fine-tuning/methods.mjs +5 -0
- package/resources/fine-tuning/methods.mjs.map +1 -0
- package/resources/graders/grader-models.d.ts +234 -0
- package/resources/graders/grader-models.d.ts.map +1 -0
- package/resources/graders/grader-models.js +9 -0
- package/resources/graders/grader-models.js.map +1 -0
- package/resources/graders/grader-models.mjs +5 -0
- package/resources/graders/grader-models.mjs.map +1 -0
- package/resources/graders/graders.d.ts +10 -0
- package/resources/graders/graders.d.ts.map +1 -0
- package/resources/graders/graders.js +39 -0
- package/resources/graders/graders.js.map +1 -0
- package/resources/graders/graders.mjs +12 -0
- package/resources/graders/graders.mjs.map +1 -0
- package/resources/graders/index.d.ts +3 -0
- package/resources/graders/index.d.ts.map +1 -0
- package/resources/graders/index.js +9 -0
- package/resources/graders/index.js.map +1 -0
- package/resources/graders/index.mjs +4 -0
- package/resources/graders/index.mjs.map +1 -0
- package/resources/graders.d.ts +2 -0
- package/resources/graders.d.ts.map +1 -0
- package/resources/graders.js +19 -0
- package/resources/graders.js.map +1 -0
- package/resources/graders.mjs +3 -0
- package/resources/graders.mjs.map +1 -0
- package/resources/images.d.ts +40 -5
- package/resources/images.d.ts.map +1 -1
- package/resources/images.js +22 -0
- package/resources/images.js.map +1 -1
- package/resources/images.mjs +22 -0
- package/resources/images.mjs.map +1 -1
- package/resources/index.d.ts +2 -1
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js +3 -1
- package/resources/index.js.map +1 -1
- package/resources/index.mjs +1 -0
- package/resources/index.mjs.map +1 -1
- package/resources/responses/input-items.d.ts +10 -0
- package/resources/responses/input-items.d.ts.map +1 -1
- package/resources/responses/input-items.js.map +1 -1
- package/resources/responses/input-items.mjs.map +1 -1
- package/resources/responses/responses.d.ts +70 -29
- package/resources/responses/responses.d.ts.map +1 -1
- package/resources/responses/responses.js +7 -0
- package/resources/responses/responses.js.map +1 -1
- package/resources/responses/responses.mjs +7 -0
- package/resources/responses/responses.mjs.map +1 -1
- package/src/index.ts +5 -6
- package/src/resources/audio/speech.ts +13 -1
- package/src/resources/audio/transcriptions.ts +9 -0
- package/src/resources/audio/translations.ts +8 -0
- package/src/resources/beta/assistants.ts +36 -0
- package/src/resources/beta/realtime/sessions.ts +6 -0
- package/src/resources/beta/realtime/transcription-sessions.ts +6 -0
- package/src/resources/beta/threads/messages.ts +43 -0
- package/src/resources/beta/threads/runs/runs.ts +52 -0
- package/src/resources/beta/threads/runs/steps.ts +21 -0
- package/src/resources/beta/threads/threads.ts +33 -0
- package/src/resources/chat/completions/completions.ts +42 -4
- package/src/resources/chat/completions/messages.ts +10 -0
- package/src/resources/completions.ts +8 -0
- package/src/resources/embeddings.ts +9 -0
- package/src/resources/evals/evals.ts +78 -654
- package/src/resources/evals/index.ts +0 -3
- package/src/resources/fine-tuning/alpha/alpha.ts +27 -0
- package/src/resources/fine-tuning/alpha/graders.ts +168 -0
- package/src/resources/fine-tuning/alpha/index.ts +10 -0
- package/src/resources/fine-tuning/alpha.ts +3 -0
- package/src/resources/fine-tuning/checkpoints/permissions.ts +28 -0
- package/src/resources/fine-tuning/fine-tuning.ts +28 -0
- package/src/resources/fine-tuning/index.ts +10 -0
- package/src/resources/fine-tuning/jobs/checkpoints.ts +10 -0
- package/src/resources/fine-tuning/jobs/jobs.ts +86 -162
- package/src/resources/fine-tuning/methods.ts +152 -0
- package/src/resources/graders/grader-models.ts +296 -0
- package/src/resources/graders/graders.ts +31 -0
- package/src/resources/graders/index.ts +12 -0
- package/src/resources/graders.ts +3 -0
- package/src/resources/images.ts +41 -5
- package/src/resources/index.ts +1 -3
- package/src/resources/responses/input-items.ts +10 -0
- package/src/resources/responses/responses.ts +72 -29
- package/src/version.ts +1 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/version.mjs +1 -1
|
@@ -4,6 +4,7 @@ import { APIResource } from '../../resource';
|
|
|
4
4
|
import { isRequestOptions } from '../../core';
|
|
5
5
|
import * as Core from '../../core';
|
|
6
6
|
import * as Shared from '../shared';
|
|
7
|
+
import * as GraderModelsAPI from '../graders/grader-models';
|
|
7
8
|
import * as ResponsesAPI from '../responses/responses';
|
|
8
9
|
import * as RunsAPI from './runs/runs';
|
|
9
10
|
import {
|
|
@@ -103,83 +104,6 @@ export interface EvalCustomDataSourceConfig {
|
|
|
103
104
|
type: 'custom';
|
|
104
105
|
}
|
|
105
106
|
|
|
106
|
-
/**
|
|
107
|
-
* A LabelModelGrader object which uses a model to assign labels to each item in
|
|
108
|
-
* the evaluation.
|
|
109
|
-
*/
|
|
110
|
-
export interface EvalLabelModelGrader {
|
|
111
|
-
input: Array<EvalLabelModelGrader.Input>;
|
|
112
|
-
|
|
113
|
-
/**
|
|
114
|
-
* The labels to assign to each item in the evaluation.
|
|
115
|
-
*/
|
|
116
|
-
labels: Array<string>;
|
|
117
|
-
|
|
118
|
-
/**
|
|
119
|
-
* The model to use for the evaluation. Must support structured outputs.
|
|
120
|
-
*/
|
|
121
|
-
model: string;
|
|
122
|
-
|
|
123
|
-
/**
|
|
124
|
-
* The name of the grader.
|
|
125
|
-
*/
|
|
126
|
-
name: string;
|
|
127
|
-
|
|
128
|
-
/**
|
|
129
|
-
* The labels that indicate a passing result. Must be a subset of labels.
|
|
130
|
-
*/
|
|
131
|
-
passing_labels: Array<string>;
|
|
132
|
-
|
|
133
|
-
/**
|
|
134
|
-
* The object type, which is always `label_model`.
|
|
135
|
-
*/
|
|
136
|
-
type: 'label_model';
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
export namespace EvalLabelModelGrader {
|
|
140
|
-
/**
|
|
141
|
-
* A message input to the model with a role indicating instruction following
|
|
142
|
-
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
143
|
-
* precedence over instructions given with the `user` role. Messages with the
|
|
144
|
-
* `assistant` role are presumed to have been generated by the model in previous
|
|
145
|
-
* interactions.
|
|
146
|
-
*/
|
|
147
|
-
export interface Input {
|
|
148
|
-
/**
|
|
149
|
-
* Text inputs to the model - can contain template strings.
|
|
150
|
-
*/
|
|
151
|
-
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
152
|
-
|
|
153
|
-
/**
|
|
154
|
-
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
155
|
-
* `developer`.
|
|
156
|
-
*/
|
|
157
|
-
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
158
|
-
|
|
159
|
-
/**
|
|
160
|
-
* The type of the message input. Always `message`.
|
|
161
|
-
*/
|
|
162
|
-
type?: 'message';
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
export namespace Input {
|
|
166
|
-
/**
|
|
167
|
-
* A text output from the model.
|
|
168
|
-
*/
|
|
169
|
-
export interface OutputText {
|
|
170
|
-
/**
|
|
171
|
-
* The text output from the model.
|
|
172
|
-
*/
|
|
173
|
-
text: string;
|
|
174
|
-
|
|
175
|
-
/**
|
|
176
|
-
* The type of the output text. Always `output_text`.
|
|
177
|
-
*/
|
|
178
|
-
type: 'output_text';
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
-
}
|
|
182
|
-
|
|
183
107
|
/**
|
|
184
108
|
* A StoredCompletionsDataSourceConfig which specifies the metadata property of
|
|
185
109
|
* your stored completions query. This is usually metadata like `usecase=chatbot`
|
|
@@ -210,83 +134,6 @@ export interface EvalStoredCompletionsDataSourceConfig {
|
|
|
210
134
|
metadata?: Shared.Metadata | null;
|
|
211
135
|
}
|
|
212
136
|
|
|
213
|
-
/**
|
|
214
|
-
* A StringCheckGrader object that performs a string comparison between input and
|
|
215
|
-
* reference using a specified operation.
|
|
216
|
-
*/
|
|
217
|
-
export interface EvalStringCheckGrader {
|
|
218
|
-
/**
|
|
219
|
-
* The input text. This may include template strings.
|
|
220
|
-
*/
|
|
221
|
-
input: string;
|
|
222
|
-
|
|
223
|
-
/**
|
|
224
|
-
* The name of the grader.
|
|
225
|
-
*/
|
|
226
|
-
name: string;
|
|
227
|
-
|
|
228
|
-
/**
|
|
229
|
-
* The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`.
|
|
230
|
-
*/
|
|
231
|
-
operation: 'eq' | 'ne' | 'like' | 'ilike';
|
|
232
|
-
|
|
233
|
-
/**
|
|
234
|
-
* The reference text. This may include template strings.
|
|
235
|
-
*/
|
|
236
|
-
reference: string;
|
|
237
|
-
|
|
238
|
-
/**
|
|
239
|
-
* The object type, which is always `string_check`.
|
|
240
|
-
*/
|
|
241
|
-
type: 'string_check';
|
|
242
|
-
}
|
|
243
|
-
|
|
244
|
-
/**
|
|
245
|
-
* A TextSimilarityGrader object which grades text based on similarity metrics.
|
|
246
|
-
*/
|
|
247
|
-
export interface EvalTextSimilarityGrader {
|
|
248
|
-
/**
|
|
249
|
-
* The evaluation metric to use. One of `fuzzy_match`, `bleu`, `gleu`, `meteor`,
|
|
250
|
-
* `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, or `rouge_l`.
|
|
251
|
-
*/
|
|
252
|
-
evaluation_metric:
|
|
253
|
-
| 'fuzzy_match'
|
|
254
|
-
| 'bleu'
|
|
255
|
-
| 'gleu'
|
|
256
|
-
| 'meteor'
|
|
257
|
-
| 'rouge_1'
|
|
258
|
-
| 'rouge_2'
|
|
259
|
-
| 'rouge_3'
|
|
260
|
-
| 'rouge_4'
|
|
261
|
-
| 'rouge_5'
|
|
262
|
-
| 'rouge_l';
|
|
263
|
-
|
|
264
|
-
/**
|
|
265
|
-
* The text being graded.
|
|
266
|
-
*/
|
|
267
|
-
input: string;
|
|
268
|
-
|
|
269
|
-
/**
|
|
270
|
-
* A float score where a value greater than or equal indicates a passing grade.
|
|
271
|
-
*/
|
|
272
|
-
pass_threshold: number;
|
|
273
|
-
|
|
274
|
-
/**
|
|
275
|
-
* The text being graded against.
|
|
276
|
-
*/
|
|
277
|
-
reference: string;
|
|
278
|
-
|
|
279
|
-
/**
|
|
280
|
-
* The type of grader.
|
|
281
|
-
*/
|
|
282
|
-
type: 'text_similarity';
|
|
283
|
-
|
|
284
|
-
/**
|
|
285
|
-
* The name of the grader.
|
|
286
|
-
*/
|
|
287
|
-
name?: string;
|
|
288
|
-
}
|
|
289
|
-
|
|
290
137
|
/**
|
|
291
138
|
* An Eval object with a data source config and testing criteria. An Eval
|
|
292
139
|
* represents a task to be done for your LLM integration. Like:
|
|
@@ -335,39 +182,29 @@ export interface EvalCreateResponse {
|
|
|
335
182
|
* A list of testing criteria.
|
|
336
183
|
*/
|
|
337
184
|
testing_criteria: Array<
|
|
338
|
-
| EvalLabelModelGrader
|
|
339
|
-
| EvalStringCheckGrader
|
|
340
|
-
| EvalTextSimilarityGrader
|
|
341
|
-
| EvalCreateResponse.Python
|
|
342
|
-
| EvalCreateResponse.ScoreModel
|
|
185
|
+
| GraderModelsAPI.LabelModelGrader
|
|
186
|
+
| GraderModelsAPI.StringCheckGrader
|
|
187
|
+
| EvalCreateResponse.EvalGraderTextSimilarity
|
|
188
|
+
| EvalCreateResponse.EvalGraderPython
|
|
189
|
+
| EvalCreateResponse.EvalGraderScoreModel
|
|
343
190
|
>;
|
|
344
191
|
}
|
|
345
192
|
|
|
346
193
|
export namespace EvalCreateResponse {
|
|
347
194
|
/**
|
|
348
|
-
* A PythonGrader object that runs a python script on the input.
|
|
195
|
+
* A TextSimilarityGrader object which grades text based on similarity metrics.
|
|
349
196
|
*/
|
|
350
|
-
export interface Python {
|
|
351
|
-
/**
|
|
352
|
-
* The name of the grader.
|
|
353
|
-
*/
|
|
354
|
-
name: string;
|
|
355
|
-
|
|
197
|
+
export interface EvalGraderTextSimilarity extends GraderModelsAPI.TextSimilarityGrader {
|
|
356
198
|
/**
|
|
357
|
-
* The source code of the python script.
|
|
358
|
-
*/
|
|
359
|
-
source: string;
|
|
360
|
-
|
|
361
|
-
/**
|
|
362
|
-
* The object type, which is always `python`.
|
|
363
|
-
*/
|
|
364
|
-
type: 'python';
|
|
365
|
-
|
|
366
|
-
/**
|
|
367
|
-
* The image tag to use for the python script.
|
|
199
|
+
* The threshold for the score.
|
|
368
200
|
*/
|
|
369
|
-
|
|
201
|
+
pass_threshold: number;
|
|
202
|
+
}
|
|
370
203
|
|
|
204
|
+
/**
|
|
205
|
+
* A PythonGrader object that runs a python script on the input.
|
|
206
|
+
*/
|
|
207
|
+
export interface EvalGraderPython extends GraderModelsAPI.PythonGrader {
|
|
371
208
|
/**
|
|
372
209
|
* The threshold for the score.
|
|
373
210
|
*/
|
|
@@ -377,85 +214,11 @@ export namespace EvalCreateResponse {
|
|
|
377
214
|
/**
|
|
378
215
|
* A ScoreModelGrader object that uses a model to assign a score to the input.
|
|
379
216
|
*/
|
|
380
|
-
export interface ScoreModel {
|
|
381
|
-
/**
|
|
382
|
-
* The input text. This may include template strings.
|
|
383
|
-
*/
|
|
384
|
-
input: Array<ScoreModel.Input>;
|
|
385
|
-
|
|
386
|
-
/**
|
|
387
|
-
* The model to use for the evaluation.
|
|
388
|
-
*/
|
|
389
|
-
model: string;
|
|
390
|
-
|
|
391
|
-
/**
|
|
392
|
-
* The name of the grader.
|
|
393
|
-
*/
|
|
394
|
-
name: string;
|
|
395
|
-
|
|
396
|
-
/**
|
|
397
|
-
* The object type, which is always `score_model`.
|
|
398
|
-
*/
|
|
399
|
-
type: 'score_model';
|
|
400
|
-
|
|
217
|
+
export interface EvalGraderScoreModel extends GraderModelsAPI.ScoreModelGrader {
|
|
401
218
|
/**
|
|
402
219
|
* The threshold for the score.
|
|
403
220
|
*/
|
|
404
221
|
pass_threshold?: number;
|
|
405
|
-
|
|
406
|
-
/**
|
|
407
|
-
* The range of the score. Defaults to `[0, 1]`.
|
|
408
|
-
*/
|
|
409
|
-
range?: Array<number>;
|
|
410
|
-
|
|
411
|
-
/**
|
|
412
|
-
* The sampling parameters for the model.
|
|
413
|
-
*/
|
|
414
|
-
sampling_params?: unknown;
|
|
415
|
-
}
|
|
416
|
-
|
|
417
|
-
export namespace ScoreModel {
|
|
418
|
-
/**
|
|
419
|
-
* A message input to the model with a role indicating instruction following
|
|
420
|
-
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
421
|
-
* precedence over instructions given with the `user` role. Messages with the
|
|
422
|
-
* `assistant` role are presumed to have been generated by the model in previous
|
|
423
|
-
* interactions.
|
|
424
|
-
*/
|
|
425
|
-
export interface Input {
|
|
426
|
-
/**
|
|
427
|
-
* Text inputs to the model - can contain template strings.
|
|
428
|
-
*/
|
|
429
|
-
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
430
|
-
|
|
431
|
-
/**
|
|
432
|
-
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
433
|
-
* `developer`.
|
|
434
|
-
*/
|
|
435
|
-
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
436
|
-
|
|
437
|
-
/**
|
|
438
|
-
* The type of the message input. Always `message`.
|
|
439
|
-
*/
|
|
440
|
-
type?: 'message';
|
|
441
|
-
}
|
|
442
|
-
|
|
443
|
-
export namespace Input {
|
|
444
|
-
/**
|
|
445
|
-
* A text output from the model.
|
|
446
|
-
*/
|
|
447
|
-
export interface OutputText {
|
|
448
|
-
/**
|
|
449
|
-
* The text output from the model.
|
|
450
|
-
*/
|
|
451
|
-
text: string;
|
|
452
|
-
|
|
453
|
-
/**
|
|
454
|
-
* The type of the output text. Always `output_text`.
|
|
455
|
-
*/
|
|
456
|
-
type: 'output_text';
|
|
457
|
-
}
|
|
458
|
-
}
|
|
459
222
|
}
|
|
460
223
|
}
|
|
461
224
|
|
|
@@ -507,39 +270,29 @@ export interface EvalRetrieveResponse {
|
|
|
507
270
|
* A list of testing criteria.
|
|
508
271
|
*/
|
|
509
272
|
testing_criteria: Array<
|
|
510
|
-
| EvalLabelModelGrader
|
|
511
|
-
| EvalStringCheckGrader
|
|
512
|
-
| EvalTextSimilarityGrader
|
|
513
|
-
| EvalRetrieveResponse.Python
|
|
514
|
-
| EvalRetrieveResponse.ScoreModel
|
|
273
|
+
| GraderModelsAPI.LabelModelGrader
|
|
274
|
+
| GraderModelsAPI.StringCheckGrader
|
|
275
|
+
| EvalRetrieveResponse.EvalGraderTextSimilarity
|
|
276
|
+
| EvalRetrieveResponse.EvalGraderPython
|
|
277
|
+
| EvalRetrieveResponse.EvalGraderScoreModel
|
|
515
278
|
>;
|
|
516
279
|
}
|
|
517
280
|
|
|
518
281
|
export namespace EvalRetrieveResponse {
|
|
519
282
|
/**
|
|
520
|
-
* A PythonGrader object that runs a python script on the input.
|
|
283
|
+
* A TextSimilarityGrader object which grades text based on similarity metrics.
|
|
521
284
|
*/
|
|
522
|
-
export interface Python {
|
|
285
|
+
export interface EvalGraderTextSimilarity extends GraderModelsAPI.TextSimilarityGrader {
|
|
523
286
|
/**
|
|
524
|
-
* The name of the grader.
|
|
525
|
-
*/
|
|
526
|
-
name: string;
|
|
527
|
-
|
|
528
|
-
/**
|
|
529
|
-
* The source code of the python script.
|
|
530
|
-
*/
|
|
531
|
-
source: string;
|
|
532
|
-
|
|
533
|
-
/**
|
|
534
|
-
* The object type, which is always `python`.
|
|
535
|
-
*/
|
|
536
|
-
type: 'python';
|
|
537
|
-
|
|
538
|
-
/**
|
|
539
|
-
* The image tag to use for the python script.
|
|
287
|
+
* The threshold for the score.
|
|
540
288
|
*/
|
|
541
|
-
|
|
289
|
+
pass_threshold: number;
|
|
290
|
+
}
|
|
542
291
|
|
|
292
|
+
/**
|
|
293
|
+
* A PythonGrader object that runs a python script on the input.
|
|
294
|
+
*/
|
|
295
|
+
export interface EvalGraderPython extends GraderModelsAPI.PythonGrader {
|
|
543
296
|
/**
|
|
544
297
|
* The threshold for the score.
|
|
545
298
|
*/
|
|
@@ -549,85 +302,11 @@ export namespace EvalRetrieveResponse {
|
|
|
549
302
|
/**
|
|
550
303
|
* A ScoreModelGrader object that uses a model to assign a score to the input.
|
|
551
304
|
*/
|
|
552
|
-
export interface ScoreModel {
|
|
553
|
-
/**
|
|
554
|
-
* The input text. This may include template strings.
|
|
555
|
-
*/
|
|
556
|
-
input: Array<ScoreModel.Input>;
|
|
557
|
-
|
|
558
|
-
/**
|
|
559
|
-
* The model to use for the evaluation.
|
|
560
|
-
*/
|
|
561
|
-
model: string;
|
|
562
|
-
|
|
563
|
-
/**
|
|
564
|
-
* The name of the grader.
|
|
565
|
-
*/
|
|
566
|
-
name: string;
|
|
567
|
-
|
|
568
|
-
/**
|
|
569
|
-
* The object type, which is always `score_model`.
|
|
570
|
-
*/
|
|
571
|
-
type: 'score_model';
|
|
572
|
-
|
|
305
|
+
export interface EvalGraderScoreModel extends GraderModelsAPI.ScoreModelGrader {
|
|
573
306
|
/**
|
|
574
307
|
* The threshold for the score.
|
|
575
308
|
*/
|
|
576
309
|
pass_threshold?: number;
|
|
577
|
-
|
|
578
|
-
/**
|
|
579
|
-
* The range of the score. Defaults to `[0, 1]`.
|
|
580
|
-
*/
|
|
581
|
-
range?: Array<number>;
|
|
582
|
-
|
|
583
|
-
/**
|
|
584
|
-
* The sampling parameters for the model.
|
|
585
|
-
*/
|
|
586
|
-
sampling_params?: unknown;
|
|
587
|
-
}
|
|
588
|
-
|
|
589
|
-
export namespace ScoreModel {
|
|
590
|
-
/**
|
|
591
|
-
* A message input to the model with a role indicating instruction following
|
|
592
|
-
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
593
|
-
* precedence over instructions given with the `user` role. Messages with the
|
|
594
|
-
* `assistant` role are presumed to have been generated by the model in previous
|
|
595
|
-
* interactions.
|
|
596
|
-
*/
|
|
597
|
-
export interface Input {
|
|
598
|
-
/**
|
|
599
|
-
* Text inputs to the model - can contain template strings.
|
|
600
|
-
*/
|
|
601
|
-
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
602
|
-
|
|
603
|
-
/**
|
|
604
|
-
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
605
|
-
* `developer`.
|
|
606
|
-
*/
|
|
607
|
-
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
608
|
-
|
|
609
|
-
/**
|
|
610
|
-
* The type of the message input. Always `message`.
|
|
611
|
-
*/
|
|
612
|
-
type?: 'message';
|
|
613
|
-
}
|
|
614
|
-
|
|
615
|
-
export namespace Input {
|
|
616
|
-
/**
|
|
617
|
-
* A text output from the model.
|
|
618
|
-
*/
|
|
619
|
-
export interface OutputText {
|
|
620
|
-
/**
|
|
621
|
-
* The text output from the model.
|
|
622
|
-
*/
|
|
623
|
-
text: string;
|
|
624
|
-
|
|
625
|
-
/**
|
|
626
|
-
* The type of the output text. Always `output_text`.
|
|
627
|
-
*/
|
|
628
|
-
type: 'output_text';
|
|
629
|
-
}
|
|
630
|
-
}
|
|
631
310
|
}
|
|
632
311
|
}
|
|
633
312
|
|
|
@@ -679,39 +358,29 @@ export interface EvalUpdateResponse {
|
|
|
679
358
|
* A list of testing criteria.
|
|
680
359
|
*/
|
|
681
360
|
testing_criteria: Array<
|
|
682
|
-
| EvalLabelModelGrader
|
|
683
|
-
| EvalStringCheckGrader
|
|
684
|
-
| EvalTextSimilarityGrader
|
|
685
|
-
| EvalUpdateResponse.Python
|
|
686
|
-
| EvalUpdateResponse.ScoreModel
|
|
361
|
+
| GraderModelsAPI.LabelModelGrader
|
|
362
|
+
| GraderModelsAPI.StringCheckGrader
|
|
363
|
+
| EvalUpdateResponse.EvalGraderTextSimilarity
|
|
364
|
+
| EvalUpdateResponse.EvalGraderPython
|
|
365
|
+
| EvalUpdateResponse.EvalGraderScoreModel
|
|
687
366
|
>;
|
|
688
367
|
}
|
|
689
368
|
|
|
690
369
|
export namespace EvalUpdateResponse {
|
|
691
370
|
/**
|
|
692
|
-
* A PythonGrader object that runs a python script on the input.
|
|
371
|
+
* A TextSimilarityGrader object which grades text based on similarity metrics.
|
|
693
372
|
*/
|
|
694
|
-
export interface Python {
|
|
695
|
-
/**
|
|
696
|
-
* The name of the grader.
|
|
697
|
-
*/
|
|
698
|
-
name: string;
|
|
699
|
-
|
|
700
|
-
/**
|
|
701
|
-
* The source code of the python script.
|
|
702
|
-
*/
|
|
703
|
-
source: string;
|
|
704
|
-
|
|
705
|
-
/**
|
|
706
|
-
* The object type, which is always `python`.
|
|
707
|
-
*/
|
|
708
|
-
type: 'python';
|
|
709
|
-
|
|
373
|
+
export interface EvalGraderTextSimilarity extends GraderModelsAPI.TextSimilarityGrader {
|
|
710
374
|
/**
|
|
711
|
-
* The image tag to use for the python script.
|
|
375
|
+
* The threshold for the score.
|
|
712
376
|
*/
|
|
713
|
-
|
|
377
|
+
pass_threshold: number;
|
|
378
|
+
}
|
|
714
379
|
|
|
380
|
+
/**
|
|
381
|
+
* A PythonGrader object that runs a python script on the input.
|
|
382
|
+
*/
|
|
383
|
+
export interface EvalGraderPython extends GraderModelsAPI.PythonGrader {
|
|
715
384
|
/**
|
|
716
385
|
* The threshold for the score.
|
|
717
386
|
*/
|
|
@@ -721,85 +390,11 @@ export namespace EvalUpdateResponse {
|
|
|
721
390
|
/**
|
|
722
391
|
* A ScoreModelGrader object that uses a model to assign a score to the input.
|
|
723
392
|
*/
|
|
724
|
-
export interface ScoreModel {
|
|
725
|
-
/**
|
|
726
|
-
* The input text. This may include template strings.
|
|
727
|
-
*/
|
|
728
|
-
input: Array<ScoreModel.Input>;
|
|
729
|
-
|
|
730
|
-
/**
|
|
731
|
-
* The model to use for the evaluation.
|
|
732
|
-
*/
|
|
733
|
-
model: string;
|
|
734
|
-
|
|
735
|
-
/**
|
|
736
|
-
* The name of the grader.
|
|
737
|
-
*/
|
|
738
|
-
name: string;
|
|
739
|
-
|
|
740
|
-
/**
|
|
741
|
-
* The object type, which is always `score_model`.
|
|
742
|
-
*/
|
|
743
|
-
type: 'score_model';
|
|
744
|
-
|
|
393
|
+
export interface EvalGraderScoreModel extends GraderModelsAPI.ScoreModelGrader {
|
|
745
394
|
/**
|
|
746
395
|
* The threshold for the score.
|
|
747
396
|
*/
|
|
748
397
|
pass_threshold?: number;
|
|
749
|
-
|
|
750
|
-
/**
|
|
751
|
-
* The range of the score. Defaults to `[0, 1]`.
|
|
752
|
-
*/
|
|
753
|
-
range?: Array<number>;
|
|
754
|
-
|
|
755
|
-
/**
|
|
756
|
-
* The sampling parameters for the model.
|
|
757
|
-
*/
|
|
758
|
-
sampling_params?: unknown;
|
|
759
|
-
}
|
|
760
|
-
|
|
761
|
-
export namespace ScoreModel {
|
|
762
|
-
/**
|
|
763
|
-
* A message input to the model with a role indicating instruction following
|
|
764
|
-
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
765
|
-
* precedence over instructions given with the `user` role. Messages with the
|
|
766
|
-
* `assistant` role are presumed to have been generated by the model in previous
|
|
767
|
-
* interactions.
|
|
768
|
-
*/
|
|
769
|
-
export interface Input {
|
|
770
|
-
/**
|
|
771
|
-
* Text inputs to the model - can contain template strings.
|
|
772
|
-
*/
|
|
773
|
-
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
774
|
-
|
|
775
|
-
/**
|
|
776
|
-
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
777
|
-
* `developer`.
|
|
778
|
-
*/
|
|
779
|
-
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
780
|
-
|
|
781
|
-
/**
|
|
782
|
-
* The type of the message input. Always `message`.
|
|
783
|
-
*/
|
|
784
|
-
type?: 'message';
|
|
785
|
-
}
|
|
786
|
-
|
|
787
|
-
export namespace Input {
|
|
788
|
-
/**
|
|
789
|
-
* A text output from the model.
|
|
790
|
-
*/
|
|
791
|
-
export interface OutputText {
|
|
792
|
-
/**
|
|
793
|
-
* The text output from the model.
|
|
794
|
-
*/
|
|
795
|
-
text: string;
|
|
796
|
-
|
|
797
|
-
/**
|
|
798
|
-
* The type of the output text. Always `output_text`.
|
|
799
|
-
*/
|
|
800
|
-
type: 'output_text';
|
|
801
|
-
}
|
|
802
|
-
}
|
|
803
398
|
}
|
|
804
399
|
}
|
|
805
400
|
|
|
@@ -851,39 +446,29 @@ export interface EvalListResponse {
|
|
|
851
446
|
* A list of testing criteria.
|
|
852
447
|
*/
|
|
853
448
|
testing_criteria: Array<
|
|
854
|
-
|
|
|
855
|
-
|
|
|
856
|
-
|
|
|
857
|
-
| EvalListResponse.
|
|
858
|
-
| EvalListResponse.
|
|
449
|
+
| GraderModelsAPI.LabelModelGrader
|
|
450
|
+
| GraderModelsAPI.StringCheckGrader
|
|
451
|
+
| EvalListResponse.EvalGraderTextSimilarity
|
|
452
|
+
| EvalListResponse.EvalGraderPython
|
|
453
|
+
| EvalListResponse.EvalGraderScoreModel
|
|
859
454
|
>;
|
|
860
455
|
}
|
|
861
456
|
|
|
862
457
|
export namespace EvalListResponse {
|
|
863
458
|
/**
|
|
864
|
-
* A
|
|
459
|
+
* A TextSimilarityGrader object which grades text based on similarity metrics.
|
|
865
460
|
*/
|
|
866
|
-
export interface
|
|
867
|
-
/**
|
|
868
|
-
* The name of the grader.
|
|
869
|
-
*/
|
|
870
|
-
name: string;
|
|
871
|
-
|
|
872
|
-
/**
|
|
873
|
-
* The source code of the python script.
|
|
874
|
-
*/
|
|
875
|
-
source: string;
|
|
876
|
-
|
|
461
|
+
export interface EvalGraderTextSimilarity extends GraderModelsAPI.TextSimilarityGrader {
|
|
877
462
|
/**
|
|
878
|
-
* The
|
|
879
|
-
*/
|
|
880
|
-
type: 'python';
|
|
881
|
-
|
|
882
|
-
/**
|
|
883
|
-
* The image tag to use for the python script.
|
|
463
|
+
* The threshold for the score.
|
|
884
464
|
*/
|
|
885
|
-
|
|
465
|
+
pass_threshold: number;
|
|
466
|
+
}
|
|
886
467
|
|
|
468
|
+
/**
|
|
469
|
+
* A PythonGrader object that runs a python script on the input.
|
|
470
|
+
*/
|
|
471
|
+
export interface EvalGraderPython extends GraderModelsAPI.PythonGrader {
|
|
887
472
|
/**
|
|
888
473
|
* The threshold for the score.
|
|
889
474
|
*/
|
|
@@ -893,85 +478,11 @@ export namespace EvalListResponse {
|
|
|
893
478
|
/**
|
|
894
479
|
* A ScoreModelGrader object that uses a model to assign a score to the input.
|
|
895
480
|
*/
|
|
896
|
-
export interface
|
|
897
|
-
/**
|
|
898
|
-
* The input text. This may include template strings.
|
|
899
|
-
*/
|
|
900
|
-
input: Array<ScoreModel.Input>;
|
|
901
|
-
|
|
902
|
-
/**
|
|
903
|
-
* The model to use for the evaluation.
|
|
904
|
-
*/
|
|
905
|
-
model: string;
|
|
906
|
-
|
|
907
|
-
/**
|
|
908
|
-
* The name of the grader.
|
|
909
|
-
*/
|
|
910
|
-
name: string;
|
|
911
|
-
|
|
912
|
-
/**
|
|
913
|
-
* The object type, which is always `score_model`.
|
|
914
|
-
*/
|
|
915
|
-
type: 'score_model';
|
|
916
|
-
|
|
481
|
+
export interface EvalGraderScoreModel extends GraderModelsAPI.ScoreModelGrader {
|
|
917
482
|
/**
|
|
918
483
|
* The threshold for the score.
|
|
919
484
|
*/
|
|
920
485
|
pass_threshold?: number;
|
|
921
|
-
|
|
922
|
-
/**
|
|
923
|
-
* The range of the score. Defaults to `[0, 1]`.
|
|
924
|
-
*/
|
|
925
|
-
range?: Array<number>;
|
|
926
|
-
|
|
927
|
-
/**
|
|
928
|
-
* The sampling parameters for the model.
|
|
929
|
-
*/
|
|
930
|
-
sampling_params?: unknown;
|
|
931
|
-
}
|
|
932
|
-
|
|
933
|
-
export namespace ScoreModel {
|
|
934
|
-
/**
|
|
935
|
-
* A message input to the model with a role indicating instruction following
|
|
936
|
-
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
937
|
-
* precedence over instructions given with the `user` role. Messages with the
|
|
938
|
-
* `assistant` role are presumed to have been generated by the model in previous
|
|
939
|
-
* interactions.
|
|
940
|
-
*/
|
|
941
|
-
export interface Input {
|
|
942
|
-
/**
|
|
943
|
-
* Text inputs to the model - can contain template strings.
|
|
944
|
-
*/
|
|
945
|
-
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
946
|
-
|
|
947
|
-
/**
|
|
948
|
-
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
949
|
-
* `developer`.
|
|
950
|
-
*/
|
|
951
|
-
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
952
|
-
|
|
953
|
-
/**
|
|
954
|
-
* The type of the message input. Always `message`.
|
|
955
|
-
*/
|
|
956
|
-
type?: 'message';
|
|
957
|
-
}
|
|
958
|
-
|
|
959
|
-
export namespace Input {
|
|
960
|
-
/**
|
|
961
|
-
* A text output from the model.
|
|
962
|
-
*/
|
|
963
|
-
export interface OutputText {
|
|
964
|
-
/**
|
|
965
|
-
* The text output from the model.
|
|
966
|
-
*/
|
|
967
|
-
text: string;
|
|
968
|
-
|
|
969
|
-
/**
|
|
970
|
-
* The type of the output text. Always `output_text`.
|
|
971
|
-
*/
|
|
972
|
-
type: 'output_text';
|
|
973
|
-
}
|
|
974
|
-
}
|
|
975
486
|
}
|
|
976
487
|
}
|
|
977
488
|
|
|
@@ -987,15 +498,15 @@ export interface EvalCreateParams {
|
|
|
987
498
|
/**
|
|
988
499
|
* The configuration for the data source used for the evaluation runs.
|
|
989
500
|
*/
|
|
990
|
-
data_source_config: EvalCreateParams.Custom | EvalCreateParams.
|
|
501
|
+
data_source_config: EvalCreateParams.Custom | EvalCreateParams.StoredCompletions;
|
|
991
502
|
|
|
992
503
|
/**
|
|
993
504
|
* A list of graders for all eval runs in this group.
|
|
994
505
|
*/
|
|
995
506
|
testing_criteria: Array<
|
|
996
507
|
| EvalCreateParams.LabelModel
|
|
997
|
-
|
|
|
998
|
-
|
|
|
508
|
+
| GraderModelsAPI.StringCheckGrader
|
|
509
|
+
| EvalCreateParams.TextSimilarity
|
|
999
510
|
| EvalCreateParams.Python
|
|
1000
511
|
| EvalCreateParams.ScoreModel
|
|
1001
512
|
>;
|
|
@@ -1048,14 +559,14 @@ export namespace EvalCreateParams {
|
|
|
1048
559
|
* completions query. This is usually metadata like `usecase=chatbot` or
|
|
1049
560
|
* `prompt-version=v2`, etc.
|
|
1050
561
|
*/
|
|
1051
|
-
export interface
|
|
562
|
+
export interface StoredCompletions {
|
|
1052
563
|
/**
|
|
1053
|
-
* The type of data source. Always `
|
|
564
|
+
* The type of data source. Always `stored_completions`.
|
|
1054
565
|
*/
|
|
1055
|
-
type: '
|
|
566
|
+
type: 'stored_completions';
|
|
1056
567
|
|
|
1057
568
|
/**
|
|
1058
|
-
* Metadata filters for the
|
|
569
|
+
* Metadata filters for the stored completions data source.
|
|
1059
570
|
*/
|
|
1060
571
|
metadata?: Record<string, unknown>;
|
|
1061
572
|
}
|
|
@@ -1154,29 +665,19 @@ export namespace EvalCreateParams {
|
|
|
1154
665
|
}
|
|
1155
666
|
|
|
1156
667
|
/**
|
|
1157
|
-
* A
|
|
668
|
+
* A TextSimilarityGrader object which grades text based on similarity metrics.
|
|
1158
669
|
*/
|
|
1159
|
-
export interface
|
|
670
|
+
export interface TextSimilarity extends GraderModelsAPI.TextSimilarityGrader {
|
|
1160
671
|
/**
|
|
1161
|
-
* The
|
|
1162
|
-
*/
|
|
1163
|
-
name: string;
|
|
1164
|
-
|
|
1165
|
-
/**
|
|
1166
|
-
* The source code of the python script.
|
|
1167
|
-
*/
|
|
1168
|
-
source: string;
|
|
1169
|
-
|
|
1170
|
-
/**
|
|
1171
|
-
* The object type, which is always `python`.
|
|
1172
|
-
*/
|
|
1173
|
-
type: 'python';
|
|
1174
|
-
|
|
1175
|
-
/**
|
|
1176
|
-
* The image tag to use for the python script.
|
|
672
|
+
* The threshold for the score.
|
|
1177
673
|
*/
|
|
1178
|
-
|
|
674
|
+
pass_threshold: number;
|
|
675
|
+
}
|
|
1179
676
|
|
|
677
|
+
/**
|
|
678
|
+
* A PythonGrader object that runs a python script on the input.
|
|
679
|
+
*/
|
|
680
|
+
export interface Python extends GraderModelsAPI.PythonGrader {
|
|
1180
681
|
/**
|
|
1181
682
|
* The threshold for the score.
|
|
1182
683
|
*/
|
|
@@ -1186,85 +687,11 @@ export namespace EvalCreateParams {
|
|
|
1186
687
|
/**
|
|
1187
688
|
* A ScoreModelGrader object that uses a model to assign a score to the input.
|
|
1188
689
|
*/
|
|
1189
|
-
export interface ScoreModel {
|
|
1190
|
-
/**
|
|
1191
|
-
* The input text. This may include template strings.
|
|
1192
|
-
*/
|
|
1193
|
-
input: Array<ScoreModel.Input>;
|
|
1194
|
-
|
|
1195
|
-
/**
|
|
1196
|
-
* The model to use for the evaluation.
|
|
1197
|
-
*/
|
|
1198
|
-
model: string;
|
|
1199
|
-
|
|
1200
|
-
/**
|
|
1201
|
-
* The name of the grader.
|
|
1202
|
-
*/
|
|
1203
|
-
name: string;
|
|
1204
|
-
|
|
1205
|
-
/**
|
|
1206
|
-
* The object type, which is always `score_model`.
|
|
1207
|
-
*/
|
|
1208
|
-
type: 'score_model';
|
|
1209
|
-
|
|
690
|
+
export interface ScoreModel extends GraderModelsAPI.ScoreModelGrader {
|
|
1210
691
|
/**
|
|
1211
692
|
* The threshold for the score.
|
|
1212
693
|
*/
|
|
1213
694
|
pass_threshold?: number;
|
|
1214
|
-
|
|
1215
|
-
/**
|
|
1216
|
-
* The range of the score. Defaults to `[0, 1]`.
|
|
1217
|
-
*/
|
|
1218
|
-
range?: Array<number>;
|
|
1219
|
-
|
|
1220
|
-
/**
|
|
1221
|
-
* The sampling parameters for the model.
|
|
1222
|
-
*/
|
|
1223
|
-
sampling_params?: unknown;
|
|
1224
|
-
}
|
|
1225
|
-
|
|
1226
|
-
export namespace ScoreModel {
|
|
1227
|
-
/**
|
|
1228
|
-
* A message input to the model with a role indicating instruction following
|
|
1229
|
-
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
1230
|
-
* precedence over instructions given with the `user` role. Messages with the
|
|
1231
|
-
* `assistant` role are presumed to have been generated by the model in previous
|
|
1232
|
-
* interactions.
|
|
1233
|
-
*/
|
|
1234
|
-
export interface Input {
|
|
1235
|
-
/**
|
|
1236
|
-
* Text inputs to the model - can contain template strings.
|
|
1237
|
-
*/
|
|
1238
|
-
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
1239
|
-
|
|
1240
|
-
/**
|
|
1241
|
-
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
1242
|
-
* `developer`.
|
|
1243
|
-
*/
|
|
1244
|
-
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
1245
|
-
|
|
1246
|
-
/**
|
|
1247
|
-
* The type of the message input. Always `message`.
|
|
1248
|
-
*/
|
|
1249
|
-
type?: 'message';
|
|
1250
|
-
}
|
|
1251
|
-
|
|
1252
|
-
export namespace Input {
|
|
1253
|
-
/**
|
|
1254
|
-
* A text output from the model.
|
|
1255
|
-
*/
|
|
1256
|
-
export interface OutputText {
|
|
1257
|
-
/**
|
|
1258
|
-
* The text output from the model.
|
|
1259
|
-
*/
|
|
1260
|
-
text: string;
|
|
1261
|
-
|
|
1262
|
-
/**
|
|
1263
|
-
* The type of the output text. Always `output_text`.
|
|
1264
|
-
*/
|
|
1265
|
-
type: 'output_text';
|
|
1266
|
-
}
|
|
1267
|
-
}
|
|
1268
695
|
}
|
|
1269
696
|
}
|
|
1270
697
|
|
|
@@ -1306,10 +733,7 @@ Evals.RunListResponsesPage = RunListResponsesPage;
|
|
|
1306
733
|
export declare namespace Evals {
|
|
1307
734
|
export {
|
|
1308
735
|
type EvalCustomDataSourceConfig as EvalCustomDataSourceConfig,
|
|
1309
|
-
type EvalLabelModelGrader as EvalLabelModelGrader,
|
|
1310
736
|
type EvalStoredCompletionsDataSourceConfig as EvalStoredCompletionsDataSourceConfig,
|
|
1311
|
-
type EvalStringCheckGrader as EvalStringCheckGrader,
|
|
1312
|
-
type EvalTextSimilarityGrader as EvalTextSimilarityGrader,
|
|
1313
737
|
type EvalCreateResponse as EvalCreateResponse,
|
|
1314
738
|
type EvalRetrieveResponse as EvalRetrieveResponse,
|
|
1315
739
|
type EvalUpdateResponse as EvalUpdateResponse,
|