openai 4.97.0 → 4.99.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/index.d.mts +5 -2
- package/index.d.ts +5 -2
- package/index.d.ts.map +1 -1
- package/index.js +3 -0
- package/index.js.map +1 -1
- package/index.mjs +3 -0
- package/index.mjs.map +1 -1
- package/package.json +1 -1
- package/resources/audio/speech.d.ts +12 -0
- package/resources/audio/speech.d.ts.map +1 -1
- package/resources/audio/speech.js +12 -0
- package/resources/audio/speech.js.map +1 -1
- package/resources/audio/speech.mjs +12 -0
- package/resources/audio/speech.mjs.map +1 -1
- package/resources/audio/transcriptions.d.ts +38 -0
- package/resources/audio/transcriptions.d.ts.map +1 -1
- package/resources/audio/transcriptions.js.map +1 -1
- package/resources/audio/transcriptions.mjs.map +1 -1
- package/resources/audio/translations.d.ts +8 -0
- package/resources/audio/translations.d.ts.map +1 -1
- package/resources/audio/translations.js.map +1 -1
- package/resources/audio/translations.mjs.map +1 -1
- package/resources/beta/assistants.d.ts +36 -0
- package/resources/beta/assistants.d.ts.map +1 -1
- package/resources/beta/assistants.js +28 -0
- package/resources/beta/assistants.js.map +1 -1
- package/resources/beta/assistants.mjs +28 -0
- package/resources/beta/assistants.mjs.map +1 -1
- package/resources/beta/realtime/sessions.d.ts +6 -0
- package/resources/beta/realtime/sessions.d.ts.map +1 -1
- package/resources/beta/realtime/sessions.js +6 -0
- package/resources/beta/realtime/sessions.js.map +1 -1
- package/resources/beta/realtime/sessions.mjs +6 -0
- package/resources/beta/realtime/sessions.mjs.map +1 -1
- package/resources/beta/realtime/transcription-sessions.d.ts +6 -0
- package/resources/beta/realtime/transcription-sessions.d.ts.map +1 -1
- package/resources/beta/realtime/transcription-sessions.js +6 -0
- package/resources/beta/realtime/transcription-sessions.js.map +1 -1
- package/resources/beta/realtime/transcription-sessions.mjs +6 -0
- package/resources/beta/realtime/transcription-sessions.mjs.map +1 -1
- package/resources/beta/threads/messages.d.ts +43 -0
- package/resources/beta/threads/messages.d.ts.map +1 -1
- package/resources/beta/threads/messages.js +33 -0
- package/resources/beta/threads/messages.js.map +1 -1
- package/resources/beta/threads/messages.mjs +33 -0
- package/resources/beta/threads/messages.mjs.map +1 -1
- package/resources/beta/threads/runs/runs.d.ts +52 -0
- package/resources/beta/threads/runs/runs.d.ts.map +1 -1
- package/resources/beta/threads/runs/runs.js +24 -0
- package/resources/beta/threads/runs/runs.js.map +1 -1
- package/resources/beta/threads/runs/runs.mjs +24 -0
- package/resources/beta/threads/runs/runs.mjs.map +1 -1
- package/resources/beta/threads/runs/steps.d.ts +21 -0
- package/resources/beta/threads/runs/steps.d.ts.map +1 -1
- package/resources/beta/threads/runs/steps.js.map +1 -1
- package/resources/beta/threads/runs/steps.mjs.map +1 -1
- package/resources/beta/threads/threads.d.ts +33 -0
- package/resources/beta/threads/threads.d.ts.map +1 -1
- package/resources/beta/threads/threads.js +21 -0
- package/resources/beta/threads/threads.js.map +1 -1
- package/resources/beta/threads/threads.mjs +21 -0
- package/resources/beta/threads/threads.mjs.map +1 -1
- package/resources/chat/completions/completions.d.ts +38 -0
- package/resources/chat/completions/completions.d.ts.map +1 -1
- package/resources/chat/completions/completions.js +20 -0
- package/resources/chat/completions/completions.js.map +1 -1
- package/resources/chat/completions/completions.mjs +20 -0
- package/resources/chat/completions/completions.mjs.map +1 -1
- package/resources/chat/completions/messages.d.ts +10 -0
- package/resources/chat/completions/messages.d.ts.map +1 -1
- package/resources/chat/completions/messages.js.map +1 -1
- package/resources/chat/completions/messages.mjs.map +1 -1
- package/resources/completions.d.ts +8 -0
- package/resources/completions.d.ts.map +1 -1
- package/resources/completions.js.map +1 -1
- package/resources/completions.mjs.map +1 -1
- package/resources/embeddings.d.ts +13 -3
- package/resources/embeddings.d.ts.map +1 -1
- package/resources/embeddings.js +9 -0
- package/resources/embeddings.js.map +1 -1
- package/resources/embeddings.mjs +9 -0
- package/resources/embeddings.mjs.map +1 -1
- package/resources/evals/evals.d.ts +164 -519
- package/resources/evals/evals.d.ts.map +1 -1
- package/resources/evals/evals.js.map +1 -1
- package/resources/evals/evals.mjs.map +1 -1
- package/resources/evals/index.d.ts +1 -1
- package/resources/evals/index.d.ts.map +1 -1
- package/resources/evals/index.js.map +1 -1
- package/resources/evals/index.mjs.map +1 -1
- package/resources/evals/runs/runs.d.ts +64 -69
- package/resources/evals/runs/runs.d.ts.map +1 -1
- package/resources/evals/runs/runs.js.map +1 -1
- package/resources/evals/runs/runs.mjs.map +1 -1
- package/resources/fine-tuning/alpha/alpha.d.ts +10 -0
- package/resources/fine-tuning/alpha/alpha.d.ts.map +1 -0
- package/resources/fine-tuning/alpha/alpha.js +39 -0
- package/resources/fine-tuning/alpha/alpha.js.map +1 -0
- package/resources/fine-tuning/alpha/alpha.mjs +12 -0
- package/resources/fine-tuning/alpha/alpha.mjs.map +1 -0
- package/resources/fine-tuning/alpha/graders.d.ts +107 -0
- package/resources/fine-tuning/alpha/graders.d.ts.map +1 -0
- package/resources/fine-tuning/alpha/graders.js +50 -0
- package/resources/fine-tuning/alpha/graders.js.map +1 -0
- package/resources/fine-tuning/alpha/graders.mjs +46 -0
- package/resources/fine-tuning/alpha/graders.mjs.map +1 -0
- package/resources/fine-tuning/alpha/index.d.ts +3 -0
- package/resources/fine-tuning/alpha/index.d.ts.map +1 -0
- package/resources/fine-tuning/alpha/index.js +9 -0
- package/resources/fine-tuning/alpha/index.js.map +1 -0
- package/resources/fine-tuning/alpha/index.mjs +4 -0
- package/resources/fine-tuning/alpha/index.mjs.map +1 -0
- package/resources/fine-tuning/alpha.d.ts +2 -0
- package/resources/fine-tuning/alpha.d.ts.map +1 -0
- package/resources/fine-tuning/alpha.js +19 -0
- package/resources/fine-tuning/alpha.js.map +1 -0
- package/resources/fine-tuning/alpha.mjs +3 -0
- package/resources/fine-tuning/alpha.mjs.map +1 -0
- package/resources/fine-tuning/checkpoints/permissions.d.ts +28 -0
- package/resources/fine-tuning/checkpoints/permissions.d.ts.map +1 -1
- package/resources/fine-tuning/checkpoints/permissions.js +20 -0
- package/resources/fine-tuning/checkpoints/permissions.js.map +1 -1
- package/resources/fine-tuning/checkpoints/permissions.mjs +20 -0
- package/resources/fine-tuning/checkpoints/permissions.mjs.map +1 -1
- package/resources/fine-tuning/fine-tuning.d.ts +8 -0
- package/resources/fine-tuning/fine-tuning.d.ts.map +1 -1
- package/resources/fine-tuning/fine-tuning.js +8 -0
- package/resources/fine-tuning/fine-tuning.js.map +1 -1
- package/resources/fine-tuning/fine-tuning.mjs +8 -0
- package/resources/fine-tuning/fine-tuning.mjs.map +1 -1
- package/resources/fine-tuning/index.d.ts +2 -0
- package/resources/fine-tuning/index.d.ts.map +1 -1
- package/resources/fine-tuning/index.js +11 -7
- package/resources/fine-tuning/index.js.map +1 -1
- package/resources/fine-tuning/index.mjs +2 -0
- package/resources/fine-tuning/index.mjs.map +1 -1
- package/resources/fine-tuning/jobs/checkpoints.d.ts +10 -0
- package/resources/fine-tuning/jobs/checkpoints.d.ts.map +1 -1
- package/resources/fine-tuning/jobs/checkpoints.js.map +1 -1
- package/resources/fine-tuning/jobs/checkpoints.mjs.map +1 -1
- package/resources/fine-tuning/jobs/jobs.d.ts +78 -145
- package/resources/fine-tuning/jobs/jobs.d.ts.map +1 -1
- package/resources/fine-tuning/jobs/jobs.js +48 -0
- package/resources/fine-tuning/jobs/jobs.js.map +1 -1
- package/resources/fine-tuning/jobs/jobs.mjs +48 -0
- package/resources/fine-tuning/jobs/jobs.mjs.map +1 -1
- package/resources/fine-tuning/methods.d.ts +120 -0
- package/resources/fine-tuning/methods.d.ts.map +1 -0
- package/resources/fine-tuning/methods.js +9 -0
- package/resources/fine-tuning/methods.js.map +1 -0
- package/resources/fine-tuning/methods.mjs +5 -0
- package/resources/fine-tuning/methods.mjs.map +1 -0
- package/resources/graders/grader-models.d.ts +234 -0
- package/resources/graders/grader-models.d.ts.map +1 -0
- package/resources/graders/grader-models.js +9 -0
- package/resources/graders/grader-models.js.map +1 -0
- package/resources/graders/grader-models.mjs +5 -0
- package/resources/graders/grader-models.mjs.map +1 -0
- package/resources/graders/graders.d.ts +10 -0
- package/resources/graders/graders.d.ts.map +1 -0
- package/resources/graders/graders.js +39 -0
- package/resources/graders/graders.js.map +1 -0
- package/resources/graders/graders.mjs +12 -0
- package/resources/graders/graders.mjs.map +1 -0
- package/resources/graders/index.d.ts +3 -0
- package/resources/graders/index.d.ts.map +1 -0
- package/resources/graders/index.js +9 -0
- package/resources/graders/index.js.map +1 -0
- package/resources/graders/index.mjs +4 -0
- package/resources/graders/index.mjs.map +1 -0
- package/resources/graders.d.ts +2 -0
- package/resources/graders.d.ts.map +1 -0
- package/resources/graders.js +19 -0
- package/resources/graders.js.map +1 -0
- package/resources/graders.mjs +3 -0
- package/resources/graders.mjs.map +1 -0
- package/resources/images.d.ts +22 -0
- package/resources/images.d.ts.map +1 -1
- package/resources/images.js +22 -0
- package/resources/images.js.map +1 -1
- package/resources/images.mjs +22 -0
- package/resources/images.mjs.map +1 -1
- package/resources/index.d.ts +2 -1
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js +3 -1
- package/resources/index.js.map +1 -1
- package/resources/index.mjs +1 -0
- package/resources/index.mjs.map +1 -1
- package/resources/responses/input-items.d.ts +10 -0
- package/resources/responses/input-items.d.ts.map +1 -1
- package/resources/responses/input-items.js.map +1 -1
- package/resources/responses/input-items.mjs.map +1 -1
- package/resources/responses/responses.d.ts +22 -0
- package/resources/responses/responses.d.ts.map +1 -1
- package/resources/responses/responses.js +7 -0
- package/resources/responses/responses.js.map +1 -1
- package/resources/responses/responses.mjs +7 -0
- package/resources/responses/responses.mjs.map +1 -1
- package/resources/shared.d.ts +1 -1
- package/resources/shared.d.ts.map +1 -1
- package/src/index.ts +5 -6
- package/src/resources/audio/speech.ts +12 -0
- package/src/resources/audio/transcriptions.ts +43 -0
- package/src/resources/audio/translations.ts +8 -0
- package/src/resources/beta/assistants.ts +36 -0
- package/src/resources/beta/realtime/sessions.ts +6 -0
- package/src/resources/beta/realtime/transcription-sessions.ts +6 -0
- package/src/resources/beta/threads/messages.ts +43 -0
- package/src/resources/beta/threads/runs/runs.ts +52 -0
- package/src/resources/beta/threads/runs/steps.ts +21 -0
- package/src/resources/beta/threads/threads.ts +33 -0
- package/src/resources/chat/completions/completions.ts +38 -0
- package/src/resources/chat/completions/messages.ts +10 -0
- package/src/resources/completions.ts +8 -0
- package/src/resources/embeddings.ts +13 -3
- package/src/resources/evals/evals.ts +194 -628
- package/src/resources/evals/index.ts +0 -3
- package/src/resources/evals/runs/runs.ts +69 -77
- package/src/resources/fine-tuning/alpha/alpha.ts +27 -0
- package/src/resources/fine-tuning/alpha/graders.ts +168 -0
- package/src/resources/fine-tuning/alpha/index.ts +10 -0
- package/src/resources/fine-tuning/alpha.ts +3 -0
- package/src/resources/fine-tuning/checkpoints/permissions.ts +28 -0
- package/src/resources/fine-tuning/fine-tuning.ts +28 -0
- package/src/resources/fine-tuning/index.ts +10 -0
- package/src/resources/fine-tuning/jobs/checkpoints.ts +10 -0
- package/src/resources/fine-tuning/jobs/jobs.ts +84 -161
- package/src/resources/fine-tuning/methods.ts +152 -0
- package/src/resources/graders/grader-models.ts +296 -0
- package/src/resources/graders/graders.ts +31 -0
- package/src/resources/graders/index.ts +12 -0
- package/src/resources/graders.ts +3 -0
- package/src/resources/images.ts +22 -0
- package/src/resources/index.ts +1 -3
- package/src/resources/responses/input-items.ts +10 -0
- package/src/resources/responses/responses.ts +22 -0
- package/src/resources/shared.ts +1 -0
- package/src/version.ts +1 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/version.mjs +1 -1
|
@@ -4,6 +4,7 @@ import { APIResource } from '../../resource';
|
|
|
4
4
|
import { isRequestOptions } from '../../core';
|
|
5
5
|
import * as Core from '../../core';
|
|
6
6
|
import * as Shared from '../shared';
|
|
7
|
+
import * as GraderModelsAPI from '../graders/grader-models';
|
|
7
8
|
import * as ResponsesAPI from '../responses/responses';
|
|
8
9
|
import * as RunsAPI from './runs/runs';
|
|
9
10
|
import {
|
|
@@ -104,88 +105,7 @@ export interface EvalCustomDataSourceConfig {
|
|
|
104
105
|
}
|
|
105
106
|
|
|
106
107
|
/**
|
|
107
|
-
*
|
|
108
|
-
* the evaluation.
|
|
109
|
-
*/
|
|
110
|
-
export interface EvalLabelModelGrader {
|
|
111
|
-
input: Array<EvalLabelModelGrader.Input>;
|
|
112
|
-
|
|
113
|
-
/**
|
|
114
|
-
* The labels to assign to each item in the evaluation.
|
|
115
|
-
*/
|
|
116
|
-
labels: Array<string>;
|
|
117
|
-
|
|
118
|
-
/**
|
|
119
|
-
* The model to use for the evaluation. Must support structured outputs.
|
|
120
|
-
*/
|
|
121
|
-
model: string;
|
|
122
|
-
|
|
123
|
-
/**
|
|
124
|
-
* The name of the grader.
|
|
125
|
-
*/
|
|
126
|
-
name: string;
|
|
127
|
-
|
|
128
|
-
/**
|
|
129
|
-
* The labels that indicate a passing result. Must be a subset of labels.
|
|
130
|
-
*/
|
|
131
|
-
passing_labels: Array<string>;
|
|
132
|
-
|
|
133
|
-
/**
|
|
134
|
-
* The object type, which is always `label_model`.
|
|
135
|
-
*/
|
|
136
|
-
type: 'label_model';
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
export namespace EvalLabelModelGrader {
|
|
140
|
-
/**
|
|
141
|
-
* A message input to the model with a role indicating instruction following
|
|
142
|
-
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
143
|
-
* precedence over instructions given with the `user` role. Messages with the
|
|
144
|
-
* `assistant` role are presumed to have been generated by the model in previous
|
|
145
|
-
* interactions.
|
|
146
|
-
*/
|
|
147
|
-
export interface Input {
|
|
148
|
-
/**
|
|
149
|
-
* Text inputs to the model - can contain template strings.
|
|
150
|
-
*/
|
|
151
|
-
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
152
|
-
|
|
153
|
-
/**
|
|
154
|
-
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
155
|
-
* `developer`.
|
|
156
|
-
*/
|
|
157
|
-
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
158
|
-
|
|
159
|
-
/**
|
|
160
|
-
* The type of the message input. Always `message`.
|
|
161
|
-
*/
|
|
162
|
-
type?: 'message';
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
export namespace Input {
|
|
166
|
-
/**
|
|
167
|
-
* A text output from the model.
|
|
168
|
-
*/
|
|
169
|
-
export interface OutputText {
|
|
170
|
-
/**
|
|
171
|
-
* The text output from the model.
|
|
172
|
-
*/
|
|
173
|
-
text: string;
|
|
174
|
-
|
|
175
|
-
/**
|
|
176
|
-
* The type of the output text. Always `output_text`.
|
|
177
|
-
*/
|
|
178
|
-
type: 'output_text';
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
/**
|
|
184
|
-
* A StoredCompletionsDataSourceConfig which specifies the metadata property of
|
|
185
|
-
* your stored completions query. This is usually metadata like `usecase=chatbot`
|
|
186
|
-
* or `prompt-version=v2`, etc. The schema returned by this data source config is
|
|
187
|
-
* used to defined what variables are available in your evals. `item` and `sample`
|
|
188
|
-
* are both defined when using this data source config.
|
|
108
|
+
* @deprecated Deprecated in favor of LogsDataSourceConfig.
|
|
189
109
|
*/
|
|
190
110
|
export interface EvalStoredCompletionsDataSourceConfig {
|
|
191
111
|
/**
|
|
@@ -195,9 +115,9 @@ export interface EvalStoredCompletionsDataSourceConfig {
|
|
|
195
115
|
schema: Record<string, unknown>;
|
|
196
116
|
|
|
197
117
|
/**
|
|
198
|
-
* The type of data source. Always `
|
|
118
|
+
* The type of data source. Always `stored-completions`.
|
|
199
119
|
*/
|
|
200
|
-
type: '
|
|
120
|
+
type: 'stored-completions';
|
|
201
121
|
|
|
202
122
|
/**
|
|
203
123
|
* Set of 16 key-value pairs that can be attached to an object. This can be useful
|
|
@@ -210,83 +130,6 @@ export interface EvalStoredCompletionsDataSourceConfig {
|
|
|
210
130
|
metadata?: Shared.Metadata | null;
|
|
211
131
|
}
|
|
212
132
|
|
|
213
|
-
/**
|
|
214
|
-
* A StringCheckGrader object that performs a string comparison between input and
|
|
215
|
-
* reference using a specified operation.
|
|
216
|
-
*/
|
|
217
|
-
export interface EvalStringCheckGrader {
|
|
218
|
-
/**
|
|
219
|
-
* The input text. This may include template strings.
|
|
220
|
-
*/
|
|
221
|
-
input: string;
|
|
222
|
-
|
|
223
|
-
/**
|
|
224
|
-
* The name of the grader.
|
|
225
|
-
*/
|
|
226
|
-
name: string;
|
|
227
|
-
|
|
228
|
-
/**
|
|
229
|
-
* The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`.
|
|
230
|
-
*/
|
|
231
|
-
operation: 'eq' | 'ne' | 'like' | 'ilike';
|
|
232
|
-
|
|
233
|
-
/**
|
|
234
|
-
* The reference text. This may include template strings.
|
|
235
|
-
*/
|
|
236
|
-
reference: string;
|
|
237
|
-
|
|
238
|
-
/**
|
|
239
|
-
* The object type, which is always `string_check`.
|
|
240
|
-
*/
|
|
241
|
-
type: 'string_check';
|
|
242
|
-
}
|
|
243
|
-
|
|
244
|
-
/**
|
|
245
|
-
* A TextSimilarityGrader object which grades text based on similarity metrics.
|
|
246
|
-
*/
|
|
247
|
-
export interface EvalTextSimilarityGrader {
|
|
248
|
-
/**
|
|
249
|
-
* The evaluation metric to use. One of `fuzzy_match`, `bleu`, `gleu`, `meteor`,
|
|
250
|
-
* `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, or `rouge_l`.
|
|
251
|
-
*/
|
|
252
|
-
evaluation_metric:
|
|
253
|
-
| 'fuzzy_match'
|
|
254
|
-
| 'bleu'
|
|
255
|
-
| 'gleu'
|
|
256
|
-
| 'meteor'
|
|
257
|
-
| 'rouge_1'
|
|
258
|
-
| 'rouge_2'
|
|
259
|
-
| 'rouge_3'
|
|
260
|
-
| 'rouge_4'
|
|
261
|
-
| 'rouge_5'
|
|
262
|
-
| 'rouge_l';
|
|
263
|
-
|
|
264
|
-
/**
|
|
265
|
-
* The text being graded.
|
|
266
|
-
*/
|
|
267
|
-
input: string;
|
|
268
|
-
|
|
269
|
-
/**
|
|
270
|
-
* A float score where a value greater than or equal indicates a passing grade.
|
|
271
|
-
*/
|
|
272
|
-
pass_threshold: number;
|
|
273
|
-
|
|
274
|
-
/**
|
|
275
|
-
* The text being graded against.
|
|
276
|
-
*/
|
|
277
|
-
reference: string;
|
|
278
|
-
|
|
279
|
-
/**
|
|
280
|
-
* The type of grader.
|
|
281
|
-
*/
|
|
282
|
-
type: 'text_similarity';
|
|
283
|
-
|
|
284
|
-
/**
|
|
285
|
-
* The name of the grader.
|
|
286
|
-
*/
|
|
287
|
-
name?: string;
|
|
288
|
-
}
|
|
289
|
-
|
|
290
133
|
/**
|
|
291
134
|
* An Eval object with a data source config and testing criteria. An Eval
|
|
292
135
|
* represents a task to be done for your LLM integration. Like:
|
|
@@ -309,7 +152,10 @@ export interface EvalCreateResponse {
|
|
|
309
152
|
/**
|
|
310
153
|
* Configuration of data sources used in runs of the evaluation.
|
|
311
154
|
*/
|
|
312
|
-
data_source_config:
|
|
155
|
+
data_source_config:
|
|
156
|
+
| EvalCustomDataSourceConfig
|
|
157
|
+
| EvalCreateResponse.Logs
|
|
158
|
+
| EvalStoredCompletionsDataSourceConfig;
|
|
313
159
|
|
|
314
160
|
/**
|
|
315
161
|
* Set of 16 key-value pairs that can be attached to an object. This can be useful
|
|
@@ -335,39 +181,59 @@ export interface EvalCreateResponse {
|
|
|
335
181
|
* A list of testing criteria.
|
|
336
182
|
*/
|
|
337
183
|
testing_criteria: Array<
|
|
338
|
-
|
|
|
339
|
-
|
|
|
340
|
-
|
|
|
341
|
-
| EvalCreateResponse.
|
|
342
|
-
| EvalCreateResponse.
|
|
184
|
+
| GraderModelsAPI.LabelModelGrader
|
|
185
|
+
| GraderModelsAPI.StringCheckGrader
|
|
186
|
+
| EvalCreateResponse.EvalGraderTextSimilarity
|
|
187
|
+
| EvalCreateResponse.EvalGraderPython
|
|
188
|
+
| EvalCreateResponse.EvalGraderScoreModel
|
|
343
189
|
>;
|
|
344
190
|
}
|
|
345
191
|
|
|
346
192
|
export namespace EvalCreateResponse {
|
|
347
193
|
/**
|
|
348
|
-
* A
|
|
194
|
+
* A LogsDataSourceConfig which specifies the metadata property of your logs query.
|
|
195
|
+
* This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc. The
|
|
196
|
+
* schema returned by this data source config is used to defined what variables are
|
|
197
|
+
* available in your evals. `item` and `sample` are both defined when using this
|
|
198
|
+
* data source config.
|
|
349
199
|
*/
|
|
350
|
-
export interface
|
|
200
|
+
export interface Logs {
|
|
351
201
|
/**
|
|
352
|
-
* The
|
|
202
|
+
* The json schema for the run data source items. Learn how to build JSON schemas
|
|
203
|
+
* [here](https://json-schema.org/).
|
|
353
204
|
*/
|
|
354
|
-
|
|
205
|
+
schema: Record<string, unknown>;
|
|
355
206
|
|
|
356
207
|
/**
|
|
357
|
-
* The
|
|
208
|
+
* The type of data source. Always `logs`.
|
|
358
209
|
*/
|
|
359
|
-
|
|
210
|
+
type: 'logs';
|
|
360
211
|
|
|
361
212
|
/**
|
|
362
|
-
*
|
|
213
|
+
* Set of 16 key-value pairs that can be attached to an object. This can be useful
|
|
214
|
+
* for storing additional information about the object in a structured format, and
|
|
215
|
+
* querying for objects via API or the dashboard.
|
|
216
|
+
*
|
|
217
|
+
* Keys are strings with a maximum length of 64 characters. Values are strings with
|
|
218
|
+
* a maximum length of 512 characters.
|
|
363
219
|
*/
|
|
364
|
-
|
|
220
|
+
metadata?: Shared.Metadata | null;
|
|
221
|
+
}
|
|
365
222
|
|
|
223
|
+
/**
|
|
224
|
+
* A TextSimilarityGrader object which grades text based on similarity metrics.
|
|
225
|
+
*/
|
|
226
|
+
export interface EvalGraderTextSimilarity extends GraderModelsAPI.TextSimilarityGrader {
|
|
366
227
|
/**
|
|
367
|
-
* The
|
|
228
|
+
* The threshold for the score.
|
|
368
229
|
*/
|
|
369
|
-
|
|
230
|
+
pass_threshold: number;
|
|
231
|
+
}
|
|
370
232
|
|
|
233
|
+
/**
|
|
234
|
+
* A PythonGrader object that runs a python script on the input.
|
|
235
|
+
*/
|
|
236
|
+
export interface EvalGraderPython extends GraderModelsAPI.PythonGrader {
|
|
371
237
|
/**
|
|
372
238
|
* The threshold for the score.
|
|
373
239
|
*/
|
|
@@ -377,85 +243,11 @@ export namespace EvalCreateResponse {
|
|
|
377
243
|
/**
|
|
378
244
|
* A ScoreModelGrader object that uses a model to assign a score to the input.
|
|
379
245
|
*/
|
|
380
|
-
export interface
|
|
381
|
-
/**
|
|
382
|
-
* The input text. This may include template strings.
|
|
383
|
-
*/
|
|
384
|
-
input: Array<ScoreModel.Input>;
|
|
385
|
-
|
|
386
|
-
/**
|
|
387
|
-
* The model to use for the evaluation.
|
|
388
|
-
*/
|
|
389
|
-
model: string;
|
|
390
|
-
|
|
391
|
-
/**
|
|
392
|
-
* The name of the grader.
|
|
393
|
-
*/
|
|
394
|
-
name: string;
|
|
395
|
-
|
|
396
|
-
/**
|
|
397
|
-
* The object type, which is always `score_model`.
|
|
398
|
-
*/
|
|
399
|
-
type: 'score_model';
|
|
400
|
-
|
|
246
|
+
export interface EvalGraderScoreModel extends GraderModelsAPI.ScoreModelGrader {
|
|
401
247
|
/**
|
|
402
248
|
* The threshold for the score.
|
|
403
249
|
*/
|
|
404
250
|
pass_threshold?: number;
|
|
405
|
-
|
|
406
|
-
/**
|
|
407
|
-
* The range of the score. Defaults to `[0, 1]`.
|
|
408
|
-
*/
|
|
409
|
-
range?: Array<number>;
|
|
410
|
-
|
|
411
|
-
/**
|
|
412
|
-
* The sampling parameters for the model.
|
|
413
|
-
*/
|
|
414
|
-
sampling_params?: unknown;
|
|
415
|
-
}
|
|
416
|
-
|
|
417
|
-
export namespace ScoreModel {
|
|
418
|
-
/**
|
|
419
|
-
* A message input to the model with a role indicating instruction following
|
|
420
|
-
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
421
|
-
* precedence over instructions given with the `user` role. Messages with the
|
|
422
|
-
* `assistant` role are presumed to have been generated by the model in previous
|
|
423
|
-
* interactions.
|
|
424
|
-
*/
|
|
425
|
-
export interface Input {
|
|
426
|
-
/**
|
|
427
|
-
* Text inputs to the model - can contain template strings.
|
|
428
|
-
*/
|
|
429
|
-
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
430
|
-
|
|
431
|
-
/**
|
|
432
|
-
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
433
|
-
* `developer`.
|
|
434
|
-
*/
|
|
435
|
-
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
436
|
-
|
|
437
|
-
/**
|
|
438
|
-
* The type of the message input. Always `message`.
|
|
439
|
-
*/
|
|
440
|
-
type?: 'message';
|
|
441
|
-
}
|
|
442
|
-
|
|
443
|
-
export namespace Input {
|
|
444
|
-
/**
|
|
445
|
-
* A text output from the model.
|
|
446
|
-
*/
|
|
447
|
-
export interface OutputText {
|
|
448
|
-
/**
|
|
449
|
-
* The text output from the model.
|
|
450
|
-
*/
|
|
451
|
-
text: string;
|
|
452
|
-
|
|
453
|
-
/**
|
|
454
|
-
* The type of the output text. Always `output_text`.
|
|
455
|
-
*/
|
|
456
|
-
type: 'output_text';
|
|
457
|
-
}
|
|
458
|
-
}
|
|
459
251
|
}
|
|
460
252
|
}
|
|
461
253
|
|
|
@@ -481,7 +273,10 @@ export interface EvalRetrieveResponse {
|
|
|
481
273
|
/**
|
|
482
274
|
* Configuration of data sources used in runs of the evaluation.
|
|
483
275
|
*/
|
|
484
|
-
data_source_config:
|
|
276
|
+
data_source_config:
|
|
277
|
+
| EvalCustomDataSourceConfig
|
|
278
|
+
| EvalRetrieveResponse.Logs
|
|
279
|
+
| EvalStoredCompletionsDataSourceConfig;
|
|
485
280
|
|
|
486
281
|
/**
|
|
487
282
|
* Set of 16 key-value pairs that can be attached to an object. This can be useful
|
|
@@ -507,39 +302,59 @@ export interface EvalRetrieveResponse {
|
|
|
507
302
|
* A list of testing criteria.
|
|
508
303
|
*/
|
|
509
304
|
testing_criteria: Array<
|
|
510
|
-
|
|
|
511
|
-
|
|
|
512
|
-
|
|
|
513
|
-
| EvalRetrieveResponse.
|
|
514
|
-
| EvalRetrieveResponse.
|
|
305
|
+
| GraderModelsAPI.LabelModelGrader
|
|
306
|
+
| GraderModelsAPI.StringCheckGrader
|
|
307
|
+
| EvalRetrieveResponse.EvalGraderTextSimilarity
|
|
308
|
+
| EvalRetrieveResponse.EvalGraderPython
|
|
309
|
+
| EvalRetrieveResponse.EvalGraderScoreModel
|
|
515
310
|
>;
|
|
516
311
|
}
|
|
517
312
|
|
|
518
313
|
export namespace EvalRetrieveResponse {
|
|
519
314
|
/**
|
|
520
|
-
* A
|
|
315
|
+
* A LogsDataSourceConfig which specifies the metadata property of your logs query.
|
|
316
|
+
* This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc. The
|
|
317
|
+
* schema returned by this data source config is used to defined what variables are
|
|
318
|
+
* available in your evals. `item` and `sample` are both defined when using this
|
|
319
|
+
* data source config.
|
|
521
320
|
*/
|
|
522
|
-
export interface
|
|
321
|
+
export interface Logs {
|
|
523
322
|
/**
|
|
524
|
-
* The
|
|
323
|
+
* The json schema for the run data source items. Learn how to build JSON schemas
|
|
324
|
+
* [here](https://json-schema.org/).
|
|
525
325
|
*/
|
|
526
|
-
|
|
326
|
+
schema: Record<string, unknown>;
|
|
527
327
|
|
|
528
328
|
/**
|
|
529
|
-
* The
|
|
329
|
+
* The type of data source. Always `logs`.
|
|
530
330
|
*/
|
|
531
|
-
|
|
331
|
+
type: 'logs';
|
|
532
332
|
|
|
533
333
|
/**
|
|
534
|
-
*
|
|
334
|
+
* Set of 16 key-value pairs that can be attached to an object. This can be useful
|
|
335
|
+
* for storing additional information about the object in a structured format, and
|
|
336
|
+
* querying for objects via API or the dashboard.
|
|
337
|
+
*
|
|
338
|
+
* Keys are strings with a maximum length of 64 characters. Values are strings with
|
|
339
|
+
* a maximum length of 512 characters.
|
|
535
340
|
*/
|
|
536
|
-
|
|
341
|
+
metadata?: Shared.Metadata | null;
|
|
342
|
+
}
|
|
537
343
|
|
|
344
|
+
/**
|
|
345
|
+
* A TextSimilarityGrader object which grades text based on similarity metrics.
|
|
346
|
+
*/
|
|
347
|
+
export interface EvalGraderTextSimilarity extends GraderModelsAPI.TextSimilarityGrader {
|
|
538
348
|
/**
|
|
539
|
-
* The
|
|
349
|
+
* The threshold for the score.
|
|
540
350
|
*/
|
|
541
|
-
|
|
351
|
+
pass_threshold: number;
|
|
352
|
+
}
|
|
542
353
|
|
|
354
|
+
/**
|
|
355
|
+
* A PythonGrader object that runs a python script on the input.
|
|
356
|
+
*/
|
|
357
|
+
export interface EvalGraderPython extends GraderModelsAPI.PythonGrader {
|
|
543
358
|
/**
|
|
544
359
|
* The threshold for the score.
|
|
545
360
|
*/
|
|
@@ -549,85 +364,11 @@ export namespace EvalRetrieveResponse {
|
|
|
549
364
|
/**
|
|
550
365
|
* A ScoreModelGrader object that uses a model to assign a score to the input.
|
|
551
366
|
*/
|
|
552
|
-
export interface
|
|
553
|
-
/**
|
|
554
|
-
* The input text. This may include template strings.
|
|
555
|
-
*/
|
|
556
|
-
input: Array<ScoreModel.Input>;
|
|
557
|
-
|
|
558
|
-
/**
|
|
559
|
-
* The model to use for the evaluation.
|
|
560
|
-
*/
|
|
561
|
-
model: string;
|
|
562
|
-
|
|
563
|
-
/**
|
|
564
|
-
* The name of the grader.
|
|
565
|
-
*/
|
|
566
|
-
name: string;
|
|
567
|
-
|
|
568
|
-
/**
|
|
569
|
-
* The object type, which is always `score_model`.
|
|
570
|
-
*/
|
|
571
|
-
type: 'score_model';
|
|
572
|
-
|
|
367
|
+
export interface EvalGraderScoreModel extends GraderModelsAPI.ScoreModelGrader {
|
|
573
368
|
/**
|
|
574
369
|
* The threshold for the score.
|
|
575
370
|
*/
|
|
576
371
|
pass_threshold?: number;
|
|
577
|
-
|
|
578
|
-
/**
|
|
579
|
-
* The range of the score. Defaults to `[0, 1]`.
|
|
580
|
-
*/
|
|
581
|
-
range?: Array<number>;
|
|
582
|
-
|
|
583
|
-
/**
|
|
584
|
-
* The sampling parameters for the model.
|
|
585
|
-
*/
|
|
586
|
-
sampling_params?: unknown;
|
|
587
|
-
}
|
|
588
|
-
|
|
589
|
-
export namespace ScoreModel {
|
|
590
|
-
/**
|
|
591
|
-
* A message input to the model with a role indicating instruction following
|
|
592
|
-
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
593
|
-
* precedence over instructions given with the `user` role. Messages with the
|
|
594
|
-
* `assistant` role are presumed to have been generated by the model in previous
|
|
595
|
-
* interactions.
|
|
596
|
-
*/
|
|
597
|
-
export interface Input {
|
|
598
|
-
/**
|
|
599
|
-
* Text inputs to the model - can contain template strings.
|
|
600
|
-
*/
|
|
601
|
-
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
602
|
-
|
|
603
|
-
/**
|
|
604
|
-
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
605
|
-
* `developer`.
|
|
606
|
-
*/
|
|
607
|
-
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
608
|
-
|
|
609
|
-
/**
|
|
610
|
-
* The type of the message input. Always `message`.
|
|
611
|
-
*/
|
|
612
|
-
type?: 'message';
|
|
613
|
-
}
|
|
614
|
-
|
|
615
|
-
export namespace Input {
|
|
616
|
-
/**
|
|
617
|
-
* A text output from the model.
|
|
618
|
-
*/
|
|
619
|
-
export interface OutputText {
|
|
620
|
-
/**
|
|
621
|
-
* The text output from the model.
|
|
622
|
-
*/
|
|
623
|
-
text: string;
|
|
624
|
-
|
|
625
|
-
/**
|
|
626
|
-
* The type of the output text. Always `output_text`.
|
|
627
|
-
*/
|
|
628
|
-
type: 'output_text';
|
|
629
|
-
}
|
|
630
|
-
}
|
|
631
372
|
}
|
|
632
373
|
}
|
|
633
374
|
|
|
@@ -653,7 +394,10 @@ export interface EvalUpdateResponse {
|
|
|
653
394
|
/**
|
|
654
395
|
* Configuration of data sources used in runs of the evaluation.
|
|
655
396
|
*/
|
|
656
|
-
data_source_config:
|
|
397
|
+
data_source_config:
|
|
398
|
+
| EvalCustomDataSourceConfig
|
|
399
|
+
| EvalUpdateResponse.Logs
|
|
400
|
+
| EvalStoredCompletionsDataSourceConfig;
|
|
657
401
|
|
|
658
402
|
/**
|
|
659
403
|
* Set of 16 key-value pairs that can be attached to an object. This can be useful
|
|
@@ -679,39 +423,59 @@ export interface EvalUpdateResponse {
|
|
|
679
423
|
* A list of testing criteria.
|
|
680
424
|
*/
|
|
681
425
|
testing_criteria: Array<
|
|
682
|
-
|
|
|
683
|
-
|
|
|
684
|
-
|
|
|
685
|
-
| EvalUpdateResponse.
|
|
686
|
-
| EvalUpdateResponse.
|
|
426
|
+
| GraderModelsAPI.LabelModelGrader
|
|
427
|
+
| GraderModelsAPI.StringCheckGrader
|
|
428
|
+
| EvalUpdateResponse.EvalGraderTextSimilarity
|
|
429
|
+
| EvalUpdateResponse.EvalGraderPython
|
|
430
|
+
| EvalUpdateResponse.EvalGraderScoreModel
|
|
687
431
|
>;
|
|
688
432
|
}
|
|
689
433
|
|
|
690
434
|
export namespace EvalUpdateResponse {
|
|
691
435
|
/**
|
|
692
|
-
* A
|
|
436
|
+
* A LogsDataSourceConfig which specifies the metadata property of your logs query.
|
|
437
|
+
* This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc. The
|
|
438
|
+
* schema returned by this data source config is used to define what variables are
|
|
439
|
+
* available in your evals. `item` and `sample` are both defined when using this
|
|
440
|
+
* data source config.
|
|
693
441
|
*/
|
|
694
|
-
export interface
|
|
442
|
+
export interface Logs {
|
|
695
443
|
/**
|
|
696
|
-
* The
|
|
444
|
+
* The json schema for the run data source items. Learn how to build JSON schemas
|
|
445
|
+
* [here](https://json-schema.org/).
|
|
697
446
|
*/
|
|
698
|
-
|
|
447
|
+
schema: Record<string, unknown>;
|
|
699
448
|
|
|
700
449
|
/**
|
|
701
|
-
* The
|
|
450
|
+
* The type of data source. Always `logs`.
|
|
702
451
|
*/
|
|
703
|
-
|
|
452
|
+
type: 'logs';
|
|
704
453
|
|
|
705
454
|
/**
|
|
706
|
-
*
|
|
455
|
+
* Set of 16 key-value pairs that can be attached to an object. This can be useful
|
|
456
|
+
* for storing additional information about the object in a structured format, and
|
|
457
|
+
* querying for objects via API or the dashboard.
|
|
458
|
+
*
|
|
459
|
+
* Keys are strings with a maximum length of 64 characters. Values are strings with
|
|
460
|
+
* a maximum length of 512 characters.
|
|
707
461
|
*/
|
|
708
|
-
|
|
462
|
+
metadata?: Shared.Metadata | null;
|
|
463
|
+
}
|
|
709
464
|
|
|
465
|
+
/**
|
|
466
|
+
* A TextSimilarityGrader object which grades text based on similarity metrics.
|
|
467
|
+
*/
|
|
468
|
+
export interface EvalGraderTextSimilarity extends GraderModelsAPI.TextSimilarityGrader {
|
|
710
469
|
/**
|
|
711
|
-
* The
|
|
470
|
+
* The threshold for the score.
|
|
712
471
|
*/
|
|
713
|
-
|
|
472
|
+
pass_threshold: number;
|
|
473
|
+
}
|
|
714
474
|
|
|
475
|
+
/**
|
|
476
|
+
* A PythonGrader object that runs a python script on the input.
|
|
477
|
+
*/
|
|
478
|
+
export interface EvalGraderPython extends GraderModelsAPI.PythonGrader {
|
|
715
479
|
/**
|
|
716
480
|
* The threshold for the score.
|
|
717
481
|
*/
|
|
@@ -721,85 +485,11 @@ export namespace EvalUpdateResponse {
|
|
|
721
485
|
/**
|
|
722
486
|
* A ScoreModelGrader object that uses a model to assign a score to the input.
|
|
723
487
|
*/
|
|
724
|
-
export interface
|
|
725
|
-
/**
|
|
726
|
-
* The input text. This may include template strings.
|
|
727
|
-
*/
|
|
728
|
-
input: Array<ScoreModel.Input>;
|
|
729
|
-
|
|
730
|
-
/**
|
|
731
|
-
* The model to use for the evaluation.
|
|
732
|
-
*/
|
|
733
|
-
model: string;
|
|
734
|
-
|
|
735
|
-
/**
|
|
736
|
-
* The name of the grader.
|
|
737
|
-
*/
|
|
738
|
-
name: string;
|
|
739
|
-
|
|
740
|
-
/**
|
|
741
|
-
* The object type, which is always `score_model`.
|
|
742
|
-
*/
|
|
743
|
-
type: 'score_model';
|
|
744
|
-
|
|
488
|
+
export interface EvalGraderScoreModel extends GraderModelsAPI.ScoreModelGrader {
|
|
745
489
|
/**
|
|
746
490
|
* The threshold for the score.
|
|
747
491
|
*/
|
|
748
492
|
pass_threshold?: number;
|
|
749
|
-
|
|
750
|
-
/**
|
|
751
|
-
* The range of the score. Defaults to `[0, 1]`.
|
|
752
|
-
*/
|
|
753
|
-
range?: Array<number>;
|
|
754
|
-
|
|
755
|
-
/**
|
|
756
|
-
* The sampling parameters for the model.
|
|
757
|
-
*/
|
|
758
|
-
sampling_params?: unknown;
|
|
759
|
-
}
|
|
760
|
-
|
|
761
|
-
export namespace ScoreModel {
|
|
762
|
-
/**
|
|
763
|
-
* A message input to the model with a role indicating instruction following
|
|
764
|
-
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
765
|
-
* precedence over instructions given with the `user` role. Messages with the
|
|
766
|
-
* `assistant` role are presumed to have been generated by the model in previous
|
|
767
|
-
* interactions.
|
|
768
|
-
*/
|
|
769
|
-
export interface Input {
|
|
770
|
-
/**
|
|
771
|
-
* Text inputs to the model - can contain template strings.
|
|
772
|
-
*/
|
|
773
|
-
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
774
|
-
|
|
775
|
-
/**
|
|
776
|
-
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
777
|
-
* `developer`.
|
|
778
|
-
*/
|
|
779
|
-
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
780
|
-
|
|
781
|
-
/**
|
|
782
|
-
* The type of the message input. Always `message`.
|
|
783
|
-
*/
|
|
784
|
-
type?: 'message';
|
|
785
|
-
}
|
|
786
|
-
|
|
787
|
-
export namespace Input {
|
|
788
|
-
/**
|
|
789
|
-
* A text output from the model.
|
|
790
|
-
*/
|
|
791
|
-
export interface OutputText {
|
|
792
|
-
/**
|
|
793
|
-
* The text output from the model.
|
|
794
|
-
*/
|
|
795
|
-
text: string;
|
|
796
|
-
|
|
797
|
-
/**
|
|
798
|
-
* The type of the output text. Always `output_text`.
|
|
799
|
-
*/
|
|
800
|
-
type: 'output_text';
|
|
801
|
-
}
|
|
802
|
-
}
|
|
803
493
|
}
|
|
804
494
|
}
|
|
805
495
|
|
|
@@ -825,7 +515,10 @@ export interface EvalListResponse {
|
|
|
825
515
|
/**
|
|
826
516
|
* Configuration of data sources used in runs of the evaluation.
|
|
827
517
|
*/
|
|
828
|
-
data_source_config:
|
|
518
|
+
data_source_config:
|
|
519
|
+
| EvalCustomDataSourceConfig
|
|
520
|
+
| EvalListResponse.Logs
|
|
521
|
+
| EvalStoredCompletionsDataSourceConfig;
|
|
829
522
|
|
|
830
523
|
/**
|
|
831
524
|
* Set of 16 key-value pairs that can be attached to an object. This can be useful
|
|
@@ -851,39 +544,59 @@ export interface EvalListResponse {
|
|
|
851
544
|
* A list of testing criteria.
|
|
852
545
|
*/
|
|
853
546
|
testing_criteria: Array<
|
|
854
|
-
|
|
|
855
|
-
|
|
|
856
|
-
|
|
|
857
|
-
| EvalListResponse.
|
|
858
|
-
| EvalListResponse.
|
|
547
|
+
| GraderModelsAPI.LabelModelGrader
|
|
548
|
+
| GraderModelsAPI.StringCheckGrader
|
|
549
|
+
| EvalListResponse.EvalGraderTextSimilarity
|
|
550
|
+
| EvalListResponse.EvalGraderPython
|
|
551
|
+
| EvalListResponse.EvalGraderScoreModel
|
|
859
552
|
>;
|
|
860
553
|
}
|
|
861
554
|
|
|
862
555
|
export namespace EvalListResponse {
|
|
863
556
|
/**
|
|
864
|
-
* A
|
|
557
|
+
* A LogsDataSourceConfig which specifies the metadata property of your logs query.
|
|
558
|
+
* This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc. The
|
|
559
|
+
* schema returned by this data source config is used to define what variables are
|
|
560
|
+
* available in your evals. `item` and `sample` are both defined when using this
|
|
561
|
+
* data source config.
|
|
865
562
|
*/
|
|
866
|
-
export interface
|
|
563
|
+
export interface Logs {
|
|
867
564
|
/**
|
|
868
|
-
* The
|
|
565
|
+
* The json schema for the run data source items. Learn how to build JSON schemas
|
|
566
|
+
* [here](https://json-schema.org/).
|
|
869
567
|
*/
|
|
870
|
-
|
|
568
|
+
schema: Record<string, unknown>;
|
|
871
569
|
|
|
872
570
|
/**
|
|
873
|
-
* The
|
|
571
|
+
* The type of data source. Always `logs`.
|
|
874
572
|
*/
|
|
875
|
-
|
|
573
|
+
type: 'logs';
|
|
876
574
|
|
|
877
575
|
/**
|
|
878
|
-
*
|
|
576
|
+
* Set of 16 key-value pairs that can be attached to an object. This can be useful
|
|
577
|
+
* for storing additional information about the object in a structured format, and
|
|
578
|
+
* querying for objects via API or the dashboard.
|
|
579
|
+
*
|
|
580
|
+
* Keys are strings with a maximum length of 64 characters. Values are strings with
|
|
581
|
+
* a maximum length of 512 characters.
|
|
879
582
|
*/
|
|
880
|
-
|
|
583
|
+
metadata?: Shared.Metadata | null;
|
|
584
|
+
}
|
|
881
585
|
|
|
586
|
+
/**
|
|
587
|
+
* A TextSimilarityGrader object which grades text based on similarity metrics.
|
|
588
|
+
*/
|
|
589
|
+
export interface EvalGraderTextSimilarity extends GraderModelsAPI.TextSimilarityGrader {
|
|
882
590
|
/**
|
|
883
|
-
* The
|
|
591
|
+
* The threshold for the score.
|
|
884
592
|
*/
|
|
885
|
-
|
|
593
|
+
pass_threshold: number;
|
|
594
|
+
}
|
|
886
595
|
|
|
596
|
+
/**
|
|
597
|
+
* A PythonGrader object that runs a python script on the input.
|
|
598
|
+
*/
|
|
599
|
+
export interface EvalGraderPython extends GraderModelsAPI.PythonGrader {
|
|
887
600
|
/**
|
|
888
601
|
* The threshold for the score.
|
|
889
602
|
*/
|
|
@@ -893,85 +606,11 @@ export namespace EvalListResponse {
|
|
|
893
606
|
/**
|
|
894
607
|
* A ScoreModelGrader object that uses a model to assign a score to the input.
|
|
895
608
|
*/
|
|
896
|
-
export interface
|
|
897
|
-
/**
|
|
898
|
-
* The input text. This may include template strings.
|
|
899
|
-
*/
|
|
900
|
-
input: Array<ScoreModel.Input>;
|
|
901
|
-
|
|
902
|
-
/**
|
|
903
|
-
* The model to use for the evaluation.
|
|
904
|
-
*/
|
|
905
|
-
model: string;
|
|
906
|
-
|
|
907
|
-
/**
|
|
908
|
-
* The name of the grader.
|
|
909
|
-
*/
|
|
910
|
-
name: string;
|
|
911
|
-
|
|
912
|
-
/**
|
|
913
|
-
* The object type, which is always `score_model`.
|
|
914
|
-
*/
|
|
915
|
-
type: 'score_model';
|
|
916
|
-
|
|
609
|
+
export interface EvalGraderScoreModel extends GraderModelsAPI.ScoreModelGrader {
|
|
917
610
|
/**
|
|
918
611
|
* The threshold for the score.
|
|
919
612
|
*/
|
|
920
613
|
pass_threshold?: number;
|
|
921
|
-
|
|
922
|
-
/**
|
|
923
|
-
* The range of the score. Defaults to `[0, 1]`.
|
|
924
|
-
*/
|
|
925
|
-
range?: Array<number>;
|
|
926
|
-
|
|
927
|
-
/**
|
|
928
|
-
* The sampling parameters for the model.
|
|
929
|
-
*/
|
|
930
|
-
sampling_params?: unknown;
|
|
931
|
-
}
|
|
932
|
-
|
|
933
|
-
export namespace ScoreModel {
|
|
934
|
-
/**
|
|
935
|
-
* A message input to the model with a role indicating instruction following
|
|
936
|
-
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
937
|
-
* precedence over instructions given with the `user` role. Messages with the
|
|
938
|
-
* `assistant` role are presumed to have been generated by the model in previous
|
|
939
|
-
* interactions.
|
|
940
|
-
*/
|
|
941
|
-
export interface Input {
|
|
942
|
-
/**
|
|
943
|
-
* Text inputs to the model - can contain template strings.
|
|
944
|
-
*/
|
|
945
|
-
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
946
|
-
|
|
947
|
-
/**
|
|
948
|
-
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
949
|
-
* `developer`.
|
|
950
|
-
*/
|
|
951
|
-
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
952
|
-
|
|
953
|
-
/**
|
|
954
|
-
* The type of the message input. Always `message`.
|
|
955
|
-
*/
|
|
956
|
-
type?: 'message';
|
|
957
|
-
}
|
|
958
|
-
|
|
959
|
-
export namespace Input {
|
|
960
|
-
/**
|
|
961
|
-
* A text output from the model.
|
|
962
|
-
*/
|
|
963
|
-
export interface OutputText {
|
|
964
|
-
/**
|
|
965
|
-
* The text output from the model.
|
|
966
|
-
*/
|
|
967
|
-
text: string;
|
|
968
|
-
|
|
969
|
-
/**
|
|
970
|
-
* The type of the output text. Always `output_text`.
|
|
971
|
-
*/
|
|
972
|
-
type: 'output_text';
|
|
973
|
-
}
|
|
974
|
-
}
|
|
975
614
|
}
|
|
976
615
|
}
|
|
977
616
|
|
|
@@ -987,15 +626,15 @@ export interface EvalCreateParams {
|
|
|
987
626
|
/**
|
|
988
627
|
* The configuration for the data source used for the evaluation runs.
|
|
989
628
|
*/
|
|
990
|
-
data_source_config: EvalCreateParams.Custom | EvalCreateParams.Logs;
|
|
629
|
+
data_source_config: EvalCreateParams.Custom | EvalCreateParams.Logs | EvalCreateParams.StoredCompletions;
|
|
991
630
|
|
|
992
631
|
/**
|
|
993
632
|
* A list of graders for all eval runs in this group.
|
|
994
633
|
*/
|
|
995
634
|
testing_criteria: Array<
|
|
996
635
|
| EvalCreateParams.LabelModel
|
|
997
|
-
|
|
|
998
|
-
|
|
|
636
|
+
| GraderModelsAPI.StringCheckGrader
|
|
637
|
+
| EvalCreateParams.TextSimilarity
|
|
999
638
|
| EvalCreateParams.Python
|
|
1000
639
|
| EvalCreateParams.ScoreModel
|
|
1001
640
|
>;
|
|
@@ -1044,9 +683,8 @@ export namespace EvalCreateParams {
|
|
|
1044
683
|
}
|
|
1045
684
|
|
|
1046
685
|
/**
|
|
1047
|
-
* A data source config which specifies the metadata property of your
|
|
1048
|
-
*
|
|
1049
|
-
* `prompt-version=v2`, etc.
|
|
686
|
+
* A data source config which specifies the metadata property of your logs query.
|
|
687
|
+
* This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc.
|
|
1050
688
|
*/
|
|
1051
689
|
export interface Logs {
|
|
1052
690
|
/**
|
|
@@ -1060,6 +698,21 @@ export namespace EvalCreateParams {
|
|
|
1060
698
|
metadata?: Record<string, unknown>;
|
|
1061
699
|
}
|
|
1062
700
|
|
|
701
|
+
/**
|
|
702
|
+
* Deprecated in favor of LogsDataSourceConfig.
|
|
703
|
+
*/
|
|
704
|
+
export interface StoredCompletions {
|
|
705
|
+
/**
|
|
706
|
+
* The type of data source. Always `stored-completions`.
|
|
707
|
+
*/
|
|
708
|
+
type: 'stored-completions';
|
|
709
|
+
|
|
710
|
+
/**
|
|
711
|
+
* Metadata filters for the stored completions data source.
|
|
712
|
+
*/
|
|
713
|
+
metadata?: Record<string, unknown>;
|
|
714
|
+
}
|
|
715
|
+
|
|
1063
716
|
/**
|
|
1064
717
|
* A LabelModelGrader object which uses a model to assign labels to each item in
|
|
1065
718
|
* the evaluation.
|
|
@@ -1154,29 +807,19 @@ export namespace EvalCreateParams {
|
|
|
1154
807
|
}
|
|
1155
808
|
|
|
1156
809
|
/**
|
|
1157
|
-
* A
|
|
810
|
+
* A TextSimilarityGrader object which grades text based on similarity metrics.
|
|
1158
811
|
*/
|
|
1159
|
-
export interface
|
|
812
|
+
export interface TextSimilarity extends GraderModelsAPI.TextSimilarityGrader {
|
|
1160
813
|
/**
|
|
1161
|
-
* The
|
|
1162
|
-
*/
|
|
1163
|
-
name: string;
|
|
1164
|
-
|
|
1165
|
-
/**
|
|
1166
|
-
* The source code of the python script.
|
|
1167
|
-
*/
|
|
1168
|
-
source: string;
|
|
1169
|
-
|
|
1170
|
-
/**
|
|
1171
|
-
* The object type, which is always `python`.
|
|
1172
|
-
*/
|
|
1173
|
-
type: 'python';
|
|
1174
|
-
|
|
1175
|
-
/**
|
|
1176
|
-
* The image tag to use for the python script.
|
|
814
|
+
* The threshold for the score.
|
|
1177
815
|
*/
|
|
1178
|
-
|
|
816
|
+
pass_threshold: number;
|
|
817
|
+
}
|
|
1179
818
|
|
|
819
|
+
/**
|
|
820
|
+
* A PythonGrader object that runs a python script on the input.
|
|
821
|
+
*/
|
|
822
|
+
export interface Python extends GraderModelsAPI.PythonGrader {
|
|
1180
823
|
/**
|
|
1181
824
|
* The threshold for the score.
|
|
1182
825
|
*/
|
|
@@ -1186,85 +829,11 @@ export namespace EvalCreateParams {
|
|
|
1186
829
|
/**
|
|
1187
830
|
* A ScoreModelGrader object that uses a model to assign a score to the input.
|
|
1188
831
|
*/
|
|
1189
|
-
export interface ScoreModel {
|
|
1190
|
-
/**
|
|
1191
|
-
* The input text. This may include template strings.
|
|
1192
|
-
*/
|
|
1193
|
-
input: Array<ScoreModel.Input>;
|
|
1194
|
-
|
|
1195
|
-
/**
|
|
1196
|
-
* The model to use for the evaluation.
|
|
1197
|
-
*/
|
|
1198
|
-
model: string;
|
|
1199
|
-
|
|
1200
|
-
/**
|
|
1201
|
-
* The name of the grader.
|
|
1202
|
-
*/
|
|
1203
|
-
name: string;
|
|
1204
|
-
|
|
1205
|
-
/**
|
|
1206
|
-
* The object type, which is always `score_model`.
|
|
1207
|
-
*/
|
|
1208
|
-
type: 'score_model';
|
|
1209
|
-
|
|
832
|
+
export interface ScoreModel extends GraderModelsAPI.ScoreModelGrader {
|
|
1210
833
|
/**
|
|
1211
834
|
* The threshold for the score.
|
|
1212
835
|
*/
|
|
1213
836
|
pass_threshold?: number;
|
|
1214
|
-
|
|
1215
|
-
/**
|
|
1216
|
-
* The range of the score. Defaults to `[0, 1]`.
|
|
1217
|
-
*/
|
|
1218
|
-
range?: Array<number>;
|
|
1219
|
-
|
|
1220
|
-
/**
|
|
1221
|
-
* The sampling parameters for the model.
|
|
1222
|
-
*/
|
|
1223
|
-
sampling_params?: unknown;
|
|
1224
|
-
}
|
|
1225
|
-
|
|
1226
|
-
export namespace ScoreModel {
|
|
1227
|
-
/**
|
|
1228
|
-
* A message input to the model with a role indicating instruction following
|
|
1229
|
-
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
1230
|
-
* precedence over instructions given with the `user` role. Messages with the
|
|
1231
|
-
* `assistant` role are presumed to have been generated by the model in previous
|
|
1232
|
-
* interactions.
|
|
1233
|
-
*/
|
|
1234
|
-
export interface Input {
|
|
1235
|
-
/**
|
|
1236
|
-
* Text inputs to the model - can contain template strings.
|
|
1237
|
-
*/
|
|
1238
|
-
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
1239
|
-
|
|
1240
|
-
/**
|
|
1241
|
-
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
1242
|
-
* `developer`.
|
|
1243
|
-
*/
|
|
1244
|
-
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
1245
|
-
|
|
1246
|
-
/**
|
|
1247
|
-
* The type of the message input. Always `message`.
|
|
1248
|
-
*/
|
|
1249
|
-
type?: 'message';
|
|
1250
|
-
}
|
|
1251
|
-
|
|
1252
|
-
export namespace Input {
|
|
1253
|
-
/**
|
|
1254
|
-
* A text output from the model.
|
|
1255
|
-
*/
|
|
1256
|
-
export interface OutputText {
|
|
1257
|
-
/**
|
|
1258
|
-
* The text output from the model.
|
|
1259
|
-
*/
|
|
1260
|
-
text: string;
|
|
1261
|
-
|
|
1262
|
-
/**
|
|
1263
|
-
* The type of the output text. Always `output_text`.
|
|
1264
|
-
*/
|
|
1265
|
-
type: 'output_text';
|
|
1266
|
-
}
|
|
1267
|
-
}
|
|
1268
837
|
}
|
|
1269
838
|
}
|
|
1270
839
|
|
|
@@ -1306,10 +875,7 @@ Evals.RunListResponsesPage = RunListResponsesPage;
|
|
|
1306
875
|
export declare namespace Evals {
|
|
1307
876
|
export {
|
|
1308
877
|
type EvalCustomDataSourceConfig as EvalCustomDataSourceConfig,
|
|
1309
|
-
type EvalLabelModelGrader as EvalLabelModelGrader,
|
|
1310
878
|
type EvalStoredCompletionsDataSourceConfig as EvalStoredCompletionsDataSourceConfig,
|
|
1311
|
-
type EvalStringCheckGrader as EvalStringCheckGrader,
|
|
1312
|
-
type EvalTextSimilarityGrader as EvalTextSimilarityGrader,
|
|
1313
879
|
type EvalCreateResponse as EvalCreateResponse,
|
|
1314
880
|
type EvalRetrieveResponse as EvalRetrieveResponse,
|
|
1315
881
|
type EvalUpdateResponse as EvalUpdateResponse,
|