openai 4.97.0 → 4.99.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/index.d.mts +5 -2
- package/index.d.ts +5 -2
- package/index.d.ts.map +1 -1
- package/index.js +3 -0
- package/index.js.map +1 -1
- package/index.mjs +3 -0
- package/index.mjs.map +1 -1
- package/package.json +1 -1
- package/resources/audio/speech.d.ts +12 -0
- package/resources/audio/speech.d.ts.map +1 -1
- package/resources/audio/speech.js +12 -0
- package/resources/audio/speech.js.map +1 -1
- package/resources/audio/speech.mjs +12 -0
- package/resources/audio/speech.mjs.map +1 -1
- package/resources/audio/transcriptions.d.ts +38 -0
- package/resources/audio/transcriptions.d.ts.map +1 -1
- package/resources/audio/transcriptions.js.map +1 -1
- package/resources/audio/transcriptions.mjs.map +1 -1
- package/resources/audio/translations.d.ts +8 -0
- package/resources/audio/translations.d.ts.map +1 -1
- package/resources/audio/translations.js.map +1 -1
- package/resources/audio/translations.mjs.map +1 -1
- package/resources/beta/assistants.d.ts +36 -0
- package/resources/beta/assistants.d.ts.map +1 -1
- package/resources/beta/assistants.js +28 -0
- package/resources/beta/assistants.js.map +1 -1
- package/resources/beta/assistants.mjs +28 -0
- package/resources/beta/assistants.mjs.map +1 -1
- package/resources/beta/realtime/sessions.d.ts +6 -0
- package/resources/beta/realtime/sessions.d.ts.map +1 -1
- package/resources/beta/realtime/sessions.js +6 -0
- package/resources/beta/realtime/sessions.js.map +1 -1
- package/resources/beta/realtime/sessions.mjs +6 -0
- package/resources/beta/realtime/sessions.mjs.map +1 -1
- package/resources/beta/realtime/transcription-sessions.d.ts +6 -0
- package/resources/beta/realtime/transcription-sessions.d.ts.map +1 -1
- package/resources/beta/realtime/transcription-sessions.js +6 -0
- package/resources/beta/realtime/transcription-sessions.js.map +1 -1
- package/resources/beta/realtime/transcription-sessions.mjs +6 -0
- package/resources/beta/realtime/transcription-sessions.mjs.map +1 -1
- package/resources/beta/threads/messages.d.ts +43 -0
- package/resources/beta/threads/messages.d.ts.map +1 -1
- package/resources/beta/threads/messages.js +33 -0
- package/resources/beta/threads/messages.js.map +1 -1
- package/resources/beta/threads/messages.mjs +33 -0
- package/resources/beta/threads/messages.mjs.map +1 -1
- package/resources/beta/threads/runs/runs.d.ts +52 -0
- package/resources/beta/threads/runs/runs.d.ts.map +1 -1
- package/resources/beta/threads/runs/runs.js +24 -0
- package/resources/beta/threads/runs/runs.js.map +1 -1
- package/resources/beta/threads/runs/runs.mjs +24 -0
- package/resources/beta/threads/runs/runs.mjs.map +1 -1
- package/resources/beta/threads/runs/steps.d.ts +21 -0
- package/resources/beta/threads/runs/steps.d.ts.map +1 -1
- package/resources/beta/threads/runs/steps.js.map +1 -1
- package/resources/beta/threads/runs/steps.mjs.map +1 -1
- package/resources/beta/threads/threads.d.ts +33 -0
- package/resources/beta/threads/threads.d.ts.map +1 -1
- package/resources/beta/threads/threads.js +21 -0
- package/resources/beta/threads/threads.js.map +1 -1
- package/resources/beta/threads/threads.mjs +21 -0
- package/resources/beta/threads/threads.mjs.map +1 -1
- package/resources/chat/completions/completions.d.ts +38 -0
- package/resources/chat/completions/completions.d.ts.map +1 -1
- package/resources/chat/completions/completions.js +20 -0
- package/resources/chat/completions/completions.js.map +1 -1
- package/resources/chat/completions/completions.mjs +20 -0
- package/resources/chat/completions/completions.mjs.map +1 -1
- package/resources/chat/completions/messages.d.ts +10 -0
- package/resources/chat/completions/messages.d.ts.map +1 -1
- package/resources/chat/completions/messages.js.map +1 -1
- package/resources/chat/completions/messages.mjs.map +1 -1
- package/resources/completions.d.ts +8 -0
- package/resources/completions.d.ts.map +1 -1
- package/resources/completions.js.map +1 -1
- package/resources/completions.mjs.map +1 -1
- package/resources/embeddings.d.ts +13 -3
- package/resources/embeddings.d.ts.map +1 -1
- package/resources/embeddings.js +9 -0
- package/resources/embeddings.js.map +1 -1
- package/resources/embeddings.mjs +9 -0
- package/resources/embeddings.mjs.map +1 -1
- package/resources/evals/evals.d.ts +164 -519
- package/resources/evals/evals.d.ts.map +1 -1
- package/resources/evals/evals.js.map +1 -1
- package/resources/evals/evals.mjs.map +1 -1
- package/resources/evals/index.d.ts +1 -1
- package/resources/evals/index.d.ts.map +1 -1
- package/resources/evals/index.js.map +1 -1
- package/resources/evals/index.mjs.map +1 -1
- package/resources/evals/runs/runs.d.ts +64 -69
- package/resources/evals/runs/runs.d.ts.map +1 -1
- package/resources/evals/runs/runs.js.map +1 -1
- package/resources/evals/runs/runs.mjs.map +1 -1
- package/resources/fine-tuning/alpha/alpha.d.ts +10 -0
- package/resources/fine-tuning/alpha/alpha.d.ts.map +1 -0
- package/resources/fine-tuning/alpha/alpha.js +39 -0
- package/resources/fine-tuning/alpha/alpha.js.map +1 -0
- package/resources/fine-tuning/alpha/alpha.mjs +12 -0
- package/resources/fine-tuning/alpha/alpha.mjs.map +1 -0
- package/resources/fine-tuning/alpha/graders.d.ts +107 -0
- package/resources/fine-tuning/alpha/graders.d.ts.map +1 -0
- package/resources/fine-tuning/alpha/graders.js +50 -0
- package/resources/fine-tuning/alpha/graders.js.map +1 -0
- package/resources/fine-tuning/alpha/graders.mjs +46 -0
- package/resources/fine-tuning/alpha/graders.mjs.map +1 -0
- package/resources/fine-tuning/alpha/index.d.ts +3 -0
- package/resources/fine-tuning/alpha/index.d.ts.map +1 -0
- package/resources/fine-tuning/alpha/index.js +9 -0
- package/resources/fine-tuning/alpha/index.js.map +1 -0
- package/resources/fine-tuning/alpha/index.mjs +4 -0
- package/resources/fine-tuning/alpha/index.mjs.map +1 -0
- package/resources/fine-tuning/alpha.d.ts +2 -0
- package/resources/fine-tuning/alpha.d.ts.map +1 -0
- package/resources/fine-tuning/alpha.js +19 -0
- package/resources/fine-tuning/alpha.js.map +1 -0
- package/resources/fine-tuning/alpha.mjs +3 -0
- package/resources/fine-tuning/alpha.mjs.map +1 -0
- package/resources/fine-tuning/checkpoints/permissions.d.ts +28 -0
- package/resources/fine-tuning/checkpoints/permissions.d.ts.map +1 -1
- package/resources/fine-tuning/checkpoints/permissions.js +20 -0
- package/resources/fine-tuning/checkpoints/permissions.js.map +1 -1
- package/resources/fine-tuning/checkpoints/permissions.mjs +20 -0
- package/resources/fine-tuning/checkpoints/permissions.mjs.map +1 -1
- package/resources/fine-tuning/fine-tuning.d.ts +8 -0
- package/resources/fine-tuning/fine-tuning.d.ts.map +1 -1
- package/resources/fine-tuning/fine-tuning.js +8 -0
- package/resources/fine-tuning/fine-tuning.js.map +1 -1
- package/resources/fine-tuning/fine-tuning.mjs +8 -0
- package/resources/fine-tuning/fine-tuning.mjs.map +1 -1
- package/resources/fine-tuning/index.d.ts +2 -0
- package/resources/fine-tuning/index.d.ts.map +1 -1
- package/resources/fine-tuning/index.js +11 -7
- package/resources/fine-tuning/index.js.map +1 -1
- package/resources/fine-tuning/index.mjs +2 -0
- package/resources/fine-tuning/index.mjs.map +1 -1
- package/resources/fine-tuning/jobs/checkpoints.d.ts +10 -0
- package/resources/fine-tuning/jobs/checkpoints.d.ts.map +1 -1
- package/resources/fine-tuning/jobs/checkpoints.js.map +1 -1
- package/resources/fine-tuning/jobs/checkpoints.mjs.map +1 -1
- package/resources/fine-tuning/jobs/jobs.d.ts +78 -145
- package/resources/fine-tuning/jobs/jobs.d.ts.map +1 -1
- package/resources/fine-tuning/jobs/jobs.js +48 -0
- package/resources/fine-tuning/jobs/jobs.js.map +1 -1
- package/resources/fine-tuning/jobs/jobs.mjs +48 -0
- package/resources/fine-tuning/jobs/jobs.mjs.map +1 -1
- package/resources/fine-tuning/methods.d.ts +120 -0
- package/resources/fine-tuning/methods.d.ts.map +1 -0
- package/resources/fine-tuning/methods.js +9 -0
- package/resources/fine-tuning/methods.js.map +1 -0
- package/resources/fine-tuning/methods.mjs +5 -0
- package/resources/fine-tuning/methods.mjs.map +1 -0
- package/resources/graders/grader-models.d.ts +234 -0
- package/resources/graders/grader-models.d.ts.map +1 -0
- package/resources/graders/grader-models.js +9 -0
- package/resources/graders/grader-models.js.map +1 -0
- package/resources/graders/grader-models.mjs +5 -0
- package/resources/graders/grader-models.mjs.map +1 -0
- package/resources/graders/graders.d.ts +10 -0
- package/resources/graders/graders.d.ts.map +1 -0
- package/resources/graders/graders.js +39 -0
- package/resources/graders/graders.js.map +1 -0
- package/resources/graders/graders.mjs +12 -0
- package/resources/graders/graders.mjs.map +1 -0
- package/resources/graders/index.d.ts +3 -0
- package/resources/graders/index.d.ts.map +1 -0
- package/resources/graders/index.js +9 -0
- package/resources/graders/index.js.map +1 -0
- package/resources/graders/index.mjs +4 -0
- package/resources/graders/index.mjs.map +1 -0
- package/resources/graders.d.ts +2 -0
- package/resources/graders.d.ts.map +1 -0
- package/resources/graders.js +19 -0
- package/resources/graders.js.map +1 -0
- package/resources/graders.mjs +3 -0
- package/resources/graders.mjs.map +1 -0
- package/resources/images.d.ts +22 -0
- package/resources/images.d.ts.map +1 -1
- package/resources/images.js +22 -0
- package/resources/images.js.map +1 -1
- package/resources/images.mjs +22 -0
- package/resources/images.mjs.map +1 -1
- package/resources/index.d.ts +2 -1
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js +3 -1
- package/resources/index.js.map +1 -1
- package/resources/index.mjs +1 -0
- package/resources/index.mjs.map +1 -1
- package/resources/responses/input-items.d.ts +10 -0
- package/resources/responses/input-items.d.ts.map +1 -1
- package/resources/responses/input-items.js.map +1 -1
- package/resources/responses/input-items.mjs.map +1 -1
- package/resources/responses/responses.d.ts +22 -0
- package/resources/responses/responses.d.ts.map +1 -1
- package/resources/responses/responses.js +7 -0
- package/resources/responses/responses.js.map +1 -1
- package/resources/responses/responses.mjs +7 -0
- package/resources/responses/responses.mjs.map +1 -1
- package/resources/shared.d.ts +1 -1
- package/resources/shared.d.ts.map +1 -1
- package/src/index.ts +5 -6
- package/src/resources/audio/speech.ts +12 -0
- package/src/resources/audio/transcriptions.ts +43 -0
- package/src/resources/audio/translations.ts +8 -0
- package/src/resources/beta/assistants.ts +36 -0
- package/src/resources/beta/realtime/sessions.ts +6 -0
- package/src/resources/beta/realtime/transcription-sessions.ts +6 -0
- package/src/resources/beta/threads/messages.ts +43 -0
- package/src/resources/beta/threads/runs/runs.ts +52 -0
- package/src/resources/beta/threads/runs/steps.ts +21 -0
- package/src/resources/beta/threads/threads.ts +33 -0
- package/src/resources/chat/completions/completions.ts +38 -0
- package/src/resources/chat/completions/messages.ts +10 -0
- package/src/resources/completions.ts +8 -0
- package/src/resources/embeddings.ts +13 -3
- package/src/resources/evals/evals.ts +194 -628
- package/src/resources/evals/index.ts +0 -3
- package/src/resources/evals/runs/runs.ts +69 -77
- package/src/resources/fine-tuning/alpha/alpha.ts +27 -0
- package/src/resources/fine-tuning/alpha/graders.ts +168 -0
- package/src/resources/fine-tuning/alpha/index.ts +10 -0
- package/src/resources/fine-tuning/alpha.ts +3 -0
- package/src/resources/fine-tuning/checkpoints/permissions.ts +28 -0
- package/src/resources/fine-tuning/fine-tuning.ts +28 -0
- package/src/resources/fine-tuning/index.ts +10 -0
- package/src/resources/fine-tuning/jobs/checkpoints.ts +10 -0
- package/src/resources/fine-tuning/jobs/jobs.ts +84 -161
- package/src/resources/fine-tuning/methods.ts +152 -0
- package/src/resources/graders/grader-models.ts +296 -0
- package/src/resources/graders/graders.ts +31 -0
- package/src/resources/graders/index.ts +12 -0
- package/src/resources/graders.ts +3 -0
- package/src/resources/images.ts +22 -0
- package/src/resources/index.ts +1 -3
- package/src/resources/responses/input-items.ts +10 -0
- package/src/resources/responses/responses.ts +22 -0
- package/src/resources/shared.ts +1 -0
- package/src/version.ts +1 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/version.mjs +1 -1
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { APIResource } from "../../resource.js";
|
|
2
2
|
import * as Core from "../../core.js";
|
|
3
3
|
import * as Shared from "../shared.js";
|
|
4
|
+
import * as GraderModelsAPI from "../graders/grader-models.js";
|
|
4
5
|
import * as ResponsesAPI from "../responses/responses.js";
|
|
5
6
|
import * as RunsAPI from "./runs/runs.js";
|
|
6
7
|
import { CreateEvalCompletionsRunDataSource, CreateEvalJSONLRunDataSource, EvalAPIError, RunCancelResponse, RunCreateParams, RunCreateResponse, RunDeleteResponse, RunListParams, RunListResponse, RunListResponsesPage, RunRetrieveResponse, Runs } from "./runs/runs.js";
|
|
@@ -55,77 +56,7 @@ export interface EvalCustomDataSourceConfig {
|
|
|
55
56
|
type: 'custom';
|
|
56
57
|
}
|
|
57
58
|
/**
|
|
58
|
-
*
|
|
59
|
-
* the evaluation.
|
|
60
|
-
*/
|
|
61
|
-
export interface EvalLabelModelGrader {
|
|
62
|
-
input: Array<EvalLabelModelGrader.Input>;
|
|
63
|
-
/**
|
|
64
|
-
* The labels to assign to each item in the evaluation.
|
|
65
|
-
*/
|
|
66
|
-
labels: Array<string>;
|
|
67
|
-
/**
|
|
68
|
-
* The model to use for the evaluation. Must support structured outputs.
|
|
69
|
-
*/
|
|
70
|
-
model: string;
|
|
71
|
-
/**
|
|
72
|
-
* The name of the grader.
|
|
73
|
-
*/
|
|
74
|
-
name: string;
|
|
75
|
-
/**
|
|
76
|
-
* The labels that indicate a passing result. Must be a subset of labels.
|
|
77
|
-
*/
|
|
78
|
-
passing_labels: Array<string>;
|
|
79
|
-
/**
|
|
80
|
-
* The object type, which is always `label_model`.
|
|
81
|
-
*/
|
|
82
|
-
type: 'label_model';
|
|
83
|
-
}
|
|
84
|
-
export declare namespace EvalLabelModelGrader {
|
|
85
|
-
/**
|
|
86
|
-
* A message input to the model with a role indicating instruction following
|
|
87
|
-
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
88
|
-
* precedence over instructions given with the `user` role. Messages with the
|
|
89
|
-
* `assistant` role are presumed to have been generated by the model in previous
|
|
90
|
-
* interactions.
|
|
91
|
-
*/
|
|
92
|
-
interface Input {
|
|
93
|
-
/**
|
|
94
|
-
* Text inputs to the model - can contain template strings.
|
|
95
|
-
*/
|
|
96
|
-
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
97
|
-
/**
|
|
98
|
-
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
99
|
-
* `developer`.
|
|
100
|
-
*/
|
|
101
|
-
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
102
|
-
/**
|
|
103
|
-
* The type of the message input. Always `message`.
|
|
104
|
-
*/
|
|
105
|
-
type?: 'message';
|
|
106
|
-
}
|
|
107
|
-
namespace Input {
|
|
108
|
-
/**
|
|
109
|
-
* A text output from the model.
|
|
110
|
-
*/
|
|
111
|
-
interface OutputText {
|
|
112
|
-
/**
|
|
113
|
-
* The text output from the model.
|
|
114
|
-
*/
|
|
115
|
-
text: string;
|
|
116
|
-
/**
|
|
117
|
-
* The type of the output text. Always `output_text`.
|
|
118
|
-
*/
|
|
119
|
-
type: 'output_text';
|
|
120
|
-
}
|
|
121
|
-
}
|
|
122
|
-
}
|
|
123
|
-
/**
|
|
124
|
-
* A StoredCompletionsDataSourceConfig which specifies the metadata property of
|
|
125
|
-
* your stored completions query. This is usually metadata like `usecase=chatbot`
|
|
126
|
-
* or `prompt-version=v2`, etc. The schema returned by this data source config is
|
|
127
|
-
* used to defined what variables are available in your evals. `item` and `sample`
|
|
128
|
-
* are both defined when using this data source config.
|
|
59
|
+
* @deprecated Deprecated in favor of LogsDataSourceConfig.
|
|
129
60
|
*/
|
|
130
61
|
export interface EvalStoredCompletionsDataSourceConfig {
|
|
131
62
|
/**
|
|
@@ -134,9 +65,9 @@ export interface EvalStoredCompletionsDataSourceConfig {
|
|
|
134
65
|
*/
|
|
135
66
|
schema: Record<string, unknown>;
|
|
136
67
|
/**
|
|
137
|
-
* The type of data source. Always `
|
|
68
|
+
* The type of data source. Always `stored-completions`.
|
|
138
69
|
*/
|
|
139
|
-
type: '
|
|
70
|
+
type: 'stored-completions';
|
|
140
71
|
/**
|
|
141
72
|
* Set of 16 key-value pairs that can be attached to an object. This can be useful
|
|
142
73
|
* for storing additional information about the object in a structured format, and
|
|
@@ -147,62 +78,6 @@ export interface EvalStoredCompletionsDataSourceConfig {
|
|
|
147
78
|
*/
|
|
148
79
|
metadata?: Shared.Metadata | null;
|
|
149
80
|
}
|
|
150
|
-
/**
|
|
151
|
-
* A StringCheckGrader object that performs a string comparison between input and
|
|
152
|
-
* reference using a specified operation.
|
|
153
|
-
*/
|
|
154
|
-
export interface EvalStringCheckGrader {
|
|
155
|
-
/**
|
|
156
|
-
* The input text. This may include template strings.
|
|
157
|
-
*/
|
|
158
|
-
input: string;
|
|
159
|
-
/**
|
|
160
|
-
* The name of the grader.
|
|
161
|
-
*/
|
|
162
|
-
name: string;
|
|
163
|
-
/**
|
|
164
|
-
* The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`.
|
|
165
|
-
*/
|
|
166
|
-
operation: 'eq' | 'ne' | 'like' | 'ilike';
|
|
167
|
-
/**
|
|
168
|
-
* The reference text. This may include template strings.
|
|
169
|
-
*/
|
|
170
|
-
reference: string;
|
|
171
|
-
/**
|
|
172
|
-
* The object type, which is always `string_check`.
|
|
173
|
-
*/
|
|
174
|
-
type: 'string_check';
|
|
175
|
-
}
|
|
176
|
-
/**
|
|
177
|
-
* A TextSimilarityGrader object which grades text based on similarity metrics.
|
|
178
|
-
*/
|
|
179
|
-
export interface EvalTextSimilarityGrader {
|
|
180
|
-
/**
|
|
181
|
-
* The evaluation metric to use. One of `fuzzy_match`, `bleu`, `gleu`, `meteor`,
|
|
182
|
-
* `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, or `rouge_l`.
|
|
183
|
-
*/
|
|
184
|
-
evaluation_metric: 'fuzzy_match' | 'bleu' | 'gleu' | 'meteor' | 'rouge_1' | 'rouge_2' | 'rouge_3' | 'rouge_4' | 'rouge_5' | 'rouge_l';
|
|
185
|
-
/**
|
|
186
|
-
* The text being graded.
|
|
187
|
-
*/
|
|
188
|
-
input: string;
|
|
189
|
-
/**
|
|
190
|
-
* A float score where a value greater than or equal indicates a passing grade.
|
|
191
|
-
*/
|
|
192
|
-
pass_threshold: number;
|
|
193
|
-
/**
|
|
194
|
-
* The text being graded against.
|
|
195
|
-
*/
|
|
196
|
-
reference: string;
|
|
197
|
-
/**
|
|
198
|
-
* The type of grader.
|
|
199
|
-
*/
|
|
200
|
-
type: 'text_similarity';
|
|
201
|
-
/**
|
|
202
|
-
* The name of the grader.
|
|
203
|
-
*/
|
|
204
|
-
name?: string;
|
|
205
|
-
}
|
|
206
81
|
/**
|
|
207
82
|
* An Eval object with a data source config and testing criteria. An Eval
|
|
208
83
|
* represents a task to be done for your LLM integration. Like:
|
|
@@ -223,7 +98,7 @@ export interface EvalCreateResponse {
|
|
|
223
98
|
/**
|
|
224
99
|
* Configuration of data sources used in runs of the evaluation.
|
|
225
100
|
*/
|
|
226
|
-
data_source_config: EvalCustomDataSourceConfig | EvalStoredCompletionsDataSourceConfig;
|
|
101
|
+
data_source_config: EvalCustomDataSourceConfig | EvalCreateResponse.Logs | EvalStoredCompletionsDataSourceConfig;
|
|
227
102
|
/**
|
|
228
103
|
* Set of 16 key-value pairs that can be attached to an object. This can be useful
|
|
229
104
|
* for storing additional information about the object in a structured format, and
|
|
@@ -244,29 +119,49 @@ export interface EvalCreateResponse {
|
|
|
244
119
|
/**
|
|
245
120
|
* A list of testing criteria.
|
|
246
121
|
*/
|
|
247
|
-
testing_criteria: Array<
|
|
122
|
+
testing_criteria: Array<GraderModelsAPI.LabelModelGrader | GraderModelsAPI.StringCheckGrader | EvalCreateResponse.EvalGraderTextSimilarity | EvalCreateResponse.EvalGraderPython | EvalCreateResponse.EvalGraderScoreModel>;
|
|
248
123
|
}
|
|
249
124
|
export declare namespace EvalCreateResponse {
|
|
250
125
|
/**
|
|
251
|
-
* A
|
|
126
|
+
* A LogsDataSourceConfig which specifies the metadata property of your logs query.
|
|
127
|
+
* This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc. The
|
|
128
|
+
* schema returned by this data source config is used to defined what variables are
|
|
129
|
+
* available in your evals. `item` and `sample` are both defined when using this
|
|
130
|
+
* data source config.
|
|
252
131
|
*/
|
|
253
|
-
interface
|
|
132
|
+
interface Logs {
|
|
254
133
|
/**
|
|
255
|
-
* The
|
|
134
|
+
* The json schema for the run data source items. Learn how to build JSON schemas
|
|
135
|
+
* [here](https://json-schema.org/).
|
|
256
136
|
*/
|
|
257
|
-
|
|
137
|
+
schema: Record<string, unknown>;
|
|
258
138
|
/**
|
|
259
|
-
* The
|
|
139
|
+
* The type of data source. Always `logs`.
|
|
260
140
|
*/
|
|
261
|
-
|
|
141
|
+
type: 'logs';
|
|
262
142
|
/**
|
|
263
|
-
*
|
|
143
|
+
* Set of 16 key-value pairs that can be attached to an object. This can be useful
|
|
144
|
+
* for storing additional information about the object in a structured format, and
|
|
145
|
+
* querying for objects via API or the dashboard.
|
|
146
|
+
*
|
|
147
|
+
* Keys are strings with a maximum length of 64 characters. Values are strings with
|
|
148
|
+
* a maximum length of 512 characters.
|
|
264
149
|
*/
|
|
265
|
-
|
|
150
|
+
metadata?: Shared.Metadata | null;
|
|
151
|
+
}
|
|
152
|
+
/**
|
|
153
|
+
* A TextSimilarityGrader object which grades text based on similarity metrics.
|
|
154
|
+
*/
|
|
155
|
+
interface EvalGraderTextSimilarity extends GraderModelsAPI.TextSimilarityGrader {
|
|
266
156
|
/**
|
|
267
|
-
* The
|
|
157
|
+
* The threshold for the score.
|
|
268
158
|
*/
|
|
269
|
-
|
|
159
|
+
pass_threshold: number;
|
|
160
|
+
}
|
|
161
|
+
/**
|
|
162
|
+
* A PythonGrader object that runs a python script on the input.
|
|
163
|
+
*/
|
|
164
|
+
interface EvalGraderPython extends GraderModelsAPI.PythonGrader {
|
|
270
165
|
/**
|
|
271
166
|
* The threshold for the score.
|
|
272
167
|
*/
|
|
@@ -275,74 +170,11 @@ export declare namespace EvalCreateResponse {
|
|
|
275
170
|
/**
|
|
276
171
|
* A ScoreModelGrader object that uses a model to assign a score to the input.
|
|
277
172
|
*/
|
|
278
|
-
interface
|
|
279
|
-
/**
|
|
280
|
-
* The input text. This may include template strings.
|
|
281
|
-
*/
|
|
282
|
-
input: Array<ScoreModel.Input>;
|
|
283
|
-
/**
|
|
284
|
-
* The model to use for the evaluation.
|
|
285
|
-
*/
|
|
286
|
-
model: string;
|
|
287
|
-
/**
|
|
288
|
-
* The name of the grader.
|
|
289
|
-
*/
|
|
290
|
-
name: string;
|
|
291
|
-
/**
|
|
292
|
-
* The object type, which is always `score_model`.
|
|
293
|
-
*/
|
|
294
|
-
type: 'score_model';
|
|
173
|
+
interface EvalGraderScoreModel extends GraderModelsAPI.ScoreModelGrader {
|
|
295
174
|
/**
|
|
296
175
|
* The threshold for the score.
|
|
297
176
|
*/
|
|
298
177
|
pass_threshold?: number;
|
|
299
|
-
/**
|
|
300
|
-
* The range of the score. Defaults to `[0, 1]`.
|
|
301
|
-
*/
|
|
302
|
-
range?: Array<number>;
|
|
303
|
-
/**
|
|
304
|
-
* The sampling parameters for the model.
|
|
305
|
-
*/
|
|
306
|
-
sampling_params?: unknown;
|
|
307
|
-
}
|
|
308
|
-
namespace ScoreModel {
|
|
309
|
-
/**
|
|
310
|
-
* A message input to the model with a role indicating instruction following
|
|
311
|
-
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
312
|
-
* precedence over instructions given with the `user` role. Messages with the
|
|
313
|
-
* `assistant` role are presumed to have been generated by the model in previous
|
|
314
|
-
* interactions.
|
|
315
|
-
*/
|
|
316
|
-
interface Input {
|
|
317
|
-
/**
|
|
318
|
-
* Text inputs to the model - can contain template strings.
|
|
319
|
-
*/
|
|
320
|
-
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
321
|
-
/**
|
|
322
|
-
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
323
|
-
* `developer`.
|
|
324
|
-
*/
|
|
325
|
-
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
326
|
-
/**
|
|
327
|
-
* The type of the message input. Always `message`.
|
|
328
|
-
*/
|
|
329
|
-
type?: 'message';
|
|
330
|
-
}
|
|
331
|
-
namespace Input {
|
|
332
|
-
/**
|
|
333
|
-
* A text output from the model.
|
|
334
|
-
*/
|
|
335
|
-
interface OutputText {
|
|
336
|
-
/**
|
|
337
|
-
* The text output from the model.
|
|
338
|
-
*/
|
|
339
|
-
text: string;
|
|
340
|
-
/**
|
|
341
|
-
* The type of the output text. Always `output_text`.
|
|
342
|
-
*/
|
|
343
|
-
type: 'output_text';
|
|
344
|
-
}
|
|
345
|
-
}
|
|
346
178
|
}
|
|
347
179
|
}
|
|
348
180
|
/**
|
|
@@ -365,7 +197,7 @@ export interface EvalRetrieveResponse {
|
|
|
365
197
|
/**
|
|
366
198
|
* Configuration of data sources used in runs of the evaluation.
|
|
367
199
|
*/
|
|
368
|
-
data_source_config: EvalCustomDataSourceConfig | EvalStoredCompletionsDataSourceConfig;
|
|
200
|
+
data_source_config: EvalCustomDataSourceConfig | EvalRetrieveResponse.Logs | EvalStoredCompletionsDataSourceConfig;
|
|
369
201
|
/**
|
|
370
202
|
* Set of 16 key-value pairs that can be attached to an object. This can be useful
|
|
371
203
|
* for storing additional information about the object in a structured format, and
|
|
@@ -386,29 +218,49 @@ export interface EvalRetrieveResponse {
|
|
|
386
218
|
/**
|
|
387
219
|
* A list of testing criteria.
|
|
388
220
|
*/
|
|
389
|
-
testing_criteria: Array<
|
|
221
|
+
testing_criteria: Array<GraderModelsAPI.LabelModelGrader | GraderModelsAPI.StringCheckGrader | EvalRetrieveResponse.EvalGraderTextSimilarity | EvalRetrieveResponse.EvalGraderPython | EvalRetrieveResponse.EvalGraderScoreModel>;
|
|
390
222
|
}
|
|
391
223
|
export declare namespace EvalRetrieveResponse {
|
|
392
224
|
/**
|
|
393
|
-
* A
|
|
225
|
+
* A LogsDataSourceConfig which specifies the metadata property of your logs query.
|
|
226
|
+
* This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc. The
|
|
227
|
+
* schema returned by this data source config is used to defined what variables are
|
|
228
|
+
* available in your evals. `item` and `sample` are both defined when using this
|
|
229
|
+
* data source config.
|
|
394
230
|
*/
|
|
395
|
-
interface
|
|
231
|
+
interface Logs {
|
|
396
232
|
/**
|
|
397
|
-
* The
|
|
233
|
+
* The json schema for the run data source items. Learn how to build JSON schemas
|
|
234
|
+
* [here](https://json-schema.org/).
|
|
398
235
|
*/
|
|
399
|
-
|
|
236
|
+
schema: Record<string, unknown>;
|
|
400
237
|
/**
|
|
401
|
-
* The
|
|
238
|
+
* The type of data source. Always `logs`.
|
|
402
239
|
*/
|
|
403
|
-
|
|
240
|
+
type: 'logs';
|
|
404
241
|
/**
|
|
405
|
-
*
|
|
242
|
+
* Set of 16 key-value pairs that can be attached to an object. This can be useful
|
|
243
|
+
* for storing additional information about the object in a structured format, and
|
|
244
|
+
* querying for objects via API or the dashboard.
|
|
245
|
+
*
|
|
246
|
+
* Keys are strings with a maximum length of 64 characters. Values are strings with
|
|
247
|
+
* a maximum length of 512 characters.
|
|
406
248
|
*/
|
|
407
|
-
|
|
249
|
+
metadata?: Shared.Metadata | null;
|
|
250
|
+
}
|
|
251
|
+
/**
|
|
252
|
+
* A TextSimilarityGrader object which grades text based on similarity metrics.
|
|
253
|
+
*/
|
|
254
|
+
interface EvalGraderTextSimilarity extends GraderModelsAPI.TextSimilarityGrader {
|
|
408
255
|
/**
|
|
409
|
-
* The
|
|
256
|
+
* The threshold for the score.
|
|
410
257
|
*/
|
|
411
|
-
|
|
258
|
+
pass_threshold: number;
|
|
259
|
+
}
|
|
260
|
+
/**
|
|
261
|
+
* A PythonGrader object that runs a python script on the input.
|
|
262
|
+
*/
|
|
263
|
+
interface EvalGraderPython extends GraderModelsAPI.PythonGrader {
|
|
412
264
|
/**
|
|
413
265
|
* The threshold for the score.
|
|
414
266
|
*/
|
|
@@ -417,74 +269,11 @@ export declare namespace EvalRetrieveResponse {
|
|
|
417
269
|
/**
|
|
418
270
|
* A ScoreModelGrader object that uses a model to assign a score to the input.
|
|
419
271
|
*/
|
|
420
|
-
interface
|
|
421
|
-
/**
|
|
422
|
-
* The input text. This may include template strings.
|
|
423
|
-
*/
|
|
424
|
-
input: Array<ScoreModel.Input>;
|
|
425
|
-
/**
|
|
426
|
-
* The model to use for the evaluation.
|
|
427
|
-
*/
|
|
428
|
-
model: string;
|
|
429
|
-
/**
|
|
430
|
-
* The name of the grader.
|
|
431
|
-
*/
|
|
432
|
-
name: string;
|
|
433
|
-
/**
|
|
434
|
-
* The object type, which is always `score_model`.
|
|
435
|
-
*/
|
|
436
|
-
type: 'score_model';
|
|
272
|
+
interface EvalGraderScoreModel extends GraderModelsAPI.ScoreModelGrader {
|
|
437
273
|
/**
|
|
438
274
|
* The threshold for the score.
|
|
439
275
|
*/
|
|
440
276
|
pass_threshold?: number;
|
|
441
|
-
/**
|
|
442
|
-
* The range of the score. Defaults to `[0, 1]`.
|
|
443
|
-
*/
|
|
444
|
-
range?: Array<number>;
|
|
445
|
-
/**
|
|
446
|
-
* The sampling parameters for the model.
|
|
447
|
-
*/
|
|
448
|
-
sampling_params?: unknown;
|
|
449
|
-
}
|
|
450
|
-
namespace ScoreModel {
|
|
451
|
-
/**
|
|
452
|
-
* A message input to the model with a role indicating instruction following
|
|
453
|
-
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
454
|
-
* precedence over instructions given with the `user` role. Messages with the
|
|
455
|
-
* `assistant` role are presumed to have been generated by the model in previous
|
|
456
|
-
* interactions.
|
|
457
|
-
*/
|
|
458
|
-
interface Input {
|
|
459
|
-
/**
|
|
460
|
-
* Text inputs to the model - can contain template strings.
|
|
461
|
-
*/
|
|
462
|
-
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
463
|
-
/**
|
|
464
|
-
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
465
|
-
* `developer`.
|
|
466
|
-
*/
|
|
467
|
-
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
468
|
-
/**
|
|
469
|
-
* The type of the message input. Always `message`.
|
|
470
|
-
*/
|
|
471
|
-
type?: 'message';
|
|
472
|
-
}
|
|
473
|
-
namespace Input {
|
|
474
|
-
/**
|
|
475
|
-
* A text output from the model.
|
|
476
|
-
*/
|
|
477
|
-
interface OutputText {
|
|
478
|
-
/**
|
|
479
|
-
* The text output from the model.
|
|
480
|
-
*/
|
|
481
|
-
text: string;
|
|
482
|
-
/**
|
|
483
|
-
* The type of the output text. Always `output_text`.
|
|
484
|
-
*/
|
|
485
|
-
type: 'output_text';
|
|
486
|
-
}
|
|
487
|
-
}
|
|
488
277
|
}
|
|
489
278
|
}
|
|
490
279
|
/**
|
|
@@ -507,7 +296,7 @@ export interface EvalUpdateResponse {
|
|
|
507
296
|
/**
|
|
508
297
|
* Configuration of data sources used in runs of the evaluation.
|
|
509
298
|
*/
|
|
510
|
-
data_source_config: EvalCustomDataSourceConfig | EvalStoredCompletionsDataSourceConfig;
|
|
299
|
+
data_source_config: EvalCustomDataSourceConfig | EvalUpdateResponse.Logs | EvalStoredCompletionsDataSourceConfig;
|
|
511
300
|
/**
|
|
512
301
|
* Set of 16 key-value pairs that can be attached to an object. This can be useful
|
|
513
302
|
* for storing additional information about the object in a structured format, and
|
|
@@ -528,29 +317,49 @@ export interface EvalUpdateResponse {
|
|
|
528
317
|
/**
|
|
529
318
|
* A list of testing criteria.
|
|
530
319
|
*/
|
|
531
|
-
testing_criteria: Array<
|
|
320
|
+
testing_criteria: Array<GraderModelsAPI.LabelModelGrader | GraderModelsAPI.StringCheckGrader | EvalUpdateResponse.EvalGraderTextSimilarity | EvalUpdateResponse.EvalGraderPython | EvalUpdateResponse.EvalGraderScoreModel>;
|
|
532
321
|
}
|
|
533
322
|
export declare namespace EvalUpdateResponse {
|
|
534
323
|
/**
|
|
535
|
-
* A
|
|
324
|
+
* A LogsDataSourceConfig which specifies the metadata property of your logs query.
|
|
325
|
+
* This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc. The
|
|
326
|
+
* schema returned by this data source config is used to define what variables are
|
|
327
|
+
* available in your evals. `item` and `sample` are both defined when using this
|
|
328
|
+
* data source config.
|
|
536
329
|
*/
|
|
537
|
-
interface
|
|
330
|
+
interface Logs {
|
|
538
331
|
/**
|
|
539
|
-
* The
|
|
332
|
+
* The json schema for the run data source items. Learn how to build JSON schemas
|
|
333
|
+
* [here](https://json-schema.org/).
|
|
540
334
|
*/
|
|
541
|
-
|
|
335
|
+
schema: Record<string, unknown>;
|
|
542
336
|
/**
|
|
543
|
-
* The
|
|
337
|
+
* The type of data source. Always `logs`.
|
|
544
338
|
*/
|
|
545
|
-
|
|
339
|
+
type: 'logs';
|
|
546
340
|
/**
|
|
547
|
-
*
|
|
341
|
+
* Set of 16 key-value pairs that can be attached to an object. This can be useful
|
|
342
|
+
* for storing additional information about the object in a structured format, and
|
|
343
|
+
* querying for objects via API or the dashboard.
|
|
344
|
+
*
|
|
345
|
+
* Keys are strings with a maximum length of 64 characters. Values are strings with
|
|
346
|
+
* a maximum length of 512 characters.
|
|
548
347
|
*/
|
|
549
|
-
|
|
348
|
+
metadata?: Shared.Metadata | null;
|
|
349
|
+
}
|
|
350
|
+
/**
|
|
351
|
+
* A TextSimilarityGrader object which grades text based on similarity metrics.
|
|
352
|
+
*/
|
|
353
|
+
interface EvalGraderTextSimilarity extends GraderModelsAPI.TextSimilarityGrader {
|
|
550
354
|
/**
|
|
551
|
-
* The
|
|
355
|
+
* The threshold for the score.
|
|
552
356
|
*/
|
|
553
|
-
|
|
357
|
+
pass_threshold: number;
|
|
358
|
+
}
|
|
359
|
+
/**
|
|
360
|
+
* A PythonGrader object that runs a python script on the input.
|
|
361
|
+
*/
|
|
362
|
+
interface EvalGraderPython extends GraderModelsAPI.PythonGrader {
|
|
554
363
|
/**
|
|
555
364
|
* The threshold for the score.
|
|
556
365
|
*/
|
|
@@ -559,74 +368,11 @@ export declare namespace EvalUpdateResponse {
|
|
|
559
368
|
/**
|
|
560
369
|
* A ScoreModelGrader object that uses a model to assign a score to the input.
|
|
561
370
|
*/
|
|
562
|
-
interface
|
|
563
|
-
/**
|
|
564
|
-
* The input text. This may include template strings.
|
|
565
|
-
*/
|
|
566
|
-
input: Array<ScoreModel.Input>;
|
|
567
|
-
/**
|
|
568
|
-
* The model to use for the evaluation.
|
|
569
|
-
*/
|
|
570
|
-
model: string;
|
|
571
|
-
/**
|
|
572
|
-
* The name of the grader.
|
|
573
|
-
*/
|
|
574
|
-
name: string;
|
|
575
|
-
/**
|
|
576
|
-
* The object type, which is always `score_model`.
|
|
577
|
-
*/
|
|
578
|
-
type: 'score_model';
|
|
371
|
+
interface EvalGraderScoreModel extends GraderModelsAPI.ScoreModelGrader {
|
|
579
372
|
/**
|
|
580
373
|
* The threshold for the score.
|
|
581
374
|
*/
|
|
582
375
|
pass_threshold?: number;
|
|
583
|
-
/**
|
|
584
|
-
* The range of the score. Defaults to `[0, 1]`.
|
|
585
|
-
*/
|
|
586
|
-
range?: Array<number>;
|
|
587
|
-
/**
|
|
588
|
-
* The sampling parameters for the model.
|
|
589
|
-
*/
|
|
590
|
-
sampling_params?: unknown;
|
|
591
|
-
}
|
|
592
|
-
namespace ScoreModel {
|
|
593
|
-
/**
|
|
594
|
-
* A message input to the model with a role indicating instruction following
|
|
595
|
-
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
596
|
-
* precedence over instructions given with the `user` role. Messages with the
|
|
597
|
-
* `assistant` role are presumed to have been generated by the model in previous
|
|
598
|
-
* interactions.
|
|
599
|
-
*/
|
|
600
|
-
interface Input {
|
|
601
|
-
/**
|
|
602
|
-
* Text inputs to the model - can contain template strings.
|
|
603
|
-
*/
|
|
604
|
-
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
605
|
-
/**
|
|
606
|
-
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
607
|
-
* `developer`.
|
|
608
|
-
*/
|
|
609
|
-
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
610
|
-
/**
|
|
611
|
-
* The type of the message input. Always `message`.
|
|
612
|
-
*/
|
|
613
|
-
type?: 'message';
|
|
614
|
-
}
|
|
615
|
-
namespace Input {
|
|
616
|
-
/**
|
|
617
|
-
* A text output from the model.
|
|
618
|
-
*/
|
|
619
|
-
interface OutputText {
|
|
620
|
-
/**
|
|
621
|
-
* The text output from the model.
|
|
622
|
-
*/
|
|
623
|
-
text: string;
|
|
624
|
-
/**
|
|
625
|
-
* The type of the output text. Always `output_text`.
|
|
626
|
-
*/
|
|
627
|
-
type: 'output_text';
|
|
628
|
-
}
|
|
629
|
-
}
|
|
630
376
|
}
|
|
631
377
|
}
|
|
632
378
|
/**
|
|
@@ -649,7 +395,7 @@ export interface EvalListResponse {
|
|
|
649
395
|
/**
|
|
650
396
|
* Configuration of data sources used in runs of the evaluation.
|
|
651
397
|
*/
|
|
652
|
-
data_source_config: EvalCustomDataSourceConfig | EvalStoredCompletionsDataSourceConfig;
|
|
398
|
+
data_source_config: EvalCustomDataSourceConfig | EvalListResponse.Logs | EvalStoredCompletionsDataSourceConfig;
|
|
653
399
|
/**
|
|
654
400
|
* Set of 16 key-value pairs that can be attached to an object. This can be useful
|
|
655
401
|
* for storing additional information about the object in a structured format, and
|
|
@@ -670,29 +416,49 @@ export interface EvalListResponse {
|
|
|
670
416
|
/**
|
|
671
417
|
* A list of testing criteria.
|
|
672
418
|
*/
|
|
673
|
-
testing_criteria: Array<
|
|
419
|
+
testing_criteria: Array<GraderModelsAPI.LabelModelGrader | GraderModelsAPI.StringCheckGrader | EvalListResponse.EvalGraderTextSimilarity | EvalListResponse.EvalGraderPython | EvalListResponse.EvalGraderScoreModel>;
|
|
674
420
|
}
|
|
675
421
|
export declare namespace EvalListResponse {
|
|
676
422
|
/**
|
|
677
|
-
* A
|
|
423
|
+
* A LogsDataSourceConfig which specifies the metadata property of your logs query.
|
|
424
|
+
* This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc. The
|
|
425
|
+
* schema returned by this data source config is used to define what variables are
|
|
426
|
+
* available in your evals. `item` and `sample` are both defined when using this
|
|
427
|
+
* data source config.
|
|
678
428
|
*/
|
|
679
|
-
interface
|
|
429
|
+
interface Logs {
|
|
680
430
|
/**
|
|
681
|
-
* The
|
|
431
|
+
* The json schema for the run data source items. Learn how to build JSON schemas
|
|
432
|
+
* [here](https://json-schema.org/).
|
|
682
433
|
*/
|
|
683
|
-
|
|
434
|
+
schema: Record<string, unknown>;
|
|
684
435
|
/**
|
|
685
|
-
* The
|
|
436
|
+
* The type of data source. Always `logs`.
|
|
686
437
|
*/
|
|
687
|
-
|
|
438
|
+
type: 'logs';
|
|
688
439
|
/**
|
|
689
|
-
*
|
|
440
|
+
* Set of 16 key-value pairs that can be attached to an object. This can be useful
|
|
441
|
+
* for storing additional information about the object in a structured format, and
|
|
442
|
+
* querying for objects via API or the dashboard.
|
|
443
|
+
*
|
|
444
|
+
* Keys are strings with a maximum length of 64 characters. Values are strings with
|
|
445
|
+
* a maximum length of 512 characters.
|
|
690
446
|
*/
|
|
691
|
-
|
|
447
|
+
metadata?: Shared.Metadata | null;
|
|
448
|
+
}
|
|
449
|
+
/**
|
|
450
|
+
* A TextSimilarityGrader object which grades text based on similarity metrics.
|
|
451
|
+
*/
|
|
452
|
+
interface EvalGraderTextSimilarity extends GraderModelsAPI.TextSimilarityGrader {
|
|
692
453
|
/**
|
|
693
|
-
* The
|
|
454
|
+
* The threshold for the score.
|
|
694
455
|
*/
|
|
695
|
-
|
|
456
|
+
pass_threshold: number;
|
|
457
|
+
}
|
|
458
|
+
/**
|
|
459
|
+
* A PythonGrader object that runs a python script on the input.
|
|
460
|
+
*/
|
|
461
|
+
interface EvalGraderPython extends GraderModelsAPI.PythonGrader {
|
|
696
462
|
/**
|
|
697
463
|
* The threshold for the score.
|
|
698
464
|
*/
|
|
@@ -701,74 +467,11 @@ export declare namespace EvalListResponse {
|
|
|
701
467
|
/**
|
|
702
468
|
* A ScoreModelGrader object that uses a model to assign a score to the input.
|
|
703
469
|
*/
|
|
704
|
-
interface
|
|
705
|
-
/**
|
|
706
|
-
* The input text. This may include template strings.
|
|
707
|
-
*/
|
|
708
|
-
input: Array<ScoreModel.Input>;
|
|
709
|
-
/**
|
|
710
|
-
* The model to use for the evaluation.
|
|
711
|
-
*/
|
|
712
|
-
model: string;
|
|
713
|
-
/**
|
|
714
|
-
* The name of the grader.
|
|
715
|
-
*/
|
|
716
|
-
name: string;
|
|
717
|
-
/**
|
|
718
|
-
* The object type, which is always `score_model`.
|
|
719
|
-
*/
|
|
720
|
-
type: 'score_model';
|
|
470
|
+
interface EvalGraderScoreModel extends GraderModelsAPI.ScoreModelGrader {
|
|
721
471
|
/**
|
|
722
472
|
* The threshold for the score.
|
|
723
473
|
*/
|
|
724
474
|
pass_threshold?: number;
|
|
725
|
-
/**
|
|
726
|
-
* The range of the score. Defaults to `[0, 1]`.
|
|
727
|
-
*/
|
|
728
|
-
range?: Array<number>;
|
|
729
|
-
/**
|
|
730
|
-
* The sampling parameters for the model.
|
|
731
|
-
*/
|
|
732
|
-
sampling_params?: unknown;
|
|
733
|
-
}
|
|
734
|
-
namespace ScoreModel {
|
|
735
|
-
/**
|
|
736
|
-
* A message input to the model with a role indicating instruction following
|
|
737
|
-
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
738
|
-
* precedence over instructions given with the `user` role. Messages with the
|
|
739
|
-
* `assistant` role are presumed to have been generated by the model in previous
|
|
740
|
-
* interactions.
|
|
741
|
-
*/
|
|
742
|
-
interface Input {
|
|
743
|
-
/**
|
|
744
|
-
* Text inputs to the model - can contain template strings.
|
|
745
|
-
*/
|
|
746
|
-
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
747
|
-
/**
|
|
748
|
-
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
749
|
-
* `developer`.
|
|
750
|
-
*/
|
|
751
|
-
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
752
|
-
/**
|
|
753
|
-
* The type of the message input. Always `message`.
|
|
754
|
-
*/
|
|
755
|
-
type?: 'message';
|
|
756
|
-
}
|
|
757
|
-
namespace Input {
|
|
758
|
-
/**
|
|
759
|
-
* A text output from the model.
|
|
760
|
-
*/
|
|
761
|
-
interface OutputText {
|
|
762
|
-
/**
|
|
763
|
-
* The text output from the model.
|
|
764
|
-
*/
|
|
765
|
-
text: string;
|
|
766
|
-
/**
|
|
767
|
-
* The type of the output text. Always `output_text`.
|
|
768
|
-
*/
|
|
769
|
-
type: 'output_text';
|
|
770
|
-
}
|
|
771
|
-
}
|
|
772
475
|
}
|
|
773
476
|
}
|
|
774
477
|
export interface EvalDeleteResponse {
|
|
@@ -780,11 +483,11 @@ export interface EvalCreateParams {
|
|
|
780
483
|
/**
|
|
781
484
|
* The configuration for the data source used for the evaluation runs.
|
|
782
485
|
*/
|
|
783
|
-
data_source_config: EvalCreateParams.Custom | EvalCreateParams.Logs;
|
|
486
|
+
data_source_config: EvalCreateParams.Custom | EvalCreateParams.Logs | EvalCreateParams.StoredCompletions;
|
|
784
487
|
/**
|
|
785
488
|
* A list of graders for all eval runs in this group.
|
|
786
489
|
*/
|
|
787
|
-
testing_criteria: Array<EvalCreateParams.LabelModel |
|
|
490
|
+
testing_criteria: Array<EvalCreateParams.LabelModel | GraderModelsAPI.StringCheckGrader | EvalCreateParams.TextSimilarity | EvalCreateParams.Python | EvalCreateParams.ScoreModel>;
|
|
788
491
|
/**
|
|
789
492
|
* Set of 16 key-value pairs that can be attached to an object. This can be useful
|
|
790
493
|
* for storing additional information about the object in a structured format, and
|
|
@@ -824,9 +527,8 @@ export declare namespace EvalCreateParams {
|
|
|
824
527
|
include_sample_schema?: boolean;
|
|
825
528
|
}
|
|
826
529
|
/**
|
|
827
|
-
* A data source config which specifies the metadata property of your
|
|
828
|
-
*
|
|
829
|
-
* `prompt-version=v2`, etc.
|
|
530
|
+
* A data source config which specifies the metadata property of your logs query.
|
|
531
|
+
* This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc.
|
|
830
532
|
*/
|
|
831
533
|
interface Logs {
|
|
832
534
|
/**
|
|
@@ -838,6 +540,19 @@ export declare namespace EvalCreateParams {
|
|
|
838
540
|
*/
|
|
839
541
|
metadata?: Record<string, unknown>;
|
|
840
542
|
}
|
|
543
|
+
/**
|
|
544
|
+
* Deprecated in favor of LogsDataSourceConfig.
|
|
545
|
+
*/
|
|
546
|
+
interface StoredCompletions {
|
|
547
|
+
/**
|
|
548
|
+
* The type of data source. Always `stored-completions`.
|
|
549
|
+
*/
|
|
550
|
+
type: 'stored-completions';
|
|
551
|
+
/**
|
|
552
|
+
* Metadata filters for the stored completions data source.
|
|
553
|
+
*/
|
|
554
|
+
metadata?: Record<string, unknown>;
|
|
555
|
+
}
|
|
841
556
|
/**
|
|
842
557
|
* A LabelModelGrader object which uses a model to assign labels to each item in
|
|
843
558
|
* the evaluation.
|
|
@@ -919,25 +634,18 @@ export declare namespace EvalCreateParams {
|
|
|
919
634
|
}
|
|
920
635
|
}
|
|
921
636
|
/**
|
|
922
|
-
* A
|
|
637
|
+
* A TextSimilarityGrader object which grades text based on similarity metrics.
|
|
923
638
|
*/
|
|
924
|
-
interface
|
|
925
|
-
/**
|
|
926
|
-
* The name of the grader.
|
|
927
|
-
*/
|
|
928
|
-
name: string;
|
|
929
|
-
/**
|
|
930
|
-
* The source code of the python script.
|
|
931
|
-
*/
|
|
932
|
-
source: string;
|
|
639
|
+
interface TextSimilarity extends GraderModelsAPI.TextSimilarityGrader {
|
|
933
640
|
/**
|
|
934
|
-
* The
|
|
935
|
-
*/
|
|
936
|
-
type: 'python';
|
|
937
|
-
/**
|
|
938
|
-
* The image tag to use for the python script.
|
|
641
|
+
* The threshold for the score.
|
|
939
642
|
*/
|
|
940
|
-
|
|
643
|
+
pass_threshold: number;
|
|
644
|
+
}
|
|
645
|
+
/**
|
|
646
|
+
* A PythonGrader object that runs a python script on the input.
|
|
647
|
+
*/
|
|
648
|
+
interface Python extends GraderModelsAPI.PythonGrader {
|
|
941
649
|
/**
|
|
942
650
|
* The threshold for the score.
|
|
943
651
|
*/
|
|
@@ -946,74 +654,11 @@ export declare namespace EvalCreateParams {
|
|
|
946
654
|
/**
|
|
947
655
|
* A ScoreModelGrader object that uses a model to assign a score to the input.
|
|
948
656
|
*/
|
|
949
|
-
interface ScoreModel {
|
|
950
|
-
/**
|
|
951
|
-
* The input text. This may include template strings.
|
|
952
|
-
*/
|
|
953
|
-
input: Array<ScoreModel.Input>;
|
|
954
|
-
/**
|
|
955
|
-
* The model to use for the evaluation.
|
|
956
|
-
*/
|
|
957
|
-
model: string;
|
|
958
|
-
/**
|
|
959
|
-
* The name of the grader.
|
|
960
|
-
*/
|
|
961
|
-
name: string;
|
|
962
|
-
/**
|
|
963
|
-
* The object type, which is always `score_model`.
|
|
964
|
-
*/
|
|
965
|
-
type: 'score_model';
|
|
657
|
+
interface ScoreModel extends GraderModelsAPI.ScoreModelGrader {
|
|
966
658
|
/**
|
|
967
659
|
* The threshold for the score.
|
|
968
660
|
*/
|
|
969
661
|
pass_threshold?: number;
|
|
970
|
-
/**
|
|
971
|
-
* The range of the score. Defaults to `[0, 1]`.
|
|
972
|
-
*/
|
|
973
|
-
range?: Array<number>;
|
|
974
|
-
/**
|
|
975
|
-
* The sampling parameters for the model.
|
|
976
|
-
*/
|
|
977
|
-
sampling_params?: unknown;
|
|
978
|
-
}
|
|
979
|
-
namespace ScoreModel {
|
|
980
|
-
/**
|
|
981
|
-
* A message input to the model with a role indicating instruction following
|
|
982
|
-
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
983
|
-
* precedence over instructions given with the `user` role. Messages with the
|
|
984
|
-
* `assistant` role are presumed to have been generated by the model in previous
|
|
985
|
-
* interactions.
|
|
986
|
-
*/
|
|
987
|
-
interface Input {
|
|
988
|
-
/**
|
|
989
|
-
* Text inputs to the model - can contain template strings.
|
|
990
|
-
*/
|
|
991
|
-
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
992
|
-
/**
|
|
993
|
-
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
994
|
-
* `developer`.
|
|
995
|
-
*/
|
|
996
|
-
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
997
|
-
/**
|
|
998
|
-
* The type of the message input. Always `message`.
|
|
999
|
-
*/
|
|
1000
|
-
type?: 'message';
|
|
1001
|
-
}
|
|
1002
|
-
namespace Input {
|
|
1003
|
-
/**
|
|
1004
|
-
* A text output from the model.
|
|
1005
|
-
*/
|
|
1006
|
-
interface OutputText {
|
|
1007
|
-
/**
|
|
1008
|
-
* The text output from the model.
|
|
1009
|
-
*/
|
|
1010
|
-
text: string;
|
|
1011
|
-
/**
|
|
1012
|
-
* The type of the output text. Always `output_text`.
|
|
1013
|
-
*/
|
|
1014
|
-
type: 'output_text';
|
|
1015
|
-
}
|
|
1016
|
-
}
|
|
1017
662
|
}
|
|
1018
663
|
}
|
|
1019
664
|
export interface EvalUpdateParams {
|
|
@@ -1044,7 +689,7 @@ export interface EvalListParams extends CursorPageParams {
|
|
|
1044
689
|
order_by?: 'created_at' | 'updated_at';
|
|
1045
690
|
}
|
|
1046
691
|
export declare namespace Evals {
|
|
1047
|
-
export { type EvalCustomDataSourceConfig as EvalCustomDataSourceConfig, type
|
|
692
|
+
export { type EvalCustomDataSourceConfig as EvalCustomDataSourceConfig, type EvalStoredCompletionsDataSourceConfig as EvalStoredCompletionsDataSourceConfig, type EvalCreateResponse as EvalCreateResponse, type EvalRetrieveResponse as EvalRetrieveResponse, type EvalUpdateResponse as EvalUpdateResponse, type EvalListResponse as EvalListResponse, type EvalDeleteResponse as EvalDeleteResponse, EvalListResponsesPage as EvalListResponsesPage, type EvalCreateParams as EvalCreateParams, type EvalUpdateParams as EvalUpdateParams, type EvalListParams as EvalListParams, };
|
|
1048
693
|
export { Runs as Runs, type CreateEvalCompletionsRunDataSource as CreateEvalCompletionsRunDataSource, type CreateEvalJSONLRunDataSource as CreateEvalJSONLRunDataSource, type EvalAPIError as EvalAPIError, type RunCreateResponse as RunCreateResponse, type RunRetrieveResponse as RunRetrieveResponse, type RunListResponse as RunListResponse, type RunDeleteResponse as RunDeleteResponse, type RunCancelResponse as RunCancelResponse, RunListResponsesPage as RunListResponsesPage, type RunCreateParams as RunCreateParams, type RunListParams as RunListParams, };
|
|
1049
694
|
}
|
|
1050
695
|
//# sourceMappingURL=evals.d.ts.map
|