arize-phoenix 4.12.1rc1__py3-none-any.whl → 4.14.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic.
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/METADATA +12 -9
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/RECORD +48 -49
- phoenix/db/bulk_inserter.py +3 -1
- phoenix/experiments/evaluators/base.py +4 -0
- phoenix/experiments/evaluators/code_evaluators.py +80 -0
- phoenix/experiments/evaluators/llm_evaluators.py +77 -1
- phoenix/experiments/evaluators/utils.py +70 -21
- phoenix/experiments/functions.py +14 -14
- phoenix/server/api/context.py +7 -3
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +23 -23
- phoenix/server/api/dataloaders/experiment_error_rates.py +30 -10
- phoenix/server/api/dataloaders/experiment_run_counts.py +18 -5
- phoenix/server/api/input_types/{CreateSpanAnnotationsInput.py → CreateSpanAnnotationInput.py} +4 -2
- phoenix/server/api/input_types/{CreateTraceAnnotationsInput.py → CreateTraceAnnotationInput.py} +4 -2
- phoenix/server/api/input_types/{PatchAnnotationsInput.py → PatchAnnotationInput.py} +4 -2
- phoenix/server/api/mutations/span_annotations_mutations.py +12 -6
- phoenix/server/api/mutations/trace_annotations_mutations.py +12 -6
- phoenix/server/api/openapi/main.py +2 -18
- phoenix/server/api/openapi/schema.py +12 -12
- phoenix/server/api/routers/v1/__init__.py +83 -36
- phoenix/server/api/routers/v1/dataset_examples.py +123 -102
- phoenix/server/api/routers/v1/datasets.py +506 -390
- phoenix/server/api/routers/v1/evaluations.py +66 -73
- phoenix/server/api/routers/v1/experiment_evaluations.py +91 -68
- phoenix/server/api/routers/v1/experiment_runs.py +155 -98
- phoenix/server/api/routers/v1/experiments.py +181 -132
- phoenix/server/api/routers/v1/spans.py +173 -144
- phoenix/server/api/routers/v1/traces.py +128 -115
- phoenix/server/api/types/Experiment.py +2 -2
- phoenix/server/api/types/Inferences.py +1 -2
- phoenix/server/api/types/Model.py +1 -2
- phoenix/server/app.py +177 -152
- phoenix/server/openapi/docs.py +221 -0
- phoenix/server/static/.vite/manifest.json +31 -31
- phoenix/server/static/assets/{components-C8sm_r1F.js → components-DeS0YEmv.js} +2 -2
- phoenix/server/static/assets/index-CQgXRwU0.js +100 -0
- phoenix/server/static/assets/{pages-bN7juCjh.js → pages-hdjlFZhO.js} +275 -198
- phoenix/server/static/assets/{vendor-CUDAPm8e.js → vendor-DPvSDRn3.js} +1 -1
- phoenix/server/static/assets/{vendor-arizeai-Do2HOmcL.js → vendor-arizeai-CkvPT67c.js} +2 -2
- phoenix/server/static/assets/{vendor-codemirror-CrdxOlMs.js → vendor-codemirror-Cqwpwlua.js} +1 -1
- phoenix/server/static/assets/{vendor-recharts-PKRvByVe.js → vendor-recharts-5jlNaZuF.js} +1 -1
- phoenix/server/thread_server.py +2 -2
- phoenix/session/client.py +9 -8
- phoenix/trace/dsl/filter.py +40 -25
- phoenix/version.py +1 -1
- phoenix/server/api/routers/v1/pydantic_compat.py +0 -78
- phoenix/server/api/routers/v1/utils.py +0 -95
- phoenix/server/static/assets/index-BEKPzgQs.js +0 -100
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/WHEEL +0 -0
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/licenses/LICENSE +0 -0
phoenix/server/api/routers/v1/datasets.py

@@ -6,7 +6,6 @@ import logging
 import zlib
 from asyncio import QueueFull
 from collections import Counter
-from datetime import datetime
 from enum import Enum
 from functools import partial
 from typing import (
@@ -14,7 +13,6 @@ from typing import (
     Awaitable,
     Callable,
     Coroutine,
-    Dict,
     FrozenSet,
     Iterator,
     List,
@@ -28,16 +26,14 @@ from typing import (
 
 import pandas as pd
 import pyarrow as pa
-from fastapi import APIRouter, BackgroundTasks, HTTPException, Path, Query
-from fastapi.responses import PlainTextResponse, StreamingResponse
 from sqlalchemy import and_, delete, func, select
 from sqlalchemy.ext.asyncio import AsyncSession
+from starlette.background import BackgroundTasks
 from starlette.concurrency import run_in_threadpool
 from starlette.datastructures import FormData, UploadFile
 from starlette.requests import Request
-from starlette.responses import Response
+from starlette.responses import JSONResponse, Response
 from starlette.status import (
-    HTTP_200_OK,
     HTTP_204_NO_CONTENT,
     HTTP_404_NOT_FOUND,
     HTTP_409_CONFLICT,
@@ -55,60 +51,79 @@ from phoenix.db.insertion.dataset import (
     ExampleContent,
     add_dataset_examples,
 )
-from phoenix.server.api.types.Dataset import Dataset
+from phoenix.server.api.types.Dataset import Dataset
 from phoenix.server.api.types.DatasetExample import DatasetExample
-from phoenix.server.api.types.DatasetVersion import DatasetVersion
+from phoenix.server.api.types.DatasetVersion import DatasetVersion
 from phoenix.server.api.types.node import from_global_id_with_expected_type
 from phoenix.server.api.utils import delete_projects, delete_traces
 
-from .dataset_examples import router as dataset_examples_router
-from .pydantic_compat import V1RoutesBaseModel
-from .utils import (
-    PaginatedResponseBody,
-    ResponseBody,
-    add_errors_to_responses,
-    add_text_csv_content_to_responses,
-)
-
 logger = logging.getLogger(__name__)
 
-  [old lines 75-111 not captured in the source view]
+NODE_NAME = "Dataset"
+
+
+async def list_datasets(request: Request) -> Response:
+    """
+    summary: List datasets with cursor-based pagination
+    operationId: listDatasets
+    tags:
+      - datasets
+    parameters:
+      - in: query
+        name: cursor
+        required: false
+        schema:
+          type: string
+        description: Cursor for pagination
+      - in: query
+        name: limit
+        required: false
+        schema:
+          type: integer
+          default: 10
+      - in: query
+        name: name
+        required: false
+        schema:
+          type: string
+        description: match by dataset name
+    responses:
+      200:
+        description: A paginated list of datasets
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                next_cursor:
+                  type: string
+                data:
+                  type: array
+                  items:
+                    type: object
+                    properties:
+                      id:
+                        type: string
+                      name:
+                        type: string
+                      description:
+                        type: string
+                      metadata:
+                        type: object
+                      created_at:
+                        type: string
+                        format: date-time
+                      updated_at:
+                        type: string
+                        format: date-time
+      403:
+        description: Forbidden
+      404:
+        description: No datasets found
+    """
+    name = request.query_params.get("name")
+    cursor = request.query_params.get("cursor")
+    limit = int(request.query_params.get("limit", 10))
     async with request.app.state.db() as session:
         query = select(models.Dataset).order_by(models.Dataset.id.desc())
 
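With the FastAPI route gone, list_datasets reads cursor, limit, and name straight from request.query_params and answers with a JSONResponse shaped as {"next_cursor": ..., "data": [...]}. A minimal client-side sketch of walking those pages follows; the base URL, the /v1/datasets mount path, and the requests dependency are assumptions for illustration, not part of this diff.

import requests  # illustrative HTTP client, not part of the diff

BASE_URL = "http://localhost:6006"  # assumed local Phoenix server


def iter_datasets(limit: int = 10):
    """Walk GET /v1/datasets page by page using the next_cursor field."""
    cursor = None
    while True:
        params = {"limit": limit}
        if cursor:
            params["cursor"] = cursor
        resp = requests.get(f"{BASE_URL}/v1/datasets", params=params)
        resp.raise_for_status()
        payload = resp.json()
        yield from payload["data"]
        cursor = payload.get("next_cursor")
        if not cursor:  # last page: the server sends next_cursor = null
            break


for dataset in iter_datasets(limit=25):
    print(dataset["id"], dataset["name"])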
@@ -117,8 +132,8 @@ async def list_datasets(
                 cursor_id = GlobalID.from_id(cursor).node_id
                 query = query.filter(models.Dataset.id <= int(cursor_id))
             except ValueError:
-  [old lines 120-121 not captured in the source view]
+                return Response(
+                    content=f"Invalid cursor format: {cursor}",
                     status_code=HTTP_422_UNPROCESSABLE_ENTITY,
                 )
         if name:
@@ -129,56 +144,67 @@ async def list_datasets(
         datasets = result.scalars().all()
 
         if not datasets:
-            return
+            return JSONResponse(content={"next_cursor": None, "data": []}, status_code=200)
 
         next_cursor = None
         if len(datasets) == limit + 1:
-            next_cursor = str(GlobalID(
+            next_cursor = str(GlobalID(NODE_NAME, str(datasets[-1].id)))
             datasets = datasets[:-1]
 
         data = []
         for dataset in datasets:
             data.append(
-  [old lines 142-149: response fields id, name, description, metadata, created_at, updated_at; full content not captured in the source view]
+                {
+                    "id": str(GlobalID(NODE_NAME, str(dataset.id))),
+                    "name": dataset.name,
+                    "description": dataset.description,
+                    "metadata": dataset.metadata_,
+                    "created_at": dataset.created_at.isoformat(),
+                    "updated_at": dataset.updated_at.isoformat(),
+                }
             )
 
-        return
-  [old lines 153-170 not captured in the source view]
+        return JSONResponse(content={"next_cursor": next_cursor, "data": data})
+
+
+async def delete_dataset_by_id(request: Request) -> Response:
+    """
+    summary: Delete dataset by ID
+    operationId: deleteDatasetById
+    tags:
+      - datasets
+    parameters:
+      - in: path
+        name: id
+        required: true
+        schema:
+          type: string
+    responses:
+      204:
+        description: Success
+      403:
+        description: Forbidden
+      404:
+        description: Dataset not found
+      422:
+        description: Dataset ID is invalid
+    """
+    if id_ := request.path_params.get("id"):
         try:
             dataset_id = from_global_id_with_expected_type(
-                GlobalID.from_id(
-  [old line 174 not captured in the source view]
+                GlobalID.from_id(id_),
+                Dataset.__name__,
             )
         except ValueError:
-  [old lines 177-178 not captured in the source view]
+            return Response(
+                content=f"Invalid Dataset ID: {id_}",
+                status_code=HTTP_422_UNPROCESSABLE_ENTITY,
             )
     else:
-  [old line 181 not captured in the source view]
+        return Response(
+            content="Missing Dataset ID",
+            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
+        )
     project_names_stmt = get_project_names_for_datasets(dataset_id)
     eval_trace_ids_stmt = get_eval_trace_ids_for_datasets(dataset_id)
     stmt = (
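Both paginated handlers rely on the same trick: the SQL query asks for limit + 1 rows, and if the extra row comes back its ID becomes next_cursor while the row itself is dropped from the page. A standalone sketch of that pattern, with a hypothetical make_cursor helper standing in for the str(GlobalID(...)) encoding used above:

from typing import List, Optional, Tuple


def make_cursor(row: dict) -> str:
    """Hypothetical stand-in for str(GlobalID(NODE_NAME, str(row_id)))."""
    return f"Dataset:{row['id']}"


def paginate(rows: List[dict], limit: int) -> Tuple[List[dict], Optional[str]]:
    """One page plus the cursor for the next page, given limit + 1 fetched rows."""
    next_cursor = None
    if len(rows) == limit + 1:
        next_cursor = make_cursor(rows[-1])  # the extra row marks where the next page starts
        rows = rows[:-1]                     # and is dropped from the current page
    return rows, next_cursor


page, cursor = paginate([{"id": 5}, {"id": 4}, {"id": 3}], limit=2)
assert [r["id"] for r in page] == [5, 4] and cursor == "Dataset:3"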
@@ -188,34 +214,59 @@ async def delete_dataset(
         project_names = await session.scalars(project_names_stmt)
         eval_trace_ids = await session.scalars(eval_trace_ids_stmt)
         if (await session.scalar(stmt)) is None:
-  [old line 191 not captured in the source view]
+            return Response(content="Dataset does not exist", status_code=HTTP_404_NOT_FOUND)
     tasks = BackgroundTasks()
     tasks.add_task(delete_projects, request.app.state.db, *project_names)
     tasks.add_task(delete_traces, request.app.state.db, *eval_trace_ids)
-  [old lines 195-218 not captured in the source view]
+    return Response(status_code=HTTP_204_NO_CONTENT, background=tasks)
+
+
+async def get_dataset_by_id(request: Request) -> Response:
+    """
+    summary: Get dataset by ID
+    operationId: getDatasetById
+    tags:
+      - datasets
+    parameters:
+      - in: path
+        name: id
+        required: true
+        schema:
+          type: string
+    responses:
+      200:
+        description: Success
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                id:
+                  type: string
+                name:
+                  type: string
+                description:
+                  type: string
+                metadata:
+                  type: object
+                created_at:
+                  type: string
+                  format: date-time
+                updated_at:
+                  type: string
+                  format: date-time
+                example_count:
+                  type: integer
+      403:
+        description: Forbidden
+      404:
+        description: Dataset not found
+    """
+    dataset_id = GlobalID.from_id(request.path_params["id"])
+
+    if (type_name := dataset_id.type_name) != NODE_NAME:
+        return Response(
+            content=f"ID {dataset_id} refers to a f{type_name}", status_code=HTTP_404_NOT_FOUND
         )
     async with request.app.state.db() as session:
         result = await session.execute(
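get_dataset_by_id now checks the type name encoded in the GlobalID before touching the database and answers 404 for IDs minted for other node types. A sketch of what that check sees, assuming GlobalID here is strawberry's relay GlobalID, whose from_id / type_name / node_id API matches the usage in the hunks above; the encoded values are illustrative.

from strawberry.relay import GlobalID  # assumed import; matches the GlobalID API used above

gid = GlobalID("Dataset", "1")
encoded = str(gid)                    # base64 of "Dataset:1"; the opaque ID clients send in URLs
decoded = GlobalID.from_id(encoded)
assert decoded.type_name == "Dataset" and decoded.node_id == "1"

# An ID minted for a different node type fails the handler's NODE_NAME check
# and is rejected with 404 before any database lookup.
other = GlobalID.from_id(str(GlobalID("Project", "7")))
assert other.type_name != "Dataset"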
@@ -227,64 +278,97 @@ async def get_dataset(
         dataset = dataset_query[0] if dataset_query else None
         example_count = dataset_query[1] if dataset_query else 0
         if dataset is None:
-  [old lines 230-231 not captured in the source view]
+            return Response(
+                content=f"Dataset with ID {dataset_id} not found", status_code=HTTP_404_NOT_FOUND
             )
 
-  [old lines 234-243: response fields id, name, description, metadata, created_at, updated_at, example_count and return; full content not captured in the source view]
-  [old lines 244-274 not captured in the source view]
+        output_dict = {
+            "id": str(dataset_id),
+            "name": dataset.name,
+            "description": dataset.description,
+            "metadata": dataset.metadata_,
+            "created_at": dataset.created_at.isoformat(),
+            "updated_at": dataset.updated_at.isoformat(),
+            "example_count": example_count,
+        }
+        return JSONResponse(content={"data": output_dict})
+
+
+async def get_dataset_versions(request: Request) -> Response:
+    """
+    summary: Get dataset versions (sorted from latest to oldest)
+    operationId: getDatasetVersionsByDatasetId
+    tags:
+      - datasets
+    parameters:
+      - in: path
+        name: id
+        required: true
+        description: Dataset ID
+        schema:
+          type: string
+      - in: query
+        name: cursor
+        description: Cursor for pagination.
+        schema:
+          type: string
+      - in: query
+        name: limit
+        description: Maximum number versions to return.
+        schema:
+          type: integer
+          default: 10
+    responses:
+      200:
+        description: Success
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                next_cursor:
+                  type: string
+                data:
+                  type: array
+                  items:
+                    type: object
+                    properties:
+                      version_id:
+                        type: string
+                      description:
+                        type: string
+                      metadata:
+                        type: object
+                      created_at:
+                        type: string
+                        format: date-time
+      403:
+        description: Forbidden
+      422:
+        description: Dataset ID, cursor or limit is invalid.
+    """
+    if id_ := request.path_params.get("id"):
         try:
             dataset_id = from_global_id_with_expected_type(
-                GlobalID.from_id(
-  [old line 278 not captured in the source view]
+                GlobalID.from_id(id_),
+                Dataset.__name__,
             )
         except ValueError:
-  [old lines 281-282 not captured in the source view]
+            return Response(
+                content=f"Invalid Dataset ID: {id_}",
                 status_code=HTTP_422_UNPROCESSABLE_ENTITY,
             )
     else:
-  [old lines 286-287 not captured in the source view]
+        return Response(
+            content="Missing Dataset ID",
+            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
+        )
+    try:
+        limit = int(request.query_params.get("limit", 10))
+        assert limit > 0
+    except (ValueError, AssertionError):
+        return Response(
+            content="Invalid limit parameter",
             status_code=HTTP_422_UNPROCESSABLE_ENTITY,
         )
     stmt = (
@@ -293,14 +377,15 @@ async def list_dataset_versions(
         .order_by(models.DatasetVersion.id.desc())
         .limit(limit + 1)
     )
-    if cursor:
+    if cursor := request.query_params.get("cursor"):
         try:
             dataset_version_id = from_global_id_with_expected_type(
-                GlobalID.from_id(cursor),
+                GlobalID.from_id(cursor),
+                DatasetVersion.__name__,
             )
         except ValueError:
-  [old lines 302-303 not captured in the source view]
+            return Response(
+                content=f"Invalid cursor: {cursor}",
                 status_code=HTTP_422_UNPROCESSABLE_ENTITY,
             )
     max_dataset_version_id = (
@@ -311,99 +396,102 @@ async def list_dataset_versions(
         stmt = stmt.filter(models.DatasetVersion.id <= max_dataset_version_id)
     async with request.app.state.db() as session:
         data = [
-            DatasetVersion(
-                version_id=str(GlobalID(DATASET_VERSION_NODE_NAME, str(version.id))),
-                description=version.description,
-                metadata=version.metadata_,
-                created_at=version.created_at,
-            )
-            async for version in await session.stream_scalars(stmt)
-        ]
-    next_cursor = data.pop().version_id if len(data) == limit + 1 else None
-    return ListDatasetVersionsResponseBody(data=data, next_cursor=next_cursor)
-
-
-class UploadDatasetData(V1RoutesBaseModel):
-    dataset_id: str
-
-
-class UploadDatasetResponseBody(ResponseBody[UploadDatasetData]):
-    pass
-
-
-@router.post(
-    "/datasets/upload",
-    operation_id="uploadDataset",
-    summary="Upload dataset from JSON, CSV, or PyArrow",
-    responses=add_errors_to_responses(
-        [
             {
-  [old lines 341-344 only partially captured in the source view]
-        ]
-    ),
-    # FastAPI cannot generate the request body portion of the OpenAPI schema for
-    # routes that accept multiple request content types, so we have to provide
-    # this part of the schema manually. For context, see
-    # https://github.com/tiangolo/fastapi/discussions/7786 and
-    # https://github.com/tiangolo/fastapi/issues/990
-    openapi_extra={
-        "requestBody": {
-            "content": {
-                "application/json": {
-                    "schema": {
-                        "type": "object",
-                        "required": ["name", "inputs"],
-                        "properties": {
-                            "action": {"type": "string", "enum": ["create", "append"]},
-                            "name": {"type": "string"},
-                            "description": {"type": "string"},
-                            "inputs": {"type": "array", "items": {"type": "object"}},
-                            "outputs": {"type": "array", "items": {"type": "object"}},
-                            "metadata": {"type": "array", "items": {"type": "object"}},
-                        },
-                    }
-                },
-                "multipart/form-data": {
-                    "schema": {
-                        "type": "object",
-                        "required": ["name", "input_keys[]", "output_keys[]", "file"],
-                        "properties": {
-                            "action": {"type": "string", "enum": ["create", "append"]},
-                            "name": {"type": "string"},
-                            "description": {"type": "string"},
-                            "input_keys[]": {
-                                "type": "array",
-                                "items": {"type": "string"},
-                                "uniqueItems": True,
-                            },
-                            "output_keys[]": {
-                                "type": "array",
-                                "items": {"type": "string"},
-                                "uniqueItems": True,
-                            },
-                            "metadata_keys[]": {
-                                "type": "array",
-                                "items": {"type": "string"},
-                                "uniqueItems": True,
-                            },
-                            "file": {"type": "string", "format": "binary"},
-                        },
-                    }
-                },
+                "version_id": str(GlobalID(DatasetVersion.__name__, str(version.id))),
+                "description": version.description,
+                "metadata": version.metadata_,
+                "created_at": version.created_at.isoformat(),
             }
-  [old lines 397-398 not captured in the source view]
-        )
-  [old lines 400-404 not captured in the source view]
-)
-  [old line 406 not captured in the source view]
+            async for version in await session.stream_scalars(stmt)
+        ]
+    next_cursor = data.pop()["version_id"] if len(data) == limit + 1 else None
+    return JSONResponse(content={"next_cursor": next_cursor, "data": data})
+
+
+async def post_datasets_upload(request: Request) -> Response:
+    """
+    summary: Upload dataset as either JSON or file (CSV or PyArrow)
+    operationId: uploadDataset
+    tags:
+      - datasets
+    parameters:
+      - in: query
+        name: sync
+        description: If true, fulfill request synchronously and return JSON containing dataset_id
+        schema:
+          type: boolean
+    requestBody:
+      content:
+        application/json:
+          schema:
+            type: object
+            required:
+              - name
+              - inputs
+            properties:
+              action:
+                type: string
+                enum: [create, append]
+              name:
+                type: string
+              description:
+                type: string
+              inputs:
+                type: array
+                items:
+                  type: object
+              outputs:
+                type: array
+                items:
+                  type: object
+              metadata:
+                type: array
+                items:
+                  type: object
+        multipart/form-data:
+          schema:
+            type: object
+            required:
+              - name
+              - input_keys[]
+              - output_keys[]
+              - file
+            properties:
+              action:
+                type: string
+                enum: [create, append]
+              name:
+                type: string
+              description:
+                type: string
+              input_keys[]:
+                type: array
+                items:
+                  type: string
+                uniqueItems: true
+              output_keys[]:
+                type: array
+                items:
+                  type: string
+                uniqueItems: true
+              metadata_keys[]:
+                type: array
+                items:
+                  type: string
+                uniqueItems: true
+              file:
+                type: string
+                format: binary
+    responses:
+      200:
+        description: Success
+      403:
+        description: Forbidden
+      409:
+        description: Dataset of the same name already exists
+      422:
+        description: Request body is invalid
+    """
     request_content_type = request.headers["content-type"]
     examples: Union[Examples, Awaitable[Examples]]
     if request_content_type.startswith("application/json"):
@@ -412,15 +500,15 @@ async def upload_dataset(
                 _process_json, await request.json()
             )
         except ValueError as e:
-  [old lines 415-416 not captured in the source view]
+            return Response(
+                content=str(e),
                 status_code=HTTP_422_UNPROCESSABLE_ENTITY,
             )
         if action is DatasetAction.CREATE:
             async with request.app.state.db() as session:
                 if await _check_table_exists(session, name):
-  [old lines 422-423 not captured in the source view]
+                    return Response(
+                        content=f"Dataset with the same name already exists: {name=}",
                         status_code=HTTP_409_CONFLICT,
                     )
     elif request_content_type.startswith("multipart/form-data"):
@@ -436,15 +524,15 @@ async def upload_dataset(
                 file,
             ) = await _parse_form_data(form)
         except ValueError as e:
-  [old lines 439-440 not captured in the source view]
+            return Response(
+                content=str(e),
                 status_code=HTTP_422_UNPROCESSABLE_ENTITY,
             )
         if action is DatasetAction.CREATE:
             async with request.app.state.db() as session:
                 if await _check_table_exists(session, name):
-  [old lines 446-447 not captured in the source view]
+                    return Response(
+                        content=f"Dataset with the same name already exists: {name=}",
                         status_code=HTTP_409_CONFLICT,
                     )
         content = await file.read()
@@ -460,13 +548,13 @@ async def upload_dataset(
             else:
                 assert_never(file_content_type)
         except ValueError as e:
-  [old lines 463-464 not captured in the source view]
+            return Response(
+                content=str(e),
                 status_code=HTTP_422_UNPROCESSABLE_ENTITY,
             )
     else:
-  [old lines 468-469 not captured in the source view]
+        return Response(
+            content=str("Invalid request Content-Type"),
            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
        )
     operation = cast(
@@ -479,19 +567,19 @@ async def upload_dataset(
             description=description,
         ),
     )
-    if sync:
+    if request.query_params.get("sync") == "true":
         async with request.app.state.db() as session:
             dataset_id = (await operation(session)).dataset_id
-            return
-                data
+        return JSONResponse(
+            content={"data": {"dataset_id": str(GlobalID(Dataset.__name__, str(dataset_id)))}}
         )
     try:
         request.state.enqueue_operation(operation)
     except QueueFull:
         if isinstance(examples, Coroutine):
             examples.close()
-  [old line 493 not captured in the source view]
-        return
+        return Response(status_code=HTTP_429_TOO_MANY_REQUESTS)
+    return Response()
 
 
 class FileContentType(Enum):
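Synchronous uploads are now toggled by the raw query string: only the literal value "true" takes the path that runs the insert inline and returns the new dataset's GlobalID, while anything else falls through to the queued path that returns an empty 200. A hedged sketch of a JSON upload against that endpoint; the server URL, dataset name, and example rows are made up, and the body shape follows the requestBody schema in the docstring above.

import requests  # illustrative HTTP client, not part of the diff

payload = {
    "name": "my-dataset",  # hypothetical dataset name
    "inputs": [{"question": "What is Phoenix?"}],
    "outputs": [{"answer": "An LLM observability tool."}],
}

# sync=true makes the server insert the examples before responding and return
# {"data": {"dataset_id": "<GlobalID>"}} instead of an empty body.
resp = requests.post(
    "http://localhost:6006/v1/datasets/upload",  # assumed local Phoenix server and mount path
    params={"sync": "true"},
    json=payload,
)
resp.raise_for_status()
print(resp.json()["data"]["dataset_id"])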
@@ -669,125 +757,151 @@ async def _parse_form_data(
     )
 
 
-  [old lines 672-701 not captured in the source view]
+async def get_dataset_csv(request: Request) -> Response:
+    """
+    summary: Download dataset examples as CSV text file
+    operationId: getDatasetCsv
+    tags:
+      - datasets
+    parameters:
+      - in: path
+        name: id
+        required: true
+        schema:
+          type: string
+        description: Dataset ID
+      - in: query
+        name: version_id
+        schema:
+          type: string
+        description: Dataset version ID. If omitted, returns the latest version.
+    responses:
+      200:
+        description: Success
+        content:
+          text/csv:
+            schema:
+              type: string
+              contentMediaType: text/csv
+              contentEncoding: gzip
+      403:
+        description: Forbidden
+      404:
+        description: Dataset does not exist.
+      422:
+        description: Dataset ID or version ID is invalid.
+    """
     try:
-  [old line 703 not captured in the source view]
-        dataset_name, examples = await _get_db_examples(
-            session=session, id=id, version_id=version_id
-        )
+        dataset_name, examples = await _get_db_examples(request)
     except ValueError as e:
-  [old line 708 not captured in the source view]
+        return Response(content=str(e), status_code=HTTP_422_UNPROCESSABLE_ENTITY)
     content = await run_in_threadpool(_get_content_csv, examples)
     return Response(
         content=content,
         headers={
             "content-disposition": f'attachment; filename="{dataset_name}.csv"',
             "content-type": "text/csv",
+            "content-encoding": "gzip",
         },
     )
 
 
-  [old lines 719-743 not captured in the source view]
+async def get_dataset_jsonl_openai_ft(request: Request) -> Response:
+    """
+    summary: Download dataset examples as OpenAI Fine-Tuning JSONL file
+    operationId: getDatasetJSONLOpenAIFineTuning
+    tags:
+      - datasets
+    parameters:
+      - in: path
+        name: id
+        required: true
+        schema:
+          type: string
+        description: Dataset ID
+      - in: query
+        name: version_id
+        schema:
+          type: string
+        description: Dataset version ID. If omitted, returns the latest version.
+    responses:
+      200:
+        description: Success
+        content:
+          text/plain:
+            schema:
+              type: string
+              contentMediaType: text/plain
+              contentEncoding: gzip
+      403:
+        description: Forbidden
+      404:
+        description: Dataset does not exist.
+      422:
+        description: Dataset ID or version ID is invalid.
+    """
     try:
-  [old line 745 not captured in the source view]
-        dataset_name, examples = await _get_db_examples(
-            session=session, id=id, version_id=version_id
-        )
+        dataset_name, examples = await _get_db_examples(request)
     except ValueError as e:
-  [old line 750 not captured in the source view]
+        return Response(content=str(e), status_code=HTTP_422_UNPROCESSABLE_ENTITY)
     content = await run_in_threadpool(_get_content_jsonl_openai_ft, examples)
-  [old lines 752-753 not captured in the source view]
+    return Response(
+        content=content,
+        headers={
+            "content-disposition": f'attachment; filename="{dataset_name}.jsonl"',
+            "content-type": "text/plain",
+            "content-encoding": "gzip",
+        },
+    )
 
 
-  [old lines 756-780 not captured in the source view]
+async def get_dataset_jsonl_openai_evals(request: Request) -> Response:
+    """
+    summary: Download dataset examples as OpenAI Evals JSONL file
+    operationId: getDatasetJSONLOpenAIEvals
+    tags:
+      - datasets
+    parameters:
+      - in: path
+        name: id
+        required: true
+        schema:
+          type: string
+        description: Dataset ID
+      - in: query
+        name: version_id
+        schema:
+          type: string
+        description: Dataset version ID. If omitted, returns the latest version.
+    responses:
+      200:
+        description: Success
+        content:
+          text/plain:
+            schema:
+              type: string
+              contentMediaType: text/plain
+              contentEncoding: gzip
+      403:
+        description: Forbidden
+      404:
+        description: Dataset does not exist.
+      422:
+        description: Dataset ID or version ID is invalid.
+    """
     try:
-  [old line 782 not captured in the source view]
-        dataset_name, examples = await _get_db_examples(
-            session=session, id=id, version_id=version_id
-        )
+        dataset_name, examples = await _get_db_examples(request)
     except ValueError as e:
-  [old line 787 not captured in the source view]
+        return Response(content=str(e), status_code=HTTP_422_UNPROCESSABLE_ENTITY)
     content = await run_in_threadpool(_get_content_jsonl_openai_evals, examples)
-  [old lines 789-790 not captured in the source view]
+    return Response(
+        content=content,
+        headers={
+            "content-disposition": f'attachment; filename="{dataset_name}.jsonl"',
+            "content-type": "text/plain",
+            "content-encoding": "gzip",
+        },
+    )
 
 
 def _get_content_csv(examples: List[models.DatasetExampleRevision]) -> bytes:
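Each download handler now sends gzip-compressed bytes with content-encoding: gzip, so a well-behaved HTTP client sees plain CSV or JSONL text after transparent decoding. A sketch of fetching the CSV export; the /v1/datasets/{id}/csv path and the sample GlobalID are assumptions based on how earlier releases exposed this route, and requests is used only because it decodes the gzip content encoding automatically.

import io

import pandas as pd
import requests  # decodes Content-Encoding: gzip transparently

dataset_global_id = "RGF0YXNldDox"  # base64 of "Dataset:1"; substitute a real dataset's GlobalID
resp = requests.get(
    f"http://localhost:6006/v1/datasets/{dataset_global_id}/csv",  # assumed local server and route
    # params={"version_id": "<DatasetVersion GlobalID>"} would select an older version
)
resp.raise_for_status()
df = pd.read_csv(io.BytesIO(resp.content))  # resp.content is already-decompressed CSV text
print(df.head())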
@@ -803,7 +917,7 @@ def _get_content_csv(examples: List[models.DatasetExampleRevision]) -> bytes:
         }
         for ex in examples
     ]
-    return
+    return gzip.compress(pd.DataFrame.from_records(records).to_csv(index=False).encode())
 
 
 def _get_content_jsonl_openai_ft(examples: List[models.DatasetExampleRevision]) -> bytes:
@@ -824,7 +938,7 @@ def _get_content_jsonl_openai_ft(examples: List[models.DatasetExampleRevision])
         ).encode()
     )
     records.seek(0)
-    return records.read()
+    return gzip.compress(records.read())
 
 
 def _get_content_jsonl_openai_evals(examples: List[models.DatasetExampleRevision]) -> bytes:
@@ -851,17 +965,18 @@ def _get_content_jsonl_openai_evals(examples: List[models.DatasetExampleRevision
         ).encode()
     )
     records.seek(0)
-    return records.read()
+    return gzip.compress(records.read())
 
 
-async def _get_db_examples(
-  [old lines 858-859 not captured in the source view]
-    dataset_id = from_global_id_with_expected_type(GlobalID.from_id(
+async def _get_db_examples(request: Request) -> Tuple[str, List[models.DatasetExampleRevision]]:
+    if not (id_ := request.path_params.get("id")):
+        raise ValueError("Missing Dataset ID")
+    dataset_id = from_global_id_with_expected_type(GlobalID.from_id(id_), Dataset.__name__)
     dataset_version_id: Optional[int] = None
-    if version_id:
+    if version_id := request.query_params.get("version_id"):
         dataset_version_id = from_global_id_with_expected_type(
-            GlobalID.from_id(version_id),
+            GlobalID.from_id(version_id),
+            DatasetVersion.__name__,
         )
     latest_version = (
         select(
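The serializers themselves now return gzip-compressed payloads (gzip.compress(...) instead of raw bytes), which is what the content-encoding: gzip header in each download handler advertises. A small standard-library round trip showing what that wrapping does, with made-up example rows:

import gzip

import pandas as pd

records = [{"input_question": "2 + 2?", "output_answer": "4"}]  # illustrative rows, not from the diff
raw = pd.DataFrame.from_records(records).to_csv(index=False).encode()

compressed = gzip.compress(raw)            # the bytes a handler now returns
assert gzip.decompress(compressed) == raw  # clients (or the HTTP stack) reverse it
print(f"{len(raw)} raw bytes -> {len(compressed)} compressed bytes")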
@@ -894,12 +1009,13 @@ async def _get_db_examples(
         .where(models.DatasetExampleRevision.revision_kind != "DELETE")
         .order_by(models.DatasetExampleRevision.dataset_example_id)
     )
-  [old lines 897-902 not captured in the source view]
+    async with request.app.state.db() as session:
+        dataset_name: Optional[str] = await session.scalar(
+            select(models.Dataset.name).where(models.Dataset.id == dataset_id)
+        )
+        if not dataset_name:
+            raise ValueError("Dataset does not exist.")
+        examples = [r async for r in await session.stream_scalars(stmt)]
         return dataset_name, examples
 
 