arize-phoenix 4.10.2rc2__py3-none-any.whl → 4.12.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

This version of arize-phoenix has been flagged as potentially problematic.

Files changed (30)
  1. {arize_phoenix-4.10.2rc2.dist-info → arize_phoenix-4.12.0.dist-info}/METADATA +3 -4
  2. {arize_phoenix-4.10.2rc2.dist-info → arize_phoenix-4.12.0.dist-info}/RECORD +29 -29
  3. phoenix/server/api/context.py +7 -3
  4. phoenix/server/api/openapi/main.py +2 -18
  5. phoenix/server/api/openapi/schema.py +12 -12
  6. phoenix/server/api/routers/v1/__init__.py +83 -36
  7. phoenix/server/api/routers/v1/dataset_examples.py +123 -102
  8. phoenix/server/api/routers/v1/datasets.py +507 -389
  9. phoenix/server/api/routers/v1/evaluations.py +66 -73
  10. phoenix/server/api/routers/v1/experiment_evaluations.py +91 -67
  11. phoenix/server/api/routers/v1/experiment_runs.py +155 -97
  12. phoenix/server/api/routers/v1/experiments.py +181 -131
  13. phoenix/server/api/routers/v1/spans.py +173 -143
  14. phoenix/server/api/routers/v1/traces.py +128 -114
  15. phoenix/server/api/types/Span.py +1 -0
  16. phoenix/server/app.py +176 -148
  17. phoenix/server/openapi/docs.py +221 -0
  18. phoenix/server/static/index.js +574 -573
  19. phoenix/server/thread_server.py +2 -2
  20. phoenix/session/client.py +5 -0
  21. phoenix/session/data_extractor.py +20 -1
  22. phoenix/session/session.py +4 -0
  23. phoenix/trace/attributes.py +2 -1
  24. phoenix/trace/schemas.py +1 -0
  25. phoenix/trace/span_json_decoder.py +1 -1
  26. phoenix/version.py +1 -1
  27. phoenix/server/api/routers/v1/utils.py +0 -94
  28. {arize_phoenix-4.10.2rc2.dist-info → arize_phoenix-4.12.0.dist-info}/WHEEL +0 -0
  29. {arize_phoenix-4.10.2rc2.dist-info → arize_phoenix-4.12.0.dist-info}/licenses/IP_NOTICE +0 -0
  30. {arize_phoenix-4.10.2rc2.dist-info → arize_phoenix-4.12.0.dist-info}/licenses/LICENSE +0 -0
@@ -6,7 +6,6 @@ import logging
  import zlib
  from asyncio import QueueFull
  from collections import Counter
- from datetime import datetime
  from enum import Enum
  from functools import partial
  from typing import (
@@ -14,7 +13,6 @@ from typing import (
  Awaitable,
  Callable,
  Coroutine,
- Dict,
  FrozenSet,
  Iterator,
  List,
@@ -28,17 +26,14 @@ from typing import (

  import pandas as pd
  import pyarrow as pa
- from fastapi import APIRouter, BackgroundTasks, HTTPException, Path, Query
- from fastapi.responses import PlainTextResponse, StreamingResponse
- from pydantic import BaseModel
  from sqlalchemy import and_, delete, func, select
  from sqlalchemy.ext.asyncio import AsyncSession
+ from starlette.background import BackgroundTasks
  from starlette.concurrency import run_in_threadpool
  from starlette.datastructures import FormData, UploadFile
  from starlette.requests import Request
- from starlette.responses import Response
+ from starlette.responses import JSONResponse, Response
  from starlette.status import (
- HTTP_200_OK,
  HTTP_204_NO_CONTENT,
  HTTP_404_NOT_FOUND,
  HTTP_409_CONFLICT,
@@ -56,59 +51,79 @@ from phoenix.db.insertion.dataset import (
  ExampleContent,
  add_dataset_examples,
  )
- from phoenix.server.api.types.Dataset import Dataset as DatasetNodeType
+ from phoenix.server.api.types.Dataset import Dataset
  from phoenix.server.api.types.DatasetExample import DatasetExample
- from phoenix.server.api.types.DatasetVersion import DatasetVersion as DatasetVersionNodeType
+ from phoenix.server.api.types.DatasetVersion import DatasetVersion
  from phoenix.server.api.types.node import from_global_id_with_expected_type
  from phoenix.server.api.utils import delete_projects, delete_traces

- from .dataset_examples import router as dataset_examples_router
- from .utils import (
- PaginatedResponseBody,
- ResponseBody,
- add_errors_to_responses,
- add_text_csv_content_to_responses,
- )
-
  logger = logging.getLogger(__name__)

- DATASET_NODE_NAME = DatasetNodeType.__name__
- DATASET_VERSION_NODE_NAME = DatasetVersionNodeType.__name__
-
-
- router = APIRouter(tags=["datasets"])
-
-
- class Dataset(BaseModel):
- id: str
- name: str
- description: Optional[str]
- metadata: Dict[str, Any]
- created_at: datetime
- updated_at: datetime
-
-
- class ListDatasetsResponseBody(PaginatedResponseBody[Dataset]):
- pass
-
-
- @router.get(
- "/datasets",
- operation_id="listDatasets",
- summary="List datasets",
- responses=add_errors_to_responses([HTTP_422_UNPROCESSABLE_ENTITY]),
- )
- async def list_datasets(
- request: Request,
- cursor: Optional[str] = Query(
- default=None,
- description="Cursor for pagination",
- ),
- name: Optional[str] = Query(default=None, description="An optional dataset name to filter by"),
- limit: int = Query(
- default=10, description="The max number of datasets to return at a time.", gt=0
- ),
- ) -> ListDatasetsResponseBody:
+ NODE_NAME = "Dataset"
+
+
+ async def list_datasets(request: Request) -> Response:
+ """
+ summary: List datasets with cursor-based pagination
+ operationId: listDatasets
+ tags:
+ - datasets
+ parameters:
+ - in: query
+ name: cursor
+ required: false
+ schema:
+ type: string
+ description: Cursor for pagination
+ - in: query
+ name: limit
+ required: false
+ schema:
+ type: integer
+ default: 10
+ - in: query
+ name: name
+ required: false
+ schema:
+ type: string
+ description: match by dataset name
+ responses:
+ 200:
+ description: A paginated list of datasets
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ next_cursor:
+ type: string
+ data:
+ type: array
+ items:
+ type: object
+ properties:
+ id:
+ type: string
+ name:
+ type: string
+ description:
+ type: string
+ metadata:
+ type: object
+ created_at:
+ type: string
+ format: date-time
+ updated_at:
+ type: string
+ format: date-time
+ 403:
+ description: Forbidden
+ 404:
+ description: No datasets found
+ """
+ name = request.query_params.get("name")
+ cursor = request.query_params.get("cursor")
+ limit = int(request.query_params.get("limit", 10))
  async with request.app.state.db() as session:
  query = select(models.Dataset).order_by(models.Dataset.id.desc())

@@ -117,8 +132,8 @@ async def list_datasets(
  cursor_id = GlobalID.from_id(cursor).node_id
  query = query.filter(models.Dataset.id <= int(cursor_id))
  except ValueError:
- raise HTTPException(
- detail=f"Invalid cursor format: {cursor}",
+ return Response(
+ content=f"Invalid cursor format: {cursor}",
  status_code=HTTP_422_UNPROCESSABLE_ENTITY,
  )
  if name:
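The handlers in this module are now plain Starlette callables (Request -> Response) that return Response objects directly instead of raising FastAPI's HTTPException, with the OpenAPI metadata carried in YAML docstrings. The route wiring is not shown in this file; per the file list it now lives in phoenix/server/api/routers/v1/__init__.py. A minimal sketch of that registration pattern, using an illustrative stand-in handler and an assumed path:

```python
from starlette.applications import Starlette
from starlette.requests import Request
from starlette.responses import JSONResponse, Response
from starlette.routing import Route


async def list_datasets(request: Request) -> Response:
    # Stand-in for the handler above: return an empty page of results.
    return JSONResponse({"next_cursor": None, "data": []})


# The "/v1" prefix and the exact mounting are assumptions for illustration only.
app = Starlette(routes=[Route("/v1/datasets", list_datasets, methods=["GET"])])
```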
@@ -129,56 +144,67 @@ async def list_datasets(
  datasets = result.scalars().all()

  if not datasets:
- return ListDatasetsResponseBody(next_cursor=None, data=[])
+ return JSONResponse(content={"next_cursor": None, "data": []}, status_code=200)

  next_cursor = None
  if len(datasets) == limit + 1:
- next_cursor = str(GlobalID(DATASET_NODE_NAME, str(datasets[-1].id)))
+ next_cursor = str(GlobalID(NODE_NAME, str(datasets[-1].id)))
  datasets = datasets[:-1]

  data = []
  for dataset in datasets:
  data.append(
- Dataset(
- id=str(GlobalID(DATASET_NODE_NAME, str(dataset.id))),
- name=dataset.name,
- description=dataset.description,
- metadata=dataset.metadata_,
- created_at=dataset.created_at,
- updated_at=dataset.updated_at,
- )
+ {
+ "id": str(GlobalID(NODE_NAME, str(dataset.id))),
+ "name": dataset.name,
+ "description": dataset.description,
+ "metadata": dataset.metadata_,
+ "created_at": dataset.created_at.isoformat(),
+ "updated_at": dataset.updated_at.isoformat(),
+ }
  )

- return ListDatasetsResponseBody(next_cursor=next_cursor, data=data)
-
-
- @router.delete(
- "/datasets/{id}",
- operation_id="deleteDatasetById",
- summary="Delete dataset by ID",
- status_code=HTTP_204_NO_CONTENT,
- responses=add_errors_to_responses(
- [
- {"status_code": HTTP_404_NOT_FOUND, "description": "Dataset not found"},
- {"status_code": HTTP_422_UNPROCESSABLE_ENTITY, "description": "Invalid dataset ID"},
- ]
- ),
- )
- async def delete_dataset(
- request: Request, id: str = Path(description="The ID of the dataset to delete.")
- ) -> None:
- if id:
+ return JSONResponse(content={"next_cursor": next_cursor, "data": data})
+
+
+ async def delete_dataset_by_id(request: Request) -> Response:
+ """
+ summary: Delete dataset by ID
+ operationId: deleteDatasetById
+ tags:
+ - datasets
+ parameters:
+ - in: path
+ name: id
+ required: true
+ schema:
+ type: string
+ responses:
+ 204:
+ description: Success
+ 403:
+ description: Forbidden
+ 404:
+ description: Dataset not found
+ 422:
+ description: Dataset ID is invalid
+ """
+ if id_ := request.path_params.get("id"):
  try:
  dataset_id = from_global_id_with_expected_type(
- GlobalID.from_id(id),
- DATASET_NODE_NAME,
+ GlobalID.from_id(id_),
+ Dataset.__name__,
  )
  except ValueError:
- raise HTTPException(
- detail=f"Invalid Dataset ID: {id}", status_code=HTTP_422_UNPROCESSABLE_ENTITY
+ return Response(
+ content=f"Invalid Dataset ID: {id_}",
+ status_code=HTTP_422_UNPROCESSABLE_ENTITY,
  )
  else:
- raise HTTPException(detail="Missing Dataset ID", status_code=HTTP_422_UNPROCESSABLE_ENTITY)
+ return Response(
+ content="Missing Dataset ID",
+ status_code=HTTP_422_UNPROCESSABLE_ENTITY,
+ )
  project_names_stmt = get_project_names_for_datasets(dataset_id)
  eval_trace_ids_stmt = get_eval_trace_ids_for_datasets(dataset_id)
  stmt = (
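list_datasets above pages with an opaque GlobalID cursor: it fetches limit + 1 rows and, when the extra row exists, returns its ID as next_cursor. A hedged client-side sketch of walking those pages (the base URL and /v1 prefix are assumptions):

```python
import requests

url = "http://localhost:6006/v1/datasets"  # assumed local Phoenix server and prefix
cursor = None
while True:
    params = {"limit": 10}
    if cursor:
        params["cursor"] = cursor
    page = requests.get(url, params=params).json()
    for dataset in page["data"]:
        print(dataset["id"], dataset["name"])
    cursor = page["next_cursor"]
    if not cursor:  # no further pages
        break
```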
@@ -188,34 +214,59 @@ async def delete_dataset(
  project_names = await session.scalars(project_names_stmt)
  eval_trace_ids = await session.scalars(eval_trace_ids_stmt)
  if (await session.scalar(stmt)) is None:
- raise HTTPException(detail="Dataset does not exist", status_code=HTTP_404_NOT_FOUND)
+ return Response(content="Dataset does not exist", status_code=HTTP_404_NOT_FOUND)
  tasks = BackgroundTasks()
  tasks.add_task(delete_projects, request.app.state.db, *project_names)
  tasks.add_task(delete_traces, request.app.state.db, *eval_trace_ids)
-
-
- class DatasetWithExampleCount(Dataset):
- example_count: int
-
-
- class GetDatasetResponseBody(ResponseBody[DatasetWithExampleCount]):
- pass
-
-
- @router.get(
- "/datasets/{id}",
- operation_id="getDataset",
- summary="Get dataset by ID",
- responses=add_errors_to_responses([HTTP_404_NOT_FOUND]),
- )
- async def get_dataset(
- request: Request, id: str = Path(description="The ID of the dataset")
- ) -> GetDatasetResponseBody:
- dataset_id = GlobalID.from_id(id)
-
- if (type_name := dataset_id.type_name) != DATASET_NODE_NAME:
- raise HTTPException(
- detail=f"ID {dataset_id} refers to a f{type_name}", status_code=HTTP_404_NOT_FOUND
+ return Response(status_code=HTTP_204_NO_CONTENT, background=tasks)
+
+
+ async def get_dataset_by_id(request: Request) -> Response:
+ """
+ summary: Get dataset by ID
+ operationId: getDatasetById
+ tags:
+ - datasets
+ parameters:
+ - in: path
+ name: id
+ required: true
+ schema:
+ type: string
+ responses:
+ 200:
+ description: Success
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ id:
+ type: string
+ name:
+ type: string
+ description:
+ type: string
+ metadata:
+ type: object
+ created_at:
+ type: string
+ format: date-time
+ updated_at:
+ type: string
+ format: date-time
+ example_count:
+ type: integer
+ 403:
+ description: Forbidden
+ 404:
+ description: Dataset not found
+ """
+ dataset_id = GlobalID.from_id(request.path_params["id"])
+
+ if (type_name := dataset_id.type_name) != NODE_NAME:
+ return Response(
+ content=f"ID {dataset_id} refers to a f{type_name}", status_code=HTTP_404_NOT_FOUND
  )
  async with request.app.state.db() as session:
  result = await session.execute(
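delete_dataset_by_id now attaches Starlette BackgroundTasks to the 204 response, so the project and trace cleanup runs only after the response has been sent. A minimal sketch of that pattern, with an illustrative stand-in for the cleanup functions:

```python
from starlette.background import BackgroundTasks
from starlette.responses import Response
from starlette.status import HTTP_204_NO_CONTENT


async def delete_projects_stub(*names: str) -> None:
    # Illustrative stand-in for phoenix.server.api.utils.delete_projects.
    print("deleting projects:", names)


def build_delete_response(project_names: list) -> Response:
    tasks = BackgroundTasks()
    tasks.add_task(delete_projects_stub, *project_names)
    # The tasks execute after the 204 has been returned to the client.
    return Response(status_code=HTTP_204_NO_CONTENT, background=tasks)
```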
@@ -227,64 +278,97 @@ async def get_dataset(
  dataset = dataset_query[0] if dataset_query else None
  example_count = dataset_query[1] if dataset_query else 0
  if dataset is None:
- raise HTTPException(
- detail=f"Dataset with ID {dataset_id} not found", status_code=HTTP_404_NOT_FOUND
+ return Response(
+ content=f"Dataset with ID {dataset_id} not found", status_code=HTTP_404_NOT_FOUND
  )

- dataset = DatasetWithExampleCount(
- id=str(dataset_id),
- name=dataset.name,
- description=dataset.description,
- metadata=dataset.metadata_,
- created_at=dataset.created_at,
- updated_at=dataset.updated_at,
- example_count=example_count,
- )
- return GetDatasetResponseBody(data=dataset)
-
-
- class DatasetVersion(BaseModel):
- version_id: str
- description: str
- metadata: Dict[str, Any]
- created_at: datetime
-
-
- class ListDatasetVersionsResponseBody(PaginatedResponseBody[DatasetVersion]):
- pass
-
-
- @router.get(
- "/datasets/{id}/versions",
- operation_id="listDatasetVersionsByDatasetId",
- summary="List dataset versions",
- responses=add_errors_to_responses([HTTP_422_UNPROCESSABLE_ENTITY]),
- )
- async def list_dataset_versions(
- request: Request,
- id: str = Path(description="The ID of the dataset"),
- cursor: Optional[str] = Query(
- default=None,
- description="Cursor for pagination",
- ),
- limit: int = Query(
- default=10, description="The max number of dataset versions to return at a time", gt=0
- ),
- ) -> ListDatasetVersionsResponseBody:
- if id:
+ output_dict = {
+ "id": str(dataset_id),
+ "name": dataset.name,
+ "description": dataset.description,
+ "metadata": dataset.metadata_,
+ "created_at": dataset.created_at.isoformat(),
+ "updated_at": dataset.updated_at.isoformat(),
+ "example_count": example_count,
+ }
+ return JSONResponse(content={"data": output_dict})
+
+
+ async def get_dataset_versions(request: Request) -> Response:
+ """
+ summary: Get dataset versions (sorted from latest to oldest)
+ operationId: getDatasetVersionsByDatasetId
+ tags:
+ - datasets
+ parameters:
+ - in: path
+ name: id
+ required: true
+ description: Dataset ID
+ schema:
+ type: string
+ - in: query
+ name: cursor
+ description: Cursor for pagination.
+ schema:
+ type: string
+ - in: query
+ name: limit
+ description: Maximum number versions to return.
+ schema:
+ type: integer
+ default: 10
+ responses:
+ 200:
+ description: Success
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ next_cursor:
+ type: string
+ data:
+ type: array
+ items:
+ type: object
+ properties:
+ version_id:
+ type: string
+ description:
+ type: string
+ metadata:
+ type: object
+ created_at:
+ type: string
+ format: date-time
+ 403:
+ description: Forbidden
+ 422:
+ description: Dataset ID, cursor or limit is invalid.
+ """
+ if id_ := request.path_params.get("id"):
  try:
  dataset_id = from_global_id_with_expected_type(
- GlobalID.from_id(id),
- DATASET_NODE_NAME,
+ GlobalID.from_id(id_),
+ Dataset.__name__,
  )
  except ValueError:
- raise HTTPException(
- detail=f"Invalid Dataset ID: {id}",
+ return Response(
+ content=f"Invalid Dataset ID: {id_}",
  status_code=HTTP_422_UNPROCESSABLE_ENTITY,
  )
  else:
- raise HTTPException(
- detail="Missing Dataset ID",
+ return Response(
+ content="Missing Dataset ID",
+ status_code=HTTP_422_UNPROCESSABLE_ENTITY,
+ )
+ try:
+ limit = int(request.query_params.get("limit", 10))
+ assert limit > 0
+ except (ValueError, AssertionError):
+ return Response(
+ content="Invalid limit parameter",
  status_code=HTTP_422_UNPROCESSABLE_ENTITY,
  )
  stmt = (
@@ -293,14 +377,15 @@ async def list_dataset_versions(
  .order_by(models.DatasetVersion.id.desc())
  .limit(limit + 1)
  )
- if cursor:
+ if cursor := request.query_params.get("cursor"):
  try:
  dataset_version_id = from_global_id_with_expected_type(
- GlobalID.from_id(cursor), DATASET_VERSION_NODE_NAME
+ GlobalID.from_id(cursor),
+ DatasetVersion.__name__,
  )
  except ValueError:
- raise HTTPException(
- detail=f"Invalid cursor: {cursor}",
+ return Response(
+ content=f"Invalid cursor: {cursor}",
  status_code=HTTP_422_UNPROCESSABLE_ENTITY,
  )
  max_dataset_version_id = (
@@ -311,99 +396,102 @@ async def list_dataset_versions(
  stmt = stmt.filter(models.DatasetVersion.id <= max_dataset_version_id)
  async with request.app.state.db() as session:
  data = [
- DatasetVersion(
- version_id=str(GlobalID(DATASET_VERSION_NODE_NAME, str(version.id))),
- description=version.description,
- metadata=version.metadata_,
- created_at=version.created_at,
- )
- async for version in await session.stream_scalars(stmt)
- ]
- next_cursor = data.pop().version_id if len(data) == limit + 1 else None
- return ListDatasetVersionsResponseBody(data=data, next_cursor=next_cursor)
-
-
- class UploadDatasetData(BaseModel):
- dataset_id: str
-
-
- class UploadDatasetResponseBody(ResponseBody[UploadDatasetData]):
- pass
-
-
- @router.post(
- "/datasets/upload",
- operation_id="uploadDataset",
- summary="Upload dataset from JSON, CSV, or PyArrow",
- responses=add_errors_to_responses(
- [
  {
- "status_code": HTTP_409_CONFLICT,
- "description": "Dataset of the same name already exists",
- },
- {"status_code": HTTP_422_UNPROCESSABLE_ENTITY, "description": "Invalid request body"},
- ]
- ),
- # FastAPI cannot generate the request body portion of the OpenAPI schema for
- # routes that accept multiple request content types, so we have to provide
- # this part of the schema manually. For context, see
- # https://github.com/tiangolo/fastapi/discussions/7786 and
- # https://github.com/tiangolo/fastapi/issues/990
- openapi_extra={
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "type": "object",
- "required": ["name", "inputs"],
- "properties": {
- "action": {"type": "string", "enum": ["create", "append"]},
- "name": {"type": "string"},
- "description": {"type": "string"},
- "inputs": {"type": "array", "items": {"type": "object"}},
- "outputs": {"type": "array", "items": {"type": "object"}},
- "metadata": {"type": "array", "items": {"type": "object"}},
- },
- }
- },
- "multipart/form-data": {
- "schema": {
- "type": "object",
- "required": ["name", "input_keys[]", "output_keys[]", "file"],
- "properties": {
- "action": {"type": "string", "enum": ["create", "append"]},
- "name": {"type": "string"},
- "description": {"type": "string"},
- "input_keys[]": {
- "type": "array",
- "items": {"type": "string"},
- "uniqueItems": True,
- },
- "output_keys[]": {
- "type": "array",
- "items": {"type": "string"},
- "uniqueItems": True,
- },
- "metadata_keys[]": {
- "type": "array",
- "items": {"type": "string"},
- "uniqueItems": True,
- },
- "file": {"type": "string", "format": "binary"},
- },
- }
- },
+ "version_id": str(GlobalID(DatasetVersion.__name__, str(version.id))),
+ "description": version.description,
+ "metadata": version.metadata_,
+ "created_at": version.created_at.isoformat(),
  }
- },
- },
- )
- async def upload_dataset(
- request: Request,
- sync: bool = Query(
- default=False,
- description="If true, fulfill request synchronously and return JSON containing dataset_id.",
- ),
- ) -> Optional[UploadDatasetResponseBody]:
+ async for version in await session.stream_scalars(stmt)
+ ]
+ next_cursor = data.pop()["version_id"] if len(data) == limit + 1 else None
+ return JSONResponse(content={"next_cursor": next_cursor, "data": data})
+
+
+ async def post_datasets_upload(request: Request) -> Response:
+ """
+ summary: Upload dataset as either JSON or file (CSV or PyArrow)
+ operationId: uploadDataset
+ tags:
+ - datasets
+ parameters:
+ - in: query
+ name: sync
+ description: If true, fulfill request synchronously and return JSON containing dataset_id
+ schema:
+ type: boolean
+ requestBody:
+ content:
+ application/json:
+ schema:
+ type: object
+ required:
+ - name
+ - inputs
+ properties:
+ action:
+ type: string
+ enum: [create, append]
+ name:
+ type: string
+ description:
+ type: string
+ inputs:
+ type: array
+ items:
+ type: object
+ outputs:
+ type: array
+ items:
+ type: object
+ metadata:
+ type: array
+ items:
+ type: object
+ multipart/form-data:
+ schema:
+ type: object
+ required:
+ - name
+ - input_keys[]
+ - output_keys[]
+ - file
+ properties:
+ action:
+ type: string
+ enum: [create, append]
+ name:
+ type: string
+ description:
+ type: string
+ input_keys[]:
+ type: array
+ items:
+ type: string
+ uniqueItems: true
+ output_keys[]:
+ type: array
+ items:
+ type: string
+ uniqueItems: true
+ metadata_keys[]:
+ type: array
+ items:
+ type: string
+ uniqueItems: true
+ file:
+ type: string
+ format: binary
+ responses:
+ 200:
+ description: Success
+ 403:
+ description: Forbidden
+ 409:
+ description: Dataset of the same name already exists
+ 422:
+ description: Request body is invalid
+ """
  request_content_type = request.headers["content-type"]
  examples: Union[Examples, Awaitable[Examples]]
  if request_content_type.startswith("application/json"):
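The requestBody schema above documents two upload forms; the JSON form takes name, inputs, and optional outputs/metadata, and sync=true makes the handler return the new dataset's GlobalID. A hedged usage sketch (the server URL and /v1 prefix are assumptions):

```python
import requests

payload = {
    "action": "create",
    "name": "my-dataset",
    "inputs": [{"question": "What is Phoenix?"}],
    "outputs": [{"answer": "An LLM tracing and evaluation tool."}],
}
resp = requests.post(
    "http://localhost:6006/v1/datasets/upload",  # assumed base URL and prefix
    params={"sync": "true"},  # the handler compares the raw query value to "true"
    json=payload,
)
resp.raise_for_status()
print(resp.json()["data"]["dataset_id"])
```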
@@ -412,15 +500,15 @@ async def upload_dataset(
  _process_json, await request.json()
  )
  except ValueError as e:
- raise HTTPException(
- detail=str(e),
+ return Response(
+ content=str(e),
  status_code=HTTP_422_UNPROCESSABLE_ENTITY,
  )
  if action is DatasetAction.CREATE:
  async with request.app.state.db() as session:
  if await _check_table_exists(session, name):
- raise HTTPException(
- detail=f"Dataset with the same name already exists: {name=}",
+ return Response(
+ content=f"Dataset with the same name already exists: {name=}",
  status_code=HTTP_409_CONFLICT,
  )
  elif request_content_type.startswith("multipart/form-data"):
@@ -436,15 +524,15 @@ async def upload_dataset(
  file,
  ) = await _parse_form_data(form)
  except ValueError as e:
- raise HTTPException(
- detail=str(e),
+ return Response(
+ content=str(e),
  status_code=HTTP_422_UNPROCESSABLE_ENTITY,
  )
  if action is DatasetAction.CREATE:
  async with request.app.state.db() as session:
  if await _check_table_exists(session, name):
- raise HTTPException(
- detail=f"Dataset with the same name already exists: {name=}",
+ return Response(
+ content=f"Dataset with the same name already exists: {name=}",
  status_code=HTTP_409_CONFLICT,
  )
  content = await file.read()
@@ -460,13 +548,13 @@ async def upload_dataset(
  else:
  assert_never(file_content_type)
  except ValueError as e:
- raise HTTPException(
- detail=str(e),
+ return Response(
+ content=str(e),
  status_code=HTTP_422_UNPROCESSABLE_ENTITY,
  )
  else:
- raise HTTPException(
- detail="Invalid request Content-Type",
+ return Response(
+ content=str("Invalid request Content-Type"),
  status_code=HTTP_422_UNPROCESSABLE_ENTITY,
  )
  operation = cast(
@@ -479,17 +567,19 @@ async def upload_dataset(
  description=description,
  ),
  )
- if sync:
+ if request.query_params.get("sync") == "true":
  async with request.app.state.db() as session:
  dataset_id = (await operation(session)).dataset_id
- return UploadDatasetResponseBody(data=UploadDatasetData(dataset_id=str(dataset_id)))
+ return JSONResponse(
+ content={"data": {"dataset_id": str(GlobalID(Dataset.__name__, str(dataset_id)))}}
+ )
  try:
  request.state.enqueue_operation(operation)
  except QueueFull:
  if isinstance(examples, Coroutine):
  examples.close()
- raise HTTPException(detail="Too many requests.", status_code=HTTP_429_TOO_MANY_REQUESTS)
- return None
+ return Response(status_code=HTTP_429_TOO_MANY_REQUESTS)
+ return Response()


  class FileContentType(Enum):
@@ -667,125 +757,151 @@ async def _parse_form_data(
  )


- # including the dataset examples router here ensures the dataset example routes
- # are included in a natural order in the openapi schema and the swagger ui
- #
- # todo: move the dataset examples routes here and remove the dataset_examples
- # sub-module
- router.include_router(dataset_examples_router)
-
-
- @router.get(
- "/datasets/{id}/csv",
- operation_id="getDatasetCsv",
- summary="Download dataset examples as CSV file",
- response_class=StreamingResponse,
- status_code=HTTP_200_OK,
- responses={
- **add_errors_to_responses([HTTP_422_UNPROCESSABLE_ENTITY]),
- **add_text_csv_content_to_responses(HTTP_200_OK),
- },
- )
- async def get_dataset_csv(
- request: Request,
- response: Response,
- id: str = Path(description="The ID of the dataset"),
- version_id: Optional[str] = Query(
- default=None,
- description=(
- "The ID of the dataset version " "(if omitted, returns data from the latest version)"
- ),
- ),
- ) -> Response:
+ async def get_dataset_csv(request: Request) -> Response:
+ """
+ summary: Download dataset examples as CSV text file
+ operationId: getDatasetCsv
+ tags:
+ - datasets
+ parameters:
+ - in: path
+ name: id
+ required: true
+ schema:
+ type: string
+ description: Dataset ID
+ - in: query
+ name: version_id
+ schema:
+ type: string
+ description: Dataset version ID. If omitted, returns the latest version.
+ responses:
+ 200:
+ description: Success
+ content:
+ text/csv:
+ schema:
+ type: string
+ contentMediaType: text/csv
+ contentEncoding: gzip
+ 403:
+ description: Forbidden
+ 404:
+ description: Dataset does not exist.
+ 422:
+ description: Dataset ID or version ID is invalid.
+ """
  try:
- async with request.app.state.db() as session:
- dataset_name, examples = await _get_db_examples(
- session=session, id=id, version_id=version_id
- )
+ dataset_name, examples = await _get_db_examples(request)
  except ValueError as e:
- raise HTTPException(detail=str(e), status_code=HTTP_422_UNPROCESSABLE_ENTITY)
+ return Response(content=str(e), status_code=HTTP_422_UNPROCESSABLE_ENTITY)
  content = await run_in_threadpool(_get_content_csv, examples)
  return Response(
  content=content,
  headers={
  "content-disposition": f'attachment; filename="{dataset_name}.csv"',
  "content-type": "text/csv",
+ "content-encoding": "gzip",
  },
  )


- @router.get(
- "/datasets/{id}/jsonl/openai_ft",
- operation_id="getDatasetJSONLOpenAIFineTuning",
- summary="Download dataset examples as OpenAI fine-tuning JSONL file",
- response_class=PlainTextResponse,
- responses=add_errors_to_responses(
- [
- {
- "status_code": HTTP_422_UNPROCESSABLE_ENTITY,
- "description": "Invalid dataset or version ID",
- }
- ]
- ),
- )
- async def get_dataset_jsonl_openai_ft(
- request: Request,
- response: Response,
- id: str = Path(description="The ID of the dataset"),
- version_id: Optional[str] = Query(
- default=None,
- description=(
- "The ID of the dataset version " "(if omitted, returns data from the latest version)"
- ),
- ),
- ) -> bytes:
+ async def get_dataset_jsonl_openai_ft(request: Request) -> Response:
+ """
+ summary: Download dataset examples as OpenAI Fine-Tuning JSONL file
+ operationId: getDatasetJSONLOpenAIFineTuning
+ tags:
+ - datasets
+ parameters:
+ - in: path
+ name: id
+ required: true
+ schema:
+ type: string
+ description: Dataset ID
+ - in: query
+ name: version_id
+ schema:
+ type: string
+ description: Dataset version ID. If omitted, returns the latest version.
+ responses:
+ 200:
+ description: Success
+ content:
+ text/plain:
+ schema:
+ type: string
+ contentMediaType: text/plain
+ contentEncoding: gzip
+ 403:
+ description: Forbidden
+ 404:
+ description: Dataset does not exist.
+ 422:
+ description: Dataset ID or version ID is invalid.
+ """
  try:
- async with request.app.state.db() as session:
- dataset_name, examples = await _get_db_examples(
- session=session, id=id, version_id=version_id
- )
+ dataset_name, examples = await _get_db_examples(request)
  except ValueError as e:
- raise HTTPException(detail=str(e), status_code=HTTP_422_UNPROCESSABLE_ENTITY)
+ return Response(content=str(e), status_code=HTTP_422_UNPROCESSABLE_ENTITY)
  content = await run_in_threadpool(_get_content_jsonl_openai_ft, examples)
- response.headers["content-disposition"] = f'attachment; filename="{dataset_name}.jsonl"'
- return content
+ return Response(
+ content=content,
+ headers={
+ "content-disposition": f'attachment; filename="{dataset_name}.jsonl"',
+ "content-type": "text/plain",
+ "content-encoding": "gzip",
+ },
+ )


- @router.get(
- "/datasets/{id}/jsonl/openai_evals",
- operation_id="getDatasetJSONLOpenAIEvals",
- summary="Download dataset examples as OpenAI evals JSONL file",
- response_class=PlainTextResponse,
- responses=add_errors_to_responses(
- [
- {
- "status_code": HTTP_422_UNPROCESSABLE_ENTITY,
- "description": "Invalid dataset or version ID",
- }
- ]
- ),
- )
- async def get_dataset_jsonl_openai_evals(
- request: Request,
- response: Response,
- id: str = Path(description="The ID of the dataset"),
- version_id: Optional[str] = Query(
- default=None,
- description=(
- "The ID of the dataset version " "(if omitted, returns data from the latest version)"
- ),
- ),
- ) -> bytes:
+ async def get_dataset_jsonl_openai_evals(request: Request) -> Response:
+ """
+ summary: Download dataset examples as OpenAI Evals JSONL file
+ operationId: getDatasetJSONLOpenAIEvals
+ tags:
+ - datasets
+ parameters:
+ - in: path
+ name: id
+ required: true
+ schema:
+ type: string
+ description: Dataset ID
+ - in: query
+ name: version_id
+ schema:
+ type: string
+ description: Dataset version ID. If omitted, returns the latest version.
+ responses:
+ 200:
+ description: Success
+ content:
+ text/plain:
+ schema:
+ type: string
+ contentMediaType: text/plain
+ contentEncoding: gzip
+ 403:
+ description: Forbidden
+ 404:
+ description: Dataset does not exist.
+ 422:
+ description: Dataset ID or version ID is invalid.
+ """
  try:
- async with request.app.state.db() as session:
- dataset_name, examples = await _get_db_examples(
- session=session, id=id, version_id=version_id
- )
+ dataset_name, examples = await _get_db_examples(request)
  except ValueError as e:
- raise HTTPException(detail=str(e), status_code=HTTP_422_UNPROCESSABLE_ENTITY)
+ return Response(content=str(e), status_code=HTTP_422_UNPROCESSABLE_ENTITY)
  content = await run_in_threadpool(_get_content_jsonl_openai_evals, examples)
- response.headers["content-disposition"] = f'attachment; filename="{dataset_name}.jsonl"'
- return content
+ return Response(
+ content=content,
+ headers={
+ "content-disposition": f'attachment; filename="{dataset_name}.jsonl"',
+ "content-type": "text/plain",
+ "content-encoding": "gzip",
+ },
+ )


  def _get_content_csv(examples: List[models.DatasetExampleRevision]) -> bytes:
@@ -801,7 +917,7 @@ def _get_content_csv(examples: List[models.DatasetExampleRevision]) -> bytes:
  }
  for ex in examples
  ]
- return str(pd.DataFrame.from_records(records).to_csv(index=False)).encode()
+ return gzip.compress(pd.DataFrame.from_records(records).to_csv(index=False).encode())


  def _get_content_jsonl_openai_ft(examples: List[models.DatasetExampleRevision]) -> bytes:
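_get_content_csv now gzip-compresses the CSV bytes, and the route above advertises this with a content-encoding: gzip header, so well-behaved HTTP clients decompress the body transparently. A small sketch of the round trip:

```python
import gzip

csv_text = "input,output\nhello,world\n"  # stand-in for the DataFrame CSV above
compressed = gzip.compress(csv_text.encode())
assert gzip.decompress(compressed).decode() == csv_text
```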
@@ -822,7 +938,7 @@ def _get_content_jsonl_openai_ft(examples: List[models.DatasetExampleRevision])
  ).encode()
  )
  records.seek(0)
- return records.read()
+ return gzip.compress(records.read())


  def _get_content_jsonl_openai_evals(examples: List[models.DatasetExampleRevision]) -> bytes:
@@ -849,17 +965,18 @@ def _get_content_jsonl_openai_evals(examples: List[models.DatasetExampleRevision
  ).encode()
  )
  records.seek(0)
- return records.read()
+ return gzip.compress(records.read())


- async def _get_db_examples(
- *, session: Any, id: str, version_id: Optional[str]
- ) -> Tuple[str, List[models.DatasetExampleRevision]]:
- dataset_id = from_global_id_with_expected_type(GlobalID.from_id(id), DATASET_NODE_NAME)
+ async def _get_db_examples(request: Request) -> Tuple[str, List[models.DatasetExampleRevision]]:
+ if not (id_ := request.path_params.get("id")):
+ raise ValueError("Missing Dataset ID")
+ dataset_id = from_global_id_with_expected_type(GlobalID.from_id(id_), Dataset.__name__)
  dataset_version_id: Optional[int] = None
- if version_id:
+ if version_id := request.query_params.get("version_id"):
  dataset_version_id = from_global_id_with_expected_type(
- GlobalID.from_id(version_id), DATASET_VERSION_NODE_NAME
+ GlobalID.from_id(version_id),
+ DatasetVersion.__name__,
  )
  latest_version = (
  select(
@@ -892,12 +1009,13 @@ async def _get_db_examples(
  .where(models.DatasetExampleRevision.revision_kind != "DELETE")
  .order_by(models.DatasetExampleRevision.dataset_example_id)
  )
- dataset_name: Optional[str] = await session.scalar(
- select(models.Dataset.name).where(models.Dataset.id == dataset_id)
- )
- if not dataset_name:
- raise ValueError("Dataset does not exist.")
- examples = [r async for r in await session.stream_scalars(stmt)]
+ async with request.app.state.db() as session:
+ dataset_name: Optional[str] = await session.scalar(
+ select(models.Dataset.name).where(models.Dataset.id == dataset_id)
+ )
+ if not dataset_name:
+ raise ValueError("Dataset does not exist.")
+ examples = [r async for r in await session.stream_scalars(stmt)]
  return dataset_name, examples

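End to end, the export endpoints now stream gzip-encoded attachments. A hedged client sketch for the CSV route (the base URL, /v1 prefix, and the dataset GlobalID below are assumptions; requests undoes the gzip encoding automatically):

```python
import requests

resp = requests.get(
    "http://localhost:6006/v1/datasets/RGF0YXNldDox/csv",  # illustrative dataset GlobalID
    params={},  # or {"version_id": "..."} to pin a specific dataset version
)
resp.raise_for_status()
print(resp.text.splitlines()[0])  # CSV header row
```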