PyPI - arize-phoenix - Versions diffs - 4.12.1rc1__py3-none-any.whl → 4.15.0__py3-none-any.whl - Mend

arize-phoenix 4.12.1rc1__py3-none-any.whl → 4.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of arize-phoenix might be problematic. Click here for more details.

Files changed (73) hide show

phoenix/server/api/mutations/trace_annotations_mutations.py CHANGED Viewed

@@ -7,10 +7,11 @@ from strawberry.types import Info
 from phoenix.db import models
 from phoenix.server.api.context import Context
-from phoenix.server.api.input_types.CreateTraceAnnotationsInput import CreateTraceAnnotationsInput
+from phoenix.server.api.input_types.CreateTraceAnnotationInput import CreateTraceAnnotationInput
 from phoenix.server.api.input_types.DeleteAnnotationsInput import DeleteAnnotationsInput
-from phoenix.server.api.input_types.PatchAnnotationsInput import PatchAnnotationsInput
+from phoenix.server.api.input_types.PatchAnnotationInput import PatchAnnotationInput
 from phoenix.server.api.mutations.auth import IsAuthenticated
+from phoenix.server.api.queries import Query
 from phoenix.server.api.types.node import from_global_id_with_expected_type
 from phoenix.server.api.types.TraceAnnotation import TraceAnnotation, to_gql_trace_annotation
@@ -18,13 +19,14 @@ from phoenix.server.api.types.TraceAnnotation import TraceAnnotation, to_gql_tra
 @strawberry.type
 class TraceAnnotationMutationPayload:
     trace_annotations: List[TraceAnnotation]
+    query: Query
 @strawberry.type
 class TraceAnnotationMutationMixin:
     @strawberry.mutation(permission_classes=[IsAuthenticated])  # type: ignore
     async def create_trace_annotations(
-        self, info: Info[Context, None], input: List[CreateTraceAnnotationsInput]
+        self, info: Info[Context, None], input: List[CreateTraceAnnotationInput]
     ) -> TraceAnnotationMutationPayload:
         inserted_annotations: Sequence[models.TraceAnnotation] = []
         async with info.context.db() as session:
@@ -35,7 +37,7 @@ class TraceAnnotationMutationMixin:
                     label=annotation.label,
                     score=annotation.score,
                     explanation=annotation.explanation,
-                    annotator_kind=annotation.annotator_kind,
+                    annotator_kind=annotation.annotator_kind.value,
                     metadata_=annotation.metadata,
                 )
                 for annotation in input
@@ -49,12 +51,13 @@ class TraceAnnotationMutationMixin:
         return TraceAnnotationMutationPayload(
             trace_annotations=[
                 to_gql_trace_annotation(annotation) for annotation in inserted_annotations
-            ]
+            ],
+            query=Query(),
         )
     @strawberry.mutation(permission_classes=[IsAuthenticated])  # type: ignore
     async def patch_trace_annotations(
-        self, info: Info[Context, None], input: List[PatchAnnotationsInput]
+        self, info: Info[Context, None], input: List[PatchAnnotationInput]
     ) -> TraceAnnotationMutationPayload:
         patched_annotations = []
         async with info.context.db() as session:
@@ -66,7 +69,13 @@ class TraceAnnotationMutationMixin:
                     column.key: patch_value
                     for column, patch_value, column_is_nullable in (
                         (models.TraceAnnotation.name, annotation.name, False),
-                        (models.TraceAnnotation.annotator_kind, annotation.annotator_kind, False),
+                        (
+                            models.TraceAnnotation.annotator_kind,
+                            annotation.annotator_kind.value
+                            if annotation.annotator_kind is not None
+                            else None,
+                            False,
+                        ),
                         (models.TraceAnnotation.label, annotation.label, True),
                         (models.TraceAnnotation.score, annotation.score, True),
                         (models.TraceAnnotation.explanation, annotation.explanation, True),
@@ -83,7 +92,7 @@ class TraceAnnotationMutationMixin:
                 if trace_annotation:
                     patched_annotations.append(to_gql_trace_annotation(trace_annotation))
-        return TraceAnnotationMutationPayload(trace_annotations=patched_annotations)
+        return TraceAnnotationMutationPayload(trace_annotations=patched_annotations, query=Query())
     @strawberry.mutation(permission_classes=[IsAuthenticated])  # type: ignore
     async def delete_trace_annotations(
@@ -105,4 +114,6 @@ class TraceAnnotationMutationMixin:
             deleted_annotations_gql = [
                 to_gql_trace_annotation(annotation) for annotation in deleted_annotations
             ]
-        return TraceAnnotationMutationPayload(trace_annotations=deleted_annotations_gql)
+        return TraceAnnotationMutationPayload(
+            trace_annotations=deleted_annotations_gql, query=Query()
+        )

phoenix/server/api/routers/v1/datasets.py CHANGED Viewed

@@ -56,12 +56,11 @@ from phoenix.db.insertion.dataset import (
     add_dataset_examples,
 )
 from phoenix.server.api.types.Dataset import Dataset as DatasetNodeType
-from phoenix.server.api.types.DatasetExample import DatasetExample
+from phoenix.server.api.types.DatasetExample import DatasetExample as DatasetExampleNodeType
 from phoenix.server.api.types.DatasetVersion import DatasetVersion as DatasetVersionNodeType
 from phoenix.server.api.types.node import from_global_id_with_expected_type
 from phoenix.server.api.utils import delete_projects, delete_traces
-from .dataset_examples import router as dataset_examples_router
 from .pydantic_compat import V1RoutesBaseModel
 from .utils import (
     PaginatedResponseBody,
@@ -122,7 +121,7 @@ async def list_datasets(
                     status_code=HTTP_422_UNPROCESSABLE_ENTITY,
                 )
         if name:
-            query = query.filter(models.Dataset.name.is_(name))
+            query = query.filter(models.Dataset.name == name)
         query = query.limit(limit + 1)
         result = await session.execute(query)
@@ -669,12 +668,135 @@ async def _parse_form_data(
     )
-# including the dataset examples router here ensures the dataset example routes
-# are included in a natural order in the openapi schema and the swagger ui
-#
-# todo: move the dataset examples routes here and remove the dataset_examples
-# sub-module
-router.include_router(dataset_examples_router)
+class DatasetExample(V1RoutesBaseModel):
+    id: str
+    input: Dict[str, Any]
+    output: Dict[str, Any]
+    metadata: Dict[str, Any]
+    updated_at: datetime
+class ListDatasetExamplesData(V1RoutesBaseModel):
+    dataset_id: str
+    version_id: str
+    examples: List[DatasetExample]
+class ListDatasetExamplesResponseBody(ResponseBody[ListDatasetExamplesData]):
+    pass
+@router.get(
+    "/datasets/{id}/examples",
+    operation_id="getDatasetExamples",
+    summary="Get examples from a dataset",
+    responses=add_errors_to_responses([HTTP_404_NOT_FOUND]),
+)
+async def get_dataset_examples(
+    request: Request,
+    id: str = Path(description="The ID of the dataset"),
+    version_id: Optional[str] = Query(
+        default=None,
+        description=(
+            "The ID of the dataset version " "(if omitted, returns data from the latest version)"
+        ),
+    ),
+) -> ListDatasetExamplesResponseBody:
+    dataset_gid = GlobalID.from_id(id)
+    version_gid = GlobalID.from_id(version_id) if version_id else None
+    if (dataset_type := dataset_gid.type_name) != "Dataset":
+        raise HTTPException(
+            detail=f"ID {dataset_gid} refers to a {dataset_type}", status_code=HTTP_404_NOT_FOUND
+        )
+    if version_gid and (version_type := version_gid.type_name) != "DatasetVersion":
+        raise HTTPException(
+            detail=f"ID {version_gid} refers to a {version_type}", status_code=HTTP_404_NOT_FOUND
+        )
+    async with request.app.state.db() as session:
+        if (
+            resolved_dataset_id := await session.scalar(
+                select(models.Dataset.id).where(models.Dataset.id == int(dataset_gid.node_id))
+            )
+        ) is None:
+            raise HTTPException(
+                detail=f"No dataset with id {dataset_gid} can be found.",
+                status_code=HTTP_404_NOT_FOUND,
+            )
+        # Subquery to find the maximum created_at for each dataset_example_id
+        # timestamp tiebreaks are resolved by the largest id
+        partial_subquery = select(
+            func.max(models.DatasetExampleRevision.id).label("max_id"),
+        ).group_by(models.DatasetExampleRevision.dataset_example_id)
+        if version_gid:
+            if (
+                resolved_version_id := await session.scalar(
+                    select(models.DatasetVersion.id).where(
+                        and_(
+                            models.DatasetVersion.dataset_id == resolved_dataset_id,
+                            models.DatasetVersion.id == int(version_gid.node_id),
+                        )
+                    )
+                )
+            ) is None:
+                raise HTTPException(
+                    detail=f"No dataset version with id {version_id} can be found.",
+                    status_code=HTTP_404_NOT_FOUND,
+                )
+            # if a version_id is provided, filter the subquery to only include revisions from that
+            partial_subquery = partial_subquery.filter(
+                models.DatasetExampleRevision.dataset_version_id <= resolved_version_id
+            )
+        else:
+            if (
+                resolved_version_id := await session.scalar(
+                    select(func.max(models.DatasetVersion.id)).where(
+                        models.DatasetVersion.dataset_id == resolved_dataset_id
+                    )
+                )
+            ) is None:
+                raise HTTPException(
+                    detail="Dataset has no versions.",
+                    status_code=HTTP_404_NOT_FOUND,
+                )
+        subquery = partial_subquery.subquery()
+        # Query for the most recent example revisions that are not deleted
+        query = (
+            select(models.DatasetExample, models.DatasetExampleRevision)
+            .join(
+                models.DatasetExampleRevision,
+                models.DatasetExample.id == models.DatasetExampleRevision.dataset_example_id,
+            )
+            .join(
+                subquery,
+                (subquery.c.max_id == models.DatasetExampleRevision.id),
+            )
+            .filter(models.DatasetExample.dataset_id == resolved_dataset_id)
+            .filter(models.DatasetExampleRevision.revision_kind != "DELETE")
+            .order_by(models.DatasetExample.id.asc())
+        )
+        examples = [
+            DatasetExample(
+                id=str(GlobalID("DatasetExample", str(example.id))),
+                input=revision.input,
+                output=revision.output,
+                metadata=revision.metadata_,
+                updated_at=revision.created_at,
+            )
+            async for example, revision in await session.stream(query)
+        ]
+    return ListDatasetExamplesResponseBody(
+        data=ListDatasetExamplesData(
+            dataset_id=str(GlobalID("Dataset", str(resolved_dataset_id))),
+            version_id=str(GlobalID("DatasetVersion", str(resolved_version_id))),
+            examples=examples,
+        )
+    )
 @router.get(
@@ -794,7 +916,7 @@ def _get_content_csv(examples: List[models.DatasetExampleRevision]) -> bytes:
     records = [
         {
             "example_id": GlobalID(
-                type_name=DatasetExample.__name__,
+                type_name=DatasetExampleNodeType.__name__,
                 node_id=str(ex.dataset_example_id),
             ),
             **{f"input_{k}": v for k, v in ex.input.items()},

phoenix/server/api/routers/v1/evaluations.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gzip
 from itertools import chain
-from typing import AsyncContextManager, Callable, Iterator, Optional, Tuple
+from typing import Iterator, Optional, Tuple
 import pandas as pd
 import pyarrow as pa
@@ -9,9 +9,6 @@ from google.protobuf.message import DecodeError
 from pandas import DataFrame
 from sqlalchemy import select
 from sqlalchemy.engine import Connectable
-from sqlalchemy.ext.asyncio import (
-    AsyncSession,
-)
 from starlette.background import BackgroundTask
 from starlette.datastructures import State
 from starlette.requests import Request
@@ -29,6 +26,7 @@ from phoenix.config import DEFAULT_PROJECT_NAME
 from phoenix.db import models
 from phoenix.exceptions import PhoenixEvaluationNameIsMissing
 from phoenix.server.api.routers.utils import table_to_bytes
+from phoenix.server.types import DbSessionFactory
 from phoenix.session.evaluation import encode_evaluations
 from phoenix.trace.span_evaluations import (
     DocumentEvaluations,
@@ -128,7 +126,7 @@ async def get_evaluations(
         or DEFAULT_PROJECT_NAME
     )
-    db: Callable[[], AsyncContextManager[AsyncSession]] = request.app.state.db
+    db: DbSessionFactory = request.app.state.db
     async with db() as session:
         connection = await session.connection()
         trace_evals_dataframe = await connection.run_sync(

phoenix/server/api/routers/v1/experiments.py CHANGED Viewed

@@ -110,7 +110,7 @@ async def create_experiment(
             )
         except ValueError:
             raise HTTPException(
-                detail="DatasetVersion with ID {dataset_version_globalid} does not exist",
+                detail=f"DatasetVersion with ID {dataset_version_globalid_str} does not exist",
                 status_code=HTTP_404_NOT_FOUND,
             )

phoenix/server/api/types/Experiment.py CHANGED Viewed

@@ -104,11 +104,11 @@ class Experiment(Node):
         return await info.context.data_loaders.experiment_error_rates.load(self.id_attr)
     @strawberry.field
-    async def average_run_latency_ms(self, info: Info[Context, None]) -> float:
+    async def average_run_latency_ms(self, info: Info[Context, None]) -> Optional[float]:
         latency_seconds = await info.context.data_loaders.average_experiment_run_latency.load(
             self.id_attr
         )
-        return latency_seconds * 1000
+        return latency_seconds * 1000 if latency_seconds is not None else None
     @strawberry.field
     async def project(self, info: Info[Context, None]) -> Optional[Project]:

phoenix/server/api/types/Inferences.py CHANGED Viewed

@@ -2,8 +2,7 @@ from datetime import datetime
 from typing import Iterable, List, Optional, Set, Union
 import strawberry
-from strawberry.scalars import ID
-from strawberry.unset import UNSET
+from strawberry import ID, UNSET
 import phoenix.core.model_schema as ms
 from phoenix.core.model_schema import FEATURE, TAG, ScalarDimension

phoenix/server/api/types/Model.py CHANGED Viewed

@@ -2,9 +2,8 @@ import asyncio
 from typing import List, Optional
 import strawberry
+from strawberry import UNSET, Info
 from strawberry.relay import Connection
-from strawberry.types import Info
-from strawberry.unset import UNSET
 from typing_extensions import Annotated
 from phoenix.config import get_exported_files

phoenix/server/api/types/Span.py CHANGED Viewed

@@ -258,6 +258,11 @@ class Span(Node):
         project = await info.context.data_loaders.span_projects.load(span_id)
         return to_gql_project(project)
+    @strawberry.field(description="Indicates if the span is contained in any dataset")  # type: ignore
+    async def contained_in_dataset(self, info: Info[Context, None]) -> bool:
+        examples = await info.context.data_loaders.span_dataset_examples.load(self.id_attr)
+        return bool(examples)
 def to_gql_span(span: models.Span) -> Span:
     events: List[SpanEvent] = list(map(SpanEvent.from_dict, span.events))

phoenix/server/api/utils.py CHANGED Viewed

@@ -1,13 +1,13 @@
-from typing import AsyncContextManager, Callable, List
+from typing import List
 from sqlalchemy import delete
-from sqlalchemy.ext.asyncio import AsyncSession
 from phoenix.db import models
+from phoenix.server.types import DbSessionFactory
 async def delete_projects(
-    db: Callable[[], AsyncContextManager[AsyncSession]],
+    db: DbSessionFactory,
     *project_names: str,
 ) -> List[int]:
     if not project_names:
@@ -22,7 +22,7 @@ async def delete_projects(
 async def delete_traces(
-    db: Callable[[], AsyncContextManager[AsyncSession]],
+    db: DbSessionFactory,
     *trace_ids: str,
 ) -> List[int]:
     if not trace_ids:

phoenix/server/app.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import asyncio
 import contextlib
 import json
 import logging
@@ -74,6 +75,7 @@ from phoenix.server.api.dataloaders import (
     ProjectByNameDataLoader,
     RecordCountDataLoader,
     SpanAnnotationsDataLoader,
+    SpanDatasetExamplesDataLoader,
     SpanDescendantsDataLoader,
     SpanEvaluationsDataLoader,
     SpanProjectsDataLoader,
@@ -86,7 +88,9 @@ from phoenix.server.api.routers.v1 import router as v1_router
 from phoenix.server.api.schema import schema
 from phoenix.server.grpc_server import GrpcServer
 from phoenix.server.telemetry import initialize_opentelemetry_tracer_provider
+from phoenix.server.types import DbSessionFactory
 from phoenix.trace.schemas import Span
+from phoenix.utilities.client import PHOENIX_SERVER_VERSION_HEADER
 if TYPE_CHECKING:
     from opentelemetry.trace import TracerProvider
@@ -167,9 +171,11 @@ class HeadersMiddleware(BaseHTTPMiddleware):
         request: Request,
         call_next: RequestResponseEndpoint,
     ) -> Response:
+        from phoenix import __version__ as phoenix_version
         response = await call_next(request)
         response.headers["x-colab-notebook-cache-control"] = "no-cache"
-        response.headers["Cache-Control"] = "no-store"
+        response.headers[PHOENIX_SERVER_VERSION_HEADER] = phoenix_version
         return response
@@ -193,19 +199,25 @@ async def version() -> PlainTextResponse:
     return PlainTextResponse(f"{phoenix.__version__}")
+DB_MUTEX: Optional[asyncio.Lock] = None
 def _db(engine: AsyncEngine) -> Callable[[], AsyncContextManager[AsyncSession]]:
     Session = async_sessionmaker(engine, expire_on_commit=False)
     @contextlib.asynccontextmanager
     async def factory() -> AsyncIterator[AsyncSession]:
-        async with Session.begin() as session:
-            yield session
+        async with contextlib.AsyncExitStack() as stack:
+            if DB_MUTEX:
+                await stack.enter_async_context(DB_MUTEX)
+            yield await stack.enter_async_context(Session.begin())
     return factory
 def _lifespan(
     *,
+    dialect: SupportedSQLDialect,
     bulk_inserter: BulkInserter,
     tracer_provider: Optional["TracerProvider"] = None,
     enable_prometheus: bool = False,
@@ -214,6 +226,8 @@ def _lifespan(
 ) -> StatefulLifespan[FastAPI]:
     @contextlib.asynccontextmanager
     async def lifespan(_: FastAPI) -> AsyncIterator[Dict[str, Any]]:
+        global DB_MUTEX
+        DB_MUTEX = asyncio.Lock() if dialect is SupportedSQLDialect.SQLITE else None
         async with bulk_inserter as (
             queue_span,
             queue_evaluation,
@@ -243,7 +257,7 @@ async def check_healthz(_: Request) -> PlainTextResponse:
 def create_graphql_router(
     *,
     schema: BaseSchema,
-    db: Callable[[], AsyncContextManager[AsyncSession]],
+    db: DbSessionFactory,
     model: Model,
     export_path: Path,
     corpus: Optional[Model] = None,
@@ -297,6 +311,7 @@ def create_graphql_router(
                     cache_map=cache_for_dataloaders.record_count if cache_for_dataloaders else None,
                 ),
                 span_annotations=SpanAnnotationsDataLoader(db),
+                span_dataset_examples=SpanDatasetExamplesDataLoader(db),
                 span_descendants=SpanDescendantsDataLoader(db),
                 span_evaluations=SpanEvaluationsDataLoader(db),
                 span_projects=SpanProjectsDataLoader(db),
@@ -321,19 +336,6 @@ def create_graphql_router(
     )
-class SessionFactory:
-    def __init__(
-        self,
-        session_factory: Callable[[], AsyncContextManager[AsyncSession]],
-        dialect: str,
-    ):
-        self.session_factory = session_factory
-        self.dialect = SupportedSQLDialect(dialect)
-    def __call__(self) -> AsyncContextManager[AsyncSession]:
-        return self.session_factory()
 def create_engine_and_run_migrations(
     database_url: str,
 ) -> AsyncEngine:
@@ -382,7 +384,7 @@ async def plain_text_http_exception_handler(request: Request, exc: HTTPException
 def create_app(
-    db: SessionFactory,
+    db: DbSessionFactory,
     export_path: Path,
     model: Model,
     umap_params: UMAPParameters,
@@ -463,6 +465,7 @@ def create_app(
         title="Arize-Phoenix REST API",
         version=REST_API_VERSION,
         lifespan=_lifespan(
+            dialect=db.dialect,
             read_only=read_only,
             bulk_inserter=bulk_inserter,
             tracer_provider=tracer_provider,

phoenix/server/grpc_server.py CHANGED Viewed

@@ -23,7 +23,7 @@ if TYPE_CHECKING:
 ProjectName: TypeAlias = str
-class Servicer(TraceServiceServicer):
+class Servicer(TraceServiceServicer):  # type:ignore
     def __init__(
         self,
         callback: Callable[[Span, ProjectName], Awaitable[None]],
@@ -78,7 +78,7 @@ class GrpcServer:
             interceptors=interceptors,
         )
         server.add_insecure_port(f"[::]:{get_env_grpc_port()}")
-        add_TraceServiceServicer_to_server(Servicer(self._callback), server)  # type: ignore
+        add_TraceServiceServicer_to_server(Servicer(self._callback), server)
         await server.start()
         self._server = server

phoenix/server/main.py CHANGED Viewed

@@ -33,25 +33,23 @@ from phoenix.pointcloud.umap_parameters import (
     UMAPParameters,
 )
 from phoenix.server.app import (
-    SessionFactory,
     _db,
     create_app,
     create_engine_and_run_migrations,
     instrument_engine_if_enabled,
 )
+from phoenix.server.types import DbSessionFactory
 from phoenix.settings import Settings
 from phoenix.trace.fixtures import (
     TRACES_FIXTURES,
-    download_traces_fixture,
     get_dataset_fixtures,
     get_evals_from_fixture,
-    get_trace_fixture_by_name,
+    load_example_traces,
     reset_fixture_span_ids_and_timestamps,
     send_dataset_fixtures,
 )
 from phoenix.trace.otel import decode_otlp_span, encode_span_to_otlp
 from phoenix.trace.schemas import Span
-from phoenix.trace.span_json_decoder import json_string_to_span
 logger = logging.getLogger(__name__)
@@ -221,10 +219,8 @@ if __name__ == "__main__":
             (
                 # Apply `encode` here because legacy jsonl files contains UUIDs as strings.
                 # `encode` removes the hyphens in the UUIDs.
-                decode_otlp_span(encode_span_to_otlp(json_string_to_span(json_span)))
-                for json_span in download_traces_fixture(
-                    get_trace_fixture_by_name(trace_dataset_name)
-                )
+                decode_otlp_span(encode_span_to_otlp(span))
+                for span in load_example_traces(trace_dataset_name).to_spans()
             ),
             get_evals_from_fixture(trace_dataset_name),
         )
@@ -250,7 +246,7 @@ if __name__ == "__main__":
     working_dir = get_working_dir().resolve()
     engine = create_engine_and_run_migrations(db_connection_str)
     instrumentation_cleanups = instrument_engine_if_enabled(engine)
-    factory = SessionFactory(session_factory=_db(engine), dialect=engine.dialect.name)
+    factory = DbSessionFactory(db=_db(engine), dialect=engine.dialect.name)
     app = create_app(
         db=factory,
         export_path=export_path,

arize-phoenix 4.12.1rc1__py3-none-any.whl → 4.15.0__py3-none-any.whl

Potentially problematic release.

arize-phoenix 4.12.1rc1__py3-none-any.whl → 4.15.0__py3-none-any.whl