arize-phoenix 4.14.1__py3-none-any.whl → 4.16.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release: this version of arize-phoenix might be problematic.
- {arize_phoenix-4.14.1.dist-info → arize_phoenix-4.16.0.dist-info}/METADATA +5 -3
- {arize_phoenix-4.14.1.dist-info → arize_phoenix-4.16.0.dist-info}/RECORD +81 -71
- phoenix/db/bulk_inserter.py +131 -5
- phoenix/db/engines.py +2 -1
- phoenix/db/helpers.py +23 -1
- phoenix/db/insertion/constants.py +2 -0
- phoenix/db/insertion/document_annotation.py +157 -0
- phoenix/db/insertion/helpers.py +13 -0
- phoenix/db/insertion/span_annotation.py +144 -0
- phoenix/db/insertion/trace_annotation.py +144 -0
- phoenix/db/insertion/types.py +261 -0
- phoenix/experiments/functions.py +3 -2
- phoenix/experiments/types.py +3 -3
- phoenix/server/api/context.py +7 -9
- phoenix/server/api/dataloaders/__init__.py +2 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +3 -3
- phoenix/server/api/dataloaders/dataset_example_revisions.py +2 -4
- phoenix/server/api/dataloaders/dataset_example_spans.py +2 -4
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +2 -4
- phoenix/server/api/dataloaders/document_evaluations.py +2 -4
- phoenix/server/api/dataloaders/document_retrieval_metrics.py +2 -4
- phoenix/server/api/dataloaders/evaluation_summaries.py +2 -4
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +2 -4
- phoenix/server/api/dataloaders/experiment_error_rates.py +2 -4
- phoenix/server/api/dataloaders/experiment_run_counts.py +2 -4
- phoenix/server/api/dataloaders/experiment_sequence_number.py +2 -4
- phoenix/server/api/dataloaders/latency_ms_quantile.py +2 -3
- phoenix/server/api/dataloaders/min_start_or_max_end_times.py +2 -4
- phoenix/server/api/dataloaders/project_by_name.py +3 -3
- phoenix/server/api/dataloaders/record_counts.py +2 -4
- phoenix/server/api/dataloaders/span_annotations.py +2 -4
- phoenix/server/api/dataloaders/span_dataset_examples.py +36 -0
- phoenix/server/api/dataloaders/span_descendants.py +2 -4
- phoenix/server/api/dataloaders/span_evaluations.py +2 -4
- phoenix/server/api/dataloaders/span_projects.py +3 -3
- phoenix/server/api/dataloaders/token_counts.py +2 -4
- phoenix/server/api/dataloaders/trace_evaluations.py +2 -4
- phoenix/server/api/dataloaders/trace_row_ids.py +2 -4
- phoenix/server/api/input_types/SpanAnnotationSort.py +17 -0
- phoenix/server/api/input_types/TraceAnnotationSort.py +17 -0
- phoenix/server/api/mutations/span_annotations_mutations.py +8 -3
- phoenix/server/api/mutations/trace_annotations_mutations.py +8 -3
- phoenix/server/api/openapi/main.py +18 -2
- phoenix/server/api/openapi/schema.py +12 -12
- phoenix/server/api/routers/v1/__init__.py +36 -83
- phoenix/server/api/routers/v1/datasets.py +515 -509
- phoenix/server/api/routers/v1/evaluations.py +164 -73
- phoenix/server/api/routers/v1/experiment_evaluations.py +68 -91
- phoenix/server/api/routers/v1/experiment_runs.py +98 -155
- phoenix/server/api/routers/v1/experiments.py +132 -181
- phoenix/server/api/routers/v1/pydantic_compat.py +78 -0
- phoenix/server/api/routers/v1/spans.py +164 -203
- phoenix/server/api/routers/v1/traces.py +134 -159
- phoenix/server/api/routers/v1/utils.py +95 -0
- phoenix/server/api/types/Span.py +27 -3
- phoenix/server/api/types/Trace.py +21 -4
- phoenix/server/api/utils.py +4 -4
- phoenix/server/app.py +172 -192
- phoenix/server/grpc_server.py +2 -2
- phoenix/server/main.py +5 -9
- phoenix/server/static/.vite/manifest.json +31 -31
- phoenix/server/static/assets/components-Ci5kMOk5.js +1175 -0
- phoenix/server/static/assets/{index-CQgXRwU0.js → index-BQG5WVX7.js} +2 -2
- phoenix/server/static/assets/{pages-hdjlFZhO.js → pages-BrevprVW.js} +451 -275
- phoenix/server/static/assets/{vendor-DPvSDRn3.js → vendor-CP0b0YG0.js} +2 -2
- phoenix/server/static/assets/{vendor-arizeai-CkvPT67c.js → vendor-arizeai-DTbiPGp6.js} +27 -27
- phoenix/server/static/assets/vendor-codemirror-DtdPDzrv.js +15 -0
- phoenix/server/static/assets/{vendor-recharts-5jlNaZuF.js → vendor-recharts-A0DA1O99.js} +1 -1
- phoenix/server/thread_server.py +2 -2
- phoenix/server/types.py +18 -0
- phoenix/session/client.py +5 -3
- phoenix/session/session.py +2 -2
- phoenix/trace/dsl/filter.py +2 -6
- phoenix/trace/fixtures.py +17 -23
- phoenix/trace/utils.py +23 -0
- phoenix/utilities/client.py +116 -0
- phoenix/utilities/project.py +1 -1
- phoenix/version.py +1 -1
- phoenix/server/api/routers/v1/dataset_examples.py +0 -178
- phoenix/server/openapi/docs.py +0 -221
- phoenix/server/static/assets/components-DeS0YEmv.js +0 -1142
- phoenix/server/static/assets/vendor-codemirror-Cqwpwlua.js +0 -12
- {arize_phoenix-4.14.1.dist-info → arize_phoenix-4.16.0.dist-info}/WHEEL +0 -0
- {arize_phoenix-4.14.1.dist-info → arize_phoenix-4.16.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-4.14.1.dist-info → arize_phoenix-4.16.0.dist-info}/licenses/LICENSE +0 -0
phoenix/server/api/routers/v1/evaluations.py (+164 -73)

```diff
--- a/phoenix/server/api/routers/v1/evaluations.py
+++ b/phoenix/server/api/routers/v1/evaluations.py
@@ -1,22 +1,20 @@
 import gzip
 from itertools import chain
-from typing import
+from typing import Any, Callable, Iterator, Optional, Tuple, Union, cast
 
 import pandas as pd
 import pyarrow as pa
+from fastapi import APIRouter, Header, HTTPException, Query
 from google.protobuf.message import DecodeError
 from pandas import DataFrame
 from sqlalchemy import select
 from sqlalchemy.engine import Connectable
-from sqlalchemy.ext.asyncio import (
-    AsyncSession,
-)
 from starlette.background import BackgroundTask
 from starlette.datastructures import State
 from starlette.requests import Request
 from starlette.responses import Response, StreamingResponse
 from starlette.status import (
-
+    HTTP_204_NO_CONTENT,
     HTTP_404_NOT_FOUND,
     HTTP_415_UNSUPPORTED_MEDIA_TYPE,
     HTTP_422_UNPROCESSABLE_ENTITY,
@@ -26,9 +24,10 @@ from typing_extensions import TypeAlias
 import phoenix.trace.v1 as pb
 from phoenix.config import DEFAULT_PROJECT_NAME
 from phoenix.db import models
+from phoenix.db.insertion.types import Precursors
 from phoenix.exceptions import PhoenixEvaluationNameIsMissing
 from phoenix.server.api.routers.utils import table_to_bytes
-from phoenix.
+from phoenix.server.types import DbSessionFactory
 from phoenix.trace.span_evaluations import (
     DocumentEvaluations,
     Evaluations,
@@ -36,92 +35,98 @@ from phoenix.trace.span_evaluations import (
     TraceEvaluations,
 )
 
+from .utils import add_errors_to_responses
+
 EvaluationName: TypeAlias = str
 
+router = APIRouter(tags=["traces"], include_in_schema=False)
 
-… (30 removed lines, content not captured)
+
+@router.post(
+    "/evaluations",
+    operation_id="addEvaluations",
+    summary="Add span, trace, or document evaluations",
+    status_code=HTTP_204_NO_CONTENT,
+    responses=add_errors_to_responses(
+        [
+            {
+                "status_code": HTTP_415_UNSUPPORTED_MEDIA_TYPE,
+                "description": (
+                    "Unsupported content type, "
+                    "only gzipped protobuf and pandas-arrow are supported"
+                ),
+            },
+            HTTP_422_UNPROCESSABLE_ENTITY,
+        ]
+    ),
+    openapi_extra={
+        "requestBody": {
+            "required": True,
+            "content": {
+                "application/x-protobuf": {"schema": {"type": "string", "format": "binary"}},
+                "application/x-pandas-arrow": {"schema": {"type": "string", "format": "binary"}},
+            },
+        },
+    },
+)
+async def post_evaluations(
+    request: Request,
+    content_type: Optional[str] = Header(default=None),
+    content_encoding: Optional[str] = Header(default=None),
+) -> Response:
     if content_type == "application/x-pandas-arrow":
         return await _process_pyarrow(request)
     if content_type != "application/x-protobuf":
-
+        raise HTTPException(
+            detail="Unsupported content type", status_code=HTTP_415_UNSUPPORTED_MEDIA_TYPE
+        )
     body = await request.body()
-    content_encoding = request.headers.get("content-encoding")
     if content_encoding == "gzip":
         body = gzip.decompress(body)
     elif content_encoding:
-
+        raise HTTPException(
+            detail="Unsupported content encoding", status_code=HTTP_415_UNSUPPORTED_MEDIA_TYPE
+        )
    evaluation = pb.Evaluation()
     try:
         evaluation.ParseFromString(body)
     except DecodeError:
-
+        raise HTTPException(
+            detail="Request body is invalid", status_code=HTTP_422_UNPROCESSABLE_ENTITY
+        )
     if not evaluation.name.strip():
-
-            "Evaluation name must not be blank/empty",
+        raise HTTPException(
+            detail="Evaluation name must not be blank/empty",
             status_code=HTTP_422_UNPROCESSABLE_ENTITY,
         )
     await request.state.queue_evaluation_for_bulk_insert(evaluation)
     return Response()
 
 
-
-""
-… (14 removed lines, content not captured)
-    403:
-        description: Forbidden
-    404:
-        description: Not found
-    """
+@router.get(
+    "/evaluations",
+    operation_id="getEvaluations",
+    summary="Get span, trace, or document evaluations from a project",
+    responses=add_errors_to_responses([HTTP_404_NOT_FOUND]),
+)
+async def get_evaluations(
+    request: Request,
+    project_name: Optional[str] = Query(
+        default=None,
+        description=(
+            "The name of the project to get evaluations from (if omitted, "
+            f"evaluations will be drawn from the `{DEFAULT_PROJECT_NAME}` project)"
+        ),
+    ),
+) -> Response:
     project_name = (
-
+        project_name
         or request.query_params.get("project-name")  # for backward compatibility
         or request.headers.get("project-name")  # read from headers for backwards compatibility
         or DEFAULT_PROJECT_NAME
     )
 
-    db:
+    db: DbSessionFactory = request.app.state.db
     async with db() as session:
         connection = await session.connection()
         trace_evals_dataframe = await connection.run_sync(
@@ -169,28 +174,114 @@ async def _process_pyarrow(request: Request) -> Response:
     try:
         reader = pa.ipc.open_stream(body)
     except pa.ArrowInvalid:
-
-
+        raise HTTPException(
+            detail="Request body is not valid pyarrow",
             status_code=HTTP_422_UNPROCESSABLE_ENTITY,
         )
     try:
         evaluations = Evaluations.from_pyarrow_reader(reader)
     except Exception as e:
         if isinstance(e, PhoenixEvaluationNameIsMissing):
-
-                "Evaluation name must not be blank/empty",
+            raise HTTPException(
+                detail="Evaluation name must not be blank/empty",
                 status_code=HTTP_422_UNPROCESSABLE_ENTITY,
             )
-
-
+        raise HTTPException(
+            detail="Invalid data in request body",
             status_code=HTTP_422_UNPROCESSABLE_ENTITY,
         )
     return Response(background=BackgroundTask(_add_evaluations, request.state, evaluations))
 
 
 async def _add_evaluations(state: State, evaluations: Evaluations) -> None:
-
-
+    dataframe = evaluations.dataframe
+    eval_name = evaluations.eval_name
+    names = dataframe.index.names
+    if (
+        len(names) == 2
+        and "document_position" in names
+        and ("context.span_id" in names or "span_id" in names)
+    ):
+        cls = _document_annotation_factory(
+            names.index("span_id") if "span_id" in names else names.index("context.span_id"),
+            names.index("document_position"),
+        )
+        for index, row in dataframe.iterrows():
+            score, label, explanation = _get_annotation_result(row)
+            document_annotation = cls(cast(Union[Tuple[str, int], Tuple[int, str]], index))(
+                name=eval_name,
+                annotator_kind="LLM",
+                score=score,
+                label=label,
+                explanation=explanation,
+                metadata_={},
+            )
+            await state.enqueue(document_annotation)
+    elif len(names) == 1 and names[0] in ("context.span_id", "span_id"):
+        for index, row in dataframe.iterrows():
+            score, label, explanation = _get_annotation_result(row)
+            span_annotation = _span_annotation_factory(cast(str, index))(
+                name=eval_name,
+                annotator_kind="LLM",
+                score=score,
+                label=label,
+                explanation=explanation,
+                metadata_={},
+            )
+            await state.enqueue(span_annotation)
+    elif len(names) == 1 and names[0] in ("context.trace_id", "trace_id"):
+        for index, row in dataframe.iterrows():
+            score, label, explanation = _get_annotation_result(row)
+            trace_annotation = _trace_annotation_factory(cast(str, index))(
+                name=eval_name,
+                annotator_kind="LLM",
+                score=score,
+                label=label,
+                explanation=explanation,
+                metadata_={},
+            )
+            await state.enqueue(trace_annotation)
+
+
+def _get_annotation_result(
+    row: "pd.Series[Any]",
+) -> Tuple[Optional[float], Optional[str], Optional[str]]:
+    return (
+        cast(Optional[float], row.get("score")),
+        cast(Optional[str], row.get("label")),
+        cast(Optional[str], row.get("explanation")),
+    )
+
+
+def _document_annotation_factory(
+    span_id_idx: int,
+    document_position_idx: int,
+) -> Callable[
+    [Union[Tuple[str, int], Tuple[int, str]]],
+    Callable[..., Precursors.DocumentAnnotation],
+]:
+    return lambda index: lambda **kwargs: Precursors.DocumentAnnotation(
+        span_id=str(index[span_id_idx]),
+        document_position=int(index[document_position_idx]),
+        obj=models.DocumentAnnotation(
+            document_position=int(index[document_position_idx]),
+            **kwargs,
+        ),
+    )
+
+
+def _span_annotation_factory(span_id: str) -> Callable[..., Precursors.SpanAnnotation]:
+    return lambda **kwargs: Precursors.SpanAnnotation(
+        span_id=str(span_id),
+        obj=models.SpanAnnotation(**kwargs),
+    )
+
+
+def _trace_annotation_factory(trace_id: str) -> Callable[..., Precursors.TraceAnnotation]:
+    return lambda **kwargs: Precursors.TraceAnnotation(
+        trace_id=str(trace_id),
+        obj=models.TraceAnnotation(**kwargs),
+    )
 
 
 def _read_sql_trace_evaluations_into_dataframe(
```

phoenix/server/api/routers/v1/experiment_evaluations.py (+68 -91)

```diff
--- a/phoenix/server/api/routers/v1/experiment_evaluations.py
+++ b/phoenix/server/api/routers/v1/experiment_evaluations.py
@@ -1,7 +1,9 @@
 from datetime import datetime
+from typing import Any, Dict, Literal, Optional
 
+from fastapi import APIRouter, HTTPException
+from pydantic import Field
 from starlette.requests import Request
-from starlette.responses import JSONResponse, Response
 from starlette.status import HTTP_404_NOT_FOUND
 from strawberry.relay import GlobalID
 
@@ -10,103 +12,76 @@ from phoenix.db.helpers import SupportedSQLDialect
 from phoenix.db.insertion.helpers import insert_on_conflict
 from phoenix.server.api.types.node import from_global_id_with_expected_type
 
+from .pydantic_compat import V1RoutesBaseModel
+from .utils import ResponseBody, add_errors_to_responses
 
-… (50 removed lines, content not captured)
-                    type: string
-                    format: date-time
-                    description: The end time of the evaluation in ISO format
-                trace_id:
-                    type: string
-                    description: Optional trace ID for tracking
-            required:
-                - experiment_run_id
-                - name
-                - annotator_kind
-                - start_time
-                - end_time
-    responses:
-        200:
-            description: Experiment evaluation upserted successfully
-            content:
-                application/json:
-                    schema:
-                        type: object
-                        properties:
-                            data:
-                                type: object
-                                properties:
-                                    id:
-                                        type: string
-                                        description: The ID of the upserted experiment evaluation
-        404:
-            description: ExperimentRun not found
-    """
+router = APIRouter(tags=["experiments"], include_in_schema=False)
+
+
+class ExperimentEvaluationResult(V1RoutesBaseModel):
+    label: Optional[str] = Field(default=None, description="The label assigned by the evaluation")
+    score: Optional[float] = Field(default=None, description="The score assigned by the evaluation")
+    explanation: Optional[str] = Field(
+        default=None, description="Explanation of the evaluation result"
+    )
+
+
+class UpsertExperimentEvaluationRequestBody(V1RoutesBaseModel):
+    experiment_run_id: str = Field(description="The ID of the experiment run being evaluated")
+    name: str = Field(description="The name of the evaluation")
+    annotator_kind: Literal["LLM", "CODE", "HUMAN"] = Field(
+        description="The kind of annotator used for the evaluation"
+    )
+    start_time: datetime = Field(description="The start time of the evaluation in ISO format")
+    end_time: datetime = Field(description="The end time of the evaluation in ISO format")
+    result: ExperimentEvaluationResult = Field(description="The result of the evaluation")
+    error: Optional[str] = Field(
+        None, description="Optional error message if the evaluation encountered an error"
+    )
+    metadata: Optional[Dict[str, Any]] = Field(
+        default=None, description="Metadata for the evaluation"
+    )
+    trace_id: Optional[str] = Field(default=None, description="Optional trace ID for tracking")
+
+
+class UpsertExperimentEvaluationResponseBodyData(V1RoutesBaseModel):
+    id: str = Field(description="The ID of the upserted experiment evaluation")
+
+
+class UpsertExperimentEvaluationResponseBody(
+    ResponseBody[UpsertExperimentEvaluationResponseBodyData]
+):
+    pass
+
+
+@router.post(
+    "/experiment_evaluations",
+    operation_id="upsertExperimentEvaluation",
+    summary="Create or update evaluation for an experiment run",
+    responses=add_errors_to_responses(
+        [{"status_code": HTTP_404_NOT_FOUND, "description": "Experiment run not found"}]
+    ),
+)
+async def upsert_experiment_evaluation(
+    request: Request, request_body: UpsertExperimentEvaluationRequestBody
+) -> UpsertExperimentEvaluationResponseBody:
     payload = await request.json()
     experiment_run_gid = GlobalID.from_id(payload["experiment_run_id"])
     try:
         experiment_run_id = from_global_id_with_expected_type(experiment_run_gid, "ExperimentRun")
     except ValueError:
-
-
+        raise HTTPException(
+            detail=f"ExperimentRun with ID {experiment_run_gid} does not exist",
            status_code=HTTP_404_NOT_FOUND,
         )
-    name =
-    annotator_kind =
-    result =
-    label = result.
-    score = result.
-    explanation = result.
-    error =
-    metadata =
+    name = request_body.name
+    annotator_kind = request_body.annotator_kind
+    result = request_body.result
+    label = result.label if result else None
+    score = result.score if result else None
+    explanation = result.explanation if result else None
+    error = request_body.error
+    metadata = request_body.metadata or {}
     start_time = payload["start_time"]
     end_time = payload["end_time"]
     async with request.app.state.db() as session:
@@ -133,4 +108,6 @@ async def upsert_experiment_evaluation(request: Request) -> Response:
             ).returning(models.ExperimentRunAnnotation)
         )
     evaluation_gid = GlobalID("ExperimentEvaluation", str(exp_eval_run.id))
-    return
+    return UpsertExperimentEvaluationResponseBody(
+        data=UpsertExperimentEvaluationResponseBodyData(id=str(evaluation_gid))
+    )
```
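
For context on the reworked ingestion path in the `evaluations.py` diff above, here is a minimal sketch of logging span evaluations from a client. It assumes a Phoenix server on the default `localhost:6006` and uses the documented client helpers (`phoenix.Client.log_evaluations`, `phoenix.trace.SpanEvaluations`); the span ID is a placeholder. Per `_add_evaluations`, rows are routed to span, trace, or document annotations based on the dataframe's index names.

```python
import pandas as pd
import phoenix as px
from phoenix.trace import SpanEvaluations

# The index name must be "context.span_id" or "span_id" so that
# _add_evaluations treats each row as a span annotation; the
# score/label/explanation columns are all optional.
eval_df = pd.DataFrame(
    {
        "score": [1.0],
        "label": ["correct"],
        "explanation": ["matches the reference answer"],
    },
    index=pd.Index(["7e2f08cb43bbf521"], name="context.span_id"),  # placeholder span ID
)

# The client ships the dataframe as pyarrow with content type
# "application/x-pandas-arrow", which post_evaluations above accepts.
px.Client().log_evaluations(SpanEvaluations(eval_name="correctness", dataframe=eval_df))
```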
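
Likewise, a hedged sketch of calling the new `upsertExperimentEvaluation` operation from the `experiment_evaluations.py` diff above over plain HTTP. The field names mirror `UpsertExperimentEvaluationRequestBody`; the base URL, the `/v1` mount point, and the experiment-run `GlobalID` are assumptions.

```python
from datetime import datetime, timezone

import httpx

body = {
    "experiment_run_id": "RXhwZXJpbWVudFJ1bjox",  # placeholder GlobalID of an ExperimentRun
    "name": "accuracy",
    "annotator_kind": "CODE",  # one of "LLM", "CODE", "HUMAN"
    "start_time": datetime.now(timezone.utc).isoformat(),
    "end_time": datetime.now(timezone.utc).isoformat(),
    "result": {"label": "correct", "score": 1.0, "explanation": None},
    "metadata": {},
}

response = httpx.post("http://localhost:6006/v1/experiment_evaluations", json=body)
response.raise_for_status()
# The ID of the upserted evaluation comes back wrapped in a "data" envelope
# (see UpsertExperimentEvaluationResponseBody above).
print(response.json()["data"]["id"])
```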