arize-phoenix 8.32.1__py3-none-any.whl → 9.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (79) hide show
  1. {arize_phoenix-8.32.1.dist-info → arize_phoenix-9.0.1.dist-info}/METADATA +5 -5
  2. {arize_phoenix-8.32.1.dist-info → arize_phoenix-9.0.1.dist-info}/RECORD +76 -56
  3. phoenix/db/constants.py +1 -0
  4. phoenix/db/facilitator.py +55 -0
  5. phoenix/db/insertion/document_annotation.py +31 -13
  6. phoenix/db/insertion/evaluation.py +15 -3
  7. phoenix/db/insertion/helpers.py +2 -1
  8. phoenix/db/insertion/span_annotation.py +26 -9
  9. phoenix/db/insertion/trace_annotation.py +25 -9
  10. phoenix/db/insertion/types.py +7 -0
  11. phoenix/db/migrations/versions/2f9d1a65945f_annotation_config_migration.py +322 -0
  12. phoenix/db/migrations/versions/8a3764fe7f1a_change_jsonb_to_json_for_prompts.py +76 -0
  13. phoenix/db/migrations/versions/bb8139330879_create_project_trace_retention_policies_table.py +77 -0
  14. phoenix/db/models.py +151 -10
  15. phoenix/db/types/annotation_configs.py +97 -0
  16. phoenix/db/types/db_models.py +41 -0
  17. phoenix/db/types/trace_retention.py +267 -0
  18. phoenix/experiments/functions.py +5 -1
  19. phoenix/server/api/auth.py +9 -0
  20. phoenix/server/api/context.py +5 -0
  21. phoenix/server/api/dataloaders/__init__.py +4 -0
  22. phoenix/server/api/dataloaders/annotation_summaries.py +203 -24
  23. phoenix/server/api/dataloaders/project_ids_by_trace_retention_policy_id.py +42 -0
  24. phoenix/server/api/dataloaders/trace_retention_policy_id_by_project_id.py +34 -0
  25. phoenix/server/api/helpers/annotations.py +9 -0
  26. phoenix/server/api/helpers/prompts/models.py +34 -67
  27. phoenix/server/api/input_types/CreateSpanAnnotationInput.py +9 -0
  28. phoenix/server/api/input_types/CreateTraceAnnotationInput.py +3 -0
  29. phoenix/server/api/input_types/PatchAnnotationInput.py +3 -0
  30. phoenix/server/api/input_types/SpanAnnotationFilter.py +67 -0
  31. phoenix/server/api/mutations/__init__.py +6 -0
  32. phoenix/server/api/mutations/annotation_config_mutations.py +413 -0
  33. phoenix/server/api/mutations/dataset_mutations.py +62 -39
  34. phoenix/server/api/mutations/project_trace_retention_policy_mutations.py +245 -0
  35. phoenix/server/api/mutations/span_annotations_mutations.py +272 -70
  36. phoenix/server/api/mutations/trace_annotations_mutations.py +203 -74
  37. phoenix/server/api/queries.py +86 -0
  38. phoenix/server/api/routers/v1/__init__.py +4 -0
  39. phoenix/server/api/routers/v1/annotation_configs.py +449 -0
  40. phoenix/server/api/routers/v1/annotations.py +161 -0
  41. phoenix/server/api/routers/v1/evaluations.py +6 -0
  42. phoenix/server/api/routers/v1/projects.py +1 -50
  43. phoenix/server/api/routers/v1/spans.py +35 -8
  44. phoenix/server/api/routers/v1/traces.py +22 -13
  45. phoenix/server/api/routers/v1/utils.py +60 -0
  46. phoenix/server/api/types/Annotation.py +7 -0
  47. phoenix/server/api/types/AnnotationConfig.py +124 -0
  48. phoenix/server/api/types/AnnotationSource.py +9 -0
  49. phoenix/server/api/types/AnnotationSummary.py +28 -14
  50. phoenix/server/api/types/AnnotatorKind.py +1 -0
  51. phoenix/server/api/types/CronExpression.py +15 -0
  52. phoenix/server/api/types/Evaluation.py +4 -30
  53. phoenix/server/api/types/Project.py +50 -2
  54. phoenix/server/api/types/ProjectTraceRetentionPolicy.py +110 -0
  55. phoenix/server/api/types/Span.py +78 -0
  56. phoenix/server/api/types/SpanAnnotation.py +24 -0
  57. phoenix/server/api/types/Trace.py +2 -2
  58. phoenix/server/api/types/TraceAnnotation.py +23 -0
  59. phoenix/server/app.py +20 -0
  60. phoenix/server/retention.py +76 -0
  61. phoenix/server/static/.vite/manifest.json +36 -36
  62. phoenix/server/static/assets/components-B2MWTXnm.js +4326 -0
  63. phoenix/server/static/assets/{index-B0CbpsxD.js → index-Bfvpea_-.js} +10 -10
  64. phoenix/server/static/assets/pages-CZ2vKu8H.js +7268 -0
  65. phoenix/server/static/assets/vendor-BRDkBC5J.js +903 -0
  66. phoenix/server/static/assets/{vendor-arizeai-CxXYQNUl.js → vendor-arizeai-BvTqp_W8.js} +3 -3
  67. phoenix/server/static/assets/{vendor-codemirror-B0NIFPOL.js → vendor-codemirror-COt9UfW7.js} +1 -1
  68. phoenix/server/static/assets/{vendor-recharts-CrrDFWK1.js → vendor-recharts-BoHX9Hvs.js} +2 -2
  69. phoenix/server/static/assets/{vendor-shiki-C5bJ-RPf.js → vendor-shiki-Cw1dsDAz.js} +1 -1
  70. phoenix/trace/dsl/filter.py +25 -5
  71. phoenix/utilities/__init__.py +18 -0
  72. phoenix/version.py +1 -1
  73. phoenix/server/static/assets/components-x-gKFJ8C.js +0 -3414
  74. phoenix/server/static/assets/pages-BU4VdyeH.js +0 -5867
  75. phoenix/server/static/assets/vendor-BfhM_F1u.js +0 -902
  76. {arize_phoenix-8.32.1.dist-info → arize_phoenix-9.0.1.dist-info}/WHEEL +0 -0
  77. {arize_phoenix-8.32.1.dist-info → arize_phoenix-9.0.1.dist-info}/entry_points.txt +0 -0
  78. {arize_phoenix-8.32.1.dist-info → arize_phoenix-9.0.1.dist-info}/licenses/IP_NOTICE +0 -0
  79. {arize_phoenix-8.32.1.dist-info → arize_phoenix-9.0.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,267 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime, timedelta, timezone
4
+ from typing import Annotated, Iterable, Literal, Optional, Union
5
+
6
+ import sqlalchemy as sa
7
+ from pydantic import AfterValidator, BaseModel, Field, RootModel
8
+ from sqlalchemy.ext.asyncio import AsyncSession
9
+
10
+ from phoenix.utilities import hour_of_week
11
+
12
+
13
class _MaxDays(BaseModel):
    """Mixin providing an age-based (max days) cutoff predicate for traces."""

    max_days: Annotated[float, Field(ge=0)]

    @property
    def max_days_filter(self) -> sa.ColumnElement[bool]:
        """SQL predicate matching traces that started more than ``max_days`` days ago."""
        if self.max_days <= 0:
            # A zero limit means the rule is inactive: match nothing.
            return sa.literal(False)
        # Local import avoids a circular dependency with phoenix.db.models.
        from phoenix.db.models import Trace

        cutoff = datetime.now(timezone.utc) - timedelta(days=self.max_days)
        return Trace.start_time < cutoff
23
+
24
+
25
class _MaxCount(BaseModel):
    """Mixin providing a count-based cutoff predicate for traces."""

    # Number of most-recent traces to keep; 0 disables the rule.
    max_count: Annotated[int, Field(ge=0)]

    @property
    def max_count_filter(self) -> sa.ColumnElement[bool]:
        """SQL predicate matching traces older than the ``max_count``-th newest trace."""
        if self.max_count <= 0:
            # Inactive rule: a predicate that matches nothing.
            return sa.literal(False)
        # Local import avoids a circular dependency with phoenix.db.models.
        from phoenix.db.models import Trace

        # The cutoff is the start_time of the max_count-th newest trace.
        # NOTE(review): this subquery is not scoped to any project, so the
        # cutoff is computed across ALL traces globally — confirm this is
        # intended when the predicate is applied to a subset of projects.
        return Trace.start_time < (
            sa.select(Trace.start_time)
            .order_by(Trace.start_time.desc())
            .offset(self.max_count - 1)
            .limit(1)
            .scalar_subquery()
        )
41
+
42
+
43
class MaxDaysRule(_MaxDays, BaseModel):
    """Retention rule: delete traces older than ``max_days`` days."""

    type: Literal["max_days"] = "max_days"

    def __bool__(self) -> bool:
        # Active only when the day limit is strictly positive.
        return self.max_days > 0

    async def delete_traces(
        self,
        session: AsyncSession,
        project_rowids: Union[Iterable[int], sa.ScalarSelect[int]],
    ) -> set[int]:
        """Delete traces past the age cutoff; return the affected project rowids."""
        if not self:
            return set()
        from phoenix.db.models import Trace

        stmt = (
            sa.delete(Trace)
            .where(Trace.project_rowid.in_(project_rowids))
            .where(self.max_days_filter)
            .returning(Trace.project_rowid)
        )
        return {rowid for rowid in await session.scalars(stmt)}
65
+
66
+
67
class MaxCountRule(_MaxCount, BaseModel):
    """Retention rule: keep only the newest ``max_count`` traces."""

    type: Literal["max_count"] = "max_count"

    def __bool__(self) -> bool:
        # Active only when the count limit is strictly positive.
        return self.max_count > 0

    async def delete_traces(
        self,
        session: AsyncSession,
        project_rowids: Union[Iterable[int], sa.ScalarSelect[int]],
    ) -> set[int]:
        """Delete traces past the count cutoff; return the affected project rowids."""
        if not self:
            return set()
        from phoenix.db.models import Trace

        stmt = (
            sa.delete(Trace)
            .where(Trace.project_rowid.in_(project_rowids))
            .where(self.max_count_filter)
            .returning(Trace.project_rowid)
        )
        return {rowid for rowid in await session.scalars(stmt)}
89
+
90
+
91
class MaxDaysOrCountRule(_MaxDays, _MaxCount, BaseModel):
    """Retention rule: delete traces that exceed either the age or count limit."""

    type: Literal["max_days_or_count"] = "max_days_or_count"

    def __bool__(self) -> bool:
        # Active when at least one of the two limits is strictly positive.
        return self.max_days > 0 or self.max_count > 0

    async def delete_traces(
        self,
        session: AsyncSession,
        project_rowids: Union[Iterable[int], sa.ScalarSelect[int]],
    ) -> set[int]:
        """Delete traces matching either cutoff; return the affected project rowids."""
        if not self:
            return set()
        from phoenix.db.models import Trace

        stmt = (
            sa.delete(Trace)
            .where(Trace.project_rowid.in_(project_rowids))
            .where(sa.or_(self.max_days_filter, self.max_count_filter))
            .returning(Trace.project_rowid)
        )
        return {rowid for rowid in await session.scalars(stmt)}
113
+
114
+
115
class TraceRetentionRule(RootModel[Union[MaxDaysRule, MaxCountRule, MaxDaysOrCountRule]]):
    """Polymorphic wrapper around the concrete retention rules.

    Pydantic selects the concrete rule class via the ``type`` discriminator
    field when deserializing stored rule data.
    """

    root: Annotated[
        Union[MaxDaysRule, MaxCountRule, MaxDaysOrCountRule], Field(discriminator="type")
    ]

    def __bool__(self) -> bool:
        # Truthiness mirrors the wrapped rule's (active iff limits are set).
        return bool(self.root)

    async def delete_traces(
        self,
        session: AsyncSession,
        project_rowids: Union[Iterable[int], sa.ScalarSelect[int]],
    ) -> set[int]:
        # Delegate deletion to the concrete rule; returns affected project rowids.
        return await self.root.delete_traces(session, project_rowids)
129
+
130
+
131
def _time_of_next_run(
    cron_expression: str,
    after: Optional[datetime] = None,
) -> datetime:
    """
    Parse a cron expression and calculate the UTC datetime of the next run.
    Only processes hour, and day of week fields; day-of-month and
    month fields must be '*'; minute field must be 0.

    Args:
        cron_expression (str): Standard cron expression with 5 fields:
            minute hour day-of-month month day-of-week
            (minute must be '0'; day-of-month and month must be '*')
        after: Optional[datetime]: The datetime to start searching from. If None,
            the current time is used. Aware datetimes are converted to UTC;
            naive datetimes are assumed to already be in UTC.

    Returns:
        datetime: The datetime of the next run. Timezone is UTC.

    Raises:
        ValueError: If the expression has non-wildcard values for day-of-month or month, if the
            minute field is not '0', or if no match is found within the next 7 days (168 hours).
    """
    fields: list[str] = cron_expression.strip().split()
    if len(fields) != 5:
        raise ValueError(
            "Invalid cron expression. Expected 5 fields "
            "(minute hour day-of-month month day-of-week)."
        )
    if fields[0] != "0":
        raise ValueError("Invalid cron expression. Minute field must be '0'.")
    if fields[2] != "*" or fields[3] != "*":
        raise ValueError("Invalid cron expression. Day-of-month and month fields must be '*'.")
    hours: set[int] = _parse_field(fields[1], 0, 23)
    # Parse days of week (0-6, where 0 is Sunday)
    days_of_week: set[int] = _parse_field(fields[4], 0, 6)
    # Convert to Python's weekday format (0-6, where 0 is Monday)
    # Sunday (0 in cron) becomes 6 in Python's weekday()
    python_days_of_week = {(day_of_week + 6) % 7 for day_of_week in days_of_week}
    if after is None:
        t = datetime.now(timezone.utc)
    elif after.tzinfo is None:
        # Naive input: interpret the wall-clock time as UTC (original behavior).
        t = after.replace(tzinfo=timezone.utc)
    else:
        # BUG FIX: `after.replace(tzinfo=timezone.utc)` would reinterpret an
        # aware non-UTC datetime's wall-clock time as UTC; convert it instead.
        t = after.astimezone(timezone.utc)
    t = t.replace(minute=0, second=0, microsecond=0)
    for _ in range(168):  # Check up to 7 days (168 hours)
        t += timedelta(hours=1)
        if t.hour in hours and t.weekday() in python_days_of_week:
            return t
    raise ValueError("No matching execution time found within the next 7 days.")
177
+
178
+
179
def _validated_cron_expression(value: str) -> str:
    """Return *value* unchanged after checking it parses as a schedulable cron."""
    # _time_of_next_run raises ValueError for malformed or unsupported fields.
    _time_of_next_run(value)
    return value


class TraceRetentionCronExpression(RootModel[str]):
    """A cron expression (hour / day-of-week granularity) stored as a string."""

    root: Annotated[str, AfterValidator(_validated_cron_expression)]

    def get_hour_of_prev_run(self) -> int:
        """
        Calculate the hour of the previous run before now.

        Returns:
            int: The hour of the previous run (0-167), where 0 is midnight Sunday UTC.
        """
        # Stepping back one hour and asking for the "next" run yields the most
        # recent run at or before the current hour.
        one_hour_ago = datetime.now(timezone.utc) - timedelta(hours=1)
        return hour_of_week(_time_of_next_run(self.root, one_hour_ago))
191
+
192
+
193
+ def _parse_field(field: str, min_val: int, max_val: int) -> set[int]:
194
+ """
195
+ Parse a cron field and return the set of matching values.
196
+
197
+ Args:
198
+ field (str): The cron field to parse
199
+ min_val (int): Minimum allowed value for this field
200
+ max_val (int): Maximum allowed value for this field
201
+
202
+ Returns:
203
+ set[int]: Set of all valid values represented by the field expression
204
+
205
+ Raises:
206
+ ValueError: If the field contains invalid values or formats
207
+ """
208
+ if field == "*":
209
+ return set(range(min_val, max_val + 1))
210
+ values: set[int] = set()
211
+ for part in field.split(","):
212
+ if "/" in part:
213
+ # Handle steps
214
+ range_part, step_str = part.split("/")
215
+ try:
216
+ step = int(step_str)
217
+ except ValueError:
218
+ raise ValueError(f"Invalid step value: {step_str}")
219
+ if step <= 0:
220
+ raise ValueError(f"Step value must be positive: {step}")
221
+ if range_part == "*":
222
+ start, end = min_val, max_val
223
+ elif "-" in range_part:
224
+ try:
225
+ start_str, end_str = range_part.split("-")
226
+ start, end = int(start_str), int(end_str)
227
+ except ValueError:
228
+ raise ValueError(f"Invalid range format: {range_part}")
229
+ if start < min_val or end > max_val:
230
+ raise ValueError(
231
+ f"Range {start}-{end} outside allowed values ({min_val}-{max_val})"
232
+ )
233
+ if start > end:
234
+ raise ValueError(f"Invalid range: {start}-{end} (start > end)")
235
+ else:
236
+ try:
237
+ start = int(range_part)
238
+ except ValueError:
239
+ raise ValueError(f"Invalid value: {range_part}")
240
+ if start < min_val or start > max_val:
241
+ raise ValueError(f"Value {start} out of range ({min_val}-{max_val})")
242
+ end = max_val
243
+ values.update(range(start, end + 1, step))
244
+ elif "-" in part:
245
+ # Handle ranges
246
+ try:
247
+ start_str, end_str = part.split("-")
248
+ start, end = int(start_str), int(end_str)
249
+ except ValueError:
250
+ raise ValueError(f"Invalid range format: {part}")
251
+ if start < min_val or end > max_val:
252
+ raise ValueError(
253
+ f"Range {start}-{end} outside allowed values ({min_val}-{max_val})"
254
+ )
255
+ if start > end:
256
+ raise ValueError(f"Invalid range: {start}-{end} (start > end)")
257
+ values.update(range(start, end + 1))
258
+ else:
259
+ # Handle single values
260
+ try:
261
+ value = int(part)
262
+ except ValueError:
263
+ raise ValueError(f"Invalid value: {part}")
264
+ if value < min_val or value > max_val:
265
+ raise ValueError(f"Value {value} out of range ({min_val}-{max_val})")
266
+ values.add(value)
267
+ return values
@@ -95,6 +95,7 @@ def run_experiment(
95
95
  dry_run: Union[bool, int] = False,
96
96
  print_summary: bool = True,
97
97
  concurrency: int = 3,
98
+ timeout: Optional[int] = None,
98
99
  ) -> RanExperiment:
99
100
  """
100
101
  Runs an experiment using a given set of dataset of examples.
@@ -148,6 +149,8 @@ def run_experiment(
148
149
  concurrency (int): Specifies the concurrency for task execution. In order to enable
149
150
  concurrent task execution, the task callable must be a coroutine function.
150
151
  Defaults to 3.
152
+ timeout (Optional[int]): The timeout for the task execution in seconds. Use this to run
153
+ longer tasks to avoid re-queuing the same task multiple times. Defaults to None.
151
154
 
152
155
  Returns:
153
156
  RanExperiment: The results of the experiment and evaluation. Additional evaluations can be
@@ -380,6 +383,7 @@ def run_experiment(
380
383
  fallback_return_value=None,
381
384
  tqdm_bar_format=get_tqdm_progress_bar_formatter("running tasks"),
382
385
  concurrency=concurrency,
386
+ timeout=timeout,
383
387
  )
384
388
 
385
389
  test_cases = [
@@ -752,7 +756,7 @@ def _print_experiment_error(
752
756
  Prints an experiment error.
753
757
  """
754
758
  display_error = RuntimeError(
755
- f"{kind} failed for example id {repr(example_id)}, " f"repetition {repr(repetition_number)}"
759
+ f"{kind} failed for example id {repr(example_id)}, repetition {repr(repetition_number)}"
756
760
  )
757
761
  display_error.__cause__ = error
758
762
  formatted_exception = "".join(
@@ -42,3 +42,12 @@ class IsAdmin(Authorization):
42
42
  if not info.context.auth_enabled:
43
43
  return False
44
44
  return isinstance((user := info.context.user), PhoenixUser) and user.is_admin
45
+
46
+
47
class IsAdminIfAuthEnabled(Authorization):
    """Permit everyone when auth is disabled; otherwise require an admin user."""

    message = MSG_ADMIN_ONLY

    def has_permission(self, source: Any, info: Info, **kwargs: Any) -> bool:
        # With auth turned off there is no user identity to check.
        if not info.context.auth_enabled:
            return True
        user = info.context.user
        return isinstance(user, PhoenixUser) and user.is_admin
@@ -32,6 +32,7 @@ from phoenix.server.api.dataloaders import (
32
32
  NumChildSpansDataLoader,
33
33
  NumSpansPerTraceDataLoader,
34
34
  ProjectByNameDataLoader,
35
+ ProjectIdsByTraceRetentionPolicyIdDataLoader,
35
36
  PromptVersionSequenceNumberDataLoader,
36
37
  RecordCountDataLoader,
37
38
  SessionIODataLoader,
@@ -47,6 +48,7 @@ from phoenix.server.api.dataloaders import (
47
48
  TableFieldsDataLoader,
48
49
  TokenCountDataLoader,
49
50
  TraceByTraceIdsDataLoader,
51
+ TraceRetentionPolicyIdByProjectIdDataLoader,
50
52
  TraceRootSpansDataLoader,
51
53
  UserRolesDataLoader,
52
54
  UsersDataLoader,
@@ -82,6 +84,7 @@ class DataLoaders:
82
84
  num_child_spans: NumChildSpansDataLoader
83
85
  num_spans_per_trace: NumSpansPerTraceDataLoader
84
86
  project_fields: TableFieldsDataLoader
87
+ projects_by_trace_retention_policy_id: ProjectIdsByTraceRetentionPolicyIdDataLoader
85
88
  prompt_version_sequence_number: PromptVersionSequenceNumberDataLoader
86
89
  record_counts: RecordCountDataLoader
87
90
  session_first_inputs: SessionIODataLoader
@@ -99,6 +102,8 @@ class DataLoaders:
99
102
  token_counts: TokenCountDataLoader
100
103
  trace_by_trace_ids: TraceByTraceIdsDataLoader
101
104
  trace_fields: TableFieldsDataLoader
105
+ trace_retention_policy_id_by_project_id: TraceRetentionPolicyIdByProjectIdDataLoader
106
+ project_trace_retention_policy_fields: TableFieldsDataLoader
102
107
  trace_root_spans: TraceRootSpansDataLoader
103
108
  project_by_name: ProjectByNameDataLoader
104
109
  users: UsersDataLoader
@@ -20,6 +20,7 @@ from .min_start_or_max_end_times import MinStartOrMaxEndTimeCache, MinStartOrMax
20
20
  from .num_child_spans import NumChildSpansDataLoader
21
21
  from .num_spans_per_trace import NumSpansPerTraceDataLoader
22
22
  from .project_by_name import ProjectByNameDataLoader
23
+ from .project_ids_by_trace_retention_policy_id import ProjectIdsByTraceRetentionPolicyIdDataLoader
23
24
  from .prompt_version_sequence_number import PromptVersionSequenceNumberDataLoader
24
25
  from .record_counts import RecordCountCache, RecordCountDataLoader
25
26
  from .session_io import SessionIODataLoader
@@ -35,6 +36,7 @@ from .span_projects import SpanProjectsDataLoader
35
36
  from .table_fields import TableFieldsDataLoader
36
37
  from .token_counts import TokenCountCache, TokenCountDataLoader
37
38
  from .trace_by_trace_ids import TraceByTraceIdsDataLoader
39
+ from .trace_retention_policy_id_by_project_id import TraceRetentionPolicyIdByProjectIdDataLoader
38
40
  from .trace_root_spans import TraceRootSpansDataLoader
39
41
  from .user_roles import UserRolesDataLoader
40
42
  from .users import UsersDataLoader
@@ -57,6 +59,7 @@ __all__ = [
57
59
  "MinStartOrMaxEndTimeDataLoader",
58
60
  "NumChildSpansDataLoader",
59
61
  "NumSpansPerTraceDataLoader",
62
+ "ProjectIdsByTraceRetentionPolicyIdDataLoader",
60
63
  "PromptVersionSequenceNumberDataLoader",
61
64
  "RecordCountDataLoader",
62
65
  "SessionIODataLoader",
@@ -71,6 +74,7 @@ __all__ = [
71
74
  "TableFieldsDataLoader",
72
75
  "TokenCountDataLoader",
73
76
  "TraceByTraceIdsDataLoader",
77
+ "TraceRetentionPolicyIdByProjectIdDataLoader",
74
78
  "TraceRootSpansDataLoader",
75
79
  "ProjectByNameDataLoader",
76
80
  "SpanAnnotationsDataLoader",
@@ -1,11 +1,11 @@
1
1
  from collections import defaultdict
2
2
  from datetime import datetime
3
- from typing import Any, Literal, Optional
3
+ from typing import Any, Literal, Optional, Type, Union, cast
4
4
 
5
5
  import pandas as pd
6
6
  from aioitertools.itertools import groupby
7
7
  from cachetools import LFUCache, TTLCache
8
- from sqlalchemy import Select, func, or_, select
8
+ from sqlalchemy import Select, and_, case, distinct, func, or_, select
9
9
  from strawberry.dataloader import AbstractCache, DataLoader
10
10
  from typing_extensions import TypeAlias, assert_never
11
11
 
@@ -92,7 +92,7 @@ class AnnotationSummaryDataLoader(DataLoader[Key, Result]):
92
92
  async with self._db() as session:
93
93
  data = await session.stream(stmt)
94
94
  async for annotation_name, group in groupby(data, lambda row: row.name):
95
- summary = AnnotationSummary(pd.DataFrame(group))
95
+ summary = AnnotationSummary(name=annotation_name, df=pd.DataFrame(group))
96
96
  for position in params[annotation_name]:
97
97
  results[position] = summary
98
98
  return results
@@ -103,23 +103,64 @@ def _get_stmt(
103
103
  *annotation_names: Param,
104
104
  ) -> Select[Any]:
105
105
  kind, project_rowid, (start_time, end_time), filter_condition = segment
106
- stmt = select()
106
+
107
+ annotation_model: Union[Type[models.SpanAnnotation], Type[models.TraceAnnotation]]
108
+ entity_model: Union[Type[models.Span], Type[models.Trace]]
109
+ entity_join_model: Optional[Type[models.Base]]
110
+ entity_id_column: Any
111
+
107
112
  if kind == "span":
108
- msa = models.SpanAnnotation
109
- name_column, label_column, score_column = msa.name, msa.label, msa.score
110
- time_column = models.Span.start_time
111
- stmt = stmt.join(models.Span).join_from(models.Span, models.Trace)
112
- if filter_condition:
113
- sf = SpanFilter(filter_condition)
114
- stmt = sf(stmt)
113
+ annotation_model = models.SpanAnnotation
114
+ entity_model = models.Span
115
+ entity_join_model = models.Trace
116
+ entity_id_column = models.Span.id.label("entity_id")
115
117
  elif kind == "trace":
116
- mta = models.TraceAnnotation
117
- name_column, label_column, score_column = mta.name, mta.label, mta.score
118
- time_column = models.Trace.start_time
119
- stmt = stmt.join(models.Trace)
118
+ annotation_model = models.TraceAnnotation
119
+ entity_model = models.Trace
120
+ entity_join_model = None
121
+ entity_id_column = models.Trace.id.label("entity_id")
120
122
  else:
121
123
  assert_never(kind)
122
- stmt = stmt.add_columns(
124
+
125
+ name_column = annotation_model.name
126
+ label_column = annotation_model.label
127
+ score_column = annotation_model.score
128
+ time_column = entity_model.start_time
129
+
130
+ # First query: count distinct entities per annotation name
131
+ # This is used later to calculate accurate fractions that account for entities without labels
132
+ entity_count_query = select(
133
+ name_column, func.count(distinct(entity_id_column)).label("entity_count")
134
+ )
135
+
136
+ if kind == "span":
137
+ entity_count_query = entity_count_query.join(cast(Type[models.Span], entity_model))
138
+ entity_count_query = entity_count_query.join_from(
139
+ cast(Type[models.Span], entity_model), cast(Type[models.Trace], entity_join_model)
140
+ )
141
+ entity_count_query = entity_count_query.where(models.Trace.project_rowid == project_rowid)
142
+ elif kind == "trace":
143
+ entity_count_query = entity_count_query.join(cast(Type[models.Trace], entity_model))
144
+ entity_count_query = entity_count_query.where(
145
+ cast(Type[models.Trace], entity_model).project_rowid == project_rowid
146
+ )
147
+
148
+ entity_count_query = entity_count_query.where(
149
+ or_(score_column.is_not(None), label_column.is_not(None))
150
+ )
151
+ entity_count_query = entity_count_query.where(name_column.in_(annotation_names))
152
+
153
+ if start_time:
154
+ entity_count_query = entity_count_query.where(start_time <= time_column)
155
+ if end_time:
156
+ entity_count_query = entity_count_query.where(time_column < end_time)
157
+
158
+ entity_count_query = entity_count_query.group_by(name_column)
159
+ entity_count_subquery = entity_count_query.subquery()
160
+
161
+ # Main query: gets raw annotation data with counts per (span/trace)+name+label
162
+ base_stmt = select(
163
+ entity_id_column,
123
164
  name_column,
124
165
  label_column,
125
166
  func.count().label("record_count"),
@@ -127,13 +168,151 @@ def _get_stmt(
127
168
  func.count(score_column).label("score_count"),
128
169
  func.sum(score_column).label("score_sum"),
129
170
  )
130
- stmt = stmt.group_by(name_column, label_column)
131
- stmt = stmt.order_by(name_column, label_column)
132
- stmt = stmt.where(models.Trace.project_rowid == project_rowid)
133
- stmt = stmt.where(or_(score_column.is_not(None), label_column.is_not(None)))
134
- stmt = stmt.where(name_column.in_(annotation_names))
171
+
172
+ if kind == "span":
173
+ base_stmt = base_stmt.join(cast(Type[models.Span], entity_model))
174
+ base_stmt = base_stmt.join_from(
175
+ cast(Type[models.Span], entity_model), cast(Type[models.Trace], entity_join_model)
176
+ )
177
+ base_stmt = base_stmt.where(models.Trace.project_rowid == project_rowid)
178
+ if filter_condition:
179
+ sf = SpanFilter(filter_condition)
180
+ base_stmt = sf(base_stmt)
181
+ elif kind == "trace":
182
+ base_stmt = base_stmt.join(cast(Type[models.Trace], entity_model))
183
+ base_stmt = base_stmt.where(
184
+ cast(Type[models.Trace], entity_model).project_rowid == project_rowid
185
+ )
186
+ else:
187
+ assert_never(kind)
188
+
189
+ base_stmt = base_stmt.where(or_(score_column.is_not(None), label_column.is_not(None)))
190
+ base_stmt = base_stmt.where(name_column.in_(annotation_names))
191
+
135
192
  if start_time:
136
- stmt = stmt.where(start_time <= time_column)
193
+ base_stmt = base_stmt.where(start_time <= time_column)
137
194
  if end_time:
138
- stmt = stmt.where(time_column < end_time)
139
- return stmt
195
+ base_stmt = base_stmt.where(time_column < end_time)
196
+
197
+ # Group to get one row per (span/trace)+name+label combination
198
+ base_stmt = base_stmt.group_by(entity_id_column, name_column, label_column)
199
+
200
+ base_subquery = base_stmt.subquery()
201
+
202
+ # Calculate total counts per (span/trace)+name for computing fractions
203
+ entity_totals = (
204
+ select(
205
+ base_subquery.c.entity_id,
206
+ base_subquery.c.name,
207
+ func.sum(base_subquery.c.label_count).label("total_label_count"),
208
+ func.sum(base_subquery.c.score_count).label("total_score_count"),
209
+ func.sum(base_subquery.c.score_sum).label("entity_score_sum"),
210
+ )
211
+ .group_by(base_subquery.c.entity_id, base_subquery.c.name)
212
+ .subquery()
213
+ )
214
+
215
+ per_entity_fractions = (
216
+ select(
217
+ base_subquery.c.entity_id,
218
+ base_subquery.c.name,
219
+ base_subquery.c.label,
220
+ base_subquery.c.record_count,
221
+ base_subquery.c.label_count,
222
+ base_subquery.c.score_count,
223
+ base_subquery.c.score_sum,
224
+ # Calculate label fraction, avoiding division by zero when total_label_count is 0
225
+ case(
226
+ (
227
+ entity_totals.c.total_label_count > 0,
228
+ base_subquery.c.label_count * 1.0 / entity_totals.c.total_label_count,
229
+ ),
230
+ else_=None,
231
+ ).label("label_fraction"),
232
+ # Calculate average score for the entity (if there are any scores)
233
+ case(
234
+ (
235
+ entity_totals.c.total_score_count > 0,
236
+ entity_totals.c.entity_score_sum * 1.0 / entity_totals.c.total_score_count,
237
+ ),
238
+ else_=None,
239
+ ).label("entity_avg_score"),
240
+ )
241
+ .join(
242
+ entity_totals,
243
+ and_(
244
+ base_subquery.c.entity_id == entity_totals.c.entity_id,
245
+ base_subquery.c.name == entity_totals.c.name,
246
+ ),
247
+ )
248
+ .subquery()
249
+ )
250
+
251
+ # Aggregate metrics across (spans/traces) for each name+label combination.
252
+ label_entity_metrics = (
253
+ select(
254
+ per_entity_fractions.c.name,
255
+ per_entity_fractions.c.label,
256
+ func.count(distinct(per_entity_fractions.c.entity_id)).label("entities_with_label"),
257
+ func.sum(per_entity_fractions.c.label_count).label("total_label_count"),
258
+ func.sum(per_entity_fractions.c.score_count).label("total_score_count"),
259
+ func.sum(per_entity_fractions.c.score_sum).label("total_score_sum"),
260
+ # Average of label fractions for entities that have this label
261
+ func.avg(per_entity_fractions.c.label_fraction).label("avg_label_fraction_present"),
262
+ # Average of per-entity average scores (but we handle overall aggregation separately)
263
+ )
264
+ .group_by(per_entity_fractions.c.name, per_entity_fractions.c.label)
265
+ .subquery()
266
+ )
267
+
268
+ # Compute distinct per-entity average scores to ensure each entity counts only once.
269
+ distinct_entity_scores = (
270
+ select(
271
+ per_entity_fractions.c.entity_id,
272
+ per_entity_fractions.c.name,
273
+ per_entity_fractions.c.entity_avg_score,
274
+ )
275
+ .distinct()
276
+ .subquery()
277
+ )
278
+
279
+ overall_score_aggregates = (
280
+ select(
281
+ distinct_entity_scores.c.name,
282
+ func.avg(distinct_entity_scores.c.entity_avg_score).label("overall_avg_score"),
283
+ )
284
+ .group_by(distinct_entity_scores.c.name)
285
+ .subquery()
286
+ )
287
+
288
+ # Final result: adjust label fractions by the proportion of entities reporting this label
289
+ # and include the overall average score per annotation name.
290
+ final_stmt = (
291
+ select(
292
+ label_entity_metrics.c.name,
293
+ label_entity_metrics.c.label,
294
+ # Adjust label fraction, guarding against division by zero in entity_count
295
+ case(
296
+ (
297
+ entity_count_subquery.c.entity_count > 0,
298
+ label_entity_metrics.c.avg_label_fraction_present
299
+ * label_entity_metrics.c.entities_with_label
300
+ / entity_count_subquery.c.entity_count,
301
+ ),
302
+ else_=None,
303
+ ).label("avg_label_fraction"),
304
+ overall_score_aggregates.c.overall_avg_score.label("avg_score"), # same for all labels
305
+ label_entity_metrics.c.total_label_count.label("label_count"),
306
+ label_entity_metrics.c.total_score_count.label("score_count"),
307
+ label_entity_metrics.c.total_score_sum.label("score_sum"),
308
+ label_entity_metrics.c.entities_with_label.label("record_count"),
309
+ )
310
+ .join(entity_count_subquery, label_entity_metrics.c.name == entity_count_subquery.c.name)
311
+ .join(
312
+ overall_score_aggregates,
313
+ label_entity_metrics.c.name == overall_score_aggregates.c.name,
314
+ )
315
+ .order_by(label_entity_metrics.c.name, label_entity_metrics.c.label)
316
+ )
317
+
318
+ return final_stmt
@@ -0,0 +1,42 @@
1
+ from collections import defaultdict
2
+
3
+ from sqlalchemy import or_, select
4
+ from strawberry.dataloader import DataLoader
5
+ from typing_extensions import TypeAlias
6
+
7
+ from phoenix.db.constants import DEFAULT_PROJECT_TRACE_RETENTION_POLICY_ID
8
+ from phoenix.db.models import Project
9
+ from phoenix.server.types import DbSessionFactory
10
+
11
+ PolicyRowId: TypeAlias = int
12
+ ProjectRowId: TypeAlias = int
13
+
14
+ Key: TypeAlias = PolicyRowId
15
+ Result: TypeAlias = list[ProjectRowId]
16
+
17
+
18
class ProjectIdsByTraceRetentionPolicyIdDataLoader(DataLoader[Key, Result]):
    """Batch-load the project rowids attached to each trace retention policy."""

    def __init__(self, db: DbSessionFactory) -> None:
        super().__init__(load_fn=self._load_fn)
        self._db = db

    async def _load_fn(self, keys: list[Key]) -> list[Result]:
        wanted_policy_ids = set(keys)
        stmt = select(Project.trace_retention_policy_id, Project.id)
        # Projects with a NULL policy implicitly use the default policy, so
        # include them whenever the default policy id is among the keys.
        if DEFAULT_PROJECT_TRACE_RETENTION_POLICY_ID in wanted_policy_ids:
            stmt = stmt.where(
                or_(
                    Project.trace_retention_policy_id.in_(wanted_policy_ids),
                    Project.trace_retention_policy_id.is_(None),
                )
            )
        else:
            stmt = stmt.where(Project.trace_retention_policy_id.in_(wanted_policy_ids))
        grouped: defaultdict[Key, Result] = defaultdict(list)
        async with self._db() as session:
            rows = await session.stream(stmt)
            async for policy_rowid, project_rowid in rows:
                # NULL policy rows are folded into the default policy bucket.
                bucket = policy_rowid or DEFAULT_PROJECT_TRACE_RETENTION_POLICY_ID
                grouped[bucket].append(project_rowid)
        # Copy each list so callers cannot mutate shared loader state.
        return [grouped.get(policy_id, []).copy() for policy_id in keys]