arize-phoenix 11.7.0__py3-none-any.whl → 11.8.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

This version of arize-phoenix has been flagged as potentially problematic.

Files changed (32)
  1. {arize_phoenix-11.7.0.dist-info → arize_phoenix-11.8.0.dist-info}/METADATA +14 -2
  2. {arize_phoenix-11.7.0.dist-info → arize_phoenix-11.8.0.dist-info}/RECORD +32 -31
  3. phoenix/config.py +33 -0
  4. phoenix/datetime_utils.py +112 -1
  5. phoenix/db/helpers.py +156 -1
  6. phoenix/server/api/auth.py +28 -6
  7. phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py +6 -7
  8. phoenix/server/api/exceptions.py +6 -0
  9. phoenix/server/api/input_types/TimeBinConfig.py +23 -0
  10. phoenix/server/api/routers/oauth2.py +19 -2
  11. phoenix/server/api/types/CostBreakdown.py +4 -7
  12. phoenix/server/api/types/Project.py +341 -73
  13. phoenix/server/app.py +7 -3
  14. phoenix/server/authorization.py +27 -2
  15. phoenix/server/cost_tracking/cost_details_calculator.py +22 -16
  16. phoenix/server/daemons/span_cost_calculator.py +2 -8
  17. phoenix/server/email/sender.py +2 -1
  18. phoenix/server/email/templates/db_disk_usage_notification.html +3 -0
  19. phoenix/server/static/.vite/manifest.json +36 -36
  20. phoenix/server/static/assets/{components-J3qjrjBf.js → components-5M9nebi4.js} +344 -263
  21. phoenix/server/static/assets/{index-CEObsQf_.js → index-OU2WTnGN.js} +11 -11
  22. phoenix/server/static/assets/{pages-CW1UdBht.js → pages-DF8rqxJ4.js} +451 -444
  23. phoenix/server/static/assets/{vendor-BnPh9i9e.js → vendor-Bl7CyFDw.js} +147 -147
  24. phoenix/server/static/assets/{vendor-arizeai-Cr9o_Iu_.js → vendor-arizeai-B_viEUUA.js} +18 -480
  25. phoenix/server/static/assets/{vendor-codemirror-k3zCIjlN.js → vendor-codemirror-vlcH1_iR.js} +1 -1
  26. phoenix/server/static/assets/{vendor-recharts-BdblEuGB.js → vendor-recharts-C9cQu72o.js} +25 -25
  27. phoenix/server/static/assets/{vendor-shiki-DPtuv2M4.js → vendor-shiki-BsknB7bv.js} +1 -1
  28. phoenix/version.py +1 -1
  29. {arize_phoenix-11.7.0.dist-info → arize_phoenix-11.8.0.dist-info}/WHEEL +0 -0
  30. {arize_phoenix-11.7.0.dist-info → arize_phoenix-11.8.0.dist-info}/entry_points.txt +0 -0
  31. {arize_phoenix-11.7.0.dist-info → arize_phoenix-11.8.0.dist-info}/licenses/IP_NOTICE +0 -0
  32. {arize_phoenix-11.7.0.dist-info → arize_phoenix-11.8.0.dist-info}/licenses/LICENSE +0 -0
phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py CHANGED
@@ -1,7 +1,6 @@
 from collections import defaultdict
 
 from sqlalchemy import func, select
-from sqlalchemy.sql.functions import coalesce
 from strawberry.dataloader import DataLoader
 from typing_extensions import TypeAlias
 
@@ -23,12 +22,12 @@ class SpanCostSummaryByExperimentDataLoader(DataLoader[Key, Result]):
         stmt = (
             select(
                 models.ExperimentRun.experiment_id,
-                coalesce(func.sum(models.SpanCost.prompt_cost), 0).label("prompt_cost"),
-                coalesce(func.sum(models.SpanCost.completion_cost), 0).label("completion_cost"),
-                coalesce(func.sum(models.SpanCost.total_cost), 0).label("total_cost"),
-                coalesce(func.sum(models.SpanCost.prompt_tokens), 0).label("prompt_tokens"),
-                coalesce(func.sum(models.SpanCost.completion_tokens), 0).label("completion_tokens"),
-                coalesce(func.sum(models.SpanCost.total_tokens), 0).label("total_tokens"),
+                func.sum(models.SpanCost.prompt_cost).label("prompt_cost"),
+                func.sum(models.SpanCost.completion_cost).label("completion_cost"),
+                func.sum(models.SpanCost.total_cost).label("total_cost"),
+                func.sum(models.SpanCost.prompt_tokens).label("prompt_tokens"),
+                func.sum(models.SpanCost.completion_tokens).label("completion_tokens"),
+                func.sum(models.SpanCost.total_tokens).label("total_tokens"),
             )
             .select_from(models.ExperimentRun)
             .join(models.Trace, models.ExperimentRun.trace_id == models.Trace.trace_id)
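
The dropped coalesce calls change the aggregates' semantics: SUM over zero matching rows now comes back as NULL (None in Python) instead of 0, presumably so callers can tell "no cost data" apart from "zero cost". A minimal standalone sketch of the difference, using an in-memory SQLite database (not code from the package):

    from sqlalchemy import Column, Float, Integer, create_engine, func, select
    from sqlalchemy.orm import Session, declarative_base
    from sqlalchemy.sql.functions import coalesce

    Base = declarative_base()

    class SpanCost(Base):
        __tablename__ = "span_costs"
        id = Column(Integer, primary_key=True)
        total_cost = Column(Float)

    engine = create_engine("sqlite://")
    Base.metadata.create_all(engine)

    with Session(engine) as session:
        # With no matching rows, plain SUM yields NULL, while coalesce(SUM, 0) yields 0.
        assert session.scalar(select(func.sum(SpanCost.total_cost))) is None
        assert session.scalar(select(coalesce(func.sum(SpanCost.total_cost), 0))) == 0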
phoenix/server/api/exceptions.py CHANGED
@@ -27,6 +27,12 @@ class Unauthorized(CustomGraphQLError):
     """
 
 
+class InsufficientStorage(CustomGraphQLError):
+    """
+    An error raised when the database has insufficient storage to complete a request.
+    """
+
+
 class Conflict(CustomGraphQLError):
     """
     An error raised when a mutation cannot be completed due to a conflict with
phoenix/server/api/input_types/TimeBinConfig.py ADDED
@@ -0,0 +1,23 @@
+from enum import Enum
+
+import strawberry
+
+
+@strawberry.enum
+class TimeBinScale(Enum):
+    MINUTE = "minute"
+    HOUR = "hour"
+    DAY = "day"
+    WEEK = "week"
+    MONTH = "month"
+    YEAR = "year"
+
+
+@strawberry.input
+class TimeBinConfig:
+    scale: TimeBinScale = strawberry.field(
+        default=TimeBinScale.HOUR, description="The scale of time bins for aggregation."
+    )
+    utc_offset_minutes: int = strawberry.field(
+        default=0, description="Offset in minutes from UTC for local time binning."
+    )
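
The new input type is self-contained; assuming arize-phoenix 11.8.0 is installed, it composes like this (illustrative only):

    from phoenix.server.api.input_types.TimeBinConfig import TimeBinConfig, TimeBinScale

    # Bin by day, shifted 300 minutes behind UTC so bins align with local midnight.
    config = TimeBinConfig(scale=TimeBinScale.DAY, utc_offset_minutes=-300)
    assert config.scale.value == "day"
    assert config.utc_offset_minutes == -300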
phoenix/server/api/routers/oauth2.py CHANGED
@@ -169,7 +169,11 @@ async def create_tokens(
             error=f"OAuth2 IDP {idp_name} does not appear to support OpenID Connect.",
         )
     user_info = await oauth2_client.parse_id_token(token_data, nonce=stored_nonce)
-    user_info = _parse_user_info(user_info)
+    try:
+        user_info = _parse_user_info(user_info)
+    except MissingEmailScope as error:
+        return _redirect_to_login(request=request, error=str(error))
+
     try:
         async with request.app.state.db() as session:
             user = await _process_oauth2_user(
@@ -237,7 +241,12 @@ def _parse_user_info(user_info: dict[str, Any]) -> UserInfo:
     """
     assert isinstance(subject := user_info.get("sub"), (str, int))
     idp_user_id = str(subject)
-    assert isinstance(email := user_info.get("email"), str)
+    email = user_info.get("email")
+    if not isinstance(email, str):
+        raise MissingEmailScope(
+            "Please ensure your OIDC provider is configured to use the 'email' scope."
+        )
+
     assert isinstance(username := user_info.get("name"), str) or username is None
     assert (
         isinstance(profile_picture_url := user_info.get("picture"), str)
@@ -541,6 +550,14 @@ class NotInvited(Exception):
     pass
 
 
+class MissingEmailScope(Exception):
+    """
+    Raised when the OIDC provider does not return the email scope.
+    """
+
+    pass
+
+
 def _redirect_to_login(*, request: Request, error: str) -> RedirectResponse:
     """
     Creates a RedirectResponse to the login page to display an error message.
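
Together these hunks replace a hard assert with a recoverable error: an ID token missing the email claim now redirects back to the login page with a readable message instead of raising AssertionError. A standalone sketch of the new failure mode (the real helpers are private to this module, so the check is reimplemented locally):

    from typing import Any

    class MissingEmailScope(Exception):
        """Raised when the OIDC provider does not return the email scope."""

    def parse_email(user_info: dict[str, Any]) -> str:  # mirrors _parse_user_info's check
        email = user_info.get("email")
        if not isinstance(email, str):
            raise MissingEmailScope(
                "Please ensure your OIDC provider is configured to use the 'email' scope."
            )
        return email

    try:
        parse_email({"sub": "user-123"})  # ID token without an email claim
    except MissingEmailScope as error:
        print(error)  # create_tokens surfaces this message via _redirect_to_login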
phoenix/server/api/types/CostBreakdown.py CHANGED
@@ -5,11 +5,8 @@ import strawberry
 
 
 @strawberry.type
 class CostBreakdown:
-    tokens: Optional[float] = None
+    tokens: Optional[float] = strawberry.field(
+        default=None,
+        description="Total number of tokens, including tokens for which no cost was computed.",
+    )
     cost: Optional[float] = None
-
-    @strawberry.field
-    def cost_per_token(self) -> Optional[float]:
-        if self.tokens and self.cost:
-            return self.cost / self.tokens
-        return None
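
The computed cost_per_token field is removed outright, so a client that still needs the ratio has to derive it from the two remaining fields. An illustrative port of the deleted resolver logic (not part of the package):

    from typing import Optional

    def cost_per_token(cost: Optional[float], tokens: Optional[float]) -> Optional[float]:
        # Mirrors the removed resolver: a falsy cost or tokens (None or 0) yields None.
        if tokens and cost:
            return cost / tokens
        return None

    assert cost_per_token(0.5, 1000) == 0.0005
    assert cost_per_token(None, 1000) is None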
phoenix/server/api/types/Project.py CHANGED
@@ -1,30 +1,32 @@
 from __future__ import annotations
 
 import operator
-from datetime import datetime, timedelta
-from typing import TYPE_CHECKING, Annotated, Any, ClassVar, Optional
+from datetime import datetime, timezone
+from typing import TYPE_CHECKING, Annotated, Any, ClassVar, Literal, Optional, cast
 
 import strawberry
-from aioitertools.itertools import islice
+from aioitertools.itertools import groupby, islice
 from openinference.semconv.trace import SpanAttributes
-from sqlalchemy import desc, distinct, func, or_, select
+from sqlalchemy import and_, desc, distinct, exists, func, or_, select
 from sqlalchemy.dialects import postgresql, sqlite
 from sqlalchemy.sql.elements import ColumnElement
 from sqlalchemy.sql.expression import tuple_
 from strawberry import ID, UNSET, Private, lazy
-from strawberry.relay import Connection, Node, NodeID
+from strawberry.relay import Connection, Edge, Node, NodeID, PageInfo
 from strawberry.types import Info
 from typing_extensions import assert_never
 
-from phoenix.datetime_utils import right_open_time_range
+from phoenix.datetime_utils import get_timestamp_range, normalize_datetime, right_open_time_range
 from phoenix.db import models
-from phoenix.db.helpers import SupportedSQLDialect
+from phoenix.db.helpers import SupportedSQLDialect, date_trunc
 from phoenix.server.api.context import Context
+from phoenix.server.api.exceptions import BadRequest
 from phoenix.server.api.input_types.ProjectSessionSort import (
     ProjectSessionColumn,
     ProjectSessionSort,
 )
-from phoenix.server.api.input_types.SpanSort import SpanSort, SpanSortConfig
+from phoenix.server.api.input_types.SpanSort import SpanColumn, SpanSort, SpanSortConfig
+from phoenix.server.api.input_types.TimeBinConfig import TimeBinConfig, TimeBinScale
 from phoenix.server.api.input_types.TimeRange import TimeRange
 from phoenix.server.api.types.AnnotationConfig import AnnotationConfig, to_gql_annotation_config
 from phoenix.server.api.types.AnnotationSummary import AnnotationSummary
@@ -34,6 +36,7 @@ from phoenix.server.api.types.pagination import (
     ConnectionArgs,
     Cursor,
     CursorSortColumn,
+    CursorSortColumnDataType,
     CursorString,
     connection_from_cursors_and_nodes,
     connection_from_list,
@@ -45,6 +48,7 @@ from phoenix.server.api.types.SpanCostSummary import SpanCostSummary
 from phoenix.server.api.types.TimeSeries import TimeSeries, TimeSeriesDataPoint
 from phoenix.server.api.types.Trace import Trace
 from phoenix.server.api.types.ValidationResult import ValidationResult
+from phoenix.server.types import DbSessionFactory
 from phoenix.trace.dsl import SpanFilter
 
 DEFAULT_PAGE_SIZE = 30
@@ -262,6 +266,16 @@ class Project(Node):
         filter_condition: Optional[str] = UNSET,
         orphan_span_as_root_span: Optional[bool] = True,
     ) -> Connection[Span]:
+        if root_spans_only and not filter_condition and sort and sort.col is SpanColumn.startTime:
+            return await _paginate_span_by_trace_start_time(
+                db=info.context.db,
+                project_rowid=self.project_rowid,
+                time_range=time_range,
+                first=first,
+                after=after,
+                sort=sort,
+                orphan_span_as_root_span=orphan_span_as_root_span,
+            )
         stmt = (
             select(models.Span.id)
             .select_from(models.Span)
@@ -698,83 +712,134 @@ class Project(Node):
         )
         return updated_at
 
-    @strawberry.field(
-        description="Hourly span count for the project.",
-    )  # type: ignore
+    @strawberry.field
     async def span_count_time_series(
         self,
         info: Info[Context, None],
-        time_range: Optional[TimeRange] = UNSET,
+        time_range: TimeRange,
+        time_bin_config: Optional[TimeBinConfig] = UNSET,
     ) -> SpanCountTimeSeries:
-        """Returns a time series of span counts grouped by hour for the project.
+        if time_range.start is None:
+            raise BadRequest("Start time is required")
 
-        This field provides hourly aggregated span counts, which can be useful for
-        visualizing span activity over time. The data points represent the number
-        of spans that started in each hour.
+        dialect = info.context.db.dialect
+        utc_offset_minutes = 0
+        field: Literal["minute", "hour", "day", "week", "month", "year"] = "hour"
+        if time_bin_config:
+            utc_offset_minutes = time_bin_config.utc_offset_minutes
+            if time_bin_config.scale is TimeBinScale.MINUTE:
+                field = "minute"
+            elif time_bin_config.scale is TimeBinScale.HOUR:
+                field = "hour"
+            elif time_bin_config.scale is TimeBinScale.DAY:
+                field = "day"
+            elif time_bin_config.scale is TimeBinScale.WEEK:
+                field = "week"
+            elif time_bin_config.scale is TimeBinScale.MONTH:
+                field = "month"
+            elif time_bin_config.scale is TimeBinScale.YEAR:
+                field = "year"
+        bucket = date_trunc(dialect, field, models.Span.start_time, utc_offset_minutes)
+        stmt = (
+            select(bucket, func.count(models.Span.id))
+            .join_from(models.Span, models.Trace)
+            .where(models.Trace.project_rowid == self.project_rowid)
+            .group_by(bucket)
+            .order_by(bucket)
+        )
+        if time_range.start:
+            stmt = stmt.where(time_range.start <= models.Span.start_time)
+        if time_range.end:
+            stmt = stmt.where(models.Span.start_time < time_range.end)
 
-        Args:
-            info: The GraphQL info object containing context information.
-            time_range: Optional time range to filter the spans. If provided, only
-                spans that started within this range will be counted.
+        data = {}
+        async with info.context.db() as session:
+            async for t, v in await session.stream(stmt):
+                timestamp = _as_datetime(t)
+                data[timestamp] = TimeSeriesDataPoint(timestamp=timestamp, value=v)
 
-        Returns:
-            A SpanCountTimeSeries object containing data points with timestamps
-            (rounded to the nearest hour) and corresponding span counts.
-
-        Notes:
-            - The timestamps are rounded down to the nearest hour.
-            - If a time range is provided, the start time is rounded down to the
-              nearest hour, and the end time is rounded up to the nearest hour.
-            - The SQL query is optimized for both PostgreSQL and SQLite databases.
-        """
-        # Determine the appropriate SQL function to truncate timestamps to hours
-        # based on the database dialect
-        if info.context.db.dialect is SupportedSQLDialect.POSTGRESQL:
-            # PostgreSQL uses date_trunc for timestamp truncation
-            hour = func.date_trunc("hour", models.Span.start_time)
-        elif info.context.db.dialect is SupportedSQLDialect.SQLITE:
-            # SQLite uses strftime for timestamp formatting
-            hour = func.strftime("%Y-%m-%dT%H:00:00.000+00:00", models.Span.start_time)
-        else:
-            assert_never(info.context.db.dialect)
+        data_timestamps: list[datetime] = [data_point.timestamp for data_point in data.values()]
+        min_time = min([*data_timestamps, time_range.start])
+        max_time = max(
+            [
+                *data_timestamps,
+                *([time_range.end] if time_range.end else []),
+            ],
+            default=datetime.now(timezone.utc),
+        )
+        for timestamp in get_timestamp_range(
+            start_time=min_time,
+            end_time=max_time,
+            stride=field,
+            utc_offset_minutes=utc_offset_minutes,
+        ):
+            if timestamp not in data:
+                data[timestamp] = TimeSeriesDataPoint(timestamp=timestamp)
+        return SpanCountTimeSeries(data=sorted(data.values(), key=lambda x: x.timestamp))
 
-        # Build the base query to count spans grouped by hour
+    @strawberry.field
+    async def trace_count_time_series(
+        self,
+        info: Info[Context, None],
+        time_range: TimeRange,
+        time_bin_config: Optional[TimeBinConfig] = UNSET,
+    ) -> TraceCountTimeSeries:
+        if time_range.start is None:
+            raise BadRequest("Start time is required")
+
+        dialect = info.context.db.dialect
+        utc_offset_minutes = 0
+        field: Literal["minute", "hour", "day", "week", "month", "year"] = "hour"
+        if time_bin_config:
+            utc_offset_minutes = time_bin_config.utc_offset_minutes
+            if time_bin_config.scale is TimeBinScale.MINUTE:
+                field = "minute"
+            elif time_bin_config.scale is TimeBinScale.HOUR:
+                field = "hour"
+            elif time_bin_config.scale is TimeBinScale.DAY:
+                field = "day"
+            elif time_bin_config.scale is TimeBinScale.WEEK:
+                field = "week"
+            elif time_bin_config.scale is TimeBinScale.MONTH:
+                field = "month"
+            elif time_bin_config.scale is TimeBinScale.YEAR:
+                field = "year"
+        bucket = date_trunc(dialect, field, models.Trace.start_time, utc_offset_minutes)
         stmt = (
-            select(hour, func.count())
-            .join(models.Trace)
+            select(bucket, func.count(models.Trace.id))
             .where(models.Trace.project_rowid == self.project_rowid)
-            .group_by(hour)
-            .order_by(hour)
+            .group_by(bucket)
+            .order_by(bucket)
        )
-
-        # Apply time range filtering if provided
        if time_range:
-            if t := time_range.start:
-                # Round down to nearest hour for the start time
-                start = t.replace(minute=0, second=0, microsecond=0)
-                stmt = stmt.where(start <= models.Span.start_time)
-            if t := time_range.end:
-                # Round up to nearest hour for the end time
-                # If the time is already at the start of an hour, use it as is
-                if t.minute == 0 and t.second == 0 and t.microsecond == 0:
-                    end = t
-                else:
-                    # Otherwise, round up to the next hour
-                    end = t.replace(minute=0, second=0, microsecond=0) + timedelta(hours=1)
-                stmt = stmt.where(models.Span.start_time < end)
-
-        # Execute the query and convert the results to a time series
+            if time_range.start:
+                stmt = stmt.where(time_range.start <= models.Trace.start_time)
+            if time_range.end:
+                stmt = stmt.where(models.Trace.start_time < time_range.end)
+        data = {}
        async with info.context.db() as session:
-            data = await session.stream(stmt)
-            return SpanCountTimeSeries(
-                data=[
-                    TimeSeriesDataPoint(
-                        timestamp=_as_datetime(t),
-                        value=v,
-                    )
-                    async for t, v in data
-                ]
-            )
+            async for t, v in await session.stream(stmt):
+                timestamp = _as_datetime(t)
+                data[timestamp] = TimeSeriesDataPoint(timestamp=timestamp, value=v)
+
+        data_timestamps: list[datetime] = [data_point.timestamp for data_point in data.values()]
+        min_time = min([*data_timestamps, time_range.start])
+        max_time = max(
+            [
+                *data_timestamps,
+                *([time_range.end] if time_range.end else []),
+            ],
+            default=datetime.now(timezone.utc),
+        )
+        for timestamp in get_timestamp_range(
+            start_time=min_time,
+            end_time=max_time,
+            stride=field,
+            utc_offset_minutes=utc_offset_minutes,
+        ):
+            if timestamp not in data:
+                data[timestamp] = TimeSeriesDataPoint(timestamp=timestamp)
+        return TraceCountTimeSeries(data=sorted(data.values(), key=lambda x: x.timestamp))
 
 
 @strawberry.type
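
Both resolvers now backfill empty bins via get_timestamp_range from phoenix/datetime_utils.py (+112 lines in this release, not shown in this diff). Its implementation isn't visible here, but for an "hour" stride it presumably yields bucket-aligned timestamps between the bounds, so bins with no rows become data points with no value. A hypothetical stand-in (not the package's code):

    from datetime import datetime, timedelta, timezone
    from typing import Iterator

    def hourly_range(start_time: datetime, end_time: datetime) -> Iterator[datetime]:
        # Hypothetical stand-in for get_timestamp_range(stride="hour", utc_offset_minutes=0).
        t = start_time.replace(minute=0, second=0, microsecond=0)
        while t <= end_time:
            yield t
            t += timedelta(hours=1)

    start = datetime(2024, 1, 1, 0, 30, tzinfo=timezone.utc)
    end = datetime(2024, 1, 1, 3, 0, tzinfo=timezone.utc)
    print(list(hourly_range(start, end)))  # bins at 00:00, 01:00, 02:00, 03:00 UTC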
@@ -782,6 +847,11 @@ class SpanCountTimeSeries(TimeSeries):
     """A time series of span count"""
 
 
+@strawberry.type
+class TraceCountTimeSeries(TimeSeries):
+    """A time series of trace count"""
+
+
 INPUT_VALUE = SpanAttributes.INPUT_VALUE.split(".")
 OUTPUT_VALUE = SpanAttributes.OUTPUT_VALUE.split(".")
 
@@ -790,5 +860,203 @@ def _as_datetime(value: Any) -> datetime:
     if isinstance(value, datetime):
         return value
     if isinstance(value, str):
-        return datetime.fromisoformat(value)
+        return cast(datetime, normalize_datetime(datetime.fromisoformat(value), timezone.utc))
     raise ValueError(f"Cannot convert {value} to datetime")
+
+
+async def _paginate_span_by_trace_start_time(
+    db: DbSessionFactory,
+    project_rowid: int,
+    time_range: Optional[TimeRange] = None,
+    first: Optional[int] = DEFAULT_PAGE_SIZE,
+    after: Optional[CursorString] = None,
+    sort: SpanSort = SpanSort(col=SpanColumn.startTime, dir=SortDir.desc),
+    orphan_span_as_root_span: Optional[bool] = True,
+    retries: int = 3,
+) -> Connection[Span]:
+    """Return one representative root span per trace, ordered by trace start time.
+
+    **Note**: Despite the function name, cursors are based on trace rowids, not span rowids.
+    This is because we paginate by traces (one span per trace), not individual spans.
+
+    **Important**: The edges list can be empty while has_next_page=True. This happens
+    when traces exist but have no matching root spans. Pagination continues because there
+    may be more traces ahead with spans.
+
+    Args:
+        db: Database session factory.
+        project_rowid: Project ID to query spans from.
+        time_range: Optional time range filter on trace start times.
+        first: Maximum number of edges to return (default: DEFAULT_PAGE_SIZE).
+        after: Cursor for pagination (points to trace position, not span).
+        sort: Sort by trace start time (asc/desc only).
+        orphan_span_as_root_span: Whether to include orphan spans as root spans.
+            True: spans with parent_id=NULL OR pointing to non-existent spans.
+            False: only spans with parent_id=NULL.
+        retries: Maximum number of retry attempts when insufficient edges are found.
+            When traces exist but lack root spans, the function retries pagination
+            to find traces with spans. Set to 0 to disable retries.
+
+    Returns:
+        Connection[Span] with:
+        - edges: At most one Edge per trace (may be empty list).
+        - page_info: Pagination info based on trace positions.
+
+    Key Points:
+        - Traces without root spans produce NO edges
+        - Spans ordered by trace start time, not span start time
+        - Cursors track trace positions for efficient large-scale pagination
+    """
+    # Build base trace query ordered by start time
+    traces = select(
+        models.Trace.id,
+        models.Trace.start_time,
+    ).where(models.Trace.project_rowid == project_rowid)
+    if sort.dir is SortDir.desc:
+        traces = traces.order_by(
+            models.Trace.start_time.desc(),
+            models.Trace.id.desc(),
+        )
+    else:
+        traces = traces.order_by(
+            models.Trace.start_time.asc(),
+            models.Trace.id.asc(),
+        )
+
+    # Apply time range filters
+    if time_range:
+        if time_range.start:
+            traces = traces.where(time_range.start <= models.Trace.start_time)
+        if time_range.end:
+            traces = traces.where(models.Trace.start_time < time_range.end)
+
+    # Apply cursor pagination
+    if after:
+        cursor = Cursor.from_string(after)
+        assert cursor.sort_column
+        compare = operator.lt if sort.dir is SortDir.desc else operator.gt
+        traces = traces.where(
+            compare(
+                tuple_(models.Trace.start_time, models.Trace.id),
+                (cursor.sort_column.value, cursor.rowid),
+            )
+        )
+
+    # Limit for pagination
+    if first:
+        traces = traces.limit(
+            first + 1  # over-fetch by one to determine whether there's a next page
+        )
+    traces_cte = traces.cte()
+
+    # Define join condition for root spans
+    if orphan_span_as_root_span:
+        # Include both NULL parent_id and orphaned spans
+        parent_spans = select(models.Span.span_id).alias("parent_spans")
+        onclause = and_(
+            models.Span.trace_rowid == traces_cte.c.id,
+            or_(
+                models.Span.parent_id.is_(None),
+                ~exists().where(models.Span.parent_id == parent_spans.c.span_id),
+            ),
+        )
+    else:
+        # Only spans with no parent (parent_id is NULL, excludes orphaned spans)
+        onclause = and_(
+            models.Span.trace_rowid == traces_cte.c.id,
+            models.Span.parent_id.is_(None),
+        )
+
+    # Join traces with root spans (left join allows traces without spans)
+    stmt = select(
+        traces_cte.c.id,
+        traces_cte.c.start_time,
+        models.Span.id,
+    ).join_from(
+        traces_cte,
+        models.Span,
+        onclause=onclause,
+        isouter=True,
+    )
+
+    # Order by trace time, then pick earliest span per trace
+    if sort.dir is SortDir.desc:
+        stmt = stmt.order_by(
+            traces_cte.c.start_time.desc(),
+            traces_cte.c.id.desc(),
+            models.Span.start_time.asc(),  # earliest span
+            models.Span.id.desc(),
+        )
+    else:
+        stmt = stmt.order_by(
+            traces_cte.c.start_time.asc(),
+            traces_cte.c.id.asc(),
+            models.Span.start_time.asc(),  # earliest span
+            models.Span.id.desc(),
+        )
+
+    # Use DISTINCT for PostgreSQL, manual grouping for SQLite
+    if db.dialect is SupportedSQLDialect.POSTGRESQL:
+        stmt = stmt.distinct(traces_cte.c.start_time, traces_cte.c.id)
+    elif db.dialect is SupportedSQLDialect.SQLITE:
+        # too complicated for SQLite, so we rely on groupby() below
+        pass
+    else:
+        assert_never(db.dialect)
+
+    # Process results and build edges
+    edges: list[Edge[Span]] = []
+    start_cursor: Optional[str] = None
+    end_cursor: Optional[str] = None
+    async with db() as session:
+        records = groupby(await session.stream(stmt), key=lambda record: record[:2])
+        async for (trace_rowid, trace_start_time), group in islice(records, first):
+            cursor = Cursor(
+                rowid=trace_rowid,
+                sort_column=CursorSortColumn(
+                    type=CursorSortColumnDataType.DATETIME,
+                    value=trace_start_time,
+                ),
+            )
+            if start_cursor is None:
+                start_cursor = str(cursor)
+            end_cursor = str(cursor)
+            first_record = group[0]
+            # Only create edge if trace has a root span
+            if (span_rowid := first_record[2]) is not None:
+                edges.append(Edge(node=Span(span_rowid=span_rowid), cursor=str(cursor)))
+        has_next_page = True
+        try:
+            await records.__anext__()
+        except StopAsyncIteration:
+            has_next_page = False
+
+    # Retry if we need more edges and more traces exist
+    if first and len(edges) < first and has_next_page:
+        while retries and (num_needed := first - len(edges)) and has_next_page:
+            retries -= 1
+            batch_size = max(first, 1000)
+            more = await _paginate_span_by_trace_start_time(
+                db=db,
+                project_rowid=project_rowid,
+                time_range=time_range,
+                first=batch_size,
+                after=end_cursor,
+                sort=sort,
+                orphan_span_as_root_span=orphan_span_as_root_span,
+                retries=0,
+            )
+            edges.extend(more.edges[:num_needed])
+            start_cursor = start_cursor or more.page_info.start_cursor
+            end_cursor = more.page_info.end_cursor if len(edges) < first else edges[-1].cursor
+            has_next_page = len(more.edges) > num_needed or more.page_info.has_next_page
+
+    return Connection(
+        edges=edges,
+        page_info=PageInfo(
+            start_cursor=start_cursor,
+            end_cursor=end_cursor,
+            has_previous_page=False,
+            has_next_page=has_next_page,
+        ),
+    )
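
The cursor logic above is keyset ("seek") pagination: instead of OFFSET, each page resumes from a (start_time, id) tuple, which stays fast on large tables because the WHERE clause can be served by an index. The tuple comparison in plain Python (illustrative only):

    import operator
    from datetime import datetime, timezone

    rows = [  # (start_time, trace_rowid), presorted descending
        (datetime(2024, 1, 1, 12, tzinfo=timezone.utc), 7),
        (datetime(2024, 1, 1, 12, tzinfo=timezone.utc), 5),
        (datetime(2024, 1, 1, 11, tzinfo=timezone.utc), 9),
    ]
    cursor = (datetime(2024, 1, 1, 12, tzinfo=timezone.utc), 5)
    compare = operator.lt  # SortDir.desc
    next_page = [r for r in rows if compare(r, cursor)]
    assert next_page == [(datetime(2024, 1, 1, 11, tzinfo=timezone.utc), 9)]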
phoenix/server/app.py CHANGED
@@ -62,6 +62,7 @@ from phoenix.config import (
     get_env_host,
     get_env_host_root_path,
     get_env_port,
+    get_env_support_email,
     server_instrumentation_is_enabled,
     verify_server_environment_variables,
 )
@@ -850,10 +851,13 @@ class DbDiskUsageInterceptor(AsyncServerInterceptor):
             method_name.endswith("trace.v1.TraceService/Export")
             and self._db.should_not_insert_or_update
         ):
-            await context.abort(
-                grpc.StatusCode.RESOURCE_EXHAUSTED,
-                "Database disk usage threshold exceeded",
+            details = (
+                "Database operations are disabled due to insufficient storage. "
+                "Please delete old data or increase storage."
             )
+            if support_email := get_env_support_email():
+                details += f" Need help? Contact us at {support_email}"
+            await context.abort(grpc.StatusCode.RESOURCE_EXHAUSTED, details)
         return await method(request_or_iterator, context)
 
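
get_env_support_email is new in phoenix/config.py (+33 lines, not shown in this diff). A plausible reading of the interceptor's message construction, with the helper stubbed out; the PHOENIX_SUPPORT_EMAIL variable name is an assumption, not confirmed by this diff:

    import os
    from typing import Optional

    def get_env_support_email() -> Optional[str]:
        # Hypothetical stub; the real helper lives in phoenix/config.py.
        return os.environ.get("PHOENIX_SUPPORT_EMAIL") or None

    details = (
        "Database operations are disabled due to insufficient storage. "
        "Please delete old data or increase storage."
    )
    if support_email := get_env_support_email():
        details += f" Need help? Contact us at {support_email}"
    print(details)  # sent as the gRPC RESOURCE_EXHAUSTED status details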