arize-phoenix 3.25.0__py3-none-any.whl → 4.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (113)
  1. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.0.dist-info}/METADATA +26 -4
  2. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.0.dist-info}/RECORD +80 -75
  3. phoenix/__init__.py +9 -5
  4. phoenix/config.py +109 -53
  5. phoenix/datetime_utils.py +18 -1
  6. phoenix/db/README.md +25 -0
  7. phoenix/db/__init__.py +4 -0
  8. phoenix/db/alembic.ini +119 -0
  9. phoenix/db/bulk_inserter.py +206 -0
  10. phoenix/db/engines.py +152 -0
  11. phoenix/db/helpers.py +47 -0
  12. phoenix/db/insertion/evaluation.py +209 -0
  13. phoenix/db/insertion/helpers.py +54 -0
  14. phoenix/db/insertion/span.py +142 -0
  15. phoenix/db/migrate.py +71 -0
  16. phoenix/db/migrations/env.py +121 -0
  17. phoenix/db/migrations/script.py.mako +26 -0
  18. phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
  19. phoenix/db/models.py +371 -0
  20. phoenix/exceptions.py +5 -1
  21. phoenix/server/api/context.py +40 -3
  22. phoenix/server/api/dataloaders/__init__.py +97 -0
  23. phoenix/server/api/dataloaders/cache/__init__.py +3 -0
  24. phoenix/server/api/dataloaders/cache/two_tier_cache.py +67 -0
  25. phoenix/server/api/dataloaders/document_evaluation_summaries.py +152 -0
  26. phoenix/server/api/dataloaders/document_evaluations.py +37 -0
  27. phoenix/server/api/dataloaders/document_retrieval_metrics.py +98 -0
  28. phoenix/server/api/dataloaders/evaluation_summaries.py +151 -0
  29. phoenix/server/api/dataloaders/latency_ms_quantile.py +198 -0
  30. phoenix/server/api/dataloaders/min_start_or_max_end_times.py +93 -0
  31. phoenix/server/api/dataloaders/record_counts.py +125 -0
  32. phoenix/server/api/dataloaders/span_descendants.py +64 -0
  33. phoenix/server/api/dataloaders/span_evaluations.py +37 -0
  34. phoenix/server/api/dataloaders/token_counts.py +138 -0
  35. phoenix/server/api/dataloaders/trace_evaluations.py +37 -0
  36. phoenix/server/api/input_types/SpanSort.py +138 -68
  37. phoenix/server/api/routers/v1/__init__.py +11 -0
  38. phoenix/server/api/routers/v1/evaluations.py +275 -0
  39. phoenix/server/api/routers/v1/spans.py +126 -0
  40. phoenix/server/api/routers/v1/traces.py +82 -0
  41. phoenix/server/api/schema.py +112 -48
  42. phoenix/server/api/types/DocumentEvaluationSummary.py +1 -1
  43. phoenix/server/api/types/Evaluation.py +29 -12
  44. phoenix/server/api/types/EvaluationSummary.py +29 -44
  45. phoenix/server/api/types/MimeType.py +2 -2
  46. phoenix/server/api/types/Model.py +9 -9
  47. phoenix/server/api/types/Project.py +240 -171
  48. phoenix/server/api/types/Span.py +87 -131
  49. phoenix/server/api/types/Trace.py +29 -20
  50. phoenix/server/api/types/pagination.py +151 -10
  51. phoenix/server/app.py +263 -35
  52. phoenix/server/grpc_server.py +93 -0
  53. phoenix/server/main.py +75 -60
  54. phoenix/server/openapi/docs.py +218 -0
  55. phoenix/server/prometheus.py +23 -7
  56. phoenix/server/static/index.js +662 -643
  57. phoenix/server/telemetry.py +68 -0
  58. phoenix/services.py +4 -0
  59. phoenix/session/client.py +34 -30
  60. phoenix/session/data_extractor.py +8 -3
  61. phoenix/session/session.py +176 -155
  62. phoenix/settings.py +13 -0
  63. phoenix/trace/attributes.py +349 -0
  64. phoenix/trace/dsl/README.md +116 -0
  65. phoenix/trace/dsl/filter.py +660 -192
  66. phoenix/trace/dsl/helpers.py +24 -5
  67. phoenix/trace/dsl/query.py +562 -185
  68. phoenix/trace/fixtures.py +69 -7
  69. phoenix/trace/otel.py +33 -199
  70. phoenix/trace/schemas.py +14 -8
  71. phoenix/trace/span_evaluations.py +5 -2
  72. phoenix/utilities/__init__.py +0 -26
  73. phoenix/utilities/span_store.py +0 -23
  74. phoenix/version.py +1 -1
  75. phoenix/core/project.py +0 -773
  76. phoenix/core/traces.py +0 -96
  77. phoenix/datasets/dataset.py +0 -214
  78. phoenix/datasets/fixtures.py +0 -24
  79. phoenix/datasets/schema.py +0 -31
  80. phoenix/experimental/evals/__init__.py +0 -73
  81. phoenix/experimental/evals/evaluators.py +0 -413
  82. phoenix/experimental/evals/functions/__init__.py +0 -4
  83. phoenix/experimental/evals/functions/classify.py +0 -453
  84. phoenix/experimental/evals/functions/executor.py +0 -353
  85. phoenix/experimental/evals/functions/generate.py +0 -138
  86. phoenix/experimental/evals/functions/processing.py +0 -76
  87. phoenix/experimental/evals/models/__init__.py +0 -14
  88. phoenix/experimental/evals/models/anthropic.py +0 -175
  89. phoenix/experimental/evals/models/base.py +0 -170
  90. phoenix/experimental/evals/models/bedrock.py +0 -221
  91. phoenix/experimental/evals/models/litellm.py +0 -134
  92. phoenix/experimental/evals/models/openai.py +0 -453
  93. phoenix/experimental/evals/models/rate_limiters.py +0 -246
  94. phoenix/experimental/evals/models/vertex.py +0 -173
  95. phoenix/experimental/evals/models/vertexai.py +0 -186
  96. phoenix/experimental/evals/retrievals.py +0 -96
  97. phoenix/experimental/evals/templates/__init__.py +0 -50
  98. phoenix/experimental/evals/templates/default_templates.py +0 -472
  99. phoenix/experimental/evals/templates/template.py +0 -195
  100. phoenix/experimental/evals/utils/__init__.py +0 -172
  101. phoenix/experimental/evals/utils/threads.py +0 -27
  102. phoenix/server/api/routers/evaluation_handler.py +0 -110
  103. phoenix/server/api/routers/span_handler.py +0 -70
  104. phoenix/server/api/routers/trace_handler.py +0 -60
  105. phoenix/storage/span_store/__init__.py +0 -23
  106. phoenix/storage/span_store/text_file.py +0 -85
  107. phoenix/trace/dsl/missing.py +0 -60
  108. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.0.dist-info}/WHEEL +0 -0
  109. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.0.dist-info}/licenses/IP_NOTICE +0 -0
  110. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.0.dist-info}/licenses/LICENSE +0 -0
  111. /phoenix/{datasets → db/insertion}/__init__.py +0 -0
  112. /phoenix/{experimental → db/migrations}/__init__.py +0 -0
  113. /phoenix/{storage → server/openapi}/__init__.py +0 -0
@@ -1,30 +1,88 @@
1
- from enum import Enum
2
- from functools import partial
3
- from typing import Any, Iterable, Iterator, Optional, Protocol
1
+ from dataclasses import dataclass
2
+ from enum import Enum, auto
3
+ from typing import Any, Optional, Protocol
4
4
 
5
- import pandas as pd
6
5
  import strawberry
7
6
  from openinference.semconv.trace import SpanAttributes
7
+ from sqlalchemy import and_, desc, nulls_last
8
+ from sqlalchemy.orm import InstrumentedAttribute
9
+ from sqlalchemy.sql.expression import Select
8
10
  from strawberry import UNSET
9
11
  from typing_extensions import assert_never
10
12
 
11
13
  import phoenix.trace.v1 as pb
12
- from phoenix.core.project import WrappedSpan
14
+ from phoenix.db import models
15
+ from phoenix.server.api.types.pagination import CursorSortColumnDataType
13
16
  from phoenix.server.api.types.SortDir import SortDir
14
- from phoenix.trace.schemas import ComputedAttributes, SpanID
17
+ from phoenix.trace.schemas import SpanID
18
+
19
+ LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT.split(".")
20
+ LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION.split(".")
21
+ LLM_TOKEN_COUNT_TOTAL = SpanAttributes.LLM_TOKEN_COUNT_TOTAL.split(".")
15
22
 
16
23
 
17
24
  @strawberry.enum
18
25
  class SpanColumn(Enum):
19
- startTime = "start_time"
20
- endTime = "end_time"
21
- latencyMs = ComputedAttributes.LATENCY_MS
22
- tokenCountTotal = SpanAttributes.LLM_TOKEN_COUNT_TOTAL
23
- tokenCountPrompt = SpanAttributes.LLM_TOKEN_COUNT_PROMPT
24
- tokenCountCompletion = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION
25
- cumulativeTokenCountTotal = ComputedAttributes.CUMULATIVE_LLM_TOKEN_COUNT_TOTAL
26
- cumulativeTokenCountPrompt = ComputedAttributes.CUMULATIVE_LLM_TOKEN_COUNT_PROMPT
27
- cumulativeTokenCountCompletion = ComputedAttributes.CUMULATIVE_LLM_TOKEN_COUNT_COMPLETION
26
+ startTime = auto()
27
+ endTime = auto()
28
+ latencyMs = auto()
29
+ tokenCountTotal = auto()
30
+ tokenCountPrompt = auto()
31
+ tokenCountCompletion = auto()
32
+ cumulativeTokenCountTotal = auto()
33
+ cumulativeTokenCountPrompt = auto()
34
+ cumulativeTokenCountCompletion = auto()
35
+
36
@property
def column_name(self) -> str:
    """Return the label under which this sort column is added to a query.

    The label is derived from the enum member's name, so each sortable span
    column gets its own distinct alias.
    """
    # The ``f`` prefix must sit outside the quotes. With it inside, the
    # string was a plain literal and every member returned the same text,
    # "f{self.name}_span_sort_column", instead of a per-member label.
    return f"{self.name}_span_sort_column"
39
+
40
@property
def orm_expression(self) -> Any:
    """Return the labeled ORM expression that this column sorts on.

    Start time, end time, latency and the cumulative prompt/completion
    counts map directly onto ``models.Span`` columns. Per-span token counts
    are looked up in ``models.Span.attributes`` by their dotted-path
    components and read as floats. The cumulative total is the sum of the
    cumulative prompt and completion columns. Every expression is labeled
    with ``column_name``.
    """
    span = models.Span
    label = self.column_name
    if self is SpanColumn.startTime:
        return span.start_time.label(label)
    if self is SpanColumn.endTime:
        return span.end_time.label(label)
    if self is SpanColumn.latencyMs:
        return span.latency_ms.label(label)
    if self is SpanColumn.tokenCountTotal:
        return span.attributes[LLM_TOKEN_COUNT_TOTAL].as_float().label(label)
    if self is SpanColumn.tokenCountPrompt:
        return span.attributes[LLM_TOKEN_COUNT_PROMPT].as_float().label(label)
    if self is SpanColumn.tokenCountCompletion:
        return span.attributes[LLM_TOKEN_COUNT_COMPLETION].as_float().label(label)
    if self is SpanColumn.cumulativeTokenCountTotal:
        cumulative_total = (
            span.cumulative_llm_token_count_prompt
            + span.cumulative_llm_token_count_completion
        )
        return cumulative_total.label(label)
    if self is SpanColumn.cumulativeTokenCountPrompt:
        return span.cumulative_llm_token_count_prompt.label(label)
    if self is SpanColumn.cumulativeTokenCountCompletion:
        return span.cumulative_llm_token_count_completion.label(label)
    # Exhaustiveness guard: reached only if a new member is added without
    # a matching branch above.
    assert_never(self)
67
+
68
@property
def data_type(self) -> CursorSortColumnDataType:
    """Return the cursor data type of the values this column sorts by.

    Timestamps are ``DATETIME``, cumulative token counts are ``INT``, and
    latency plus the per-span token counts are ``FLOAT``.
    """
    if self in (SpanColumn.startTime, SpanColumn.endTime):
        return CursorSortColumnDataType.DATETIME
    if self in (
        SpanColumn.cumulativeTokenCountTotal,
        SpanColumn.cumulativeTokenCountPrompt,
        SpanColumn.cumulativeTokenCountCompletion,
    ):
        return CursorSortColumnDataType.INT
    if self in (
        SpanColumn.latencyMs,
        SpanColumn.tokenCountTotal,
        SpanColumn.tokenCountPrompt,
        SpanColumn.tokenCountCompletion,
    ):
        return CursorSortColumnDataType.FLOAT
    # Exhaustiveness guard for members added without a mapping above.
    assert_never(self)
28
86
 
29
87
 
30
88
  @strawberry.enum
@@ -32,6 +90,29 @@ class EvalAttr(Enum):
32
90
  score = "score"
33
91
  label = "label"
34
92
 
93
@property
def column_name(self) -> str:
    """Return the label under which this evaluation attribute is selected.

    The label is the member's value followed by ``_eval_sort_column``.
    """
    return "{}_eval_sort_column".format(self.value)
96
+
97
@property
def orm_expression(self) -> Any:
    """Return the labeled ``models.SpanAnnotation`` column for this attribute.

    ``score`` maps to ``SpanAnnotation.score`` and ``label`` to
    ``SpanAnnotation.label``; the result is labeled with ``column_name``.
    """
    alias = self.column_name
    if self is EvalAttr.score:
        return models.SpanAnnotation.score.label(alias)
    if self is EvalAttr.label:
        return models.SpanAnnotation.label.label(alias)
    # Exhaustiveness guard for members added without a mapping above.
    assert_never(self)
107
+
108
@property
def data_type(self) -> CursorSortColumnDataType:
    """Return the cursor data type for this evaluation attribute.

    Scores are ``FLOAT`` and labels are ``STRING``.
    """
    if self is EvalAttr.score:
        return CursorSortColumnDataType.FLOAT
    elif self is EvalAttr.label:
        return CursorSortColumnDataType.STRING
    # Exhaustiveness guard for members added without a mapping above.
    assert_never(self)
115
+
35
116
 
36
117
  @strawberry.input
37
118
  class EvalResultKey:
@@ -43,6 +124,15 @@ class SupportsGetSpanEvaluation(Protocol):
43
124
  def get_span_evaluation(self, span_id: SpanID, name: str) -> Optional[pb.Evaluation]: ...
44
125
 
45
126
 
127
@dataclass(frozen=True)
class SpanSortConfig:
    """Immutable bundle describing a sort applied to a span query.

    Field order is part of the constructor signature and must not change.
    """

    # The select statement carried alongside the sort description.
    stmt: Select[Any]
    # The labeled expression the rows are ordered by.
    orm_expression: Any
    # Requested sort direction.
    dir: SortDir
    # Label of the sort column in the statement's result rows.
    column_name: str
    # Data type of the sort column's values.
    column_data_type: CursorSortColumnDataType
134
+
135
+
46
136
  @strawberry.input(
47
137
  description="The sort key and direction for span connections. Must "
48
138
  "specify one and only one of either `col` or `evalResultKey`."
@@ -52,58 +142,38 @@ class SpanSort:
52
142
  eval_result_key: Optional[EvalResultKey] = UNSET
53
143
  dir: SortDir
54
144
 
55
- def __call__(
56
- self,
57
- spans: Iterable[WrappedSpan],
58
- evals: Optional[SupportsGetSpanEvaluation] = None,
59
- ) -> Iterator[WrappedSpan]:
60
- """
61
- Sorts the spans by the given key (column or eval) and direction
62
- """
63
- if self.eval_result_key:
64
- get_sort_key_value = partial(
65
- _get_eval_result_value,
66
- eval_name=self.eval_result_key.name,
67
- eval_attr=self.eval_result_key.attr,
68
- evals=evals,
145
+ def update_orm_expr(self, stmt: Select[Any]) -> SpanSortConfig:
146
+ if (col := self.col) and not self.eval_result_key:
147
+ expr = col.orm_expression
148
+ stmt = stmt.add_columns(expr)
149
+ if self.dir == SortDir.desc:
150
+ expr = desc(expr)
151
+ return SpanSortConfig(
152
+ stmt=stmt.order_by(nulls_last(expr)),
153
+ orm_expression=col.orm_expression,
154
+ dir=self.dir,
155
+ column_name=col.column_name,
156
+ column_data_type=col.data_type,
69
157
  )
70
- else:
71
- get_sort_key_value = partial(
72
- _get_column_value,
73
- span_column=self.col or SpanColumn.startTime,
158
+ if (eval_result_key := self.eval_result_key) and not col:
159
+ eval_name = eval_result_key.name
160
+ eval_attr = eval_result_key.attr
161
+ expr = eval_result_key.attr.orm_expression
162
+ stmt = stmt.add_columns(expr)
163
+ if self.dir == SortDir.desc:
164
+ expr = desc(expr)
165
+ stmt = stmt.join(
166
+ models.SpanAnnotation,
167
+ onclause=and_(
168
+ models.SpanAnnotation.span_rowid == models.Span.id,
169
+ models.SpanAnnotation.name == eval_name,
170
+ ),
171
+ ).order_by(expr)
172
+ return SpanSortConfig(
173
+ stmt=stmt,
174
+ orm_expression=eval_result_key.attr.orm_expression,
175
+ dir=self.dir,
176
+ column_name=eval_attr.column_name,
177
+ column_data_type=eval_attr.data_type,
74
178
  )
75
- yield from pd.Series(spans, dtype=object).sort_values(
76
- key=lambda series: series.apply(get_sort_key_value),
77
- ascending=self.dir.value == SortDir.asc.value,
78
- )
79
-
80
-
81
- def _get_column_value(span: WrappedSpan, span_column: SpanColumn) -> Any:
82
- if span_column is SpanColumn.startTime:
83
- return span.start_time
84
- if span_column is SpanColumn.endTime:
85
- return span.end_time
86
- return span[span_column.value]
87
-
88
-
89
- def _get_eval_result_value(
90
- span: WrappedSpan,
91
- eval_name: str,
92
- eval_attr: EvalAttr,
93
- evals: Optional[SupportsGetSpanEvaluation] = None,
94
- ) -> Any:
95
- """
96
- Returns the evaluation result for the given span
97
- """
98
- if evals is None:
99
- return None
100
- span_id = span.context.span_id
101
- evaluation = evals.get_span_evaluation(span_id, eval_name)
102
- if evaluation is None:
103
- return None
104
- result = evaluation.result
105
- if eval_attr is EvalAttr.score:
106
- return result.score.value if result.HasField("score") else None
107
- if eval_attr is EvalAttr.label:
108
- return result.label.value if result.HasField("label") else None
109
- assert_never(eval_attr)
179
+ raise ValueError("Exactly one of `col` or `evalResultKey` must be specified on `SpanSort`.")
@@ -0,0 +1,11 @@
1
+ from starlette.routing import Route
2
+
3
+ from . import evaluations, spans, traces
4
+
5
# Route table for the v1 REST API. Each path is registered once per HTTP
# method so that reads and writes go to separate handlers.
V1_ROUTES = [
    # Evaluations: upload (POST) and download (GET).
    Route("/v1/evaluations", evaluations.post_evaluations, methods=["POST"]),
    Route("/v1/evaluations", evaluations.get_evaluations, methods=["GET"]),
    # Traces: ingestion only.
    Route("/v1/traces", traces.post_traces, methods=["POST"]),
    # Spans: query via POST or GET.
    Route("/v1/spans", spans.query_spans_handler, methods=["POST"]),
    Route("/v1/spans", spans.get_spans_handler, methods=["GET"]),
]
@@ -0,0 +1,275 @@
1
+ import gzip
2
+ from itertools import chain
3
+ from typing import AsyncContextManager, Callable, Iterator, Tuple
4
+
5
+ import pandas as pd
6
+ import pyarrow as pa
7
+ from google.protobuf.message import DecodeError
8
+ from pandas import DataFrame
9
+ from sqlalchemy import select
10
+ from sqlalchemy.engine import Connectable
11
+ from sqlalchemy.ext.asyncio import (
12
+ AsyncSession,
13
+ )
14
+ from starlette.background import BackgroundTask
15
+ from starlette.datastructures import State
16
+ from starlette.requests import Request
17
+ from starlette.responses import Response, StreamingResponse
18
+ from starlette.status import (
19
+ HTTP_403_FORBIDDEN,
20
+ HTTP_404_NOT_FOUND,
21
+ HTTP_415_UNSUPPORTED_MEDIA_TYPE,
22
+ HTTP_422_UNPROCESSABLE_ENTITY,
23
+ )
24
+ from typing_extensions import TypeAlias
25
+
26
+ import phoenix.trace.v1 as pb
27
+ from phoenix.config import DEFAULT_PROJECT_NAME
28
+ from phoenix.db import models
29
+ from phoenix.exceptions import PhoenixEvaluationNameIsMissing
30
+ from phoenix.server.api.routers.utils import table_to_bytes
31
+ from phoenix.session.evaluation import encode_evaluations
32
+ from phoenix.trace.span_evaluations import (
33
+ DocumentEvaluations,
34
+ Evaluations,
35
+ SpanEvaluations,
36
+ TraceEvaluations,
37
+ )
38
+
39
+ EvaluationName: TypeAlias = str
40
+
41
+
42
async def post_evaluations(request: Request) -> Response:
    """
    summary: Add evaluations to a span, trace, or document
    operationId: addEvaluations
    tags:
      - evaluations
    parameters:
      - name: project-name
        in: query
        schema:
          type: string
          default: default
        description: The project name to add the evaluation to
    requestBody:
      required: true
      content:
        application/x-protobuf:
          schema:
            type: string
            format: binary
        application/x-pandas-arrow:
          schema:
            type: string
            format: binary
    responses:
      200:
        description: Success
      403:
        description: Forbidden
      415:
        description: Unsupported content type, only gzipped protobuf and pandas-arrow are supported
      422:
        description: Request body is invalid
    """
    # NOTE(review): the docstring above appears to be the OpenAPI operation
    # spec for this route, so it is kept as pure YAML and the explanation
    # lives in comments. Confirm against the schema generator before adding
    # prose to it.
    import zlib  # local import: only needed to name the decompression error

    # Writes are rejected outright when the app runs in read-only mode.
    if request.app.state.read_only:
        return Response(status_code=HTTP_403_FORBIDDEN)

    content_type = request.headers.get("content-type")
    if content_type == "application/x-pandas-arrow":
        return await _process_pyarrow(request)
    if content_type != "application/x-protobuf":
        return Response("Unsupported content type", status_code=HTTP_415_UNSUPPORTED_MEDIA_TYPE)

    body = await request.body()
    content_encoding = request.headers.get("content-encoding")
    if content_encoding == "gzip":
        try:
            body = gzip.decompress(body)
        except (OSError, EOFError, zlib.error):
            # A corrupt or truncated gzip payload is a client error; report
            # it as an invalid body instead of letting the exception escape.
            return Response("Request body is invalid", status_code=HTTP_422_UNPROCESSABLE_ENTITY)
    elif content_encoding:
        return Response("Unsupported content encoding", status_code=HTTP_415_UNSUPPORTED_MEDIA_TYPE)

    evaluation = pb.Evaluation()
    try:
        evaluation.ParseFromString(body)
    except DecodeError:
        return Response("Request body is invalid", status_code=HTTP_422_UNPROCESSABLE_ENTITY)
    if not evaluation.name.strip():
        return Response(
            "Evaluation name must not be blank/empty",
            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
        )
    # Hand the parsed evaluation to the bulk inserter exposed on the request
    # state; the response does not wait for the row to be written.
    await request.state.queue_evaluation_for_bulk_insert(evaluation)
    return Response()
101
+
102
+
103
async def get_evaluations(request: Request) -> Response:
    """
    summary: Get evaluations from Phoenix
    operationId: getEvaluation
    tags:
      - evaluations
    parameters:
      - name: project-name
        in: query
        schema:
          type: string
          default: default
        description: The project name to get evaluations from
    responses:
      200:
        description: Success
      404:
        description: Not found
    """
    # Project name precedence: query parameter, then header (kept for
    # backwards compatibility), then the default project.
    project_name = request.query_params.get("project-name")
    if not project_name:
        project_name = request.headers.get("project-name") or DEFAULT_PROJECT_NAME

    db: Callable[[], AsyncContextManager[AsyncSession]] = request.app.state.db
    async with db() as session:
        connection = await session.connection()
        # Each reader runs synchronously on the connection via run_sync
        # because pandas cannot read from an async connectable.
        trace_frame = await connection.run_sync(
            _read_sql_trace_evaluations_into_dataframe,
            project_name,
        )
        span_frame = await connection.run_sync(
            _read_sql_span_evaluations_into_dataframe,
            project_name,
        )
        document_frame = await connection.run_sync(
            _read_sql_document_evaluations_into_dataframe,
            project_name,
        )

    # Nothing to return when the project has no evaluations of any kind.
    if trace_frame.empty and span_frame.empty and document_frame.empty:
        return Response(status_code=HTTP_404_NOT_FOUND)

    # One evaluations object per (kind, evaluation name), built lazily.
    all_evaluations = chain(
        (TraceEvaluations(*pair) for pair in _groupby_eval_name(trace_frame)),
        (SpanEvaluations(*pair) for pair in _groupby_eval_name(span_frame)),
        (DocumentEvaluations(*pair) for pair in _groupby_eval_name(document_frame)),
    )
    bytestream = (table_to_bytes(item.to_pyarrow_table()) for item in all_evaluations)
    return StreamingResponse(
        content=bytestream,
        media_type="application/x-pandas-arrow",
    )
170
+
171
+
172
async def _process_pyarrow(request: Request) -> Response:
    """Parse a pyarrow stream of evaluations and schedule it for insertion.

    Returns a 422 response when the body is not a valid pyarrow stream, when
    an evaluation name is missing, or when the data cannot be converted into
    evaluations. Otherwise returns an empty response whose background task
    queues the evaluations.
    """
    payload = await request.body()
    try:
        stream_reader = pa.ipc.open_stream(payload)
    except pa.ArrowInvalid:
        return Response(
            content="Request body is not valid pyarrow",
            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
        )
    try:
        evaluations = Evaluations.from_pyarrow_reader(stream_reader)
    except PhoenixEvaluationNameIsMissing:
        # The specific error is listed first so it gets its own message.
        return Response(
            "Evaluation name must not be blank/empty",
            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
        )
    except Exception:
        return Response(
            content="Invalid data in request body",
            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
        )
    # Queueing happens in a background task attached to the response.
    enqueue = BackgroundTask(_add_evaluations, request.state, evaluations)
    return Response(background=enqueue)
194
+
195
+
196
async def _add_evaluations(state: State, evaluations: Evaluations) -> None:
    """Encode ``evaluations`` and queue each encoded item for bulk insertion."""
    queue = state.queue_evaluation_for_bulk_insert
    for encoded in encode_evaluations(evaluations):
        await queue(encoded)
199
+
200
+
201
def _read_sql_trace_evaluations_into_dataframe(
    connectable: Connectable,
    project_name: str,
) -> DataFrame:
    """
    Load the LLM trace evaluations of one project into a dataframe indexed
    by ``trace_id``.

    Takes a synchronous connectable because pandas.read_sql does not support
    async connectables. For more information, see:

    https://stackoverflow.com/questions/70848256/how-can-i-use-pandas-read-sql-on-an-async-connection
    """
    # Annotation rows joined to their trace (for trace_id) and to the
    # project (for the name filter); only "LLM" annotations are kept.
    stmt = (
        select(models.TraceAnnotation, models.Trace.trace_id)
        .join_from(models.TraceAnnotation, models.Trace)
        .join_from(models.Trace, models.Project)
        .where(
            models.Project.name == project_name,
            models.TraceAnnotation.annotator_kind == "LLM",
        )
    )
    return pd.read_sql(stmt, connectable, index_col="trace_id")
222
+
223
+
224
def _read_sql_span_evaluations_into_dataframe(
    connectable: Connectable,
    project_name: str,
) -> DataFrame:
    """
    Load the LLM span evaluations of one project into a dataframe indexed
    by ``span_id``.

    Takes a synchronous connectable because pandas.read_sql does not support
    async connectables. For more information, see:

    https://stackoverflow.com/questions/70848256/how-can-i-use-pandas-read-sql-on-an-async-connection
    """
    # Annotation rows joined to their span (for span_id), then up through
    # the trace to the project (for the name filter); only "LLM" kept.
    stmt = (
        select(models.SpanAnnotation, models.Span.span_id)
        .join_from(models.SpanAnnotation, models.Span)
        .join_from(models.Span, models.Trace)
        .join_from(models.Trace, models.Project)
        .where(
            models.Project.name == project_name,
            models.SpanAnnotation.annotator_kind == "LLM",
        )
    )
    return pd.read_sql_query(stmt, connectable, index_col="span_id")
246
+
247
+
248
def _read_sql_document_evaluations_into_dataframe(
    connectable: Connectable,
    project_name: str,
) -> DataFrame:
    """
    Load the LLM document evaluations of one project into a dataframe
    indexed by ``(span_id, document_position)``.

    Takes a synchronous connectable because pandas.read_sql does not support
    async connectables. For more information, see:

    https://stackoverflow.com/questions/70848256/how-can-i-use-pandas-read-sql-on-an-async-connection
    """
    # Annotation rows joined to their span (for span_id), then up through
    # the trace to the project (for the name filter); only "LLM" kept.
    stmt = (
        select(models.DocumentAnnotation, models.Span.span_id)
        .join_from(models.DocumentAnnotation, models.Span)
        .join_from(models.Span, models.Trace)
        .join_from(models.Trace, models.Project)
        .where(
            models.Project.name == project_name,
            models.DocumentAnnotation.annotator_kind == "LLM",
        )
    )
    frame = pd.read_sql(stmt, connectable)
    # The two-level index is set after loading rather than via index_col.
    return frame.set_index(["span_id", "document_position"])
269
+
270
+
271
def _groupby_eval_name(
    evals_dataframe: DataFrame,
) -> Iterator[Tuple[EvaluationName, DataFrame]]:
    """Yield ``(evaluation name, rows for that name)`` for each distinct ``name``.

    The name is converted to ``str`` before being yielded.
    """
    grouped = evals_dataframe.groupby("name", as_index=False)
    yield from ((str(name), frame) for name, frame in grouped)
@@ -0,0 +1,126 @@
1
+ from datetime import timezone
2
+ from typing import AsyncIterator
3
+
4
+ from starlette.requests import Request
5
+ from starlette.responses import Response, StreamingResponse
6
+ from starlette.status import HTTP_404_NOT_FOUND, HTTP_422_UNPROCESSABLE_ENTITY
7
+
8
+ from phoenix.config import DEFAULT_PROJECT_NAME
9
+ from phoenix.datetime_utils import normalize_datetime
10
+ from phoenix.server.api.routers.utils import df_to_bytes, from_iso_format
11
+ from phoenix.trace.dsl import SpanQuery
12
+
13
+ DEFAULT_SPAN_LIMIT = 1000
14
+
15
+
16
+ # TODO: Add property details to SpanQuery schema
17
async def query_spans_handler(request: Request) -> Response:
    """
    summary: Query spans using query DSL
    operationId: querySpans
    tags:
      - spans
    parameters:
      - name: project-name
        in: query
        schema:
          type: string
          default: default
        description: The project name to get spans from
    requestBody:
      required: true
      content:
        application/json:
          schema:
            type: object
            properties:
              queries:
                type: array
                items:
                  type: object
                  properties:
                    select:
                      type: object
                    filter:
                      type: object
                    explode:
                      type: object
                    concat:
                      type: object
                    rename:
                      type: object
                    index:
                      type: object
              start_time:
                type: string
                format: date-time
              end_time:
                type: string
                format: date-time
                nullable: true
              limit:
                type: integer
                nullable: true
                default: 1000
              root_spans_only:
                type: boolean
                nullable: true
    responses:
      200:
        description: Success
      404:
        description: Not found
      422:
        description: Request body is invalid
    """
    # NOTE(review): the docstring above appears to be the OpenAPI operation
    # spec for this route, so it is kept as pure YAML and the explanation
    # lives in comments. Confirm against the schema generator before adding
    # prose to it.
    try:
        payload = await request.json()
    except ValueError:
        # json.JSONDecodeError is a ValueError; a body that is not JSON is a
        # client error and maps to the documented 422.
        return Response(
            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
            content="Request body is not valid JSON",
        )
    queries = payload.pop("queries", [])
    project_name = (
        request.query_params.get("project-name")
        # read from headers/payload for backward-compatibility
        or request.headers.get("project-name")
        or payload.get("project_name")
        or DEFAULT_PROJECT_NAME
    )
    # "stop_time" is accepted as a fallback key for the end of the window.
    end_time = payload.get("end_time") or payload.get("stop_time")
    try:
        span_queries = [SpanQuery.from_dict(query) for query in queries]
    except Exception as e:
        return Response(
            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
            content=f"Invalid query: {e}",
        )
    # No queries means no results; answer before opening a DB session.
    if not span_queries:
        return Response(status_code=HTTP_404_NOT_FOUND)

    # These arguments are the same for every query, so build them once
    # instead of re-parsing the timestamps on each loop iteration.
    query_kwargs = dict(
        project_name=project_name,
        start_time=normalize_datetime(
            from_iso_format(payload.get("start_time")),
            timezone.utc,
        ),
        end_time=normalize_datetime(
            from_iso_format(end_time),
            timezone.utc,
        ),
        limit=payload.get("limit", DEFAULT_SPAN_LIMIT),
        root_spans_only=payload.get("root_spans_only"),
    )
    async with request.app.state.db() as session:
        # Each SpanQuery is called synchronously with the session through
        # run_sync, one after the other.
        results = [await session.run_sync(query, **query_kwargs) for query in span_queries]

    async def content() -> AsyncIterator[bytes]:
        # One serialized dataframe per query, in request order.
        for result in results:
            yield df_to_bytes(result)

    return StreamingResponse(
        content=content(),
        media_type="application/x-pandas-arrow",
    )
123
+
124
+
125
async def get_spans_handler(request: Request) -> Response:
    # GET variant of the span query route: delegates unchanged to the POST
    # handler. No docstring is added, since the sibling handlers' docstrings
    # appear to be OpenAPI operation specs rather than prose.
    response = await query_spans_handler(request)
    return response