arize-phoenix 3.24.0__py3-none-any.whl → 4.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-3.24.0.dist-info → arize_phoenix-4.0.0.dist-info}/METADATA +26 -4
- {arize_phoenix-3.24.0.dist-info → arize_phoenix-4.0.0.dist-info}/RECORD +80 -75
- phoenix/__init__.py +9 -5
- phoenix/config.py +109 -53
- phoenix/datetime_utils.py +18 -1
- phoenix/db/README.md +25 -0
- phoenix/db/__init__.py +4 -0
- phoenix/db/alembic.ini +119 -0
- phoenix/db/bulk_inserter.py +206 -0
- phoenix/db/engines.py +152 -0
- phoenix/db/helpers.py +47 -0
- phoenix/db/insertion/evaluation.py +209 -0
- phoenix/db/insertion/helpers.py +54 -0
- phoenix/db/insertion/span.py +142 -0
- phoenix/db/migrate.py +71 -0
- phoenix/db/migrations/env.py +121 -0
- phoenix/db/migrations/script.py.mako +26 -0
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
- phoenix/db/models.py +371 -0
- phoenix/exceptions.py +5 -1
- phoenix/server/api/context.py +40 -3
- phoenix/server/api/dataloaders/__init__.py +97 -0
- phoenix/server/api/dataloaders/cache/__init__.py +3 -0
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +67 -0
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +152 -0
- phoenix/server/api/dataloaders/document_evaluations.py +37 -0
- phoenix/server/api/dataloaders/document_retrieval_metrics.py +98 -0
- phoenix/server/api/dataloaders/evaluation_summaries.py +151 -0
- phoenix/server/api/dataloaders/latency_ms_quantile.py +198 -0
- phoenix/server/api/dataloaders/min_start_or_max_end_times.py +93 -0
- phoenix/server/api/dataloaders/record_counts.py +125 -0
- phoenix/server/api/dataloaders/span_descendants.py +64 -0
- phoenix/server/api/dataloaders/span_evaluations.py +37 -0
- phoenix/server/api/dataloaders/token_counts.py +138 -0
- phoenix/server/api/dataloaders/trace_evaluations.py +37 -0
- phoenix/server/api/input_types/SpanSort.py +138 -68
- phoenix/server/api/routers/v1/__init__.py +11 -0
- phoenix/server/api/routers/v1/evaluations.py +275 -0
- phoenix/server/api/routers/v1/spans.py +126 -0
- phoenix/server/api/routers/v1/traces.py +82 -0
- phoenix/server/api/schema.py +112 -48
- phoenix/server/api/types/DocumentEvaluationSummary.py +1 -1
- phoenix/server/api/types/Evaluation.py +29 -12
- phoenix/server/api/types/EvaluationSummary.py +29 -44
- phoenix/server/api/types/MimeType.py +2 -2
- phoenix/server/api/types/Model.py +9 -9
- phoenix/server/api/types/Project.py +240 -171
- phoenix/server/api/types/Span.py +87 -131
- phoenix/server/api/types/Trace.py +29 -20
- phoenix/server/api/types/pagination.py +151 -10
- phoenix/server/app.py +263 -35
- phoenix/server/grpc_server.py +93 -0
- phoenix/server/main.py +75 -60
- phoenix/server/openapi/docs.py +218 -0
- phoenix/server/prometheus.py +23 -7
- phoenix/server/static/index.js +662 -643
- phoenix/server/telemetry.py +68 -0
- phoenix/services.py +4 -0
- phoenix/session/client.py +34 -30
- phoenix/session/data_extractor.py +8 -3
- phoenix/session/session.py +176 -155
- phoenix/settings.py +13 -0
- phoenix/trace/attributes.py +349 -0
- phoenix/trace/dsl/README.md +116 -0
- phoenix/trace/dsl/filter.py +660 -192
- phoenix/trace/dsl/helpers.py +24 -5
- phoenix/trace/dsl/query.py +562 -185
- phoenix/trace/fixtures.py +69 -7
- phoenix/trace/otel.py +33 -199
- phoenix/trace/schemas.py +14 -8
- phoenix/trace/span_evaluations.py +5 -2
- phoenix/utilities/__init__.py +0 -26
- phoenix/utilities/span_store.py +0 -23
- phoenix/version.py +1 -1
- phoenix/core/project.py +0 -773
- phoenix/core/traces.py +0 -96
- phoenix/datasets/dataset.py +0 -214
- phoenix/datasets/fixtures.py +0 -24
- phoenix/datasets/schema.py +0 -31
- phoenix/experimental/evals/__init__.py +0 -73
- phoenix/experimental/evals/evaluators.py +0 -413
- phoenix/experimental/evals/functions/__init__.py +0 -4
- phoenix/experimental/evals/functions/classify.py +0 -453
- phoenix/experimental/evals/functions/executor.py +0 -353
- phoenix/experimental/evals/functions/generate.py +0 -138
- phoenix/experimental/evals/functions/processing.py +0 -76
- phoenix/experimental/evals/models/__init__.py +0 -14
- phoenix/experimental/evals/models/anthropic.py +0 -175
- phoenix/experimental/evals/models/base.py +0 -170
- phoenix/experimental/evals/models/bedrock.py +0 -221
- phoenix/experimental/evals/models/litellm.py +0 -134
- phoenix/experimental/evals/models/openai.py +0 -453
- phoenix/experimental/evals/models/rate_limiters.py +0 -246
- phoenix/experimental/evals/models/vertex.py +0 -173
- phoenix/experimental/evals/models/vertexai.py +0 -186
- phoenix/experimental/evals/retrievals.py +0 -96
- phoenix/experimental/evals/templates/__init__.py +0 -50
- phoenix/experimental/evals/templates/default_templates.py +0 -472
- phoenix/experimental/evals/templates/template.py +0 -195
- phoenix/experimental/evals/utils/__init__.py +0 -172
- phoenix/experimental/evals/utils/threads.py +0 -27
- phoenix/server/api/routers/evaluation_handler.py +0 -110
- phoenix/server/api/routers/span_handler.py +0 -70
- phoenix/server/api/routers/trace_handler.py +0 -60
- phoenix/storage/span_store/__init__.py +0 -23
- phoenix/storage/span_store/text_file.py +0 -85
- phoenix/trace/dsl/missing.py +0 -60
- {arize_phoenix-3.24.0.dist-info → arize_phoenix-4.0.0.dist-info}/WHEEL +0 -0
- {arize_phoenix-3.24.0.dist-info → arize_phoenix-4.0.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-3.24.0.dist-info → arize_phoenix-4.0.0.dist-info}/licenses/LICENSE +0 -0
- /phoenix/{datasets → db/insertion}/__init__.py +0 -0
- /phoenix/{experimental → db/migrations}/__init__.py +0 -0
- /phoenix/{storage → server/openapi}/__init__.py +0 -0
|
@@ -1,30 +1,88 @@
|
|
|
1
|
-
from
|
|
2
|
-
from
|
|
3
|
-
from typing import Any,
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from enum import Enum, auto
|
|
3
|
+
from typing import Any, Optional, Protocol
|
|
4
4
|
|
|
5
|
-
import pandas as pd
|
|
6
5
|
import strawberry
|
|
7
6
|
from openinference.semconv.trace import SpanAttributes
|
|
7
|
+
from sqlalchemy import and_, desc, nulls_last
|
|
8
|
+
from sqlalchemy.orm import InstrumentedAttribute
|
|
9
|
+
from sqlalchemy.sql.expression import Select
|
|
8
10
|
from strawberry import UNSET
|
|
9
11
|
from typing_extensions import assert_never
|
|
10
12
|
|
|
11
13
|
import phoenix.trace.v1 as pb
|
|
12
|
-
from phoenix.
|
|
14
|
+
from phoenix.db import models
|
|
15
|
+
from phoenix.server.api.types.pagination import CursorSortColumnDataType
|
|
13
16
|
from phoenix.server.api.types.SortDir import SortDir
|
|
14
|
-
from phoenix.trace.schemas import
|
|
17
|
+
from phoenix.trace.schemas import SpanID
|
|
18
|
+
|
|
19
|
+
LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT.split(".")
|
|
20
|
+
LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION.split(".")
|
|
21
|
+
LLM_TOKEN_COUNT_TOTAL = SpanAttributes.LLM_TOKEN_COUNT_TOTAL.split(".")
|
|
15
22
|
|
|
16
23
|
|
|
17
24
|
@strawberry.enum
|
|
18
25
|
class SpanColumn(Enum):
|
|
19
|
-
startTime =
|
|
20
|
-
endTime =
|
|
21
|
-
latencyMs =
|
|
22
|
-
tokenCountTotal =
|
|
23
|
-
tokenCountPrompt =
|
|
24
|
-
tokenCountCompletion =
|
|
25
|
-
cumulativeTokenCountTotal =
|
|
26
|
-
cumulativeTokenCountPrompt =
|
|
27
|
-
cumulativeTokenCountCompletion =
|
|
26
|
+
startTime = auto()
|
|
27
|
+
endTime = auto()
|
|
28
|
+
latencyMs = auto()
|
|
29
|
+
tokenCountTotal = auto()
|
|
30
|
+
tokenCountPrompt = auto()
|
|
31
|
+
tokenCountCompletion = auto()
|
|
32
|
+
cumulativeTokenCountTotal = auto()
|
|
33
|
+
cumulativeTokenCountPrompt = auto()
|
|
34
|
+
cumulativeTokenCountCompletion = auto()
|
|
35
|
+
|
|
36
|
+
@property
def column_name(self) -> str:
    """
    Label under which this column's sort expression is selected.

    The label is built from the enum member's name, so each member gets its
    own label (for example ``startTime_span_sort_column``).
    """
    # The `f` prefix used to sit inside the quotes, which meant the braces
    # were never interpolated and every member returned the same literal.
    return f"{self.name}_span_sort_column"
|
|
39
|
+
|
|
40
|
+
@property
def orm_expression(self) -> Any:
    """
    Return the ORM expression to sort by for this column.

    The expression is labeled with ``self.column_name``. Plain token counts
    are read from the span ``attributes`` using the dotted semantic-convention
    key split into path segments, and converted with ``as_float()``. The
    cumulative total is the sum of the cumulative prompt and completion
    columns.
    """
    label = self.column_name
    if self is SpanColumn.startTime:
        return models.Span.start_time.label(label)
    if self is SpanColumn.endTime:
        return models.Span.end_time.label(label)
    if self is SpanColumn.latencyMs:
        return models.Span.latency_ms.label(label)
    if self is SpanColumn.tokenCountTotal:
        return models.Span.attributes[LLM_TOKEN_COUNT_TOTAL].as_float().label(label)
    if self is SpanColumn.tokenCountPrompt:
        return models.Span.attributes[LLM_TOKEN_COUNT_PROMPT].as_float().label(label)
    if self is SpanColumn.tokenCountCompletion:
        return models.Span.attributes[LLM_TOKEN_COUNT_COMPLETION].as_float().label(label)
    if self is SpanColumn.cumulativeTokenCountTotal:
        cumulative_total = (
            models.Span.cumulative_llm_token_count_prompt
            + models.Span.cumulative_llm_token_count_completion
        )
        return cumulative_total.label(label)
    if self is SpanColumn.cumulativeTokenCountPrompt:
        return models.Span.cumulative_llm_token_count_prompt.label(label)
    if self is SpanColumn.cumulativeTokenCountCompletion:
        return models.Span.cumulative_llm_token_count_completion.label(label)
    # Every member is handled above; this keeps the match exhaustive.
    assert_never(self)
|
|
67
|
+
|
|
68
|
+
@property
def data_type(self) -> CursorSortColumnDataType:
    """
    Return the cursor data type of this sort column.

    Start and end times are ``DATETIME``; latency and the plain token counts
    are ``FLOAT``; the cumulative token counts are ``INT``.
    """
    if self is SpanColumn.startTime or self is SpanColumn.endTime:
        return CursorSortColumnDataType.DATETIME
    is_float_column = (
        self is SpanColumn.latencyMs
        or self is SpanColumn.tokenCountTotal
        or self is SpanColumn.tokenCountPrompt
        or self is SpanColumn.tokenCountCompletion
    )
    if is_float_column:
        return CursorSortColumnDataType.FLOAT
    is_int_column = (
        self is SpanColumn.cumulativeTokenCountTotal
        or self is SpanColumn.cumulativeTokenCountPrompt
        or self is SpanColumn.cumulativeTokenCountCompletion
    )
    if is_int_column:
        return CursorSortColumnDataType.INT
    # Every member is handled above; this keeps the match exhaustive.
    assert_never(self)
|
|
28
86
|
|
|
29
87
|
|
|
30
88
|
@strawberry.enum
|
|
@@ -32,6 +90,29 @@ class EvalAttr(Enum):
|
|
|
32
90
|
score = "score"
|
|
33
91
|
label = "label"
|
|
34
92
|
|
|
93
|
+
@property
def column_name(self) -> str:
    """Label for this attribute's sort expression, built from the enum value."""
    attr_value = self.value
    return f"{attr_value}_eval_sort_column"
|
|
96
|
+
|
|
97
|
+
@property
def orm_expression(self) -> Any:
    """
    Return the ``SpanAnnotation`` column for this attribute.

    The column is labeled with ``self.column_name``.
    """
    label = self.column_name
    if self is EvalAttr.score:
        return models.SpanAnnotation.score.label(label)
    if self is EvalAttr.label:
        return models.SpanAnnotation.label.label(label)
    # Both members are handled above; this keeps the match exhaustive.
    assert_never(self)
|
|
107
|
+
|
|
108
|
+
@property
def data_type(self) -> CursorSortColumnDataType:
    """Return the cursor data type: ``FLOAT`` for score, ``STRING`` for label."""
    if self is EvalAttr.score:
        return CursorSortColumnDataType.FLOAT
    if self is EvalAttr.label:
        return CursorSortColumnDataType.STRING
    # Both members are handled above; this keeps the match exhaustive.
    assert_never(self)
|
|
115
|
+
|
|
35
116
|
|
|
36
117
|
@strawberry.input
|
|
37
118
|
class EvalResultKey:
|
|
@@ -43,6 +124,15 @@ class SupportsGetSpanEvaluation(Protocol):
|
|
|
43
124
|
def get_span_evaluation(self, span_id: SpanID, name: str) -> Optional[pb.Evaluation]: ...
|
|
44
125
|
|
|
45
126
|
|
|
127
|
+
@dataclass(frozen=True)
class SpanSortConfig:
    """Immutable description of a sort that has been applied to a select statement."""

    # Select statement with the sort column added and the ordering applied.
    stmt: Select[Any]
    # Labeled ORM expression the statement is sorted by.
    orm_expression: Any
    # Requested sort direction.
    dir: SortDir
    # Label given to the sort expression in the statement.
    column_name: str
    # Data type of the sort column, as used for cursors.
    column_data_type: CursorSortColumnDataType
|
|
134
|
+
|
|
135
|
+
|
|
46
136
|
@strawberry.input(
|
|
47
137
|
description="The sort key and direction for span connections. Must "
|
|
48
138
|
"specify one and only one of either `col` or `evalResultKey`."
|
|
@@ -52,58 +142,38 @@ class SpanSort:
|
|
|
52
142
|
eval_result_key: Optional[EvalResultKey] = UNSET
|
|
53
143
|
dir: SortDir
|
|
54
144
|
|
|
55
|
-
def
|
|
56
|
-
self
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
eval_attr=self.eval_result_key.attr,
|
|
68
|
-
evals=evals,
|
|
145
|
+
def update_orm_expr(self, stmt: Select[Any]) -> SpanSortConfig:
    """
    Apply this sort to ``stmt`` and describe the resulting sort column.

    Exactly one of ``col`` or ``eval_result_key`` must be set. The chosen
    expression is added to the selected columns and used for ordering; when
    sorting by an evaluation, ``SpanAnnotation`` rows with the requested name
    are joined to the spans first.

    Raises:
        ValueError: if both or neither of ``col`` and ``eval_result_key``
            are set.
    """
    col = self.col
    if col and not self.eval_result_key:
        sort_column = col.orm_expression
        stmt = stmt.add_columns(sort_column)
        ordering = desc(sort_column) if self.dir == SortDir.desc else sort_column
        return SpanSortConfig(
            stmt=stmt.order_by(nulls_last(ordering)),
            orm_expression=col.orm_expression,
            dir=self.dir,
            column_name=col.column_name,
            column_data_type=col.data_type,
        )

    eval_result_key = self.eval_result_key
    if eval_result_key and not col:
        eval_name = eval_result_key.name
        eval_attr = eval_result_key.attr
        sort_column = eval_result_key.attr.orm_expression
        stmt = stmt.add_columns(sort_column)
        ordering = desc(sort_column) if self.dir == SortDir.desc else sort_column
        annotation_matches_span = and_(
            models.SpanAnnotation.span_rowid == models.Span.id,
            models.SpanAnnotation.name == eval_name,
        )
        # NOTE(review): unlike the column branch, this ordering is not wrapped
        # in nulls_last(); confirm whether that difference is intended.
        stmt = stmt.join(
            models.SpanAnnotation,
            onclause=annotation_matches_span,
        ).order_by(ordering)
        return SpanSortConfig(
            stmt=stmt,
            orm_expression=eval_result_key.attr.orm_expression,
            dir=self.dir,
            column_name=eval_attr.column_name,
            column_data_type=eval_attr.data_type,
        )

    raise ValueError("Exactly one of `col` or `evalResultKey` must be specified on `SpanSort`.")
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from starlette.routing import Route
|
|
2
|
+
|
|
3
|
+
from . import evaluations, spans, traces
|
|
4
|
+
|
|
5
|
+
# Routes of the v1 REST API: one entry per (path, handler, HTTP method).
V1_ROUTES = [
    Route(path, endpoint, methods=[method])
    for path, endpoint, method in (
        ("/v1/evaluations", evaluations.post_evaluations, "POST"),
        ("/v1/evaluations", evaluations.get_evaluations, "GET"),
        ("/v1/traces", traces.post_traces, "POST"),
        ("/v1/spans", spans.query_spans_handler, "POST"),
        ("/v1/spans", spans.get_spans_handler, "GET"),
    )
]
|
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
import gzip
|
|
2
|
+
from itertools import chain
|
|
3
|
+
from typing import AsyncContextManager, Callable, Iterator, Tuple
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import pyarrow as pa
|
|
7
|
+
from google.protobuf.message import DecodeError
|
|
8
|
+
from pandas import DataFrame
|
|
9
|
+
from sqlalchemy import select
|
|
10
|
+
from sqlalchemy.engine import Connectable
|
|
11
|
+
from sqlalchemy.ext.asyncio import (
|
|
12
|
+
AsyncSession,
|
|
13
|
+
)
|
|
14
|
+
from starlette.background import BackgroundTask
|
|
15
|
+
from starlette.datastructures import State
|
|
16
|
+
from starlette.requests import Request
|
|
17
|
+
from starlette.responses import Response, StreamingResponse
|
|
18
|
+
from starlette.status import (
|
|
19
|
+
HTTP_403_FORBIDDEN,
|
|
20
|
+
HTTP_404_NOT_FOUND,
|
|
21
|
+
HTTP_415_UNSUPPORTED_MEDIA_TYPE,
|
|
22
|
+
HTTP_422_UNPROCESSABLE_ENTITY,
|
|
23
|
+
)
|
|
24
|
+
from typing_extensions import TypeAlias
|
|
25
|
+
|
|
26
|
+
import phoenix.trace.v1 as pb
|
|
27
|
+
from phoenix.config import DEFAULT_PROJECT_NAME
|
|
28
|
+
from phoenix.db import models
|
|
29
|
+
from phoenix.exceptions import PhoenixEvaluationNameIsMissing
|
|
30
|
+
from phoenix.server.api.routers.utils import table_to_bytes
|
|
31
|
+
from phoenix.session.evaluation import encode_evaluations
|
|
32
|
+
from phoenix.trace.span_evaluations import (
|
|
33
|
+
DocumentEvaluations,
|
|
34
|
+
Evaluations,
|
|
35
|
+
SpanEvaluations,
|
|
36
|
+
TraceEvaluations,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
EvaluationName: TypeAlias = str
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
async def post_evaluations(request: Request) -> Response:
    """
    summary: Add evaluations to a span, trace, or document
    operationId: addEvaluations
    tags:
      - evaluations
    parameters:
      - name: project-name
        in: query
        schema:
          type: string
          default: default
        description: The project name to add the evaluation to
    requestBody:
      required: true
      content:
        application/x-protobuf:
          schema:
            type: string
            format: binary
        application/x-pandas-arrow:
          schema:
            type: string
            format: binary
    responses:
      200:
        description: Success
      403:
        description: Forbidden
      415:
        description: Unsupported content type, only gzipped protobuf and pandas-arrow are supported
      422:
        description: Request body is invalid
    """
    # NOTE(review): the docstring is an OpenAPI YAML fragment (presumably read
    # by the schema generator), so explanatory notes are kept in comments.
    import zlib  # local import: only needed to recognise corrupt gzip payloads

    # Writes are rejected outright when the app runs read-only.
    if request.app.state.read_only:
        return Response(status_code=HTTP_403_FORBIDDEN)

    content_type = request.headers.get("content-type")
    if content_type == "application/x-pandas-arrow":
        # Arrow payloads have their own parsing and queuing path.
        return await _process_pyarrow(request)
    if content_type != "application/x-protobuf":
        return Response("Unsupported content type", status_code=HTTP_415_UNSUPPORTED_MEDIA_TYPE)

    body = await request.body()
    content_encoding = request.headers.get("content-encoding")
    if content_encoding == "gzip":
        try:
            body = gzip.decompress(body)
        except (OSError, EOFError, zlib.error):
            # A corrupt or truncated gzip body is a client error; report it
            # as an invalid request body instead of letting the error escape.
            return Response("Request body is invalid", status_code=HTTP_422_UNPROCESSABLE_ENTITY)
    elif content_encoding:
        return Response("Unsupported content encoding", status_code=HTTP_415_UNSUPPORTED_MEDIA_TYPE)

    evaluation = pb.Evaluation()
    try:
        evaluation.ParseFromString(body)
    except DecodeError:
        return Response("Request body is invalid", status_code=HTTP_422_UNPROCESSABLE_ENTITY)
    if not evaluation.name.strip():
        return Response(
            "Evaluation name must not be blank/empty",
            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
        )
    await request.state.queue_evaluation_for_bulk_insert(evaluation)
    return Response()
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
async def get_evaluations(request: Request) -> Response:
    """
    summary: Get evaluations from Phoenix
    operationId: getEvaluation
    tags:
      - evaluations
    parameters:
      - name: project-name
        in: query
        schema:
          type: string
          default: default
        description: The project name to get evaluations from
    responses:
      200:
        description: Success
      404:
        description: Not found
    """
    # NOTE(review): the docstring is an OpenAPI YAML fragment (presumably read
    # by the schema generator), so explanatory notes are kept in comments.
    project_name = (
        request.query_params.get("project-name")
        # read from headers for backwards compatibility
        or request.headers.get("project-name")
        or DEFAULT_PROJECT_NAME
    )

    db: Callable[[], AsyncContextManager[AsyncSession]] = request.app.state.db
    async with db() as session:
        connection = await session.connection()
        # Each reader runs synchronously against the underlying connection.
        trace_evals_dataframe = await connection.run_sync(
            _read_sql_trace_evaluations_into_dataframe,
            project_name,
        )
        span_evals_dataframe = await connection.run_sync(
            _read_sql_span_evaluations_into_dataframe,
            project_name,
        )
        document_evals_dataframe = await connection.run_sync(
            _read_sql_document_evaluations_into_dataframe,
            project_name,
        )

    nothing_found = (
        trace_evals_dataframe.empty
        and span_evals_dataframe.empty
        and document_evals_dataframe.empty
    )
    if nothing_found:
        return Response(status_code=HTTP_404_NOT_FOUND)

    # One evaluations object per evaluation name, built lazily for each kind.
    trace_evals = (
        TraceEvaluations(*group) for group in _groupby_eval_name(trace_evals_dataframe)
    )
    span_evals = (
        SpanEvaluations(*group) for group in _groupby_eval_name(span_evals_dataframe)
    )
    document_evals = (
        DocumentEvaluations(*group) for group in _groupby_eval_name(document_evals_dataframe)
    )
    bytestream = (
        table_to_bytes(evaluations.to_pyarrow_table())
        for evaluations in chain(trace_evals, span_evals, document_evals)
    )
    return StreamingResponse(
        content=bytestream,
        media_type="application/x-pandas-arrow",
    )
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
async def _process_pyarrow(request: Request) -> Response:
    """
    Parse a pyarrow stream request body and queue its evaluations.

    Responds with 422 when the body is not a valid pyarrow stream, when an
    evaluation name is missing, or when the evaluations cannot be built for
    any other reason. On success the evaluations are queued by a background
    task attached to the response.
    """
    body = await request.body()
    try:
        reader = pa.ipc.open_stream(body)
    except pa.ArrowInvalid:
        return Response(
            content="Request body is not valid pyarrow",
            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
        )
    try:
        evaluations = Evaluations.from_pyarrow_reader(reader)
    except PhoenixEvaluationNameIsMissing:
        return Response(
            "Evaluation name must not be blank/empty",
            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
        )
    except Exception:
        return Response(
            content="Invalid data in request body",
            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
        )
    queue_task = BackgroundTask(_add_evaluations, request.state, evaluations)
    return Response(background=queue_task)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
async def _add_evaluations(state: State, evaluations: Evaluations) -> None:
    """Encode ``evaluations`` and queue each encoded item for bulk insertion."""
    for encoded_evaluation in encode_evaluations(evaluations):
        await state.queue_evaluation_for_bulk_insert(encoded_evaluation)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _read_sql_trace_evaluations_into_dataframe(
    connectable: Connectable,
    project_name: str,
) -> DataFrame:
    """
    Load the LLM trace annotations of one project into a dataframe.

    The result is indexed by ``trace_id``. A synchronous connectable is
    required because pandas.read_sql does not support async connectables;
    see:

    https://stackoverflow.com/questions/70848256/how-can-i-use-pandas-read-sql-on-an-async-connection
    """
    stmt = (
        select(models.TraceAnnotation, models.Trace.trace_id)
        .join_from(models.TraceAnnotation, models.Trace)
        .join_from(models.Trace, models.Project)
        .where(models.Project.name == project_name)
        .where(models.TraceAnnotation.annotator_kind == "LLM")
    )
    return pd.read_sql(stmt, connectable, index_col="trace_id")
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _read_sql_span_evaluations_into_dataframe(
    connectable: Connectable,
    project_name: str,
) -> DataFrame:
    """
    Load the LLM span annotations of one project into a dataframe.

    The result is indexed by ``span_id``. A synchronous connectable is
    required because pandas.read_sql does not support async connectables;
    see:

    https://stackoverflow.com/questions/70848256/how-can-i-use-pandas-read-sql-on-an-async-connection
    """
    stmt = (
        select(models.SpanAnnotation, models.Span.span_id)
        .join_from(models.SpanAnnotation, models.Span)
        .join_from(models.Span, models.Trace)
        .join_from(models.Trace, models.Project)
        .where(models.Project.name == project_name)
        .where(models.SpanAnnotation.annotator_kind == "LLM")
    )
    return pd.read_sql_query(stmt, connectable, index_col="span_id")
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _read_sql_document_evaluations_into_dataframe(
    connectable: Connectable,
    project_name: str,
) -> DataFrame:
    """
    Load the LLM document annotations of one project into a dataframe.

    The result is indexed by ``span_id`` and ``document_position``. A
    synchronous connectable is required because pandas.read_sql does not
    support async connectables; see:

    https://stackoverflow.com/questions/70848256/how-can-i-use-pandas-read-sql-on-an-async-connection
    """
    stmt = (
        select(models.DocumentAnnotation, models.Span.span_id)
        .join_from(models.DocumentAnnotation, models.Span)
        .join_from(models.Span, models.Trace)
        .join_from(models.Trace, models.Project)
        .where(models.Project.name == project_name)
        .where(models.DocumentAnnotation.annotator_kind == "LLM")
    )
    document_evals = pd.read_sql(stmt, connectable)
    return document_evals.set_index(["span_id", "document_position"])
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def _groupby_eval_name(
    evals_dataframe: DataFrame,
) -> Iterator[Tuple[EvaluationName, DataFrame]]:
    """
    Yield ``(evaluation name, rows)`` pairs, one per distinct ``name`` value.

    The name is converted to ``str``; the rows are the sub-dataframe of
    ``evals_dataframe`` belonging to that name.
    """
    grouped_by_name = evals_dataframe.groupby("name", as_index=False)
    yield from ((str(group_key), group_frame) for group_key, group_frame in grouped_by_name)
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
from datetime import timezone
|
|
2
|
+
from typing import AsyncIterator
|
|
3
|
+
|
|
4
|
+
from starlette.requests import Request
|
|
5
|
+
from starlette.responses import Response, StreamingResponse
|
|
6
|
+
from starlette.status import HTTP_404_NOT_FOUND, HTTP_422_UNPROCESSABLE_ENTITY
|
|
7
|
+
|
|
8
|
+
from phoenix.config import DEFAULT_PROJECT_NAME
|
|
9
|
+
from phoenix.datetime_utils import normalize_datetime
|
|
10
|
+
from phoenix.server.api.routers.utils import df_to_bytes, from_iso_format
|
|
11
|
+
from phoenix.trace.dsl import SpanQuery
|
|
12
|
+
|
|
13
|
+
DEFAULT_SPAN_LIMIT = 1000
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# TODO: Add property details to SpanQuery schema
|
|
17
|
+
async def query_spans_handler(request: Request) -> Response:
    """
    summary: Query spans using query DSL
    operationId: querySpans
    tags:
      - spans
    parameters:
      - name: project-name
        in: query
        schema:
          type: string
          default: default
        description: The project name to get evaluations from
    requestBody:
      required: true
      content:
        application/json:
          schema:
            type: object
            properties:
              queries:
                type: array
                items:
                  type: object
                  properties:
                    select:
                      type: object
                    filter:
                      type: object
                    explode:
                      type: object
                    concat:
                      type: object
                    rename:
                      type: object
                    index:
                      type: object
              start_time:
                type: string
                format: date-time
              end_time:
                type: string
                format: date-time
                nullable: true
              limit:
                type: integer
                nullable: true
                default: 1000
              root_spans_only:
                type: boolean
                nullable: true
    responses:
      200:
        description: Success
      404:
        description: Not found
      422:
        description: Request body is invalid
    """
    # NOTE(review): the docstring is an OpenAPI YAML fragment (presumably read
    # by the schema generator), so explanatory notes are kept in comments.
    try:
        payload = await request.json()
    except ValueError:
        # Malformed JSON (json.JSONDecodeError and UnicodeDecodeError are both
        # ValueError subclasses) is a client error: answer with the
        # documented 422 instead of letting the exception escape.
        return Response(
            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
            content="Request body is not valid JSON",
        )
    if not isinstance(payload, dict):
        # The keys read below only exist on a JSON object.
        return Response(
            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
            content="Request body must be a JSON object",
        )
    queries = payload.pop("queries", [])
    project_name = (
        request.query_params.get("project-name")
        # read from headers/payload for backward-compatibility
        or request.headers.get("project-name")
        or payload.get("project_name")
        or DEFAULT_PROJECT_NAME
    )
    # `stop_time` is accepted as a fallback key for `end_time`.
    end_time = payload.get("end_time") or payload.get("stop_time")
    try:
        span_queries = [SpanQuery.from_dict(query) for query in queries]
    except Exception as e:
        return Response(
            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
            content=f"Invalid query: {e}",
        )
    async with request.app.state.db() as session:
        results = []
        for query in span_queries:
            # Each query is run synchronously against the session.
            results.append(
                await session.run_sync(
                    query,
                    project_name=project_name,
                    start_time=normalize_datetime(
                        from_iso_format(payload.get("start_time")),
                        timezone.utc,
                    ),
                    end_time=normalize_datetime(
                        from_iso_format(end_time),
                        timezone.utc,
                    ),
                    limit=payload.get("limit", DEFAULT_SPAN_LIMIT),
                    root_spans_only=payload.get("root_spans_only"),
                )
            )
    if not results:
        return Response(status_code=HTTP_404_NOT_FOUND)

    async def content() -> AsyncIterator[bytes]:
        # Stream one serialized result per query, in query order.
        for result in results:
            yield df_to_bytes(result)

    return StreamingResponse(
        content=content(),
        media_type="application/x-pandas-arrow",
    )
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
async def get_spans_handler(request: Request) -> Response:
    # Thin alias: delegates to query_spans_handler unchanged. Documented in a
    # comment rather than a docstring because the sibling handlers use their
    # docstrings for OpenAPI YAML.
    response = await query_spans_handler(request)
    return response
|