arize-phoenix 11.29.0__py3-none-any.whl → 11.31.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-11.29.0.dist-info → arize_phoenix-11.31.0.dist-info}/METADATA +17 -17
- {arize_phoenix-11.29.0.dist-info → arize_phoenix-11.31.0.dist-info}/RECORD +28 -26
- phoenix/db/types/trace_retention.py +1 -1
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +1 -4
- phoenix/server/api/dataloaders/document_evaluations.py +6 -9
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +1 -1
- phoenix/server/api/routers/v1/__init__.py +2 -0
- phoenix/server/api/routers/v1/annotations.py +128 -5
- phoenix/server/api/routers/v1/documents.py +143 -0
- phoenix/server/api/routers/v1/spans.py +7 -51
- phoenix/server/api/routers/v1/traces.py +24 -58
- phoenix/server/api/types/Dataset.py +8 -66
- phoenix/server/api/types/DatasetExperimentAnnotationSummary.py +10 -0
- phoenix/server/api/types/DocumentAnnotation.py +92 -0
- phoenix/server/api/types/Experiment.py +2 -2
- phoenix/server/api/types/Span.py +9 -3
- phoenix/server/api/types/TraceAnnotation.py +8 -5
- phoenix/server/cost_tracking/model_cost_manifest.json +91 -0
- phoenix/server/static/.vite/manifest.json +9 -9
- phoenix/server/static/assets/{components-dCdVienD.js → components-BjW5gAwL.js} +1 -1
- phoenix/server/static/assets/{index-Bp44T8N2.js → index-3OI8VV_W.js} +1 -1
- phoenix/server/static/assets/{pages-CA4bKhm9.js → pages-CQfUODtD.js} +312 -307
- phoenix/trace/projects.py +6 -0
- phoenix/version.py +1 -1
- phoenix/server/api/types/Evaluation.py +0 -39
- {arize_phoenix-11.29.0.dist-info → arize_phoenix-11.31.0.dist-info}/WHEEL +0 -0
- {arize_phoenix-11.29.0.dist-info → arize_phoenix-11.31.0.dist-info}/entry_points.txt +0 -0
- {arize_phoenix-11.29.0.dist-info → arize_phoenix-11.31.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-11.29.0.dist-info → arize_phoenix-11.31.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter, Depends, HTTPException, Query
|
|
4
|
+
from pydantic import Field
|
|
5
|
+
from sqlalchemy import select
|
|
6
|
+
from starlette.requests import Request
|
|
7
|
+
from starlette.status import HTTP_404_NOT_FOUND
|
|
8
|
+
from strawberry.relay import GlobalID
|
|
9
|
+
|
|
10
|
+
from phoenix.db import models
|
|
11
|
+
from phoenix.db.helpers import SupportedSQLDialect
|
|
12
|
+
from phoenix.db.insertion.helpers import as_kv, insert_on_conflict
|
|
13
|
+
from phoenix.server.api.routers.v1.annotations import SpanDocumentAnnotationData
|
|
14
|
+
from phoenix.server.api.types.DocumentAnnotation import DocumentAnnotation
|
|
15
|
+
from phoenix.server.authorization import is_not_locked
|
|
16
|
+
from phoenix.server.bearer_auth import PhoenixUser
|
|
17
|
+
from phoenix.server.dml_event import DocumentAnnotationInsertEvent
|
|
18
|
+
|
|
19
|
+
from .models import V1RoutesBaseModel
|
|
20
|
+
from .utils import RequestBody, ResponseBody, add_errors_to_responses
|
|
21
|
+
|
|
22
|
+
# Document annotations are span-related, so these routes are grouped under the "spans" tag
|
|
23
|
+
router = APIRouter(tags=["spans"])
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class AnnotateSpanDocumentsRequestBody(RequestBody[list[SpanDocumentAnnotationData]]):
    # Request payload accepted by `annotate_span_documents`: the generic RequestBody
    # wrapper specialised to a list of SpanDocumentAnnotationData items (the handler
    # reads them from `request_body.data`). No fields are added or overridden here.
    # NOTE(review): documented with comments instead of a docstring on the assumption
    # that a model docstring would be published as the schema description - confirm.
    pass
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class InsertedSpanDocumentAnnotation(V1RoutesBaseModel):
    # One entry of the synchronous-insert response. `annotate_span_documents` fills
    # `id` with the string form of a relay GlobalID built from the DocumentAnnotation
    # type name and the inserted row id.
    id: str = Field(description="The ID of the inserted span document annotation")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class AnnotateSpanDocumentsResponseBody(ResponseBody[list[InsertedSpanDocumentAnnotation]]):
    # Response payload returned by `annotate_span_documents`: the generic ResponseBody
    # wrapper specialised to a list of InsertedSpanDocumentAnnotation items. The handler
    # returns it with `data=[]` for empty or queued (non-sync) requests.
    pass
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@router.post(
    "/document_annotations",
    dependencies=[Depends(is_not_locked)],
    operation_id="annotateSpanDocuments",
    responses=add_errors_to_responses(
        [
            {
                "status_code": HTTP_404_NOT_FOUND,
                "description": "Span not found",
            },
            {
                "status_code": 422,
                "description": "Invalid request - non-empty identifier not supported",
            },
        ]
    ),
    response_description="Span document annotation inserted successfully",
    include_in_schema=True,
)
async def annotate_span_documents(
    request: Request,
    request_body: AnnotateSpanDocumentsRequestBody,
    sync: bool = Query(
        default=False, description="If set to true, the annotations are inserted synchronously."
    ),
) -> AnnotateSpanDocumentsResponseBody:
    # Create or update document annotations attached to spans.
    #
    # Documented with comments rather than a docstring on purpose: FastAPI publishes a
    # handler's docstring as the operation description, which would alter the generated
    # OpenAPI schema.
    #
    # Parameters:
    #   request       - used for app state (auth flag, db factory), the current user, and
    #                   the per-request `enqueue` / `event_queue` hooks.
    #   request_body  - wrapper whose `data` is the list of annotations to write.
    #   sync          - False (default): hand the annotations to `request.state.enqueue`
    #                   and return an empty list. True: validate and insert them now and
    #                   return the resulting IDs.
    #
    # Raises HTTPException:
    #   422 - an annotation carries a non-blank identifier (both modes), or its
    #         document_position is outside the span's documents (sync mode only).
    #   404 - sync mode and at least one referenced span does not exist.
    if not request_body.data:
        return AnnotateSpanDocumentsResponseBody(data=[])

    # Validate that identifiers are empty or only whitespace
    for annotation in request_body.data:
        if annotation.identifier.strip():
            raise HTTPException(
                detail=f"Non-empty identifier '{annotation.identifier}' is not supported",
                status_code=422,  # Unprocessable Entity
            )

    # Attribute the annotations to the caller only when auth is on and the request
    # carries a PhoenixUser; otherwise they are stored without a user.
    user_id: Optional[int] = None
    if request.app.state.authentication_enabled and isinstance(request.user, PhoenixUser):
        user_id = int(request.user.identity)

    span_document_annotations = request_body.data

    precursors = [
        annotation.as_precursor(user_id=user_id) for annotation in span_document_annotations
    ]
    if not sync:
        # Queued path: no span lookup happens here and no IDs are available to return.
        await request.state.enqueue(*precursors)
        return AnnotateSpanDocumentsResponseBody(data=[])

    span_ids = {p.span_id for p in precursors}
    # Account for the fact that the spans could arrive after the annotation
    async with request.app.state.db() as session:
        # Maps span_id -> (row id, number of documents) for every referenced span found.
        existing_spans = {
            span_id: (id_, num_docs)
            async for span_id, id_, num_docs in await session.stream(
                select(models.Span.span_id, models.Span.id, models.Span.num_documents).filter(
                    models.Span.span_id.in_(span_ids)
                )
            )
        }

        missing_span_ids = span_ids - set(existing_spans.keys())
        # We prefer to fail the entire operation if there are missing spans in sync mode
        if missing_span_ids:
            # Sorted so the error detail is stable; set iteration order is arbitrary.
            raise HTTPException(
                detail=f"Spans with IDs {', '.join(sorted(missing_span_ids))} do not exist.",
                status_code=HTTP_404_NOT_FOUND,
            )

        # Validate that document positions are within bounds
        for annotation in span_document_annotations:
            _, num_docs = existing_spans[annotation.span_id]
            if annotation.document_position not in range(num_docs):
                # A span without documents has no valid position; avoid reporting "max: -1".
                bounds = f"max: {num_docs - 1}" if num_docs else "span has no documents"
                raise HTTPException(
                    detail=f"Document position {annotation.document_position} is out of bounds "
                    f"for span {annotation.span_id} ({bounds})",
                    status_code=422,  # Unprocessable Entity
                )

        inserted_document_annotation_ids = []
        dialect = SupportedSQLDialect(session.bind.dialect.name)
        for anno in precursors:
            span_rowid, _ = existing_spans[anno.span_id]
            values = dict(as_kv(anno.as_insertable(span_rowid).row))
            # One statement per annotation, keyed on (name, span, identifier, position);
            # RETURNING yields the id of the affected row.
            span_document_annotation_id = await session.scalar(
                insert_on_conflict(
                    values,
                    dialect=dialect,
                    table=models.DocumentAnnotation,
                    unique_by=("name", "span_rowid", "identifier", "document_position"),
                    constraint_name="uq_document_annotations_name_span_rowid_document_pos_identifier",
                ).returning(models.DocumentAnnotation.id)
            )
            inserted_document_annotation_ids.append(span_document_annotation_id)

    # We queue an event to let the application know that annotations have changed
    # (done once the session block above has exited).
    request.state.event_queue.put(
        DocumentAnnotationInsertEvent(tuple(inserted_document_annotation_ids))
    )
    return AnnotateSpanDocumentsResponseBody(
        data=[
            InsertedSpanDocumentAnnotation(id=str(GlobalID(DocumentAnnotation.__name__, str(id_))))
            for id_ in inserted_document_annotation_ids
        ]
    )
|
|
@@ -5,7 +5,7 @@ from collections.abc import AsyncIterator
|
|
|
5
5
|
from datetime import datetime, timezone
|
|
6
6
|
from enum import Enum
|
|
7
7
|
from secrets import token_urlsafe
|
|
8
|
-
from typing import Annotated, Any,
|
|
8
|
+
from typing import Annotated, Any, Optional, Union
|
|
9
9
|
|
|
10
10
|
import pandas as pd
|
|
11
11
|
import sqlalchemy as sa
|
|
@@ -27,8 +27,8 @@ from phoenix.datetime_utils import normalize_datetime
|
|
|
27
27
|
from phoenix.db import models
|
|
28
28
|
from phoenix.db.helpers import SupportedSQLDialect, get_ancestor_span_rowids
|
|
29
29
|
from phoenix.db.insertion.helpers import as_kv, insert_on_conflict
|
|
30
|
-
from phoenix.db.insertion.types import Precursors
|
|
31
30
|
from phoenix.server.api.routers.utils import df_to_bytes
|
|
31
|
+
from phoenix.server.api.routers.v1.annotations import SpanAnnotationData
|
|
32
32
|
from phoenix.server.api.types.node import from_global_id_with_expected_type
|
|
33
33
|
from phoenix.server.authorization import is_not_locked
|
|
34
34
|
from phoenix.server.bearer_auth import PhoenixUser
|
|
@@ -850,52 +850,6 @@ async def get_spans_handler(
|
|
|
850
850
|
return await query_spans_handler(request, request_body, project_name)
|
|
851
851
|
|
|
852
852
|
|
|
853
|
-
class SpanAnnotationResult(V1RoutesBaseModel):
|
|
854
|
-
label: Optional[str] = Field(default=None, description="The label assigned by the annotation")
|
|
855
|
-
score: Optional[float] = Field(default=None, description="The score assigned by the annotation")
|
|
856
|
-
explanation: Optional[str] = Field(
|
|
857
|
-
default=None, description="Explanation of the annotation result"
|
|
858
|
-
)
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
class SpanAnnotationData(V1RoutesBaseModel):
|
|
862
|
-
span_id: str = Field(description="OpenTelemetry Span ID (hex format w/o 0x prefix)")
|
|
863
|
-
name: str = Field(description="The name of the annotation")
|
|
864
|
-
annotator_kind: Literal["LLM", "CODE", "HUMAN"] = Field(
|
|
865
|
-
description="The kind of annotator used for the annotation"
|
|
866
|
-
)
|
|
867
|
-
result: Optional[SpanAnnotationResult] = Field(
|
|
868
|
-
default=None, description="The result of the annotation"
|
|
869
|
-
)
|
|
870
|
-
metadata: Optional[dict[str, Any]] = Field(
|
|
871
|
-
default=None, description="Metadata for the annotation"
|
|
872
|
-
)
|
|
873
|
-
identifier: str = Field(
|
|
874
|
-
default="",
|
|
875
|
-
description=(
|
|
876
|
-
"The identifier of the annotation. "
|
|
877
|
-
"If provided, the annotation will be updated if it already exists."
|
|
878
|
-
),
|
|
879
|
-
)
|
|
880
|
-
|
|
881
|
-
def as_precursor(self, *, user_id: Optional[int] = None) -> Precursors.SpanAnnotation:
|
|
882
|
-
return Precursors.SpanAnnotation(
|
|
883
|
-
datetime.now(timezone.utc),
|
|
884
|
-
self.span_id,
|
|
885
|
-
models.SpanAnnotation(
|
|
886
|
-
name=self.name,
|
|
887
|
-
annotator_kind=self.annotator_kind,
|
|
888
|
-
score=self.result.score if self.result else None,
|
|
889
|
-
label=self.result.label if self.result else None,
|
|
890
|
-
explanation=self.result.explanation if self.result else None,
|
|
891
|
-
metadata_=self.metadata or {},
|
|
892
|
-
identifier=self.identifier,
|
|
893
|
-
source="API",
|
|
894
|
-
user_id=user_id,
|
|
895
|
-
),
|
|
896
|
-
)
|
|
897
|
-
|
|
898
|
-
|
|
899
853
|
class AnnotateSpansRequestBody(RequestBody[list[SpanAnnotationData]]):
|
|
900
854
|
data: list[SpanAnnotationData]
|
|
901
855
|
|
|
@@ -949,9 +903,11 @@ async def annotate_spans(
|
|
|
949
903
|
span_ids = {p.span_id for p in precursors}
|
|
950
904
|
async with request.app.state.db() as session:
|
|
951
905
|
existing_spans = {
|
|
952
|
-
|
|
953
|
-
async for
|
|
954
|
-
select(models.Span
|
|
906
|
+
span_id: id_
|
|
907
|
+
async for span_id, id_ in await session.stream(
|
|
908
|
+
select(models.Span.span_id, models.Span.id).filter(
|
|
909
|
+
models.Span.span_id.in_(span_ids)
|
|
910
|
+
)
|
|
955
911
|
)
|
|
956
912
|
}
|
|
957
913
|
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import gzip
|
|
2
2
|
import zlib
|
|
3
|
-
from
|
|
4
|
-
from typing import Any, Literal, Optional
|
|
3
|
+
from typing import Optional
|
|
5
4
|
|
|
6
5
|
from fastapi import APIRouter, BackgroundTasks, Depends, Header, HTTPException, Path, Query
|
|
7
6
|
from google.protobuf.message import DecodeError
|
|
@@ -10,7 +9,7 @@ from opentelemetry.proto.collector.trace.v1.trace_service_pb2 import (
|
|
|
10
9
|
ExportTraceServiceResponse,
|
|
11
10
|
)
|
|
12
11
|
from pydantic import Field
|
|
13
|
-
from sqlalchemy import delete,
|
|
12
|
+
from sqlalchemy import delete, select
|
|
14
13
|
from starlette.concurrency import run_in_threadpool
|
|
15
14
|
from starlette.datastructures import State
|
|
16
15
|
from starlette.requests import Request
|
|
@@ -23,8 +22,9 @@ from starlette.status import (
|
|
|
23
22
|
from strawberry.relay import GlobalID
|
|
24
23
|
|
|
25
24
|
from phoenix.db import models
|
|
26
|
-
from phoenix.db.
|
|
27
|
-
from phoenix.db.insertion.
|
|
25
|
+
from phoenix.db.helpers import SupportedSQLDialect
|
|
26
|
+
from phoenix.db.insertion.helpers import as_kv, insert_on_conflict
|
|
27
|
+
from phoenix.server.api.routers.v1.annotations import TraceAnnotationData
|
|
28
28
|
from phoenix.server.api.types.node import from_global_id_with_expected_type
|
|
29
29
|
from phoenix.server.authorization import is_not_locked
|
|
30
30
|
from phoenix.server.bearer_auth import PhoenixUser
|
|
@@ -33,7 +33,11 @@ from phoenix.trace.otel import decode_otlp_span
|
|
|
33
33
|
from phoenix.utilities.project import get_project_name
|
|
34
34
|
|
|
35
35
|
from .models import V1RoutesBaseModel
|
|
36
|
-
from .utils import
|
|
36
|
+
from .utils import (
|
|
37
|
+
RequestBody,
|
|
38
|
+
ResponseBody,
|
|
39
|
+
add_errors_to_responses,
|
|
40
|
+
)
|
|
37
41
|
|
|
38
42
|
router = APIRouter(tags=["traces"])
|
|
39
43
|
|
|
@@ -105,54 +109,8 @@ async def post_traces(
|
|
|
105
109
|
)
|
|
106
110
|
|
|
107
111
|
|
|
108
|
-
class
|
|
109
|
-
|
|
110
|
-
score: Optional[float] = Field(default=None, description="The score assigned by the annotation")
|
|
111
|
-
explanation: Optional[str] = Field(
|
|
112
|
-
default=None, description="Explanation of the annotation result"
|
|
113
|
-
)
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
class TraceAnnotation(V1RoutesBaseModel):
|
|
117
|
-
trace_id: str = Field(description="OpenTelemetry Trace ID (hex format w/o 0x prefix)")
|
|
118
|
-
name: str = Field(description="The name of the annotation")
|
|
119
|
-
annotator_kind: Literal["LLM", "HUMAN"] = Field(
|
|
120
|
-
description="The kind of annotator used for the annotation"
|
|
121
|
-
)
|
|
122
|
-
result: Optional[TraceAnnotationResult] = Field(
|
|
123
|
-
default=None, description="The result of the annotation"
|
|
124
|
-
)
|
|
125
|
-
metadata: Optional[dict[str, Any]] = Field(
|
|
126
|
-
default=None, description="Metadata for the annotation"
|
|
127
|
-
)
|
|
128
|
-
identifier: str = Field(
|
|
129
|
-
default="",
|
|
130
|
-
description=(
|
|
131
|
-
"The identifier of the annotation. "
|
|
132
|
-
"If provided, the annotation will be updated if it already exists."
|
|
133
|
-
),
|
|
134
|
-
)
|
|
135
|
-
|
|
136
|
-
def as_precursor(self, *, user_id: Optional[int] = None) -> Precursors.TraceAnnotation:
|
|
137
|
-
return Precursors.TraceAnnotation(
|
|
138
|
-
datetime.now(timezone.utc),
|
|
139
|
-
self.trace_id,
|
|
140
|
-
models.TraceAnnotation(
|
|
141
|
-
name=self.name,
|
|
142
|
-
annotator_kind=self.annotator_kind,
|
|
143
|
-
score=self.result.score if self.result else None,
|
|
144
|
-
label=self.result.label if self.result else None,
|
|
145
|
-
explanation=self.result.explanation if self.result else None,
|
|
146
|
-
metadata_=self.metadata or {},
|
|
147
|
-
identifier=self.identifier,
|
|
148
|
-
source="APP",
|
|
149
|
-
user_id=user_id,
|
|
150
|
-
),
|
|
151
|
-
)
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
class AnnotateTracesRequestBody(RequestBody[list[TraceAnnotation]]):
|
|
155
|
-
data: list[TraceAnnotation] = Field(description="The trace annotations to be upserted")
|
|
112
|
+
class AnnotateTracesRequestBody(RequestBody[list[TraceAnnotationData]]):
|
|
113
|
+
data: list[TraceAnnotationData] = Field(description="The trace annotations to be upserted")
|
|
156
114
|
|
|
157
115
|
|
|
158
116
|
class InsertedTraceAnnotation(V1RoutesBaseModel):
|
|
@@ -193,9 +151,11 @@ async def annotate_traces(
|
|
|
193
151
|
trace_ids = {p.trace_id for p in precursors}
|
|
194
152
|
async with request.app.state.db() as session:
|
|
195
153
|
existing_traces = {
|
|
196
|
-
|
|
197
|
-
async for
|
|
198
|
-
select(models.Trace
|
|
154
|
+
trace_id: id_
|
|
155
|
+
async for trace_id, id_ in await session.stream(
|
|
156
|
+
select(models.Trace.trace_id, models.Trace.id).filter(
|
|
157
|
+
models.Trace.trace_id.in_(trace_ids)
|
|
158
|
+
)
|
|
199
159
|
)
|
|
200
160
|
}
|
|
201
161
|
|
|
@@ -206,10 +166,16 @@ async def annotate_traces(
|
|
|
206
166
|
status_code=HTTP_404_NOT_FOUND,
|
|
207
167
|
)
|
|
208
168
|
inserted_ids = []
|
|
169
|
+
dialect = SupportedSQLDialect(session.bind.dialect.name)
|
|
209
170
|
for p in precursors:
|
|
210
171
|
values = dict(as_kv(p.as_insertable(existing_traces[p.trace_id]).row))
|
|
211
172
|
trace_annotation_id = await session.scalar(
|
|
212
|
-
|
|
173
|
+
insert_on_conflict(
|
|
174
|
+
values,
|
|
175
|
+
dialect=dialect,
|
|
176
|
+
table=models.TraceAnnotation,
|
|
177
|
+
unique_by=("name", "trace_rowid", "identifier"),
|
|
178
|
+
).returning(models.TraceAnnotation.id)
|
|
213
179
|
)
|
|
214
180
|
inserted_ids.append(trace_annotation_id)
|
|
215
181
|
request.state.event_queue.put(TraceAnnotationInsertEvent(tuple(inserted_ids)))
|
|
@@ -15,9 +15,11 @@ from phoenix.server.api.context import Context
|
|
|
15
15
|
from phoenix.server.api.exceptions import BadRequest
|
|
16
16
|
from phoenix.server.api.input_types.DatasetVersionSort import DatasetVersionSort
|
|
17
17
|
from phoenix.server.api.types.DatasetExample import DatasetExample
|
|
18
|
+
from phoenix.server.api.types.DatasetExperimentAnnotationSummary import (
|
|
19
|
+
DatasetExperimentAnnotationSummary,
|
|
20
|
+
)
|
|
18
21
|
from phoenix.server.api.types.DatasetVersion import DatasetVersion
|
|
19
22
|
from phoenix.server.api.types.Experiment import Experiment, to_gql_experiment
|
|
20
|
-
from phoenix.server.api.types.ExperimentAnnotationSummary import ExperimentAnnotationSummary
|
|
21
23
|
from phoenix.server.api.types.node import from_global_id_with_expected_type
|
|
22
24
|
from phoenix.server.api.types.pagination import (
|
|
23
25
|
ConnectionArgs,
|
|
@@ -270,53 +272,13 @@ class Dataset(Node):
|
|
|
270
272
|
@strawberry.field
|
|
271
273
|
async def experiment_annotation_summaries(
|
|
272
274
|
self, info: Info[Context, None]
|
|
273
|
-
) -> list[
|
|
275
|
+
) -> list[DatasetExperimentAnnotationSummary]:
|
|
274
276
|
dataset_id = self.id_attr
|
|
275
|
-
|
|
276
|
-
select(
|
|
277
|
-
models.ExperimentRunAnnotation.name.label("annotation_name"),
|
|
278
|
-
func.avg(models.ExperimentRunAnnotation.score).label("mean_repetition_score"),
|
|
279
|
-
)
|
|
280
|
-
.select_from(models.ExperimentRunAnnotation)
|
|
281
|
-
.join(
|
|
282
|
-
models.ExperimentRun,
|
|
283
|
-
models.ExperimentRunAnnotation.experiment_run_id == models.ExperimentRun.id,
|
|
284
|
-
)
|
|
285
|
-
.join(
|
|
286
|
-
models.Experiment,
|
|
287
|
-
models.ExperimentRun.experiment_id == models.Experiment.id,
|
|
288
|
-
)
|
|
289
|
-
.where(models.Experiment.dataset_id == dataset_id)
|
|
290
|
-
.group_by(
|
|
291
|
-
models.ExperimentRun.dataset_example_id,
|
|
292
|
-
models.ExperimentRunAnnotation.name,
|
|
293
|
-
)
|
|
294
|
-
.subquery()
|
|
295
|
-
.alias("repetition_mean_scores_by_example")
|
|
296
|
-
)
|
|
297
|
-
repetition_mean_scores_subquery = (
|
|
298
|
-
select(
|
|
299
|
-
repetition_mean_scores_by_example_subquery.c.annotation_name.label(
|
|
300
|
-
"annotation_name"
|
|
301
|
-
),
|
|
302
|
-
func.avg(repetition_mean_scores_by_example_subquery.c.mean_repetition_score).label(
|
|
303
|
-
"mean_score"
|
|
304
|
-
),
|
|
305
|
-
)
|
|
306
|
-
.select_from(repetition_mean_scores_by_example_subquery)
|
|
307
|
-
.group_by(
|
|
308
|
-
repetition_mean_scores_by_example_subquery.c.annotation_name,
|
|
309
|
-
)
|
|
310
|
-
.subquery()
|
|
311
|
-
.alias("repetition_mean_scores")
|
|
312
|
-
)
|
|
313
|
-
repetitions_subquery = (
|
|
277
|
+
query = (
|
|
314
278
|
select(
|
|
315
279
|
models.ExperimentRunAnnotation.name.label("annotation_name"),
|
|
316
280
|
func.min(models.ExperimentRunAnnotation.score).label("min_score"),
|
|
317
281
|
func.max(models.ExperimentRunAnnotation.score).label("max_score"),
|
|
318
|
-
func.count().label("count"),
|
|
319
|
-
func.count(models.ExperimentRunAnnotation.error).label("error_count"),
|
|
320
282
|
)
|
|
321
283
|
.select_from(models.ExperimentRunAnnotation)
|
|
322
284
|
.join(
|
|
@@ -329,36 +291,16 @@ class Dataset(Node):
|
|
|
329
291
|
)
|
|
330
292
|
.where(models.Experiment.dataset_id == dataset_id)
|
|
331
293
|
.group_by(models.ExperimentRunAnnotation.name)
|
|
332
|
-
.
|
|
333
|
-
)
|
|
334
|
-
run_scores_query = (
|
|
335
|
-
select(
|
|
336
|
-
repetition_mean_scores_subquery.c.annotation_name.label("annotation_name"),
|
|
337
|
-
repetition_mean_scores_subquery.c.mean_score.label("mean_score"),
|
|
338
|
-
repetitions_subquery.c.min_score.label("min_score"),
|
|
339
|
-
repetitions_subquery.c.max_score.label("max_score"),
|
|
340
|
-
repetitions_subquery.c.count.label("count_"),
|
|
341
|
-
repetitions_subquery.c.error_count.label("error_count"),
|
|
342
|
-
)
|
|
343
|
-
.select_from(repetition_mean_scores_subquery)
|
|
344
|
-
.join(
|
|
345
|
-
repetitions_subquery,
|
|
346
|
-
repetitions_subquery.c.annotation_name
|
|
347
|
-
== repetition_mean_scores_subquery.c.annotation_name,
|
|
348
|
-
)
|
|
349
|
-
.order_by(repetition_mean_scores_subquery.c.annotation_name)
|
|
294
|
+
.order_by(models.ExperimentRunAnnotation.name)
|
|
350
295
|
)
|
|
351
296
|
async with info.context.db() as session:
|
|
352
297
|
return [
|
|
353
|
-
|
|
298
|
+
DatasetExperimentAnnotationSummary(
|
|
354
299
|
annotation_name=scores_tuple.annotation_name,
|
|
355
300
|
min_score=scores_tuple.min_score,
|
|
356
301
|
max_score=scores_tuple.max_score,
|
|
357
|
-
mean_score=scores_tuple.mean_score,
|
|
358
|
-
count=scores_tuple.count_,
|
|
359
|
-
error_count=scores_tuple.error_count,
|
|
360
302
|
)
|
|
361
|
-
async for scores_tuple in await session.stream(
|
|
303
|
+
async for scores_tuple in await session.stream(query)
|
|
362
304
|
]
|
|
363
305
|
|
|
364
306
|
@strawberry.field
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from typing import TYPE_CHECKING, Annotated, Optional
|
|
3
|
+
|
|
4
|
+
import strawberry
|
|
5
|
+
from strawberry import Private
|
|
6
|
+
from strawberry.relay import Node, NodeID
|
|
7
|
+
from strawberry.scalars import JSON
|
|
8
|
+
from strawberry.types import Info
|
|
9
|
+
|
|
10
|
+
from phoenix.db import models
|
|
11
|
+
from phoenix.server.api.context import Context
|
|
12
|
+
from phoenix.server.api.interceptor import GqlValueMediator
|
|
13
|
+
|
|
14
|
+
from .Annotation import Annotation
|
|
15
|
+
from .AnnotationSource import AnnotationSource
|
|
16
|
+
from .AnnotatorKind import AnnotatorKind
|
|
17
|
+
from .User import User, to_gql_user
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from .Span import Span
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# GraphQL node type for an annotation attached to a single document of a span.
# Instances are built from database rows by `to_gql_document_annotation`.
# Field order and defaults below define the generated type, so they are left untouched.
@strawberry.type
class DocumentAnnotation(Node, Annotation):
    # Database row id; serves as the relay node id.
    id_attr: NodeID[int]
    # Declared Private; read only by the `user` resolver below.
    user_id: Private[Optional[int]]
    name: str = strawberry.field(
        description="Name of the annotation, e.g. 'helpfulness' or 'relevance'."
    )
    annotator_kind: AnnotatorKind
    label: Optional[str] = strawberry.field(
        description="Value of the annotation in the form of a string, e.g. "
        "'helpful' or 'not helpful'. Note that the label is not necessarily binary."
    )
    score: Optional[float] = strawberry.field(
        description="Value of the annotation in the form of a numeric score.",
        # NOTE(review): GqlValueMediator comes from phoenix.server.api.interceptor; its
        # effect on the resolved value is not visible here - confirm before changing.
        default=GqlValueMediator(),
    )
    explanation: Optional[str] = strawberry.field(
        description="The annotator's explanation for the annotation result (i.e. "
        "score or label, or both) given to the subject."
    )
    metadata: JSON
    # Position of the annotated document within its span; the REST insert endpoint
    # validates it against the span's document count.
    document_position: int
    # Declared Private; row id of the owning span, used by the `span` resolver below.
    span_rowid: Private[int]
    identifier: str
    source: AnnotationSource
    created_at: datetime = strawberry.field(
        description="The date and time when the annotation was created."
    )
    updated_at: datetime = strawberry.field(
        description="The date and time when the annotation was last updated."
    )

    # Resolves the owning span. Span is imported inside the resolver (and only under
    # TYPE_CHECKING at module level, with a lazy type reference in the annotation)
    # because the Span module imports this module.
    @strawberry.field
    async def span(self) -> Annotated["Span", strawberry.lazy(".Span")]:
        from phoenix.server.api.types.Span import Span

        return Span(span_rowid=self.span_rowid)

    # Resolves the user who created the annotation via the `users` data loader.
    # Returns None when no user id was recorded or the loader finds no user.
    @strawberry.field
    async def user(
        self,
        info: Info[Context, None],
    ) -> Optional[User]:
        if self.user_id is None:
            return None
        user = await info.context.data_loaders.users.load(self.user_id)
        if user is None:
            return None
        return to_gql_user(user)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def to_gql_document_annotation(
    annotation: models.DocumentAnnotation,
) -> DocumentAnnotation:
    """Build the GraphQL ``DocumentAnnotation`` node for a ``models.DocumentAnnotation`` row.

    ``annotator_kind`` and ``source`` are wrapped in ``AnnotatorKind`` and
    ``AnnotationSource``; every other attribute is copied across unchanged
    (``id`` becomes ``id_attr`` and ``metadata_`` becomes ``metadata``).
    """
    kind = AnnotatorKind(annotation.annotator_kind)
    origin = AnnotationSource(annotation.source)
    return DocumentAnnotation(
        # Identity and linkage
        id_attr=annotation.id,
        span_rowid=annotation.span_rowid,
        document_position=annotation.document_position,
        user_id=annotation.user_id,
        identifier=annotation.identifier,
        # Annotation content
        name=annotation.name,
        annotator_kind=kind,
        source=origin,
        label=annotation.label,
        score=annotation.score,
        explanation=annotation.explanation,
        metadata=annotation.metadata_,
        # Timestamps
        created_at=annotation.created_at,
        updated_at=annotation.updated_at,
    )
|
|
@@ -109,10 +109,10 @@ class Experiment(Node):
|
|
|
109
109
|
|
|
110
110
|
@strawberry.field
|
|
111
111
|
async def average_run_latency_ms(self, info: Info[Context, None]) -> Optional[float]:
|
|
112
|
-
|
|
112
|
+
latency_ms = await info.context.data_loaders.average_experiment_run_latency.load(
|
|
113
113
|
self.id_attr
|
|
114
114
|
)
|
|
115
|
-
return
|
|
115
|
+
return latency_ms
|
|
116
116
|
|
|
117
117
|
@strawberry.field
|
|
118
118
|
async def project(self, info: Info[Context, None]) -> Optional[Project]:
|
phoenix/server/api/types/Span.py
CHANGED
|
@@ -34,8 +34,11 @@ from phoenix.server.api.input_types.SpanAnnotationSort import (
|
|
|
34
34
|
)
|
|
35
35
|
from phoenix.server.api.types.AnnotationSummary import AnnotationSummary
|
|
36
36
|
from phoenix.server.api.types.CostBreakdown import CostBreakdown
|
|
37
|
+
from phoenix.server.api.types.DocumentAnnotation import (
|
|
38
|
+
DocumentAnnotation,
|
|
39
|
+
to_gql_document_annotation,
|
|
40
|
+
)
|
|
37
41
|
from phoenix.server.api.types.DocumentRetrievalMetrics import DocumentRetrievalMetrics
|
|
38
|
-
from phoenix.server.api.types.Evaluation import DocumentEvaluation
|
|
39
42
|
from phoenix.server.api.types.ExampleRevisionInterface import ExampleRevision
|
|
40
43
|
from phoenix.server.api.types.GenerativeProvider import GenerativeProvider
|
|
41
44
|
from phoenix.server.api.types.MimeType import MimeType
|
|
@@ -638,8 +641,11 @@ class Span(Node):
|
|
|
638
641
|
async def document_evaluations(
|
|
639
642
|
self,
|
|
640
643
|
info: Info[Context, None],
|
|
641
|
-
) -> list[
|
|
642
|
-
return
|
|
644
|
+
) -> list[DocumentAnnotation]:
|
|
645
|
+
return [
|
|
646
|
+
to_gql_document_annotation(anno)
|
|
647
|
+
for anno in await info.context.data_loaders.document_evaluations.load(self.span_rowid)
|
|
648
|
+
]
|
|
643
649
|
|
|
644
650
|
@strawberry.field(
|
|
645
651
|
description="Retrieval metrics: NDCG@K, Precision@K, Reciprocal Rank, etc.",
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
from typing import Optional
|
|
1
|
+
from typing import TYPE_CHECKING, Annotated, Optional
|
|
2
2
|
|
|
3
3
|
import strawberry
|
|
4
4
|
from strawberry import Private
|
|
5
|
-
from strawberry.relay import
|
|
5
|
+
from strawberry.relay import Node, NodeID
|
|
6
6
|
from strawberry.scalars import JSON
|
|
7
7
|
from strawberry.types import Info
|
|
8
8
|
|
|
@@ -13,6 +13,9 @@ from phoenix.server.api.types.AnnotatorKind import AnnotatorKind
|
|
|
13
13
|
from .AnnotationSource import AnnotationSource
|
|
14
14
|
from .User import User, to_gql_user
|
|
15
15
|
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from .Trace import Trace
|
|
18
|
+
|
|
16
19
|
|
|
17
20
|
@strawberry.type
|
|
18
21
|
class TraceAnnotation(Node):
|
|
@@ -24,15 +27,15 @@ class TraceAnnotation(Node):
|
|
|
24
27
|
score: Optional[float]
|
|
25
28
|
explanation: Optional[str]
|
|
26
29
|
metadata: JSON
|
|
27
|
-
trace_rowid: Private[
|
|
30
|
+
trace_rowid: Private[int]
|
|
28
31
|
identifier: str
|
|
29
32
|
source: AnnotationSource
|
|
30
33
|
|
|
31
34
|
@strawberry.field
|
|
32
|
-
async def
|
|
35
|
+
async def trace(self) -> Annotated["Trace", strawberry.lazy(".Trace")]:
|
|
33
36
|
from phoenix.server.api.types.Trace import Trace
|
|
34
37
|
|
|
35
|
-
return
|
|
38
|
+
return Trace(trace_rowid=self.trace_rowid)
|
|
36
39
|
|
|
37
40
|
@strawberry.field
|
|
38
41
|
async def user(
|