arize-phoenix 4.14.1__py3-none-any.whl → 4.16.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release: this version of arize-phoenix might be problematic.
- {arize_phoenix-4.14.1.dist-info → arize_phoenix-4.16.0.dist-info}/METADATA +5 -3
- {arize_phoenix-4.14.1.dist-info → arize_phoenix-4.16.0.dist-info}/RECORD +81 -71
- phoenix/db/bulk_inserter.py +131 -5
- phoenix/db/engines.py +2 -1
- phoenix/db/helpers.py +23 -1
- phoenix/db/insertion/constants.py +2 -0
- phoenix/db/insertion/document_annotation.py +157 -0
- phoenix/db/insertion/helpers.py +13 -0
- phoenix/db/insertion/span_annotation.py +144 -0
- phoenix/db/insertion/trace_annotation.py +144 -0
- phoenix/db/insertion/types.py +261 -0
- phoenix/experiments/functions.py +3 -2
- phoenix/experiments/types.py +3 -3
- phoenix/server/api/context.py +7 -9
- phoenix/server/api/dataloaders/__init__.py +2 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +3 -3
- phoenix/server/api/dataloaders/dataset_example_revisions.py +2 -4
- phoenix/server/api/dataloaders/dataset_example_spans.py +2 -4
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +2 -4
- phoenix/server/api/dataloaders/document_evaluations.py +2 -4
- phoenix/server/api/dataloaders/document_retrieval_metrics.py +2 -4
- phoenix/server/api/dataloaders/evaluation_summaries.py +2 -4
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +2 -4
- phoenix/server/api/dataloaders/experiment_error_rates.py +2 -4
- phoenix/server/api/dataloaders/experiment_run_counts.py +2 -4
- phoenix/server/api/dataloaders/experiment_sequence_number.py +2 -4
- phoenix/server/api/dataloaders/latency_ms_quantile.py +2 -3
- phoenix/server/api/dataloaders/min_start_or_max_end_times.py +2 -4
- phoenix/server/api/dataloaders/project_by_name.py +3 -3
- phoenix/server/api/dataloaders/record_counts.py +2 -4
- phoenix/server/api/dataloaders/span_annotations.py +2 -4
- phoenix/server/api/dataloaders/span_dataset_examples.py +36 -0
- phoenix/server/api/dataloaders/span_descendants.py +2 -4
- phoenix/server/api/dataloaders/span_evaluations.py +2 -4
- phoenix/server/api/dataloaders/span_projects.py +3 -3
- phoenix/server/api/dataloaders/token_counts.py +2 -4
- phoenix/server/api/dataloaders/trace_evaluations.py +2 -4
- phoenix/server/api/dataloaders/trace_row_ids.py +2 -4
- phoenix/server/api/input_types/SpanAnnotationSort.py +17 -0
- phoenix/server/api/input_types/TraceAnnotationSort.py +17 -0
- phoenix/server/api/mutations/span_annotations_mutations.py +8 -3
- phoenix/server/api/mutations/trace_annotations_mutations.py +8 -3
- phoenix/server/api/openapi/main.py +18 -2
- phoenix/server/api/openapi/schema.py +12 -12
- phoenix/server/api/routers/v1/__init__.py +36 -83
- phoenix/server/api/routers/v1/datasets.py +515 -509
- phoenix/server/api/routers/v1/evaluations.py +164 -73
- phoenix/server/api/routers/v1/experiment_evaluations.py +68 -91
- phoenix/server/api/routers/v1/experiment_runs.py +98 -155
- phoenix/server/api/routers/v1/experiments.py +132 -181
- phoenix/server/api/routers/v1/pydantic_compat.py +78 -0
- phoenix/server/api/routers/v1/spans.py +164 -203
- phoenix/server/api/routers/v1/traces.py +134 -159
- phoenix/server/api/routers/v1/utils.py +95 -0
- phoenix/server/api/types/Span.py +27 -3
- phoenix/server/api/types/Trace.py +21 -4
- phoenix/server/api/utils.py +4 -4
- phoenix/server/app.py +172 -192
- phoenix/server/grpc_server.py +2 -2
- phoenix/server/main.py +5 -9
- phoenix/server/static/.vite/manifest.json +31 -31
- phoenix/server/static/assets/components-Ci5kMOk5.js +1175 -0
- phoenix/server/static/assets/{index-CQgXRwU0.js → index-BQG5WVX7.js} +2 -2
- phoenix/server/static/assets/{pages-hdjlFZhO.js → pages-BrevprVW.js} +451 -275
- phoenix/server/static/assets/{vendor-DPvSDRn3.js → vendor-CP0b0YG0.js} +2 -2
- phoenix/server/static/assets/{vendor-arizeai-CkvPT67c.js → vendor-arizeai-DTbiPGp6.js} +27 -27
- phoenix/server/static/assets/vendor-codemirror-DtdPDzrv.js +15 -0
- phoenix/server/static/assets/{vendor-recharts-5jlNaZuF.js → vendor-recharts-A0DA1O99.js} +1 -1
- phoenix/server/thread_server.py +2 -2
- phoenix/server/types.py +18 -0
- phoenix/session/client.py +5 -3
- phoenix/session/session.py +2 -2
- phoenix/trace/dsl/filter.py +2 -6
- phoenix/trace/fixtures.py +17 -23
- phoenix/trace/utils.py +23 -0
- phoenix/utilities/client.py +116 -0
- phoenix/utilities/project.py +1 -1
- phoenix/version.py +1 -1
- phoenix/server/api/routers/v1/dataset_examples.py +0 -178
- phoenix/server/openapi/docs.py +0 -221
- phoenix/server/static/assets/components-DeS0YEmv.js +0 -1142
- phoenix/server/static/assets/vendor-codemirror-Cqwpwlua.js +0 -12
- {arize_phoenix-4.14.1.dist-info → arize_phoenix-4.16.0.dist-info}/WHEEL +0 -0
- {arize_phoenix-4.14.1.dist-info → arize_phoenix-4.16.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-4.14.1.dist-info → arize_phoenix-4.16.0.dist-info}/licenses/LICENSE +0 -0
phoenix/server/api/routers/v1/evaluations.py (+164 -73)

```diff
--- a/phoenix/server/api/routers/v1/evaluations.py
+++ b/phoenix/server/api/routers/v1/evaluations.py
@@ -1,22 +1,20 @@
 import gzip
 from itertools import chain
-from typing import
+from typing import Any, Callable, Iterator, Optional, Tuple, Union, cast
 
 import pandas as pd
 import pyarrow as pa
+from fastapi import APIRouter, Header, HTTPException, Query
 from google.protobuf.message import DecodeError
 from pandas import DataFrame
 from sqlalchemy import select
 from sqlalchemy.engine import Connectable
-from sqlalchemy.ext.asyncio import (
-    AsyncSession,
-)
 from starlette.background import BackgroundTask
 from starlette.datastructures import State
 from starlette.requests import Request
 from starlette.responses import Response, StreamingResponse
 from starlette.status import (
-
+    HTTP_204_NO_CONTENT,
     HTTP_404_NOT_FOUND,
     HTTP_415_UNSUPPORTED_MEDIA_TYPE,
     HTTP_422_UNPROCESSABLE_ENTITY,
@@ -26,9 +24,10 @@ from typing_extensions import TypeAlias
 import phoenix.trace.v1 as pb
 from phoenix.config import DEFAULT_PROJECT_NAME
 from phoenix.db import models
+from phoenix.db.insertion.types import Precursors
 from phoenix.exceptions import PhoenixEvaluationNameIsMissing
 from phoenix.server.api.routers.utils import table_to_bytes
-from phoenix.
+from phoenix.server.types import DbSessionFactory
 from phoenix.trace.span_evaluations import (
     DocumentEvaluations,
     Evaluations,
@@ -36,92 +35,98 @@ from phoenix.trace.span_evaluations import (
     TraceEvaluations,
 )
 
+from .utils import add_errors_to_responses
+
 EvaluationName: TypeAlias = str
 
+router = APIRouter(tags=["traces"], include_in_schema=False)
 
-… (30 removed lines, content not captured)
+
+@router.post(
+    "/evaluations",
+    operation_id="addEvaluations",
+    summary="Add span, trace, or document evaluations",
+    status_code=HTTP_204_NO_CONTENT,
+    responses=add_errors_to_responses(
+        [
+            {
+                "status_code": HTTP_415_UNSUPPORTED_MEDIA_TYPE,
+                "description": (
+                    "Unsupported content type, "
+                    "only gzipped protobuf and pandas-arrow are supported"
+                ),
+            },
+            HTTP_422_UNPROCESSABLE_ENTITY,
+        ]
+    ),
+    openapi_extra={
+        "requestBody": {
+            "required": True,
+            "content": {
+                "application/x-protobuf": {"schema": {"type": "string", "format": "binary"}},
+                "application/x-pandas-arrow": {"schema": {"type": "string", "format": "binary"}},
+            },
+        },
+    },
+)
+async def post_evaluations(
+    request: Request,
+    content_type: Optional[str] = Header(default=None),
+    content_encoding: Optional[str] = Header(default=None),
+) -> Response:
     if content_type == "application/x-pandas-arrow":
         return await _process_pyarrow(request)
     if content_type != "application/x-protobuf":
-
+        raise HTTPException(
+            detail="Unsupported content type", status_code=HTTP_415_UNSUPPORTED_MEDIA_TYPE
+        )
     body = await request.body()
-    content_encoding = request.headers.get("content-encoding")
     if content_encoding == "gzip":
         body = gzip.decompress(body)
     elif content_encoding:
-
+        raise HTTPException(
+            detail="Unsupported content encoding", status_code=HTTP_415_UNSUPPORTED_MEDIA_TYPE
+        )
    evaluation = pb.Evaluation()
     try:
         evaluation.ParseFromString(body)
     except DecodeError:
-
+        raise HTTPException(
+            detail="Request body is invalid", status_code=HTTP_422_UNPROCESSABLE_ENTITY
+        )
     if not evaluation.name.strip():
-
-            "Evaluation name must not be blank/empty",
+        raise HTTPException(
+            detail="Evaluation name must not be blank/empty",
             status_code=HTTP_422_UNPROCESSABLE_ENTITY,
         )
     await request.state.queue_evaluation_for_bulk_insert(evaluation)
     return Response()
 
 
-
-""
-… (14 removed lines, content not captured)
-    403:
-        description: Forbidden
-    404:
-        description: Not found
-    """
+@router.get(
+    "/evaluations",
+    operation_id="getEvaluations",
+    summary="Get span, trace, or document evaluations from a project",
+    responses=add_errors_to_responses([HTTP_404_NOT_FOUND]),
+)
+async def get_evaluations(
+    request: Request,
+    project_name: Optional[str] = Query(
+        default=None,
+        description=(
+            "The name of the project to get evaluations from (if omitted, "
+            f"evaluations will be drawn from the `{DEFAULT_PROJECT_NAME}` project)"
+        ),
+    ),
+) -> Response:
     project_name = (
-
+        project_name
         or request.query_params.get("project-name")  # for backward compatibility
         or request.headers.get("project-name")  # read from headers for backwards compatibility
         or DEFAULT_PROJECT_NAME
     )
 
-    db:
+    db: DbSessionFactory = request.app.state.db
     async with db() as session:
         connection = await session.connection()
         trace_evals_dataframe = await connection.run_sync(
@@ -169,28 +174,114 @@ async def _process_pyarrow(request: Request) -> Response:
     try:
         reader = pa.ipc.open_stream(body)
     except pa.ArrowInvalid:
-
-
+        raise HTTPException(
+            detail="Request body is not valid pyarrow",
             status_code=HTTP_422_UNPROCESSABLE_ENTITY,
         )
     try:
         evaluations = Evaluations.from_pyarrow_reader(reader)
     except Exception as e:
         if isinstance(e, PhoenixEvaluationNameIsMissing):
-
-                "Evaluation name must not be blank/empty",
+            raise HTTPException(
+                detail="Evaluation name must not be blank/empty",
                 status_code=HTTP_422_UNPROCESSABLE_ENTITY,
             )
-
-
+        raise HTTPException(
+            detail="Invalid data in request body",
             status_code=HTTP_422_UNPROCESSABLE_ENTITY,
         )
     return Response(background=BackgroundTask(_add_evaluations, request.state, evaluations))
 
 
 async def _add_evaluations(state: State, evaluations: Evaluations) -> None:
-
-
+    dataframe = evaluations.dataframe
+    eval_name = evaluations.eval_name
+    names = dataframe.index.names
+    if (
+        len(names) == 2
+        and "document_position" in names
+        and ("context.span_id" in names or "span_id" in names)
+    ):
+        cls = _document_annotation_factory(
+            names.index("span_id") if "span_id" in names else names.index("context.span_id"),
+            names.index("document_position"),
+        )
+        for index, row in dataframe.iterrows():
+            score, label, explanation = _get_annotation_result(row)
+            document_annotation = cls(cast(Union[Tuple[str, int], Tuple[int, str]], index))(
+                name=eval_name,
+                annotator_kind="LLM",
+                score=score,
+                label=label,
+                explanation=explanation,
+                metadata_={},
+            )
+            await state.enqueue(document_annotation)
+    elif len(names) == 1 and names[0] in ("context.span_id", "span_id"):
+        for index, row in dataframe.iterrows():
+            score, label, explanation = _get_annotation_result(row)
+            span_annotation = _span_annotation_factory(cast(str, index))(
+                name=eval_name,
+                annotator_kind="LLM",
+                score=score,
+                label=label,
+                explanation=explanation,
+                metadata_={},
+            )
+            await state.enqueue(span_annotation)
+    elif len(names) == 1 and names[0] in ("context.trace_id", "trace_id"):
+        for index, row in dataframe.iterrows():
+            score, label, explanation = _get_annotation_result(row)
+            trace_annotation = _trace_annotation_factory(cast(str, index))(
+                name=eval_name,
+                annotator_kind="LLM",
+                score=score,
+                label=label,
+                explanation=explanation,
+                metadata_={},
+            )
+            await state.enqueue(trace_annotation)
+
+
+def _get_annotation_result(
+    row: "pd.Series[Any]",
+) -> Tuple[Optional[float], Optional[str], Optional[str]]:
+    return (
+        cast(Optional[float], row.get("score")),
+        cast(Optional[str], row.get("label")),
+        cast(Optional[str], row.get("explanation")),
+    )
+
+
+def _document_annotation_factory(
+    span_id_idx: int,
+    document_position_idx: int,
+) -> Callable[
+    [Union[Tuple[str, int], Tuple[int, str]]],
+    Callable[..., Precursors.DocumentAnnotation],
+]:
+    return lambda index: lambda **kwargs: Precursors.DocumentAnnotation(
+        span_id=str(index[span_id_idx]),
+        document_position=int(index[document_position_idx]),
+        obj=models.DocumentAnnotation(
+            document_position=int(index[document_position_idx]),
+            **kwargs,
+        ),
+    )
+
+
+def _span_annotation_factory(span_id: str) -> Callable[..., Precursors.SpanAnnotation]:
+    return lambda **kwargs: Precursors.SpanAnnotation(
+        span_id=str(span_id),
+        obj=models.SpanAnnotation(**kwargs),
+    )
+
+
+def _trace_annotation_factory(trace_id: str) -> Callable[..., Precursors.TraceAnnotation]:
+    return lambda **kwargs: Precursors.TraceAnnotation(
+        trace_id=str(trace_id),
+        obj=models.TraceAnnotation(**kwargs),
+    )
 
 
 def _read_sql_trace_evaluations_into_dataframe(
```

phoenix/server/api/routers/v1/experiment_evaluations.py (+68 -91)

```diff
--- a/phoenix/server/api/routers/v1/experiment_evaluations.py
+++ b/phoenix/server/api/routers/v1/experiment_evaluations.py
@@ -1,7 +1,9 @@
 from datetime import datetime
+from typing import Any, Dict, Literal, Optional
 
+from fastapi import APIRouter, HTTPException
+from pydantic import Field
 from starlette.requests import Request
-from starlette.responses import JSONResponse, Response
 from starlette.status import HTTP_404_NOT_FOUND
 from strawberry.relay import GlobalID
 
@@ -10,103 +12,76 @@ from phoenix.db.helpers import SupportedSQLDialect
 from phoenix.db.insertion.helpers import insert_on_conflict
 from phoenix.server.api.types.node import from_global_id_with_expected_type
 
+from .pydantic_compat import V1RoutesBaseModel
+from .utils import ResponseBody, add_errors_to_responses
 
-… (50 removed lines, content not captured)
-                    type: string
-                    format: date-time
-                    description: The end time of the evaluation in ISO format
-                trace_id:
-                    type: string
-                    description: Optional trace ID for tracking
-            required:
-                - experiment_run_id
-                - name
-                - annotator_kind
-                - start_time
-                - end_time
-    responses:
-        200:
-            description: Experiment evaluation upserted successfully
-            content:
-                application/json:
-                    schema:
-                        type: object
-                        properties:
-                            data:
-                                type: object
-                                properties:
-                                    id:
-                                        type: string
-                                        description: The ID of the upserted experiment evaluation
-        404:
-            description: ExperimentRun not found
-    """
+router = APIRouter(tags=["experiments"], include_in_schema=False)
+
+
+class ExperimentEvaluationResult(V1RoutesBaseModel):
+    label: Optional[str] = Field(default=None, description="The label assigned by the evaluation")
+    score: Optional[float] = Field(default=None, description="The score assigned by the evaluation")
+    explanation: Optional[str] = Field(
+        default=None, description="Explanation of the evaluation result"
+    )
+
+
+class UpsertExperimentEvaluationRequestBody(V1RoutesBaseModel):
+    experiment_run_id: str = Field(description="The ID of the experiment run being evaluated")
+    name: str = Field(description="The name of the evaluation")
+    annotator_kind: Literal["LLM", "CODE", "HUMAN"] = Field(
+        description="The kind of annotator used for the evaluation"
+    )
+    start_time: datetime = Field(description="The start time of the evaluation in ISO format")
+    end_time: datetime = Field(description="The end time of the evaluation in ISO format")
+    result: ExperimentEvaluationResult = Field(description="The result of the evaluation")
+    error: Optional[str] = Field(
+        None, description="Optional error message if the evaluation encountered an error"
+    )
+    metadata: Optional[Dict[str, Any]] = Field(
+        default=None, description="Metadata for the evaluation"
+    )
+    trace_id: Optional[str] = Field(default=None, description="Optional trace ID for tracking")
+
+
+class UpsertExperimentEvaluationResponseBodyData(V1RoutesBaseModel):
+    id: str = Field(description="The ID of the upserted experiment evaluation")
+
+
+class UpsertExperimentEvaluationResponseBody(
+    ResponseBody[UpsertExperimentEvaluationResponseBodyData]
+):
+    pass
+
+
+@router.post(
+    "/experiment_evaluations",
+    operation_id="upsertExperimentEvaluation",
+    summary="Create or update evaluation for an experiment run",
+    responses=add_errors_to_responses(
+        [{"status_code": HTTP_404_NOT_FOUND, "description": "Experiment run not found"}]
+    ),
+)
+async def upsert_experiment_evaluation(
+    request: Request, request_body: UpsertExperimentEvaluationRequestBody
+) -> UpsertExperimentEvaluationResponseBody:
     payload = await request.json()
     experiment_run_gid = GlobalID.from_id(payload["experiment_run_id"])
     try:
         experiment_run_id = from_global_id_with_expected_type(experiment_run_gid, "ExperimentRun")
     except ValueError:
-
-
+        raise HTTPException(
+            detail=f"ExperimentRun with ID {experiment_run_gid} does not exist",
            status_code=HTTP_404_NOT_FOUND,
         )
-    name =
-    annotator_kind =
-    result =
-    label = result.
-    score = result.
-    explanation = result.
-    error =
-    metadata =
+    name = request_body.name
+    annotator_kind = request_body.annotator_kind
+    result = request_body.result
+    label = result.label if result else None
+    score = result.score if result else None
+    explanation = result.explanation if result else None
+    error = request_body.error
+    metadata = request_body.metadata or {}
     start_time = payload["start_time"]
     end_time = payload["end_time"]
     async with request.app.state.db() as session:
@@ -133,4 +108,6 @@ async def upsert_experiment_evaluation(request: Request) -> Response:
             ).returning(models.ExperimentRunAnnotation)
         )
     evaluation_gid = GlobalID("ExperimentEvaluation", str(exp_eval_run.id))
-    return
+    return UpsertExperimentEvaluationResponseBody(
+        data=UpsertExperimentEvaluationResponseBodyData(id=str(evaluation_gid))
+    )
```
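
For context on the reworked ingestion path in the `evaluations.py` diff above, here is a minimal sketch of logging span evaluations from a client. It assumes a Phoenix server on the default `localhost:6006` and uses the documented client helpers (`phoenix.Client.log_evaluations`, `phoenix.trace.SpanEvaluations`); the span ID is a placeholder. Per `_add_evaluations`, rows are routed to span, trace, or document annotations based on the dataframe's index names.

```python
import pandas as pd
import phoenix as px
from phoenix.trace import SpanEvaluations

# The index name must be "context.span_id" or "span_id" so that
# _add_evaluations treats each row as a span annotation; the
# score/label/explanation columns are all optional.
eval_df = pd.DataFrame(
    {
        "score": [1.0],
        "label": ["correct"],
        "explanation": ["matches the reference answer"],
    },
    index=pd.Index(["7e2f08cb43bbf521"], name="context.span_id"),  # placeholder span ID
)

# The client ships the dataframe as pyarrow with content type
# "application/x-pandas-arrow", which post_evaluations above accepts.
px.Client().log_evaluations(SpanEvaluations(eval_name="correctness", dataframe=eval_df))
```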
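
Likewise, a hedged sketch of calling the new `upsertExperimentEvaluation` operation from the `experiment_evaluations.py` diff above over plain HTTP. The field names mirror `UpsertExperimentEvaluationRequestBody`; the base URL, the `/v1` mount point, and the experiment-run `GlobalID` are assumptions.

```python
from datetime import datetime, timezone

import httpx

body = {
    "experiment_run_id": "RXhwZXJpbWVudFJ1bjox",  # placeholder GlobalID of an ExperimentRun
    "name": "accuracy",
    "annotator_kind": "CODE",  # one of "LLM", "CODE", "HUMAN"
    "start_time": datetime.now(timezone.utc).isoformat(),
    "end_time": datetime.now(timezone.utc).isoformat(),
    "result": {"label": "correct", "score": 1.0, "explanation": None},
    "metadata": {},
}

response = httpx.post("http://localhost:6006/v1/experiment_evaluations", json=body)
response.raise_for_status()
# The ID of the upserted evaluation comes back wrapped in a "data" envelope
# (see UpsertExperimentEvaluationResponseBody above).
print(response.json()["data"]["id"])
```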