arize-phoenix 4.12.1rc1__py3-none-any.whl → 4.14.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/METADATA +12 -9
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/RECORD +48 -49
- phoenix/db/bulk_inserter.py +3 -1
- phoenix/experiments/evaluators/base.py +4 -0
- phoenix/experiments/evaluators/code_evaluators.py +80 -0
- phoenix/experiments/evaluators/llm_evaluators.py +77 -1
- phoenix/experiments/evaluators/utils.py +70 -21
- phoenix/experiments/functions.py +14 -14
- phoenix/server/api/context.py +7 -3
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +23 -23
- phoenix/server/api/dataloaders/experiment_error_rates.py +30 -10
- phoenix/server/api/dataloaders/experiment_run_counts.py +18 -5
- phoenix/server/api/input_types/{CreateSpanAnnotationsInput.py → CreateSpanAnnotationInput.py} +4 -2
- phoenix/server/api/input_types/{CreateTraceAnnotationsInput.py → CreateTraceAnnotationInput.py} +4 -2
- phoenix/server/api/input_types/{PatchAnnotationsInput.py → PatchAnnotationInput.py} +4 -2
- phoenix/server/api/mutations/span_annotations_mutations.py +12 -6
- phoenix/server/api/mutations/trace_annotations_mutations.py +12 -6
- phoenix/server/api/openapi/main.py +2 -18
- phoenix/server/api/openapi/schema.py +12 -12
- phoenix/server/api/routers/v1/__init__.py +83 -36
- phoenix/server/api/routers/v1/dataset_examples.py +123 -102
- phoenix/server/api/routers/v1/datasets.py +506 -390
- phoenix/server/api/routers/v1/evaluations.py +66 -73
- phoenix/server/api/routers/v1/experiment_evaluations.py +91 -68
- phoenix/server/api/routers/v1/experiment_runs.py +155 -98
- phoenix/server/api/routers/v1/experiments.py +181 -132
- phoenix/server/api/routers/v1/spans.py +173 -144
- phoenix/server/api/routers/v1/traces.py +128 -115
- phoenix/server/api/types/Experiment.py +2 -2
- phoenix/server/api/types/Inferences.py +1 -2
- phoenix/server/api/types/Model.py +1 -2
- phoenix/server/app.py +177 -152
- phoenix/server/openapi/docs.py +221 -0
- phoenix/server/static/.vite/manifest.json +31 -31
- phoenix/server/static/assets/{components-C8sm_r1F.js → components-DeS0YEmv.js} +2 -2
- phoenix/server/static/assets/index-CQgXRwU0.js +100 -0
- phoenix/server/static/assets/{pages-bN7juCjh.js → pages-hdjlFZhO.js} +275 -198
- phoenix/server/static/assets/{vendor-CUDAPm8e.js → vendor-DPvSDRn3.js} +1 -1
- phoenix/server/static/assets/{vendor-arizeai-Do2HOmcL.js → vendor-arizeai-CkvPT67c.js} +2 -2
- phoenix/server/static/assets/{vendor-codemirror-CrdxOlMs.js → vendor-codemirror-Cqwpwlua.js} +1 -1
- phoenix/server/static/assets/{vendor-recharts-PKRvByVe.js → vendor-recharts-5jlNaZuF.js} +1 -1
- phoenix/server/thread_server.py +2 -2
- phoenix/session/client.py +9 -8
- phoenix/trace/dsl/filter.py +40 -25
- phoenix/version.py +1 -1
- phoenix/server/api/routers/v1/pydantic_compat.py +0 -78
- phoenix/server/api/routers/v1/utils.py +0 -95
- phoenix/server/static/assets/index-BEKPzgQs.js +0 -100
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/WHEEL +0 -0
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
import gzip
|
|
2
2
|
from itertools import chain
|
|
3
|
-
from typing import AsyncContextManager, Callable, Iterator,
|
|
3
|
+
from typing import AsyncContextManager, Callable, Iterator, Tuple
|
|
4
4
|
|
|
5
5
|
import pandas as pd
|
|
6
6
|
import pyarrow as pa
|
|
7
|
-
from fastapi import APIRouter, Header, HTTPException, Query
|
|
8
7
|
from google.protobuf.message import DecodeError
|
|
9
8
|
from pandas import DataFrame
|
|
10
9
|
from sqlalchemy import select
|
|
@@ -17,7 +16,7 @@ from starlette.datastructures import State
|
|
|
17
16
|
from starlette.requests import Request
|
|
18
17
|
from starlette.responses import Response, StreamingResponse
|
|
19
18
|
from starlette.status import (
|
|
20
|
-
|
|
19
|
+
HTTP_403_FORBIDDEN,
|
|
21
20
|
HTTP_404_NOT_FOUND,
|
|
22
21
|
HTTP_415_UNSUPPORTED_MEDIA_TYPE,
|
|
23
22
|
HTTP_422_UNPROCESSABLE_ENTITY,
|
|
@@ -37,92 +36,86 @@ from phoenix.trace.span_evaluations import (
|
|
|
37
36
|
TraceEvaluations,
|
|
38
37
|
)
|
|
39
38
|
|
|
40
|
-
from .utils import add_errors_to_responses
|
|
41
|
-
|
|
42
39
|
EvaluationName: TypeAlias = str
|
|
43
40
|
|
|
44
|
-
router = APIRouter(tags=["traces"], include_in_schema=False)
|
|
45
|
-
|
|
46
41
|
|
|
47
|
-
|
|
48
|
-
"
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
content_type
|
|
77
|
-
content_encoding: Optional[str] = Header(default=None),
|
|
78
|
-
) -> Response:
|
|
42
|
+
async def post_evaluations(request: Request) -> Response:
|
|
43
|
+
"""
|
|
44
|
+
summary: Add evaluations to a span, trace, or document
|
|
45
|
+
operationId: addEvaluations
|
|
46
|
+
tags:
|
|
47
|
+
- private
|
|
48
|
+
requestBody:
|
|
49
|
+
required: true
|
|
50
|
+
content:
|
|
51
|
+
application/x-protobuf:
|
|
52
|
+
schema:
|
|
53
|
+
type: string
|
|
54
|
+
format: binary
|
|
55
|
+
application/x-pandas-arrow:
|
|
56
|
+
schema:
|
|
57
|
+
type: string
|
|
58
|
+
format: binary
|
|
59
|
+
responses:
|
|
60
|
+
200:
|
|
61
|
+
description: Success
|
|
62
|
+
403:
|
|
63
|
+
description: Forbidden
|
|
64
|
+
415:
|
|
65
|
+
description: Unsupported content type, only gzipped protobuf and pandas-arrow are supported
|
|
66
|
+
422:
|
|
67
|
+
description: Request body is invalid
|
|
68
|
+
"""
|
|
69
|
+
if request.app.state.read_only:
|
|
70
|
+
return Response(status_code=HTTP_403_FORBIDDEN)
|
|
71
|
+
content_type = request.headers.get("content-type")
|
|
79
72
|
if content_type == "application/x-pandas-arrow":
|
|
80
73
|
return await _process_pyarrow(request)
|
|
81
74
|
if content_type != "application/x-protobuf":
|
|
82
|
-
|
|
83
|
-
detail="Unsupported content type", status_code=HTTP_415_UNSUPPORTED_MEDIA_TYPE
|
|
84
|
-
)
|
|
75
|
+
return Response("Unsupported content type", status_code=HTTP_415_UNSUPPORTED_MEDIA_TYPE)
|
|
85
76
|
body = await request.body()
|
|
77
|
+
content_encoding = request.headers.get("content-encoding")
|
|
86
78
|
if content_encoding == "gzip":
|
|
87
79
|
body = gzip.decompress(body)
|
|
88
80
|
elif content_encoding:
|
|
89
|
-
|
|
90
|
-
detail="Unsupported content encoding", status_code=HTTP_415_UNSUPPORTED_MEDIA_TYPE
|
|
91
|
-
)
|
|
81
|
+
return Response("Unsupported content encoding", status_code=HTTP_415_UNSUPPORTED_MEDIA_TYPE)
|
|
92
82
|
evaluation = pb.Evaluation()
|
|
93
83
|
try:
|
|
94
84
|
evaluation.ParseFromString(body)
|
|
95
85
|
except DecodeError:
|
|
96
|
-
|
|
97
|
-
detail="Request body is invalid", status_code=HTTP_422_UNPROCESSABLE_ENTITY
|
|
98
|
-
)
|
|
86
|
+
return Response("Request body is invalid", status_code=HTTP_422_UNPROCESSABLE_ENTITY)
|
|
99
87
|
if not evaluation.name.strip():
|
|
100
|
-
|
|
101
|
-
|
|
88
|
+
return Response(
|
|
89
|
+
"Evaluation name must not be blank/empty",
|
|
102
90
|
status_code=HTTP_422_UNPROCESSABLE_ENTITY,
|
|
103
91
|
)
|
|
104
92
|
await request.state.queue_evaluation_for_bulk_insert(evaluation)
|
|
105
93
|
return Response()
|
|
106
94
|
|
|
107
95
|
|
|
108
|
-
|
|
109
|
-
"
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
96
|
+
async def get_evaluations(request: Request) -> Response:
|
|
97
|
+
"""
|
|
98
|
+
summary: Get evaluations from Phoenix
|
|
99
|
+
operationId: getEvaluation
|
|
100
|
+
tags:
|
|
101
|
+
- private
|
|
102
|
+
parameters:
|
|
103
|
+
- name: project_name
|
|
104
|
+
in: query
|
|
105
|
+
schema:
|
|
106
|
+
type: string
|
|
107
|
+
default: default
|
|
108
|
+
description: The project name to get evaluations from
|
|
109
|
+
responses:
|
|
110
|
+
200:
|
|
111
|
+
description: Success
|
|
112
|
+
403:
|
|
113
|
+
description: Forbidden
|
|
114
|
+
404:
|
|
115
|
+
description: Not found
|
|
116
|
+
"""
|
|
124
117
|
project_name = (
|
|
125
|
-
project_name
|
|
118
|
+
request.query_params.get("project_name")
|
|
126
119
|
or request.query_params.get("project-name") # for backward compatibility
|
|
127
120
|
or request.headers.get("project-name") # read from headers for backwards compatibility
|
|
128
121
|
or DEFAULT_PROJECT_NAME
|
|
@@ -176,20 +169,20 @@ async def _process_pyarrow(request: Request) -> Response:
|
|
|
176
169
|
try:
|
|
177
170
|
reader = pa.ipc.open_stream(body)
|
|
178
171
|
except pa.ArrowInvalid:
|
|
179
|
-
|
|
180
|
-
|
|
172
|
+
return Response(
|
|
173
|
+
content="Request body is not valid pyarrow",
|
|
181
174
|
status_code=HTTP_422_UNPROCESSABLE_ENTITY,
|
|
182
175
|
)
|
|
183
176
|
try:
|
|
184
177
|
evaluations = Evaluations.from_pyarrow_reader(reader)
|
|
185
178
|
except Exception as e:
|
|
186
179
|
if isinstance(e, PhoenixEvaluationNameIsMissing):
|
|
187
|
-
|
|
188
|
-
|
|
180
|
+
return Response(
|
|
181
|
+
"Evaluation name must not be blank/empty",
|
|
189
182
|
status_code=HTTP_422_UNPROCESSABLE_ENTITY,
|
|
190
183
|
)
|
|
191
|
-
|
|
192
|
-
|
|
184
|
+
return Response(
|
|
185
|
+
content="Invalid data in request body",
|
|
193
186
|
status_code=HTTP_422_UNPROCESSABLE_ENTITY,
|
|
194
187
|
)
|
|
195
188
|
return Response(background=BackgroundTask(_add_evaluations, request.state, evaluations))
|
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
|
-
from typing import Any, Dict, Literal, Optional
|
|
3
2
|
|
|
4
|
-
from fastapi import APIRouter, HTTPException
|
|
5
|
-
from pydantic import Field
|
|
6
3
|
from starlette.requests import Request
|
|
4
|
+
from starlette.responses import JSONResponse, Response
|
|
7
5
|
from starlette.status import HTTP_404_NOT_FOUND
|
|
8
6
|
from strawberry.relay import GlobalID
|
|
9
7
|
|
|
@@ -12,76 +10,103 @@ from phoenix.db.helpers import SupportedSQLDialect
|
|
|
12
10
|
from phoenix.db.insertion.helpers import insert_on_conflict
|
|
13
11
|
from phoenix.server.api.types.node import from_global_id_with_expected_type
|
|
14
12
|
|
|
15
|
-
from .pydantic_compat import V1RoutesBaseModel
|
|
16
|
-
from .utils import ResponseBody, add_errors_to_responses
|
|
17
13
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
14
|
+
async def upsert_experiment_evaluation(request: Request) -> Response:
|
|
15
|
+
"""
|
|
16
|
+
summary: Create an evaluation for a specific experiment run
|
|
17
|
+
operationId: upsertExperimentEvaluation
|
|
18
|
+
tags:
|
|
19
|
+
- private
|
|
20
|
+
requestBody:
|
|
21
|
+
description: Details of the experiment evaluation to be upserted
|
|
22
|
+
required: true
|
|
23
|
+
content:
|
|
24
|
+
application/json:
|
|
25
|
+
schema:
|
|
26
|
+
type: object
|
|
27
|
+
properties:
|
|
28
|
+
experiment_run_id:
|
|
29
|
+
type: string
|
|
30
|
+
description: The ID of the experiment run being evaluated
|
|
31
|
+
name:
|
|
32
|
+
type: string
|
|
33
|
+
description: The name of the evaluation
|
|
34
|
+
annotator_kind:
|
|
35
|
+
type: string
|
|
36
|
+
description: The kind of annotator used for the evaluation
|
|
37
|
+
result:
|
|
38
|
+
type: object
|
|
39
|
+
description: The result of the evaluation
|
|
40
|
+
properties:
|
|
41
|
+
label:
|
|
42
|
+
type: string
|
|
43
|
+
description: The label assigned by the evaluation
|
|
44
|
+
score:
|
|
45
|
+
type: number
|
|
46
|
+
format: float
|
|
47
|
+
description: The score assigned by the evaluation
|
|
48
|
+
explanation:
|
|
49
|
+
type: string
|
|
50
|
+
description: Explanation of the evaluation result
|
|
51
|
+
error:
|
|
52
|
+
type: string
|
|
53
|
+
description: Optional error message if the evaluation encountered an error
|
|
54
|
+
metadata:
|
|
55
|
+
type: object
|
|
56
|
+
description: Metadata for the evaluation
|
|
57
|
+
additionalProperties:
|
|
58
|
+
type: string
|
|
59
|
+
start_time:
|
|
60
|
+
type: string
|
|
61
|
+
format: date-time
|
|
62
|
+
description: The start time of the evaluation in ISO format
|
|
63
|
+
end_time:
|
|
64
|
+
type: string
|
|
65
|
+
format: date-time
|
|
66
|
+
description: The end time of the evaluation in ISO format
|
|
67
|
+
trace_id:
|
|
68
|
+
type: string
|
|
69
|
+
description: Optional trace ID for tracking
|
|
70
|
+
required:
|
|
71
|
+
- experiment_run_id
|
|
72
|
+
- name
|
|
73
|
+
- annotator_kind
|
|
74
|
+
- start_time
|
|
75
|
+
- end_time
|
|
76
|
+
responses:
|
|
77
|
+
200:
|
|
78
|
+
description: Experiment evaluation upserted successfully
|
|
79
|
+
content:
|
|
80
|
+
application/json:
|
|
81
|
+
schema:
|
|
82
|
+
type: object
|
|
83
|
+
properties:
|
|
84
|
+
data:
|
|
85
|
+
type: object
|
|
86
|
+
properties:
|
|
87
|
+
id:
|
|
88
|
+
type: string
|
|
89
|
+
description: The ID of the upserted experiment evaluation
|
|
90
|
+
404:
|
|
91
|
+
description: ExperimentRun not found
|
|
92
|
+
"""
|
|
68
93
|
payload = await request.json()
|
|
69
94
|
experiment_run_gid = GlobalID.from_id(payload["experiment_run_id"])
|
|
70
95
|
try:
|
|
71
96
|
experiment_run_id = from_global_id_with_expected_type(experiment_run_gid, "ExperimentRun")
|
|
72
97
|
except ValueError:
|
|
73
|
-
|
|
74
|
-
|
|
98
|
+
return Response(
|
|
99
|
+
content=f"ExperimentRun with ID {experiment_run_gid} does not exist",
|
|
75
100
|
status_code=HTTP_404_NOT_FOUND,
|
|
76
101
|
)
|
|
77
|
-
name =
|
|
78
|
-
annotator_kind =
|
|
79
|
-
result =
|
|
80
|
-
label = result.label if result else None
|
|
81
|
-
score = result.score if result else None
|
|
82
|
-
explanation = result.explanation if result else None
|
|
83
|
-
error =
|
|
84
|
-
metadata =
|
|
102
|
+
name = payload["name"]
|
|
103
|
+
annotator_kind = payload["annotator_kind"]
|
|
104
|
+
result = payload.get("result")
|
|
105
|
+
label = result.get("label") if result else None
|
|
106
|
+
score = result.get("score") if result else None
|
|
107
|
+
explanation = result.get("explanation") if result else None
|
|
108
|
+
error = payload.get("error")
|
|
109
|
+
metadata = payload.get("metadata") or {}
|
|
85
110
|
start_time = payload["start_time"]
|
|
86
111
|
end_time = payload["end_time"]
|
|
87
112
|
async with request.app.state.db() as session:
|
|
@@ -108,6 +133,4 @@ async def upsert_experiment_evaluation(
|
|
|
108
133
|
).returning(models.ExperimentRunAnnotation)
|
|
109
134
|
)
|
|
110
135
|
evaluation_gid = GlobalID("ExperimentEvaluation", str(exp_eval_run.id))
|
|
111
|
-
return
|
|
112
|
-
data=UpsertExperimentEvaluationResponseBodyData(id=str(evaluation_gid))
|
|
113
|
-
)
|
|
136
|
+
return JSONResponse(content={"data": {"id": str(evaluation_gid)}})
|