arize-phoenix 4.12.1rc1__py3-none-any.whl → 4.14.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/METADATA +12 -9
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/RECORD +48 -49
- phoenix/db/bulk_inserter.py +3 -1
- phoenix/experiments/evaluators/base.py +4 -0
- phoenix/experiments/evaluators/code_evaluators.py +80 -0
- phoenix/experiments/evaluators/llm_evaluators.py +77 -1
- phoenix/experiments/evaluators/utils.py +70 -21
- phoenix/experiments/functions.py +14 -14
- phoenix/server/api/context.py +7 -3
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +23 -23
- phoenix/server/api/dataloaders/experiment_error_rates.py +30 -10
- phoenix/server/api/dataloaders/experiment_run_counts.py +18 -5
- phoenix/server/api/input_types/{CreateSpanAnnotationsInput.py → CreateSpanAnnotationInput.py} +4 -2
- phoenix/server/api/input_types/{CreateTraceAnnotationsInput.py → CreateTraceAnnotationInput.py} +4 -2
- phoenix/server/api/input_types/{PatchAnnotationsInput.py → PatchAnnotationInput.py} +4 -2
- phoenix/server/api/mutations/span_annotations_mutations.py +12 -6
- phoenix/server/api/mutations/trace_annotations_mutations.py +12 -6
- phoenix/server/api/openapi/main.py +2 -18
- phoenix/server/api/openapi/schema.py +12 -12
- phoenix/server/api/routers/v1/__init__.py +83 -36
- phoenix/server/api/routers/v1/dataset_examples.py +123 -102
- phoenix/server/api/routers/v1/datasets.py +506 -390
- phoenix/server/api/routers/v1/evaluations.py +66 -73
- phoenix/server/api/routers/v1/experiment_evaluations.py +91 -68
- phoenix/server/api/routers/v1/experiment_runs.py +155 -98
- phoenix/server/api/routers/v1/experiments.py +181 -132
- phoenix/server/api/routers/v1/spans.py +173 -144
- phoenix/server/api/routers/v1/traces.py +128 -115
- phoenix/server/api/types/Experiment.py +2 -2
- phoenix/server/api/types/Inferences.py +1 -2
- phoenix/server/api/types/Model.py +1 -2
- phoenix/server/app.py +177 -152
- phoenix/server/openapi/docs.py +221 -0
- phoenix/server/static/.vite/manifest.json +31 -31
- phoenix/server/static/assets/{components-C8sm_r1F.js → components-DeS0YEmv.js} +2 -2
- phoenix/server/static/assets/index-CQgXRwU0.js +100 -0
- phoenix/server/static/assets/{pages-bN7juCjh.js → pages-hdjlFZhO.js} +275 -198
- phoenix/server/static/assets/{vendor-CUDAPm8e.js → vendor-DPvSDRn3.js} +1 -1
- phoenix/server/static/assets/{vendor-arizeai-Do2HOmcL.js → vendor-arizeai-CkvPT67c.js} +2 -2
- phoenix/server/static/assets/{vendor-codemirror-CrdxOlMs.js → vendor-codemirror-Cqwpwlua.js} +1 -1
- phoenix/server/static/assets/{vendor-recharts-PKRvByVe.js → vendor-recharts-5jlNaZuF.js} +1 -1
- phoenix/server/thread_server.py +2 -2
- phoenix/session/client.py +9 -8
- phoenix/trace/dsl/filter.py +40 -25
- phoenix/version.py +1 -1
- phoenix/server/api/routers/v1/pydantic_compat.py +0 -78
- phoenix/server/api/routers/v1/utils.py +0 -95
- phoenix/server/static/assets/index-BEKPzgQs.js +0 -100
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/WHEEL +0 -0
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
|
-
from typing import Any, List, Optional
|
|
3
2
|
|
|
4
|
-
from fastapi import APIRouter, HTTPException
|
|
5
|
-
from pydantic import Field
|
|
6
3
|
from sqlalchemy import select
|
|
7
4
|
from starlette.requests import Request
|
|
5
|
+
from starlette.responses import JSONResponse, Response
|
|
8
6
|
from starlette.status import HTTP_404_NOT_FOUND
|
|
9
7
|
from strawberry.relay import GlobalID
|
|
10
8
|
|
|
@@ -12,129 +10,188 @@ from phoenix.db import models
|
|
|
12
10
|
from phoenix.db.models import ExperimentRunOutput
|
|
13
11
|
from phoenix.server.api.types.node import from_global_id_with_expected_type
|
|
14
12
|
|
|
15
|
-
from .pydantic_compat import V1RoutesBaseModel
|
|
16
|
-
from .utils import ResponseBody, add_errors_to_responses
|
|
17
13
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
14
|
+
async def create_experiment_run(request: Request) -> Response:
|
|
15
|
+
"""
|
|
16
|
+
summary: Create a new experiment run for a specific experiment
|
|
17
|
+
operationId: createExperimentRun
|
|
18
|
+
tags:
|
|
19
|
+
- private
|
|
20
|
+
parameters:
|
|
21
|
+
- in: path
|
|
22
|
+
name: experiment_id
|
|
23
|
+
required: true
|
|
24
|
+
description: The ID of the experiment for which the run is being created
|
|
25
|
+
schema:
|
|
26
|
+
type: string
|
|
27
|
+
requestBody:
|
|
28
|
+
description: Details of the experiment run to be created
|
|
29
|
+
required: true
|
|
30
|
+
content:
|
|
31
|
+
application/json:
|
|
32
|
+
schema:
|
|
33
|
+
type: object
|
|
34
|
+
properties:
|
|
35
|
+
dataset_example_id:
|
|
36
|
+
type: string
|
|
37
|
+
description: The ID of the dataset example used in the experiment run
|
|
38
|
+
trace_id:
|
|
39
|
+
type: string
|
|
40
|
+
description: Optional trace ID for tracking
|
|
41
|
+
output:
|
|
42
|
+
description: The output of the experiment task
|
|
43
|
+
repetition_number:
|
|
44
|
+
type: integer
|
|
45
|
+
description: The repetition number of the experiment run
|
|
46
|
+
start_time:
|
|
47
|
+
type: string
|
|
48
|
+
format: date-time
|
|
49
|
+
description: The start time of the experiment run in ISO format
|
|
50
|
+
end_time:
|
|
51
|
+
type: string
|
|
52
|
+
format: date-time
|
|
53
|
+
description: The end time of the experiment run in ISO format
|
|
54
|
+
error:
|
|
55
|
+
type: string
|
|
56
|
+
description: Optional error message if the experiment run encountered an error
|
|
57
|
+
nullable: true
|
|
58
|
+
required:
|
|
59
|
+
- dataset_example_id
|
|
60
|
+
- output
|
|
61
|
+
- repetition_number
|
|
62
|
+
- start_time
|
|
63
|
+
- end_time
|
|
64
|
+
responses:
|
|
65
|
+
200:
|
|
66
|
+
description: Experiment run created successfully
|
|
67
|
+
content:
|
|
68
|
+
application/json:
|
|
69
|
+
schema:
|
|
70
|
+
type: object
|
|
71
|
+
properties:
|
|
72
|
+
data:
|
|
73
|
+
type: object
|
|
74
|
+
properties:
|
|
75
|
+
id:
|
|
76
|
+
type: string
|
|
77
|
+
description: The ID of the created experiment run
|
|
78
|
+
404:
|
|
79
|
+
description: Experiment or DatasetExample not found
|
|
80
|
+
"""
|
|
81
|
+
experiment_gid = GlobalID.from_id(request.path_params["experiment_id"])
|
|
68
82
|
try:
|
|
69
|
-
|
|
83
|
+
experiment_id = from_global_id_with_expected_type(experiment_gid, "Experiment")
|
|
70
84
|
except ValueError:
|
|
71
|
-
|
|
72
|
-
|
|
85
|
+
return Response(
|
|
86
|
+
content=f"Experiment with ID {experiment_gid} does not exist",
|
|
73
87
|
status_code=HTTP_404_NOT_FOUND,
|
|
74
88
|
)
|
|
75
89
|
|
|
76
|
-
|
|
90
|
+
payload = await request.json()
|
|
91
|
+
|
|
92
|
+
example_gid = GlobalID.from_id(payload["dataset_example_id"])
|
|
77
93
|
try:
|
|
78
94
|
dataset_example_id = from_global_id_with_expected_type(example_gid, "DatasetExample")
|
|
79
95
|
except ValueError:
|
|
80
|
-
|
|
81
|
-
|
|
96
|
+
return Response(
|
|
97
|
+
content=f"DatasetExample with ID {example_gid} does not exist",
|
|
82
98
|
status_code=HTTP_404_NOT_FOUND,
|
|
83
99
|
)
|
|
84
100
|
|
|
85
|
-
trace_id =
|
|
86
|
-
task_output =
|
|
87
|
-
repetition_number =
|
|
88
|
-
start_time =
|
|
89
|
-
end_time =
|
|
90
|
-
error =
|
|
101
|
+
trace_id = payload.get("trace_id", None)
|
|
102
|
+
task_output = payload["output"]
|
|
103
|
+
repetition_number = payload["repetition_number"]
|
|
104
|
+
start_time = payload["start_time"]
|
|
105
|
+
end_time = payload["end_time"]
|
|
106
|
+
error = payload.get("error")
|
|
91
107
|
|
|
92
108
|
async with request.app.state.db() as session:
|
|
93
109
|
exp_run = models.ExperimentRun(
|
|
94
|
-
experiment_id=
|
|
110
|
+
experiment_id=experiment_id,
|
|
95
111
|
dataset_example_id=dataset_example_id,
|
|
96
112
|
trace_id=trace_id,
|
|
97
113
|
output=ExperimentRunOutput(task_output=task_output),
|
|
98
114
|
repetition_number=repetition_number,
|
|
99
|
-
start_time=start_time,
|
|
100
|
-
end_time=end_time,
|
|
115
|
+
start_time=datetime.fromisoformat(start_time),
|
|
116
|
+
end_time=datetime.fromisoformat(end_time),
|
|
101
117
|
error=error,
|
|
102
118
|
)
|
|
103
119
|
session.add(exp_run)
|
|
104
120
|
await session.flush()
|
|
105
121
|
run_gid = GlobalID("ExperimentRun", str(exp_run.id))
|
|
106
|
-
return
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
122
|
+
return JSONResponse(content={"data": {"id": str(run_gid)}})
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
async def list_experiment_runs(request: Request) -> Response:
|
|
126
|
+
"""
|
|
127
|
+
summary: List all runs for a specific experiment
|
|
128
|
+
operationId: listExperimentRuns
|
|
129
|
+
tags:
|
|
130
|
+
- private
|
|
131
|
+
parameters:
|
|
132
|
+
- in: path
|
|
133
|
+
name: experiment_id
|
|
134
|
+
required: true
|
|
135
|
+
description: The ID of the experiment to list runs for
|
|
136
|
+
schema:
|
|
137
|
+
type: string
|
|
138
|
+
responses:
|
|
139
|
+
200:
|
|
140
|
+
description: Experiment runs retrieved successfully
|
|
141
|
+
content:
|
|
142
|
+
application/json:
|
|
143
|
+
schema:
|
|
144
|
+
type: object
|
|
145
|
+
properties:
|
|
146
|
+
data:
|
|
147
|
+
type: array
|
|
148
|
+
items:
|
|
149
|
+
type: object
|
|
150
|
+
properties:
|
|
151
|
+
id:
|
|
152
|
+
type: string
|
|
153
|
+
description: The ID of the experiment run
|
|
154
|
+
experiment_id:
|
|
155
|
+
type: string
|
|
156
|
+
description: The ID of the experiment
|
|
157
|
+
dataset_example_id:
|
|
158
|
+
type: string
|
|
159
|
+
description: The ID of the dataset example
|
|
160
|
+
repetition_number:
|
|
161
|
+
type: integer
|
|
162
|
+
description: The repetition number of the experiment run
|
|
163
|
+
start_time:
|
|
164
|
+
type: string
|
|
165
|
+
format: date-time
|
|
166
|
+
description: The start time of the experiment run in ISO format
|
|
167
|
+
end_time:
|
|
168
|
+
type: string
|
|
169
|
+
format: date-time
|
|
170
|
+
description: The end time of the experiment run in ISO format
|
|
171
|
+
output:
|
|
172
|
+
description: The output of the experiment task
|
|
173
|
+
error:
|
|
174
|
+
type: string
|
|
175
|
+
description: Error message if the experiment run encountered an error
|
|
176
|
+
trace_id:
|
|
177
|
+
type: string
|
|
178
|
+
description: Optional trace ID for tracking
|
|
179
|
+
404:
|
|
180
|
+
description: Experiment not found
|
|
181
|
+
"""
|
|
182
|
+
experiment_gid = GlobalID.from_id(request.path_params["experiment_id"])
|
|
126
183
|
try:
|
|
127
|
-
|
|
184
|
+
experiment_id = from_global_id_with_expected_type(experiment_gid, "Experiment")
|
|
128
185
|
except ValueError:
|
|
129
|
-
|
|
130
|
-
|
|
186
|
+
return Response(
|
|
187
|
+
content=f"Experiment with ID {experiment_gid} does not exist",
|
|
131
188
|
status_code=HTTP_404_NOT_FOUND,
|
|
132
189
|
)
|
|
133
190
|
|
|
134
191
|
async with request.app.state.db() as session:
|
|
135
192
|
experiment_runs = await session.execute(
|
|
136
193
|
select(models.ExperimentRun)
|
|
137
|
-
.where(models.ExperimentRun.experiment_id ==
|
|
194
|
+
.where(models.ExperimentRun.experiment_id == experiment_id)
|
|
138
195
|
# order by dataset_example_id to be consistent with `list_dataset_examples`
|
|
139
196
|
.order_by(models.ExperimentRun.dataset_example_id.asc())
|
|
140
197
|
)
|
|
@@ -145,9 +202,9 @@ async def list_experiment_runs(
|
|
|
145
202
|
experiment_gid = GlobalID("Experiment", str(exp_run.experiment_id))
|
|
146
203
|
example_gid = GlobalID("DatasetExample", str(exp_run.dataset_example_id))
|
|
147
204
|
runs.append(
|
|
148
|
-
|
|
149
|
-
start_time=exp_run.start_time,
|
|
150
|
-
end_time=exp_run.end_time,
|
|
205
|
+
dict(
|
|
206
|
+
start_time=exp_run.start_time.isoformat(),
|
|
207
|
+
end_time=exp_run.end_time.isoformat(),
|
|
151
208
|
experiment_id=str(experiment_gid),
|
|
152
209
|
dataset_example_id=str(example_gid),
|
|
153
210
|
repetition_number=exp_run.repetition_number,
|
|
@@ -157,4 +214,4 @@ async def list_experiment_runs(
|
|
|
157
214
|
trace_id=exp_run.trace_id,
|
|
158
215
|
)
|
|
159
216
|
)
|
|
160
|
-
return
|
|
217
|
+
return JSONResponse(content={"data": runs}, status_code=200)
|