arize-phoenix 4.5.0__py3-none-any.whl → 4.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (123) hide show
  1. {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/METADATA +16 -8
  2. {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/RECORD +122 -58
  3. {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/WHEEL +1 -1
  4. phoenix/__init__.py +0 -27
  5. phoenix/config.py +42 -7
  6. phoenix/core/model.py +25 -25
  7. phoenix/core/model_schema.py +64 -62
  8. phoenix/core/model_schema_adapter.py +27 -25
  9. phoenix/datetime_utils.py +4 -0
  10. phoenix/db/bulk_inserter.py +54 -14
  11. phoenix/db/insertion/dataset.py +237 -0
  12. phoenix/db/insertion/evaluation.py +10 -10
  13. phoenix/db/insertion/helpers.py +17 -14
  14. phoenix/db/insertion/span.py +3 -3
  15. phoenix/db/migrations/types.py +29 -0
  16. phoenix/db/migrations/versions/10460e46d750_datasets.py +291 -0
  17. phoenix/db/migrations/versions/cf03bd6bae1d_init.py +2 -28
  18. phoenix/db/models.py +236 -4
  19. phoenix/experiments/__init__.py +6 -0
  20. phoenix/experiments/evaluators/__init__.py +29 -0
  21. phoenix/experiments/evaluators/base.py +153 -0
  22. phoenix/experiments/evaluators/code_evaluators.py +99 -0
  23. phoenix/experiments/evaluators/llm_evaluators.py +244 -0
  24. phoenix/experiments/evaluators/utils.py +186 -0
  25. phoenix/experiments/functions.py +757 -0
  26. phoenix/experiments/tracing.py +85 -0
  27. phoenix/experiments/types.py +753 -0
  28. phoenix/experiments/utils.py +24 -0
  29. phoenix/inferences/fixtures.py +23 -23
  30. phoenix/inferences/inferences.py +7 -7
  31. phoenix/inferences/validation.py +1 -1
  32. phoenix/server/api/context.py +20 -0
  33. phoenix/server/api/dataloaders/__init__.py +20 -0
  34. phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
  35. phoenix/server/api/dataloaders/dataset_example_revisions.py +100 -0
  36. phoenix/server/api/dataloaders/dataset_example_spans.py +43 -0
  37. phoenix/server/api/dataloaders/experiment_annotation_summaries.py +85 -0
  38. phoenix/server/api/dataloaders/experiment_error_rates.py +43 -0
  39. phoenix/server/api/dataloaders/experiment_run_counts.py +42 -0
  40. phoenix/server/api/dataloaders/experiment_sequence_number.py +49 -0
  41. phoenix/server/api/dataloaders/project_by_name.py +31 -0
  42. phoenix/server/api/dataloaders/span_descendants.py +2 -3
  43. phoenix/server/api/dataloaders/span_projects.py +33 -0
  44. phoenix/server/api/dataloaders/trace_row_ids.py +39 -0
  45. phoenix/server/api/helpers/dataset_helpers.py +179 -0
  46. phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
  47. phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
  48. phoenix/server/api/input_types/ClearProjectInput.py +15 -0
  49. phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
  50. phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
  51. phoenix/server/api/input_types/DatasetSort.py +17 -0
  52. phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
  53. phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
  54. phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
  55. phoenix/server/api/input_types/DeleteExperimentsInput.py +9 -0
  56. phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
  57. phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
  58. phoenix/server/api/mutations/__init__.py +13 -0
  59. phoenix/server/api/mutations/auth.py +11 -0
  60. phoenix/server/api/mutations/dataset_mutations.py +520 -0
  61. phoenix/server/api/mutations/experiment_mutations.py +65 -0
  62. phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +17 -14
  63. phoenix/server/api/mutations/project_mutations.py +47 -0
  64. phoenix/server/api/openapi/__init__.py +0 -0
  65. phoenix/server/api/openapi/main.py +6 -0
  66. phoenix/server/api/openapi/schema.py +16 -0
  67. phoenix/server/api/queries.py +503 -0
  68. phoenix/server/api/routers/v1/__init__.py +77 -2
  69. phoenix/server/api/routers/v1/dataset_examples.py +178 -0
  70. phoenix/server/api/routers/v1/datasets.py +965 -0
  71. phoenix/server/api/routers/v1/evaluations.py +8 -13
  72. phoenix/server/api/routers/v1/experiment_evaluations.py +143 -0
  73. phoenix/server/api/routers/v1/experiment_runs.py +220 -0
  74. phoenix/server/api/routers/v1/experiments.py +302 -0
  75. phoenix/server/api/routers/v1/spans.py +9 -5
  76. phoenix/server/api/routers/v1/traces.py +1 -4
  77. phoenix/server/api/schema.py +2 -303
  78. phoenix/server/api/types/AnnotatorKind.py +10 -0
  79. phoenix/server/api/types/Cluster.py +19 -19
  80. phoenix/server/api/types/CreateDatasetPayload.py +8 -0
  81. phoenix/server/api/types/Dataset.py +282 -63
  82. phoenix/server/api/types/DatasetExample.py +85 -0
  83. phoenix/server/api/types/DatasetExampleRevision.py +34 -0
  84. phoenix/server/api/types/DatasetVersion.py +14 -0
  85. phoenix/server/api/types/Dimension.py +30 -29
  86. phoenix/server/api/types/EmbeddingDimension.py +40 -34
  87. phoenix/server/api/types/Event.py +16 -16
  88. phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
  89. phoenix/server/api/types/Experiment.py +147 -0
  90. phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
  91. phoenix/server/api/types/ExperimentComparison.py +19 -0
  92. phoenix/server/api/types/ExperimentRun.py +91 -0
  93. phoenix/server/api/types/ExperimentRunAnnotation.py +57 -0
  94. phoenix/server/api/types/Inferences.py +80 -0
  95. phoenix/server/api/types/InferencesRole.py +23 -0
  96. phoenix/server/api/types/Model.py +43 -42
  97. phoenix/server/api/types/Project.py +26 -12
  98. phoenix/server/api/types/Span.py +79 -2
  99. phoenix/server/api/types/TimeSeries.py +6 -6
  100. phoenix/server/api/types/Trace.py +15 -4
  101. phoenix/server/api/types/UMAPPoints.py +1 -1
  102. phoenix/server/api/types/node.py +5 -111
  103. phoenix/server/api/types/pagination.py +10 -52
  104. phoenix/server/app.py +103 -49
  105. phoenix/server/main.py +49 -27
  106. phoenix/server/openapi/docs.py +3 -0
  107. phoenix/server/static/index.js +2300 -1294
  108. phoenix/server/templates/index.html +1 -0
  109. phoenix/services.py +15 -15
  110. phoenix/session/client.py +581 -22
  111. phoenix/session/session.py +47 -37
  112. phoenix/trace/exporter.py +14 -9
  113. phoenix/trace/fixtures.py +133 -7
  114. phoenix/trace/schemas.py +1 -2
  115. phoenix/trace/span_evaluations.py +3 -3
  116. phoenix/trace/trace_dataset.py +6 -6
  117. phoenix/utilities/json.py +61 -0
  118. phoenix/utilities/re.py +50 -0
  119. phoenix/version.py +1 -1
  120. phoenix/server/api/types/DatasetRole.py +0 -23
  121. {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/licenses/IP_NOTICE +0 -0
  122. {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/licenses/LICENSE +0 -0
  123. /phoenix/server/api/{helpers.py → helpers/__init__.py} +0 -0
@@ -44,14 +44,7 @@ async def post_evaluations(request: Request) -> Response:
44
44
  summary: Add evaluations to a span, trace, or document
45
45
  operationId: addEvaluations
46
46
  tags:
47
- - evaluations
48
- parameters:
49
- - name: project-name
50
- in: query
51
- schema:
52
- type: string
53
- default: default
54
- description: The project name to add the evaluation to
47
+ - private
55
48
  requestBody:
56
49
  required: true
57
50
  content:
@@ -105,9 +98,9 @@ async def get_evaluations(request: Request) -> Response:
105
98
  summary: Get evaluations from Phoenix
106
99
  operationId: getEvaluation
107
100
  tags:
108
- - evaluations
101
+ - private
109
102
  parameters:
110
- - name: project-name
103
+ - name: project_name
111
104
  in: query
112
105
  schema:
113
106
  type: string
@@ -116,13 +109,15 @@ async def get_evaluations(request: Request) -> Response:
116
109
  responses:
117
110
  200:
118
111
  description: Success
112
+ 403:
113
+ description: Forbidden
119
114
  404:
120
115
  description: Not found
121
116
  """
122
117
  project_name = (
123
- request.query_params.get("project-name")
124
- # read from headers for backwards compatibility
125
- or request.headers.get("project-name")
118
+ request.query_params.get("project_name")
119
+ or request.query_params.get("project-name") # for backward compatibility
120
+ or request.headers.get("project-name") # read from headers for backwards compatibility
126
121
  or DEFAULT_PROJECT_NAME
127
122
  )
128
123
 
@@ -0,0 +1,143 @@
1
+ from datetime import datetime
2
+
3
+ from starlette.requests import Request
4
+ from starlette.responses import JSONResponse, Response
5
+ from starlette.status import HTTP_404_NOT_FOUND
6
+ from strawberry.relay import GlobalID
7
+
8
+ from phoenix.db import models
9
+ from phoenix.db.helpers import SupportedSQLDialect
10
+ from phoenix.db.insertion.helpers import OnConflict, insert_on_conflict
11
+ from phoenix.server.api.types.node import from_global_id_with_expected_type
12
+
13
+
14
# Route handler: insert an evaluation (annotation) for an experiment run, or update
# the existing one with the same (experiment_run_id, name).
# NOTE: the docstring is the OpenAPI description of the route written as YAML, so it
# is kept free of prose; explanatory notes live in `#` comments instead.
async def upsert_experiment_evaluation(request: Request) -> Response:
    """
    summary: Create an evaluation for a specific experiment run
    operationId: upsertExperimentEvaluation
    tags:
      - private
    requestBody:
      description: Details of the experiment evaluation to be upserted
      required: true
      content:
        application/json:
          schema:
            type: object
            properties:
              experiment_run_id:
                type: string
                description: The ID of the experiment run being evaluated
              name:
                type: string
                description: The name of the evaluation
              annotator_kind:
                type: string
                description: The kind of annotator used for the evaluation
              result:
                type: object
                description: The result of the evaluation
                properties:
                  label:
                    type: string
                    description: The label assigned by the evaluation
                  score:
                    type: number
                    format: float
                    description: The score assigned by the evaluation
                  explanation:
                    type: string
                    description: Explanation of the evaluation result
              error:
                type: string
                description: Optional error message if the evaluation encountered an error
              metadata:
                type: object
                description: Metadata for the evaluation
                additionalProperties:
                  type: string
              start_time:
                type: string
                format: date-time
                description: The start time of the evaluation in ISO format
              end_time:
                type: string
                format: date-time
                description: The end time of the evaluation in ISO format
              trace_id:
                type: string
                description: Optional trace ID for tracking
            required:
              - experiment_run_id
              - name
              - annotator_kind
              - start_time
              - end_time
    responses:
      200:
        description: Experiment evaluation upserted successfully
        content:
          application/json:
            schema:
              type: object
              properties:
                data:
                  type: object
                  properties:
                    id:
                      type: string
                      description: The ID of the upserted experiment evaluation
      404:
        description: ExperimentRun not found
    """
    payload = await request.json()
    raw_run_id = payload["experiment_run_id"]
    try:
        # Parse and type-check in the same `try`: a string that is not a valid
        # global ID makes `GlobalID.from_id` raise (a ValueError subclass per the
        # strawberry docs), which previously escaped as an unhandled error.
        experiment_run_gid = GlobalID.from_id(raw_run_id)
        experiment_run_id = from_global_id_with_expected_type(experiment_run_gid, "ExperimentRun")
    except ValueError:
        return Response(
            content=f"ExperimentRun with ID {raw_run_id} does not exist",
            status_code=HTTP_404_NOT_FOUND,
        )

    # `result` is optional; a missing/empty result yields None for all three fields.
    result = payload.get("result") or {}
    values = dict(
        experiment_run_id=experiment_run_id,
        name=payload["name"],
        annotator_kind=payload["annotator_kind"],
        label=result.get("label"),
        score=result.get("score"),
        explanation=result.get("explanation"),
        error=payload.get("error"),
        metadata_=payload.get("metadata") or {},  # `metadata_` must match database
        start_time=datetime.fromisoformat(payload["start_time"]),
        end_time=datetime.fromisoformat(payload["end_time"]),
        trace_id=payload.get("trace_id"),
    )
    # Same values for the ON CONFLICT ... DO UPDATE clause, except that the key
    # there is `metadata` rather than `metadata_`.
    set_ = {
        **{k: v for k, v in values.items() if k != "metadata_"},
        "metadata": values["metadata_"],  # `metadata` must match database
    }

    async with request.app.state.db() as session:
        dialect = SupportedSQLDialect(session.bind.dialect.name)
        exp_eval_run = await session.scalar(
            insert_on_conflict(
                dialect=dialect,
                table=models.ExperimentRunAnnotation,
                values=values,
                constraint="uq_experiment_run_annotations_experiment_run_id_name",
                column_names=("experiment_run_id", "name"),
                on_conflict=OnConflict.DO_UPDATE,
                set_=set_,
            ).returning(models.ExperimentRunAnnotation)
        )
        # Read the primary key while the session is still open so the response
        # does not depend on attribute state after the session has ended.
        evaluation_rowid = exp_eval_run.id

    evaluation_gid = GlobalID("ExperimentEvaluation", str(evaluation_rowid))
    return JSONResponse(content={"data": {"id": str(evaluation_gid)}})
@@ -0,0 +1,220 @@
1
+ from datetime import datetime
2
+
3
+ from sqlalchemy import select
4
+ from starlette.requests import Request
5
+ from starlette.responses import JSONResponse, Response
6
+ from starlette.status import HTTP_404_NOT_FOUND
7
+ from strawberry.relay import GlobalID
8
+
9
+ from phoenix.db import models
10
+ from phoenix.experiments.types import ExperimentRun, ExperimentRunOutput
11
+ from phoenix.server.api.types.node import from_global_id_with_expected_type
12
+ from phoenix.utilities.json import jsonify
13
+
14
+
15
# Route handler: record one run of an experiment against one dataset example.
# NOTE: the docstring is the OpenAPI description of the route written as YAML, so it
# is kept free of prose; explanatory notes live in `#` comments instead.
async def create_experiment_run(request: Request) -> Response:
    """
    summary: Create a new experiment run for a specific experiment
    operationId: createExperimentRun
    tags:
      - private
    parameters:
      - in: path
        name: experiment_id
        required: true
        description: The ID of the experiment for which the run is being created
        schema:
          type: string
    requestBody:
      description: Details of the experiment run to be created
      required: true
      content:
        application/json:
          schema:
            type: object
            properties:
              dataset_example_id:
                type: string
                description: The ID of the dataset example used in the experiment run
              trace_id:
                type: string
                description: Optional trace ID for tracking
              experiment_run_output:
                type: object
                description: The output of the experiment run
              repetition_number:
                type: integer
                description: The repetition number of the experiment run
              start_time:
                type: string
                format: date-time
                description: The start time of the experiment run in ISO format
              end_time:
                type: string
                format: date-time
                description: The end time of the experiment run in ISO format
              error:
                type: string
                description: Optional error message if the experiment run encountered an error
                nullable: true
            required:
              - dataset_example_id
              - experiment_run_output
              - repetition_number
              - start_time
              - end_time
    responses:
      200:
        description: Experiment run created successfully
        content:
          application/json:
            schema:
              type: object
              properties:
                data:
                  type: object
                  properties:
                    id:
                      type: string
                      description: The ID of the created experiment run
      404:
        description: Experiment or DatasetExample not found
    """
    raw_experiment_id = request.path_params["experiment_id"]
    try:
        # Parse and type-check in the same `try` so a malformed global ID is
        # answered with 404 instead of escaping as an unhandled error.
        experiment_gid = GlobalID.from_id(raw_experiment_id)
        experiment_id = from_global_id_with_expected_type(experiment_gid, "Experiment")
    except ValueError:
        return Response(
            content=f"Experiment with ID {raw_experiment_id} does not exist",
            status_code=HTTP_404_NOT_FOUND,
        )

    payload = await request.json()

    raw_example_id = payload["dataset_example_id"]
    try:
        example_gid = GlobalID.from_id(raw_example_id)
        dataset_example_id = from_global_id_with_expected_type(example_gid, "DatasetExample")
    except ValueError:
        return Response(
            content=f"DatasetExample with ID {raw_example_id} does not exist",
            status_code=HTTP_404_NOT_FOUND,
        )

    # The body key is `experiment_run_output`; it is stored in the model's
    # `output` column.
    exp_run = models.ExperimentRun(
        experiment_id=experiment_id,
        dataset_example_id=dataset_example_id,
        trace_id=payload.get("trace_id"),
        output=payload["experiment_run_output"],
        repetition_number=payload["repetition_number"],
        start_time=datetime.fromisoformat(payload["start_time"]),
        end_time=datetime.fromisoformat(payload["end_time"]),
        error=payload.get("error"),
    )

    async with request.app.state.db() as session:
        session.add(exp_run)
        # Flush so the database assigns the primary key before we read it.
        await session.flush()
        run_rowid = exp_run.id

    run_gid = GlobalID("ExperimentRun", str(run_rowid))
    return JSONResponse(content={"data": {"id": str(run_gid)}})
125
+
126
+
127
# Route handler: return every run recorded for one experiment.
# NOTE: the docstring is the OpenAPI description of the route written as YAML, so it
# is kept free of prose; explanatory notes live in `#` comments instead.
async def list_experiment_runs(request: Request) -> Response:
    """
    summary: List all runs for a specific experiment
    operationId: listExperimentRuns
    tags:
      - private
    parameters:
      - in: path
        name: experiment_id
        required: true
        description: The ID of the experiment to list runs for
        schema:
          type: string
    responses:
      200:
        description: Experiment runs retrieved successfully
        content:
          application/json:
            schema:
              type: object
              properties:
                data:
                  type: array
                  items:
                    type: object
                    properties:
                      id:
                        type: string
                        description: The ID of the experiment run
                      experiment_id:
                        type: string
                        description: The ID of the experiment
                      dataset_example_id:
                        type: string
                        description: The ID of the dataset example
                      repetition_number:
                        type: integer
                        description: The repetition number of the experiment run
                      start_time:
                        type: string
                        format: date-time
                        description: The start time of the experiment run in ISO format
                      end_time:
                        type: string
                        format: date-time
                        description: The end time of the experiment run in ISO format
                      experiment_run_output:
                        type: object
                        description: The output of the experiment run
                      error:
                        type: string
                        description: Error message if the experiment run encountered an error
                      trace_id:
                        type: string
                        description: Optional trace ID for tracking
      404:
        description: Experiment not found
    """
    raw_experiment_id = request.path_params["experiment_id"]
    try:
        # Parse and type-check in the same `try` so a malformed global ID is
        # answered with 404 instead of escaping as an unhandled error.
        requested_gid = GlobalID.from_id(raw_experiment_id)
        experiment_id = from_global_id_with_expected_type(requested_gid, "Experiment")
    except ValueError:
        return Response(
            content=f"Experiment with ID {raw_experiment_id} does not exist",
            status_code=HTTP_404_NOT_FOUND,
        )

    # NOTE(review): an ID that is well-formed but matches no experiment yields an
    # empty list here, not the documented 404 - confirm whether that is intended.
    async with request.app.state.db() as session:
        query_result = await session.execute(
            select(models.ExperimentRun)
            .where(models.ExperimentRun.experiment_id == experiment_id)
            # order by dataset_example_id to be consistent with `list_dataset_examples`
            .order_by(models.ExperimentRun.dataset_example_id.asc())
        )
        run_rows = query_result.scalars().all()

    # Convert database rows to the client-facing dataclass, replacing integer
    # row ids with their global-ID string form.
    runs = [
        ExperimentRun(
            start_time=row.start_time,
            end_time=row.end_time,
            experiment_id=str(GlobalID("Experiment", str(row.experiment_id))),
            dataset_example_id=str(GlobalID("DatasetExample", str(row.dataset_example_id))),
            repetition_number=row.repetition_number,
            experiment_run_output=ExperimentRunOutput.from_dict(row.output),
            error=row.error,
            id=str(GlobalID("ExperimentRun", str(row.id))),
            trace_id=row.trace_id,
        )
        for row in run_rows
    ]
    return JSONResponse(content={"data": jsonify(runs)}, status_code=200)