arize-phoenix 4.7.1__py3-none-any.whl → 4.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arize_phoenix-4.7.1.dist-info → arize_phoenix-4.8.0.dist-info}/METADATA +1 -1
- {arize_phoenix-4.7.1.dist-info → arize_phoenix-4.8.0.dist-info}/RECORD +17 -16
- phoenix/db/helpers.py +37 -2
- phoenix/experiments/functions.py +27 -9
- phoenix/experiments/types.py +5 -23
- phoenix/server/api/dataloaders/project_by_name.py +1 -1
- phoenix/server/api/mutations/dataset_mutations.py +23 -13
- phoenix/server/api/mutations/experiment_mutations.py +12 -4
- phoenix/server/api/routers/v1/__init__.py +1 -0
- phoenix/server/api/routers/v1/datasets.py +59 -1
- phoenix/server/api/routers/v1/experiment_runs.py +12 -15
- phoenix/server/api/utils.py +36 -0
- phoenix/server/static/index.js +202 -179
- phoenix/version.py +1 -1
- {arize_phoenix-4.7.1.dist-info → arize_phoenix-4.8.0.dist-info}/WHEEL +0 -0
- {arize_phoenix-4.7.1.dist-info → arize_phoenix-4.8.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-4.7.1.dist-info → arize_phoenix-4.8.0.dist-info}/licenses/LICENSE +0 -0
{arize_phoenix-4.7.1.dist-info → arize_phoenix-4.8.0.dist-info}/RECORD
CHANGED

@@ -5,7 +5,7 @@ phoenix/exceptions.py,sha256=n2L2KKuecrdflB9MsCdAYCiSEvGJptIsfRkXMoJle7A,169
 phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 phoenix/services.py,sha256=aTxhcOA1pZHB6U-B3TEcp6fqDF5oT0xCUvEUNMZVTUQ,5175
 phoenix/settings.py,sha256=cO-qgis_S27nHirTobYI9hHPfZH18R--WMmxNdsVUwc,273
-phoenix/version.py,sha256=
+phoenix/version.py,sha256=NsvNfym-LKhyklkaZu-iwzlvcIWUdbdxxglpRMhC1TU,22
 phoenix/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
 phoenix/core/model.py,sha256=km_a--PBHOuA337ClRw9xqhOHhrUT6Rl9pz_zV0JYkQ,4843
@@ -16,7 +16,7 @@ phoenix/db/__init__.py,sha256=pDjEFXukHmJBM-1D8RjmXkvLsz85YWNxMQczt81ec3A,118
 phoenix/db/alembic.ini,sha256=p8DjVqGUs_tTx8oU56JP7qj-rMUebNFizItUSv_hPhs,3763
 phoenix/db/bulk_inserter.py,sha256=zbZGWZFDybKaGLGzpxgLwxAS5sC0_wXcvM0be4kUhh8,11286
 phoenix/db/engines.py,sha256=vLWaZlToMtDI7rJDxSidYkfOoojamxaZxaz8ND3zTus,4770
-phoenix/db/helpers.py,sha256=
+phoenix/db/helpers.py,sha256=mTBhPzdy_aU9gD7hNzUZJkAnV77ko5CdaXyoWH3snPA,2982
 phoenix/db/migrate.py,sha256=MuhtNWnR24riROvarvKfbRb4_D5xuQi6P760vBUKl1E,2270
 phoenix/db/models.py,sha256=7DBWbxY3cx3ve2P1I0kkDKXzlt04zEFJuRPJWsVpH-I,20422
 phoenix/db/insertion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -31,9 +31,9 @@ phoenix/db/migrations/types.py,sha256=Frq1AKSyBKQQ0FLzON-EmgTqE4kNkOpHMsbWnI-WgC
 phoenix/db/migrations/versions/10460e46d750_datasets.py,sha256=l69yZfScFrjfZZpY0gnqwhsDUEctLeo02qMgA_aOGDg,8155
 phoenix/db/migrations/versions/cf03bd6bae1d_init.py,sha256=CbWT3ZTR0CZqeT3zWLoTWhboFmnOy3Ju1z6Ztpq8WIM,8122
 phoenix/experiments/__init__.py,sha256=6JGwgUd7xCbGpuHqYZlsmErmYvVgv7N_j43bn3dUqsk,123
-phoenix/experiments/functions.py,sha256=
+phoenix/experiments/functions.py,sha256=2ZWCcIs0dh_UNB21t0J_PzWcYhu_32Ai3J6sEMjbBGE,32310
 phoenix/experiments/tracing.py,sha256=wVpt8Ie9WNPoi1djJdcrkwCokHdTO0bicXViLg3O-1Y,2831
-phoenix/experiments/types.py,sha256=
+phoenix/experiments/types.py,sha256=VuvDCcvUGeHIQuXS_xpz7Jq5xHdt3qu-O_C7IQ3DvF8,23397
 phoenix/experiments/utils.py,sha256=wLu5Kvt1b4a8rGPRWq5G8RQ9XSiV8fCIVm51zWBI3-g,758
 phoenix/experiments/evaluators/__init__.py,sha256=j63fi3fa3U7-itVPHa82GowhjQRU-wO6yhO34u_lhsA,714
 phoenix/experiments/evaluators/base.py,sha256=ani0F2TN7DMN0KLhV89LIr9-W4g-ccEl2YQJgfp44Js,5325
@@ -71,6 +71,7 @@ phoenix/server/api/context.py,sha256=4jcy203Gtx38399FP21iU3HmFsq-50EKFJlX4IW2Los
 phoenix/server/api/interceptor.py,sha256=ykDnoC_apUd-llVli3m1CW18kNSIgjz2qZ6m5JmPDu8,1294
 phoenix/server/api/queries.py,sha256=wp5BlapuxDIoaQJm7mzG0dURfVxR32vXSJVC0JqG4_Y,19845
 phoenix/server/api/schema.py,sha256=BcxdqO5CSGqpKd-AAJHMjFlzaK9oJA8GJuxmMfcdjn4,434
+phoenix/server/api/utils.py,sha256=Y1lGu8J8r8BSBX9OzffgewI8QMziovbG-ePDvZrrwGI,949
 phoenix/server/api/dataloaders/__init__.py,sha256=qehXL37vGdw7v5PFs3kbZVIuhuzrVNVeZACDQjYpwyo,4847
 phoenix/server/api/dataloaders/average_experiment_run_latency.py,sha256=RiO0AKC6Y5byafsV0zTJEIOt8Nudjte73f1T78cBe1k,1817
 phoenix/server/api/dataloaders/dataset_example_revisions.py,sha256=Vpr5IEKSR4QnAVxE5NM7u92fPNgeHQV2ieYc6JakCj0,3788
@@ -85,7 +86,7 @@ phoenix/server/api/dataloaders/experiment_run_counts.py,sha256=wxHv08aZELJ91KTjH
 phoenix/server/api/dataloaders/experiment_sequence_number.py,sha256=Va1KuoHOd-wzvrlKykoV4kLRFW4JsJvGp_DUI4HYZX4,1631
 phoenix/server/api/dataloaders/latency_ms_quantile.py,sha256=pEc7QjB2iiNOQm_Fmo99F5O_DKOJWgGmcnT0OADJzYE,7423
 phoenix/server/api/dataloaders/min_start_or_max_end_times.py,sha256=IoFX5PtSpvQdMk_7-oB8TpIse3Q4PMxep4qKggkHpzo,2902
-phoenix/server/api/dataloaders/project_by_name.py,sha256=
+phoenix/server/api/dataloaders/project_by_name.py,sha256=W4q-ddeVnja2DNwgg5l8mV2GNADNJf1CTXNcJaBWdfE,1165
 phoenix/server/api/dataloaders/record_counts.py,sha256=mp3KlhwFw-Iy7o6bFxtJKC6B5kGinPIh5PxxNkrxf8o,4283
 phoenix/server/api/dataloaders/span_descendants.py,sha256=djqXXwupWaXkFdscoy-iILYijuxlqr8hJcv6JawsV6s,2162
 phoenix/server/api/dataloaders/span_evaluations.py,sha256=quXGyj_OYvHrcWfzXlYIv7C1OCZiL1K7cWU-YEAlsNA,1316
@@ -121,8 +122,8 @@ phoenix/server/api/input_types/TimeRange.py,sha256=yzx-gxj8mDeGLft1FzU_x1MVEgIG5
 phoenix/server/api/input_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/server/api/mutations/__init__.py,sha256=Jcz-pM6SklhEGKcjA7AIB2WJXGhnxGsghrIq131kyDo,502
 phoenix/server/api/mutations/auth.py,sha256=vPRFoj7J6PV6QeODewG4K0PhoOebS5AfMRpbi_wuhyQ,311
-phoenix/server/api/mutations/dataset_mutations.py,sha256=
-phoenix/server/api/mutations/experiment_mutations.py,sha256=
+phoenix/server/api/mutations/dataset_mutations.py,sha256=CuKhxsYfvwVcdN_9EXhKxB6444BQfObzKzzyfAeg-n8,23199
+phoenix/server/api/mutations/experiment_mutations.py,sha256=vV2lbJ7ccXZqe-LY7nXx6QxWqhKQE4UNZAFcML-KQ8I,3011
 phoenix/server/api/mutations/export_events_mutations.py,sha256=t_wYBxaqvBJYRoHslh3Bmoxmwlzoy0u8SsBKWIKN5hE,4028
 phoenix/server/api/mutations/project_mutations.py,sha256=3SVDCZqxB0Iv60cOwBL8c-rY3QUUPs8PXbp-C_K1mWY,2267
 phoenix/server/api/openapi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -130,12 +131,12 @@ phoenix/server/api/openapi/main.py,sha256=WY0pj3B7siQyyYqKyhqnzWC7P8MtEtiukOBUjG
 phoenix/server/api/openapi/schema.py,sha256=uuSYe1Ecu72aXRgTNjyMu-9ZPE13DAHJPKtedS-MsSs,451
 phoenix/server/api/routers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/server/api/routers/utils.py,sha256=M41BoH-fl37izhRuN2aX7lWm7jOC20A_3uClv9TVUUY,583
-phoenix/server/api/routers/v1/__init__.py,sha256=
+phoenix/server/api/routers/v1/__init__.py,sha256=Ir5fsO6gQXW58HGm7s2sMUq0vya7mfcWneLcLJy6_q8,2895
 phoenix/server/api/routers/v1/dataset_examples.py,sha256=XfqOvDKF1oxb0pkeYfBycwwGt3LnSyyGdMLKC5VKoGQ,6690
-phoenix/server/api/routers/v1/datasets.py,sha256=
+phoenix/server/api/routers/v1/datasets.py,sha256=r0WcNxF8SKVa3-4rrTIg4Andwr4NmRmW1ybpKuxR9qw,33639
 phoenix/server/api/routers/v1/evaluations.py,sha256=8g6P_e2BweV3RDU0esFmpkb0L5fCwonQPXiJ0y6HLwg,9126
 phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=H_psVyuGUQImo0oxdEAKAMQ-oyVwkVIq5yaMHzHIiPc,5455
-phoenix/server/api/routers/v1/experiment_runs.py,sha256=
+phoenix/server/api/routers/v1/experiment_runs.py,sha256=jy4SynmzdtQMoUzlowmG6wsVU14SsLAzfcW4JOhXjeQ,8154
 phoenix/server/api/routers/v1/experiments.py,sha256=cG-LyIGRdB1jVTL42Xi2__nsXibVe9Up7m3hFiTIYYY,11886
 phoenix/server/api/routers/v1/spans.py,sha256=PFeS3ayKj4cUle0CH-f-CpM1fRi-JicEG7BEtkANzAo,4074
 phoenix/server/api/routers/v1/traces.py,sha256=dYEf5pThenAQCgfQljHdrnwd4tC_tAXm6Kvk6GphPYs,2774
@@ -202,7 +203,7 @@ phoenix/server/static/apple-touch-icon-76x76.png,sha256=CT_xT12I0u2i0WU8JzBZBuOQ
 phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZVooBJ_SGk2ZkDs,3801
 phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
 phoenix/server/static/index.css,sha256=KKGpx4iwF91VGRm0YN-4cn8oC-oIqC6HecoPf0x3ZM8,1885
-phoenix/server/static/index.js,sha256=
+phoenix/server/static/index.js,sha256=sUkm_imwx5uXpCQxQEPmX-axDr094HE1wGdBznF8J_E,3524949
 phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
 phoenix/server/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/server/templates/index.html,sha256=S4z7qSoNSwnKFAH9r96AR-YJEyoKMd-VMWVlJ_IdzME,2039
@@ -247,8 +248,8 @@ phoenix/utilities/logging.py,sha256=lDXd6EGaamBNcQxL4vP1au9-i_SXe0OraUDiJOcszSw,
 phoenix/utilities/project.py,sha256=qWsvKnG1oKhOFUowXf9qiOL2ia7jaFe_ijFFHEt8GJo,431
 phoenix/utilities/re.py,sha256=PDve_OLjRTM8yQQJHC8-n3HdIONi7aNils3ZKRZ5uBM,2045
 phoenix/utilities/span_store.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arize_phoenix-4.
-arize_phoenix-4.
-arize_phoenix-4.
-arize_phoenix-4.
-arize_phoenix-4.
+arize_phoenix-4.8.0.dist-info/METADATA,sha256=UNwc2BbfIMe5RbFT-IuzfKQnX21dFA4zY5EUXbo5bC8,11451
+arize_phoenix-4.8.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+arize_phoenix-4.8.0.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
+arize_phoenix-4.8.0.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
+arize_phoenix-4.8.0.dist-info/RECORD,,
phoenix/db/helpers.py
CHANGED

@@ -1,12 +1,12 @@
 from enum import Enum
-from typing import Any
+from typing import Any, Optional, Tuple

 from openinference.semconv.trace import (
     OpenInferenceSpanKindValues,
     RerankerAttributes,
     SpanAttributes,
 )
-from sqlalchemy import Integer, SQLColumnExpression, case, func
+from sqlalchemy import Integer, Select, SQLColumnExpression, case, distinct, func, select
 from typing_extensions import assert_never

 from phoenix.db import models
@@ -45,3 +45,38 @@ def num_docs_col(dialect: SupportedSQLDialect) -> SQLColumnExpression[Integer]:

 _RETRIEVAL_DOCUMENTS = SpanAttributes.RETRIEVAL_DOCUMENTS.split(".")
 _RERANKER_OUTPUT_DOCUMENTS = RerankerAttributes.RERANKER_OUTPUT_DOCUMENTS.split(".")
+
+
+def get_eval_trace_ids_for_datasets(*dataset_ids: int) -> Select[Tuple[Optional[str]]]:
+    return (
+        select(distinct(models.ExperimentRunAnnotation.trace_id))
+        .join(models.ExperimentRun)
+        .join_from(models.ExperimentRun, models.Experiment)
+        .where(models.Experiment.dataset_id.in_(set(dataset_ids)))
+        .where(models.ExperimentRunAnnotation.trace_id.isnot(None))
+    )
+
+
+def get_project_names_for_datasets(*dataset_ids: int) -> Select[Tuple[Optional[str]]]:
+    return (
+        select(distinct(models.Experiment.project_name))
+        .where(models.Experiment.dataset_id.in_(set(dataset_ids)))
+        .where(models.Experiment.project_name.isnot(None))
+    )
+
+
+def get_eval_trace_ids_for_experiments(*experiment_ids: int) -> Select[Tuple[Optional[str]]]:
+    return (
+        select(distinct(models.ExperimentRunAnnotation.trace_id))
+        .join(models.ExperimentRun)
+        .where(models.ExperimentRun.experiment_id.in_(set(experiment_ids)))
+        .where(models.ExperimentRunAnnotation.trace_id.isnot(None))
+    )
+
+
+def get_project_names_for_experiments(*experiment_ids: int) -> Select[Tuple[Optional[str]]]:
+    return (
+        select(distinct(models.Experiment.project_name))
+        .where(models.Experiment.id.in_(set(experiment_ids)))
+        .where(models.Experiment.project_name.isnot(None))
+    )
phoenix/experiments/functions.py
CHANGED

@@ -1,3 +1,4 @@
+import asyncio
 import functools
 import inspect
 import json
@@ -62,7 +63,6 @@ from phoenix.experiments.types import (
     ExperimentEvaluationRun,
     ExperimentParameters,
     ExperimentRun,
-    ExperimentRunOutput,
     ExperimentTask,
     RanExperiment,
     TaskSummary,
@@ -284,7 +284,7 @@ def run_experiment(
                 experiment_id=experiment.id,
                 dataset_example_id=example.id,
                 repetition_number=repetition_number,
-
+                output=output,
                 error=repr(error) if error else None,
                 trace_id=_str_trace_id(span.get_span_context().trace_id),  # type: ignore[no-untyped-call]
             )
@@ -345,13 +345,21 @@ def run_experiment(
                 experiment_id=experiment.id,
                 dataset_example_id=example.id,
                 repetition_number=repetition_number,
-
+                output=output,
                 error=repr(error) if error else None,
                 trace_id=_str_trace_id(span.get_span_context().trace_id),  # type: ignore[no-untyped-call]
             )
             if not dry_run:
-
-
+                # Below is a workaround to avoid timeout errors sometimes
+                # encountered when the task is a synchronous function that
+                # blocks for too long.
+                resp = await asyncio.get_running_loop().run_in_executor(
+                    None,
+                    functools.partial(
+                        sync_client.post,
+                        url=f"/v1/experiments/{experiment.id}/runs",
+                        json=jsonify(exp_run),
+                    ),
                 )
                 resp.raise_for_status()
                 exp_run = replace(exp_run, id=resp.json()["data"]["id"])
@@ -393,7 +401,7 @@ def run_experiment(
         ran_experiment.__init__(  # type: ignore[misc]
             params=params,
             dataset=dataset,
-            runs={r.id: r for r in task_runs},
+            runs={r.id: r for r in task_runs if r is not None},
             task_summary=task_summary,
             **_asdict(experiment),
         )
@@ -498,7 +506,7 @@ def evaluate_experiment(
             stack.enter_context(capture_spans(resource))
             try:
                 result = evaluator.evaluate(
-                    output=experiment_run.output,
+                    output=deepcopy(experiment_run.output),
                     expected=example.output,
                     reference=example.output,
                     input=example.input,
@@ -550,7 +558,7 @@ def evaluate_experiment(
             stack.enter_context(capture_spans(resource))
             try:
                 result = await evaluator.async_evaluate(
-                    output=experiment_run.output,
+                    output=deepcopy(experiment_run.output),
                     expected=example.output,
                     reference=example.output,
                     input=example.input,
@@ -582,7 +590,17 @@ def evaluate_experiment(
                 trace_id=_str_trace_id(span.get_span_context().trace_id),  # type: ignore[no-untyped-call]
             )
             if not dry_run:
-
+                # Below is a workaround to avoid timeout errors sometimes
+                # encountered when the evaluator is a synchronous function
+                # that blocks for too long.
+                resp = await asyncio.get_running_loop().run_in_executor(
+                    None,
+                    functools.partial(
+                        sync_client.post,
+                        url="/v1/experiment_evaluations",
+                        json=jsonify(eval_run),
+                    ),
+                )
                 resp.raise_for_status()
                 eval_run = replace(eval_run, id=resp.json()["data"]["id"])
             return eval_run
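
The change above moves the blocking sync_client.post call onto the default thread-pool executor so the event loop stays responsive while a synchronous task or evaluator is in flight. A minimal, self-contained sketch of the same pattern; slow_post is a stand-in, not Phoenix code:

import asyncio
import functools
import time


def slow_post(url: str, json: dict) -> str:
    # Stand-in for a blocking HTTP call such as httpx.Client.post.
    time.sleep(1)
    return f"posted {json} to {url}"


async def main() -> None:
    loop = asyncio.get_running_loop()
    # Offloading the blocking call keeps the event loop free to run
    # other coroutines (heartbeats, other experiment runs, etc.).
    result = await loop.run_in_executor(
        None,
        functools.partial(slow_post, url="/v1/experiments/123/runs", json={"output": 42}),
    )
    print(result)


asyncio.run(main())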
phoenix/experiments/types.py
CHANGED

@@ -198,20 +198,6 @@ class Experiment:
         )


-@dataclass(frozen=True)
-class ExperimentRunOutput:
-    task_output: TaskOutput
-
-    def __post_init__(self) -> None:
-        object.__setattr__(self, "task_output", _make_read_only(self.task_output))
-
-    @classmethod
-    def from_dict(cls, obj: Optional[Mapping[str, Any]]) -> ExperimentRunOutput:
-        if not obj:
-            return cls(task_output=None)
-        return cls(task_output=obj["task_output"])
-
-
 @dataclass(frozen=True)
 class ExperimentRun:
     start_time: datetime
@@ -219,15 +205,11 @@ class ExperimentRun:
     experiment_id: ExperimentId
     dataset_example_id: ExampleId
     repetition_number: RepetitionNumber
-
+    output: JSONSerializable
     error: Optional[str] = None
     id: ExperimentRunId = field(default_factory=_dry_run_id)
     trace_id: Optional[TraceId] = None

-    @property
-    def output(self) -> Optional[TaskOutput]:
-        return deepcopy(self.experiment_run_output.task_output)
-
     @classmethod
     def from_dict(cls, obj: Mapping[str, Any]) -> ExperimentRun:
         return cls(
@@ -236,14 +218,14 @@ class ExperimentRun:
             experiment_id=obj["experiment_id"],
             dataset_example_id=obj["dataset_example_id"],
             repetition_number=obj.get("repetition_number") or 1,
-
+            output=_make_read_only(obj.get("output")),
             error=obj.get("error"),
             id=obj["id"],
             trace_id=obj.get("trace_id"),
         )

     def __post_init__(self) -> None:
-        if bool(self.
+        if bool(self.output) == bool(self.error):
             ValueError("Must specify exactly one of experiment_run_output or error")


@@ -381,7 +363,7 @@ class EvaluationSummary(_HasStats):
         if df.empty:
             df = pd.DataFrame.from_records(
                 [
-                    {"evaluator": name, "error":
+                    {"evaluator": name, "error": None, "score": None, "label": None}
                     for name in params.eval_names
                 ]
             )
@@ -571,7 +553,7 @@ class RanExperiment(Experiment):
                 {
                     "run_id": run.id,
                     "error": run.error,
-                    "output": deepcopy(run.
+                    "output": deepcopy(run.output),
                     "input": deepcopy((ex := self.dataset.examples[run.dataset_example_id]).input),
                     "expected": deepcopy(ex.output),
                     "metadata": deepcopy(ex.metadata),
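
Both this file and functions.py now hand a deepcopy of the run output to evaluators and to the results table rather than exposing the stored value behind the removed ExperimentRunOutput wrapper. The point of the copy is that a mutable output (a dict or list) passed to user code cannot corrupt the stored run. A small generic illustration of the difference:

from copy import deepcopy

stored_output = {"answer": "42", "citations": ["doc-1"]}


def careless_evaluator(output: dict) -> bool:
    # A user-supplied evaluator might mutate its argument.
    output["citations"].clear()
    return bool(output["answer"])


careless_evaluator(deepcopy(stored_output))
print(stored_output["citations"])  # ['doc-1']: the stored run is untouched

careless_evaluator(stored_output)
print(stored_output["citations"])  # []: without the copy, the run output is corrupted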
phoenix/server/api/dataloaders/project_by_name.py
CHANGED

@@ -28,4 +28,4 @@ class ProjectByNameDataLoader(DataLoader[Key, Result]):
         async for project in data:
             projects_by_name[project.name] = project

-        return [projects_by_name
+        return [projects_by_name.get(project_name) for project_name in keys]
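
The dataloader now uses dict.get so that names with no matching project map to None instead of raising KeyError; a DataLoader batch function typically must return one result per requested key, in order. A tiny generic illustration:

projects_by_name = {"default": "project-1"}
keys = ["default", "missing"]

# Indexing would raise KeyError for "missing"; .get keeps the result aligned with keys.
results = [projects_by_name.get(name) for name in keys]
print(results)  # ['project-1', None]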
phoenix/server/api/mutations/dataset_mutations.py
CHANGED

@@ -1,3 +1,4 @@
+import asyncio
 from datetime import datetime
 from typing import Any, Dict

@@ -10,6 +11,7 @@ from strawberry import UNSET
 from strawberry.types import Info

 from phoenix.db import models
+from phoenix.db.helpers import get_eval_trace_ids_for_datasets, get_project_names_for_datasets
 from phoenix.server.api.context import Context
 from phoenix.server.api.helpers.dataset_helpers import (
     get_dataset_example_input,
@@ -30,6 +32,7 @@ from phoenix.server.api.types.Dataset import Dataset, to_gql_dataset
 from phoenix.server.api.types.DatasetExample import DatasetExample
 from phoenix.server.api.types.node import from_global_id_with_expected_type
 from phoenix.server.api.types.Span import Span
+from phoenix.server.api.utils import delete_projects, delete_traces


 @strawberry.type
@@ -274,21 +277,28 @@ class DatasetMutationMixin:
         info: Info[Context, None],
         input: DeleteDatasetInput,
     ) -> DatasetMutationPayload:
-
-
-
+        try:
+            dataset_id = from_global_id_with_expected_type(
+                global_id=input.dataset_id,
+                expected_type_name=Dataset.__name__,
+            )
+        except ValueError:
+            raise ValueError(f"Unknown dataset: {input.dataset_id}")
+        project_names_stmt = get_project_names_for_datasets(dataset_id)
+        eval_trace_ids_stmt = get_eval_trace_ids_for_datasets(dataset_id)
+        stmt = (
+            delete(models.Dataset).where(models.Dataset.id == dataset_id).returning(models.Dataset)
         )
-
         async with info.context.db() as session:
-
-
-
-
-
-
-
-
+            project_names = await session.scalars(project_names_stmt)
+            eval_trace_ids = await session.scalars(eval_trace_ids_stmt)
+            if not (dataset := await session.scalar(stmt)):
+                raise ValueError(f"Unknown dataset: {input.dataset_id}")
+            await asyncio.gather(
+                delete_projects(info.context.db, *project_names),
+                delete_traces(info.context.db, *eval_trace_ids),
+                return_exceptions=True,
+            )
         return DatasetMutationPayload(dataset=to_gql_dataset(dataset))

     @strawberry.mutation(permission_classes=[IsAuthenticated])  # type: ignore
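
The mutation fires both cleanup coroutines concurrently with return_exceptions=True, so a failure in one cleanup neither cancels the other nor bubbles out of the mutation. A standalone sketch of that asyncio.gather behavior, using stub coroutines rather than the real helpers:

import asyncio


async def delete_projects_stub() -> str:
    return "projects deleted"


async def delete_traces_stub() -> str:
    raise RuntimeError("trace cleanup failed")


async def main() -> None:
    results = await asyncio.gather(
        delete_projects_stub(),
        delete_traces_stub(),
        return_exceptions=True,
    )
    # The exception is returned as a value instead of being raised.
    print(results)  # ['projects deleted', RuntimeError('trace cleanup failed')]


asyncio.run(main())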
phoenix/server/api/mutations/experiment_mutations.py
CHANGED

@@ -1,3 +1,4 @@
+import asyncio
 from typing import List

 import strawberry
@@ -6,11 +7,13 @@ from strawberry.relay import GlobalID
 from strawberry.types import Info

 from phoenix.db import models
+from phoenix.db.helpers import get_eval_trace_ids_for_experiments, get_project_names_for_experiments
 from phoenix.server.api.context import Context
 from phoenix.server.api.input_types.DeleteExperimentsInput import DeleteExperimentsInput
 from phoenix.server.api.mutations.auth import IsAuthenticated
 from phoenix.server.api.types.Experiment import Experiment, to_gql_experiment
 from phoenix.server.api.types.node import from_global_id_with_expected_type
+from phoenix.server.api.utils import delete_projects, delete_traces


 @strawberry.type
@@ -30,7 +33,11 @@ class ExperimentMutationMixin:
             from_global_id_with_expected_type(experiment_id, Experiment.__name__)
             for experiment_id in input.experiment_ids
         ]
+        project_names_stmt = get_project_names_for_experiments(*experiment_ids)
+        eval_trace_ids_stmt = get_eval_trace_ids_for_experiments(*experiment_ids)
         async with info.context.db() as session:
+            project_names = await session.scalars(project_names_stmt)
+            eval_trace_ids = await session.scalars(eval_trace_ids_stmt)
             savepoint = await session.begin_nested()
             experiments = {
                 experiment.id: experiment
@@ -54,10 +61,11 @@ class ExperimentMutationMixin:
                     ]
                 )
             )
-
-
-
-
+            await asyncio.gather(
+                delete_projects(info.context.db, *project_names),
+                delete_traces(info.context.db, *eval_trace_ids),
+                return_exceptions=True,
+            )
         return ExperimentMutationPayload(
             experiments=[
                 to_gql_experiment(experiments[experiment_id]) for experiment_id in experiment_ids
phoenix/server/api/routers/v1/__init__.py
CHANGED

@@ -44,6 +44,7 @@ V1_ROUTES = [
     Route("/v1/spans", spans.get_spans_handler, methods=["GET"]),
     Route("/v1/datasets/upload", datasets.post_datasets_upload, methods=["POST"]),
     Route("/v1/datasets", datasets.list_datasets, methods=["GET"]),
+    Route("/v1/datasets/{id:str}", datasets.delete_dataset_by_id, methods=["DELETE"]),
    Route("/v1/datasets/{id:str}", datasets.get_dataset_by_id, methods=["GET"]),
    Route("/v1/datasets/{id:str}/csv", datasets.get_dataset_csv, methods=["GET"]),
    Route(
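
With this route registered, a dataset can be removed over the REST API by issuing a DELETE to /v1/datasets/{id}, where {id} is the dataset's GlobalID string. A hedged client-side sketch using httpx; the base URL and dataset ID below are placeholders:

import httpx

PHOENIX_BASE_URL = "http://localhost:6006"  # placeholder; point at your Phoenix server
dataset_id = "RGF0YXNldDox"  # placeholder GlobalID of the dataset to delete

resp = httpx.delete(f"{PHOENIX_BASE_URL}/v1/datasets/{dataset_id}")
if resp.status_code == 204:
    print("dataset deleted; eval traces are cleaned up in the background")
else:
    # 404 for an unknown dataset, 422 for a malformed ID (see the handler below).
    print(resp.status_code, resp.text)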
phoenix/server/api/routers/v1/datasets.py
CHANGED

@@ -26,13 +26,15 @@ from typing import (

 import pandas as pd
 import pyarrow as pa
-from sqlalchemy import and_, func, select
+from sqlalchemy import and_, delete, func, select
 from sqlalchemy.ext.asyncio import AsyncSession
+from starlette.background import BackgroundTasks
 from starlette.concurrency import run_in_threadpool
 from starlette.datastructures import FormData, UploadFile
 from starlette.requests import Request
 from starlette.responses import JSONResponse, Response
 from starlette.status import (
+    HTTP_204_NO_CONTENT,
     HTTP_404_NOT_FOUND,
     HTTP_409_CONFLICT,
     HTTP_422_UNPROCESSABLE_ENTITY,
@@ -42,6 +44,7 @@ from strawberry.relay import GlobalID
 from typing_extensions import TypeAlias, assert_never

 from phoenix.db import models
+from phoenix.db.helpers import get_eval_trace_ids_for_datasets, get_project_names_for_datasets
 from phoenix.db.insertion.dataset import (
     DatasetAction,
     DatasetExampleAdditionEvent,
@@ -52,6 +55,7 @@ from phoenix.server.api.types.Dataset import Dataset
 from phoenix.server.api.types.DatasetExample import DatasetExample
 from phoenix.server.api.types.DatasetVersion import DatasetVersion
 from phoenix.server.api.types.node import from_global_id_with_expected_type
+from phoenix.server.api.utils import delete_projects, delete_traces

 logger = logging.getLogger(__name__)

@@ -163,6 +167,60 @@ async def list_datasets(request: Request) -> Response:
     return JSONResponse(content={"next_cursor": next_cursor, "data": data})


+async def delete_dataset_by_id(request: Request) -> Response:
+    """
+    summary: Delete dataset by ID
+    operationId: deleteDatasetById
+    tags:
+      - datasets
+    parameters:
+      - in: path
+        name: id
+        required: true
+        schema:
+          type: string
+    responses:
+      204:
+        description: Success
+      403:
+        description: Forbidden
+      404:
+        description: Dataset not found
+      422:
+        description: Dataset ID is invalid
+    """
+    if id_ := request.path_params.get("id"):
+        try:
+            dataset_id = from_global_id_with_expected_type(
+                GlobalID.from_id(id_),
+                Dataset.__name__,
+            )
+        except ValueError:
+            return Response(
+                content=f"Invalid Dataset ID: {id_}",
+                status_code=HTTP_422_UNPROCESSABLE_ENTITY,
+            )
+    else:
+        return Response(
+            content="Missing Dataset ID",
+            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
+        )
+    project_names_stmt = get_project_names_for_datasets(dataset_id)
+    eval_trace_ids_stmt = get_eval_trace_ids_for_datasets(dataset_id)
+    stmt = (
+        delete(models.Dataset).where(models.Dataset.id == dataset_id).returning(models.Dataset.id)
+    )
+    async with request.app.state.db() as session:
+        project_names = await session.scalars(project_names_stmt)
+        eval_trace_ids = await session.scalars(eval_trace_ids_stmt)
+        if (await session.scalar(stmt)) is None:
+            return Response(content="Dataset does not exist", status_code=HTTP_404_NOT_FOUND)
+    tasks = BackgroundTasks()
+    tasks.add_task(delete_projects, request.app.state.db, *project_names)
+    tasks.add_task(delete_traces, request.app.state.db, *eval_trace_ids)
+    return Response(status_code=HTTP_204_NO_CONTENT, background=tasks)
+
+
 async def get_dataset_by_id(request: Request) -> Response:
     """
     summary: Get dataset by ID
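
delete_dataset_by_id responds with 204 immediately and defers the project and trace cleanup to Starlette BackgroundTasks, which run after the response is sent. A minimal standalone sketch of the same pattern; the app, route, and cleanup function here are illustrative, not the Phoenix handler:

import asyncio

from starlette.applications import Starlette
from starlette.background import BackgroundTasks
from starlette.requests import Request
from starlette.responses import Response
from starlette.routing import Route
from starlette.status import HTTP_204_NO_CONTENT


async def cleanup(label: str) -> None:
    await asyncio.sleep(0.1)  # stand-in for deleting projects/traces
    print(f"cleaned up {label}")


async def delete_thing(request: Request) -> Response:
    tasks = BackgroundTasks()
    tasks.add_task(cleanup, "projects")
    tasks.add_task(cleanup, "traces")
    # The client receives the 204 first; the tasks run afterwards.
    return Response(status_code=HTTP_204_NO_CONTENT, background=tasks)


# Serve with an ASGI server, e.g. `uvicorn module:app`.
app = Starlette(routes=[Route("/things/{id:str}", delete_thing, methods=["DELETE"])])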
phoenix/server/api/routers/v1/experiment_runs.py
CHANGED

@@ -7,9 +7,8 @@ from starlette.status import HTTP_404_NOT_FOUND
 from strawberry.relay import GlobalID

 from phoenix.db import models
-from phoenix.
+from phoenix.db.models import ExperimentRunOutput
 from phoenix.server.api.types.node import from_global_id_with_expected_type
-from phoenix.utilities.json import jsonify


 async def create_experiment_run(request: Request) -> Response:
@@ -39,9 +38,8 @@ async def create_experiment_run(request: Request) -> Response:
               trace_id:
                 type: string
                 description: Optional trace ID for tracking
-
-
-                description: The output of the experiment run
+              output:
+                description: The output of the experiment task
               repetition_number:
                 type: integer
                 description: The repetition number of the experiment run
@@ -101,7 +99,7 @@ async def create_experiment_run(request: Request) -> Response:
     )

     trace_id = payload.get("trace_id", None)
-
+    task_output = payload["output"]
     repetition_number = payload["repetition_number"]
     start_time = payload["start_time"]
     end_time = payload["end_time"]
@@ -112,7 +110,7 @@ async def create_experiment_run(request: Request) -> Response:
         experiment_id=experiment_id,
         dataset_example_id=dataset_example_id,
         trace_id=trace_id,
-        output=
+        output=ExperimentRunOutput(task_output=task_output),
         repetition_number=repetition_number,
         start_time=datetime.fromisoformat(start_time),
         end_time=datetime.fromisoformat(end_time),
@@ -170,9 +168,8 @@ async def list_experiment_runs(request: Request) -> Response:
                 type: string
                 format: date-time
                 description: The end time of the experiment run in ISO format
-
-
-                description: The output of the experiment run
+              output:
+                description: The output of the experiment task
               error:
                 type: string
                 description: Error message if the experiment run encountered an error
@@ -205,16 +202,16 @@ async def list_experiment_runs(request: Request) -> Response:
         experiment_gid = GlobalID("Experiment", str(exp_run.experiment_id))
         example_gid = GlobalID("DatasetExample", str(exp_run.dataset_example_id))
         runs.append(
-
-                start_time=exp_run.start_time,
-                end_time=exp_run.end_time,
+            dict(
+                start_time=exp_run.start_time.isoformat(),
+                end_time=exp_run.end_time.isoformat(),
                 experiment_id=str(experiment_gid),
                 dataset_example_id=str(example_gid),
                 repetition_number=exp_run.repetition_number,
-
+                output=exp_run.output.get("task_output"),
                 error=exp_run.error,
                 id=str(run_gid),
                 trace_id=exp_run.trace_id,
             )
         )
-    return JSONResponse(content={"data":
+    return JSONResponse(content={"data": runs}, status_code=200)
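
The create handler now wraps whatever the client posts as output into ExperimentRunOutput(task_output=...) for storage, and the list handler unwraps it again with .get("task_output"), so API clients only ever see the bare task output. A dict-level sketch of that round trip; purely illustrative, the real wrapper is the model type imported above:

# What a client posts to POST /v1/experiments/{id}/runs:
posted = {"output": {"answer": "42"}, "repetition_number": 1}

# What the server stores on the run row (conceptually, a JSON object):
stored_output = {"task_output": posted["output"]}

# What GET /v1/experiments/{id}/runs returns for that run's output field:
returned_output = stored_output.get("task_output")
assert returned_output == {"answer": "42"}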
phoenix/server/api/utils.py
ADDED

@@ -0,0 +1,36 @@
+from typing import AsyncContextManager, Callable, List
+
+from sqlalchemy import delete
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from phoenix.db import models
+
+
+async def delete_projects(
+    db: Callable[[], AsyncContextManager[AsyncSession]],
+    *project_names: str,
+) -> List[int]:
+    if not project_names:
+        return []
+    stmt = (
+        delete(models.Project)
+        .where(models.Project.name.in_(set(project_names)))
+        .returning(models.Project.id)
+    )
+    async with db() as session:
+        return list(await session.scalars(stmt))
+
+
+async def delete_traces(
+    db: Callable[[], AsyncContextManager[AsyncSession]],
+    *trace_ids: str,
+) -> List[int]:
+    if not trace_ids:
+        return []
+    stmt = (
+        delete(models.Trace)
+        .where(models.Trace.trace_id.in_(set(trace_ids)))
+        .returning(models.Trace.id)
+    )
+    async with db() as session:
+        return list(await session.scalars(stmt))
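
Both helpers take a factory that yields an async session context (the same db callable the server keeps on app.state and on the GraphQL context), open their own session, and return the deleted primary keys. A hedged usage sketch with an illustrative in-memory engine, assuming aiosqlite is installed and that phoenix.db.models exposes its declarative Base; Phoenix's real engine setup lives in phoenix.db.engines:

import asyncio

from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine

from phoenix.db import models
from phoenix.server.api.utils import delete_projects, delete_traces

# Illustrative engine only; not Phoenix's configuration.
engine = create_async_engine("sqlite+aiosqlite:///:memory:")
db = async_sessionmaker(engine, expire_on_commit=False)


async def main() -> None:
    async with engine.begin() as conn:
        await conn.run_sync(models.Base.metadata.create_all)
    deleted_project_ids = await delete_projects(db, "experiment-project-1")
    deleted_trace_pks = await delete_traces(db, "trace-id-abc")
    print(deleted_project_ids, deleted_trace_pks)  # [] [] on an empty database


asyncio.run(main())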