arize-phoenix 4.7.1__py3-none-any.whl → 4.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: arize-phoenix
3
- Version: 4.7.1
3
+ Version: 4.8.0
4
4
  Summary: AI Observability and Evaluation
5
5
  Project-URL: Documentation, https://docs.arize.com/phoenix/
6
6
  Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -5,7 +5,7 @@ phoenix/exceptions.py,sha256=n2L2KKuecrdflB9MsCdAYCiSEvGJptIsfRkXMoJle7A,169
5
5
  phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
6
6
  phoenix/services.py,sha256=aTxhcOA1pZHB6U-B3TEcp6fqDF5oT0xCUvEUNMZVTUQ,5175
7
7
  phoenix/settings.py,sha256=cO-qgis_S27nHirTobYI9hHPfZH18R--WMmxNdsVUwc,273
8
- phoenix/version.py,sha256=7suIQcI50zQCokBqNExDKrJDN157VogyLAp_ZZ4xqCM,22
8
+ phoenix/version.py,sha256=NsvNfym-LKhyklkaZu-iwzlvcIWUdbdxxglpRMhC1TU,22
9
9
  phoenix/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
11
11
  phoenix/core/model.py,sha256=km_a--PBHOuA337ClRw9xqhOHhrUT6Rl9pz_zV0JYkQ,4843
@@ -16,7 +16,7 @@ phoenix/db/__init__.py,sha256=pDjEFXukHmJBM-1D8RjmXkvLsz85YWNxMQczt81ec3A,118
16
16
  phoenix/db/alembic.ini,sha256=p8DjVqGUs_tTx8oU56JP7qj-rMUebNFizItUSv_hPhs,3763
17
17
  phoenix/db/bulk_inserter.py,sha256=zbZGWZFDybKaGLGzpxgLwxAS5sC0_wXcvM0be4kUhh8,11286
18
18
  phoenix/db/engines.py,sha256=vLWaZlToMtDI7rJDxSidYkfOoojamxaZxaz8ND3zTus,4770
19
- phoenix/db/helpers.py,sha256=L2_jP1iIWpUREhKLYYb4_vf_6v_BiU1E73Z2PczGm6s,1589
19
+ phoenix/db/helpers.py,sha256=mTBhPzdy_aU9gD7hNzUZJkAnV77ko5CdaXyoWH3snPA,2982
20
20
  phoenix/db/migrate.py,sha256=MuhtNWnR24riROvarvKfbRb4_D5xuQi6P760vBUKl1E,2270
21
21
  phoenix/db/models.py,sha256=7DBWbxY3cx3ve2P1I0kkDKXzlt04zEFJuRPJWsVpH-I,20422
22
22
  phoenix/db/insertion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -31,9 +31,9 @@ phoenix/db/migrations/types.py,sha256=Frq1AKSyBKQQ0FLzON-EmgTqE4kNkOpHMsbWnI-WgC
31
31
  phoenix/db/migrations/versions/10460e46d750_datasets.py,sha256=l69yZfScFrjfZZpY0gnqwhsDUEctLeo02qMgA_aOGDg,8155
32
32
  phoenix/db/migrations/versions/cf03bd6bae1d_init.py,sha256=CbWT3ZTR0CZqeT3zWLoTWhboFmnOy3Ju1z6Ztpq8WIM,8122
33
33
  phoenix/experiments/__init__.py,sha256=6JGwgUd7xCbGpuHqYZlsmErmYvVgv7N_j43bn3dUqsk,123
34
- phoenix/experiments/functions.py,sha256=m4PlpkTLoZxmt7ZV2KqwcWY5q18Yu3ZTrcEzAqLN-o4,31656
34
+ phoenix/experiments/functions.py,sha256=2ZWCcIs0dh_UNB21t0J_PzWcYhu_32Ai3J6sEMjbBGE,32310
35
35
  phoenix/experiments/tracing.py,sha256=wVpt8Ie9WNPoi1djJdcrkwCokHdTO0bicXViLg3O-1Y,2831
36
- phoenix/experiments/types.py,sha256=HQ9k7dUTlOLZl0iGtZOnToUtZBYGos6afwvO44subAM,24035
36
+ phoenix/experiments/types.py,sha256=VuvDCcvUGeHIQuXS_xpz7Jq5xHdt3qu-O_C7IQ3DvF8,23397
37
37
  phoenix/experiments/utils.py,sha256=wLu5Kvt1b4a8rGPRWq5G8RQ9XSiV8fCIVm51zWBI3-g,758
38
38
  phoenix/experiments/evaluators/__init__.py,sha256=j63fi3fa3U7-itVPHa82GowhjQRU-wO6yhO34u_lhsA,714
39
39
  phoenix/experiments/evaluators/base.py,sha256=ani0F2TN7DMN0KLhV89LIr9-W4g-ccEl2YQJgfp44Js,5325
@@ -71,6 +71,7 @@ phoenix/server/api/context.py,sha256=4jcy203Gtx38399FP21iU3HmFsq-50EKFJlX4IW2Los
71
71
  phoenix/server/api/interceptor.py,sha256=ykDnoC_apUd-llVli3m1CW18kNSIgjz2qZ6m5JmPDu8,1294
72
72
  phoenix/server/api/queries.py,sha256=wp5BlapuxDIoaQJm7mzG0dURfVxR32vXSJVC0JqG4_Y,19845
73
73
  phoenix/server/api/schema.py,sha256=BcxdqO5CSGqpKd-AAJHMjFlzaK9oJA8GJuxmMfcdjn4,434
74
+ phoenix/server/api/utils.py,sha256=Y1lGu8J8r8BSBX9OzffgewI8QMziovbG-ePDvZrrwGI,949
74
75
  phoenix/server/api/dataloaders/__init__.py,sha256=qehXL37vGdw7v5PFs3kbZVIuhuzrVNVeZACDQjYpwyo,4847
75
76
  phoenix/server/api/dataloaders/average_experiment_run_latency.py,sha256=RiO0AKC6Y5byafsV0zTJEIOt8Nudjte73f1T78cBe1k,1817
76
77
  phoenix/server/api/dataloaders/dataset_example_revisions.py,sha256=Vpr5IEKSR4QnAVxE5NM7u92fPNgeHQV2ieYc6JakCj0,3788
@@ -85,7 +86,7 @@ phoenix/server/api/dataloaders/experiment_run_counts.py,sha256=wxHv08aZELJ91KTjH
85
86
  phoenix/server/api/dataloaders/experiment_sequence_number.py,sha256=Va1KuoHOd-wzvrlKykoV4kLRFW4JsJvGp_DUI4HYZX4,1631
86
87
  phoenix/server/api/dataloaders/latency_ms_quantile.py,sha256=pEc7QjB2iiNOQm_Fmo99F5O_DKOJWgGmcnT0OADJzYE,7423
87
88
  phoenix/server/api/dataloaders/min_start_or_max_end_times.py,sha256=IoFX5PtSpvQdMk_7-oB8TpIse3Q4PMxep4qKggkHpzo,2902
88
- phoenix/server/api/dataloaders/project_by_name.py,sha256=ziYp8fZGy2duD-f_oTqNLwbrGmBQFesqlTEHBDMtZlw,1170
89
+ phoenix/server/api/dataloaders/project_by_name.py,sha256=W4q-ddeVnja2DNwgg5l8mV2GNADNJf1CTXNcJaBWdfE,1165
89
90
  phoenix/server/api/dataloaders/record_counts.py,sha256=mp3KlhwFw-Iy7o6bFxtJKC6B5kGinPIh5PxxNkrxf8o,4283
90
91
  phoenix/server/api/dataloaders/span_descendants.py,sha256=djqXXwupWaXkFdscoy-iILYijuxlqr8hJcv6JawsV6s,2162
91
92
  phoenix/server/api/dataloaders/span_evaluations.py,sha256=quXGyj_OYvHrcWfzXlYIv7C1OCZiL1K7cWU-YEAlsNA,1316
@@ -121,8 +122,8 @@ phoenix/server/api/input_types/TimeRange.py,sha256=yzx-gxj8mDeGLft1FzU_x1MVEgIG5
121
122
  phoenix/server/api/input_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
122
123
  phoenix/server/api/mutations/__init__.py,sha256=Jcz-pM6SklhEGKcjA7AIB2WJXGhnxGsghrIq131kyDo,502
123
124
  phoenix/server/api/mutations/auth.py,sha256=vPRFoj7J6PV6QeODewG4K0PhoOebS5AfMRpbi_wuhyQ,311
124
- phoenix/server/api/mutations/dataset_mutations.py,sha256=Zp2sFWyGyubILUQboR6bafRWafsfeRO2ffUWnkLlfgI,22532
125
- phoenix/server/api/mutations/experiment_mutations.py,sha256=Fw_yEdITGJ6A33M5JZ-2YnBTDoBqZUUFON6vy8JoVjE,2569
125
+ phoenix/server/api/mutations/dataset_mutations.py,sha256=CuKhxsYfvwVcdN_9EXhKxB6444BQfObzKzzyfAeg-n8,23199
126
+ phoenix/server/api/mutations/experiment_mutations.py,sha256=vV2lbJ7ccXZqe-LY7nXx6QxWqhKQE4UNZAFcML-KQ8I,3011
126
127
  phoenix/server/api/mutations/export_events_mutations.py,sha256=t_wYBxaqvBJYRoHslh3Bmoxmwlzoy0u8SsBKWIKN5hE,4028
127
128
  phoenix/server/api/mutations/project_mutations.py,sha256=3SVDCZqxB0Iv60cOwBL8c-rY3QUUPs8PXbp-C_K1mWY,2267
128
129
  phoenix/server/api/openapi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -130,12 +131,12 @@ phoenix/server/api/openapi/main.py,sha256=WY0pj3B7siQyyYqKyhqnzWC7P8MtEtiukOBUjG
130
131
  phoenix/server/api/openapi/schema.py,sha256=uuSYe1Ecu72aXRgTNjyMu-9ZPE13DAHJPKtedS-MsSs,451
131
132
  phoenix/server/api/routers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
132
133
  phoenix/server/api/routers/utils.py,sha256=M41BoH-fl37izhRuN2aX7lWm7jOC20A_3uClv9TVUUY,583
133
- phoenix/server/api/routers/v1/__init__.py,sha256=vvdpUa2LJPWEg8HbvDm_ANkBAwubPIFPbbHi7elOUws,2808
134
+ phoenix/server/api/routers/v1/__init__.py,sha256=Ir5fsO6gQXW58HGm7s2sMUq0vya7mfcWneLcLJy6_q8,2895
134
135
  phoenix/server/api/routers/v1/dataset_examples.py,sha256=XfqOvDKF1oxb0pkeYfBycwwGt3LnSyyGdMLKC5VKoGQ,6690
135
- phoenix/server/api/routers/v1/datasets.py,sha256=f2gLG-geu-_wtEw4mKSzNWK2cFb5TYOyRL3tQ7Fl7Es,31544
136
+ phoenix/server/api/routers/v1/datasets.py,sha256=r0WcNxF8SKVa3-4rrTIg4Andwr4NmRmW1ybpKuxR9qw,33639
136
137
  phoenix/server/api/routers/v1/evaluations.py,sha256=8g6P_e2BweV3RDU0esFmpkb0L5fCwonQPXiJ0y6HLwg,9126
137
138
  phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=H_psVyuGUQImo0oxdEAKAMQ-oyVwkVIq5yaMHzHIiPc,5455
138
- phoenix/server/api/routers/v1/experiment_runs.py,sha256=u4Kgz1i5AffmCF2LHtC9Oo1hlGscZ3Dm8JlTRhM55yU,8307
139
+ phoenix/server/api/routers/v1/experiment_runs.py,sha256=jy4SynmzdtQMoUzlowmG6wsVU14SsLAzfcW4JOhXjeQ,8154
139
140
  phoenix/server/api/routers/v1/experiments.py,sha256=cG-LyIGRdB1jVTL42Xi2__nsXibVe9Up7m3hFiTIYYY,11886
140
141
  phoenix/server/api/routers/v1/spans.py,sha256=PFeS3ayKj4cUle0CH-f-CpM1fRi-JicEG7BEtkANzAo,4074
141
142
  phoenix/server/api/routers/v1/traces.py,sha256=dYEf5pThenAQCgfQljHdrnwd4tC_tAXm6Kvk6GphPYs,2774
@@ -202,7 +203,7 @@ phoenix/server/static/apple-touch-icon-76x76.png,sha256=CT_xT12I0u2i0WU8JzBZBuOQ
202
203
  phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZVooBJ_SGk2ZkDs,3801
203
204
  phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
204
205
  phoenix/server/static/index.css,sha256=KKGpx4iwF91VGRm0YN-4cn8oC-oIqC6HecoPf0x3ZM8,1885
205
- phoenix/server/static/index.js,sha256=tmyBBBaYS-HaFhd0dqqXiIsy5Lnvf-klOxQ0gRHAoDM,3521766
206
+ phoenix/server/static/index.js,sha256=sUkm_imwx5uXpCQxQEPmX-axDr094HE1wGdBznF8J_E,3524949
206
207
  phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
207
208
  phoenix/server/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
208
209
  phoenix/server/templates/index.html,sha256=S4z7qSoNSwnKFAH9r96AR-YJEyoKMd-VMWVlJ_IdzME,2039
@@ -247,8 +248,8 @@ phoenix/utilities/logging.py,sha256=lDXd6EGaamBNcQxL4vP1au9-i_SXe0OraUDiJOcszSw,
247
248
  phoenix/utilities/project.py,sha256=qWsvKnG1oKhOFUowXf9qiOL2ia7jaFe_ijFFHEt8GJo,431
248
249
  phoenix/utilities/re.py,sha256=PDve_OLjRTM8yQQJHC8-n3HdIONi7aNils3ZKRZ5uBM,2045
249
250
  phoenix/utilities/span_store.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
250
- arize_phoenix-4.7.1.dist-info/METADATA,sha256=4l6GZ3dH0ALP9ifyZ7mSk32mjC_i7rWj_ahg2Mrvf-c,11451
251
- arize_phoenix-4.7.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
252
- arize_phoenix-4.7.1.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
253
- arize_phoenix-4.7.1.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
254
- arize_phoenix-4.7.1.dist-info/RECORD,,
251
+ arize_phoenix-4.8.0.dist-info/METADATA,sha256=UNwc2BbfIMe5RbFT-IuzfKQnX21dFA4zY5EUXbo5bC8,11451
252
+ arize_phoenix-4.8.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
253
+ arize_phoenix-4.8.0.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
254
+ arize_phoenix-4.8.0.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
255
+ arize_phoenix-4.8.0.dist-info/RECORD,,
phoenix/db/helpers.py CHANGED
@@ -1,12 +1,12 @@
1
1
  from enum import Enum
2
- from typing import Any
2
+ from typing import Any, Optional, Tuple
3
3
 
4
4
  from openinference.semconv.trace import (
5
5
  OpenInferenceSpanKindValues,
6
6
  RerankerAttributes,
7
7
  SpanAttributes,
8
8
  )
9
- from sqlalchemy import Integer, SQLColumnExpression, case, func
9
+ from sqlalchemy import Integer, Select, SQLColumnExpression, case, distinct, func, select
10
10
  from typing_extensions import assert_never
11
11
 
12
12
  from phoenix.db import models
@@ -45,3 +45,38 @@ def num_docs_col(dialect: SupportedSQLDialect) -> SQLColumnExpression[Integer]:
45
45
 
46
46
  _RETRIEVAL_DOCUMENTS = SpanAttributes.RETRIEVAL_DOCUMENTS.split(".")
47
47
  _RERANKER_OUTPUT_DOCUMENTS = RerankerAttributes.RERANKER_OUTPUT_DOCUMENTS.split(".")
48
+
49
+
50
+ def get_eval_trace_ids_for_datasets(*dataset_ids: int) -> Select[Tuple[Optional[str]]]:
51
+ return (
52
+ select(distinct(models.ExperimentRunAnnotation.trace_id))
53
+ .join(models.ExperimentRun)
54
+ .join_from(models.ExperimentRun, models.Experiment)
55
+ .where(models.Experiment.dataset_id.in_(set(dataset_ids)))
56
+ .where(models.ExperimentRunAnnotation.trace_id.isnot(None))
57
+ )
58
+
59
+
60
+ def get_project_names_for_datasets(*dataset_ids: int) -> Select[Tuple[Optional[str]]]:
61
+ return (
62
+ select(distinct(models.Experiment.project_name))
63
+ .where(models.Experiment.dataset_id.in_(set(dataset_ids)))
64
+ .where(models.Experiment.project_name.isnot(None))
65
+ )
66
+
67
+
68
+ def get_eval_trace_ids_for_experiments(*experiment_ids: int) -> Select[Tuple[Optional[str]]]:
69
+ return (
70
+ select(distinct(models.ExperimentRunAnnotation.trace_id))
71
+ .join(models.ExperimentRun)
72
+ .where(models.ExperimentRun.experiment_id.in_(set(experiment_ids)))
73
+ .where(models.ExperimentRunAnnotation.trace_id.isnot(None))
74
+ )
75
+
76
+
77
+ def get_project_names_for_experiments(*experiment_ids: int) -> Select[Tuple[Optional[str]]]:
78
+ return (
79
+ select(distinct(models.Experiment.project_name))
80
+ .where(models.Experiment.id.in_(set(experiment_ids)))
81
+ .where(models.Experiment.project_name.isnot(None))
82
+ )
@@ -1,3 +1,4 @@
1
+ import asyncio
1
2
  import functools
2
3
  import inspect
3
4
  import json
@@ -62,7 +63,6 @@ from phoenix.experiments.types import (
62
63
  ExperimentEvaluationRun,
63
64
  ExperimentParameters,
64
65
  ExperimentRun,
65
- ExperimentRunOutput,
66
66
  ExperimentTask,
67
67
  RanExperiment,
68
68
  TaskSummary,
@@ -284,7 +284,7 @@ def run_experiment(
284
284
  experiment_id=experiment.id,
285
285
  dataset_example_id=example.id,
286
286
  repetition_number=repetition_number,
287
- experiment_run_output=ExperimentRunOutput(task_output=output),
287
+ output=output,
288
288
  error=repr(error) if error else None,
289
289
  trace_id=_str_trace_id(span.get_span_context().trace_id), # type: ignore[no-untyped-call]
290
290
  )
@@ -345,13 +345,21 @@ def run_experiment(
345
345
  experiment_id=experiment.id,
346
346
  dataset_example_id=example.id,
347
347
  repetition_number=repetition_number,
348
- experiment_run_output=ExperimentRunOutput(task_output=output),
348
+ output=output,
349
349
  error=repr(error) if error else None,
350
350
  trace_id=_str_trace_id(span.get_span_context().trace_id), # type: ignore[no-untyped-call]
351
351
  )
352
352
  if not dry_run:
353
- resp = await async_client.post(
354
- f"/v1/experiments/{experiment.id}/runs", json=jsonify(exp_run)
353
+ # Below is a workaround to avoid timeout errors sometimes
354
+ # encountered when the task is a synchronous function that
355
+ # blocks for too long.
356
+ resp = await asyncio.get_running_loop().run_in_executor(
357
+ None,
358
+ functools.partial(
359
+ sync_client.post,
360
+ url=f"/v1/experiments/{experiment.id}/runs",
361
+ json=jsonify(exp_run),
362
+ ),
355
363
  )
356
364
  resp.raise_for_status()
357
365
  exp_run = replace(exp_run, id=resp.json()["data"]["id"])
@@ -393,7 +401,7 @@ def run_experiment(
393
401
  ran_experiment.__init__( # type: ignore[misc]
394
402
  params=params,
395
403
  dataset=dataset,
396
- runs={r.id: r for r in task_runs},
404
+ runs={r.id: r for r in task_runs if r is not None},
397
405
  task_summary=task_summary,
398
406
  **_asdict(experiment),
399
407
  )
@@ -498,7 +506,7 @@ def evaluate_experiment(
498
506
  stack.enter_context(capture_spans(resource))
499
507
  try:
500
508
  result = evaluator.evaluate(
501
- output=experiment_run.output,
509
+ output=deepcopy(experiment_run.output),
502
510
  expected=example.output,
503
511
  reference=example.output,
504
512
  input=example.input,
@@ -550,7 +558,7 @@ def evaluate_experiment(
550
558
  stack.enter_context(capture_spans(resource))
551
559
  try:
552
560
  result = await evaluator.async_evaluate(
553
- output=experiment_run.output,
561
+ output=deepcopy(experiment_run.output),
554
562
  expected=example.output,
555
563
  reference=example.output,
556
564
  input=example.input,
@@ -582,7 +590,17 @@ def evaluate_experiment(
582
590
  trace_id=_str_trace_id(span.get_span_context().trace_id), # type: ignore[no-untyped-call]
583
591
  )
584
592
  if not dry_run:
585
- resp = await async_client.post("/v1/experiment_evaluations", json=jsonify(eval_run))
593
+ # Below is a workaround to avoid timeout errors sometimes
594
+ # encountered when the evaluator is a synchronous function
595
+ # that blocks for too long.
596
+ resp = await asyncio.get_running_loop().run_in_executor(
597
+ None,
598
+ functools.partial(
599
+ sync_client.post,
600
+ url="/v1/experiment_evaluations",
601
+ json=jsonify(eval_run),
602
+ ),
603
+ )
586
604
  resp.raise_for_status()
587
605
  eval_run = replace(eval_run, id=resp.json()["data"]["id"])
588
606
  return eval_run
@@ -198,20 +198,6 @@ class Experiment:
198
198
  )
199
199
 
200
200
 
201
- @dataclass(frozen=True)
202
- class ExperimentRunOutput:
203
- task_output: TaskOutput
204
-
205
- def __post_init__(self) -> None:
206
- object.__setattr__(self, "task_output", _make_read_only(self.task_output))
207
-
208
- @classmethod
209
- def from_dict(cls, obj: Optional[Mapping[str, Any]]) -> ExperimentRunOutput:
210
- if not obj:
211
- return cls(task_output=None)
212
- return cls(task_output=obj["task_output"])
213
-
214
-
215
201
  @dataclass(frozen=True)
216
202
  class ExperimentRun:
217
203
  start_time: datetime
@@ -219,15 +205,11 @@ class ExperimentRun:
219
205
  experiment_id: ExperimentId
220
206
  dataset_example_id: ExampleId
221
207
  repetition_number: RepetitionNumber
222
- experiment_run_output: ExperimentRunOutput
208
+ output: JSONSerializable
223
209
  error: Optional[str] = None
224
210
  id: ExperimentRunId = field(default_factory=_dry_run_id)
225
211
  trace_id: Optional[TraceId] = None
226
212
 
227
- @property
228
- def output(self) -> Optional[TaskOutput]:
229
- return deepcopy(self.experiment_run_output.task_output)
230
-
231
213
  @classmethod
232
214
  def from_dict(cls, obj: Mapping[str, Any]) -> ExperimentRun:
233
215
  return cls(
@@ -236,14 +218,14 @@ class ExperimentRun:
236
218
  experiment_id=obj["experiment_id"],
237
219
  dataset_example_id=obj["dataset_example_id"],
238
220
  repetition_number=obj.get("repetition_number") or 1,
239
- experiment_run_output=ExperimentRunOutput.from_dict(obj["experiment_run_output"]),
221
+ output=_make_read_only(obj.get("output")),
240
222
  error=obj.get("error"),
241
223
  id=obj["id"],
242
224
  trace_id=obj.get("trace_id"),
243
225
  )
244
226
 
245
227
  def __post_init__(self) -> None:
246
- if bool(self.experiment_run_output) == bool(self.error):
228
+ if bool(self.output) == bool(self.error):
247
229
  ValueError("Must specify exactly one of experiment_run_output or error")
248
230
 
249
231
 
@@ -381,7 +363,7 @@ class EvaluationSummary(_HasStats):
381
363
  if df.empty:
382
364
  df = pd.DataFrame.from_records(
383
365
  [
384
- {"evaluator": name, "error": True, "score": None, "label": None}
366
+ {"evaluator": name, "error": None, "score": None, "label": None}
385
367
  for name in params.eval_names
386
368
  ]
387
369
  )
@@ -571,7 +553,7 @@ class RanExperiment(Experiment):
571
553
  {
572
554
  "run_id": run.id,
573
555
  "error": run.error,
574
- "output": deepcopy(run.experiment_run_output.task_output),
556
+ "output": deepcopy(run.output),
575
557
  "input": deepcopy((ex := self.dataset.examples[run.dataset_example_id]).input),
576
558
  "expected": deepcopy(ex.output),
577
559
  "metadata": deepcopy(ex.metadata),
@@ -28,4 +28,4 @@ class ProjectByNameDataLoader(DataLoader[Key, Result]):
28
28
  async for project in data:
29
29
  projects_by_name[project.name] = project
30
30
 
31
- return [projects_by_name[project_name] for project_name in project_names]
31
+ return [projects_by_name.get(project_name) for project_name in keys]
@@ -1,3 +1,4 @@
1
+ import asyncio
1
2
  from datetime import datetime
2
3
  from typing import Any, Dict
3
4
 
@@ -10,6 +11,7 @@ from strawberry import UNSET
10
11
  from strawberry.types import Info
11
12
 
12
13
  from phoenix.db import models
14
+ from phoenix.db.helpers import get_eval_trace_ids_for_datasets, get_project_names_for_datasets
13
15
  from phoenix.server.api.context import Context
14
16
  from phoenix.server.api.helpers.dataset_helpers import (
15
17
  get_dataset_example_input,
@@ -30,6 +32,7 @@ from phoenix.server.api.types.Dataset import Dataset, to_gql_dataset
30
32
  from phoenix.server.api.types.DatasetExample import DatasetExample
31
33
  from phoenix.server.api.types.node import from_global_id_with_expected_type
32
34
  from phoenix.server.api.types.Span import Span
35
+ from phoenix.server.api.utils import delete_projects, delete_traces
33
36
 
34
37
 
35
38
  @strawberry.type
@@ -274,21 +277,28 @@ class DatasetMutationMixin:
274
277
  info: Info[Context, None],
275
278
  input: DeleteDatasetInput,
276
279
  ) -> DatasetMutationPayload:
277
- dataset_id = input.dataset_id
278
- dataset_rowid = from_global_id_with_expected_type(
279
- global_id=dataset_id, expected_type_name=Dataset.__name__
280
+ try:
281
+ dataset_id = from_global_id_with_expected_type(
282
+ global_id=input.dataset_id,
283
+ expected_type_name=Dataset.__name__,
284
+ )
285
+ except ValueError:
286
+ raise ValueError(f"Unknown dataset: {input.dataset_id}")
287
+ project_names_stmt = get_project_names_for_datasets(dataset_id)
288
+ eval_trace_ids_stmt = get_eval_trace_ids_for_datasets(dataset_id)
289
+ stmt = (
290
+ delete(models.Dataset).where(models.Dataset.id == dataset_id).returning(models.Dataset)
280
291
  )
281
-
282
292
  async with info.context.db() as session:
283
- delete_result = await session.execute(
284
- delete(models.Dataset)
285
- .where(models.Dataset.id == dataset_rowid)
286
- .returning(models.Dataset)
287
- )
288
- if not (datasets := delete_result.first()):
289
- raise ValueError(f"Unknown dataset: {dataset_id}")
290
-
291
- dataset = datasets[0]
293
+ project_names = await session.scalars(project_names_stmt)
294
+ eval_trace_ids = await session.scalars(eval_trace_ids_stmt)
295
+ if not (dataset := await session.scalar(stmt)):
296
+ raise ValueError(f"Unknown dataset: {input.dataset_id}")
297
+ await asyncio.gather(
298
+ delete_projects(info.context.db, *project_names),
299
+ delete_traces(info.context.db, *eval_trace_ids),
300
+ return_exceptions=True,
301
+ )
292
302
  return DatasetMutationPayload(dataset=to_gql_dataset(dataset))
293
303
 
294
304
  @strawberry.mutation(permission_classes=[IsAuthenticated]) # type: ignore
@@ -1,3 +1,4 @@
1
+ import asyncio
1
2
  from typing import List
2
3
 
3
4
  import strawberry
@@ -6,11 +7,13 @@ from strawberry.relay import GlobalID
6
7
  from strawberry.types import Info
7
8
 
8
9
  from phoenix.db import models
10
+ from phoenix.db.helpers import get_eval_trace_ids_for_experiments, get_project_names_for_experiments
9
11
  from phoenix.server.api.context import Context
10
12
  from phoenix.server.api.input_types.DeleteExperimentsInput import DeleteExperimentsInput
11
13
  from phoenix.server.api.mutations.auth import IsAuthenticated
12
14
  from phoenix.server.api.types.Experiment import Experiment, to_gql_experiment
13
15
  from phoenix.server.api.types.node import from_global_id_with_expected_type
16
+ from phoenix.server.api.utils import delete_projects, delete_traces
14
17
 
15
18
 
16
19
  @strawberry.type
@@ -30,7 +33,11 @@ class ExperimentMutationMixin:
30
33
  from_global_id_with_expected_type(experiment_id, Experiment.__name__)
31
34
  for experiment_id in input.experiment_ids
32
35
  ]
36
+ project_names_stmt = get_project_names_for_experiments(*experiment_ids)
37
+ eval_trace_ids_stmt = get_eval_trace_ids_for_experiments(*experiment_ids)
33
38
  async with info.context.db() as session:
39
+ project_names = await session.scalars(project_names_stmt)
40
+ eval_trace_ids = await session.scalars(eval_trace_ids_stmt)
34
41
  savepoint = await session.begin_nested()
35
42
  experiments = {
36
43
  experiment.id: experiment
@@ -54,10 +61,11 @@ class ExperimentMutationMixin:
54
61
  ]
55
62
  )
56
63
  )
57
- if project_names := set(filter(bool, (e.project_name for e in experiments.values()))):
58
- await session.execute(
59
- delete(models.Project).where(models.Project.name.in_(project_names))
60
- )
64
+ await asyncio.gather(
65
+ delete_projects(info.context.db, *project_names),
66
+ delete_traces(info.context.db, *eval_trace_ids),
67
+ return_exceptions=True,
68
+ )
61
69
  return ExperimentMutationPayload(
62
70
  experiments=[
63
71
  to_gql_experiment(experiments[experiment_id]) for experiment_id in experiment_ids
@@ -44,6 +44,7 @@ V1_ROUTES = [
44
44
  Route("/v1/spans", spans.get_spans_handler, methods=["GET"]),
45
45
  Route("/v1/datasets/upload", datasets.post_datasets_upload, methods=["POST"]),
46
46
  Route("/v1/datasets", datasets.list_datasets, methods=["GET"]),
47
+ Route("/v1/datasets/{id:str}", datasets.delete_dataset_by_id, methods=["DELETE"]),
47
48
  Route("/v1/datasets/{id:str}", datasets.get_dataset_by_id, methods=["GET"]),
48
49
  Route("/v1/datasets/{id:str}/csv", datasets.get_dataset_csv, methods=["GET"]),
49
50
  Route(
@@ -26,13 +26,15 @@ from typing import (
26
26
 
27
27
  import pandas as pd
28
28
  import pyarrow as pa
29
- from sqlalchemy import and_, func, select
29
+ from sqlalchemy import and_, delete, func, select
30
30
  from sqlalchemy.ext.asyncio import AsyncSession
31
+ from starlette.background import BackgroundTasks
31
32
  from starlette.concurrency import run_in_threadpool
32
33
  from starlette.datastructures import FormData, UploadFile
33
34
  from starlette.requests import Request
34
35
  from starlette.responses import JSONResponse, Response
35
36
  from starlette.status import (
37
+ HTTP_204_NO_CONTENT,
36
38
  HTTP_404_NOT_FOUND,
37
39
  HTTP_409_CONFLICT,
38
40
  HTTP_422_UNPROCESSABLE_ENTITY,
@@ -42,6 +44,7 @@ from strawberry.relay import GlobalID
42
44
  from typing_extensions import TypeAlias, assert_never
43
45
 
44
46
  from phoenix.db import models
47
+ from phoenix.db.helpers import get_eval_trace_ids_for_datasets, get_project_names_for_datasets
45
48
  from phoenix.db.insertion.dataset import (
46
49
  DatasetAction,
47
50
  DatasetExampleAdditionEvent,
@@ -52,6 +55,7 @@ from phoenix.server.api.types.Dataset import Dataset
52
55
  from phoenix.server.api.types.DatasetExample import DatasetExample
53
56
  from phoenix.server.api.types.DatasetVersion import DatasetVersion
54
57
  from phoenix.server.api.types.node import from_global_id_with_expected_type
58
+ from phoenix.server.api.utils import delete_projects, delete_traces
55
59
 
56
60
  logger = logging.getLogger(__name__)
57
61
 
@@ -163,6 +167,60 @@ async def list_datasets(request: Request) -> Response:
163
167
  return JSONResponse(content={"next_cursor": next_cursor, "data": data})
164
168
 
165
169
 
170
+ async def delete_dataset_by_id(request: Request) -> Response:
171
+ """
172
+ summary: Delete dataset by ID
173
+ operationId: deleteDatasetById
174
+ tags:
175
+ - datasets
176
+ parameters:
177
+ - in: path
178
+ name: id
179
+ required: true
180
+ schema:
181
+ type: string
182
+ responses:
183
+ 204:
184
+ description: Success
185
+ 403:
186
+ description: Forbidden
187
+ 404:
188
+ description: Dataset not found
189
+ 422:
190
+ description: Dataset ID is invalid
191
+ """
192
+ if id_ := request.path_params.get("id"):
193
+ try:
194
+ dataset_id = from_global_id_with_expected_type(
195
+ GlobalID.from_id(id_),
196
+ Dataset.__name__,
197
+ )
198
+ except ValueError:
199
+ return Response(
200
+ content=f"Invalid Dataset ID: {id_}",
201
+ status_code=HTTP_422_UNPROCESSABLE_ENTITY,
202
+ )
203
+ else:
204
+ return Response(
205
+ content="Missing Dataset ID",
206
+ status_code=HTTP_422_UNPROCESSABLE_ENTITY,
207
+ )
208
+ project_names_stmt = get_project_names_for_datasets(dataset_id)
209
+ eval_trace_ids_stmt = get_eval_trace_ids_for_datasets(dataset_id)
210
+ stmt = (
211
+ delete(models.Dataset).where(models.Dataset.id == dataset_id).returning(models.Dataset.id)
212
+ )
213
+ async with request.app.state.db() as session:
214
+ project_names = await session.scalars(project_names_stmt)
215
+ eval_trace_ids = await session.scalars(eval_trace_ids_stmt)
216
+ if (await session.scalar(stmt)) is None:
217
+ return Response(content="Dataset does not exist", status_code=HTTP_404_NOT_FOUND)
218
+ tasks = BackgroundTasks()
219
+ tasks.add_task(delete_projects, request.app.state.db, *project_names)
220
+ tasks.add_task(delete_traces, request.app.state.db, *eval_trace_ids)
221
+ return Response(status_code=HTTP_204_NO_CONTENT, background=tasks)
222
+
223
+
166
224
  async def get_dataset_by_id(request: Request) -> Response:
167
225
  """
168
226
  summary: Get dataset by ID
@@ -7,9 +7,8 @@ from starlette.status import HTTP_404_NOT_FOUND
7
7
  from strawberry.relay import GlobalID
8
8
 
9
9
  from phoenix.db import models
10
- from phoenix.experiments.types import ExperimentRun, ExperimentRunOutput
10
+ from phoenix.db.models import ExperimentRunOutput
11
11
  from phoenix.server.api.types.node import from_global_id_with_expected_type
12
- from phoenix.utilities.json import jsonify
13
12
 
14
13
 
15
14
  async def create_experiment_run(request: Request) -> Response:
@@ -39,9 +38,8 @@ async def create_experiment_run(request: Request) -> Response:
39
38
  trace_id:
40
39
  type: string
41
40
  description: Optional trace ID for tracking
42
- experiment_run_output:
43
- type: object
44
- description: The output of the experiment run
41
+ output:
42
+ description: The output of the experiment task
45
43
  repetition_number:
46
44
  type: integer
47
45
  description: The repetition number of the experiment run
@@ -101,7 +99,7 @@ async def create_experiment_run(request: Request) -> Response:
101
99
  )
102
100
 
103
101
  trace_id = payload.get("trace_id", None)
104
- output = payload["experiment_run_output"]
102
+ task_output = payload["output"]
105
103
  repetition_number = payload["repetition_number"]
106
104
  start_time = payload["start_time"]
107
105
  end_time = payload["end_time"]
@@ -112,7 +110,7 @@ async def create_experiment_run(request: Request) -> Response:
112
110
  experiment_id=experiment_id,
113
111
  dataset_example_id=dataset_example_id,
114
112
  trace_id=trace_id,
115
- output=output,
113
+ output=ExperimentRunOutput(task_output=task_output),
116
114
  repetition_number=repetition_number,
117
115
  start_time=datetime.fromisoformat(start_time),
118
116
  end_time=datetime.fromisoformat(end_time),
@@ -170,9 +168,8 @@ async def list_experiment_runs(request: Request) -> Response:
170
168
  type: string
171
169
  format: date-time
172
170
  description: The end time of the experiment run in ISO format
173
- experiment_run_output:
174
- type: object
175
- description: The output of the experiment run
171
+ output:
172
+ description: The output of the experiment task
176
173
  error:
177
174
  type: string
178
175
  description: Error message if the experiment run encountered an error
@@ -205,16 +202,16 @@ async def list_experiment_runs(request: Request) -> Response:
205
202
  experiment_gid = GlobalID("Experiment", str(exp_run.experiment_id))
206
203
  example_gid = GlobalID("DatasetExample", str(exp_run.dataset_example_id))
207
204
  runs.append(
208
- ExperimentRun(
209
- start_time=exp_run.start_time,
210
- end_time=exp_run.end_time,
205
+ dict(
206
+ start_time=exp_run.start_time.isoformat(),
207
+ end_time=exp_run.end_time.isoformat(),
211
208
  experiment_id=str(experiment_gid),
212
209
  dataset_example_id=str(example_gid),
213
210
  repetition_number=exp_run.repetition_number,
214
- experiment_run_output=ExperimentRunOutput.from_dict(exp_run.output),
211
+ output=exp_run.output.get("task_output"),
215
212
  error=exp_run.error,
216
213
  id=str(run_gid),
217
214
  trace_id=exp_run.trace_id,
218
215
  )
219
216
  )
220
- return JSONResponse(content={"data": jsonify(runs)}, status_code=200)
217
+ return JSONResponse(content={"data": runs}, status_code=200)
@@ -0,0 +1,36 @@
1
+ from typing import AsyncContextManager, Callable, List
2
+
3
+ from sqlalchemy import delete
4
+ from sqlalchemy.ext.asyncio import AsyncSession
5
+
6
+ from phoenix.db import models
7
+
8
+
9
+ async def delete_projects(
10
+ db: Callable[[], AsyncContextManager[AsyncSession]],
11
+ *project_names: str,
12
+ ) -> List[int]:
13
+ if not project_names:
14
+ return []
15
+ stmt = (
16
+ delete(models.Project)
17
+ .where(models.Project.name.in_(set(project_names)))
18
+ .returning(models.Project.id)
19
+ )
20
+ async with db() as session:
21
+ return list(await session.scalars(stmt))
22
+
23
+
24
+ async def delete_traces(
25
+ db: Callable[[], AsyncContextManager[AsyncSession]],
26
+ *trace_ids: str,
27
+ ) -> List[int]:
28
+ if not trace_ids:
29
+ return []
30
+ stmt = (
31
+ delete(models.Trace)
32
+ .where(models.Trace.trace_id.in_(set(trace_ids)))
33
+ .returning(models.Trace.id)
34
+ )
35
+ async with db() as session:
36
+ return list(await session.scalars(stmt))