arize-phoenix 10.0.3__py3-none-any.whl → 10.1.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (20)
  1. {arize_phoenix-10.0.3.dist-info → arize_phoenix-10.1.0.dist-info}/METADATA +3 -3
  2. {arize_phoenix-10.0.3.dist-info → arize_phoenix-10.1.0.dist-info}/RECORD +20 -20
  3. phoenix/experiments/functions.py +132 -18
  4. phoenix/server/api/helpers/playground_clients.py +4 -0
  5. phoenix/server/api/routers/v1/experiment_runs.py +16 -4
  6. phoenix/server/api/routers/v1/spans.py +521 -4
  7. phoenix/server/static/.vite/manifest.json +44 -44
  8. phoenix/server/static/assets/{components-DULKeDfL.js → components-BHJEWQsc.js} +384 -336
  9. phoenix/server/static/assets/{index-E0M82BdE.js → index-CAt4FOOX.js} +6 -2
  10. phoenix/server/static/assets/{pages-Cl0A-0U2.js → pages-kpw8RDmo.js} +535 -545
  11. phoenix/server/static/assets/{vendor-oB4u9zuV.js → vendor-DOUbLVp5.js} +1 -1
  12. phoenix/server/static/assets/{vendor-arizeai-Dy-0mSNw.js → vendor-arizeai-DHqMQzfV.js} +6 -6
  13. phoenix/server/static/assets/{vendor-codemirror-DBtifKNr.js → vendor-codemirror-DWdZV1Is.js} +1 -1
  14. phoenix/server/static/assets/{vendor-recharts-D-T4KPz2.js → vendor-recharts-BfHdRd1Y.js} +1 -1
  15. phoenix/server/static/assets/{vendor-shiki-BMn4O_9F.js → vendor-shiki-CHu75YVL.js} +1 -1
  16. phoenix/version.py +1 -1
  17. {arize_phoenix-10.0.3.dist-info → arize_phoenix-10.1.0.dist-info}/WHEEL +0 -0
  18. {arize_phoenix-10.0.3.dist-info → arize_phoenix-10.1.0.dist-info}/entry_points.txt +0 -0
  19. {arize_phoenix-10.0.3.dist-info → arize_phoenix-10.1.0.dist-info}/licenses/IP_NOTICE +0 -0
  20. {arize_phoenix-10.0.3.dist-info → arize_phoenix-10.1.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: arize-phoenix
3
- Version: 10.0.3
3
+ Version: 10.1.0
4
4
  Summary: AI Observability and Evaluation
5
5
  Project-URL: Documentation, https://docs.arize.com/phoenix/
6
6
  Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -21,8 +21,8 @@ Requires-Dist: aioitertools
21
21
  Requires-Dist: aiosqlite
22
22
  Requires-Dist: alembic<2,>=1.3.0
23
23
  Requires-Dist: arize-phoenix-client
24
- Requires-Dist: arize-phoenix-evals>=0.13.1
25
- Requires-Dist: arize-phoenix-otel>=0.5.1
24
+ Requires-Dist: arize-phoenix-evals>=0.20.6
25
+ Requires-Dist: arize-phoenix-otel>=0.9.2
26
26
  Requires-Dist: authlib
27
27
  Requires-Dist: cachetools
28
28
  Requires-Dist: email-validator
@@ -6,7 +6,7 @@ phoenix/exceptions.py,sha256=n2L2KKuecrdflB9MsCdAYCiSEvGJptIsfRkXMoJle7A,169
6
6
  phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
7
7
  phoenix/services.py,sha256=ngkyKGVatX3cO2WJdo2hKdaVKP-xJCMvqthvga6kJss,5196
8
8
  phoenix/settings.py,sha256=x87BX7hWGQQZbrW_vrYqFR_izCGfO9gFc--JXUG4Tdk,754
9
- phoenix/version.py,sha256=SBPze3-vOQ7xHI2sABcoDWHzmJDMtJBs9M1wdSz1A3E,23
9
+ phoenix/version.py,sha256=OL-z73Pbatjb7iB0kk-omeb_qR2_5zFbHqhV-TRrbvM,23
10
10
  phoenix/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
12
12
  phoenix/core/model.py,sha256=qBFraOtmwCCnWJltKNP18DDG0mULXigytlFsa6YOz6k,4837
@@ -56,7 +56,7 @@ phoenix/db/types/identifier.py,sha256=Opr3_1di6e5ncrBDn30WfBSr-jN_VGBnkkA4BMuSoy
56
56
  phoenix/db/types/model_provider.py,sha256=96UMeqiy5X9PmYMOWA6dZAmI_BSV3yVxt9HEVYGe5Ns,157
57
57
  phoenix/db/types/trace_retention.py,sha256=UoLVX4efhv-mcF32zHpMX_hXWe8474cmhEyPmYqeMfI,9573
58
58
  phoenix/experiments/__init__.py,sha256=6JGwgUd7xCbGpuHqYZlsmErmYvVgv7N_j43bn3dUqsk,123
59
- phoenix/experiments/functions.py,sha256=hmTLqNgrWGFWQlQnWvNgOrpeWStGvbzQkD-1Tx-wM94,33270
59
+ phoenix/experiments/functions.py,sha256=6cSS_5O5V1EZ_5CIQ6lGOUwFyNNsADP7Uu__GmuOz4A,37983
60
60
  phoenix/experiments/tracing.py,sha256=seNh9rBH-rtQe8_FPI_VJj1rbo3ADcP6wDvERkMoxNc,2858
61
61
  phoenix/experiments/types.py,sha256=yntt6fnAny1U4Q9Y5Mm4ZYIb9319OaJovl-kyXFtGQE,23475
62
62
  phoenix/experiments/utils.py,sha256=MZ1-OnTcavk_KUtbfGqt55Fk9TGtJpYG_K71WsN-zDk,785
@@ -158,7 +158,7 @@ phoenix/server/api/helpers/__init__.py,sha256=m2-xaSPqUiSs91k62JaRDjFNfl-1byxBfY
158
158
  phoenix/server/api/helpers/annotations.py,sha256=9gMXKpMTfWEChoSCnvdWYuyB0hlSnNOp-qUdar9Vono,262
159
159
  phoenix/server/api/helpers/dataset_helpers.py,sha256=DoMBTg-qXTnC_K4Evx1WKpCCYgRbITpVqyY-8efJRf0,8984
160
160
  phoenix/server/api/helpers/experiment_run_filters.py,sha256=DOnVwrmn39eAkk2mwuZP8kIcAnR5jrOgllEwWSjsw94,29893
161
- phoenix/server/api/helpers/playground_clients.py,sha256=C-GPq4wklcnGXiW5-7-ipx5wjowDuwSKzqbGHta2QEc,41888
161
+ phoenix/server/api/helpers/playground_clients.py,sha256=-9lbPHTiYgRa1KdzdM1pRJvISvRRgEfhn0jdfoWUWHU,42014
162
162
  phoenix/server/api/helpers/playground_registry.py,sha256=CPLMziFB2wmr-dfbx7VbzO2f8YIG_k5RftzvGXYGQ1w,2570
163
163
  phoenix/server/api/helpers/playground_spans.py,sha256=ObAhvV_yNwEQDkjzgU5G73wfIisc8q4cpB0OFH5cd24,16974
164
164
  phoenix/server/api/helpers/prompts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -235,12 +235,12 @@ phoenix/server/api/routers/v1/annotations.py,sha256=oeafR2tCLu-uIwM9J72gN3MX5WDh
235
235
  phoenix/server/api/routers/v1/datasets.py,sha256=Wqiy6ZKqn4BZSFyn93gzuhWx3mGn7kOkNncHzCWuBq8,37325
236
236
  phoenix/server/api/routers/v1/evaluations.py,sha256=GFTo42aIEX0Htn0EjjoE1JZDYlvryeZ_CK9kowhwzGw,12830
237
237
  phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=xSs004jNYsOl3eg-6Zjo2tt9TefTd7WR3twWYrsNQNk,4828
238
- phoenix/server/api/routers/v1/experiment_runs.py,sha256=jqpquCygtUYNNN7lgSvGvOlXCE7KTleDRFjxJ7bbDfM,6400
238
+ phoenix/server/api/routers/v1/experiment_runs.py,sha256=FreGzzprPpJ_DBHUkdUckca6EGCrnvgVpqk3CLT7wRc,7000
239
239
  phoenix/server/api/routers/v1/experiments.py,sha256=V9_sxqLTE1MKGFu9H3FEdGKr70lYMbGZx813MGaavfQ,20430
240
240
  phoenix/server/api/routers/v1/models.py,sha256=r0nM2kFJ3mxDqgc5vFr1cjNuyOPs3RIKE_DS2VMdF48,1749
241
241
  phoenix/server/api/routers/v1/projects.py,sha256=RVOAWW8RQIqaebQFcIbk1OGxO7B1BITtUewlcSUhasg,12615
242
242
  phoenix/server/api/routers/v1/prompts.py,sha256=aBOUBwLDzZDIzJQkxJcR8ZKnakNJOLMwzsLKINSs1mA,26545
243
- phoenix/server/api/routers/v1/spans.py,sha256=RsmHZ-KusCUleCObNLzlHgOtbTqR50PU82sr8mspecc,10811
243
+ phoenix/server/api/routers/v1/spans.py,sha256=qJVN0pVgZM5cMXQoNrCwmFjKDm_7-JHKdt_KU9IDFsA,32121
244
244
  phoenix/server/api/routers/v1/traces.py,sha256=DfzeszQvtlrVxvurJLaWJJAhkCZ4BodLwpFuBYPwN5Q,8206
245
245
  phoenix/server/api/routers/v1/utils.py,sha256=oXIOGPzPTkE0ZWUTRCoRIQQ7wTzoSwtWFaUSjlGBqts,4960
246
246
  phoenix/server/api/types/Annotation.py,sha256=gsl8CwjIbDUbZRj4d9USwZ_w_Tkz4i7zuZh9ftV80jA,1132
@@ -347,16 +347,16 @@ phoenix/server/static/apple-touch-icon-76x76.png,sha256=CT_xT12I0u2i0WU8JzBZBuOQ
347
347
  phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZVooBJ_SGk2ZkDs,3801
348
348
  phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
349
349
  phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
350
- phoenix/server/static/.vite/manifest.json,sha256=qvWFastdLyXnceS4MZmIPrKElK-bFuE8RRfzZH5k7kI,2165
351
- phoenix/server/static/assets/components-DULKeDfL.js,sha256=iGiONZ4ZFgUfocixbsrs--OKKF6YCNKFR4AFfw5_rpQ,540890
352
- phoenix/server/static/assets/index-E0M82BdE.js,sha256=_2I1bt69d7vhL-IdVCHKs1Geodztj9fi-h4Qq5m7Rd0,60283
353
- phoenix/server/static/assets/pages-Cl0A-0U2.js,sha256=wUCE1V9qy66c9twIYV0djjOAntXS8fqZIdcAvy7LfMM,1033702
350
+ phoenix/server/static/.vite/manifest.json,sha256=uZhT5WYds65YqNtUyHWehErQIV4tFNEoiyZahFTFA4Y,2165
351
+ phoenix/server/static/assets/components-BHJEWQsc.js,sha256=ekj4iMOzo2et9hLV5-gp70qytp--8V7mXpFWbQ-Ov1k,544202
352
+ phoenix/server/static/assets/index-CAt4FOOX.js,sha256=R9LYPWBofyISq2QdzxT80OiIbsv99gncJ1XdqhrYrr8,60432
353
+ phoenix/server/static/assets/pages-kpw8RDmo.js,sha256=NU__puRtkOse1X6iDEVq2_v-lWMOlTMcR6c8fkTZFes,1033323
354
+ phoenix/server/static/assets/vendor-DOUbLVp5.js,sha256=AN12odor8w2dKOG4LokzFITR1h52TP1qnBDQl1u67Dc,2744392
354
355
  phoenix/server/static/assets/vendor-WIZid84E.css,sha256=spZD2r7XL5GfLO13ln-IuXfnjAref8l6g_n_AvxxOlI,5517
355
- phoenix/server/static/assets/vendor-arizeai-Dy-0mSNw.js,sha256=z31Zu8483ieIn8uA916ca4O0E1d62E2CIb2Vastw4jo,193248
356
- phoenix/server/static/assets/vendor-codemirror-DBtifKNr.js,sha256=GTbWyAwCsvpTLghl-Z70jTHdW1htwhulJ5x-P1BCKGA,781264
357
- phoenix/server/static/assets/vendor-oB4u9zuV.js,sha256=urt3ZQ2T4zryDs_0BOgzSLzE6CF9vHHd01cAf3vfefE,2744392
358
- phoenix/server/static/assets/vendor-recharts-D-T4KPz2.js,sha256=CsrHvK1HHMKh4FUhX2oBjc8FupQqktP0iM16HiutK_4,282150
359
- phoenix/server/static/assets/vendor-shiki-BMn4O_9F.js,sha256=LIz0aw3E5-PMDE-1oyAG08B_By2OKHMZ74AvuWJU0ug,8980312
356
+ phoenix/server/static/assets/vendor-arizeai-DHqMQzfV.js,sha256=zaVJbapZYf9-reZp549G232mEi7MY7yZR2cBbchRISs,193248
357
+ phoenix/server/static/assets/vendor-codemirror-DWdZV1Is.js,sha256=S0-OzJX5aZayqOQS3KcC-ztH5EITxeCHnOA2nk5li8A,781264
358
+ phoenix/server/static/assets/vendor-recharts-BfHdRd1Y.js,sha256=YdjHHSsRjbhTkgi_NdynIGnwqX1YibrXZhGgbewsOfo,282150
359
+ phoenix/server/static/assets/vendor-shiki-CHu75YVL.js,sha256=FaxGIHyaAJFTau_Jt8GlW41aKphdTaEur_nSd6UrbpU,8980312
360
360
  phoenix/server/static/assets/vendor-three-C5WAXd5r.js,sha256=ELkg06u70N7h8oFmvqdoHyPuUf9VgGEWeT4LKFx4VWo,620975
361
361
  phoenix/server/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
362
362
  phoenix/server/templates/index.html,sha256=NpJ83DULqcStXFbShNamX4_NPDtnnucuBxppvUYjJa8,4409
@@ -397,9 +397,9 @@ phoenix/utilities/project.py,sha256=auVpARXkDb-JgeX5f2aStyFIkeKvGwN9l7qrFeJMVxI,
397
397
  phoenix/utilities/re.py,sha256=6YyUWIkv0zc2SigsxfOWIHzdpjKA_TZo2iqKq7zJKvw,2081
398
398
  phoenix/utilities/span_store.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
399
399
  phoenix/utilities/template_formatters.py,sha256=gh9PJD6WEGw7TEYXfSst1UR4pWWwmjxMLrDVQ_CkpkQ,2779
400
- arize_phoenix-10.0.3.dist-info/METADATA,sha256=5iF5zvSMrc2EboqD4A3zQIJZQIzDBs-E8xDk7nvaAb4,25591
401
- arize_phoenix-10.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
402
- arize_phoenix-10.0.3.dist-info/entry_points.txt,sha256=Pgpn8Upxx9P8z8joPXZWl2LlnAlGc3gcQoVchb06X1Q,94
403
- arize_phoenix-10.0.3.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
404
- arize_phoenix-10.0.3.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
405
- arize_phoenix-10.0.3.dist-info/RECORD,,
400
+ arize_phoenix-10.1.0.dist-info/METADATA,sha256=LQdP2QH9SNHA7uH0XeEJq1Mkf3l3JG0s8qcg0WRTK1Q,25591
401
+ arize_phoenix-10.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
402
+ arize_phoenix-10.1.0.dist-info/entry_points.txt,sha256=Pgpn8Upxx9P8z8joPXZWl2LlnAlGc3gcQoVchb06X1Q,94
403
+ arize_phoenix-10.1.0.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
404
+ arize_phoenix-10.1.0.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
405
+ arize_phoenix-10.1.0.dist-info/RECORD,,
@@ -16,6 +16,7 @@ from urllib.parse import urljoin
16
16
  import httpx
17
17
  import opentelemetry.sdk.trace as trace_sdk
18
18
  import pandas as pd
19
+ from httpx import HTTPStatusError
19
20
  from openinference.semconv.resource import ResourceAttributes
20
21
  from openinference.semconv.trace import (
21
22
  OpenInferenceMimeTypeValues,
@@ -232,8 +233,43 @@ def run_experiment(
232
233
  print(f"📺 View dataset experiments: {dataset_experiments_url}")
233
234
  print(f"🔗 View this experiment: {experiment_compare_url}")
234
235
 
235
- def sync_run_experiment(test_case: TestCase) -> ExperimentRun:
236
+ # Create a cache for task results
237
+ task_result_cache: dict[tuple[str, int], Any] = {}
238
+
239
+ def sync_run_experiment(test_case: TestCase) -> Optional[ExperimentRun]:
236
240
  example, repetition_number = test_case.example, test_case.repetition_number
241
+ cache_key = (example.id, repetition_number)
242
+
243
+ # Check if we have a cached result
244
+ if cache_key in task_result_cache:
245
+ output = task_result_cache[cache_key]
246
+ exp_run = ExperimentRun(
247
+ start_time=datetime.now(
248
+ timezone.utc
249
+ ), # Use current time since we don't have the original span
250
+ end_time=datetime.now(timezone.utc),
251
+ experiment_id=experiment.id,
252
+ dataset_example_id=example.id,
253
+ repetition_number=repetition_number,
254
+ output=output,
255
+ error=None,
256
+ trace_id=None, # No trace ID since we don't have the original span
257
+ )
258
+ if not dry_run:
259
+ try:
260
+ # Try to create the run directly
261
+ resp = sync_client.post(
262
+ f"/v1/experiments/{experiment.id}/runs", json=jsonify(exp_run)
263
+ )
264
+ resp.raise_for_status()
265
+ exp_run = replace(exp_run, id=resp.json()["data"]["id"])
266
+ except HTTPStatusError as e:
267
+ if e.response.status_code == 409:
268
+ # Ignore duplicate runs - we'll get the final state from the database
269
+ return None
270
+ raise
271
+ return exp_run
272
+
237
273
  output = None
238
274
  error: Optional[BaseException] = None
239
275
  status = Status(StatusCode.OK)
@@ -283,6 +319,7 @@ def run_experiment(
283
319
  assert isinstance(
284
320
  output, (dict, list, str, int, float, bool, type(None))
285
321
  ), "Output must be JSON serializable"
322
+
286
323
  exp_run = ExperimentRun(
287
324
  start_time=_decode_unix_nano(cast(int, span.start_time)),
288
325
  end_time=_decode_unix_nano(cast(int, span.end_time)),
@@ -294,13 +331,62 @@ def run_experiment(
294
331
  trace_id=_str_trace_id(span.get_span_context().trace_id), # type: ignore[no-untyped-call]
295
332
  )
296
333
  if not dry_run:
297
- resp = sync_client.post(f"/v1/experiments/{experiment.id}/runs", json=jsonify(exp_run))
298
- resp.raise_for_status()
299
- exp_run = replace(exp_run, id=resp.json()["data"]["id"])
334
+ try:
335
+ # Try to create the run directly
336
+ resp = sync_client.post(
337
+ f"/v1/experiments/{experiment.id}/runs", json=jsonify(exp_run)
338
+ )
339
+ resp.raise_for_status()
340
+ exp_run = replace(exp_run, id=resp.json()["data"]["id"])
341
+ if error is None:
342
+ task_result_cache[cache_key] = output
343
+ except HTTPStatusError as e:
344
+ if e.response.status_code == 409:
345
+ # 409 conflict errors are caused by submitting duplicate runs
346
+ return None
347
+ raise
300
348
  return exp_run
301
349
 
302
- async def async_run_experiment(test_case: TestCase) -> ExperimentRun:
350
+ async def async_run_experiment(test_case: TestCase) -> Optional[ExperimentRun]:
303
351
  example, repetition_number = test_case.example, test_case.repetition_number
352
+ cache_key = (example.id, repetition_number)
353
+
354
+ # Check if we have a cached result
355
+ if cache_key in task_result_cache:
356
+ output = task_result_cache[cache_key]
357
+ exp_run = ExperimentRun(
358
+ start_time=datetime.now(
359
+ timezone.utc
360
+ ), # Use current time since we don't have the original span
361
+ end_time=datetime.now(timezone.utc),
362
+ experiment_id=experiment.id,
363
+ dataset_example_id=example.id,
364
+ repetition_number=repetition_number,
365
+ output=output,
366
+ error=None,
367
+ trace_id=None, # No trace ID since we don't have the original span
368
+ )
369
+ if not dry_run:
370
+ try:
371
+ # Try to create the run directly
372
+ future = asyncio.get_running_loop().run_in_executor(
373
+ None,
374
+ functools.partial(
375
+ sync_client.post,
376
+ url=f"/v1/experiments/{experiment.id}/runs",
377
+ json=jsonify(exp_run),
378
+ ),
379
+ )
380
+ resp = await future
381
+ resp.raise_for_status()
382
+ exp_run = replace(exp_run, id=resp.json()["data"]["id"])
383
+ except HTTPStatusError as e:
384
+ if e.response.status_code == 409:
385
+ # 409 conflict errors are caused by submitting duplicate runs
386
+ return None
387
+ raise
388
+ return exp_run
389
+
304
390
  output = None
305
391
  error: Optional[BaseException] = None
306
392
  status = Status(StatusCode.OK)
@@ -344,6 +430,7 @@ def run_experiment(
344
430
  assert isinstance(
345
431
  output, (dict, list, str, int, float, bool, type(None))
346
432
  ), "Output must be JSON serializable"
433
+
347
434
  exp_run = ExperimentRun(
348
435
  start_time=_decode_unix_nano(cast(int, span.start_time)),
349
436
  end_time=_decode_unix_nano(cast(int, span.end_time)),
@@ -355,19 +442,26 @@ def run_experiment(
355
442
  trace_id=_str_trace_id(span.get_span_context().trace_id), # type: ignore[no-untyped-call]
356
443
  )
357
444
  if not dry_run:
358
- # Below is a workaround to avoid timeout errors sometimes
359
- # encountered when the task is a synchronous function that
360
- # blocks for too long.
361
- resp = await asyncio.get_running_loop().run_in_executor(
362
- None,
363
- functools.partial(
364
- sync_client.post,
365
- url=f"/v1/experiments/{experiment.id}/runs",
366
- json=jsonify(exp_run),
367
- ),
368
- )
369
- resp.raise_for_status()
370
- exp_run = replace(exp_run, id=resp.json()["data"]["id"])
445
+ try:
446
+ # Try to create the run directly
447
+ future = asyncio.get_running_loop().run_in_executor(
448
+ None,
449
+ functools.partial(
450
+ sync_client.post,
451
+ url=f"/v1/experiments/{experiment.id}/runs",
452
+ json=jsonify(exp_run),
453
+ ),
454
+ )
455
+ resp = await future
456
+ resp.raise_for_status()
457
+ exp_run = replace(exp_run, id=resp.json()["data"]["id"])
458
+ if error is None:
459
+ task_result_cache[cache_key] = output
460
+ except HTTPStatusError as e:
461
+ if e.response.status_code == 409:
462
+ # Ignore duplicate runs - we'll get the final state from the database
463
+ return None
464
+ raise
371
465
  return exp_run
372
466
 
373
467
  _errors: tuple[type[BaseException], ...]
@@ -401,6 +495,26 @@ def run_experiment(
401
495
  ]
402
496
  task_runs, _execution_details = executor.run(test_cases)
403
497
  print("✅ Task runs completed.")
498
+
499
+ # Get the final state of runs from the database
500
+ if not dry_run:
501
+ all_runs = sync_client.get(f"/v1/experiments/{experiment.id}/runs").json()["data"]
502
+ task_runs = []
503
+ for run in all_runs:
504
+ # Parse datetime strings
505
+ run["start_time"] = datetime.fromisoformat(run["start_time"])
506
+ run["end_time"] = datetime.fromisoformat(run["end_time"])
507
+ task_runs.append(ExperimentRun.from_dict(run))
508
+
509
+ # Check if we got all expected runs
510
+ expected_runs = len(dataset.examples) * repetitions
511
+ actual_runs = len(task_runs)
512
+ if actual_runs < expected_runs:
513
+ print(
514
+ f"⚠️ Warning: Only {actual_runs} out of {expected_runs} expected runs were "
515
+ "completed successfully."
516
+ )
517
+
404
518
  params = ExperimentParameters(n_examples=len(dataset.examples), n_repetitions=repetitions)
405
519
  task_summary = TaskSummary.from_task_runs(params, task_runs)
406
520
  ran_experiment: RanExperiment = object.__new__(RanExperiment)
@@ -701,6 +701,10 @@ class AzureOpenAIStreamingClient(OpenAIBaseStreamingClient):
701
701
  provider_key=GenerativeProviderKey.ANTHROPIC,
702
702
  model_names=[
703
703
  PROVIDER_DEFAULT,
704
+ "claude-sonnet-4-0",
705
+ "claude-sonnet-4-20250514",
706
+ "claude-opus-4-0",
707
+ "claude-opus-4-20250514",
704
708
  "claude-3-7-sonnet-latest",
705
709
  "claude-3-7-sonnet-20250219",
706
710
  "claude-3-5-sonnet-latest",
@@ -4,8 +4,10 @@ from typing import Any, Optional
4
4
  from fastapi import APIRouter, HTTPException
5
5
  from pydantic import Field
6
6
  from sqlalchemy import select
7
+ from sqlalchemy.exc import IntegrityError as PostgreSQLIntegrityError
8
+ from sqlean.dbapi2 import IntegrityError as SQLiteIntegrityError # type: ignore[import-untyped]
7
9
  from starlette.requests import Request
8
- from starlette.status import HTTP_404_NOT_FOUND
10
+ from starlette.status import HTTP_404_NOT_FOUND, HTTP_409_CONFLICT
9
11
  from strawberry.relay import GlobalID
10
12
 
11
13
  from phoenix.db import models
@@ -58,7 +60,11 @@ class CreateExperimentRunResponseBody(ResponseBody[CreateExperimentRunResponseBo
58
60
  {
59
61
  "status_code": HTTP_404_NOT_FOUND,
60
62
  "description": "Experiment or dataset example not found",
61
- }
63
+ },
64
+ {
65
+ "status_code": HTTP_409_CONFLICT,
66
+ "description": "This experiment run has already been submitted",
67
+ },
62
68
  ]
63
69
  ),
64
70
  )
@@ -101,8 +107,14 @@ async def create_experiment_run(
101
107
  end_time=end_time,
102
108
  error=error,
103
109
  )
104
- session.add(exp_run)
105
- await session.flush()
110
+ try:
111
+ session.add(exp_run)
112
+ await session.flush()
113
+ except (PostgreSQLIntegrityError, SQLiteIntegrityError):
114
+ raise HTTPException(
115
+ detail="This experiment run has already been submitted",
116
+ status_code=HTTP_409_CONFLICT,
117
+ )
106
118
  request.state.event_queue.put(ExperimentRunInsertEvent((exp_run.id,)))
107
119
  run_gid = GlobalID("ExperimentRun", str(exp_run.id))
108
120
  return CreateExperimentRunResponseBody(