arize-phoenix 10.0.3__py3-none-any.whl → 10.0.4__py3-none-any.whl

This diff compares the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
{arize_phoenix-10.0.3.dist-info → arize_phoenix-10.0.4.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: arize-phoenix
-Version: 10.0.3
+Version: 10.0.4
 Summary: AI Observability and Evaluation
 Project-URL: Documentation, https://docs.arize.com/phoenix/
 Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
{arize_phoenix-10.0.3.dist-info → arize_phoenix-10.0.4.dist-info}/RECORD CHANGED
@@ -6,7 +6,7 @@ phoenix/exceptions.py,sha256=n2L2KKuecrdflB9MsCdAYCiSEvGJptIsfRkXMoJle7A,169
 phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 phoenix/services.py,sha256=ngkyKGVatX3cO2WJdo2hKdaVKP-xJCMvqthvga6kJss,5196
 phoenix/settings.py,sha256=x87BX7hWGQQZbrW_vrYqFR_izCGfO9gFc--JXUG4Tdk,754
-phoenix/version.py,sha256=SBPze3-vOQ7xHI2sABcoDWHzmJDMtJBs9M1wdSz1A3E,23
+phoenix/version.py,sha256=gyWMSD5bIZRum86t3FjwjQ0gz3poNiCaCrUA12HtJp8,23
 phoenix/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
 phoenix/core/model.py,sha256=qBFraOtmwCCnWJltKNP18DDG0mULXigytlFsa6YOz6k,4837
@@ -56,7 +56,7 @@ phoenix/db/types/identifier.py,sha256=Opr3_1di6e5ncrBDn30WfBSr-jN_VGBnkkA4BMuSoy
 phoenix/db/types/model_provider.py,sha256=96UMeqiy5X9PmYMOWA6dZAmI_BSV3yVxt9HEVYGe5Ns,157
 phoenix/db/types/trace_retention.py,sha256=UoLVX4efhv-mcF32zHpMX_hXWe8474cmhEyPmYqeMfI,9573
 phoenix/experiments/__init__.py,sha256=6JGwgUd7xCbGpuHqYZlsmErmYvVgv7N_j43bn3dUqsk,123
-phoenix/experiments/functions.py,sha256=hmTLqNgrWGFWQlQnWvNgOrpeWStGvbzQkD-1Tx-wM94,33270
+phoenix/experiments/functions.py,sha256=6cSS_5O5V1EZ_5CIQ6lGOUwFyNNsADP7Uu__GmuOz4A,37983
 phoenix/experiments/tracing.py,sha256=seNh9rBH-rtQe8_FPI_VJj1rbo3ADcP6wDvERkMoxNc,2858
 phoenix/experiments/types.py,sha256=yntt6fnAny1U4Q9Y5Mm4ZYIb9319OaJovl-kyXFtGQE,23475
 phoenix/experiments/utils.py,sha256=MZ1-OnTcavk_KUtbfGqt55Fk9TGtJpYG_K71WsN-zDk,785
@@ -235,7 +235,7 @@ phoenix/server/api/routers/v1/annotations.py,sha256=oeafR2tCLu-uIwM9J72gN3MX5WDh
 phoenix/server/api/routers/v1/datasets.py,sha256=Wqiy6ZKqn4BZSFyn93gzuhWx3mGn7kOkNncHzCWuBq8,37325
 phoenix/server/api/routers/v1/evaluations.py,sha256=GFTo42aIEX0Htn0EjjoE1JZDYlvryeZ_CK9kowhwzGw,12830
 phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=xSs004jNYsOl3eg-6Zjo2tt9TefTd7WR3twWYrsNQNk,4828
-phoenix/server/api/routers/v1/experiment_runs.py,sha256=jqpquCygtUYNNN7lgSvGvOlXCE7KTleDRFjxJ7bbDfM,6400
+phoenix/server/api/routers/v1/experiment_runs.py,sha256=FreGzzprPpJ_DBHUkdUckca6EGCrnvgVpqk3CLT7wRc,7000
 phoenix/server/api/routers/v1/experiments.py,sha256=V9_sxqLTE1MKGFu9H3FEdGKr70lYMbGZx813MGaavfQ,20430
 phoenix/server/api/routers/v1/models.py,sha256=r0nM2kFJ3mxDqgc5vFr1cjNuyOPs3RIKE_DS2VMdF48,1749
 phoenix/server/api/routers/v1/projects.py,sha256=RVOAWW8RQIqaebQFcIbk1OGxO7B1BITtUewlcSUhasg,12615
@@ -397,9 +397,9 @@ phoenix/utilities/project.py,sha256=auVpARXkDb-JgeX5f2aStyFIkeKvGwN9l7qrFeJMVxI,
 phoenix/utilities/re.py,sha256=6YyUWIkv0zc2SigsxfOWIHzdpjKA_TZo2iqKq7zJKvw,2081
 phoenix/utilities/span_store.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/utilities/template_formatters.py,sha256=gh9PJD6WEGw7TEYXfSst1UR4pWWwmjxMLrDVQ_CkpkQ,2779
-arize_phoenix-10.0.3.dist-info/METADATA,sha256=5iF5zvSMrc2EboqD4A3zQIJZQIzDBs-E8xDk7nvaAb4,25591
-arize_phoenix-10.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-arize_phoenix-10.0.3.dist-info/entry_points.txt,sha256=Pgpn8Upxx9P8z8joPXZWl2LlnAlGc3gcQoVchb06X1Q,94
-arize_phoenix-10.0.3.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
-arize_phoenix-10.0.3.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
-arize_phoenix-10.0.3.dist-info/RECORD,,
+arize_phoenix-10.0.4.dist-info/METADATA,sha256=Xywjo53oBnsHEco90Wn_5l0uSpp5XcDVhT2MjjG8VNg,25591
+arize_phoenix-10.0.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+arize_phoenix-10.0.4.dist-info/entry_points.txt,sha256=Pgpn8Upxx9P8z8joPXZWl2LlnAlGc3gcQoVchb06X1Q,94
+arize_phoenix-10.0.4.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
+arize_phoenix-10.0.4.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
+arize_phoenix-10.0.4.dist-info/RECORD,,
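
Everything above is packaging bookkeeping: the version string, the renamed dist-info directory, and new hashes and sizes for the two modules that actually changed, phoenix/experiments/functions.py and phoenix/server/api/routers/v1/experiment_runs.py. To confirm which build is installed after upgrading, a minimal sketch using only the standard library:

```python
# Minimal sketch: check the installed arize-phoenix version.
# Assumes the wheel is already installed in the current environment.
from importlib.metadata import version

print(version("arize-phoenix"))  # expect "10.0.4" once the new wheel is in place
```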
phoenix/experiments/functions.py CHANGED
@@ -16,6 +16,7 @@ from urllib.parse import urljoin
 import httpx
 import opentelemetry.sdk.trace as trace_sdk
 import pandas as pd
+from httpx import HTTPStatusError
 from openinference.semconv.resource import ResourceAttributes
 from openinference.semconv.trace import (
     OpenInferenceMimeTypeValues,
@@ -232,8 +233,43 @@ def run_experiment(
     print(f"📺 View dataset experiments: {dataset_experiments_url}")
     print(f"🔗 View this experiment: {experiment_compare_url}")
 
-    def sync_run_experiment(test_case: TestCase) -> ExperimentRun:
+    # Create a cache for task results
+    task_result_cache: dict[tuple[str, int], Any] = {}
+
+    def sync_run_experiment(test_case: TestCase) -> Optional[ExperimentRun]:
         example, repetition_number = test_case.example, test_case.repetition_number
+        cache_key = (example.id, repetition_number)
+
+        # Check if we have a cached result
+        if cache_key in task_result_cache:
+            output = task_result_cache[cache_key]
+            exp_run = ExperimentRun(
+                start_time=datetime.now(
+                    timezone.utc
+                ),  # Use current time since we don't have the original span
+                end_time=datetime.now(timezone.utc),
+                experiment_id=experiment.id,
+                dataset_example_id=example.id,
+                repetition_number=repetition_number,
+                output=output,
+                error=None,
+                trace_id=None,  # No trace ID since we don't have the original span
+            )
+            if not dry_run:
+                try:
+                    # Try to create the run directly
+                    resp = sync_client.post(
+                        f"/v1/experiments/{experiment.id}/runs", json=jsonify(exp_run)
+                    )
+                    resp.raise_for_status()
+                    exp_run = replace(exp_run, id=resp.json()["data"]["id"])
+                except HTTPStatusError as e:
+                    if e.response.status_code == 409:
+                        # Ignore duplicate runs - we'll get the final state from the database
+                        return None
+                    raise
+            return exp_run
+
         output = None
         error: Optional[BaseException] = None
         status = Status(StatusCode.OK)
@@ -283,6 +319,7 @@ def run_experiment(
         assert isinstance(
             output, (dict, list, str, int, float, bool, type(None))
         ), "Output must be JSON serializable"
+
         exp_run = ExperimentRun(
             start_time=_decode_unix_nano(cast(int, span.start_time)),
             end_time=_decode_unix_nano(cast(int, span.end_time)),
@@ -294,13 +331,62 @@ def run_experiment(
             trace_id=_str_trace_id(span.get_span_context().trace_id),  # type: ignore[no-untyped-call]
         )
         if not dry_run:
-            resp = sync_client.post(f"/v1/experiments/{experiment.id}/runs", json=jsonify(exp_run))
-            resp.raise_for_status()
-            exp_run = replace(exp_run, id=resp.json()["data"]["id"])
+            try:
+                # Try to create the run directly
+                resp = sync_client.post(
+                    f"/v1/experiments/{experiment.id}/runs", json=jsonify(exp_run)
+                )
+                resp.raise_for_status()
+                exp_run = replace(exp_run, id=resp.json()["data"]["id"])
+                if error is None:
+                    task_result_cache[cache_key] = output
+            except HTTPStatusError as e:
+                if e.response.status_code == 409:
+                    # 409 conflict errors are caused by submitting duplicate runs
+                    return None
+                raise
         return exp_run
 
-    async def async_run_experiment(test_case: TestCase) -> ExperimentRun:
+    async def async_run_experiment(test_case: TestCase) -> Optional[ExperimentRun]:
         example, repetition_number = test_case.example, test_case.repetition_number
+        cache_key = (example.id, repetition_number)
+
+        # Check if we have a cached result
+        if cache_key in task_result_cache:
+            output = task_result_cache[cache_key]
+            exp_run = ExperimentRun(
+                start_time=datetime.now(
+                    timezone.utc
+                ),  # Use current time since we don't have the original span
+                end_time=datetime.now(timezone.utc),
+                experiment_id=experiment.id,
+                dataset_example_id=example.id,
+                repetition_number=repetition_number,
+                output=output,
+                error=None,
+                trace_id=None,  # No trace ID since we don't have the original span
+            )
+            if not dry_run:
+                try:
+                    # Try to create the run directly
+                    future = asyncio.get_running_loop().run_in_executor(
+                        None,
+                        functools.partial(
+                            sync_client.post,
+                            url=f"/v1/experiments/{experiment.id}/runs",
+                            json=jsonify(exp_run),
+                        ),
+                    )
+                    resp = await future
+                    resp.raise_for_status()
+                    exp_run = replace(exp_run, id=resp.json()["data"]["id"])
+                except HTTPStatusError as e:
+                    if e.response.status_code == 409:
+                        # 409 conflict errors are caused by submitting duplicate runs
+                        return None
+                    raise
+            return exp_run
+
         output = None
         error: Optional[BaseException] = None
         status = Status(StatusCode.OK)
@@ -344,6 +430,7 @@ def run_experiment(
         assert isinstance(
             output, (dict, list, str, int, float, bool, type(None))
         ), "Output must be JSON serializable"
+
        exp_run = ExperimentRun(
             start_time=_decode_unix_nano(cast(int, span.start_time)),
             end_time=_decode_unix_nano(cast(int, span.end_time)),
@@ -355,19 +442,26 @@ def run_experiment(
             trace_id=_str_trace_id(span.get_span_context().trace_id),  # type: ignore[no-untyped-call]
         )
         if not dry_run:
-            # Below is a workaround to avoid timeout errors sometimes
-            # encountered when the task is a synchronous function that
-            # blocks for too long.
-            resp = await asyncio.get_running_loop().run_in_executor(
-                None,
-                functools.partial(
-                    sync_client.post,
-                    url=f"/v1/experiments/{experiment.id}/runs",
-                    json=jsonify(exp_run),
-                ),
-            )
-            resp.raise_for_status()
-            exp_run = replace(exp_run, id=resp.json()["data"]["id"])
+            try:
+                # Try to create the run directly
+                future = asyncio.get_running_loop().run_in_executor(
+                    None,
+                    functools.partial(
+                        sync_client.post,
+                        url=f"/v1/experiments/{experiment.id}/runs",
+                        json=jsonify(exp_run),
+                    ),
+                )
+                resp = await future
+                resp.raise_for_status()
+                exp_run = replace(exp_run, id=resp.json()["data"]["id"])
+                if error is None:
+                    task_result_cache[cache_key] = output
+            except HTTPStatusError as e:
+                if e.response.status_code == 409:
+                    # Ignore duplicate runs - we'll get the final state from the database
+                    return None
+                raise
         return exp_run
 
     _errors: tuple[type[BaseException], ...]
@@ -401,6 +495,26 @@ def run_experiment(
     ]
     task_runs, _execution_details = executor.run(test_cases)
     print("✅ Task runs completed.")
+
+    # Get the final state of runs from the database
+    if not dry_run:
+        all_runs = sync_client.get(f"/v1/experiments/{experiment.id}/runs").json()["data"]
+        task_runs = []
+        for run in all_runs:
+            # Parse datetime strings
+            run["start_time"] = datetime.fromisoformat(run["start_time"])
+            run["end_time"] = datetime.fromisoformat(run["end_time"])
+            task_runs.append(ExperimentRun.from_dict(run))
+
+        # Check if we got all expected runs
+        expected_runs = len(dataset.examples) * repetitions
+        actual_runs = len(task_runs)
+        if actual_runs < expected_runs:
+            print(
+                f"⚠️ Warning: Only {actual_runs} out of {expected_runs} expected runs were "
+                "completed successfully."
+            )
+
     params = ExperimentParameters(n_examples=len(dataset.examples), n_repetitions=repetitions)
     task_summary = TaskSummary.from_task_runs(params, task_runs)
     ran_experiment: RanExperiment = object.__new__(RanExperiment)
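
Taken together, the functions.py changes make run submission idempotent on the client side: task outputs are cached per (example id, repetition number) key, an HTTP 409 from the server is treated as "this run already exists" rather than as a failure, and the authoritative set of runs is re-read from the server once the executor finishes. Below is a self-contained sketch of that submit-or-skip pattern, assuming nothing beyond httpx; `submit_run` and `task_result_cache` here are illustrative stand-ins, not the Phoenix API:

```python
# Sketch of the idempotent-submission pattern, reduced to its essentials.
# Only httpx and its HTTPStatusError are real; the rest is hypothetical.
from typing import Any, Optional

import httpx

task_result_cache: dict[tuple[str, int], Any] = {}  # (example_id, repetition) -> output

def submit_run(
    client: httpx.Client, experiment_id: str, key: tuple[str, int], run: dict
) -> Optional[dict]:
    try:
        resp = client.post(f"/v1/experiments/{experiment_id}/runs", json=run)
        resp.raise_for_status()  # raises httpx.HTTPStatusError on 4xx/5xx
        task_result_cache[key] = run["output"]  # cache only after a successful submit
        return resp.json()["data"]
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 409:
            return None  # duplicate submission; the server already has this run
        raise
```

Because a 409 is swallowed rather than raised, a retried test case neither re-runs the task (the cache short-circuits it) nor corrupts server state (the duplicate POST is a no-op).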
phoenix/server/api/routers/v1/experiment_runs.py CHANGED
@@ -4,8 +4,10 @@ from typing import Any, Optional
 from fastapi import APIRouter, HTTPException
 from pydantic import Field
 from sqlalchemy import select
+from sqlalchemy.exc import IntegrityError as PostgreSQLIntegrityError
+from sqlean.dbapi2 import IntegrityError as SQLiteIntegrityError  # type: ignore[import-untyped]
 from starlette.requests import Request
-from starlette.status import HTTP_404_NOT_FOUND
+from starlette.status import HTTP_404_NOT_FOUND, HTTP_409_CONFLICT
 from strawberry.relay import GlobalID
 
 from phoenix.db import models
@@ -58,7 +60,11 @@ class CreateExperimentRunResponseBody(ResponseBody[CreateExperimentRunResponseBo
         {
             "status_code": HTTP_404_NOT_FOUND,
             "description": "Experiment or dataset example not found",
-        }
+        },
+        {
+            "status_code": HTTP_409_CONFLICT,
+            "description": "This experiment run has already been submitted",
+        },
         ]
     ),
 )
@@ -101,8 +107,14 @@ async def create_experiment_run(
         end_time=end_time,
         error=error,
     )
-    session.add(exp_run)
-    await session.flush()
+    try:
+        session.add(exp_run)
+        await session.flush()
+    except (PostgreSQLIntegrityError, SQLiteIntegrityError):
+        raise HTTPException(
+            detail="This experiment run has already been submitted",
+            status_code=HTTP_409_CONFLICT,
+        )
     request.state.event_queue.put(ExperimentRunInsertEvent((exp_run.id,)))
     run_gid = GlobalID("ExperimentRun", str(exp_run.id))
     return CreateExperimentRunResponseBody(
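
The server half of the fix converts a database uniqueness violation (an IntegrityError raised by either the PostgreSQL or the SQLite driver) into an HTTP 409, which is what lets the client above treat duplicate submissions as benign. A hedged, self-contained sketch of the same mapping, with an in-memory set standing in for the database's unique constraint; none of these names come from Phoenix:

```python
# Sketch: map "row already exists" onto HTTP 409, as the route above does.
# The `seen` set stands in for the DB unique constraint that raises IntegrityError.
from fastapi import FastAPI, HTTPException
from starlette.status import HTTP_409_CONFLICT

app = FastAPI()
seen: set[tuple[str, str, int]] = set()

@app.post("/v1/experiments/{experiment_id}/runs")
async def create_run(experiment_id: str, body: dict) -> dict:
    key = (experiment_id, body["dataset_example_id"], body["repetition_number"])
    if key in seen:  # the real route detects this via IntegrityError on flush
        raise HTTPException(
            status_code=HTTP_409_CONFLICT,
            detail="This experiment run has already been submitted",
        )
    seen.add(key)
    return {"data": {"id": str(len(seen))}}
```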
phoenix/version.py CHANGED
@@ -1 +1 @@
-__version__ = "10.0.3"
+__version__ = "10.0.4"