PyPI - arize-phoenix - Versions diffs - 4.4.4rc5__py3-none-any.whl → 4.4.4rc6__py3-none-any.whl - Mend

arize-phoenix 4.4.4rc5__py3-none-any.whl → 4.4.4rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of arize-phoenix might be problematic. Click here for more details.

Files changed (42)

{arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/METADATA +11 -5
{arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/RECORD +39 -36
phoenix/config.py +21 -0
phoenix/datetime_utils.py +4 -0
phoenix/db/insertion/evaluation.py +4 -4
phoenix/db/insertion/helpers.py +4 -12
phoenix/db/insertion/span.py +3 -3
phoenix/db/models.py +1 -1
phoenix/experiments/__init__.py +6 -0
phoenix/experiments/evaluators/__init__.py +29 -0
phoenix/experiments/evaluators/base.py +153 -0
phoenix/{datasets → experiments}/evaluators/code_evaluators.py +7 -7
phoenix/{datasets → experiments}/evaluators/llm_evaluators.py +9 -9
phoenix/{datasets → experiments}/evaluators/utils.py +38 -141
phoenix/{datasets/experiments.py → experiments/functions.py} +248 -182
phoenix/experiments/types.py +722 -0
phoenix/experiments/utils.py +9 -0
phoenix/server/api/context.py +2 -0
phoenix/server/api/dataloaders/__init__.py +2 -0
phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
phoenix/server/api/routers/v1/__init__.py +1 -1
phoenix/server/api/routers/v1/dataset_examples.py +10 -10
phoenix/server/api/routers/v1/datasets.py +6 -6
phoenix/server/api/routers/v1/evaluations.py +4 -11
phoenix/server/api/routers/v1/experiment_evaluations.py +22 -23
phoenix/server/api/routers/v1/experiment_runs.py +4 -16
phoenix/server/api/routers/v1/experiments.py +5 -5
phoenix/server/api/routers/v1/spans.py +6 -4
phoenix/server/api/types/Experiment.py +7 -0
phoenix/server/app.py +2 -0
phoenix/server/static/index.js +648 -570
phoenix/session/client.py +256 -85
phoenix/trace/fixtures.py +6 -6
phoenix/utilities/json.py +8 -8
phoenix/version.py +1 -1
phoenix/datasets/__init__.py +0 -0
phoenix/datasets/evaluators/__init__.py +0 -18
phoenix/datasets/types.py +0 -178
{arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/WHEEL +0 -0
{arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/licenses/IP_NOTICE +0 -0
{arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/licenses/LICENSE +0 -0
phoenix/{datasets → experiments}/tracing.py +0 -0

{arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: arize-phoenix
-Version: 4.4.4rc5
+Version: 4.4.4rc6
 Summary: AI Observability and Evaluation
 Project-URL: Documentation, https://docs.arize.com/phoenix/
 Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -20,7 +20,7 @@ Requires-Python: <3.13,>=3.8
 Requires-Dist: aioitertools
 Requires-Dist: aiosqlite
 Requires-Dist: alembic<2,>=1.3.0
-Requires-Dist: arize-phoenix-evals>=0.3.0
+Requires-Dist: arize-phoenix-evals>=0.13.1
 Requires-Dist: cachetools
 Requires-Dist: grpcio
 Requires-Dist: hdbscan>=0.8.33
@@ -36,7 +36,7 @@ Requires-Dist: opentelemetry-exporter-otlp
 Requires-Dist: opentelemetry-proto>=1.12.0
 Requires-Dist: opentelemetry-sdk
 Requires-Dist: opentelemetry-semantic-conventions
-Requires-Dist: pandas
+Requires-Dist: pandas>=1.0
 Requires-Dist: protobuf<6.0,>=3.20
 Requires-Dist: psutil
 Requires-Dist: pyarrow
@@ -79,6 +79,7 @@ Requires-Dist: llama-index>=0.10.3; extra == 'dev'
 Requires-Dist: nbqa; extra == 'dev'
 Requires-Dist: pandas-stubs==2.0.3.230814; (python_version < '3.9') and extra == 'dev'
 Requires-Dist: pandas-stubs==2.2.2.240603; (python_version >= '3.9') and extra == 'dev'
+Requires-Dist: pandas>=1.0; extra == 'dev'
 Requires-Dist: pre-commit; extra == 'dev'
 Requires-Dist: prometheus-client; extra == 'dev'
 Requires-Dist: psycopg[binary]; extra == 'dev'
@@ -88,10 +89,15 @@ Requires-Dist: pytest-postgresql; extra == 'dev'
 Requires-Dist: pytest==8.2.2; extra == 'dev'
 Requires-Dist: ruff==0.4.9; extra == 'dev'
 Requires-Dist: strawberry-graphql[debug-server,opentelemetry]==0.235.0; extra == 'dev'
+Requires-Dist: tabulate; extra == 'dev'
+Requires-Dist: types-tabulate; extra == 'dev'
 Provides-Extra: evals
 Provides-Extra: experimental
 Provides-Extra: llama-index
-Requires-Dist: llama-index==0.10.44; extra == 'llama-index'
+Requires-Dist: llama-index-embeddings-openai; extra == 'llama-index'
+Requires-Dist: llama-index-llms-openai; extra == 'llama-index'
+Requires-Dist: llama-index-readers-file; extra == 'llama-index'
+Requires-Dist: llama-index==0.10.51; extra == 'llama-index'
 Provides-Extra: pg
 Requires-Dist: asyncpg; extra == 'pg'
 Description-Content-Type: text/markdown
@@ -127,7 +133,7 @@ Description-Content-Type: text/markdown
 Phoenix is an open-source AI observability platform designed for experimentation, evaluation, and troubleshooting. It provides:
--   **_Tracing_** - Trace your LLM application's runtime using using OpenTelemetry-based instrumentation.
+-   **_Tracing_** - Trace your LLM application's runtime using OpenTelemetry-based instrumentation.
 -   **_Evaluation_** - Leverage LLMs to benchmark your application's performance using response and retrieval evals.
 -   **_Inference Analysis_** - Visualize inferences and embeddings using dimensionality reduction and clustering to identify drift and performance degradation.

{arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/RECORD RENAMED Viewed

@@ -1,24 +1,16 @@
 phoenix/__init__.py,sha256=JMXBf8J0tAa5ycWDIn8QWYgQu6H2vL77RC4lful-YH8,1542
-phoenix/config.py,sha256=9xkQBn_Z-tsQct-zq0B80N4Xa2k2jfhivdc_qYCA5G8,7618
-phoenix/datetime_utils.py,sha256=oqkxJ5I7ggrCKYTEi8q-akC501calylD26NVOPQQcHw,3305
+phoenix/config.py,sha256=eXciIho_PDh4ZSmq4Gtuo7Qz__yTluDP3_WUwig5OiU,8141
+phoenix/datetime_utils.py,sha256=yDKjwX2Vtqw9h5F_ProtP-TsXidM43uIvmJ_pOzYc9A,3405
 phoenix/exceptions.py,sha256=n2L2KKuecrdflB9MsCdAYCiSEvGJptIsfRkXMoJle7A,169
 phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 phoenix/services.py,sha256=aTxhcOA1pZHB6U-B3TEcp6fqDF5oT0xCUvEUNMZVTUQ,5175
 phoenix/settings.py,sha256=cO-qgis_S27nHirTobYI9hHPfZH18R--WMmxNdsVUwc,273
-phoenix/version.py,sha256=-Vg_bLotyeJdv0gFqG5-A64nsG-6AR0xZSp3sDDsV_w,25
+phoenix/version.py,sha256=rZ0Z9PgUs79kMn4HpCH3vAEVOqqPCzzD7Xz8N5sa7qI,25
 phoenix/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
 phoenix/core/model.py,sha256=km_a--PBHOuA337ClRw9xqhOHhrUT6Rl9pz_zV0JYkQ,4843
 phoenix/core/model_schema.py,sha256=F2dbbVnkDLsPYoyZDv1q03uhvP8LcU1wXp0g-exiWs0,50551
 phoenix/core/model_schema_adapter.py,sha256=0Tm_Y_gV-WED8fKBCaFXAEFwE3CTEZS1dowqnTZ7x7g,8426
-phoenix/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-phoenix/datasets/experiments.py,sha256=RzZezHQcTpPcr7gY9rGtoYlfoesFNhNV7EO5f_oHNFk,21198
-phoenix/datasets/tracing.py,sha256=wVpt8Ie9WNPoi1djJdcrkwCokHdTO0bicXViLg3O-1Y,2831
-phoenix/datasets/types.py,sha256=N17mnnVwmu1k3bnmbyROPt_6TxPaZY_QkOZmCOR5_jE,4835
-phoenix/datasets/evaluators/__init__.py,sha256=KSr9fNG4O93swYxNdPj_UihP9Itl_5mj0a492wi_4_0,465
-phoenix/datasets/evaluators/code_evaluators.py,sha256=DdCcAi274t_TLs_aARd-GmWWpJrxVeNEAegMFEAfe0E,3894
-phoenix/datasets/evaluators/llm_evaluators.py,sha256=aVfAHOWhskBiy0IVeq_ACTs7B37uXTTtDoNBS0XenIc,9165
-phoenix/datasets/evaluators/utils.py,sha256=S7OGrb1sBWg5l9K35X29OKJe5wZ3k7xMhxJBclzxta0,10452
 phoenix/db/README.md,sha256=IvKaZyf9ECbGBYYePaRhBveKZwDbxAc-c7BMxJYZh6Q,595
 phoenix/db/__init__.py,sha256=pDjEFXukHmJBM-1D8RjmXkvLsz85YWNxMQczt81ec3A,118
 phoenix/db/alembic.ini,sha256=p8DjVqGUs_tTx8oU56JP7qj-rMUebNFizItUSv_hPhs,3763
@@ -26,18 +18,28 @@ phoenix/db/bulk_inserter.py,sha256=zbZGWZFDybKaGLGzpxgLwxAS5sC0_wXcvM0be4kUhh8,1
 phoenix/db/engines.py,sha256=vLWaZlToMtDI7rJDxSidYkfOoojamxaZxaz8ND3zTus,4770
 phoenix/db/helpers.py,sha256=L2_jP1iIWpUREhKLYYb4_vf_6v_BiU1E73Z2PczGm6s,1589
 phoenix/db/migrate.py,sha256=MuhtNWnR24riROvarvKfbRb4_D5xuQi6P760vBUKl1E,2270
-phoenix/db/models.py,sha256=lYzI3tCDUl8njXb3Vf3R8e6y56-MErprjjfBE-o9Kao,20419
+phoenix/db/models.py,sha256=zFtdhVuQFOvquyKsto62aqAVaTRUlq9gxU0j1M1yLdg,20408
 phoenix/db/insertion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/db/insertion/dataset.py,sha256=_vxy5e6W5jEuvO2fMKbbNCn9JvHkwI4LRKk_10eKFVg,7171
-phoenix/db/insertion/evaluation.py,sha256=fAerUy3QGf2wID_tiVmPvzxBDFGiONPl3pmpZDgJDWQ,7183
-phoenix/db/insertion/helpers.py,sha256=7tf6qQyJ05nn3IXaZEpj2b4Jz5boGLWT8tzlMaJ9tQY,2337
-phoenix/db/insertion/span.py,sha256=DNBjSrx5g2W5KuTB1dkHwtkb0SFnMIxN1jB-BAdGKFY,5634
+phoenix/db/insertion/evaluation.py,sha256=HoUncZN9ZlIr1QO0uA37SbWhrjmwQVYVJlgFX2VefY8,7211
+phoenix/db/insertion/helpers.py,sha256=5AZQSyTGAthyaIl_l5jL4yva1IrTTBG9y2G7l1r2Yyk,1937
+phoenix/db/insertion/span.py,sha256=d85O3R_Cc3aVFDJSgLLX66qNCPNbKtDxug_dSKFDfew,5655
 phoenix/db/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/db/migrations/env.py,sha256=QbzB5zrRs6XQQmrYeUpuzeilcMlM-MsbaAgHHYcIHTI,3626
 phoenix/db/migrations/script.py.mako,sha256=MEqL-2qATlST9TAOeYgscMn1uy6HUS9NFvDgl93dMj8,635
 phoenix/db/migrations/types.py,sha256=Frq1AKSyBKQQ0FLzON-EmgTqE4kNkOpHMsbWnI-WgCE,605
 phoenix/db/migrations/versions/10460e46d750_datasets.py,sha256=l69yZfScFrjfZZpY0gnqwhsDUEctLeo02qMgA_aOGDg,8155
 phoenix/db/migrations/versions/cf03bd6bae1d_init.py,sha256=CbWT3ZTR0CZqeT3zWLoTWhboFmnOy3Ju1z6Ztpq8WIM,8122
+phoenix/experiments/__init__.py,sha256=6JGwgUd7xCbGpuHqYZlsmErmYvVgv7N_j43bn3dUqsk,123
+phoenix/experiments/functions.py,sha256=w0A6BK80avoupxd3sPJZ_btftV1pXrkbZj4omR_H214,24723
+phoenix/experiments/tracing.py,sha256=wVpt8Ie9WNPoi1djJdcrkwCokHdTO0bicXViLg3O-1Y,2831
+phoenix/experiments/types.py,sha256=tj7DxfsU_nQP5bNe_h6p4KvRjkXKaaB3FeaIerAi_iA,22790
+phoenix/experiments/utils.py,sha256=ZZajvIrZTURhOX5Nx4nyogJEbI18sKCHYiYwOxz2vYU,340
+phoenix/experiments/evaluators/__init__.py,sha256=j63fi3fa3U7-itVPHa82GowhjQRU-wO6yhO34u_lhsA,714
+phoenix/experiments/evaluators/base.py,sha256=uhO4R06YWBbTxdpvXLldANnTxTA5r2h_Ktj-ZMLH57c,5305
+phoenix/experiments/evaluators/code_evaluators.py,sha256=0qIKQS14Knze50ziJEPVEnNeV3QIs4g1IXtCmaWZu7o,3923
+phoenix/experiments/evaluators/llm_evaluators.py,sha256=EFce6LKZwUZDBa5ZozvcdqeZpdWM6n6bmq7_oIzM2Nw,9211
+phoenix/experiments/evaluators/utils.py,sha256=o84UTWN7fzjCGZDTS-KpGZ2VBrk2iSuO3X2LoC7pr3Y,6966
 phoenix/inferences/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/inferences/errors.py,sha256=cGp9vxnw4SewFoWBV3ZGMkhE0Kh73lPIv3Ppz_H_RoA,8261
 phoenix/inferences/fixtures.py,sha256=FC2eRL4dpobKQHYOilFtDexUWFkMZ_w6jun_4WkbMk0,20792
@@ -58,18 +60,19 @@ phoenix/pointcloud/pointcloud.py,sha256=4zAIkKs2xOUbchpj4XDAV-iPMXrfAJ15TG6rlIYG
 phoenix/pointcloud/projectors.py,sha256=zO_RrtDYSv2rqVOfIP2_9Cv11Dc8EmcZR94xhFcBYPU,1057
 phoenix/pointcloud/umap_parameters.py,sha256=lJsEOrbSuSiqI7g4Yt6xj7kgYxEqoep4ZHWLr6VWBqw,1760
 phoenix/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-phoenix/server/app.py,sha256=LQrHWt5HG_pWqnR9Ozb3-vnAGiiRGuZ3uV_9-886Yxw,18340
+phoenix/server/app.py,sha256=Ld9NvW7sBT4aJn8CXAB_PIw4c5wlkxCcjT_hzjtn7dM,18478
 phoenix/server/grpc_server.py,sha256=faktLxEtWGlCB1bPR4QwwTsRoQloahKMx0hAWqRGI5s,3379
 phoenix/server/main.py,sha256=mtzH_2Kyvuy3AHiiKfqiCdUQ6SGFzeT4q9fefbV6GLg,11114
 phoenix/server/prometheus.py,sha256=j9DHB2fERuq_ZKmwVaqR-9wx5WcPPuU1Cm5Bhg5241Y,2996
 phoenix/server/telemetry.py,sha256=T_2OKrxNViAeaANlNspEekg_Y5uZIFWvKAnpz8Aoqvk,2762
 phoenix/server/thread_server.py,sha256=dP6cm6Cf08jNhDA1TRlVZpziu1YgtPDmaeIJMm725eI,2154
 phoenix/server/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-phoenix/server/api/context.py,sha256=jb69SVdb5hpVbfM4U0pZi4sGa2a-0VKOJWcBjjS7l4s,2761
+phoenix/server/api/context.py,sha256=4jcy203Gtx38399FP21iU3HmFsq-50EKFJlX4IW2Los,2878
 phoenix/server/api/interceptor.py,sha256=ykDnoC_apUd-llVli3m1CW18kNSIgjz2qZ6m5JmPDu8,1294
 phoenix/server/api/queries.py,sha256=wp5BlapuxDIoaQJm7mzG0dURfVxR32vXSJVC0JqG4_Y,19845
 phoenix/server/api/schema.py,sha256=BcxdqO5CSGqpKd-AAJHMjFlzaK9oJA8GJuxmMfcdjn4,434
-phoenix/server/api/dataloaders/__init__.py,sha256=urbG3M-k2cpj2ymMLYQ28tzIXAG1edECxM-tJ22ylqE,4720
+phoenix/server/api/dataloaders/__init__.py,sha256=qehXL37vGdw7v5PFs3kbZVIuhuzrVNVeZACDQjYpwyo,4847
+phoenix/server/api/dataloaders/average_experiment_run_latency.py,sha256=RiO0AKC6Y5byafsV0zTJEIOt8Nudjte73f1T78cBe1k,1817
 phoenix/server/api/dataloaders/dataset_example_revisions.py,sha256=Vpr5IEKSR4QnAVxE5NM7u92fPNgeHQV2ieYc6JakCj0,3788
 phoenix/server/api/dataloaders/dataset_example_spans.py,sha256=_jLlo0KdUS65d4PNTtE9aXVyG_NZWgA7VcpNC9udQ8U,1484
 phoenix/server/api/dataloaders/document_evaluation_summaries.py,sha256=dgAAlD0n8X6oAPLaD-czoefNkDqP338MouWsKaW8bOY,5684
@@ -127,14 +130,14 @@ phoenix/server/api/openapi/main.py,sha256=WY0pj3B7siQyyYqKyhqnzWC7P8MtEtiukOBUjG
 phoenix/server/api/openapi/schema.py,sha256=uuSYe1Ecu72aXRgTNjyMu-9ZPE13DAHJPKtedS-MsSs,451
 phoenix/server/api/routers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/server/api/routers/utils.py,sha256=M41BoH-fl37izhRuN2aX7lWm7jOC20A_3uClv9TVUUY,583
-phoenix/server/api/routers/v1/__init__.py,sha256=B5eSaylPI7MoYia1-VgKrU8rDi-69r_hRwPU5yMLUTE,2808
-phoenix/server/api/routers/v1/dataset_examples.py,sha256=wtplRUv2ee9xGTrcEMgTn-7L4NX_73IcwUXkCMZEFc4,6726
-phoenix/server/api/routers/v1/datasets.py,sha256=ws2Guou9mspwFx3-cBFZoD2VuTwWGoFZmtt2Sr3zg6k,31516
-phoenix/server/api/routers/v1/evaluations.py,sha256=rwSVg-rpujhsMcDVFt-VAr0Ix9TgvLcY_bSxeh8PzJI,9241
-phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=xhrkPUc_4ncIBm24aUyzu47UU0CN1tGlbisn-oLqt_Y,2702
-phoenix/server/api/routers/v1/experiment_runs.py,sha256=0AUNHA5nvpGDeoJUGK8VxP2TFN3iPwhMW3D9QmHstPk,4399
-phoenix/server/api/routers/v1/experiments.py,sha256=5Rh7q6sHswmk11PZSJ7KMrtqfIE16X_xSKkKSASK9-I,7251
-phoenix/server/api/routers/v1/spans.py,sha256=FEnmlRPBPl71BSGNBuPrz14fk8nmxJQYsKECdDbdUdw,3977
+phoenix/server/api/routers/v1/__init__.py,sha256=vvdpUa2LJPWEg8HbvDm_ANkBAwubPIFPbbHi7elOUws,2808
+phoenix/server/api/routers/v1/dataset_examples.py,sha256=XfqOvDKF1oxb0pkeYfBycwwGt3LnSyyGdMLKC5VKoGQ,6690
+phoenix/server/api/routers/v1/datasets.py,sha256=f2gLG-geu-_wtEw4mKSzNWK2cFb5TYOyRL3tQ7Fl7Es,31544
+phoenix/server/api/routers/v1/evaluations.py,sha256=8g6P_e2BweV3RDU0esFmpkb0L5fCwonQPXiJ0y6HLwg,9126
+phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=HeyV3PXS1BxQpzNOUBpQlX_0JH_jbjZjTxrqy2ujwJQ,2746
+phoenix/server/api/routers/v1/experiment_runs.py,sha256=_c7qmPIja_gpvoVaf_t7KtNc9Zz-0m9da9MS-EcbPBo,3918
+phoenix/server/api/routers/v1/experiments.py,sha256=ntb0lRV2h90mFepWiZfQ1MIAJhOaK9tkWzTejmpwed0,7243
+phoenix/server/api/routers/v1/spans.py,sha256=PFeS3ayKj4cUle0CH-f-CpM1fRi-JicEG7BEtkANzAo,4074
 phoenix/server/api/routers/v1/traces.py,sha256=dYEf5pThenAQCgfQljHdrnwd4tC_tAXm6Kvk6GphPYs,2774
 phoenix/server/api/types/AnnotatorKind.py,sha256=UmYb2KG0JfxdX0mW1qrXrUoIgjMOncRJr1i8mJki1sE,141
 phoenix/server/api/types/Cluster.py,sha256=ac4YfT1OH3xLVmex7EUmB6b9IpULnhLTt554LR0jglE,5689
@@ -159,7 +162,7 @@ phoenix/server/api/types/EvaluationSummary.py,sha256=EFucuzAhcxR9sdEn6WNAtmAGJk-
 phoenix/server/api/types/Event.py,sha256=XdYgaIxcVIW-YFViCkxj5l9OaVNepyIrCtm5Iqg2le8,3989
 phoenix/server/api/types/EventMetadata.py,sha256=-J0tYF9eZTHwCjwxQHY7Gckr2_MNW5OoWT1mydweZNM,635
 phoenix/server/api/types/ExampleRevisionInterface.py,sha256=gV3Gt9-3Oi5wjaVtepC6nOt3FzTzZFD1KebNnqiw56E,294
-phoenix/server/api/types/Experiment.py,sha256=Cs0EKhVLI5l5LKFI0hQA-ekZuaiJcOHT88JGFBa2deU,4906
+phoenix/server/api/types/Experiment.py,sha256=ELYdYFKwgBllxx3cZ_X0XicHjLtshZl0bFqqJdVGXRQ,5177
 phoenix/server/api/types/ExperimentAnnotationSummary.py,sha256=Uk3JtxIrsMoZT5tqc4nJdUOM3XegVzjUyoV3pkjNotE,256
 phoenix/server/api/types/ExperimentComparison.py,sha256=0sFz6MoBDw39dds0qVyaqhVs9qqO5rkG1FMSjmfBeCc,441
 phoenix/server/api/types/ExperimentRun.py,sha256=8jUIi3ApVCqQHwnYe59CYhrmh5iZ6-QmlH5WpF7UWtM,2990
@@ -199,12 +202,12 @@ phoenix/server/static/apple-touch-icon-76x76.png,sha256=CT_xT12I0u2i0WU8JzBZBuOQ
 phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZVooBJ_SGk2ZkDs,3801
 phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
 phoenix/server/static/index.css,sha256=KKGpx4iwF91VGRm0YN-4cn8oC-oIqC6HecoPf0x3ZM8,1885
-phoenix/server/static/index.js,sha256=I9Y8svcPruUrXklKcZUxFz5HfLB0vOwczYLSwLAs_04,3500011
+phoenix/server/static/index.js,sha256=qAPO3xGdQ2mIA8TcIPNkIfLvoGeZ78fVKbGZevfJzoM,3518643
 phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
 phoenix/server/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/server/templates/index.html,sha256=S4z7qSoNSwnKFAH9r96AR-YJEyoKMd-VMWVlJ_IdzME,2039
 phoenix/session/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-phoenix/session/client.py,sha256=R7dV38yjkIQa522nhG6jhDllWcXft2JJ7RlcPYpqiiQ,24846
+phoenix/session/client.py,sha256=5mnWVqMFbC8NYbX4m2oRla1VvlmrgabD1oT2UdwDRJ8,33201
 phoenix/session/data_extractor.py,sha256=dwhiDu-ISaXr8UI9I-CszZhB5BlUNmdDopjFZvMIXMw,2101
 phoenix/session/evaluation.py,sha256=aKeV8UVOyq3b7CYOwt3cWuLz0xzvMjX7vlEPILJ_fcs,5311
 phoenix/session/session.py,sha256=rjIuSSK2gAYIUPQTJc4E2ebew5o6I070FWRoFn4W3EI,26620
@@ -213,7 +216,7 @@ phoenix/trace/attributes.py,sha256=xSr2EvlkDS9Wiij7FywXzw7UmMAMLIARSEm_P686BKM,1
 phoenix/trace/errors.py,sha256=wB1z8qdPckngdfU-TORToekvg3344oNFAA83_hC2yFY,180
 phoenix/trace/evaluation_conventions.py,sha256=t8jydM3U0-T5YpiQKRJ3tWdWGlHtzKyttYdw-ddvPOk,1048
 phoenix/trace/exporter.py,sha256=eAYemdvDCHMugDJiaR29BFFMTQBdf3oerdkz34Cl3hE,4736
-phoenix/trace/fixtures.py,sha256=gBGFG2gkcBsSDzolzzR9AJDrB_fdOQfUaGgHV-EHdco,14204
+phoenix/trace/fixtures.py,sha256=tDso17oAYMBQ8GJsAlHFzrC0otXPzUOEh4TgIjEfB7M,14260
 phoenix/trace/otel.py,sha256=WA720jvRadiZBAKjsYoPyXzypHwbyEK2OZRVUwtbjB8,9976
 phoenix/trace/projects.py,sha256=2BwlNjFE-uwpqYtCu5YyBiYZk9wRPpM13vh3-Cv7GkA,2157
 phoenix/trace/schemas.py,sha256=Mjc6fD9OyeMnEk5wPPSbveqnNUYWK3p3BxpOvSGanHU,5950
@@ -239,13 +242,13 @@ phoenix/trace/v1/evaluation_pb2.pyi,sha256=cCbbx06gwQmaH14s3J1X25TtaARh-k1abbxQd
 phoenix/utilities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/utilities/deprecation.py,sha256=cFuTVvjSYyRlrdxdJewjJVieIEHPk30BukSRGRydQ3k,1046
 phoenix/utilities/error_handling.py,sha256=7b5rpGFj9EWZ8yrZK1IHvxB89suWk3lggDayUQcvZds,1946
-phoenix/utilities/json.py,sha256=nuD1SbPm871tbt9Cz7V2NUmS9m0jWgcpRUx1YOV3NYQ,1954
+phoenix/utilities/json.py,sha256=y_w-McDfvlTeGJT28sCtyjzVkwFicakxERG-sGRc8Ak,1948
 phoenix/utilities/logging.py,sha256=lDXd6EGaamBNcQxL4vP1au9-i_SXe0OraUDiJOcszSw,222
 phoenix/utilities/project.py,sha256=qWsvKnG1oKhOFUowXf9qiOL2ia7jaFe_ijFFHEt8GJo,431
 phoenix/utilities/re.py,sha256=PDve_OLjRTM8yQQJHC8-n3HdIONi7aNils3ZKRZ5uBM,2045
 phoenix/utilities/span_store.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arize_phoenix-4.4.4rc5.dist-info/METADATA,sha256=yT0gbMlPkiRkZeC8Yj_eLyaufriREVn3jxz5-qTKDjI,11012
-arize_phoenix-4.4.4rc5.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-arize_phoenix-4.4.4rc5.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
-arize_phoenix-4.4.4rc5.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
-arize_phoenix-4.4.4rc5.dist-info/RECORD,,
+arize_phoenix-4.4.4rc6.dist-info/METADATA,sha256=gyc5KyS4aFqefmGcezl1eC_8lCZ5DF0iHdSDh0V41f8,11337
+arize_phoenix-4.4.4rc6.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+arize_phoenix-4.4.4rc6.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
+arize_phoenix-4.4.4rc6.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
+arize_phoenix-4.4.4rc6.dist-info/RECORD,,

phoenix/config.py CHANGED Viewed

@@ -233,4 +233,25 @@ def get_env_client_headers() -> Optional[Dict[str, str]]:
     return None
+def get_base_url() -> str:
+    host = get_env_host()
+    if host == "0.0.0.0":
+        host = "127.0.0.1"
+    base_url = get_env_collector_endpoint() or f"http://{host}:{get_env_port()}"
+    return base_url if base_url.endswith("/") else base_url + "/"
+def get_web_base_url() -> str:
+    """Return the web UI base URL.
+    Returns:
+        str: the web UI base URL
+    """
+    from phoenix.session.session import active_session
+    if session := active_session():
+        return session.url
+    return get_base_url()
 DEFAULT_PROJECT_NAME = "default"

phoenix/datetime_utils.py CHANGED Viewed

@@ -14,6 +14,10 @@ from pandas.core.dtypes.common import (
 _LOCAL_TIMEZONE = datetime.now(timezone.utc).astimezone().tzinfo
+def local_now() -> datetime:
+    return datetime.now(timezone.utc).astimezone(tz=_LOCAL_TIMEZONE)
 def normalize_datetime(
     dt: Optional[datetime],
     tz: Optional[tzinfo] = None,

phoenix/db/insertion/evaluation.py CHANGED Viewed

@@ -6,7 +6,7 @@ from typing_extensions import assert_never
 from phoenix.db import models
 from phoenix.db.helpers import SupportedSQLDialect, num_docs_col
-from phoenix.db.insertion.helpers import OnConflict, insert_stmt
+from phoenix.db.insertion.helpers import OnConflict, insert_on_conflict
 from phoenix.exceptions import PhoenixException
 from phoenix.trace import v1 as pb
@@ -91,7 +91,7 @@ async def _insert_trace_evaluation(
     set_.pop("metadata_")
     set_["metadata"] = values["metadata_"]  # `metadata` must match database
     await session.execute(
-        insert_stmt(
+        insert_on_conflict(
             dialect=dialect,
             table=models.TraceAnnotation,
             values=values,
@@ -139,7 +139,7 @@ async def _insert_span_evaluation(
     set_.pop("metadata_")
     set_["metadata"] = values["metadata_"]  # `metadata` must match database
     await session.execute(
-        insert_stmt(
+        insert_on_conflict(
             dialect=dialect,
             table=models.SpanAnnotation,
             values=values,
@@ -196,7 +196,7 @@ async def _insert_document_evaluation(
     set_.pop("metadata_")
     set_["metadata"] = values["metadata_"]  # `metadata` must match database
     await session.execute(
-        insert_stmt(
+        insert_on_conflict(
             dialect=dialect,
             table=models.DocumentAnnotation,
             values=values,

phoenix/db/insertion/helpers.py CHANGED Viewed

@@ -2,7 +2,7 @@ from abc import ABC
 from enum import Enum, auto
 from typing import Any, Awaitable, Callable, Mapping, Optional, Sequence
-from sqlalchemy import Insert, insert
+from sqlalchemy import Insert
 from sqlalchemy.dialects.postgresql import insert as insert_postgresql
 from sqlalchemy.dialects.sqlite import insert as insert_sqlite
 from sqlalchemy.ext.asyncio import AsyncSession
@@ -25,26 +25,18 @@ class OnConflict(Enum):
     DO_UPDATE = auto()
-def insert_stmt(
+def insert_on_conflict(
     dialect: SupportedSQLDialect,
     table: Any,
     values: Mapping[str, Any],
-    constraint: Optional[str] = None,
-    column_names: Sequence[str] = (),
+    constraint: str,
+    column_names: Sequence[str],
     on_conflict: OnConflict = OnConflict.DO_NOTHING,
     set_: Optional[Mapping[str, Any]] = None,
 ) -> Insert:
     """
     Dialect specific insertion statement using ON CONFLICT DO syntax.
     """
-    if bool(constraint) != bool(column_names):
-        raise ValueError(
-            "Both `constraint` and `column_names` must be provided or omitted at the same time."
-        )
-    if (dialect is SupportedSQLDialect.POSTGRESQL and constraint is None) or (
-        dialect is SupportedSQLDialect.SQLITE and not column_names
-    ):
-        return insert(table).values(values)
     if dialect is SupportedSQLDialect.POSTGRESQL:
         stmt_postgresql = insert_postgresql(table).values(values)
         if on_conflict is OnConflict.DO_NOTHING or not set_:

phoenix/db/insertion/span.py CHANGED Viewed

@@ -7,7 +7,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from phoenix.db import models
 from phoenix.db.helpers import SupportedSQLDialect
-from phoenix.db.insertion.helpers import OnConflict, insert_stmt
+from phoenix.db.insertion.helpers import OnConflict, insert_on_conflict
 from phoenix.trace.attributes import get_attribute_value
 from phoenix.trace.schemas import Span, SpanStatusCode
@@ -27,7 +27,7 @@ async def insert_span(
 ) -> Optional[SpanInsertionEvent]:
     dialect = SupportedSQLDialect(session.bind.dialect.name)
     project_rowid = await session.scalar(
-        insert_stmt(
+        insert_on_conflict(
             dialect=dialect,
             table=models.Project,
             constraint="uq_projects_name",
@@ -87,7 +87,7 @@ async def insert_span(
         cumulative_llm_token_count_prompt += cast(int, accumulation[1] or 0)
         cumulative_llm_token_count_completion += cast(int, accumulation[2] or 0)
     span_rowid = await session.scalar(
-        insert_stmt(
+        insert_on_conflict(
             dialect=dialect,
             table=models.Span,
             constraint="uq_spans_span_id",

phoenix/db/models.py CHANGED Viewed

@@ -92,7 +92,7 @@ class UtcTimeStamp(TypeDecorator[datetime]):
 class ExperimentResult(TypedDict, total=False):
-    result: Dict[str, Any]
+    result: Any
 class Base(DeclarativeBase):

phoenix/experiments/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+from .functions import evaluate_experiment, run_experiment
+__all__ = [
+    "evaluate_experiment",
+    "run_experiment",
+]

phoenix/experiments/evaluators/__init__.py ADDED Viewed

@@ -0,0 +1,29 @@
+from phoenix.experiments.evaluators.code_evaluators import (
+    ContainsAllKeywords,
+    ContainsAnyKeyword,
+    ContainsKeyword,
+    JSONParsable,
+    MatchesRegex,
+)
+from phoenix.experiments.evaluators.llm_evaluators import (
+    CoherenceEvaluator,
+    ConcisenessEvaluator,
+    HelpfulnessEvaluator,
+    LLMCriteriaEvaluator,
+    RelevanceEvaluator,
+)
+from phoenix.experiments.evaluators.utils import create_evaluator
+__all__ = [
+    "create_evaluator",
+    "ContainsAllKeywords",
+    "ContainsAnyKeyword",
+    "ContainsKeyword",
+    "JSONParsable",
+    "MatchesRegex",
+    "CoherenceEvaluator",
+    "ConcisenessEvaluator",
+    "LLMCriteriaEvaluator",
+    "HelpfulnessEvaluator",
+    "RelevanceEvaluator",
+]

phoenix/experiments/evaluators/base.py ADDED Viewed

@@ -0,0 +1,153 @@
+import functools
+import inspect
+from abc import ABC
+from types import MappingProxyType
+from typing import Any, Awaitable, Callable, Optional, Union
+from typing_extensions import TypeAlias
+from phoenix.experiments.evaluators.utils import validate_signature
+from phoenix.experiments.types import (
+    AnnotatorKind,
+    EvaluationResult,
+    EvaluatorKind,
+    EvaluatorName,
+    EvaluatorOutput,
+    ExampleInput,
+    ExampleMetadata,
+    ExampleOutput,
+    TaskOutput,
+)
+class Evaluator(ABC):
+    """
+    A helper super class to guide the implementation of an `Evaluator` object.
+    Subclasses must implement either the `evaluate` or `async_evaluate` method.
+    Implementing both methods is recommended, but not required.
+    This Class is intended to be subclassed, and should not be instantiated directly.
+    """
+    _kind: AnnotatorKind
+    _name: EvaluatorName
+    @functools.cached_property
+    def name(self) -> EvaluatorName:
+        if hasattr(self, "_name"):
+            return self._name
+        return self.__class__.__name__
+    @functools.cached_property
+    def kind(self) -> EvaluatorKind:
+        if hasattr(self, "_kind"):
+            return self._kind.value
+        return AnnotatorKind.CODE.value
+    def __new__(cls, *args: Any, **kwargs: Any) -> "Evaluator":
+        if cls is Evaluator:
+            raise TypeError(f"{cls.__name__} is an abstract class and should not be instantiated.")
+        return object.__new__(cls)
+    def evaluate(
+        self,
+        *,
+        output: Optional[TaskOutput] = None,
+        expected: Optional[ExampleOutput] = None,
+        metadata: ExampleMetadata = MappingProxyType({}),
+        input: ExampleInput = MappingProxyType({}),
+        **kwargs: Any,
+    ) -> EvaluationResult:
+        # For subclassing, one should implement either this sync method or the
+        # async version. Implementing both is recommended but not required.
+        raise NotImplementedError
+    async def async_evaluate(
+        self,
+        *,
+        output: Optional[TaskOutput] = None,
+        expected: Optional[ExampleOutput] = None,
+        metadata: ExampleMetadata = MappingProxyType({}),
+        input: ExampleInput = MappingProxyType({}),
+        **kwargs: Any,
+    ) -> EvaluationResult:
+        # For subclassing, one should implement either this async method or the
+        # sync version. Implementing both is recommended but not required.
+        return self.evaluate(
+            output=output,
+            expected=expected,
+            metadata=metadata,
+            input=input,
+            **kwargs,
+        )
+    def __init_subclass__(cls, is_abstract: bool = False, **kwargs: Any) -> None:
+        super().__init_subclass__(**kwargs)
+        if is_abstract:
+            return
+        evaluate_fn_signature = inspect.signature(Evaluator.evaluate)
+        for super_cls in inspect.getmro(cls):
+            if super_cls in (LLMEvaluator, Evaluator):
+                break
+            if evaluate := super_cls.__dict__.get(Evaluator.evaluate.__name__):
+                assert callable(evaluate), "`evaluate()` method should be callable"
+                # need to remove the first param, i.e. `self`
+                _validate_sig(functools.partial(evaluate, None), "evaluate")
+                return
+            if async_evaluate := super_cls.__dict__.get(Evaluator.async_evaluate.__name__):
+                assert callable(async_evaluate), "`async_evaluate()` method should be callable"
+                # need to remove the first param, i.e. `self`
+                _validate_sig(functools.partial(async_evaluate, None), "async_evaluate")
+                return
+        raise ValueError(
+            f"Evaluator must implement either "
+            f"`def evaluate{evaluate_fn_signature}` or "
+            f"`async def async_evaluate{evaluate_fn_signature}`"
+        )
+def _validate_sig(fn: Callable[..., Any], fn_name: str) -> None:
+    sig = inspect.signature(fn)
+    validate_signature(sig)
+    for param in sig.parameters.values():
+        if param.kind is inspect.Parameter.VAR_KEYWORD:
+            return
+    else:
+        raise ValueError(f"`{fn_name}` should allow variadic keyword arguments `**kwargs`")
+class CodeEvaluator(Evaluator, ABC, is_abstract=True):
+    """
+    A convenience super class for defining code evaluators.
+    This class is intended to be subclassed, and should not be instantiated directly.
+    """
+    _kind = AnnotatorKind.CODE
+    def __new__(cls, *args: Any, **kwargs: Any) -> "CodeEvaluator":
+        if cls is CodeEvaluator:
+            raise TypeError(f"{cls.__name__} is an abstract class and should not be instantiated.")
+        return object.__new__(cls)
+class LLMEvaluator(Evaluator, ABC, is_abstract=True):
+    """
+    A convenience super class for defining LLM evaluators.
+    This class is intended to be subclassed, and should not be instantiated directly.
+    """
+    _kind = AnnotatorKind.LLM
+    def __new__(cls, *args: Any, **kwargs: Any) -> "LLMEvaluator":
+        if cls is LLMEvaluator:
+            raise TypeError(f"{cls.__name__} is an abstract class and should not be instantiated.")
+        return object.__new__(cls)
+ExperimentEvaluator: TypeAlias = Union[
+    Evaluator,
+    Callable[..., EvaluatorOutput],
+    Callable[..., Awaitable[EvaluatorOutput]],
+]

phoenix/{datasets → experiments}/evaluators/code_evaluators.py RENAMED Viewed

@@ -4,11 +4,11 @@ import json
 import re
 from typing import Any, List, Optional, Union
-from phoenix.datasets.evaluators.utils import Evaluator
-from phoenix.datasets.types import EvaluationResult, TaskOutput
+from phoenix.experiments.evaluators.base import CodeEvaluator
+from phoenix.experiments.types import EvaluationResult, TaskOutput
-class JSONParsable(Evaluator):
+class JSONParsable(CodeEvaluator):
     def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
         assert isinstance(output, str), "Experiment run output must be a string"
         try:
@@ -21,7 +21,7 @@ class JSONParsable(Evaluator):
         )
-class ContainsKeyword(Evaluator):
+class ContainsKeyword(CodeEvaluator):
     def __init__(self, keyword: str, name: Optional[str] = None) -> None:
         self.keyword = keyword
         self._name = name or f"Contains({repr(keyword)})"
@@ -38,7 +38,7 @@ class ContainsKeyword(Evaluator):
         )
-class ContainsAnyKeyword(Evaluator):
+class ContainsAnyKeyword(CodeEvaluator):
     def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
         self.keywords = keywords
         self._name = name or f"ContainsAny({keywords})"
@@ -56,7 +56,7 @@ class ContainsAnyKeyword(Evaluator):
         )
-class ContainsAllKeywords(Evaluator):
+class ContainsAllKeywords(CodeEvaluator):
     def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
         self.keywords = keywords
         self._name = name or f"ContainsAll({keywords})"
@@ -76,7 +76,7 @@ class ContainsAllKeywords(Evaluator):
         )
-class MatchesRegex(Evaluator):
+class MatchesRegex(CodeEvaluator):
     def __init__(self, pattern: Union[str, re.Pattern[str]], name: Optional[str] = None) -> None:
         if isinstance(pattern, str):
             pattern = re.compile(pattern)

phoenix/{datasets → experiments}/evaluators/llm_evaluators.py RENAMED Viewed

@@ -2,19 +2,19 @@ import re
 from types import MappingProxyType
 from typing import Any, Callable, Optional, Type
-from phoenix.datasets.evaluators.utils import (
-    ExampleInput,
-    ExampleMetadata,
+from phoenix.evals.models.base import BaseModel as LLMBaseModel
+from phoenix.evals.utils import snap_to_rail
+from phoenix.experiments.evaluators.base import (
     ExperimentEvaluator,
     LLMEvaluator,
-    _unwrap_json,
 )
-from phoenix.datasets.types import (
+from phoenix.experiments.evaluators.utils import unwrap_json
+from phoenix.experiments.types import (
     EvaluationResult,
+    ExampleInput,
+    ExampleMetadata,
     TaskOutput,
 )
-from phoenix.evals.models.base import BaseModel as LLMBaseModel
-from phoenix.evals.utils import snap_to_rail
 class LLMCriteriaEvaluator(LLMEvaluator):
@@ -62,7 +62,7 @@ class LLMCriteriaEvaluator(LLMEvaluator):
     def _format_eval_template(self, output: TaskOutput) -> str:
         assert output is not None
-        result = _unwrap_json(output)
+        result = unwrap_json(output)
         return self.template.format(text=str(result))
     def _parse_eval_output(self, unparsed_response: str) -> EvaluationResult:
@@ -217,7 +217,7 @@ class RelevanceEvaluator(LLMEvaluator):
         self, output: Optional[TaskOutput] = None, *args: Any, **kwargs: Any
     ) -> str:
         assert output is not None
-        return str(_unwrap_json(output))
+        return str(unwrap_json(output))
     def evaluate(
         self,

arize-phoenix 4.4.4rc5__py3-none-any.whl → 4.4.4rc6__py3-none-any.whl

Potentially problematic release.

arize-phoenix 4.4.4rc5__py3-none-any.whl → 4.4.4rc6__py3-none-any.whl