PyPI - arize-phoenix - Versions diffs - 4.4.4rc4__py3-none-any.whl → 4.4.4rc5__py3-none-any.whl - Mend

arize-phoenix 4.4.4rc4__py3-none-any.whl → 4.4.4rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of arize-phoenix might be problematic. Click here for more details.

Files changed (31) hide show

{arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/METADATA +2 -2
{arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/RECORD +30 -28
phoenix/datasets/evaluators/code_evaluators.py +25 -53
phoenix/datasets/evaluators/llm_evaluators.py +63 -32
phoenix/datasets/evaluators/utils.py +292 -0
phoenix/datasets/experiments.py +147 -82
phoenix/datasets/tracing.py +19 -0
phoenix/datasets/types.py +18 -52
phoenix/db/insertion/dataset.py +19 -16
phoenix/db/migrations/versions/10460e46d750_datasets.py +2 -2
phoenix/db/models.py +8 -3
phoenix/server/api/context.py +2 -0
phoenix/server/api/dataloaders/__init__.py +2 -0
phoenix/server/api/dataloaders/experiment_run_counts.py +42 -0
phoenix/server/api/helpers/dataset_helpers.py +8 -7
phoenix/server/api/input_types/ClearProjectInput.py +15 -0
phoenix/server/api/mutations/project_mutations.py +9 -4
phoenix/server/api/routers/v1/datasets.py +146 -42
phoenix/server/api/routers/v1/experiment_evaluations.py +1 -0
phoenix/server/api/routers/v1/experiment_runs.py +2 -2
phoenix/server/api/types/Experiment.py +5 -0
phoenix/server/api/types/ExperimentRun.py +1 -1
phoenix/server/api/types/ExperimentRunAnnotation.py +1 -1
phoenix/server/app.py +2 -0
phoenix/server/static/index.js +610 -564
phoenix/session/client.py +124 -2
phoenix/version.py +1 -1
phoenix/datasets/evaluators/_utils.py +0 -13
{arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/WHEEL +0 -0
{arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/licenses/IP_NOTICE +0 -0
{arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/licenses/LICENSE +0 -0

{arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: arize-phoenix
-Version: 4.4.4rc4
+Version: 4.4.4rc5
 Summary: AI Observability and Evaluation
 Project-URL: Documentation, https://docs.arize.com/phoenix/
 Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -31,7 +31,7 @@ Requires-Dist: openinference-instrumentation
 Requires-Dist: openinference-instrumentation-langchain>=0.1.12
 Requires-Dist: openinference-instrumentation-llama-index>=1.2.0
 Requires-Dist: openinference-instrumentation-openai>=0.1.4
-Requires-Dist: openinference-semantic-conventions>=0.1.5
+Requires-Dist: openinference-semantic-conventions>=0.1.9
 Requires-Dist: opentelemetry-exporter-otlp
 Requires-Dist: opentelemetry-proto>=1.12.0
 Requires-Dist: opentelemetry-sdk

{arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/RECORD RENAMED Viewed

@@ -5,20 +5,20 @@ phoenix/exceptions.py,sha256=n2L2KKuecrdflB9MsCdAYCiSEvGJptIsfRkXMoJle7A,169
 phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 phoenix/services.py,sha256=aTxhcOA1pZHB6U-B3TEcp6fqDF5oT0xCUvEUNMZVTUQ,5175
 phoenix/settings.py,sha256=cO-qgis_S27nHirTobYI9hHPfZH18R--WMmxNdsVUwc,273
-phoenix/version.py,sha256=NZ2gYPUT2LKOK3V9-dZJ34v1J27mnLmDtx-pKAXd1W0,25
+phoenix/version.py,sha256=-Vg_bLotyeJdv0gFqG5-A64nsG-6AR0xZSp3sDDsV_w,25
 phoenix/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
 phoenix/core/model.py,sha256=km_a--PBHOuA337ClRw9xqhOHhrUT6Rl9pz_zV0JYkQ,4843
 phoenix/core/model_schema.py,sha256=F2dbbVnkDLsPYoyZDv1q03uhvP8LcU1wXp0g-exiWs0,50551
 phoenix/core/model_schema_adapter.py,sha256=0Tm_Y_gV-WED8fKBCaFXAEFwE3CTEZS1dowqnTZ7x7g,8426
 phoenix/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-phoenix/datasets/experiments.py,sha256=MhuhJWJ-bBqZ_aR3FewudEeo6RUrLgm0hmDlGjWVsrU,19314
-phoenix/datasets/tracing.py,sha256=Ieb2Uo-9qHpmv65uf1VsFSsWo5Yxj6VHwGS6dxu9NHQ,2248
-phoenix/datasets/types.py,sha256=w0KoSP7AdlcFlV3I6qVtvKOOWoK0yiY6_s4CvH0flcs,5753
+phoenix/datasets/experiments.py,sha256=RzZezHQcTpPcr7gY9rGtoYlfoesFNhNV7EO5f_oHNFk,21198
+phoenix/datasets/tracing.py,sha256=wVpt8Ie9WNPoi1djJdcrkwCokHdTO0bicXViLg3O-1Y,2831
+phoenix/datasets/types.py,sha256=N17mnnVwmu1k3bnmbyROPt_6TxPaZY_QkOZmCOR5_jE,4835
 phoenix/datasets/evaluators/__init__.py,sha256=KSr9fNG4O93swYxNdPj_UihP9Itl_5mj0a492wi_4_0,465
-phoenix/datasets/evaluators/_utils.py,sha256=-MaNdoN1hA3FLzLyIDplUUkUtmM56BMIV83Gh-sgAsU,436
-phoenix/datasets/evaluators/code_evaluators.py,sha256=fwoKfyHD7_xBaHY8Ax78xcry7PtB8Y1FxIn82guAV5M,4640
-phoenix/datasets/evaluators/llm_evaluators.py,sha256=Ghg3bIBtQCdd6LuQ6VdcbkNQKI9ouZXwjlJV5GcdxOg,8675
+phoenix/datasets/evaluators/code_evaluators.py,sha256=DdCcAi274t_TLs_aARd-GmWWpJrxVeNEAegMFEAfe0E,3894
+phoenix/datasets/evaluators/llm_evaluators.py,sha256=aVfAHOWhskBiy0IVeq_ACTs7B37uXTTtDoNBS0XenIc,9165
+phoenix/datasets/evaluators/utils.py,sha256=S7OGrb1sBWg5l9K35X29OKJe5wZ3k7xMhxJBclzxta0,10452
 phoenix/db/README.md,sha256=IvKaZyf9ECbGBYYePaRhBveKZwDbxAc-c7BMxJYZh6Q,595
 phoenix/db/__init__.py,sha256=pDjEFXukHmJBM-1D8RjmXkvLsz85YWNxMQczt81ec3A,118
 phoenix/db/alembic.ini,sha256=p8DjVqGUs_tTx8oU56JP7qj-rMUebNFizItUSv_hPhs,3763
@@ -26,9 +26,9 @@ phoenix/db/bulk_inserter.py,sha256=zbZGWZFDybKaGLGzpxgLwxAS5sC0_wXcvM0be4kUhh8,1
 phoenix/db/engines.py,sha256=vLWaZlToMtDI7rJDxSidYkfOoojamxaZxaz8ND3zTus,4770
 phoenix/db/helpers.py,sha256=L2_jP1iIWpUREhKLYYb4_vf_6v_BiU1E73Z2PczGm6s,1589
 phoenix/db/migrate.py,sha256=MuhtNWnR24riROvarvKfbRb4_D5xuQi6P760vBUKl1E,2270
-phoenix/db/models.py,sha256=zzZHXh1NpS3LyOOFp1BS7aVyrU1Qx3gcBY-H8ouoyjg,20282
+phoenix/db/models.py,sha256=lYzI3tCDUl8njXb3Vf3R8e6y56-MErprjjfBE-o9Kao,20419
 phoenix/db/insertion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-phoenix/db/insertion/dataset.py,sha256=2aBOTgjwRkmJqjE1FEQp7BTu1Jz4-bS1bKyeJgvSxfg,7305
+phoenix/db/insertion/dataset.py,sha256=_vxy5e6W5jEuvO2fMKbbNCn9JvHkwI4LRKk_10eKFVg,7171
 phoenix/db/insertion/evaluation.py,sha256=fAerUy3QGf2wID_tiVmPvzxBDFGiONPl3pmpZDgJDWQ,7183
 phoenix/db/insertion/helpers.py,sha256=7tf6qQyJ05nn3IXaZEpj2b4Jz5boGLWT8tzlMaJ9tQY,2337
 phoenix/db/insertion/span.py,sha256=DNBjSrx5g2W5KuTB1dkHwtkb0SFnMIxN1jB-BAdGKFY,5634
@@ -36,7 +36,7 @@ phoenix/db/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
 phoenix/db/migrations/env.py,sha256=QbzB5zrRs6XQQmrYeUpuzeilcMlM-MsbaAgHHYcIHTI,3626
 phoenix/db/migrations/script.py.mako,sha256=MEqL-2qATlST9TAOeYgscMn1uy6HUS9NFvDgl93dMj8,635
 phoenix/db/migrations/types.py,sha256=Frq1AKSyBKQQ0FLzON-EmgTqE4kNkOpHMsbWnI-WgCE,605
-phoenix/db/migrations/versions/10460e46d750_datasets.py,sha256=RapdD9Sud_Gq45Vpz7VnDQB_toG6B6yHlwS93qAh_0c,8133
+phoenix/db/migrations/versions/10460e46d750_datasets.py,sha256=l69yZfScFrjfZZpY0gnqwhsDUEctLeo02qMgA_aOGDg,8155
 phoenix/db/migrations/versions/cf03bd6bae1d_init.py,sha256=CbWT3ZTR0CZqeT3zWLoTWhboFmnOy3Ju1z6Ztpq8WIM,8122
 phoenix/inferences/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/inferences/errors.py,sha256=cGp9vxnw4SewFoWBV3ZGMkhE0Kh73lPIv3Ppz_H_RoA,8261
@@ -58,18 +58,18 @@ phoenix/pointcloud/pointcloud.py,sha256=4zAIkKs2xOUbchpj4XDAV-iPMXrfAJ15TG6rlIYG
 phoenix/pointcloud/projectors.py,sha256=zO_RrtDYSv2rqVOfIP2_9Cv11Dc8EmcZR94xhFcBYPU,1057
 phoenix/pointcloud/umap_parameters.py,sha256=lJsEOrbSuSiqI7g4Yt6xj7kgYxEqoep4ZHWLr6VWBqw,1760
 phoenix/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-phoenix/server/app.py,sha256=_D2DgupKJHv8DmS6VgWxvygumSM75qdlDg6qSj61PRU,18227
+phoenix/server/app.py,sha256=LQrHWt5HG_pWqnR9Ozb3-vnAGiiRGuZ3uV_9-886Yxw,18340
 phoenix/server/grpc_server.py,sha256=faktLxEtWGlCB1bPR4QwwTsRoQloahKMx0hAWqRGI5s,3379
 phoenix/server/main.py,sha256=mtzH_2Kyvuy3AHiiKfqiCdUQ6SGFzeT4q9fefbV6GLg,11114
 phoenix/server/prometheus.py,sha256=j9DHB2fERuq_ZKmwVaqR-9wx5WcPPuU1Cm5Bhg5241Y,2996
 phoenix/server/telemetry.py,sha256=T_2OKrxNViAeaANlNspEekg_Y5uZIFWvKAnpz8Aoqvk,2762
 phoenix/server/thread_server.py,sha256=dP6cm6Cf08jNhDA1TRlVZpziu1YgtPDmaeIJMm725eI,2154
 phoenix/server/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-phoenix/server/api/context.py,sha256=GfAD9QHg5erKwYGpqDj_8bL2GwmccARDZQc8yO-4Fm0,2669
+phoenix/server/api/context.py,sha256=jb69SVdb5hpVbfM4U0pZi4sGa2a-0VKOJWcBjjS7l4s,2761
 phoenix/server/api/interceptor.py,sha256=ykDnoC_apUd-llVli3m1CW18kNSIgjz2qZ6m5JmPDu8,1294
 phoenix/server/api/queries.py,sha256=wp5BlapuxDIoaQJm7mzG0dURfVxR32vXSJVC0JqG4_Y,19845
 phoenix/server/api/schema.py,sha256=BcxdqO5CSGqpKd-AAJHMjFlzaK9oJA8GJuxmMfcdjn4,434
-phoenix/server/api/dataloaders/__init__.py,sha256=9fFjDNlCtOYTOKJi0uPIRh7xJMpCnrBOhoWGEdv1BrI,4618
+phoenix/server/api/dataloaders/__init__.py,sha256=urbG3M-k2cpj2ymMLYQ28tzIXAG1edECxM-tJ22ylqE,4720
 phoenix/server/api/dataloaders/dataset_example_revisions.py,sha256=Vpr5IEKSR4QnAVxE5NM7u92fPNgeHQV2ieYc6JakCj0,3788
 phoenix/server/api/dataloaders/dataset_example_spans.py,sha256=_jLlo0KdUS65d4PNTtE9aXVyG_NZWgA7VcpNC9udQ8U,1484
 phoenix/server/api/dataloaders/document_evaluation_summaries.py,sha256=dgAAlD0n8X6oAPLaD-czoefNkDqP338MouWsKaW8bOY,5684
@@ -78,6 +78,7 @@ phoenix/server/api/dataloaders/document_retrieval_metrics.py,sha256=8tZYMNLZ7zxU
 phoenix/server/api/dataloaders/evaluation_summaries.py,sha256=z9aal3IQL_t30aNqpAS7x4tjq0xNkuEG8dWW-bhqZmo,5724
 phoenix/server/api/dataloaders/experiment_annotation_summaries.py,sha256=RsQ-o84kWVTYgIlh9VKkyw2kDMWIlHCRpS7RE2aw9vs,2881
 phoenix/server/api/dataloaders/experiment_error_rates.py,sha256=EHlTdZi8F94vo-qJUcnnXFvuSh_d0fTT0Xg4SfW_A70,1397
+phoenix/server/api/dataloaders/experiment_run_counts.py,sha256=wxHv08aZELJ91KTjHdt_x33M3wGDDa9GfbFHeRyOyGk,1343
 phoenix/server/api/dataloaders/experiment_sequence_number.py,sha256=Va1KuoHOd-wzvrlKykoV4kLRFW4JsJvGp_DUI4HYZX4,1631
 phoenix/server/api/dataloaders/latency_ms_quantile.py,sha256=pEc7QjB2iiNOQm_Fmo99F5O_DKOJWgGmcnT0OADJzYE,7423
 phoenix/server/api/dataloaders/min_start_or_max_end_times.py,sha256=IoFX5PtSpvQdMk_7-oB8TpIse3Q4PMxep4qKggkHpzo,2902
@@ -92,9 +93,10 @@ phoenix/server/api/dataloaders/trace_row_ids.py,sha256=yAWuVFWUjDdmmwfXsGs_l6LuG
 phoenix/server/api/dataloaders/cache/__init__.py,sha256=SYoOM9n8FJaMdQarma5d1blu-jIg2GB8Shqg5ezSzZ8,106
 phoenix/server/api/dataloaders/cache/two_tier_cache.py,sha256=I38L1RsOis98OQftE7n1Q9QBZfFJO6OW_qIINkuJllo,2295
 phoenix/server/api/helpers/__init__.py,sha256=_V1eVkchZmTkhOfRC4QqR1sUB2xtIxdsMJkDouZq_IE,251
-phoenix/server/api/helpers/dataset_helpers.py,sha256=kIo_kPrV8O40CUypB57JCB5Ek3GJmZXPlz6NIULIsSM,6875
+phoenix/server/api/helpers/dataset_helpers.py,sha256=A6UzEyAb4gFtyc_AV63_yl9OpN0vn8Vw1BBCTNjg9J0,6875
 phoenix/server/api/input_types/AddExamplesToDatasetInput.py,sha256=ZGXMV0H3DYHi4DdqGhejDzaWdFinyem1Mc8DVA7iCh0,436
 phoenix/server/api/input_types/AddSpansToDatasetInput.py,sha256=C4oZ0WqYqca1kleNOCMIM2_aY6Qnc5n1xXG51_C1V0w,368
+phoenix/server/api/input_types/ClearProjectInput.py,sha256=cpPFRyQ3ffy2dLbCZgYpway-mCzhdm4QqnUg8caOBfQ,382
 phoenix/server/api/input_types/ClusterInput.py,sha256=EL4ftvZxQ8mVdruUPcdhMhByORmSmM8S-X6RPqU6GX0,179
 phoenix/server/api/input_types/Coordinates.py,sha256=meTwbIjwTfqx5DGD2DBlH9wQzdQVNM5a8x9dp1FfIgA,173
 phoenix/server/api/input_types/CreateDatasetInput.py,sha256=Q3MwouIx9jTQBRWDju75iMQXEGJCrL4aD4ESQp771nc,248
@@ -119,7 +121,7 @@ phoenix/server/api/mutations/auth.py,sha256=vPRFoj7J6PV6QeODewG4K0PhoOebS5AfMRpb
 phoenix/server/api/mutations/dataset_mutations.py,sha256=Zp2sFWyGyubILUQboR6bafRWafsfeRO2ffUWnkLlfgI,22532
 phoenix/server/api/mutations/experiment_mutations.py,sha256=Fw_yEdITGJ6A33M5JZ-2YnBTDoBqZUUFON6vy8JoVjE,2569
 phoenix/server/api/mutations/export_events_mutations.py,sha256=t_wYBxaqvBJYRoHslh3Bmoxmwlzoy0u8SsBKWIKN5hE,4028
-phoenix/server/api/mutations/project_mutations.py,sha256=6A7BS3651iaeAwUszKXQB3NK4QJY_tGpALBMNw1bqp8,2021
+phoenix/server/api/mutations/project_mutations.py,sha256=3SVDCZqxB0Iv60cOwBL8c-rY3QUUPs8PXbp-C_K1mWY,2267
 phoenix/server/api/openapi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/server/api/openapi/main.py,sha256=WY0pj3B7siQyyYqKyhqnzWC7P8MtEtiukOBUjGwLXfw,153
 phoenix/server/api/openapi/schema.py,sha256=uuSYe1Ecu72aXRgTNjyMu-9ZPE13DAHJPKtedS-MsSs,451
@@ -127,10 +129,10 @@ phoenix/server/api/routers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
 phoenix/server/api/routers/utils.py,sha256=M41BoH-fl37izhRuN2aX7lWm7jOC20A_3uClv9TVUUY,583
 phoenix/server/api/routers/v1/__init__.py,sha256=B5eSaylPI7MoYia1-VgKrU8rDi-69r_hRwPU5yMLUTE,2808
 phoenix/server/api/routers/v1/dataset_examples.py,sha256=wtplRUv2ee9xGTrcEMgTn-7L4NX_73IcwUXkCMZEFc4,6726
-phoenix/server/api/routers/v1/datasets.py,sha256=2wkBOLqo8ttSN1VNVEcnPcLCitkSLGp62AjWlxJhV4Y,27605
+phoenix/server/api/routers/v1/datasets.py,sha256=ws2Guou9mspwFx3-cBFZoD2VuTwWGoFZmtt2Sr3zg6k,31516
 phoenix/server/api/routers/v1/evaluations.py,sha256=rwSVg-rpujhsMcDVFt-VAr0Ix9TgvLcY_bSxeh8PzJI,9241
-phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=xemnZ10WL5pErndP7jbaDipUj3Tkl813XSIjx7X5MBY,2656
-phoenix/server/api/routers/v1/experiment_runs.py,sha256=o6IvcyFDY-cy3KqeO9FIKy3XAgbIJhx7SFUoxML-MeY,4337
+phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=xhrkPUc_4ncIBm24aUyzu47UU0CN1tGlbisn-oLqt_Y,2702
+phoenix/server/api/routers/v1/experiment_runs.py,sha256=0AUNHA5nvpGDeoJUGK8VxP2TFN3iPwhMW3D9QmHstPk,4399
 phoenix/server/api/routers/v1/experiments.py,sha256=5Rh7q6sHswmk11PZSJ7KMrtqfIE16X_xSKkKSASK9-I,7251
 phoenix/server/api/routers/v1/spans.py,sha256=FEnmlRPBPl71BSGNBuPrz14fk8nmxJQYsKECdDbdUdw,3977
 phoenix/server/api/routers/v1/traces.py,sha256=dYEf5pThenAQCgfQljHdrnwd4tC_tAXm6Kvk6GphPYs,2774
@@ -157,11 +159,11 @@ phoenix/server/api/types/EvaluationSummary.py,sha256=EFucuzAhcxR9sdEn6WNAtmAGJk-
 phoenix/server/api/types/Event.py,sha256=XdYgaIxcVIW-YFViCkxj5l9OaVNepyIrCtm5Iqg2le8,3989
 phoenix/server/api/types/EventMetadata.py,sha256=-J0tYF9eZTHwCjwxQHY7Gckr2_MNW5OoWT1mydweZNM,635
 phoenix/server/api/types/ExampleRevisionInterface.py,sha256=gV3Gt9-3Oi5wjaVtepC6nOt3FzTzZFD1KebNnqiw56E,294
-phoenix/server/api/types/Experiment.py,sha256=Lon2ZNZYdWXQmj3nLr_TXN8CCtZtC-AXYfyJuoqI2DM,4692
+phoenix/server/api/types/Experiment.py,sha256=Cs0EKhVLI5l5LKFI0hQA-ekZuaiJcOHT88JGFBa2deU,4906
 phoenix/server/api/types/ExperimentAnnotationSummary.py,sha256=Uk3JtxIrsMoZT5tqc4nJdUOM3XegVzjUyoV3pkjNotE,256
 phoenix/server/api/types/ExperimentComparison.py,sha256=0sFz6MoBDw39dds0qVyaqhVs9qqO5rkG1FMSjmfBeCc,441
-phoenix/server/api/types/ExperimentRun.py,sha256=uM7HxaC8nEjtO7yLr8WjLEfYRvEvbX6ibR8I0fVzdeU,2976
-phoenix/server/api/types/ExperimentRunAnnotation.py,sha256=GvWY6wukBhSr2Tk9Ef0R5bH5yCMxVakqeypoyYoUb6o,1774
+phoenix/server/api/types/ExperimentRun.py,sha256=8jUIi3ApVCqQHwnYe59CYhrmh5iZ6-QmlH5WpF7UWtM,2990
+phoenix/server/api/types/ExperimentRunAnnotation.py,sha256=zGstMbS5OxNikEhD8VouY7Ls7YbxKm-0EmqvGeY3-DI,1773
 phoenix/server/api/types/ExportedFile.py,sha256=e3GTn7B5LgsTbqiwjhMCQH7VsiqXitrBO4aCMS1lHsg,163
 phoenix/server/api/types/Functionality.py,sha256=tzV9xdhB8zqfsjWxP66NDC7EZsplYkYO7jRbLWJIeeg,382
 phoenix/server/api/types/Inferences.py,sha256=HWuDZZrXPWVoEy_pA3bRsAOUYsCKgAxf9zshasGqu5Y,3403
@@ -197,12 +199,12 @@ phoenix/server/static/apple-touch-icon-76x76.png,sha256=CT_xT12I0u2i0WU8JzBZBuOQ
 phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZVooBJ_SGk2ZkDs,3801
 phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
 phoenix/server/static/index.css,sha256=KKGpx4iwF91VGRm0YN-4cn8oC-oIqC6HecoPf0x3ZM8,1885
-phoenix/server/static/index.js,sha256=n8qF_l7ijW-7E8m63oViD8SpXOYjN3wvZUhgB8H6ZLo,3489949
+phoenix/server/static/index.js,sha256=I9Y8svcPruUrXklKcZUxFz5HfLB0vOwczYLSwLAs_04,3500011
 phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
 phoenix/server/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/server/templates/index.html,sha256=S4z7qSoNSwnKFAH9r96AR-YJEyoKMd-VMWVlJ_IdzME,2039
 phoenix/session/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-phoenix/session/client.py,sha256=tq2qghwYa_mxNYLD41UNfD7n57msB9lYIe5H16lBqoo,20333
+phoenix/session/client.py,sha256=R7dV38yjkIQa522nhG6jhDllWcXft2JJ7RlcPYpqiiQ,24846
 phoenix/session/data_extractor.py,sha256=dwhiDu-ISaXr8UI9I-CszZhB5BlUNmdDopjFZvMIXMw,2101
 phoenix/session/evaluation.py,sha256=aKeV8UVOyq3b7CYOwt3cWuLz0xzvMjX7vlEPILJ_fcs,5311
 phoenix/session/session.py,sha256=rjIuSSK2gAYIUPQTJc4E2ebew5o6I070FWRoFn4W3EI,26620
@@ -242,8 +244,8 @@ phoenix/utilities/logging.py,sha256=lDXd6EGaamBNcQxL4vP1au9-i_SXe0OraUDiJOcszSw,
 phoenix/utilities/project.py,sha256=qWsvKnG1oKhOFUowXf9qiOL2ia7jaFe_ijFFHEt8GJo,431
 phoenix/utilities/re.py,sha256=PDve_OLjRTM8yQQJHC8-n3HdIONi7aNils3ZKRZ5uBM,2045
 phoenix/utilities/span_store.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arize_phoenix-4.4.4rc4.dist-info/METADATA,sha256=YEUoxXSRba4zRgzM8-lcq7TIp9GNPZSjY_QGoyIJN-w,11012
-arize_phoenix-4.4.4rc4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-arize_phoenix-4.4.4rc4.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
-arize_phoenix-4.4.4rc4.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
-arize_phoenix-4.4.4rc4.dist-info/RECORD,,
+arize_phoenix-4.4.4rc5.dist-info/METADATA,sha256=yT0gbMlPkiRkZeC8Yj_eLyaufriREVn3jxz5-qTKDjI,11012
+arize_phoenix-4.4.4rc5.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+arize_phoenix-4.4.4rc5.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
+arize_phoenix-4.4.4rc5.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
+arize_phoenix-4.4.4rc5.dist-info/RECORD,,

phoenix/datasets/evaluators/code_evaluators.py CHANGED Viewed

@@ -2,19 +2,14 @@ from __future__ import annotations
 import json
 import re
-from typing import TYPE_CHECKING, List, Optional, Union
+from typing import Any, List, Optional, Union
-from phoenix.datasets.evaluators._utils import _unwrap_json
-from phoenix.datasets.types import EvaluationResult, Example, ExperimentEvaluator, ExperimentRun
+from phoenix.datasets.evaluators.utils import Evaluator
+from phoenix.datasets.types import EvaluationResult, TaskOutput
-class JSONParsable:
-    annotator_kind = "CODE"
-    name = "JSONParsable"
-    def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
-        assert exp_run.output is not None
-        output = _unwrap_json(exp_run.output.result)
+class JSONParsable(Evaluator):
+    def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
         assert isinstance(output, str), "Experiment run output must be a string"
         try:
             json.loads(output)
@@ -26,18 +21,14 @@ class JSONParsable:
         )
-class ContainsKeyword:
-    annotator_kind = "CODE"
+class ContainsKeyword(Evaluator):
     def __init__(self, keyword: str, name: Optional[str] = None) -> None:
         self.keyword = keyword
-        self.name = name or f"Contains({repr(keyword)})"
+        self._name = name or f"Contains({repr(keyword)})"
-    def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
-        assert exp_run.output is not None
-        result = _unwrap_json(exp_run.output.result)
-        assert isinstance(result, str), "Experiment run output must be a string"
-        found = self.keyword in result
+    def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
+        assert isinstance(output, str), "Experiment run output must be a string"
+        found = self.keyword in output
         return EvaluationResult(
             score=float(found),
             explanation=(
@@ -47,18 +38,14 @@ class ContainsKeyword:
         )
-class ContainsAnyKeyword:
-    annotator_kind = "CODE"
+class ContainsAnyKeyword(Evaluator):
     def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
         self.keywords = keywords
-        self.name = name or f"ContainsAny({keywords})"
+        self._name = name or f"ContainsAny({keywords})"
-    def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
-        assert exp_run.output is not None
-        result = _unwrap_json(exp_run.output.result)
-        assert isinstance(result, str), "Experiment run output must be a string"
-        found = [keyword for keyword in self.keywords if keyword in result]
+    def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
+        assert isinstance(output, str), "Experiment run output must be a string"
+        found = [keyword for keyword in self.keywords if keyword in output]
         if found:
             explanation = f"the keywords {found} were found in the output"
         else:
@@ -69,18 +56,14 @@ class ContainsAnyKeyword:
         )
-class ContainsAllKeywords:
-    annotator_kind = "CODE"
+class ContainsAllKeywords(Evaluator):
     def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
         self.keywords = keywords
-        self.name = name or f"ContainsAll({keywords})"
+        self._name = name or f"ContainsAll({keywords})"
-    def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
-        assert exp_run.output is not None
-        result = _unwrap_json(exp_run.output.result)
-        assert isinstance(result, str), "Experiment run output must be a string"
-        not_found = [keyword for keyword in self.keywords if keyword not in result]
+    def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
+        assert isinstance(output, str), "Experiment run output must be a string"
+        not_found = [keyword for keyword in self.keywords if keyword not in output]
         if not_found:
             contains_all = False
             explanation = f"the keywords {not_found} were not found in the output"
@@ -93,21 +76,17 @@ class ContainsAllKeywords:
         )
-class MatchesRegex:
-    annotator_kind = "CODE"
+class MatchesRegex(Evaluator):
     def __init__(self, pattern: Union[str, re.Pattern[str]], name: Optional[str] = None) -> None:
         if isinstance(pattern, str):
             pattern = re.compile(pattern)
         self.pattern = pattern
         assert isinstance(pattern, re.Pattern)
-        self.name = name or f"matches_({pattern})"
+        self._name = name or f"matches_({pattern})"
-    def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
-        assert exp_run.output is not None
-        result = _unwrap_json(exp_run.output.result)
-        assert isinstance(result, str), "Experiment run output must be a string"
-        matches = self.pattern.findall(result)
+    def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
+        assert isinstance(output, str), "Experiment run output must be a string"
+        matches = self.pattern.findall(output)
         if matches:
             explanation = (
                 f"the substrings {matches} matched the regex pattern {self.pattern.pattern}"
@@ -118,10 +97,3 @@ class MatchesRegex:
             score=float(bool(matches)),
             explanation=explanation,
         )
-# Someday we'll do typing checking in unit tests.
-if TYPE_CHECKING:
-    _: ExperimentEvaluator
-    _ = JSONParsable()
-    _ = ContainsKeyword("test")

phoenix/datasets/evaluators/llm_evaluators.py CHANGED Viewed

@@ -1,14 +1,23 @@
 import re
-from typing import Callable, Optional, Type
-from phoenix.datasets.evaluators._utils import _unwrap_json
-from phoenix.datasets.types import EvaluationResult, Example, ExperimentEvaluator, ExperimentRun
+from types import MappingProxyType
+from typing import Any, Callable, Optional, Type
+from phoenix.datasets.evaluators.utils import (
+    ExampleInput,
+    ExampleMetadata,
+    ExperimentEvaluator,
+    LLMEvaluator,
+    _unwrap_json,
+)
+from phoenix.datasets.types import (
+    EvaluationResult,
+    TaskOutput,
+)
 from phoenix.evals.models.base import BaseModel as LLMBaseModel
 from phoenix.evals.utils import snap_to_rail
-class LLMCriteriaEvaluator:
-    annotator_kind = "LLM"
+class LLMCriteriaEvaluator(LLMEvaluator):
     _base_template = (
         "Determine if the following text is {criteria}. {description}"
         "First, explain step-by-step why you think the text is or is not {criteria}. Then provide "
@@ -37,21 +46,23 @@ class LLMCriteriaEvaluator:
         self.criteria = criteria
         self.description = description
         self.template = self._format_base_template(self.criteria, self.description)
-        self.name = name
+        self._name = name
-    def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
-        formatted_template = self._format_eval_template(exp_run)
+    def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
+        formatted_template = self._format_eval_template(output)
         unparsed_response = self.model._generate(formatted_template)
         return self._parse_eval_output(unparsed_response)
-    async def async_evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
-        formatted_template = self._format_eval_template(exp_run)
+    async def async_evaluate(
+        self, *, output: Optional[TaskOutput] = None, **_: Any
+    ) -> EvaluationResult:
+        formatted_template = self._format_eval_template(output)
         unparsed_response = await self.model._async_generate(formatted_template)
         return self._parse_eval_output(unparsed_response)
-    def _format_eval_template(self, experiment_run: ExperimentRun) -> str:
-        assert experiment_run.output is not None
-        result = _unwrap_json(experiment_run.output.result)
+    def _format_eval_template(self, output: TaskOutput) -> str:
+        assert output is not None
+        result = _unwrap_json(output)
         return self.template.format(text=str(result))
     def _parse_eval_output(self, unparsed_response: str) -> EvaluationResult:
@@ -137,8 +148,7 @@ def _parse_label_from_explanation(raw_string: str) -> str:
     return raw_string
-class RelevanceEvaluator:
-    annotator_kind = "LLM"
+class RelevanceEvaluator(LLMEvaluator):
     template = (
         "Determine if the following response is relevant to the query. In this context, "
         "'relevance' means that the response directly addresses the core question or topic of the "
@@ -162,19 +172,24 @@ class RelevanceEvaluator:
     def __init__(
         self,
         model: LLMBaseModel,
-        get_query: Optional[Callable[[Example, ExperimentRun], str]] = None,
-        get_response: Optional[Callable[[Example, ExperimentRun], str]] = None,
+        get_query: Optional[Callable[[ExampleInput, ExampleMetadata], str]] = None,
+        get_response: Optional[Callable[[Optional[TaskOutput], ExampleMetadata], str]] = None,
         name: str = "RelevanceEvaluator",
     ):
         self.model = model
-        self.name = name
+        self._name = name
         self.get_query = get_query or self._default_get_query
         self.get_response = get_response or self._default_get_response
-    def _format_eval_template(self, example: Example, experiment_run: ExperimentRun) -> str:
-        assert experiment_run.output is not None
-        query = self.get_query(example, experiment_run)
-        response = self.get_response(example, experiment_run)
+    def _format_eval_template(
+        self,
+        output: Optional[TaskOutput] = None,
+        input: ExampleInput = MappingProxyType({}),
+        metadata: ExampleMetadata = MappingProxyType({}),
+    ) -> str:
+        assert output is not None
+        query = self.get_query(input, metadata)
+        response = self.get_response(output, metadata)
         return self.template.format(query=query, response=response)
     def _parse_eval_output(self, unparsed_response: str) -> EvaluationResult:
@@ -195,19 +210,35 @@ class RelevanceEvaluator:
             metadata={},
         )
-    def _default_get_query(self, example: Example, experiment_run: ExperimentRun) -> str:
-        return str(example.input)
+    def _default_get_query(self, input: ExampleInput, *args: Any, **kwargs: Any) -> str:
+        return str(input)
-    def _default_get_response(self, example: Example, experiment_run: ExperimentRun) -> str:
-        assert experiment_run.output is not None
-        return str(_unwrap_json(experiment_run.output.result))
+    def _default_get_response(
+        self, output: Optional[TaskOutput] = None, *args: Any, **kwargs: Any
+    ) -> str:
+        assert output is not None
+        return str(_unwrap_json(output))
-    def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
-        formatted_template = self._format_eval_template(example, exp_run)
+    def evaluate(
+        self,
+        *,
+        output: Optional[TaskOutput] = None,
+        metadata: ExampleMetadata = MappingProxyType({}),
+        input: ExampleInput = MappingProxyType({}),
+        **_: Any,
+    ) -> EvaluationResult:
+        formatted_template = self._format_eval_template(output, input, metadata)
         unparsed_response = self.model._generate(formatted_template)
         return self._parse_eval_output(unparsed_response)
-    async def async_evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
-        formatted_template = self._format_eval_template(example, exp_run)
+    async def async_evaluate(
+        self,
+        *,
+        output: Optional[TaskOutput] = None,
+        metadata: ExampleMetadata = MappingProxyType({}),
+        input: ExampleInput = MappingProxyType({}),
+        **_: Any,
+    ) -> EvaluationResult:
+        formatted_template = self._format_eval_template(output, input, metadata)
         unparsed_response = await self.model._async_generate(formatted_template)
         return self._parse_eval_output(unparsed_response)

arize-phoenix 4.4.4rc4__py3-none-any.whl → 4.4.4rc5__py3-none-any.whl

Potentially problematic release.

arize-phoenix 4.4.4rc4__py3-none-any.whl → 4.4.4rc5__py3-none-any.whl