arize-phoenix 4.4.4rc4__py3-none-any.whl → 4.4.4rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (31) hide show
  1. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/METADATA +2 -2
  2. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/RECORD +30 -28
  3. phoenix/datasets/evaluators/code_evaluators.py +25 -53
  4. phoenix/datasets/evaluators/llm_evaluators.py +63 -32
  5. phoenix/datasets/evaluators/utils.py +292 -0
  6. phoenix/datasets/experiments.py +147 -82
  7. phoenix/datasets/tracing.py +19 -0
  8. phoenix/datasets/types.py +18 -52
  9. phoenix/db/insertion/dataset.py +19 -16
  10. phoenix/db/migrations/versions/10460e46d750_datasets.py +2 -2
  11. phoenix/db/models.py +8 -3
  12. phoenix/server/api/context.py +2 -0
  13. phoenix/server/api/dataloaders/__init__.py +2 -0
  14. phoenix/server/api/dataloaders/experiment_run_counts.py +42 -0
  15. phoenix/server/api/helpers/dataset_helpers.py +8 -7
  16. phoenix/server/api/input_types/ClearProjectInput.py +15 -0
  17. phoenix/server/api/mutations/project_mutations.py +9 -4
  18. phoenix/server/api/routers/v1/datasets.py +146 -42
  19. phoenix/server/api/routers/v1/experiment_evaluations.py +1 -0
  20. phoenix/server/api/routers/v1/experiment_runs.py +2 -2
  21. phoenix/server/api/types/Experiment.py +5 -0
  22. phoenix/server/api/types/ExperimentRun.py +1 -1
  23. phoenix/server/api/types/ExperimentRunAnnotation.py +1 -1
  24. phoenix/server/app.py +2 -0
  25. phoenix/server/static/index.js +610 -564
  26. phoenix/session/client.py +124 -2
  27. phoenix/version.py +1 -1
  28. phoenix/datasets/evaluators/_utils.py +0 -13
  29. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/WHEEL +0 -0
  30. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/licenses/IP_NOTICE +0 -0
  31. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/licenses/LICENSE +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: arize-phoenix
3
- Version: 4.4.4rc4
3
+ Version: 4.4.4rc5
4
4
  Summary: AI Observability and Evaluation
5
5
  Project-URL: Documentation, https://docs.arize.com/phoenix/
6
6
  Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -31,7 +31,7 @@ Requires-Dist: openinference-instrumentation
31
31
  Requires-Dist: openinference-instrumentation-langchain>=0.1.12
32
32
  Requires-Dist: openinference-instrumentation-llama-index>=1.2.0
33
33
  Requires-Dist: openinference-instrumentation-openai>=0.1.4
34
- Requires-Dist: openinference-semantic-conventions>=0.1.5
34
+ Requires-Dist: openinference-semantic-conventions>=0.1.9
35
35
  Requires-Dist: opentelemetry-exporter-otlp
36
36
  Requires-Dist: opentelemetry-proto>=1.12.0
37
37
  Requires-Dist: opentelemetry-sdk
@@ -5,20 +5,20 @@ phoenix/exceptions.py,sha256=n2L2KKuecrdflB9MsCdAYCiSEvGJptIsfRkXMoJle7A,169
5
5
  phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
6
6
  phoenix/services.py,sha256=aTxhcOA1pZHB6U-B3TEcp6fqDF5oT0xCUvEUNMZVTUQ,5175
7
7
  phoenix/settings.py,sha256=cO-qgis_S27nHirTobYI9hHPfZH18R--WMmxNdsVUwc,273
8
- phoenix/version.py,sha256=NZ2gYPUT2LKOK3V9-dZJ34v1J27mnLmDtx-pKAXd1W0,25
8
+ phoenix/version.py,sha256=-Vg_bLotyeJdv0gFqG5-A64nsG-6AR0xZSp3sDDsV_w,25
9
9
  phoenix/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
11
11
  phoenix/core/model.py,sha256=km_a--PBHOuA337ClRw9xqhOHhrUT6Rl9pz_zV0JYkQ,4843
12
12
  phoenix/core/model_schema.py,sha256=F2dbbVnkDLsPYoyZDv1q03uhvP8LcU1wXp0g-exiWs0,50551
13
13
  phoenix/core/model_schema_adapter.py,sha256=0Tm_Y_gV-WED8fKBCaFXAEFwE3CTEZS1dowqnTZ7x7g,8426
14
14
  phoenix/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- phoenix/datasets/experiments.py,sha256=MhuhJWJ-bBqZ_aR3FewudEeo6RUrLgm0hmDlGjWVsrU,19314
16
- phoenix/datasets/tracing.py,sha256=Ieb2Uo-9qHpmv65uf1VsFSsWo5Yxj6VHwGS6dxu9NHQ,2248
17
- phoenix/datasets/types.py,sha256=w0KoSP7AdlcFlV3I6qVtvKOOWoK0yiY6_s4CvH0flcs,5753
15
+ phoenix/datasets/experiments.py,sha256=RzZezHQcTpPcr7gY9rGtoYlfoesFNhNV7EO5f_oHNFk,21198
16
+ phoenix/datasets/tracing.py,sha256=wVpt8Ie9WNPoi1djJdcrkwCokHdTO0bicXViLg3O-1Y,2831
17
+ phoenix/datasets/types.py,sha256=N17mnnVwmu1k3bnmbyROPt_6TxPaZY_QkOZmCOR5_jE,4835
18
18
  phoenix/datasets/evaluators/__init__.py,sha256=KSr9fNG4O93swYxNdPj_UihP9Itl_5mj0a492wi_4_0,465
19
- phoenix/datasets/evaluators/_utils.py,sha256=-MaNdoN1hA3FLzLyIDplUUkUtmM56BMIV83Gh-sgAsU,436
20
- phoenix/datasets/evaluators/code_evaluators.py,sha256=fwoKfyHD7_xBaHY8Ax78xcry7PtB8Y1FxIn82guAV5M,4640
21
- phoenix/datasets/evaluators/llm_evaluators.py,sha256=Ghg3bIBtQCdd6LuQ6VdcbkNQKI9ouZXwjlJV5GcdxOg,8675
19
+ phoenix/datasets/evaluators/code_evaluators.py,sha256=DdCcAi274t_TLs_aARd-GmWWpJrxVeNEAegMFEAfe0E,3894
20
+ phoenix/datasets/evaluators/llm_evaluators.py,sha256=aVfAHOWhskBiy0IVeq_ACTs7B37uXTTtDoNBS0XenIc,9165
21
+ phoenix/datasets/evaluators/utils.py,sha256=S7OGrb1sBWg5l9K35X29OKJe5wZ3k7xMhxJBclzxta0,10452
22
22
  phoenix/db/README.md,sha256=IvKaZyf9ECbGBYYePaRhBveKZwDbxAc-c7BMxJYZh6Q,595
23
23
  phoenix/db/__init__.py,sha256=pDjEFXukHmJBM-1D8RjmXkvLsz85YWNxMQczt81ec3A,118
24
24
  phoenix/db/alembic.ini,sha256=p8DjVqGUs_tTx8oU56JP7qj-rMUebNFizItUSv_hPhs,3763
@@ -26,9 +26,9 @@ phoenix/db/bulk_inserter.py,sha256=zbZGWZFDybKaGLGzpxgLwxAS5sC0_wXcvM0be4kUhh8,1
26
26
  phoenix/db/engines.py,sha256=vLWaZlToMtDI7rJDxSidYkfOoojamxaZxaz8ND3zTus,4770
27
27
  phoenix/db/helpers.py,sha256=L2_jP1iIWpUREhKLYYb4_vf_6v_BiU1E73Z2PczGm6s,1589
28
28
  phoenix/db/migrate.py,sha256=MuhtNWnR24riROvarvKfbRb4_D5xuQi6P760vBUKl1E,2270
29
- phoenix/db/models.py,sha256=zzZHXh1NpS3LyOOFp1BS7aVyrU1Qx3gcBY-H8ouoyjg,20282
29
+ phoenix/db/models.py,sha256=lYzI3tCDUl8njXb3Vf3R8e6y56-MErprjjfBE-o9Kao,20419
30
30
  phoenix/db/insertion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
- phoenix/db/insertion/dataset.py,sha256=2aBOTgjwRkmJqjE1FEQp7BTu1Jz4-bS1bKyeJgvSxfg,7305
31
+ phoenix/db/insertion/dataset.py,sha256=_vxy5e6W5jEuvO2fMKbbNCn9JvHkwI4LRKk_10eKFVg,7171
32
32
  phoenix/db/insertion/evaluation.py,sha256=fAerUy3QGf2wID_tiVmPvzxBDFGiONPl3pmpZDgJDWQ,7183
33
33
  phoenix/db/insertion/helpers.py,sha256=7tf6qQyJ05nn3IXaZEpj2b4Jz5boGLWT8tzlMaJ9tQY,2337
34
34
  phoenix/db/insertion/span.py,sha256=DNBjSrx5g2W5KuTB1dkHwtkb0SFnMIxN1jB-BAdGKFY,5634
@@ -36,7 +36,7 @@ phoenix/db/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
36
36
  phoenix/db/migrations/env.py,sha256=QbzB5zrRs6XQQmrYeUpuzeilcMlM-MsbaAgHHYcIHTI,3626
37
37
  phoenix/db/migrations/script.py.mako,sha256=MEqL-2qATlST9TAOeYgscMn1uy6HUS9NFvDgl93dMj8,635
38
38
  phoenix/db/migrations/types.py,sha256=Frq1AKSyBKQQ0FLzON-EmgTqE4kNkOpHMsbWnI-WgCE,605
39
- phoenix/db/migrations/versions/10460e46d750_datasets.py,sha256=RapdD9Sud_Gq45Vpz7VnDQB_toG6B6yHlwS93qAh_0c,8133
39
+ phoenix/db/migrations/versions/10460e46d750_datasets.py,sha256=l69yZfScFrjfZZpY0gnqwhsDUEctLeo02qMgA_aOGDg,8155
40
40
  phoenix/db/migrations/versions/cf03bd6bae1d_init.py,sha256=CbWT3ZTR0CZqeT3zWLoTWhboFmnOy3Ju1z6Ztpq8WIM,8122
41
41
  phoenix/inferences/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
42
  phoenix/inferences/errors.py,sha256=cGp9vxnw4SewFoWBV3ZGMkhE0Kh73lPIv3Ppz_H_RoA,8261
@@ -58,18 +58,18 @@ phoenix/pointcloud/pointcloud.py,sha256=4zAIkKs2xOUbchpj4XDAV-iPMXrfAJ15TG6rlIYG
58
58
  phoenix/pointcloud/projectors.py,sha256=zO_RrtDYSv2rqVOfIP2_9Cv11Dc8EmcZR94xhFcBYPU,1057
59
59
  phoenix/pointcloud/umap_parameters.py,sha256=lJsEOrbSuSiqI7g4Yt6xj7kgYxEqoep4ZHWLr6VWBqw,1760
60
60
  phoenix/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
- phoenix/server/app.py,sha256=_D2DgupKJHv8DmS6VgWxvygumSM75qdlDg6qSj61PRU,18227
61
+ phoenix/server/app.py,sha256=LQrHWt5HG_pWqnR9Ozb3-vnAGiiRGuZ3uV_9-886Yxw,18340
62
62
  phoenix/server/grpc_server.py,sha256=faktLxEtWGlCB1bPR4QwwTsRoQloahKMx0hAWqRGI5s,3379
63
63
  phoenix/server/main.py,sha256=mtzH_2Kyvuy3AHiiKfqiCdUQ6SGFzeT4q9fefbV6GLg,11114
64
64
  phoenix/server/prometheus.py,sha256=j9DHB2fERuq_ZKmwVaqR-9wx5WcPPuU1Cm5Bhg5241Y,2996
65
65
  phoenix/server/telemetry.py,sha256=T_2OKrxNViAeaANlNspEekg_Y5uZIFWvKAnpz8Aoqvk,2762
66
66
  phoenix/server/thread_server.py,sha256=dP6cm6Cf08jNhDA1TRlVZpziu1YgtPDmaeIJMm725eI,2154
67
67
  phoenix/server/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
68
- phoenix/server/api/context.py,sha256=GfAD9QHg5erKwYGpqDj_8bL2GwmccARDZQc8yO-4Fm0,2669
68
+ phoenix/server/api/context.py,sha256=jb69SVdb5hpVbfM4U0pZi4sGa2a-0VKOJWcBjjS7l4s,2761
69
69
  phoenix/server/api/interceptor.py,sha256=ykDnoC_apUd-llVli3m1CW18kNSIgjz2qZ6m5JmPDu8,1294
70
70
  phoenix/server/api/queries.py,sha256=wp5BlapuxDIoaQJm7mzG0dURfVxR32vXSJVC0JqG4_Y,19845
71
71
  phoenix/server/api/schema.py,sha256=BcxdqO5CSGqpKd-AAJHMjFlzaK9oJA8GJuxmMfcdjn4,434
72
- phoenix/server/api/dataloaders/__init__.py,sha256=9fFjDNlCtOYTOKJi0uPIRh7xJMpCnrBOhoWGEdv1BrI,4618
72
+ phoenix/server/api/dataloaders/__init__.py,sha256=urbG3M-k2cpj2ymMLYQ28tzIXAG1edECxM-tJ22ylqE,4720
73
73
  phoenix/server/api/dataloaders/dataset_example_revisions.py,sha256=Vpr5IEKSR4QnAVxE5NM7u92fPNgeHQV2ieYc6JakCj0,3788
74
74
  phoenix/server/api/dataloaders/dataset_example_spans.py,sha256=_jLlo0KdUS65d4PNTtE9aXVyG_NZWgA7VcpNC9udQ8U,1484
75
75
  phoenix/server/api/dataloaders/document_evaluation_summaries.py,sha256=dgAAlD0n8X6oAPLaD-czoefNkDqP338MouWsKaW8bOY,5684
@@ -78,6 +78,7 @@ phoenix/server/api/dataloaders/document_retrieval_metrics.py,sha256=8tZYMNLZ7zxU
78
78
  phoenix/server/api/dataloaders/evaluation_summaries.py,sha256=z9aal3IQL_t30aNqpAS7x4tjq0xNkuEG8dWW-bhqZmo,5724
79
79
  phoenix/server/api/dataloaders/experiment_annotation_summaries.py,sha256=RsQ-o84kWVTYgIlh9VKkyw2kDMWIlHCRpS7RE2aw9vs,2881
80
80
  phoenix/server/api/dataloaders/experiment_error_rates.py,sha256=EHlTdZi8F94vo-qJUcnnXFvuSh_d0fTT0Xg4SfW_A70,1397
81
+ phoenix/server/api/dataloaders/experiment_run_counts.py,sha256=wxHv08aZELJ91KTjHdt_x33M3wGDDa9GfbFHeRyOyGk,1343
81
82
  phoenix/server/api/dataloaders/experiment_sequence_number.py,sha256=Va1KuoHOd-wzvrlKykoV4kLRFW4JsJvGp_DUI4HYZX4,1631
82
83
  phoenix/server/api/dataloaders/latency_ms_quantile.py,sha256=pEc7QjB2iiNOQm_Fmo99F5O_DKOJWgGmcnT0OADJzYE,7423
83
84
  phoenix/server/api/dataloaders/min_start_or_max_end_times.py,sha256=IoFX5PtSpvQdMk_7-oB8TpIse3Q4PMxep4qKggkHpzo,2902
@@ -92,9 +93,10 @@ phoenix/server/api/dataloaders/trace_row_ids.py,sha256=yAWuVFWUjDdmmwfXsGs_l6LuG
92
93
  phoenix/server/api/dataloaders/cache/__init__.py,sha256=SYoOM9n8FJaMdQarma5d1blu-jIg2GB8Shqg5ezSzZ8,106
93
94
  phoenix/server/api/dataloaders/cache/two_tier_cache.py,sha256=I38L1RsOis98OQftE7n1Q9QBZfFJO6OW_qIINkuJllo,2295
94
95
  phoenix/server/api/helpers/__init__.py,sha256=_V1eVkchZmTkhOfRC4QqR1sUB2xtIxdsMJkDouZq_IE,251
95
- phoenix/server/api/helpers/dataset_helpers.py,sha256=kIo_kPrV8O40CUypB57JCB5Ek3GJmZXPlz6NIULIsSM,6875
96
+ phoenix/server/api/helpers/dataset_helpers.py,sha256=A6UzEyAb4gFtyc_AV63_yl9OpN0vn8Vw1BBCTNjg9J0,6875
96
97
  phoenix/server/api/input_types/AddExamplesToDatasetInput.py,sha256=ZGXMV0H3DYHi4DdqGhejDzaWdFinyem1Mc8DVA7iCh0,436
97
98
  phoenix/server/api/input_types/AddSpansToDatasetInput.py,sha256=C4oZ0WqYqca1kleNOCMIM2_aY6Qnc5n1xXG51_C1V0w,368
99
+ phoenix/server/api/input_types/ClearProjectInput.py,sha256=cpPFRyQ3ffy2dLbCZgYpway-mCzhdm4QqnUg8caOBfQ,382
98
100
  phoenix/server/api/input_types/ClusterInput.py,sha256=EL4ftvZxQ8mVdruUPcdhMhByORmSmM8S-X6RPqU6GX0,179
99
101
  phoenix/server/api/input_types/Coordinates.py,sha256=meTwbIjwTfqx5DGD2DBlH9wQzdQVNM5a8x9dp1FfIgA,173
100
102
  phoenix/server/api/input_types/CreateDatasetInput.py,sha256=Q3MwouIx9jTQBRWDju75iMQXEGJCrL4aD4ESQp771nc,248
@@ -119,7 +121,7 @@ phoenix/server/api/mutations/auth.py,sha256=vPRFoj7J6PV6QeODewG4K0PhoOebS5AfMRpb
119
121
  phoenix/server/api/mutations/dataset_mutations.py,sha256=Zp2sFWyGyubILUQboR6bafRWafsfeRO2ffUWnkLlfgI,22532
120
122
  phoenix/server/api/mutations/experiment_mutations.py,sha256=Fw_yEdITGJ6A33M5JZ-2YnBTDoBqZUUFON6vy8JoVjE,2569
121
123
  phoenix/server/api/mutations/export_events_mutations.py,sha256=t_wYBxaqvBJYRoHslh3Bmoxmwlzoy0u8SsBKWIKN5hE,4028
122
- phoenix/server/api/mutations/project_mutations.py,sha256=6A7BS3651iaeAwUszKXQB3NK4QJY_tGpALBMNw1bqp8,2021
124
+ phoenix/server/api/mutations/project_mutations.py,sha256=3SVDCZqxB0Iv60cOwBL8c-rY3QUUPs8PXbp-C_K1mWY,2267
123
125
  phoenix/server/api/openapi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
124
126
  phoenix/server/api/openapi/main.py,sha256=WY0pj3B7siQyyYqKyhqnzWC7P8MtEtiukOBUjGwLXfw,153
125
127
  phoenix/server/api/openapi/schema.py,sha256=uuSYe1Ecu72aXRgTNjyMu-9ZPE13DAHJPKtedS-MsSs,451
@@ -127,10 +129,10 @@ phoenix/server/api/routers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
127
129
  phoenix/server/api/routers/utils.py,sha256=M41BoH-fl37izhRuN2aX7lWm7jOC20A_3uClv9TVUUY,583
128
130
  phoenix/server/api/routers/v1/__init__.py,sha256=B5eSaylPI7MoYia1-VgKrU8rDi-69r_hRwPU5yMLUTE,2808
129
131
  phoenix/server/api/routers/v1/dataset_examples.py,sha256=wtplRUv2ee9xGTrcEMgTn-7L4NX_73IcwUXkCMZEFc4,6726
130
- phoenix/server/api/routers/v1/datasets.py,sha256=2wkBOLqo8ttSN1VNVEcnPcLCitkSLGp62AjWlxJhV4Y,27605
132
+ phoenix/server/api/routers/v1/datasets.py,sha256=ws2Guou9mspwFx3-cBFZoD2VuTwWGoFZmtt2Sr3zg6k,31516
131
133
  phoenix/server/api/routers/v1/evaluations.py,sha256=rwSVg-rpujhsMcDVFt-VAr0Ix9TgvLcY_bSxeh8PzJI,9241
132
- phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=xemnZ10WL5pErndP7jbaDipUj3Tkl813XSIjx7X5MBY,2656
133
- phoenix/server/api/routers/v1/experiment_runs.py,sha256=o6IvcyFDY-cy3KqeO9FIKy3XAgbIJhx7SFUoxML-MeY,4337
134
+ phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=xhrkPUc_4ncIBm24aUyzu47UU0CN1tGlbisn-oLqt_Y,2702
135
+ phoenix/server/api/routers/v1/experiment_runs.py,sha256=0AUNHA5nvpGDeoJUGK8VxP2TFN3iPwhMW3D9QmHstPk,4399
134
136
  phoenix/server/api/routers/v1/experiments.py,sha256=5Rh7q6sHswmk11PZSJ7KMrtqfIE16X_xSKkKSASK9-I,7251
135
137
  phoenix/server/api/routers/v1/spans.py,sha256=FEnmlRPBPl71BSGNBuPrz14fk8nmxJQYsKECdDbdUdw,3977
136
138
  phoenix/server/api/routers/v1/traces.py,sha256=dYEf5pThenAQCgfQljHdrnwd4tC_tAXm6Kvk6GphPYs,2774
@@ -157,11 +159,11 @@ phoenix/server/api/types/EvaluationSummary.py,sha256=EFucuzAhcxR9sdEn6WNAtmAGJk-
157
159
  phoenix/server/api/types/Event.py,sha256=XdYgaIxcVIW-YFViCkxj5l9OaVNepyIrCtm5Iqg2le8,3989
158
160
  phoenix/server/api/types/EventMetadata.py,sha256=-J0tYF9eZTHwCjwxQHY7Gckr2_MNW5OoWT1mydweZNM,635
159
161
  phoenix/server/api/types/ExampleRevisionInterface.py,sha256=gV3Gt9-3Oi5wjaVtepC6nOt3FzTzZFD1KebNnqiw56E,294
160
- phoenix/server/api/types/Experiment.py,sha256=Lon2ZNZYdWXQmj3nLr_TXN8CCtZtC-AXYfyJuoqI2DM,4692
162
+ phoenix/server/api/types/Experiment.py,sha256=Cs0EKhVLI5l5LKFI0hQA-ekZuaiJcOHT88JGFBa2deU,4906
161
163
  phoenix/server/api/types/ExperimentAnnotationSummary.py,sha256=Uk3JtxIrsMoZT5tqc4nJdUOM3XegVzjUyoV3pkjNotE,256
162
164
  phoenix/server/api/types/ExperimentComparison.py,sha256=0sFz6MoBDw39dds0qVyaqhVs9qqO5rkG1FMSjmfBeCc,441
163
- phoenix/server/api/types/ExperimentRun.py,sha256=uM7HxaC8nEjtO7yLr8WjLEfYRvEvbX6ibR8I0fVzdeU,2976
164
- phoenix/server/api/types/ExperimentRunAnnotation.py,sha256=GvWY6wukBhSr2Tk9Ef0R5bH5yCMxVakqeypoyYoUb6o,1774
165
+ phoenix/server/api/types/ExperimentRun.py,sha256=8jUIi3ApVCqQHwnYe59CYhrmh5iZ6-QmlH5WpF7UWtM,2990
166
+ phoenix/server/api/types/ExperimentRunAnnotation.py,sha256=zGstMbS5OxNikEhD8VouY7Ls7YbxKm-0EmqvGeY3-DI,1773
165
167
  phoenix/server/api/types/ExportedFile.py,sha256=e3GTn7B5LgsTbqiwjhMCQH7VsiqXitrBO4aCMS1lHsg,163
166
168
  phoenix/server/api/types/Functionality.py,sha256=tzV9xdhB8zqfsjWxP66NDC7EZsplYkYO7jRbLWJIeeg,382
167
169
  phoenix/server/api/types/Inferences.py,sha256=HWuDZZrXPWVoEy_pA3bRsAOUYsCKgAxf9zshasGqu5Y,3403
@@ -197,12 +199,12 @@ phoenix/server/static/apple-touch-icon-76x76.png,sha256=CT_xT12I0u2i0WU8JzBZBuOQ
197
199
  phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZVooBJ_SGk2ZkDs,3801
198
200
  phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
199
201
  phoenix/server/static/index.css,sha256=KKGpx4iwF91VGRm0YN-4cn8oC-oIqC6HecoPf0x3ZM8,1885
200
- phoenix/server/static/index.js,sha256=n8qF_l7ijW-7E8m63oViD8SpXOYjN3wvZUhgB8H6ZLo,3489949
202
+ phoenix/server/static/index.js,sha256=I9Y8svcPruUrXklKcZUxFz5HfLB0vOwczYLSwLAs_04,3500011
201
203
  phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
202
204
  phoenix/server/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
203
205
  phoenix/server/templates/index.html,sha256=S4z7qSoNSwnKFAH9r96AR-YJEyoKMd-VMWVlJ_IdzME,2039
204
206
  phoenix/session/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
205
- phoenix/session/client.py,sha256=tq2qghwYa_mxNYLD41UNfD7n57msB9lYIe5H16lBqoo,20333
207
+ phoenix/session/client.py,sha256=R7dV38yjkIQa522nhG6jhDllWcXft2JJ7RlcPYpqiiQ,24846
206
208
  phoenix/session/data_extractor.py,sha256=dwhiDu-ISaXr8UI9I-CszZhB5BlUNmdDopjFZvMIXMw,2101
207
209
  phoenix/session/evaluation.py,sha256=aKeV8UVOyq3b7CYOwt3cWuLz0xzvMjX7vlEPILJ_fcs,5311
208
210
  phoenix/session/session.py,sha256=rjIuSSK2gAYIUPQTJc4E2ebew5o6I070FWRoFn4W3EI,26620
@@ -242,8 +244,8 @@ phoenix/utilities/logging.py,sha256=lDXd6EGaamBNcQxL4vP1au9-i_SXe0OraUDiJOcszSw,
242
244
  phoenix/utilities/project.py,sha256=qWsvKnG1oKhOFUowXf9qiOL2ia7jaFe_ijFFHEt8GJo,431
243
245
  phoenix/utilities/re.py,sha256=PDve_OLjRTM8yQQJHC8-n3HdIONi7aNils3ZKRZ5uBM,2045
244
246
  phoenix/utilities/span_store.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
245
- arize_phoenix-4.4.4rc4.dist-info/METADATA,sha256=YEUoxXSRba4zRgzM8-lcq7TIp9GNPZSjY_QGoyIJN-w,11012
246
- arize_phoenix-4.4.4rc4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
247
- arize_phoenix-4.4.4rc4.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
248
- arize_phoenix-4.4.4rc4.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
249
- arize_phoenix-4.4.4rc4.dist-info/RECORD,,
247
+ arize_phoenix-4.4.4rc5.dist-info/METADATA,sha256=yT0gbMlPkiRkZeC8Yj_eLyaufriREVn3jxz5-qTKDjI,11012
248
+ arize_phoenix-4.4.4rc5.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
249
+ arize_phoenix-4.4.4rc5.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
250
+ arize_phoenix-4.4.4rc5.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
251
+ arize_phoenix-4.4.4rc5.dist-info/RECORD,,
@@ -2,19 +2,14 @@ from __future__ import annotations
2
2
 
3
3
  import json
4
4
  import re
5
- from typing import TYPE_CHECKING, List, Optional, Union
5
+ from typing import Any, List, Optional, Union
6
6
 
7
- from phoenix.datasets.evaluators._utils import _unwrap_json
8
- from phoenix.datasets.types import EvaluationResult, Example, ExperimentEvaluator, ExperimentRun
7
+ from phoenix.datasets.evaluators.utils import Evaluator
8
+ from phoenix.datasets.types import EvaluationResult, TaskOutput
9
9
 
10
10
 
11
- class JSONParsable:
12
- annotator_kind = "CODE"
13
- name = "JSONParsable"
14
-
15
- def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
16
- assert exp_run.output is not None
17
- output = _unwrap_json(exp_run.output.result)
11
+ class JSONParsable(Evaluator):
12
+ def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
18
13
  assert isinstance(output, str), "Experiment run output must be a string"
19
14
  try:
20
15
  json.loads(output)
@@ -26,18 +21,14 @@ class JSONParsable:
26
21
  )
27
22
 
28
23
 
29
- class ContainsKeyword:
30
- annotator_kind = "CODE"
31
-
24
+ class ContainsKeyword(Evaluator):
32
25
  def __init__(self, keyword: str, name: Optional[str] = None) -> None:
33
26
  self.keyword = keyword
34
- self.name = name or f"Contains({repr(keyword)})"
27
+ self._name = name or f"Contains({repr(keyword)})"
35
28
 
36
- def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
37
- assert exp_run.output is not None
38
- result = _unwrap_json(exp_run.output.result)
39
- assert isinstance(result, str), "Experiment run output must be a string"
40
- found = self.keyword in result
29
+ def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
30
+ assert isinstance(output, str), "Experiment run output must be a string"
31
+ found = self.keyword in output
41
32
  return EvaluationResult(
42
33
  score=float(found),
43
34
  explanation=(
@@ -47,18 +38,14 @@ class ContainsKeyword:
47
38
  )
48
39
 
49
40
 
50
- class ContainsAnyKeyword:
51
- annotator_kind = "CODE"
52
-
41
+ class ContainsAnyKeyword(Evaluator):
53
42
  def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
54
43
  self.keywords = keywords
55
- self.name = name or f"ContainsAny({keywords})"
44
+ self._name = name or f"ContainsAny({keywords})"
56
45
 
57
- def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
58
- assert exp_run.output is not None
59
- result = _unwrap_json(exp_run.output.result)
60
- assert isinstance(result, str), "Experiment run output must be a string"
61
- found = [keyword for keyword in self.keywords if keyword in result]
46
+ def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
47
+ assert isinstance(output, str), "Experiment run output must be a string"
48
+ found = [keyword for keyword in self.keywords if keyword in output]
62
49
  if found:
63
50
  explanation = f"the keywords {found} were found in the output"
64
51
  else:
@@ -69,18 +56,14 @@ class ContainsAnyKeyword:
69
56
  )
70
57
 
71
58
 
72
- class ContainsAllKeywords:
73
- annotator_kind = "CODE"
74
-
59
+ class ContainsAllKeywords(Evaluator):
75
60
  def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
76
61
  self.keywords = keywords
77
- self.name = name or f"ContainsAll({keywords})"
62
+ self._name = name or f"ContainsAll({keywords})"
78
63
 
79
- def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
80
- assert exp_run.output is not None
81
- result = _unwrap_json(exp_run.output.result)
82
- assert isinstance(result, str), "Experiment run output must be a string"
83
- not_found = [keyword for keyword in self.keywords if keyword not in result]
64
+ def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
65
+ assert isinstance(output, str), "Experiment run output must be a string"
66
+ not_found = [keyword for keyword in self.keywords if keyword not in output]
84
67
  if not_found:
85
68
  contains_all = False
86
69
  explanation = f"the keywords {not_found} were not found in the output"
@@ -93,21 +76,17 @@ class ContainsAllKeywords:
93
76
  )
94
77
 
95
78
 
96
- class MatchesRegex:
97
- annotator_kind = "CODE"
98
-
79
+ class MatchesRegex(Evaluator):
99
80
  def __init__(self, pattern: Union[str, re.Pattern[str]], name: Optional[str] = None) -> None:
100
81
  if isinstance(pattern, str):
101
82
  pattern = re.compile(pattern)
102
83
  self.pattern = pattern
103
84
  assert isinstance(pattern, re.Pattern)
104
- self.name = name or f"matches_({pattern})"
85
+ self._name = name or f"matches_({pattern})"
105
86
 
106
- def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
107
- assert exp_run.output is not None
108
- result = _unwrap_json(exp_run.output.result)
109
- assert isinstance(result, str), "Experiment run output must be a string"
110
- matches = self.pattern.findall(result)
87
+ def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
88
+ assert isinstance(output, str), "Experiment run output must be a string"
89
+ matches = self.pattern.findall(output)
111
90
  if matches:
112
91
  explanation = (
113
92
  f"the substrings {matches} matched the regex pattern {self.pattern.pattern}"
@@ -118,10 +97,3 @@ class MatchesRegex:
118
97
  score=float(bool(matches)),
119
98
  explanation=explanation,
120
99
  )
121
-
122
-
123
- # Someday we'll do typing checking in unit tests.
124
- if TYPE_CHECKING:
125
- _: ExperimentEvaluator
126
- _ = JSONParsable()
127
- _ = ContainsKeyword("test")
@@ -1,14 +1,23 @@
1
1
  import re
2
- from typing import Callable, Optional, Type
3
-
4
- from phoenix.datasets.evaluators._utils import _unwrap_json
5
- from phoenix.datasets.types import EvaluationResult, Example, ExperimentEvaluator, ExperimentRun
2
+ from types import MappingProxyType
3
+ from typing import Any, Callable, Optional, Type
4
+
5
+ from phoenix.datasets.evaluators.utils import (
6
+ ExampleInput,
7
+ ExampleMetadata,
8
+ ExperimentEvaluator,
9
+ LLMEvaluator,
10
+ _unwrap_json,
11
+ )
12
+ from phoenix.datasets.types import (
13
+ EvaluationResult,
14
+ TaskOutput,
15
+ )
6
16
  from phoenix.evals.models.base import BaseModel as LLMBaseModel
7
17
  from phoenix.evals.utils import snap_to_rail
8
18
 
9
19
 
10
- class LLMCriteriaEvaluator:
11
- annotator_kind = "LLM"
20
+ class LLMCriteriaEvaluator(LLMEvaluator):
12
21
  _base_template = (
13
22
  "Determine if the following text is {criteria}. {description}"
14
23
  "First, explain step-by-step why you think the text is or is not {criteria}. Then provide "
@@ -37,21 +46,23 @@ class LLMCriteriaEvaluator:
37
46
  self.criteria = criteria
38
47
  self.description = description
39
48
  self.template = self._format_base_template(self.criteria, self.description)
40
- self.name = name
49
+ self._name = name
41
50
 
42
- def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
43
- formatted_template = self._format_eval_template(exp_run)
51
+ def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
52
+ formatted_template = self._format_eval_template(output)
44
53
  unparsed_response = self.model._generate(formatted_template)
45
54
  return self._parse_eval_output(unparsed_response)
46
55
 
47
- async def async_evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
48
- formatted_template = self._format_eval_template(exp_run)
56
+ async def async_evaluate(
57
+ self, *, output: Optional[TaskOutput] = None, **_: Any
58
+ ) -> EvaluationResult:
59
+ formatted_template = self._format_eval_template(output)
49
60
  unparsed_response = await self.model._async_generate(formatted_template)
50
61
  return self._parse_eval_output(unparsed_response)
51
62
 
52
- def _format_eval_template(self, experiment_run: ExperimentRun) -> str:
53
- assert experiment_run.output is not None
54
- result = _unwrap_json(experiment_run.output.result)
63
+ def _format_eval_template(self, output: TaskOutput) -> str:
64
+ assert output is not None
65
+ result = _unwrap_json(output)
55
66
  return self.template.format(text=str(result))
56
67
 
57
68
  def _parse_eval_output(self, unparsed_response: str) -> EvaluationResult:
@@ -137,8 +148,7 @@ def _parse_label_from_explanation(raw_string: str) -> str:
137
148
  return raw_string
138
149
 
139
150
 
140
- class RelevanceEvaluator:
141
- annotator_kind = "LLM"
151
+ class RelevanceEvaluator(LLMEvaluator):
142
152
  template = (
143
153
  "Determine if the following response is relevant to the query. In this context, "
144
154
  "'relevance' means that the response directly addresses the core question or topic of the "
@@ -162,19 +172,24 @@ class RelevanceEvaluator:
162
172
  def __init__(
163
173
  self,
164
174
  model: LLMBaseModel,
165
- get_query: Optional[Callable[[Example, ExperimentRun], str]] = None,
166
- get_response: Optional[Callable[[Example, ExperimentRun], str]] = None,
175
+ get_query: Optional[Callable[[ExampleInput, ExampleMetadata], str]] = None,
176
+ get_response: Optional[Callable[[Optional[TaskOutput], ExampleMetadata], str]] = None,
167
177
  name: str = "RelevanceEvaluator",
168
178
  ):
169
179
  self.model = model
170
- self.name = name
180
+ self._name = name
171
181
  self.get_query = get_query or self._default_get_query
172
182
  self.get_response = get_response or self._default_get_response
173
183
 
174
- def _format_eval_template(self, example: Example, experiment_run: ExperimentRun) -> str:
175
- assert experiment_run.output is not None
176
- query = self.get_query(example, experiment_run)
177
- response = self.get_response(example, experiment_run)
184
+ def _format_eval_template(
185
+ self,
186
+ output: Optional[TaskOutput] = None,
187
+ input: ExampleInput = MappingProxyType({}),
188
+ metadata: ExampleMetadata = MappingProxyType({}),
189
+ ) -> str:
190
+ assert output is not None
191
+ query = self.get_query(input, metadata)
192
+ response = self.get_response(output, metadata)
178
193
  return self.template.format(query=query, response=response)
179
194
 
180
195
  def _parse_eval_output(self, unparsed_response: str) -> EvaluationResult:
@@ -195,19 +210,35 @@ class RelevanceEvaluator:
195
210
  metadata={},
196
211
  )
197
212
 
198
- def _default_get_query(self, example: Example, experiment_run: ExperimentRun) -> str:
199
- return str(example.input)
213
+ def _default_get_query(self, input: ExampleInput, *args: Any, **kwargs: Any) -> str:
214
+ return str(input)
200
215
 
201
- def _default_get_response(self, example: Example, experiment_run: ExperimentRun) -> str:
202
- assert experiment_run.output is not None
203
- return str(_unwrap_json(experiment_run.output.result))
216
+ def _default_get_response(
217
+ self, output: Optional[TaskOutput] = None, *args: Any, **kwargs: Any
218
+ ) -> str:
219
+ assert output is not None
220
+ return str(_unwrap_json(output))
204
221
 
205
- def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
206
- formatted_template = self._format_eval_template(example, exp_run)
222
+ def evaluate(
223
+ self,
224
+ *,
225
+ output: Optional[TaskOutput] = None,
226
+ metadata: ExampleMetadata = MappingProxyType({}),
227
+ input: ExampleInput = MappingProxyType({}),
228
+ **_: Any,
229
+ ) -> EvaluationResult:
230
+ formatted_template = self._format_eval_template(output, input, metadata)
207
231
  unparsed_response = self.model._generate(formatted_template)
208
232
  return self._parse_eval_output(unparsed_response)
209
233
 
210
- async def async_evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
211
- formatted_template = self._format_eval_template(example, exp_run)
234
+ async def async_evaluate(
235
+ self,
236
+ *,
237
+ output: Optional[TaskOutput] = None,
238
+ metadata: ExampleMetadata = MappingProxyType({}),
239
+ input: ExampleInput = MappingProxyType({}),
240
+ **_: Any,
241
+ ) -> EvaluationResult:
242
+ formatted_template = self._format_eval_template(output, input, metadata)
212
243
  unparsed_response = await self.model._async_generate(formatted_template)
213
244
  return self._parse_eval_output(unparsed_response)