arize-phoenix 4.4.4rc5__py3-none-any.whl → 4.4.4rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (42)
  1. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/METADATA +11 -5
  2. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/RECORD +39 -36
  3. phoenix/config.py +21 -0
  4. phoenix/datetime_utils.py +4 -0
  5. phoenix/db/insertion/evaluation.py +4 -4
  6. phoenix/db/insertion/helpers.py +4 -12
  7. phoenix/db/insertion/span.py +3 -3
  8. phoenix/db/models.py +1 -1
  9. phoenix/experiments/__init__.py +6 -0
  10. phoenix/experiments/evaluators/__init__.py +29 -0
  11. phoenix/experiments/evaluators/base.py +153 -0
  12. phoenix/{datasets → experiments}/evaluators/code_evaluators.py +7 -7
  13. phoenix/{datasets → experiments}/evaluators/llm_evaluators.py +9 -9
  14. phoenix/{datasets → experiments}/evaluators/utils.py +38 -141
  15. phoenix/{datasets/experiments.py → experiments/functions.py} +248 -182
  16. phoenix/experiments/types.py +722 -0
  17. phoenix/experiments/utils.py +9 -0
  18. phoenix/server/api/context.py +2 -0
  19. phoenix/server/api/dataloaders/__init__.py +2 -0
  20. phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
  21. phoenix/server/api/routers/v1/__init__.py +1 -1
  22. phoenix/server/api/routers/v1/dataset_examples.py +10 -10
  23. phoenix/server/api/routers/v1/datasets.py +6 -6
  24. phoenix/server/api/routers/v1/evaluations.py +4 -11
  25. phoenix/server/api/routers/v1/experiment_evaluations.py +22 -23
  26. phoenix/server/api/routers/v1/experiment_runs.py +4 -16
  27. phoenix/server/api/routers/v1/experiments.py +5 -5
  28. phoenix/server/api/routers/v1/spans.py +6 -4
  29. phoenix/server/api/types/Experiment.py +7 -0
  30. phoenix/server/app.py +2 -0
  31. phoenix/server/static/index.js +648 -570
  32. phoenix/session/client.py +256 -85
  33. phoenix/trace/fixtures.py +6 -6
  34. phoenix/utilities/json.py +8 -8
  35. phoenix/version.py +1 -1
  36. phoenix/datasets/__init__.py +0 -0
  37. phoenix/datasets/evaluators/__init__.py +0 -18
  38. phoenix/datasets/types.py +0 -178
  39. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/WHEEL +0 -0
  40. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/licenses/IP_NOTICE +0 -0
  41. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/licenses/LICENSE +0 -0
  42. phoenix/{datasets → experiments}/tracing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: arize-phoenix
3
- Version: 4.4.4rc5
3
+ Version: 4.4.4rc6
4
4
  Summary: AI Observability and Evaluation
5
5
  Project-URL: Documentation, https://docs.arize.com/phoenix/
6
6
  Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -20,7 +20,7 @@ Requires-Python: <3.13,>=3.8
20
20
  Requires-Dist: aioitertools
21
21
  Requires-Dist: aiosqlite
22
22
  Requires-Dist: alembic<2,>=1.3.0
23
- Requires-Dist: arize-phoenix-evals>=0.3.0
23
+ Requires-Dist: arize-phoenix-evals>=0.13.1
24
24
  Requires-Dist: cachetools
25
25
  Requires-Dist: grpcio
26
26
  Requires-Dist: hdbscan>=0.8.33
@@ -36,7 +36,7 @@ Requires-Dist: opentelemetry-exporter-otlp
36
36
  Requires-Dist: opentelemetry-proto>=1.12.0
37
37
  Requires-Dist: opentelemetry-sdk
38
38
  Requires-Dist: opentelemetry-semantic-conventions
39
- Requires-Dist: pandas
39
+ Requires-Dist: pandas>=1.0
40
40
  Requires-Dist: protobuf<6.0,>=3.20
41
41
  Requires-Dist: psutil
42
42
  Requires-Dist: pyarrow
@@ -79,6 +79,7 @@ Requires-Dist: llama-index>=0.10.3; extra == 'dev'
79
79
  Requires-Dist: nbqa; extra == 'dev'
80
80
  Requires-Dist: pandas-stubs==2.0.3.230814; (python_version < '3.9') and extra == 'dev'
81
81
  Requires-Dist: pandas-stubs==2.2.2.240603; (python_version >= '3.9') and extra == 'dev'
82
+ Requires-Dist: pandas>=1.0; extra == 'dev'
82
83
  Requires-Dist: pre-commit; extra == 'dev'
83
84
  Requires-Dist: prometheus-client; extra == 'dev'
84
85
  Requires-Dist: psycopg[binary]; extra == 'dev'
@@ -88,10 +89,15 @@ Requires-Dist: pytest-postgresql; extra == 'dev'
88
89
  Requires-Dist: pytest==8.2.2; extra == 'dev'
89
90
  Requires-Dist: ruff==0.4.9; extra == 'dev'
90
91
  Requires-Dist: strawberry-graphql[debug-server,opentelemetry]==0.235.0; extra == 'dev'
92
+ Requires-Dist: tabulate; extra == 'dev'
93
+ Requires-Dist: types-tabulate; extra == 'dev'
91
94
  Provides-Extra: evals
92
95
  Provides-Extra: experimental
93
96
  Provides-Extra: llama-index
94
- Requires-Dist: llama-index==0.10.44; extra == 'llama-index'
97
+ Requires-Dist: llama-index-embeddings-openai; extra == 'llama-index'
98
+ Requires-Dist: llama-index-llms-openai; extra == 'llama-index'
99
+ Requires-Dist: llama-index-readers-file; extra == 'llama-index'
100
+ Requires-Dist: llama-index==0.10.51; extra == 'llama-index'
95
101
  Provides-Extra: pg
96
102
  Requires-Dist: asyncpg; extra == 'pg'
97
103
  Description-Content-Type: text/markdown
@@ -127,7 +133,7 @@ Description-Content-Type: text/markdown
127
133
 
128
134
  Phoenix is an open-source AI observability platform designed for experimentation, evaluation, and troubleshooting. It provides:
129
135
 
130
- - **_Tracing_** - Trace your LLM application's runtime using using OpenTelemetry-based instrumentation.
136
+ - **_Tracing_** - Trace your LLM application's runtime using OpenTelemetry-based instrumentation.
131
137
  - **_Evaluation_** - Leverage LLMs to benchmark your application's performance using response and retrieval evals.
132
138
  - **_Inference Analysis_** - Visualize inferences and embeddings using dimensionality reduction and clustering to identify drift and performance degradation.
133
139
 
@@ -1,24 +1,16 @@
1
1
  phoenix/__init__.py,sha256=JMXBf8J0tAa5ycWDIn8QWYgQu6H2vL77RC4lful-YH8,1542
2
- phoenix/config.py,sha256=9xkQBn_Z-tsQct-zq0B80N4Xa2k2jfhivdc_qYCA5G8,7618
3
- phoenix/datetime_utils.py,sha256=oqkxJ5I7ggrCKYTEi8q-akC501calylD26NVOPQQcHw,3305
2
+ phoenix/config.py,sha256=eXciIho_PDh4ZSmq4Gtuo7Qz__yTluDP3_WUwig5OiU,8141
3
+ phoenix/datetime_utils.py,sha256=yDKjwX2Vtqw9h5F_ProtP-TsXidM43uIvmJ_pOzYc9A,3405
4
4
  phoenix/exceptions.py,sha256=n2L2KKuecrdflB9MsCdAYCiSEvGJptIsfRkXMoJle7A,169
5
5
  phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
6
6
  phoenix/services.py,sha256=aTxhcOA1pZHB6U-B3TEcp6fqDF5oT0xCUvEUNMZVTUQ,5175
7
7
  phoenix/settings.py,sha256=cO-qgis_S27nHirTobYI9hHPfZH18R--WMmxNdsVUwc,273
8
- phoenix/version.py,sha256=-Vg_bLotyeJdv0gFqG5-A64nsG-6AR0xZSp3sDDsV_w,25
8
+ phoenix/version.py,sha256=rZ0Z9PgUs79kMn4HpCH3vAEVOqqPCzzD7Xz8N5sa7qI,25
9
9
  phoenix/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
11
11
  phoenix/core/model.py,sha256=km_a--PBHOuA337ClRw9xqhOHhrUT6Rl9pz_zV0JYkQ,4843
12
12
  phoenix/core/model_schema.py,sha256=F2dbbVnkDLsPYoyZDv1q03uhvP8LcU1wXp0g-exiWs0,50551
13
13
  phoenix/core/model_schema_adapter.py,sha256=0Tm_Y_gV-WED8fKBCaFXAEFwE3CTEZS1dowqnTZ7x7g,8426
14
- phoenix/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- phoenix/datasets/experiments.py,sha256=RzZezHQcTpPcr7gY9rGtoYlfoesFNhNV7EO5f_oHNFk,21198
16
- phoenix/datasets/tracing.py,sha256=wVpt8Ie9WNPoi1djJdcrkwCokHdTO0bicXViLg3O-1Y,2831
17
- phoenix/datasets/types.py,sha256=N17mnnVwmu1k3bnmbyROPt_6TxPaZY_QkOZmCOR5_jE,4835
18
- phoenix/datasets/evaluators/__init__.py,sha256=KSr9fNG4O93swYxNdPj_UihP9Itl_5mj0a492wi_4_0,465
19
- phoenix/datasets/evaluators/code_evaluators.py,sha256=DdCcAi274t_TLs_aARd-GmWWpJrxVeNEAegMFEAfe0E,3894
20
- phoenix/datasets/evaluators/llm_evaluators.py,sha256=aVfAHOWhskBiy0IVeq_ACTs7B37uXTTtDoNBS0XenIc,9165
21
- phoenix/datasets/evaluators/utils.py,sha256=S7OGrb1sBWg5l9K35X29OKJe5wZ3k7xMhxJBclzxta0,10452
22
14
  phoenix/db/README.md,sha256=IvKaZyf9ECbGBYYePaRhBveKZwDbxAc-c7BMxJYZh6Q,595
23
15
  phoenix/db/__init__.py,sha256=pDjEFXukHmJBM-1D8RjmXkvLsz85YWNxMQczt81ec3A,118
24
16
  phoenix/db/alembic.ini,sha256=p8DjVqGUs_tTx8oU56JP7qj-rMUebNFizItUSv_hPhs,3763
@@ -26,18 +18,28 @@ phoenix/db/bulk_inserter.py,sha256=zbZGWZFDybKaGLGzpxgLwxAS5sC0_wXcvM0be4kUhh8,1
26
18
  phoenix/db/engines.py,sha256=vLWaZlToMtDI7rJDxSidYkfOoojamxaZxaz8ND3zTus,4770
27
19
  phoenix/db/helpers.py,sha256=L2_jP1iIWpUREhKLYYb4_vf_6v_BiU1E73Z2PczGm6s,1589
28
20
  phoenix/db/migrate.py,sha256=MuhtNWnR24riROvarvKfbRb4_D5xuQi6P760vBUKl1E,2270
29
- phoenix/db/models.py,sha256=lYzI3tCDUl8njXb3Vf3R8e6y56-MErprjjfBE-o9Kao,20419
21
+ phoenix/db/models.py,sha256=zFtdhVuQFOvquyKsto62aqAVaTRUlq9gxU0j1M1yLdg,20408
30
22
  phoenix/db/insertion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
23
  phoenix/db/insertion/dataset.py,sha256=_vxy5e6W5jEuvO2fMKbbNCn9JvHkwI4LRKk_10eKFVg,7171
32
- phoenix/db/insertion/evaluation.py,sha256=fAerUy3QGf2wID_tiVmPvzxBDFGiONPl3pmpZDgJDWQ,7183
33
- phoenix/db/insertion/helpers.py,sha256=7tf6qQyJ05nn3IXaZEpj2b4Jz5boGLWT8tzlMaJ9tQY,2337
34
- phoenix/db/insertion/span.py,sha256=DNBjSrx5g2W5KuTB1dkHwtkb0SFnMIxN1jB-BAdGKFY,5634
24
+ phoenix/db/insertion/evaluation.py,sha256=HoUncZN9ZlIr1QO0uA37SbWhrjmwQVYVJlgFX2VefY8,7211
25
+ phoenix/db/insertion/helpers.py,sha256=5AZQSyTGAthyaIl_l5jL4yva1IrTTBG9y2G7l1r2Yyk,1937
26
+ phoenix/db/insertion/span.py,sha256=d85O3R_Cc3aVFDJSgLLX66qNCPNbKtDxug_dSKFDfew,5655
35
27
  phoenix/db/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
28
  phoenix/db/migrations/env.py,sha256=QbzB5zrRs6XQQmrYeUpuzeilcMlM-MsbaAgHHYcIHTI,3626
37
29
  phoenix/db/migrations/script.py.mako,sha256=MEqL-2qATlST9TAOeYgscMn1uy6HUS9NFvDgl93dMj8,635
38
30
  phoenix/db/migrations/types.py,sha256=Frq1AKSyBKQQ0FLzON-EmgTqE4kNkOpHMsbWnI-WgCE,605
39
31
  phoenix/db/migrations/versions/10460e46d750_datasets.py,sha256=l69yZfScFrjfZZpY0gnqwhsDUEctLeo02qMgA_aOGDg,8155
40
32
  phoenix/db/migrations/versions/cf03bd6bae1d_init.py,sha256=CbWT3ZTR0CZqeT3zWLoTWhboFmnOy3Ju1z6Ztpq8WIM,8122
33
+ phoenix/experiments/__init__.py,sha256=6JGwgUd7xCbGpuHqYZlsmErmYvVgv7N_j43bn3dUqsk,123
34
+ phoenix/experiments/functions.py,sha256=w0A6BK80avoupxd3sPJZ_btftV1pXrkbZj4omR_H214,24723
35
+ phoenix/experiments/tracing.py,sha256=wVpt8Ie9WNPoi1djJdcrkwCokHdTO0bicXViLg3O-1Y,2831
36
+ phoenix/experiments/types.py,sha256=tj7DxfsU_nQP5bNe_h6p4KvRjkXKaaB3FeaIerAi_iA,22790
37
+ phoenix/experiments/utils.py,sha256=ZZajvIrZTURhOX5Nx4nyogJEbI18sKCHYiYwOxz2vYU,340
38
+ phoenix/experiments/evaluators/__init__.py,sha256=j63fi3fa3U7-itVPHa82GowhjQRU-wO6yhO34u_lhsA,714
39
+ phoenix/experiments/evaluators/base.py,sha256=uhO4R06YWBbTxdpvXLldANnTxTA5r2h_Ktj-ZMLH57c,5305
40
+ phoenix/experiments/evaluators/code_evaluators.py,sha256=0qIKQS14Knze50ziJEPVEnNeV3QIs4g1IXtCmaWZu7o,3923
41
+ phoenix/experiments/evaluators/llm_evaluators.py,sha256=EFce6LKZwUZDBa5ZozvcdqeZpdWM6n6bmq7_oIzM2Nw,9211
42
+ phoenix/experiments/evaluators/utils.py,sha256=o84UTWN7fzjCGZDTS-KpGZ2VBrk2iSuO3X2LoC7pr3Y,6966
41
43
  phoenix/inferences/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
44
  phoenix/inferences/errors.py,sha256=cGp9vxnw4SewFoWBV3ZGMkhE0Kh73lPIv3Ppz_H_RoA,8261
43
45
  phoenix/inferences/fixtures.py,sha256=FC2eRL4dpobKQHYOilFtDexUWFkMZ_w6jun_4WkbMk0,20792
@@ -58,18 +60,19 @@ phoenix/pointcloud/pointcloud.py,sha256=4zAIkKs2xOUbchpj4XDAV-iPMXrfAJ15TG6rlIYG
58
60
  phoenix/pointcloud/projectors.py,sha256=zO_RrtDYSv2rqVOfIP2_9Cv11Dc8EmcZR94xhFcBYPU,1057
59
61
  phoenix/pointcloud/umap_parameters.py,sha256=lJsEOrbSuSiqI7g4Yt6xj7kgYxEqoep4ZHWLr6VWBqw,1760
60
62
  phoenix/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
- phoenix/server/app.py,sha256=LQrHWt5HG_pWqnR9Ozb3-vnAGiiRGuZ3uV_9-886Yxw,18340
63
+ phoenix/server/app.py,sha256=Ld9NvW7sBT4aJn8CXAB_PIw4c5wlkxCcjT_hzjtn7dM,18478
62
64
  phoenix/server/grpc_server.py,sha256=faktLxEtWGlCB1bPR4QwwTsRoQloahKMx0hAWqRGI5s,3379
63
65
  phoenix/server/main.py,sha256=mtzH_2Kyvuy3AHiiKfqiCdUQ6SGFzeT4q9fefbV6GLg,11114
64
66
  phoenix/server/prometheus.py,sha256=j9DHB2fERuq_ZKmwVaqR-9wx5WcPPuU1Cm5Bhg5241Y,2996
65
67
  phoenix/server/telemetry.py,sha256=T_2OKrxNViAeaANlNspEekg_Y5uZIFWvKAnpz8Aoqvk,2762
66
68
  phoenix/server/thread_server.py,sha256=dP6cm6Cf08jNhDA1TRlVZpziu1YgtPDmaeIJMm725eI,2154
67
69
  phoenix/server/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
68
- phoenix/server/api/context.py,sha256=jb69SVdb5hpVbfM4U0pZi4sGa2a-0VKOJWcBjjS7l4s,2761
70
+ phoenix/server/api/context.py,sha256=4jcy203Gtx38399FP21iU3HmFsq-50EKFJlX4IW2Los,2878
69
71
  phoenix/server/api/interceptor.py,sha256=ykDnoC_apUd-llVli3m1CW18kNSIgjz2qZ6m5JmPDu8,1294
70
72
  phoenix/server/api/queries.py,sha256=wp5BlapuxDIoaQJm7mzG0dURfVxR32vXSJVC0JqG4_Y,19845
71
73
  phoenix/server/api/schema.py,sha256=BcxdqO5CSGqpKd-AAJHMjFlzaK9oJA8GJuxmMfcdjn4,434
72
- phoenix/server/api/dataloaders/__init__.py,sha256=urbG3M-k2cpj2ymMLYQ28tzIXAG1edECxM-tJ22ylqE,4720
74
+ phoenix/server/api/dataloaders/__init__.py,sha256=qehXL37vGdw7v5PFs3kbZVIuhuzrVNVeZACDQjYpwyo,4847
75
+ phoenix/server/api/dataloaders/average_experiment_run_latency.py,sha256=RiO0AKC6Y5byafsV0zTJEIOt8Nudjte73f1T78cBe1k,1817
73
76
  phoenix/server/api/dataloaders/dataset_example_revisions.py,sha256=Vpr5IEKSR4QnAVxE5NM7u92fPNgeHQV2ieYc6JakCj0,3788
74
77
  phoenix/server/api/dataloaders/dataset_example_spans.py,sha256=_jLlo0KdUS65d4PNTtE9aXVyG_NZWgA7VcpNC9udQ8U,1484
75
78
  phoenix/server/api/dataloaders/document_evaluation_summaries.py,sha256=dgAAlD0n8X6oAPLaD-czoefNkDqP338MouWsKaW8bOY,5684
@@ -127,14 +130,14 @@ phoenix/server/api/openapi/main.py,sha256=WY0pj3B7siQyyYqKyhqnzWC7P8MtEtiukOBUjG
127
130
  phoenix/server/api/openapi/schema.py,sha256=uuSYe1Ecu72aXRgTNjyMu-9ZPE13DAHJPKtedS-MsSs,451
128
131
  phoenix/server/api/routers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
129
132
  phoenix/server/api/routers/utils.py,sha256=M41BoH-fl37izhRuN2aX7lWm7jOC20A_3uClv9TVUUY,583
130
- phoenix/server/api/routers/v1/__init__.py,sha256=B5eSaylPI7MoYia1-VgKrU8rDi-69r_hRwPU5yMLUTE,2808
131
- phoenix/server/api/routers/v1/dataset_examples.py,sha256=wtplRUv2ee9xGTrcEMgTn-7L4NX_73IcwUXkCMZEFc4,6726
132
- phoenix/server/api/routers/v1/datasets.py,sha256=ws2Guou9mspwFx3-cBFZoD2VuTwWGoFZmtt2Sr3zg6k,31516
133
- phoenix/server/api/routers/v1/evaluations.py,sha256=rwSVg-rpujhsMcDVFt-VAr0Ix9TgvLcY_bSxeh8PzJI,9241
134
- phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=xhrkPUc_4ncIBm24aUyzu47UU0CN1tGlbisn-oLqt_Y,2702
135
- phoenix/server/api/routers/v1/experiment_runs.py,sha256=0AUNHA5nvpGDeoJUGK8VxP2TFN3iPwhMW3D9QmHstPk,4399
136
- phoenix/server/api/routers/v1/experiments.py,sha256=5Rh7q6sHswmk11PZSJ7KMrtqfIE16X_xSKkKSASK9-I,7251
137
- phoenix/server/api/routers/v1/spans.py,sha256=FEnmlRPBPl71BSGNBuPrz14fk8nmxJQYsKECdDbdUdw,3977
133
+ phoenix/server/api/routers/v1/__init__.py,sha256=vvdpUa2LJPWEg8HbvDm_ANkBAwubPIFPbbHi7elOUws,2808
134
+ phoenix/server/api/routers/v1/dataset_examples.py,sha256=XfqOvDKF1oxb0pkeYfBycwwGt3LnSyyGdMLKC5VKoGQ,6690
135
+ phoenix/server/api/routers/v1/datasets.py,sha256=f2gLG-geu-_wtEw4mKSzNWK2cFb5TYOyRL3tQ7Fl7Es,31544
136
+ phoenix/server/api/routers/v1/evaluations.py,sha256=8g6P_e2BweV3RDU0esFmpkb0L5fCwonQPXiJ0y6HLwg,9126
137
+ phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=HeyV3PXS1BxQpzNOUBpQlX_0JH_jbjZjTxrqy2ujwJQ,2746
138
+ phoenix/server/api/routers/v1/experiment_runs.py,sha256=_c7qmPIja_gpvoVaf_t7KtNc9Zz-0m9da9MS-EcbPBo,3918
139
+ phoenix/server/api/routers/v1/experiments.py,sha256=ntb0lRV2h90mFepWiZfQ1MIAJhOaK9tkWzTejmpwed0,7243
140
+ phoenix/server/api/routers/v1/spans.py,sha256=PFeS3ayKj4cUle0CH-f-CpM1fRi-JicEG7BEtkANzAo,4074
138
141
  phoenix/server/api/routers/v1/traces.py,sha256=dYEf5pThenAQCgfQljHdrnwd4tC_tAXm6Kvk6GphPYs,2774
139
142
  phoenix/server/api/types/AnnotatorKind.py,sha256=UmYb2KG0JfxdX0mW1qrXrUoIgjMOncRJr1i8mJki1sE,141
140
143
  phoenix/server/api/types/Cluster.py,sha256=ac4YfT1OH3xLVmex7EUmB6b9IpULnhLTt554LR0jglE,5689
@@ -159,7 +162,7 @@ phoenix/server/api/types/EvaluationSummary.py,sha256=EFucuzAhcxR9sdEn6WNAtmAGJk-
159
162
  phoenix/server/api/types/Event.py,sha256=XdYgaIxcVIW-YFViCkxj5l9OaVNepyIrCtm5Iqg2le8,3989
160
163
  phoenix/server/api/types/EventMetadata.py,sha256=-J0tYF9eZTHwCjwxQHY7Gckr2_MNW5OoWT1mydweZNM,635
161
164
  phoenix/server/api/types/ExampleRevisionInterface.py,sha256=gV3Gt9-3Oi5wjaVtepC6nOt3FzTzZFD1KebNnqiw56E,294
162
- phoenix/server/api/types/Experiment.py,sha256=Cs0EKhVLI5l5LKFI0hQA-ekZuaiJcOHT88JGFBa2deU,4906
165
+ phoenix/server/api/types/Experiment.py,sha256=ELYdYFKwgBllxx3cZ_X0XicHjLtshZl0bFqqJdVGXRQ,5177
163
166
  phoenix/server/api/types/ExperimentAnnotationSummary.py,sha256=Uk3JtxIrsMoZT5tqc4nJdUOM3XegVzjUyoV3pkjNotE,256
164
167
  phoenix/server/api/types/ExperimentComparison.py,sha256=0sFz6MoBDw39dds0qVyaqhVs9qqO5rkG1FMSjmfBeCc,441
165
168
  phoenix/server/api/types/ExperimentRun.py,sha256=8jUIi3ApVCqQHwnYe59CYhrmh5iZ6-QmlH5WpF7UWtM,2990
@@ -199,12 +202,12 @@ phoenix/server/static/apple-touch-icon-76x76.png,sha256=CT_xT12I0u2i0WU8JzBZBuOQ
199
202
  phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZVooBJ_SGk2ZkDs,3801
200
203
  phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
201
204
  phoenix/server/static/index.css,sha256=KKGpx4iwF91VGRm0YN-4cn8oC-oIqC6HecoPf0x3ZM8,1885
202
- phoenix/server/static/index.js,sha256=I9Y8svcPruUrXklKcZUxFz5HfLB0vOwczYLSwLAs_04,3500011
205
+ phoenix/server/static/index.js,sha256=qAPO3xGdQ2mIA8TcIPNkIfLvoGeZ78fVKbGZevfJzoM,3518643
203
206
  phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
204
207
  phoenix/server/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
205
208
  phoenix/server/templates/index.html,sha256=S4z7qSoNSwnKFAH9r96AR-YJEyoKMd-VMWVlJ_IdzME,2039
206
209
  phoenix/session/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
207
- phoenix/session/client.py,sha256=R7dV38yjkIQa522nhG6jhDllWcXft2JJ7RlcPYpqiiQ,24846
210
+ phoenix/session/client.py,sha256=5mnWVqMFbC8NYbX4m2oRla1VvlmrgabD1oT2UdwDRJ8,33201
208
211
  phoenix/session/data_extractor.py,sha256=dwhiDu-ISaXr8UI9I-CszZhB5BlUNmdDopjFZvMIXMw,2101
209
212
  phoenix/session/evaluation.py,sha256=aKeV8UVOyq3b7CYOwt3cWuLz0xzvMjX7vlEPILJ_fcs,5311
210
213
  phoenix/session/session.py,sha256=rjIuSSK2gAYIUPQTJc4E2ebew5o6I070FWRoFn4W3EI,26620
@@ -213,7 +216,7 @@ phoenix/trace/attributes.py,sha256=xSr2EvlkDS9Wiij7FywXzw7UmMAMLIARSEm_P686BKM,1
213
216
  phoenix/trace/errors.py,sha256=wB1z8qdPckngdfU-TORToekvg3344oNFAA83_hC2yFY,180
214
217
  phoenix/trace/evaluation_conventions.py,sha256=t8jydM3U0-T5YpiQKRJ3tWdWGlHtzKyttYdw-ddvPOk,1048
215
218
  phoenix/trace/exporter.py,sha256=eAYemdvDCHMugDJiaR29BFFMTQBdf3oerdkz34Cl3hE,4736
216
- phoenix/trace/fixtures.py,sha256=gBGFG2gkcBsSDzolzzR9AJDrB_fdOQfUaGgHV-EHdco,14204
219
+ phoenix/trace/fixtures.py,sha256=tDso17oAYMBQ8GJsAlHFzrC0otXPzUOEh4TgIjEfB7M,14260
217
220
  phoenix/trace/otel.py,sha256=WA720jvRadiZBAKjsYoPyXzypHwbyEK2OZRVUwtbjB8,9976
218
221
  phoenix/trace/projects.py,sha256=2BwlNjFE-uwpqYtCu5YyBiYZk9wRPpM13vh3-Cv7GkA,2157
219
222
  phoenix/trace/schemas.py,sha256=Mjc6fD9OyeMnEk5wPPSbveqnNUYWK3p3BxpOvSGanHU,5950
@@ -239,13 +242,13 @@ phoenix/trace/v1/evaluation_pb2.pyi,sha256=cCbbx06gwQmaH14s3J1X25TtaARh-k1abbxQd
239
242
  phoenix/utilities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
240
243
  phoenix/utilities/deprecation.py,sha256=cFuTVvjSYyRlrdxdJewjJVieIEHPk30BukSRGRydQ3k,1046
241
244
  phoenix/utilities/error_handling.py,sha256=7b5rpGFj9EWZ8yrZK1IHvxB89suWk3lggDayUQcvZds,1946
242
- phoenix/utilities/json.py,sha256=nuD1SbPm871tbt9Cz7V2NUmS9m0jWgcpRUx1YOV3NYQ,1954
245
+ phoenix/utilities/json.py,sha256=y_w-McDfvlTeGJT28sCtyjzVkwFicakxERG-sGRc8Ak,1948
243
246
  phoenix/utilities/logging.py,sha256=lDXd6EGaamBNcQxL4vP1au9-i_SXe0OraUDiJOcszSw,222
244
247
  phoenix/utilities/project.py,sha256=qWsvKnG1oKhOFUowXf9qiOL2ia7jaFe_ijFFHEt8GJo,431
245
248
  phoenix/utilities/re.py,sha256=PDve_OLjRTM8yQQJHC8-n3HdIONi7aNils3ZKRZ5uBM,2045
246
249
  phoenix/utilities/span_store.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
247
- arize_phoenix-4.4.4rc5.dist-info/METADATA,sha256=yT0gbMlPkiRkZeC8Yj_eLyaufriREVn3jxz5-qTKDjI,11012
248
- arize_phoenix-4.4.4rc5.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
249
- arize_phoenix-4.4.4rc5.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
250
- arize_phoenix-4.4.4rc5.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
251
- arize_phoenix-4.4.4rc5.dist-info/RECORD,,
250
+ arize_phoenix-4.4.4rc6.dist-info/METADATA,sha256=gyc5KyS4aFqefmGcezl1eC_8lCZ5DF0iHdSDh0V41f8,11337
251
+ arize_phoenix-4.4.4rc6.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
252
+ arize_phoenix-4.4.4rc6.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
253
+ arize_phoenix-4.4.4rc6.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
254
+ arize_phoenix-4.4.4rc6.dist-info/RECORD,,
phoenix/config.py CHANGED
@@ -233,4 +233,25 @@ def get_env_client_headers() -> Optional[Dict[str, str]]:
233
233
  return None
234
234
 
235
235
 
236
+ def get_base_url() -> str:
237
+ host = get_env_host()
238
+ if host == "0.0.0.0":
239
+ host = "127.0.0.1"
240
+ base_url = get_env_collector_endpoint() or f"http://{host}:{get_env_port()}"
241
+ return base_url if base_url.endswith("/") else base_url + "/"
242
+
243
+
244
+ def get_web_base_url() -> str:
245
+ """Return the web UI base URL.
246
+
247
+ Returns:
248
+ str: the web UI base URL
249
+ """
250
+ from phoenix.session.session import active_session
251
+
252
+ if session := active_session():
253
+ return session.url
254
+ return get_base_url()
255
+
256
+
236
257
  DEFAULT_PROJECT_NAME = "default"
phoenix/datetime_utils.py CHANGED
@@ -14,6 +14,10 @@ from pandas.core.dtypes.common import (
14
14
  _LOCAL_TIMEZONE = datetime.now(timezone.utc).astimezone().tzinfo
15
15
 
16
16
 
17
+ def local_now() -> datetime:
18
+ return datetime.now(timezone.utc).astimezone(tz=_LOCAL_TIMEZONE)
19
+
20
+
17
21
  def normalize_datetime(
18
22
  dt: Optional[datetime],
19
23
  tz: Optional[tzinfo] = None,
@@ -6,7 +6,7 @@ from typing_extensions import assert_never
6
6
 
7
7
  from phoenix.db import models
8
8
  from phoenix.db.helpers import SupportedSQLDialect, num_docs_col
9
- from phoenix.db.insertion.helpers import OnConflict, insert_stmt
9
+ from phoenix.db.insertion.helpers import OnConflict, insert_on_conflict
10
10
  from phoenix.exceptions import PhoenixException
11
11
  from phoenix.trace import v1 as pb
12
12
 
@@ -91,7 +91,7 @@ async def _insert_trace_evaluation(
91
91
  set_.pop("metadata_")
92
92
  set_["metadata"] = values["metadata_"] # `metadata` must match database
93
93
  await session.execute(
94
- insert_stmt(
94
+ insert_on_conflict(
95
95
  dialect=dialect,
96
96
  table=models.TraceAnnotation,
97
97
  values=values,
@@ -139,7 +139,7 @@ async def _insert_span_evaluation(
139
139
  set_.pop("metadata_")
140
140
  set_["metadata"] = values["metadata_"] # `metadata` must match database
141
141
  await session.execute(
142
- insert_stmt(
142
+ insert_on_conflict(
143
143
  dialect=dialect,
144
144
  table=models.SpanAnnotation,
145
145
  values=values,
@@ -196,7 +196,7 @@ async def _insert_document_evaluation(
196
196
  set_.pop("metadata_")
197
197
  set_["metadata"] = values["metadata_"] # `metadata` must match database
198
198
  await session.execute(
199
- insert_stmt(
199
+ insert_on_conflict(
200
200
  dialect=dialect,
201
201
  table=models.DocumentAnnotation,
202
202
  values=values,
@@ -2,7 +2,7 @@ from abc import ABC
2
2
  from enum import Enum, auto
3
3
  from typing import Any, Awaitable, Callable, Mapping, Optional, Sequence
4
4
 
5
- from sqlalchemy import Insert, insert
5
+ from sqlalchemy import Insert
6
6
  from sqlalchemy.dialects.postgresql import insert as insert_postgresql
7
7
  from sqlalchemy.dialects.sqlite import insert as insert_sqlite
8
8
  from sqlalchemy.ext.asyncio import AsyncSession
@@ -25,26 +25,18 @@ class OnConflict(Enum):
25
25
  DO_UPDATE = auto()
26
26
 
27
27
 
28
- def insert_stmt(
28
+ def insert_on_conflict(
29
29
  dialect: SupportedSQLDialect,
30
30
  table: Any,
31
31
  values: Mapping[str, Any],
32
- constraint: Optional[str] = None,
33
- column_names: Sequence[str] = (),
32
+ constraint: str,
33
+ column_names: Sequence[str],
34
34
  on_conflict: OnConflict = OnConflict.DO_NOTHING,
35
35
  set_: Optional[Mapping[str, Any]] = None,
36
36
  ) -> Insert:
37
37
  """
38
38
  Dialect specific insertion statement using ON CONFLICT DO syntax.
39
39
  """
40
- if bool(constraint) != bool(column_names):
41
- raise ValueError(
42
- "Both `constraint` and `column_names` must be provided or omitted at the same time."
43
- )
44
- if (dialect is SupportedSQLDialect.POSTGRESQL and constraint is None) or (
45
- dialect is SupportedSQLDialect.SQLITE and not column_names
46
- ):
47
- return insert(table).values(values)
48
40
  if dialect is SupportedSQLDialect.POSTGRESQL:
49
41
  stmt_postgresql = insert_postgresql(table).values(values)
50
42
  if on_conflict is OnConflict.DO_NOTHING or not set_:
@@ -7,7 +7,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
7
7
 
8
8
  from phoenix.db import models
9
9
  from phoenix.db.helpers import SupportedSQLDialect
10
- from phoenix.db.insertion.helpers import OnConflict, insert_stmt
10
+ from phoenix.db.insertion.helpers import OnConflict, insert_on_conflict
11
11
  from phoenix.trace.attributes import get_attribute_value
12
12
  from phoenix.trace.schemas import Span, SpanStatusCode
13
13
 
@@ -27,7 +27,7 @@ async def insert_span(
27
27
  ) -> Optional[SpanInsertionEvent]:
28
28
  dialect = SupportedSQLDialect(session.bind.dialect.name)
29
29
  project_rowid = await session.scalar(
30
- insert_stmt(
30
+ insert_on_conflict(
31
31
  dialect=dialect,
32
32
  table=models.Project,
33
33
  constraint="uq_projects_name",
@@ -87,7 +87,7 @@ async def insert_span(
87
87
  cumulative_llm_token_count_prompt += cast(int, accumulation[1] or 0)
88
88
  cumulative_llm_token_count_completion += cast(int, accumulation[2] or 0)
89
89
  span_rowid = await session.scalar(
90
- insert_stmt(
90
+ insert_on_conflict(
91
91
  dialect=dialect,
92
92
  table=models.Span,
93
93
  constraint="uq_spans_span_id",
phoenix/db/models.py CHANGED
@@ -92,7 +92,7 @@ class UtcTimeStamp(TypeDecorator[datetime]):
92
92
 
93
93
 
94
94
  class ExperimentResult(TypedDict, total=False):
95
- result: Dict[str, Any]
95
+ result: Any
96
96
 
97
97
 
98
98
  class Base(DeclarativeBase):
@@ -0,0 +1,6 @@
1
+ from .functions import evaluate_experiment, run_experiment
2
+
3
+ __all__ = [
4
+ "evaluate_experiment",
5
+ "run_experiment",
6
+ ]
@@ -0,0 +1,29 @@
1
+ from phoenix.experiments.evaluators.code_evaluators import (
2
+ ContainsAllKeywords,
3
+ ContainsAnyKeyword,
4
+ ContainsKeyword,
5
+ JSONParsable,
6
+ MatchesRegex,
7
+ )
8
+ from phoenix.experiments.evaluators.llm_evaluators import (
9
+ CoherenceEvaluator,
10
+ ConcisenessEvaluator,
11
+ HelpfulnessEvaluator,
12
+ LLMCriteriaEvaluator,
13
+ RelevanceEvaluator,
14
+ )
15
+ from phoenix.experiments.evaluators.utils import create_evaluator
16
+
17
+ __all__ = [
18
+ "create_evaluator",
19
+ "ContainsAllKeywords",
20
+ "ContainsAnyKeyword",
21
+ "ContainsKeyword",
22
+ "JSONParsable",
23
+ "MatchesRegex",
24
+ "CoherenceEvaluator",
25
+ "ConcisenessEvaluator",
26
+ "LLMCriteriaEvaluator",
27
+ "HelpfulnessEvaluator",
28
+ "RelevanceEvaluator",
29
+ ]
@@ -0,0 +1,153 @@
1
+ import functools
2
+ import inspect
3
+ from abc import ABC
4
+ from types import MappingProxyType
5
+ from typing import Any, Awaitable, Callable, Optional, Union
6
+
7
+ from typing_extensions import TypeAlias
8
+
9
+ from phoenix.experiments.evaluators.utils import validate_signature
10
+ from phoenix.experiments.types import (
11
+ AnnotatorKind,
12
+ EvaluationResult,
13
+ EvaluatorKind,
14
+ EvaluatorName,
15
+ EvaluatorOutput,
16
+ ExampleInput,
17
+ ExampleMetadata,
18
+ ExampleOutput,
19
+ TaskOutput,
20
+ )
21
+
22
+
23
+ class Evaluator(ABC):
24
+ """
25
+ A helper super class to guide the implementation of an `Evaluator` object.
26
+ Subclasses must implement either the `evaluate` or `async_evaluate` method.
27
+ Implementing both methods is recommended, but not required.
28
+
29
+ This Class is intended to be subclassed, and should not be instantiated directly.
30
+ """
31
+
32
+ _kind: AnnotatorKind
33
+ _name: EvaluatorName
34
+
35
+ @functools.cached_property
36
+ def name(self) -> EvaluatorName:
37
+ if hasattr(self, "_name"):
38
+ return self._name
39
+ return self.__class__.__name__
40
+
41
+ @functools.cached_property
42
+ def kind(self) -> EvaluatorKind:
43
+ if hasattr(self, "_kind"):
44
+ return self._kind.value
45
+ return AnnotatorKind.CODE.value
46
+
47
+ def __new__(cls, *args: Any, **kwargs: Any) -> "Evaluator":
48
+ if cls is Evaluator:
49
+ raise TypeError(f"{cls.__name__} is an abstract class and should not be instantiated.")
50
+ return object.__new__(cls)
51
+
52
+ def evaluate(
53
+ self,
54
+ *,
55
+ output: Optional[TaskOutput] = None,
56
+ expected: Optional[ExampleOutput] = None,
57
+ metadata: ExampleMetadata = MappingProxyType({}),
58
+ input: ExampleInput = MappingProxyType({}),
59
+ **kwargs: Any,
60
+ ) -> EvaluationResult:
61
+ # For subclassing, one should implement either this sync method or the
62
+ # async version. Implementing both is recommended but not required.
63
+ raise NotImplementedError
64
+
65
+ async def async_evaluate(
66
+ self,
67
+ *,
68
+ output: Optional[TaskOutput] = None,
69
+ expected: Optional[ExampleOutput] = None,
70
+ metadata: ExampleMetadata = MappingProxyType({}),
71
+ input: ExampleInput = MappingProxyType({}),
72
+ **kwargs: Any,
73
+ ) -> EvaluationResult:
74
+ # For subclassing, one should implement either this async method or the
75
+ # sync version. Implementing both is recommended but not required.
76
+ return self.evaluate(
77
+ output=output,
78
+ expected=expected,
79
+ metadata=metadata,
80
+ input=input,
81
+ **kwargs,
82
+ )
83
+
84
+ def __init_subclass__(cls, is_abstract: bool = False, **kwargs: Any) -> None:
85
+ super().__init_subclass__(**kwargs)
86
+ if is_abstract:
87
+ return
88
+ evaluate_fn_signature = inspect.signature(Evaluator.evaluate)
89
+ for super_cls in inspect.getmro(cls):
90
+ if super_cls in (LLMEvaluator, Evaluator):
91
+ break
92
+ if evaluate := super_cls.__dict__.get(Evaluator.evaluate.__name__):
93
+ assert callable(evaluate), "`evaluate()` method should be callable"
94
+ # need to remove the first param, i.e. `self`
95
+ _validate_sig(functools.partial(evaluate, None), "evaluate")
96
+ return
97
+ if async_evaluate := super_cls.__dict__.get(Evaluator.async_evaluate.__name__):
98
+ assert callable(async_evaluate), "`async_evaluate()` method should be callable"
99
+ # need to remove the first param, i.e. `self`
100
+ _validate_sig(functools.partial(async_evaluate, None), "async_evaluate")
101
+ return
102
+ raise ValueError(
103
+ f"Evaluator must implement either "
104
+ f"`def evaluate{evaluate_fn_signature}` or "
105
+ f"`async def async_evaluate{evaluate_fn_signature}`"
106
+ )
107
+
108
+
109
+ def _validate_sig(fn: Callable[..., Any], fn_name: str) -> None:
110
+ sig = inspect.signature(fn)
111
+ validate_signature(sig)
112
+ for param in sig.parameters.values():
113
+ if param.kind is inspect.Parameter.VAR_KEYWORD:
114
+ return
115
+ else:
116
+ raise ValueError(f"`{fn_name}` should allow variadic keyword arguments `**kwargs`")
117
+
118
+
119
+ class CodeEvaluator(Evaluator, ABC, is_abstract=True):
120
+ """
121
+ A convenience super class for defining code evaluators.
122
+
123
+ This class is intended to be subclassed, and should not be instantiated directly.
124
+ """
125
+
126
+ _kind = AnnotatorKind.CODE
127
+
128
+ def __new__(cls, *args: Any, **kwargs: Any) -> "CodeEvaluator":
129
+ if cls is CodeEvaluator:
130
+ raise TypeError(f"{cls.__name__} is an abstract class and should not be instantiated.")
131
+ return object.__new__(cls)
132
+
133
+
134
+ class LLMEvaluator(Evaluator, ABC, is_abstract=True):
135
+ """
136
+ A convenience super class for defining LLM evaluators.
137
+
138
+ This class is intended to be subclassed, and should not be instantiated directly.
139
+ """
140
+
141
+ _kind = AnnotatorKind.LLM
142
+
143
+ def __new__(cls, *args: Any, **kwargs: Any) -> "LLMEvaluator":
144
+ if cls is LLMEvaluator:
145
+ raise TypeError(f"{cls.__name__} is an abstract class and should not be instantiated.")
146
+ return object.__new__(cls)
147
+
148
+
149
+ ExperimentEvaluator: TypeAlias = Union[
150
+ Evaluator,
151
+ Callable[..., EvaluatorOutput],
152
+ Callable[..., Awaitable[EvaluatorOutput]],
153
+ ]
@@ -4,11 +4,11 @@ import json
4
4
  import re
5
5
  from typing import Any, List, Optional, Union
6
6
 
7
- from phoenix.datasets.evaluators.utils import Evaluator
8
- from phoenix.datasets.types import EvaluationResult, TaskOutput
7
+ from phoenix.experiments.evaluators.base import CodeEvaluator
8
+ from phoenix.experiments.types import EvaluationResult, TaskOutput
9
9
 
10
10
 
11
- class JSONParsable(Evaluator):
11
+ class JSONParsable(CodeEvaluator):
12
12
  def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
13
13
  assert isinstance(output, str), "Experiment run output must be a string"
14
14
  try:
@@ -21,7 +21,7 @@ class JSONParsable(Evaluator):
21
21
  )
22
22
 
23
23
 
24
- class ContainsKeyword(Evaluator):
24
+ class ContainsKeyword(CodeEvaluator):
25
25
  def __init__(self, keyword: str, name: Optional[str] = None) -> None:
26
26
  self.keyword = keyword
27
27
  self._name = name or f"Contains({repr(keyword)})"
@@ -38,7 +38,7 @@ class ContainsKeyword(Evaluator):
38
38
  )
39
39
 
40
40
 
41
- class ContainsAnyKeyword(Evaluator):
41
+ class ContainsAnyKeyword(CodeEvaluator):
42
42
  def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
43
43
  self.keywords = keywords
44
44
  self._name = name or f"ContainsAny({keywords})"
@@ -56,7 +56,7 @@ class ContainsAnyKeyword(Evaluator):
56
56
  )
57
57
 
58
58
 
59
- class ContainsAllKeywords(Evaluator):
59
+ class ContainsAllKeywords(CodeEvaluator):
60
60
  def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
61
61
  self.keywords = keywords
62
62
  self._name = name or f"ContainsAll({keywords})"
@@ -76,7 +76,7 @@ class ContainsAllKeywords(Evaluator):
76
76
  )
77
77
 
78
78
 
79
- class MatchesRegex(Evaluator):
79
+ class MatchesRegex(CodeEvaluator):
80
80
  def __init__(self, pattern: Union[str, re.Pattern[str]], name: Optional[str] = None) -> None:
81
81
  if isinstance(pattern, str):
82
82
  pattern = re.compile(pattern)
@@ -2,19 +2,19 @@ import re
2
2
  from types import MappingProxyType
3
3
  from typing import Any, Callable, Optional, Type
4
4
 
5
- from phoenix.datasets.evaluators.utils import (
6
- ExampleInput,
7
- ExampleMetadata,
5
+ from phoenix.evals.models.base import BaseModel as LLMBaseModel
6
+ from phoenix.evals.utils import snap_to_rail
7
+ from phoenix.experiments.evaluators.base import (
8
8
  ExperimentEvaluator,
9
9
  LLMEvaluator,
10
- _unwrap_json,
11
10
  )
12
- from phoenix.datasets.types import (
11
+ from phoenix.experiments.evaluators.utils import unwrap_json
12
+ from phoenix.experiments.types import (
13
13
  EvaluationResult,
14
+ ExampleInput,
15
+ ExampleMetadata,
14
16
  TaskOutput,
15
17
  )
16
- from phoenix.evals.models.base import BaseModel as LLMBaseModel
17
- from phoenix.evals.utils import snap_to_rail
18
18
 
19
19
 
20
20
  class LLMCriteriaEvaluator(LLMEvaluator):
@@ -62,7 +62,7 @@ class LLMCriteriaEvaluator(LLMEvaluator):
62
62
 
63
63
  def _format_eval_template(self, output: TaskOutput) -> str:
64
64
  assert output is not None
65
- result = _unwrap_json(output)
65
+ result = unwrap_json(output)
66
66
  return self.template.format(text=str(result))
67
67
 
68
68
  def _parse_eval_output(self, unparsed_response: str) -> EvaluationResult:
@@ -217,7 +217,7 @@ class RelevanceEvaluator(LLMEvaluator):
217
217
  self, output: Optional[TaskOutput] = None, *args: Any, **kwargs: Any
218
218
  ) -> str:
219
219
  assert output is not None
220
- return str(_unwrap_json(output))
220
+ return str(unwrap_json(output))
221
221
 
222
222
  def evaluate(
223
223
  self,