arize-phoenix 4.4.4rc4__py3-none-any.whl → 4.4.4rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/METADATA +2 -2
- {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/RECORD +30 -28
- phoenix/datasets/evaluators/code_evaluators.py +25 -53
- phoenix/datasets/evaluators/llm_evaluators.py +63 -32
- phoenix/datasets/evaluators/utils.py +292 -0
- phoenix/datasets/experiments.py +147 -82
- phoenix/datasets/tracing.py +19 -0
- phoenix/datasets/types.py +18 -52
- phoenix/db/insertion/dataset.py +19 -16
- phoenix/db/migrations/versions/10460e46d750_datasets.py +2 -2
- phoenix/db/models.py +8 -3
- phoenix/server/api/context.py +2 -0
- phoenix/server/api/dataloaders/__init__.py +2 -0
- phoenix/server/api/dataloaders/experiment_run_counts.py +42 -0
- phoenix/server/api/helpers/dataset_helpers.py +8 -7
- phoenix/server/api/input_types/ClearProjectInput.py +15 -0
- phoenix/server/api/mutations/project_mutations.py +9 -4
- phoenix/server/api/routers/v1/datasets.py +146 -42
- phoenix/server/api/routers/v1/experiment_evaluations.py +1 -0
- phoenix/server/api/routers/v1/experiment_runs.py +2 -2
- phoenix/server/api/types/Experiment.py +5 -0
- phoenix/server/api/types/ExperimentRun.py +1 -1
- phoenix/server/api/types/ExperimentRunAnnotation.py +1 -1
- phoenix/server/app.py +2 -0
- phoenix/server/static/index.js +610 -564
- phoenix/session/client.py +124 -2
- phoenix/version.py +1 -1
- phoenix/datasets/evaluators/_utils.py +0 -13
- {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/WHEEL +0 -0
- {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: arize-phoenix
|
|
3
|
-
Version: 4.4.4rc4
|
|
3
|
+
Version: 4.4.4rc5
|
|
4
4
|
Summary: AI Observability and Evaluation
|
|
5
5
|
Project-URL: Documentation, https://docs.arize.com/phoenix/
|
|
6
6
|
Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
|
|
@@ -31,7 +31,7 @@ Requires-Dist: openinference-instrumentation
|
|
|
31
31
|
Requires-Dist: openinference-instrumentation-langchain>=0.1.12
|
|
32
32
|
Requires-Dist: openinference-instrumentation-llama-index>=1.2.0
|
|
33
33
|
Requires-Dist: openinference-instrumentation-openai>=0.1.4
|
|
34
|
-
Requires-Dist: openinference-semantic-conventions>=0.1.
|
|
34
|
+
Requires-Dist: openinference-semantic-conventions>=0.1.9
|
|
35
35
|
Requires-Dist: opentelemetry-exporter-otlp
|
|
36
36
|
Requires-Dist: opentelemetry-proto>=1.12.0
|
|
37
37
|
Requires-Dist: opentelemetry-sdk
|
|
@@ -5,20 +5,20 @@ phoenix/exceptions.py,sha256=n2L2KKuecrdflB9MsCdAYCiSEvGJptIsfRkXMoJle7A,169
|
|
|
5
5
|
phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
6
6
|
phoenix/services.py,sha256=aTxhcOA1pZHB6U-B3TEcp6fqDF5oT0xCUvEUNMZVTUQ,5175
|
|
7
7
|
phoenix/settings.py,sha256=cO-qgis_S27nHirTobYI9hHPfZH18R--WMmxNdsVUwc,273
|
|
8
|
-
phoenix/version.py,sha256
|
|
8
|
+
phoenix/version.py,sha256=-Vg_bLotyeJdv0gFqG5-A64nsG-6AR0xZSp3sDDsV_w,25
|
|
9
9
|
phoenix/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
|
|
11
11
|
phoenix/core/model.py,sha256=km_a--PBHOuA337ClRw9xqhOHhrUT6Rl9pz_zV0JYkQ,4843
|
|
12
12
|
phoenix/core/model_schema.py,sha256=F2dbbVnkDLsPYoyZDv1q03uhvP8LcU1wXp0g-exiWs0,50551
|
|
13
13
|
phoenix/core/model_schema_adapter.py,sha256=0Tm_Y_gV-WED8fKBCaFXAEFwE3CTEZS1dowqnTZ7x7g,8426
|
|
14
14
|
phoenix/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
-
phoenix/datasets/experiments.py,sha256=
|
|
16
|
-
phoenix/datasets/tracing.py,sha256=
|
|
17
|
-
phoenix/datasets/types.py,sha256=
|
|
15
|
+
phoenix/datasets/experiments.py,sha256=RzZezHQcTpPcr7gY9rGtoYlfoesFNhNV7EO5f_oHNFk,21198
|
|
16
|
+
phoenix/datasets/tracing.py,sha256=wVpt8Ie9WNPoi1djJdcrkwCokHdTO0bicXViLg3O-1Y,2831
|
|
17
|
+
phoenix/datasets/types.py,sha256=N17mnnVwmu1k3bnmbyROPt_6TxPaZY_QkOZmCOR5_jE,4835
|
|
18
18
|
phoenix/datasets/evaluators/__init__.py,sha256=KSr9fNG4O93swYxNdPj_UihP9Itl_5mj0a492wi_4_0,465
|
|
19
|
-
phoenix/datasets/evaluators/
|
|
20
|
-
phoenix/datasets/evaluators/
|
|
21
|
-
phoenix/datasets/evaluators/
|
|
19
|
+
phoenix/datasets/evaluators/code_evaluators.py,sha256=DdCcAi274t_TLs_aARd-GmWWpJrxVeNEAegMFEAfe0E,3894
|
|
20
|
+
phoenix/datasets/evaluators/llm_evaluators.py,sha256=aVfAHOWhskBiy0IVeq_ACTs7B37uXTTtDoNBS0XenIc,9165
|
|
21
|
+
phoenix/datasets/evaluators/utils.py,sha256=S7OGrb1sBWg5l9K35X29OKJe5wZ3k7xMhxJBclzxta0,10452
|
|
22
22
|
phoenix/db/README.md,sha256=IvKaZyf9ECbGBYYePaRhBveKZwDbxAc-c7BMxJYZh6Q,595
|
|
23
23
|
phoenix/db/__init__.py,sha256=pDjEFXukHmJBM-1D8RjmXkvLsz85YWNxMQczt81ec3A,118
|
|
24
24
|
phoenix/db/alembic.ini,sha256=p8DjVqGUs_tTx8oU56JP7qj-rMUebNFizItUSv_hPhs,3763
|
|
@@ -26,9 +26,9 @@ phoenix/db/bulk_inserter.py,sha256=zbZGWZFDybKaGLGzpxgLwxAS5sC0_wXcvM0be4kUhh8,1
|
|
|
26
26
|
phoenix/db/engines.py,sha256=vLWaZlToMtDI7rJDxSidYkfOoojamxaZxaz8ND3zTus,4770
|
|
27
27
|
phoenix/db/helpers.py,sha256=L2_jP1iIWpUREhKLYYb4_vf_6v_BiU1E73Z2PczGm6s,1589
|
|
28
28
|
phoenix/db/migrate.py,sha256=MuhtNWnR24riROvarvKfbRb4_D5xuQi6P760vBUKl1E,2270
|
|
29
|
-
phoenix/db/models.py,sha256=
|
|
29
|
+
phoenix/db/models.py,sha256=lYzI3tCDUl8njXb3Vf3R8e6y56-MErprjjfBE-o9Kao,20419
|
|
30
30
|
phoenix/db/insertion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
|
-
phoenix/db/insertion/dataset.py,sha256=
|
|
31
|
+
phoenix/db/insertion/dataset.py,sha256=_vxy5e6W5jEuvO2fMKbbNCn9JvHkwI4LRKk_10eKFVg,7171
|
|
32
32
|
phoenix/db/insertion/evaluation.py,sha256=fAerUy3QGf2wID_tiVmPvzxBDFGiONPl3pmpZDgJDWQ,7183
|
|
33
33
|
phoenix/db/insertion/helpers.py,sha256=7tf6qQyJ05nn3IXaZEpj2b4Jz5boGLWT8tzlMaJ9tQY,2337
|
|
34
34
|
phoenix/db/insertion/span.py,sha256=DNBjSrx5g2W5KuTB1dkHwtkb0SFnMIxN1jB-BAdGKFY,5634
|
|
@@ -36,7 +36,7 @@ phoenix/db/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
|
|
|
36
36
|
phoenix/db/migrations/env.py,sha256=QbzB5zrRs6XQQmrYeUpuzeilcMlM-MsbaAgHHYcIHTI,3626
|
|
37
37
|
phoenix/db/migrations/script.py.mako,sha256=MEqL-2qATlST9TAOeYgscMn1uy6HUS9NFvDgl93dMj8,635
|
|
38
38
|
phoenix/db/migrations/types.py,sha256=Frq1AKSyBKQQ0FLzON-EmgTqE4kNkOpHMsbWnI-WgCE,605
|
|
39
|
-
phoenix/db/migrations/versions/10460e46d750_datasets.py,sha256=
|
|
39
|
+
phoenix/db/migrations/versions/10460e46d750_datasets.py,sha256=l69yZfScFrjfZZpY0gnqwhsDUEctLeo02qMgA_aOGDg,8155
|
|
40
40
|
phoenix/db/migrations/versions/cf03bd6bae1d_init.py,sha256=CbWT3ZTR0CZqeT3zWLoTWhboFmnOy3Ju1z6Ztpq8WIM,8122
|
|
41
41
|
phoenix/inferences/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
42
42
|
phoenix/inferences/errors.py,sha256=cGp9vxnw4SewFoWBV3ZGMkhE0Kh73lPIv3Ppz_H_RoA,8261
|
|
@@ -58,18 +58,18 @@ phoenix/pointcloud/pointcloud.py,sha256=4zAIkKs2xOUbchpj4XDAV-iPMXrfAJ15TG6rlIYG
|
|
|
58
58
|
phoenix/pointcloud/projectors.py,sha256=zO_RrtDYSv2rqVOfIP2_9Cv11Dc8EmcZR94xhFcBYPU,1057
|
|
59
59
|
phoenix/pointcloud/umap_parameters.py,sha256=lJsEOrbSuSiqI7g4Yt6xj7kgYxEqoep4ZHWLr6VWBqw,1760
|
|
60
60
|
phoenix/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
61
|
-
phoenix/server/app.py,sha256=
|
|
61
|
+
phoenix/server/app.py,sha256=LQrHWt5HG_pWqnR9Ozb3-vnAGiiRGuZ3uV_9-886Yxw,18340
|
|
62
62
|
phoenix/server/grpc_server.py,sha256=faktLxEtWGlCB1bPR4QwwTsRoQloahKMx0hAWqRGI5s,3379
|
|
63
63
|
phoenix/server/main.py,sha256=mtzH_2Kyvuy3AHiiKfqiCdUQ6SGFzeT4q9fefbV6GLg,11114
|
|
64
64
|
phoenix/server/prometheus.py,sha256=j9DHB2fERuq_ZKmwVaqR-9wx5WcPPuU1Cm5Bhg5241Y,2996
|
|
65
65
|
phoenix/server/telemetry.py,sha256=T_2OKrxNViAeaANlNspEekg_Y5uZIFWvKAnpz8Aoqvk,2762
|
|
66
66
|
phoenix/server/thread_server.py,sha256=dP6cm6Cf08jNhDA1TRlVZpziu1YgtPDmaeIJMm725eI,2154
|
|
67
67
|
phoenix/server/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
68
|
-
phoenix/server/api/context.py,sha256=
|
|
68
|
+
phoenix/server/api/context.py,sha256=jb69SVdb5hpVbfM4U0pZi4sGa2a-0VKOJWcBjjS7l4s,2761
|
|
69
69
|
phoenix/server/api/interceptor.py,sha256=ykDnoC_apUd-llVli3m1CW18kNSIgjz2qZ6m5JmPDu8,1294
|
|
70
70
|
phoenix/server/api/queries.py,sha256=wp5BlapuxDIoaQJm7mzG0dURfVxR32vXSJVC0JqG4_Y,19845
|
|
71
71
|
phoenix/server/api/schema.py,sha256=BcxdqO5CSGqpKd-AAJHMjFlzaK9oJA8GJuxmMfcdjn4,434
|
|
72
|
-
phoenix/server/api/dataloaders/__init__.py,sha256=
|
|
72
|
+
phoenix/server/api/dataloaders/__init__.py,sha256=urbG3M-k2cpj2ymMLYQ28tzIXAG1edECxM-tJ22ylqE,4720
|
|
73
73
|
phoenix/server/api/dataloaders/dataset_example_revisions.py,sha256=Vpr5IEKSR4QnAVxE5NM7u92fPNgeHQV2ieYc6JakCj0,3788
|
|
74
74
|
phoenix/server/api/dataloaders/dataset_example_spans.py,sha256=_jLlo0KdUS65d4PNTtE9aXVyG_NZWgA7VcpNC9udQ8U,1484
|
|
75
75
|
phoenix/server/api/dataloaders/document_evaluation_summaries.py,sha256=dgAAlD0n8X6oAPLaD-czoefNkDqP338MouWsKaW8bOY,5684
|
|
@@ -78,6 +78,7 @@ phoenix/server/api/dataloaders/document_retrieval_metrics.py,sha256=8tZYMNLZ7zxU
|
|
|
78
78
|
phoenix/server/api/dataloaders/evaluation_summaries.py,sha256=z9aal3IQL_t30aNqpAS7x4tjq0xNkuEG8dWW-bhqZmo,5724
|
|
79
79
|
phoenix/server/api/dataloaders/experiment_annotation_summaries.py,sha256=RsQ-o84kWVTYgIlh9VKkyw2kDMWIlHCRpS7RE2aw9vs,2881
|
|
80
80
|
phoenix/server/api/dataloaders/experiment_error_rates.py,sha256=EHlTdZi8F94vo-qJUcnnXFvuSh_d0fTT0Xg4SfW_A70,1397
|
|
81
|
+
phoenix/server/api/dataloaders/experiment_run_counts.py,sha256=wxHv08aZELJ91KTjHdt_x33M3wGDDa9GfbFHeRyOyGk,1343
|
|
81
82
|
phoenix/server/api/dataloaders/experiment_sequence_number.py,sha256=Va1KuoHOd-wzvrlKykoV4kLRFW4JsJvGp_DUI4HYZX4,1631
|
|
82
83
|
phoenix/server/api/dataloaders/latency_ms_quantile.py,sha256=pEc7QjB2iiNOQm_Fmo99F5O_DKOJWgGmcnT0OADJzYE,7423
|
|
83
84
|
phoenix/server/api/dataloaders/min_start_or_max_end_times.py,sha256=IoFX5PtSpvQdMk_7-oB8TpIse3Q4PMxep4qKggkHpzo,2902
|
|
@@ -92,9 +93,10 @@ phoenix/server/api/dataloaders/trace_row_ids.py,sha256=yAWuVFWUjDdmmwfXsGs_l6LuG
|
|
|
92
93
|
phoenix/server/api/dataloaders/cache/__init__.py,sha256=SYoOM9n8FJaMdQarma5d1blu-jIg2GB8Shqg5ezSzZ8,106
|
|
93
94
|
phoenix/server/api/dataloaders/cache/two_tier_cache.py,sha256=I38L1RsOis98OQftE7n1Q9QBZfFJO6OW_qIINkuJllo,2295
|
|
94
95
|
phoenix/server/api/helpers/__init__.py,sha256=_V1eVkchZmTkhOfRC4QqR1sUB2xtIxdsMJkDouZq_IE,251
|
|
95
|
-
phoenix/server/api/helpers/dataset_helpers.py,sha256=
|
|
96
|
+
phoenix/server/api/helpers/dataset_helpers.py,sha256=A6UzEyAb4gFtyc_AV63_yl9OpN0vn8Vw1BBCTNjg9J0,6875
|
|
96
97
|
phoenix/server/api/input_types/AddExamplesToDatasetInput.py,sha256=ZGXMV0H3DYHi4DdqGhejDzaWdFinyem1Mc8DVA7iCh0,436
|
|
97
98
|
phoenix/server/api/input_types/AddSpansToDatasetInput.py,sha256=C4oZ0WqYqca1kleNOCMIM2_aY6Qnc5n1xXG51_C1V0w,368
|
|
99
|
+
phoenix/server/api/input_types/ClearProjectInput.py,sha256=cpPFRyQ3ffy2dLbCZgYpway-mCzhdm4QqnUg8caOBfQ,382
|
|
98
100
|
phoenix/server/api/input_types/ClusterInput.py,sha256=EL4ftvZxQ8mVdruUPcdhMhByORmSmM8S-X6RPqU6GX0,179
|
|
99
101
|
phoenix/server/api/input_types/Coordinates.py,sha256=meTwbIjwTfqx5DGD2DBlH9wQzdQVNM5a8x9dp1FfIgA,173
|
|
100
102
|
phoenix/server/api/input_types/CreateDatasetInput.py,sha256=Q3MwouIx9jTQBRWDju75iMQXEGJCrL4aD4ESQp771nc,248
|
|
@@ -119,7 +121,7 @@ phoenix/server/api/mutations/auth.py,sha256=vPRFoj7J6PV6QeODewG4K0PhoOebS5AfMRpb
|
|
|
119
121
|
phoenix/server/api/mutations/dataset_mutations.py,sha256=Zp2sFWyGyubILUQboR6bafRWafsfeRO2ffUWnkLlfgI,22532
|
|
120
122
|
phoenix/server/api/mutations/experiment_mutations.py,sha256=Fw_yEdITGJ6A33M5JZ-2YnBTDoBqZUUFON6vy8JoVjE,2569
|
|
121
123
|
phoenix/server/api/mutations/export_events_mutations.py,sha256=t_wYBxaqvBJYRoHslh3Bmoxmwlzoy0u8SsBKWIKN5hE,4028
|
|
122
|
-
phoenix/server/api/mutations/project_mutations.py,sha256=
|
|
124
|
+
phoenix/server/api/mutations/project_mutations.py,sha256=3SVDCZqxB0Iv60cOwBL8c-rY3QUUPs8PXbp-C_K1mWY,2267
|
|
123
125
|
phoenix/server/api/openapi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
124
126
|
phoenix/server/api/openapi/main.py,sha256=WY0pj3B7siQyyYqKyhqnzWC7P8MtEtiukOBUjGwLXfw,153
|
|
125
127
|
phoenix/server/api/openapi/schema.py,sha256=uuSYe1Ecu72aXRgTNjyMu-9ZPE13DAHJPKtedS-MsSs,451
|
|
@@ -127,10 +129,10 @@ phoenix/server/api/routers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
|
|
|
127
129
|
phoenix/server/api/routers/utils.py,sha256=M41BoH-fl37izhRuN2aX7lWm7jOC20A_3uClv9TVUUY,583
|
|
128
130
|
phoenix/server/api/routers/v1/__init__.py,sha256=B5eSaylPI7MoYia1-VgKrU8rDi-69r_hRwPU5yMLUTE,2808
|
|
129
131
|
phoenix/server/api/routers/v1/dataset_examples.py,sha256=wtplRUv2ee9xGTrcEMgTn-7L4NX_73IcwUXkCMZEFc4,6726
|
|
130
|
-
phoenix/server/api/routers/v1/datasets.py,sha256=
|
|
132
|
+
phoenix/server/api/routers/v1/datasets.py,sha256=ws2Guou9mspwFx3-cBFZoD2VuTwWGoFZmtt2Sr3zg6k,31516
|
|
131
133
|
phoenix/server/api/routers/v1/evaluations.py,sha256=rwSVg-rpujhsMcDVFt-VAr0Ix9TgvLcY_bSxeh8PzJI,9241
|
|
132
|
-
phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=
|
|
133
|
-
phoenix/server/api/routers/v1/experiment_runs.py,sha256=
|
|
134
|
+
phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=xhrkPUc_4ncIBm24aUyzu47UU0CN1tGlbisn-oLqt_Y,2702
|
|
135
|
+
phoenix/server/api/routers/v1/experiment_runs.py,sha256=0AUNHA5nvpGDeoJUGK8VxP2TFN3iPwhMW3D9QmHstPk,4399
|
|
134
136
|
phoenix/server/api/routers/v1/experiments.py,sha256=5Rh7q6sHswmk11PZSJ7KMrtqfIE16X_xSKkKSASK9-I,7251
|
|
135
137
|
phoenix/server/api/routers/v1/spans.py,sha256=FEnmlRPBPl71BSGNBuPrz14fk8nmxJQYsKECdDbdUdw,3977
|
|
136
138
|
phoenix/server/api/routers/v1/traces.py,sha256=dYEf5pThenAQCgfQljHdrnwd4tC_tAXm6Kvk6GphPYs,2774
|
|
@@ -157,11 +159,11 @@ phoenix/server/api/types/EvaluationSummary.py,sha256=EFucuzAhcxR9sdEn6WNAtmAGJk-
|
|
|
157
159
|
phoenix/server/api/types/Event.py,sha256=XdYgaIxcVIW-YFViCkxj5l9OaVNepyIrCtm5Iqg2le8,3989
|
|
158
160
|
phoenix/server/api/types/EventMetadata.py,sha256=-J0tYF9eZTHwCjwxQHY7Gckr2_MNW5OoWT1mydweZNM,635
|
|
159
161
|
phoenix/server/api/types/ExampleRevisionInterface.py,sha256=gV3Gt9-3Oi5wjaVtepC6nOt3FzTzZFD1KebNnqiw56E,294
|
|
160
|
-
phoenix/server/api/types/Experiment.py,sha256=
|
|
162
|
+
phoenix/server/api/types/Experiment.py,sha256=Cs0EKhVLI5l5LKFI0hQA-ekZuaiJcOHT88JGFBa2deU,4906
|
|
161
163
|
phoenix/server/api/types/ExperimentAnnotationSummary.py,sha256=Uk3JtxIrsMoZT5tqc4nJdUOM3XegVzjUyoV3pkjNotE,256
|
|
162
164
|
phoenix/server/api/types/ExperimentComparison.py,sha256=0sFz6MoBDw39dds0qVyaqhVs9qqO5rkG1FMSjmfBeCc,441
|
|
163
|
-
phoenix/server/api/types/ExperimentRun.py,sha256=
|
|
164
|
-
phoenix/server/api/types/ExperimentRunAnnotation.py,sha256=
|
|
165
|
+
phoenix/server/api/types/ExperimentRun.py,sha256=8jUIi3ApVCqQHwnYe59CYhrmh5iZ6-QmlH5WpF7UWtM,2990
|
|
166
|
+
phoenix/server/api/types/ExperimentRunAnnotation.py,sha256=zGstMbS5OxNikEhD8VouY7Ls7YbxKm-0EmqvGeY3-DI,1773
|
|
165
167
|
phoenix/server/api/types/ExportedFile.py,sha256=e3GTn7B5LgsTbqiwjhMCQH7VsiqXitrBO4aCMS1lHsg,163
|
|
166
168
|
phoenix/server/api/types/Functionality.py,sha256=tzV9xdhB8zqfsjWxP66NDC7EZsplYkYO7jRbLWJIeeg,382
|
|
167
169
|
phoenix/server/api/types/Inferences.py,sha256=HWuDZZrXPWVoEy_pA3bRsAOUYsCKgAxf9zshasGqu5Y,3403
|
|
@@ -197,12 +199,12 @@ phoenix/server/static/apple-touch-icon-76x76.png,sha256=CT_xT12I0u2i0WU8JzBZBuOQ
|
|
|
197
199
|
phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZVooBJ_SGk2ZkDs,3801
|
|
198
200
|
phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
|
|
199
201
|
phoenix/server/static/index.css,sha256=KKGpx4iwF91VGRm0YN-4cn8oC-oIqC6HecoPf0x3ZM8,1885
|
|
200
|
-
phoenix/server/static/index.js,sha256=
|
|
202
|
+
phoenix/server/static/index.js,sha256=I9Y8svcPruUrXklKcZUxFz5HfLB0vOwczYLSwLAs_04,3500011
|
|
201
203
|
phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
|
|
202
204
|
phoenix/server/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
203
205
|
phoenix/server/templates/index.html,sha256=S4z7qSoNSwnKFAH9r96AR-YJEyoKMd-VMWVlJ_IdzME,2039
|
|
204
206
|
phoenix/session/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
205
|
-
phoenix/session/client.py,sha256=
|
|
207
|
+
phoenix/session/client.py,sha256=R7dV38yjkIQa522nhG6jhDllWcXft2JJ7RlcPYpqiiQ,24846
|
|
206
208
|
phoenix/session/data_extractor.py,sha256=dwhiDu-ISaXr8UI9I-CszZhB5BlUNmdDopjFZvMIXMw,2101
|
|
207
209
|
phoenix/session/evaluation.py,sha256=aKeV8UVOyq3b7CYOwt3cWuLz0xzvMjX7vlEPILJ_fcs,5311
|
|
208
210
|
phoenix/session/session.py,sha256=rjIuSSK2gAYIUPQTJc4E2ebew5o6I070FWRoFn4W3EI,26620
|
|
@@ -242,8 +244,8 @@ phoenix/utilities/logging.py,sha256=lDXd6EGaamBNcQxL4vP1au9-i_SXe0OraUDiJOcszSw,
|
|
|
242
244
|
phoenix/utilities/project.py,sha256=qWsvKnG1oKhOFUowXf9qiOL2ia7jaFe_ijFFHEt8GJo,431
|
|
243
245
|
phoenix/utilities/re.py,sha256=PDve_OLjRTM8yQQJHC8-n3HdIONi7aNils3ZKRZ5uBM,2045
|
|
244
246
|
phoenix/utilities/span_store.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
245
|
-
arize_phoenix-4.4.
|
|
246
|
-
arize_phoenix-4.4.
|
|
247
|
-
arize_phoenix-4.4.
|
|
248
|
-
arize_phoenix-4.4.
|
|
249
|
-
arize_phoenix-4.4.
|
|
247
|
+
arize_phoenix-4.4.4rc5.dist-info/METADATA,sha256=yT0gbMlPkiRkZeC8Yj_eLyaufriREVn3jxz5-qTKDjI,11012
|
|
248
|
+
arize_phoenix-4.4.4rc5.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
249
|
+
arize_phoenix-4.4.4rc5.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
|
|
250
|
+
arize_phoenix-4.4.4rc5.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
|
|
251
|
+
arize_phoenix-4.4.4rc5.dist-info/RECORD,,
|
|
@@ -2,19 +2,14 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
4
|
import re
|
|
5
|
-
from typing import
|
|
5
|
+
from typing import Any, List, Optional, Union
|
|
6
6
|
|
|
7
|
-
from phoenix.datasets.evaluators.
|
|
8
|
-
from phoenix.datasets.types import EvaluationResult,
|
|
7
|
+
from phoenix.datasets.evaluators.utils import Evaluator
|
|
8
|
+
from phoenix.datasets.types import EvaluationResult, TaskOutput
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
class JSONParsable:
|
|
12
|
-
|
|
13
|
-
name = "JSONParsable"
|
|
14
|
-
|
|
15
|
-
def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
|
|
16
|
-
assert exp_run.output is not None
|
|
17
|
-
output = _unwrap_json(exp_run.output.result)
|
|
11
|
+
class JSONParsable(Evaluator):
|
|
12
|
+
def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
|
|
18
13
|
assert isinstance(output, str), "Experiment run output must be a string"
|
|
19
14
|
try:
|
|
20
15
|
json.loads(output)
|
|
@@ -26,18 +21,14 @@ class JSONParsable:
|
|
|
26
21
|
)
|
|
27
22
|
|
|
28
23
|
|
|
29
|
-
class ContainsKeyword:
|
|
30
|
-
annotator_kind = "CODE"
|
|
31
|
-
|
|
24
|
+
class ContainsKeyword(Evaluator):
|
|
32
25
|
def __init__(self, keyword: str, name: Optional[str] = None) -> None:
|
|
33
26
|
self.keyword = keyword
|
|
34
|
-
self.
|
|
27
|
+
self._name = name or f"Contains({repr(keyword)})"
|
|
35
28
|
|
|
36
|
-
def evaluate(self,
|
|
37
|
-
assert
|
|
38
|
-
|
|
39
|
-
assert isinstance(result, str), "Experiment run output must be a string"
|
|
40
|
-
found = self.keyword in result
|
|
29
|
+
def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
|
|
30
|
+
assert isinstance(output, str), "Experiment run output must be a string"
|
|
31
|
+
found = self.keyword in output
|
|
41
32
|
return EvaluationResult(
|
|
42
33
|
score=float(found),
|
|
43
34
|
explanation=(
|
|
@@ -47,18 +38,14 @@ class ContainsKeyword:
|
|
|
47
38
|
)
|
|
48
39
|
|
|
49
40
|
|
|
50
|
-
class ContainsAnyKeyword:
|
|
51
|
-
annotator_kind = "CODE"
|
|
52
|
-
|
|
41
|
+
class ContainsAnyKeyword(Evaluator):
|
|
53
42
|
def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
|
|
54
43
|
self.keywords = keywords
|
|
55
|
-
self.
|
|
44
|
+
self._name = name or f"ContainsAny({keywords})"
|
|
56
45
|
|
|
57
|
-
def evaluate(self,
|
|
58
|
-
assert
|
|
59
|
-
|
|
60
|
-
assert isinstance(result, str), "Experiment run output must be a string"
|
|
61
|
-
found = [keyword for keyword in self.keywords if keyword in result]
|
|
46
|
+
def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
|
|
47
|
+
assert isinstance(output, str), "Experiment run output must be a string"
|
|
48
|
+
found = [keyword for keyword in self.keywords if keyword in output]
|
|
62
49
|
if found:
|
|
63
50
|
explanation = f"the keywords {found} were found in the output"
|
|
64
51
|
else:
|
|
@@ -69,18 +56,14 @@ class ContainsAnyKeyword:
|
|
|
69
56
|
)
|
|
70
57
|
|
|
71
58
|
|
|
72
|
-
class ContainsAllKeywords:
|
|
73
|
-
annotator_kind = "CODE"
|
|
74
|
-
|
|
59
|
+
class ContainsAllKeywords(Evaluator):
|
|
75
60
|
def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
|
|
76
61
|
self.keywords = keywords
|
|
77
|
-
self.
|
|
62
|
+
self._name = name or f"ContainsAll({keywords})"
|
|
78
63
|
|
|
79
|
-
def evaluate(self,
|
|
80
|
-
assert
|
|
81
|
-
|
|
82
|
-
assert isinstance(result, str), "Experiment run output must be a string"
|
|
83
|
-
not_found = [keyword for keyword in self.keywords if keyword not in result]
|
|
64
|
+
def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
|
|
65
|
+
assert isinstance(output, str), "Experiment run output must be a string"
|
|
66
|
+
not_found = [keyword for keyword in self.keywords if keyword not in output]
|
|
84
67
|
if not_found:
|
|
85
68
|
contains_all = False
|
|
86
69
|
explanation = f"the keywords {not_found} were not found in the output"
|
|
@@ -93,21 +76,17 @@ class ContainsAllKeywords:
|
|
|
93
76
|
)
|
|
94
77
|
|
|
95
78
|
|
|
96
|
-
class MatchesRegex:
|
|
97
|
-
annotator_kind = "CODE"
|
|
98
|
-
|
|
79
|
+
class MatchesRegex(Evaluator):
|
|
99
80
|
def __init__(self, pattern: Union[str, re.Pattern[str]], name: Optional[str] = None) -> None:
|
|
100
81
|
if isinstance(pattern, str):
|
|
101
82
|
pattern = re.compile(pattern)
|
|
102
83
|
self.pattern = pattern
|
|
103
84
|
assert isinstance(pattern, re.Pattern)
|
|
104
|
-
self.
|
|
85
|
+
self._name = name or f"matches_({pattern})"
|
|
105
86
|
|
|
106
|
-
def evaluate(self,
|
|
107
|
-
assert
|
|
108
|
-
|
|
109
|
-
assert isinstance(result, str), "Experiment run output must be a string"
|
|
110
|
-
matches = self.pattern.findall(result)
|
|
87
|
+
def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
|
|
88
|
+
assert isinstance(output, str), "Experiment run output must be a string"
|
|
89
|
+
matches = self.pattern.findall(output)
|
|
111
90
|
if matches:
|
|
112
91
|
explanation = (
|
|
113
92
|
f"the substrings {matches} matched the regex pattern {self.pattern.pattern}"
|
|
@@ -118,10 +97,3 @@ class MatchesRegex:
|
|
|
118
97
|
score=float(bool(matches)),
|
|
119
98
|
explanation=explanation,
|
|
120
99
|
)
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
# Someday we'll do typing checking in unit tests.
|
|
124
|
-
if TYPE_CHECKING:
|
|
125
|
-
_: ExperimentEvaluator
|
|
126
|
-
_ = JSONParsable()
|
|
127
|
-
_ = ContainsKeyword("test")
|
|
@@ -1,14 +1,23 @@
|
|
|
1
1
|
import re
|
|
2
|
-
from
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
from phoenix.datasets.
|
|
2
|
+
from types import MappingProxyType
|
|
3
|
+
from typing import Any, Callable, Optional, Type
|
|
4
|
+
|
|
5
|
+
from phoenix.datasets.evaluators.utils import (
|
|
6
|
+
ExampleInput,
|
|
7
|
+
ExampleMetadata,
|
|
8
|
+
ExperimentEvaluator,
|
|
9
|
+
LLMEvaluator,
|
|
10
|
+
_unwrap_json,
|
|
11
|
+
)
|
|
12
|
+
from phoenix.datasets.types import (
|
|
13
|
+
EvaluationResult,
|
|
14
|
+
TaskOutput,
|
|
15
|
+
)
|
|
6
16
|
from phoenix.evals.models.base import BaseModel as LLMBaseModel
|
|
7
17
|
from phoenix.evals.utils import snap_to_rail
|
|
8
18
|
|
|
9
19
|
|
|
10
|
-
class LLMCriteriaEvaluator:
|
|
11
|
-
annotator_kind = "LLM"
|
|
20
|
+
class LLMCriteriaEvaluator(LLMEvaluator):
|
|
12
21
|
_base_template = (
|
|
13
22
|
"Determine if the following text is {criteria}. {description}"
|
|
14
23
|
"First, explain step-by-step why you think the text is or is not {criteria}. Then provide "
|
|
@@ -37,21 +46,23 @@ class LLMCriteriaEvaluator:
|
|
|
37
46
|
self.criteria = criteria
|
|
38
47
|
self.description = description
|
|
39
48
|
self.template = self._format_base_template(self.criteria, self.description)
|
|
40
|
-
self.
|
|
49
|
+
self._name = name
|
|
41
50
|
|
|
42
|
-
def evaluate(self,
|
|
43
|
-
formatted_template = self._format_eval_template(
|
|
51
|
+
def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
|
|
52
|
+
formatted_template = self._format_eval_template(output)
|
|
44
53
|
unparsed_response = self.model._generate(formatted_template)
|
|
45
54
|
return self._parse_eval_output(unparsed_response)
|
|
46
55
|
|
|
47
|
-
async def async_evaluate(
|
|
48
|
-
|
|
56
|
+
async def async_evaluate(
|
|
57
|
+
self, *, output: Optional[TaskOutput] = None, **_: Any
|
|
58
|
+
) -> EvaluationResult:
|
|
59
|
+
formatted_template = self._format_eval_template(output)
|
|
49
60
|
unparsed_response = await self.model._async_generate(formatted_template)
|
|
50
61
|
return self._parse_eval_output(unparsed_response)
|
|
51
62
|
|
|
52
|
-
def _format_eval_template(self,
|
|
53
|
-
assert
|
|
54
|
-
result = _unwrap_json(
|
|
63
|
+
def _format_eval_template(self, output: TaskOutput) -> str:
|
|
64
|
+
assert output is not None
|
|
65
|
+
result = _unwrap_json(output)
|
|
55
66
|
return self.template.format(text=str(result))
|
|
56
67
|
|
|
57
68
|
def _parse_eval_output(self, unparsed_response: str) -> EvaluationResult:
|
|
@@ -137,8 +148,7 @@ def _parse_label_from_explanation(raw_string: str) -> str:
|
|
|
137
148
|
return raw_string
|
|
138
149
|
|
|
139
150
|
|
|
140
|
-
class RelevanceEvaluator:
|
|
141
|
-
annotator_kind = "LLM"
|
|
151
|
+
class RelevanceEvaluator(LLMEvaluator):
|
|
142
152
|
template = (
|
|
143
153
|
"Determine if the following response is relevant to the query. In this context, "
|
|
144
154
|
"'relevance' means that the response directly addresses the core question or topic of the "
|
|
@@ -162,19 +172,24 @@ class RelevanceEvaluator:
|
|
|
162
172
|
def __init__(
|
|
163
173
|
self,
|
|
164
174
|
model: LLMBaseModel,
|
|
165
|
-
get_query: Optional[Callable[[
|
|
166
|
-
get_response: Optional[Callable[[
|
|
175
|
+
get_query: Optional[Callable[[ExampleInput, ExampleMetadata], str]] = None,
|
|
176
|
+
get_response: Optional[Callable[[Optional[TaskOutput], ExampleMetadata], str]] = None,
|
|
167
177
|
name: str = "RelevanceEvaluator",
|
|
168
178
|
):
|
|
169
179
|
self.model = model
|
|
170
|
-
self.
|
|
180
|
+
self._name = name
|
|
171
181
|
self.get_query = get_query or self._default_get_query
|
|
172
182
|
self.get_response = get_response or self._default_get_response
|
|
173
183
|
|
|
174
|
-
def _format_eval_template(
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
184
|
+
def _format_eval_template(
|
|
185
|
+
self,
|
|
186
|
+
output: Optional[TaskOutput] = None,
|
|
187
|
+
input: ExampleInput = MappingProxyType({}),
|
|
188
|
+
metadata: ExampleMetadata = MappingProxyType({}),
|
|
189
|
+
) -> str:
|
|
190
|
+
assert output is not None
|
|
191
|
+
query = self.get_query(input, metadata)
|
|
192
|
+
response = self.get_response(output, metadata)
|
|
178
193
|
return self.template.format(query=query, response=response)
|
|
179
194
|
|
|
180
195
|
def _parse_eval_output(self, unparsed_response: str) -> EvaluationResult:
|
|
@@ -195,19 +210,35 @@ class RelevanceEvaluator:
|
|
|
195
210
|
metadata={},
|
|
196
211
|
)
|
|
197
212
|
|
|
198
|
-
def _default_get_query(self,
|
|
199
|
-
return str(
|
|
213
|
+
def _default_get_query(self, input: ExampleInput, *args: Any, **kwargs: Any) -> str:
|
|
214
|
+
return str(input)
|
|
200
215
|
|
|
201
|
-
def _default_get_response(
|
|
202
|
-
|
|
203
|
-
|
|
216
|
+
def _default_get_response(
|
|
217
|
+
self, output: Optional[TaskOutput] = None, *args: Any, **kwargs: Any
|
|
218
|
+
) -> str:
|
|
219
|
+
assert output is not None
|
|
220
|
+
return str(_unwrap_json(output))
|
|
204
221
|
|
|
205
|
-
def evaluate(
|
|
206
|
-
|
|
222
|
+
def evaluate(
|
|
223
|
+
self,
|
|
224
|
+
*,
|
|
225
|
+
output: Optional[TaskOutput] = None,
|
|
226
|
+
metadata: ExampleMetadata = MappingProxyType({}),
|
|
227
|
+
input: ExampleInput = MappingProxyType({}),
|
|
228
|
+
**_: Any,
|
|
229
|
+
) -> EvaluationResult:
|
|
230
|
+
formatted_template = self._format_eval_template(output, input, metadata)
|
|
207
231
|
unparsed_response = self.model._generate(formatted_template)
|
|
208
232
|
return self._parse_eval_output(unparsed_response)
|
|
209
233
|
|
|
210
|
-
async def async_evaluate(
|
|
211
|
-
|
|
234
|
+
async def async_evaluate(
|
|
235
|
+
self,
|
|
236
|
+
*,
|
|
237
|
+
output: Optional[TaskOutput] = None,
|
|
238
|
+
metadata: ExampleMetadata = MappingProxyType({}),
|
|
239
|
+
input: ExampleInput = MappingProxyType({}),
|
|
240
|
+
**_: Any,
|
|
241
|
+
) -> EvaluationResult:
|
|
242
|
+
formatted_template = self._format_eval_template(output, input, metadata)
|
|
212
243
|
unparsed_response = await self.model._async_generate(formatted_template)
|
|
213
244
|
return self._parse_eval_output(unparsed_response)
|