arize-phoenix 0.0.2rc3__py3-none-any.whl → 0.0.2rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic.
- {arize_phoenix-0.0.2rc3.dist-info → arize_phoenix-0.0.2rc5.dist-info}/METADATA +25 -21
- {arize_phoenix-0.0.2rc3.dist-info → arize_phoenix-0.0.2rc5.dist-info}/RECORD +25 -26
- phoenix/__about__.py +1 -1
- phoenix/__init__.py +2 -2
- phoenix/core/embedding_dimension.py +33 -0
- phoenix/datasets/__init__.py +2 -1
- phoenix/datasets/dataset.py +31 -4
- phoenix/{server → datasets}/fixtures.py +47 -10
- phoenix/datasets/validation.py +1 -1
- phoenix/metrics/metrics.py +29 -5
- phoenix/metrics/mixins.py +11 -3
- phoenix/metrics/timeseries.py +11 -7
- phoenix/pointcloud/clustering.py +3 -3
- phoenix/pointcloud/pointcloud.py +9 -7
- phoenix/server/api/input_types/Granularity.py +2 -0
- phoenix/server/api/interceptor.py +28 -0
- phoenix/server/api/types/Dimension.py +23 -33
- phoenix/server/api/types/EmbeddingDimension.py +39 -111
- phoenix/server/api/types/TimeSeries.py +117 -3
- phoenix/server/api/types/UMAPPoints.py +62 -14
- phoenix/server/main.py +3 -3
- phoenix/server/static/index.js +720 -634
- phoenix/session/session.py +48 -6
- phoenix/server/api/types/DataQualityTimeSeries.py +0 -36
- phoenix/server/api/types/DriftTimeSeries.py +0 -10
- {arize_phoenix-0.0.2rc3.dist-info → arize_phoenix-0.0.2rc5.dist-info}/WHEEL +0 -0
- {arize_phoenix-0.0.2rc3.dist-info → arize_phoenix-0.0.2rc5.dist-info}/licenses/LICENSE +0 -0
{arize_phoenix-0.0.2rc3.dist-info → arize_phoenix-0.0.2rc5.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: arize-phoenix
-Version: 0.0.2rc3
+Version: 0.0.2rc5
 Summary: ML Observability in your notebook
 Project-URL: Documentation, https://github.com/Arize-ai/phoenix#readme
 Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -65,12 +65,12 @@ Phoenix provides MLOps insights at lightning speed with zero-config observabilit
 
 **_Phoenix is under active development. APIs may change at any time._**
 
--
--
--
--
--
--
+- [Installation](#installation)
+- [Getting Started](#getting-started)
+- [Documentation](#documentation)
+- [Community](#community)
+- [Contributing](#contributing)
+- [License](#license)
 
 ## Installation
 
@@ -87,8 +87,9 @@ After installing `arize-phoenix` in your Jupyter or Colab environment, open your
 ```python
 import phoenix as px
 
-
-px.launch_app(
+datasets = px.load_datasets("sentiment_classification_language_drift")
+session = px.launch_app(datasets.primary, datasets.reference)
+session.view()
 ```
 
 Next, visualize your embeddings and inspect problematic clusters of your production data.
@@ -96,6 +97,7 @@ Next, visualize your embeddings and inspect problematic clusters of your product
 TODO(#297): Include GIF where we navigate to embeddings, zoom in and rotate, and select a cluster.
 
 Don't forget to close the app when you're done.
+
 ```
 px.close_app()
 ```
@@ -109,21 +111,23 @@ For in-depth examples and explanations, read the [docs](https://docs.arize.com/p
 ## Community
 
 Join our community to connect with thousands of machine learning practitioners and ML observability enthusiasts.
-
--
--
--
--
--
--
--
--
--
+
+- 🌍 Join our [Slack community](https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q).
+- 💡 Ask questions and provide feedback in the _#phoenix-support_ channel.
+- 🌟 Leave a star on our [GitHub](https://github.com/Arize-ai/phoenix).
+- 🐞 Report bugs with [GitHub Issues](https://github.com/Arize-ai/phoenix/issues).
+- 🗺️ Check out our [roadmap](https://github.com/orgs/Arize-ai/projects/45) to see where we're heading next.
+- 🎓 Learn the fundamentals of ML observability with our [introductory](https://arize.com/ml-observability-fundamentals/) and [advanced](https://arize.com/blog-course/) courses.
+- ✏️ Check out our [blog](https://arize.com/blog/). TODO(#291): Add blog filter for Phoenix
+- ✉️ Subscribe to our mailing list. TODO(#294): Add link
+- 🐦 Follow us on [Twitter](https://twitter.com/ArizePhoenix).
+- 👔 Check out our LinkedIn. TODO(#292): Add link, fix badge
 
 ## Contributing
 
--
--
+- 💻 Read our [developer's guide](./DEVELOPMENT.md).
+- 🗣️ Join our [Slack community](https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q) and chat with us in the _#phoenix-devs_ channel.
 
 ## License
+
 Arize-Phoenix is licensed under the [Elastic License 2.0 (ELv2)](./LICENSE).
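Taken together, the README changes above replace the bare `px.launch_app(` call with a dataset-loading flow. Below is a minimal sketch of the full loop as this release's README describes it; the use-case name and API calls come straight from the diff, and it assumes arize-phoenix 0.0.2rc5 running in a Jupyter or Colab notebook:

```python
import phoenix as px

# load_datasets is new in 0.0.2rc5; it returns a DatasetDict whose
# .primary and .reference attributes are phoenix Datasets.
datasets = px.load_datasets("sentiment_classification_language_drift")

# launch_app now takes the two datasets and returns a Session;
# view() renders the app inline in the notebook.
session = px.launch_app(datasets.primary, datasets.reference)
session.view()

# Shut the app down when finished, as the README reminds you.
px.close_app()
```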
{arize_phoenix-0.0.2rc3.dist-info → arize_phoenix-0.0.2rc5.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
-phoenix/__about__.py,sha256=
-phoenix/__init__.py,sha256=
+phoenix/__about__.py,sha256=Rcreqov76fDT2KTP7xGIbEQJbsc9Ci3LJLC7G-moRXA,25
+phoenix/__init__.py,sha256=DmsdM2c7lcyD2nFPzG0VBqr6SjwCh0PMkAj2dupbtGw,142
 phoenix/config.py,sha256=6QOq4xK3anOC1hZloymFfWzsts7SNFAJhtvmZVJem1k,1326
 phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 phoenix/services.py,sha256=O53yauOjcc4ov_ihyHarsJoP9dity49JMc_Db3vX2nw,3091
@@ -7,55 +7,54 @@ phoenix/core/__init__.py,sha256=qleckHhpSoKy0hDR2X7zVYrV91WLFM_wOxS7_5Ar4i4,263
 phoenix/core/dimension.py,sha256=xBtQyQdZXr_hyU1e-bAGlyVinqjTB_2kVsmDdHZzv48,727
 phoenix/core/dimension_data_type.py,sha256=FEBp4p06LlFpAXq-ftxDAFymBtU_pYTFmJjFc6P3BPk,111
 phoenix/core/dimension_type.py,sha256=EKFmPzuGr1cn7t4vD-XDk9s836j6U3iRbSu3Z2sO8sM,171
-phoenix/core/embedding_dimension.py,sha256=
+phoenix/core/embedding_dimension.py,sha256=qYQMfOpDEfUfbOhi9MB1x5OnaYfEgBPE8bC6XKhHirQ,1017
 phoenix/core/model.py,sha256=nbyNFpqHp9GTpa4FHwmmt93df6XbKNJBDD__07Cypmc,7397
-phoenix/datasets/__init__.py,sha256=
-phoenix/datasets/dataset.py,sha256=
+phoenix/datasets/__init__.py,sha256=QEfV-u0qR9MZe5BTcxossAvvvQGjgCb_IVW5MANLtH0,188
+phoenix/datasets/dataset.py,sha256=Nz-bfpp-uYLztx3k0HMzjwDCygpISTkQ16se9ab0qfA,20020
 phoenix/datasets/errors.py,sha256=8Z3jzNzFajki0dVFbDdiKB8EIqVC56csD6wKOguqh_c,7524
 phoenix/datasets/event.py,sha256=YtXb0PGKgorEHwVaoR8tQVE5TjXm1M1FmbbJO913Uno,266
+phoenix/datasets/fixtures.py,sha256=vXyqUZRjLqEsq-Nhgl37bpIy3y1GN2LTYXY-3Nz8mmQ,9636
 phoenix/datasets/schema.py,sha256=y7811ReNAPag8ZAJzAVLA_gC4_j-M0NQCaCvttrfA-c,3041
-phoenix/datasets/validation.py,sha256=
+phoenix/datasets/validation.py,sha256=jxjg5osBkCGhr0sPMV6CejL3TqdUPsxD5vgBCgysncs,6483
 phoenix/metrics/README.md,sha256=5gekqTU-5gGdMwvcfNp2Wlu8p1ul9kGY_jq0XXQusoI,1964
 phoenix/metrics/__init__.py,sha256=VxTJtaatJZBd1k0OGSOkvt2oKbtHmtD5e_qSq6Pt0TU,348
 phoenix/metrics/cardinality.py,sha256=WpZ4P0b3ZX2AQRNC_QZLgHCtl_IV7twv9Tg2VfmT358,799
 phoenix/metrics/embeddings.py,sha256=E_vyZu3fwyyh1Cnt23jDB7hJUMk-kj9WMnERi5Xy0Vc,370
 phoenix/metrics/median.py,sha256=M-d00yh-JVodi7QC-smA6jbAgjbWajYHlOEBKAWk0IY,119
-phoenix/metrics/metrics.py,sha256=
-phoenix/metrics/mixins.py,sha256=
+phoenix/metrics/metrics.py,sha256=VjK0FqcrcT1qUdOvCSCVTES9b4xoNaUabo8Vc2tGeVM,3269
+phoenix/metrics/mixins.py,sha256=YtwtREljfXO2xY4d-ihusPqFkguiRYSnuS3CKfPFRFg,2767
 phoenix/metrics/percent_empty.py,sha256=0pRA-_nFqGgYfTnxe_uIZX9RQV-O1ADzh6KQZIbsXnk,465
-phoenix/metrics/timeseries.py,sha256=
+phoenix/metrics/timeseries.py,sha256=mb7yYsns3ojq4QlqdTIJMDKXR6K6HHCbe4aVGlf68mY,5282
 phoenix/pointcloud/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-phoenix/pointcloud/clustering.py,sha256=
-phoenix/pointcloud/pointcloud.py,sha256=
+phoenix/pointcloud/clustering.py,sha256=q_r4Mmgq4Ntvk7XdvdtllPeEXWwFkn05OMlT7NLI-Bs,777
+phoenix/pointcloud/pointcloud.py,sha256=als0aitTA__4PrSqBk6qPPKe8fIG-ZSnlVRVkfMorBU,2290
 phoenix/pointcloud/projectors.py,sha256=ekZvKYmb0BibaISytzmUgcDwrfW5Fk-kB52HEtnx7jo,557
 phoenix/server/__init__.py,sha256=jzUH8jjxFZJ59UympBQWpHXqWtF0kE5G7eBsc59y-9s,28
 phoenix/server/app.py,sha256=TqEbNgyb-bjxADUldJG2Unjs-wN-EdbsaBdighqlUT4,3434
-phoenix/server/
-phoenix/server/main.py,sha256=VpMidqR_jj-ghCKJklmFkaYnLPrfdcN8NfvnTOoimns,2542
+phoenix/server/main.py,sha256=tIcPCx_WUHosNLpk3ecA2FaxyCoNV1Nx718eCQxhYr4,2535
 phoenix/server/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/server/api/context.py,sha256=PqhhhDdbxFIAaI57dS1y_C-XwXTbkQ6Ny9Ui0fJ_Eo0,399
+phoenix/server/api/interceptor.py,sha256=7LBUtCGHJVLKz8VXE-GPy--_toQ4kI3nXSdPGyNuWMY,872
 phoenix/server/api/loaders.py,sha256=wTtp4Bcv5AjdSF32HnHNfnFWrn67Zp9Cu-hitu-ZDIc,2107
 phoenix/server/api/schema.py,sha256=Pk8nhEFAbhTOD8VSdreVZQsroaU5lWkAKNdtcsA7VGA,1037
 phoenix/server/api/input_types/DimensionInput.py,sha256=Vfx5FmiMKey4-EHDQsQRPzSAMRJMN5oVMLDUl4NKAa8,164
-phoenix/server/api/input_types/Granularity.py,sha256=
+phoenix/server/api/input_types/Granularity.py,sha256=zpVCc49t8wWV34AlMqntZ_3oxpBYngRt1oBxvG6bOqE,2281
 phoenix/server/api/input_types/TimeRange.py,sha256=8GhSVyFC3byuvpcOG2lhC5ZKXgXW0g_UtaVdDPAfxwk,334
 phoenix/server/api/input_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/server/api/types/DataQualityMetric.py,sha256=eq2W8TutOdoeWcRWRUSTNBKR9eP0lfZUb1tlqYJ6flw,243
-phoenix/server/api/types/DataQualityTimeSeries.py,sha256=vT_lxtxeOVwq-v8hKctCPfSCW5I635Z1ctuAOMMU5ds,897
 phoenix/server/api/types/Dataset.py,sha256=AhtsvHB-6PtjdsRO43ZvLjtfkTxoKbKa8Z1tJyY5jKA,5710
-phoenix/server/api/types/Dimension.py,sha256
+phoenix/server/api/types/Dimension.py,sha256=B39ESy5BWNkDh2R0G5iMy8Kt0g7cd27fe9FnruKWdd4,3255
 phoenix/server/api/types/DimensionDataType.py,sha256=TwnepdoO-0kknxHXyO4G8YHZKDCCrCjEYXPYLyfPPww,147
 phoenix/server/api/types/DimensionType.py,sha256=sn_c-NsH04ZJbXAGlNURxgCNCFxvmDuZG7P8z1_rJn0,179
 phoenix/server/api/types/DimensionWithValue.py,sha256=fq975pbIBzPwW4dXF0f-s-FoqkLqoVirapZdymxyxYA,266
 phoenix/server/api/types/DriftMetric.py,sha256=xkJVWmwXNzaeAb7t-phbs7eIpUDT0QNZtJe6l3RBFa0,129
-phoenix/server/api/types/
-phoenix/server/api/types/EmbeddingDimension.py,sha256=fKvuag3Gm-SP22kof3TCc9qNBcub3xaojmkOkCWbKPw,15982
+phoenix/server/api/types/EmbeddingDimension.py,sha256=391VLgmtz6_9oP8wqruvUwhmEDRKOS5A4XCIdnPZwXQ,12268
 phoenix/server/api/types/EmbeddingMetadata.py,sha256=_bsYv1GPBBPl39ffbDaTHcOcGWI_zY7IAbUZQqTTxsc,226
 phoenix/server/api/types/Event.py,sha256=Uq-RlzaAzgqPQ7pFYF1qXBnlIvcQ4R0wIxkMHK61dD0,264
 phoenix/server/api/types/EventMetadata.py,sha256=9L6D6twmDvNJ0C09euPrZF6ZE3nTkn2WmiunxhjK6jQ,306
 phoenix/server/api/types/Model.py,sha256=vlgBgp3XU4DJ1lbQc-2lDS7PNYVjyJhserctaSbqdp0,2858
-phoenix/server/api/types/TimeSeries.py,sha256=
-phoenix/server/api/types/UMAPPoints.py,sha256=
+phoenix/server/api/types/TimeSeries.py,sha256=3OyJ-HQ1m_FDBkFFOBFcTt6nXb4KWyzl7F6OEbsOMds,4364
+phoenix/server/api/types/UMAPPoints.py,sha256=dPemcJ_afOsAneOI_aRnqgM9pGZYMOghaP5KsGqDWvE,3142
 phoenix/server/api/types/__init__.py,sha256=77AN3W0O7WVSxPUQEgASD-I2nkyoRcUvOTNxcRs66gU,332
 phoenix/server/api/types/node.py,sha256=b7WzOizw9RbidVaspMrEGe43wrCcwDmg6JrhM65styE,3687
 phoenix/server/api/types/pagination.py,sha256=pP0xyv1BCMCEzLTP7jDq7HAKFY0hPHUWr1KqSs8QZ7U,5229
@@ -70,12 +69,12 @@ phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZV
 phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
 phoenix/server/static/index.css,sha256=jeV8eWPiHUcUmb_0yp_rI1odc-RnxlXVgMT-x9HGWbo,1817
 phoenix/server/static/index.html,sha256=GxcHJSEWqjPiXM5ogPiAvZSiXBerEx-rVUYbtZEW184,661
-phoenix/server/static/index.js,sha256=
+phoenix/server/static/index.js,sha256=vUOQV0qU836SP3inue2ekJddI5hcSLIBRGLWM_wAaLQ,2119681
 phoenix/session/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-phoenix/session/session.py,sha256=
+phoenix/session/session.py,sha256=9IftFku8qhpZfXuk55mA5Pp-fCwFTfg5DRKa0-c8Jo4,4848
 phoenix/utils/__init__.py,sha256=alIDGBnxWH4JvP-UW-7N99seBBi0r1GV1h8f1ERFBec,21
 phoenix/utils/utils.py,sha256=hZK3a_nLFYiQb1O6EcMF3mVmhTjnfuJ5WMcjhvOu7zk,427
-arize_phoenix-0.0.
-arize_phoenix-0.0.
-arize_phoenix-0.0.
-arize_phoenix-0.0.
+arize_phoenix-0.0.2rc5.dist-info/METADATA,sha256=XO3GJezeR3QkhIUcZ2KkcDKC_0he7CmAn6jDuHUJ_GM,5275
+arize_phoenix-0.0.2rc5.dist-info/WHEEL,sha256=Fd6mP6ydyRguakwUJ05oBE7fh2IPxgtDN9IwHJ9OqJQ,87
+arize_phoenix-0.0.2rc5.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
+arize_phoenix-0.0.2rc5.dist-info/RECORD,,
phoenix/__about__.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.0.2rc3"
+__version__ = "0.0.2rc5"
phoenix/__init__.py
CHANGED
@@ -1,2 +1,2 @@
-from .datasets import Dataset, EmbeddingColumnNames, Schema
-from .session.session import close_app, launch_app
+from .datasets import Dataset, EmbeddingColumnNames, Schema, load_datasets
+from .session.session import active_session, close_app, launch_app
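The second import line adds `active_session` to the top-level API. The diff shows only the export, not the implementation, so this usage sketch assumes it returns the currently running `Session` (or `None` when no app is up):

```python
import phoenix as px

# Hypothetical usage: re-acquire the running session instead of relaunching.
session = px.active_session()
if session is not None:
    session.view()
```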
phoenix/core/embedding_dimension.py
CHANGED
@@ -1,6 +1,39 @@
 from dataclasses import dataclass
+from typing import Set
+
+from phoenix.datasets.dataset import DatasetType
+from phoenix.datasets.event import EventId
 
 
 @dataclass
 class EmbeddingDimension:
     name: str
+
+
+def calculate_drift_ratio(events: Set[EventId]) -> float:
+    """
+    Calculates the drift score of the cluster. The score will be a value
+    representing the balance of points between the primary and the reference
+    datasets, and will be on a scale between 1 (all primary) and -1 (all
+    reference), with 0 being an even balance between the two datasets.
+
+    Returns
+    -------
+    drift_ratio : float
+
+    """
+    if not events:
+        return float("nan")
+
+    primary_point_count = 0
+    reference_point_count = 0
+
+    for event in events:
+        if event.dataset_id == DatasetType.PRIMARY:
+            primary_point_count += 1
+        else:
+            reference_point_count += 1
+
+    return (primary_point_count - reference_point_count) / (
+        primary_point_count + reference_point_count
+    )
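A worked example of the drift-ratio arithmetic added above. `EventId`'s constructor isn't shown in this diff, so a hypothetical namedtuple with the one attribute the function reads (`dataset_id`) stands in for it, along with a stand-in `DatasetType`:

```python
from collections import namedtuple
from enum import Enum, auto

class DatasetType(Enum):  # stand-in for phoenix.datasets.dataset.DatasetType
    PRIMARY = auto()
    REFERENCE = auto()

Event = namedtuple("Event", ["id", "dataset_id"])  # stand-in for EventId

# A cluster with three primary points and one reference point.
events = {
    Event(0, DatasetType.PRIMARY),
    Event(1, DatasetType.PRIMARY),
    Event(2, DatasetType.PRIMARY),
    Event(3, DatasetType.REFERENCE),
}

primary = sum(e.dataset_id is DatasetType.PRIMARY for e in events)
reference = len(events) - primary
print((primary - reference) / (primary + reference))  # (3 - 1) / (3 + 1) = 0.5
```

A score of 0.5 leans toward the primary dataset; -1.0 would mean a cluster made up entirely of reference points.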
phoenix/datasets/__init__.py
CHANGED
phoenix/datasets/dataset.py
CHANGED
@@ -4,7 +4,7 @@ import sys
 import uuid
 from copy import deepcopy
 from dataclasses import fields, replace
-from datetime import datetime
+from datetime import datetime, timedelta
 from enum import Enum
 from functools import cached_property
 from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast
@@ -37,7 +37,28 @@ if hasattr(sys, "ps1"):
 
 class Dataset:
     """
-    A dataset
+    A dataset to use for analysis using phoenix.
+    Used to construct a phoenix session via px.launch_app
+
+    Parameters
+    ----------
+    dataframe : pandas.DataFrame
+        The pandas dataframe containing the data to analyze
+    schema : phoenix.Schema
+        the schema of the dataset. Maps dataframe columns to the appropriate
+        model inference dimensions (features, predictions, actuals).
+    name : str, optional
+        The name of the dataset. If not provided, a random name will be generated.
+        Is helpful for identifying the dataset in the application.
+
+    Returns
+    -------
+    dataset : Session
+        The session object that can be used to view the application
+
+    Examples
+    --------
+    >>> primary_dataset = px.Dataset(dataframe=production_dataframe, schema=schema, name="primary")
     """
 
     _data_file_name: str = "data.parquet"
@@ -85,9 +106,15 @@ class Dataset:
 
     @cached_property
     def end_time(self) -> datetime:
-        """
+        """
+        Returns the datetime of the latest inference in the dataset.
+        end_datetime equals max(timestamp) + 1 microsecond, so that it can be
+        used as part of a right-open interval.
+        """
         timestamp_col_name: str = cast(str, self.schema.timestamp_column_name)
-        end_datetime: datetime = self.__dataframe[timestamp_col_name].max()
+        end_datetime: datetime = self.__dataframe[timestamp_col_name].max() + timedelta(
+            microseconds=1,
+        )  # adding a microsecond, so it can be used as part of a right open interval
         return end_datetime
 
     @property
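Why `end_time` gains a microsecond: queries that slice the dataset use right-open intervals `[start, end)`, and without the bump the row holding the maximum timestamp would fall outside the interval. A small self-contained illustration with pandas (the timestamps are made up):

```python
from datetime import timedelta

import pandas as pd

timestamps = pd.Series(pd.to_datetime(["2023-01-01 00:00:00", "2023-01-02 12:30:00"]))
end = timestamps.max() + timedelta(microseconds=1)

# Right-open interval [min, end): the one-microsecond bump keeps the
# latest inference inside the interval.
mask = (timestamps >= timestamps.min()) & (timestamps < end)
print(mask.all())  # True
```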
phoenix/{server → datasets}/fixtures.py
CHANGED
@@ -1,11 +1,12 @@
 import logging
 import os
 from dataclasses import dataclass, replace
-from typing import Tuple
+from typing import Dict, Tuple
 
 from pandas import read_parquet
 
-from
+from .dataset import Dataset
+from .schema import EmbeddingColumnNames, Schema
 
 logger = logging.getLogger(__name__)
 
@@ -189,23 +190,24 @@ FIXTURES: Tuple[Fixture, ...] = (
 NAME_TO_FIXTURE = {fixture.name: fixture for fixture in FIXTURES}
 
 
-def download_fixture_if_missing(fixture_name: str) ->
+def download_fixture_if_missing(fixture_name: str) -> Tuple[Dataset, Dataset]:
     """
     Downloads primary and reference datasets for a fixture if they are not found
     locally.
     """
     fixture = _get_fixture_by_name(fixture_name=fixture_name)
     primary_dataset_name, reference_dataset_name = get_dataset_names_from_fixture_name(fixture_name)
-    _download_and_persist_dataset_if_missing(
+    primary_dataset = _download_and_persist_dataset_if_missing(
         dataset_name=primary_dataset_name,
         dataset_url=fixture.primary_dataset_url,
         schema=fixture.primary_schema,
     )
-    _download_and_persist_dataset_if_missing(
+    reference_dataset = _download_and_persist_dataset_if_missing(
         dataset_name=reference_dataset_name,
         dataset_url=fixture.reference_dataset_url,
         schema=fixture.reference_schema,
     )
+    return primary_dataset, reference_dataset
 
 
 def get_dataset_names_from_fixture_name(fixture_name: str) -> Tuple[str, str]:
@@ -223,27 +225,62 @@ def _get_fixture_by_name(fixture_name: str) -> Fixture:
     if the input fixture name does not match any known fixture names.
     """
     if fixture_name not in NAME_TO_FIXTURE:
-
+        valid_fixture_names = ", ".join(NAME_TO_FIXTURE.keys())
+        raise ValueError(f'"{fixture_name}" is invalid. Valid names are: {valid_fixture_names}')
     return NAME_TO_FIXTURE[fixture_name]
 
 
 def _download_and_persist_dataset_if_missing(
     dataset_name: str, dataset_url: str, schema: Schema
-) ->
+) -> Dataset:
     """
     Downloads a dataset from the given URL if it is not found locally.
     """
     try:
-        Dataset.from_name(dataset_name)
-        return
+        return Dataset.from_name(dataset_name)
     except FileNotFoundError:
         pass
 
     logger.info(f'Downloading dataset: "{dataset_name}"')
-    Dataset(
+    dataset = Dataset(
         dataframe=read_parquet(dataset_url),
         schema=schema,
         name=dataset_name,
         persist_to_disc=True,
     )
     logger.info("Download complete.")
+    return dataset
+
+
+@dataclass(frozen=True)
+class DatasetDict(Dict[str, Dataset]):
+    """A dictionary of datasets, split out by dataset type (primary, reference)."""
+
+    primary: Dataset
+    reference: Dataset
+
+
+def load_datasets(use_case: str) -> DatasetDict:
+    """
+    Loads the primary and reference datasets for a given use-case.
+
+    Parameters
+    ----------
+    use_case: str
+        Name of the phoenix supported use case
+        Valid values include:
+            - "sentiment_classification_language_drift"
+            - "fashion_mnist"
+            - "ner_token_drift"
+            - "credit_card_fraud"
+            - "click_through_rate"
+
+
+    Returns
+    _______
+    datasets: DatasetDict
+        A dictionary of datasets, split out by dataset type (primary, reference).
+
+    """
+    primary_dataset, reference_dataset = download_fixture_if_missing(use_case)
+    return DatasetDict(primary=primary_dataset, reference=reference_dataset)
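Because `download_fixture_if_missing` and `_download_and_persist_dataset_if_missing` now return the datasets they load instead of discarding them, `load_datasets` can compose them directly. A hedged sketch of calling the module after the rename; the attribute access on `DatasetDict` follows its field declarations above:

```python
from phoenix.datasets.fixtures import load_datasets  # module path after the rename

# Downloads the parquet files on first use; afterwards the local copies
# are picked up by Dataset.from_name.
datasets = load_datasets("fashion_mnist")
primary, reference = datasets.primary, datasets.reference
```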
phoenix/datasets/validation.py
CHANGED
@@ -69,7 +69,7 @@ def _check_valid_embedding_data(dataframe: DataFrame, schema: Schema) -> List[er
             embedding_errors.append(
                 err.InvalidEmbeddingVectorDataType(
                     embedding_feature_name=embedding_name,
-                    vector_column_type=str(type(
+                    vector_column_type=str(type(vector)),
                 )
             )
             break
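The completed expression records the Python type of the offending vector value in the validation error, for example:

```python
vector = "not-an-array"  # an embedding cell that should hold a vector
print(str(type(vector)))  # <class 'str'>
```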
phoenix/metrics/metrics.py
CHANGED
@@ -1,13 +1,17 @@
+import math
 import warnings
+from functools import cached_property
 from typing import Union, cast
 
 import numpy as np
 import numpy.typing as npt
 import pandas as pd
 import sklearn  # type: ignore
+from scipy.spatial.distance import euclidean  # type: ignore
 
 from .mixins import (
     BaseMetric,
+    DriftOperator,
     EvaluationMetric,
     OptionalUnaryOperator,
     UnaryOperator,
@@ -18,7 +22,7 @@ from .mixins import (
 
 class Count(OptionalUnaryOperator, ZeroInitialValue, BaseMetric):
     def calc(self, df: pd.DataFrame) -> int:
-        return df.loc[:, self.operand].count() if self.operand else
+        return df.loc[:, self.operand].count() if self.operand else df.size
 
 
 class Sum(UnaryOperator, BaseMetric):
@@ -29,7 +33,7 @@ class Sum(UnaryOperator, BaseMetric):
 class VectorSum(UnaryOperator, VectorOperator, ZeroInitialValue, BaseMetric):
     def calc(self, df: pd.DataFrame) -> Union[float, npt.NDArray[np.float64]]:
         return np.sum(  # type: ignore
-            df.loc[:, self.operand].to_numpy(),
+            df.loc[:, self.operand].dropna().to_numpy(),
             initial=self.initial_value(),
         )
 
@@ -45,9 +49,7 @@ class VectorMean(UnaryOperator, VectorOperator, BaseMetric):
         warnings.simplefilter("ignore", category=RuntimeWarning)
         return cast(
             Union[float, npt.NDArray[np.float64]],
-            np.mean(
-                df.loc[:, self.operand].to_numpy(),
-            ),
+            np.mean(df.loc[:, self.operand].dropna()),
         )
 
 
@@ -80,3 +82,25 @@ class AccuracyScore(EvaluationMetric):
         return cast(
             float, sklearn.metrics.accuracy_score(df.loc[:, self.actual], df.loc[:, self.predicted])
         )
+
+
+class EuclideanDistance(DriftOperator, VectorOperator):
+    @cached_property
+    def ref_value(self) -> Union[float, npt.NDArray[np.float64]]:
+        if self.reference_data is None or self.reference_data.empty:
+            return float("nan")
+        return cast(
+            Union[float, npt.NDArray[np.float64]],
+            np.mean(self.reference_data.loc[:, self.operand].dropna()),
+        )
+
+    def calc(self, df: pd.DataFrame) -> float:
+        if df.empty or (isinstance(self.ref_value, float) and not math.isfinite(self.ref_value)):
+            return float("nan")
+        return cast(
+            float,
+            euclidean(
+                np.mean(df.loc[:, self.operand].dropna()),
+                self.ref_value,
+            ),
+        )
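A hedged usage sketch of the new `EuclideanDistance` metric: it compares the mean vector of the current frame against the cached mean of the reference frame. The `operand` keyword is an assumption inferred from the `self.operand` attribute the mixins use; the column name and data are made up:

```python
import numpy as np
import pandas as pd

from phoenix.metrics.metrics import EuclideanDistance

primary = pd.DataFrame({"embedding": [np.array([1.0, 0.0]), np.array([0.0, 1.0])]})
reference = pd.DataFrame({"embedding": [np.array([0.0, 0.0]), np.array([0.0, 0.0])]})

# operand names the embedding column; reference_data comes from DriftOperator.
metric = EuclideanDistance(operand="embedding", reference_data=reference)
print(metric.calc(primary))  # distance between the two column means, ~0.707
```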
phoenix/metrics/mixins.py
CHANGED
@@ -4,7 +4,7 @@ BaseMetric. Other mixins provide specialized functionalities. Mixins rely
 on cooperative multiple inheritance and method resolution order in Python.
 """
 from abc import ABC, abstractmethod
-from typing import Any, Mapping, Optional, Tuple
+from typing import Any, Mapping, Optional, Tuple
 
 import numpy as np
 import pandas as pd
@@ -21,9 +21,9 @@ class ZeroInitialValue(ABC):
 
 
 class VectorOperator(ABC):
-    shape:
+    shape: int
 
-    def __init__(self, shape:
+    def __init__(self, shape: int = 0, **kwargs: Any):
         self.shape = shape
         super().__init__(**kwargs)
 
@@ -89,3 +89,11 @@ class EvaluationMetric(BaseMetric, ABC):
 
     def input_columns(self) -> Tuple[ColumnName, ...]:
         return (self.predicted, self.actual)
+
+
+class DriftOperator(UnaryOperator, BaseMetric, ABC):
+    reference_data: Optional[pd.DataFrame]
+
+    def __init__(self, reference_data: Optional[pd.DataFrame] = None, **kwargs: Any):
+        self.reference_data = reference_data
+        super().__init__(**kwargs)
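`DriftOperator` follows the cooperative-`__init__` pattern the module docstring describes: each mixin consumes its own keyword and forwards the rest with `super().__init__(**kwargs)`. A standalone sketch (simplified stand-ins, not the phoenix classes) of why that lets a class like `EuclideanDistance(DriftOperator, VectorOperator)` accept both `shape` and `reference_data`:

```python
from typing import Any, Optional

class Base:
    def __init__(self, **kwargs: Any):
        super().__init__(**kwargs)

class VectorOperator(Base):
    def __init__(self, shape: int = 0, **kwargs: Any):
        self.shape = shape
        super().__init__(**kwargs)  # forward leftovers along the MRO

class DriftOperator(Base):
    def __init__(self, reference_data: Optional[list] = None, **kwargs: Any):
        self.reference_data = reference_data
        super().__init__(**kwargs)

class EuclideanDistance(DriftOperator, VectorOperator):
    pass

# Each __init__ in the MRO picks off its own keyword and passes the rest on.
m = EuclideanDistance(shape=2, reference_data=[])
print(m.shape, m.reference_data)  # 2 []
```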
phoenix/metrics/timeseries.py
CHANGED
@@ -1,7 +1,7 @@
 from datetime import datetime, timedelta
 from functools import partial
 from itertools import accumulate, chain, repeat, takewhile
-from typing import Any, Callable, Generator, Iterable, List, Tuple,
+from typing import Any, Callable, Generator, Iterable, List, Tuple, cast
 
 import pandas as pd
 from typing_extensions import TypeAlias
@@ -66,20 +66,20 @@ def _aggregator(
     Calls groupby on the dataframe and apply metric calculations on each group.
     """
     calcs: Tuple[Metric, ...] = tuple(metrics)
-    columns:
+    columns: List[int] = list(
         set(
             dataframe.columns.get_loc(column_name)
             for calc in calcs
            for column_name in calc.input_columns()
         ),
-    )
+    )
     return pd.concat(
         chain(
             (pd.DataFrame(),),
             (
                 dataframe.iloc[
                     slice(*row_interval_from_sorted_time_index(dataframe.index, start, end)),
-                    columns,
+                    columns or [0],  # need at least one, so take the first one
                 ]
                 .groupby(group, group_keys=True)
                 .apply(partial(_calculate, calcs=calcs))
@@ -105,16 +105,20 @@ def _groupers(
     """
     Yields pandas.Groupers from time series parameters.
     """
+    if not sampling_interval:
+        return
     divisible = evaluation_window % sampling_interval == timedelta()
-    max_offset =
+    max_offset = end_time - start_time
+    if divisible and evaluation_window < max_offset:
+        max_offset = evaluation_window
     yield from (
         (
-            start_time if divisible else
+            (start_time if divisible else end_time - offset) - evaluation_window,
             end_time - offset,
             pd.Grouper(  # type: ignore # mypy finds the wrong Grouper
                 freq=evaluation_window,
                 origin=end_time,
-                offset
+                offset=-offset,
                 # Each point in timeseries will be labeled by the end instant of
                 # its evaluation window.
                 label="right",
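A minimal illustration of the `pd.Grouper` parameters being corrected here: with `label="right"` and an `origin` anchored at the end of the series, each time-series point is labeled by the end instant of its evaluation window. The data below is made up:

```python
import pandas as pd

idx = pd.date_range("2023-01-01", periods=6, freq="h")
counts = pd.Series(1, index=idx)
end_time = idx[-1] + pd.Timedelta(hours=1)

# Two-hour windows anchored at end_time, each labeled by its right edge.
windows = counts.groupby(
    pd.Grouper(freq="2h", origin=end_time, offset=pd.Timedelta(0), label="right")
).sum()
print(windows)
```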
phoenix/pointcloud/clustering.py
CHANGED
@@ -7,7 +7,7 @@ from hdbscan import HDBSCAN
 from typing_extensions import TypeAlias
 
 RowIndex: TypeAlias = int
-
+RawCluster: TypeAlias = Set[RowIndex]
 Matrix: TypeAlias = npt.NDArray[np.float64]
 
 
@@ -16,9 +16,9 @@ class Hdbscan:
     min_cluster_size: int = 20
     min_samples: float = 1
 
-    def find_clusters(self, mat: Matrix) -> List[
+    def find_clusters(self, mat: Matrix) -> List[RawCluster]:
         cluster_ids: npt.NDArray[np.int_] = HDBSCAN(**asdict(self)).fit_predict(mat)
-        ans: List[
+        ans: List[RawCluster] = [set() for _ in range(np.max(cluster_ids) + 1)]
         for row_idx, cluster_id in enumerate(cluster_ids):
             if cluster_id > -1:
                 ans[cluster_id].add(row_idx)
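A standalone sketch of the label-to-cluster conversion `find_clusters` performs: HDBSCAN's `fit_predict` yields one integer label per row, with `-1` marking noise, and the loop buckets row indices into one set per cluster id. The label array here is a made-up stand-in for HDBSCAN output:

```python
import numpy as np

cluster_ids = np.array([0, 0, 1, -1, 1, 0])  # -1 = noise, excluded below
clusters = [set() for _ in range(np.max(cluster_ids) + 1)]
for row_idx, cluster_id in enumerate(cluster_ids):
    if cluster_id > -1:
        clusters[cluster_id].add(row_idx)
print(clusters)  # [{0, 1, 5}, {2, 4}]
```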
phoenix/pointcloud/pointcloud.py
CHANGED
@@ -1,16 +1,17 @@
 from dataclasses import dataclass
-from typing import Dict, Hashable, List, Mapping, Protocol,
+from typing import Dict, Hashable, List, Mapping, Protocol, Tuple, TypeVar
 
 import numpy as np
 import numpy.typing as npt
 from typing_extensions import TypeAlias
 
-
+from phoenix.pointcloud.clustering import RawCluster
+
 Vector: TypeAlias = npt.NDArray[np.float64]
 Matrix: TypeAlias = npt.NDArray[np.float64]
-ClusterId: TypeAlias = int
 RowIndex: TypeAlias = int
-
+Identifier = TypeVar("Identifier", bound=Hashable)
+ClusterId: TypeAlias = int
 
 
 class DimensionalityReducer(Protocol):
@@ -19,7 +20,7 @@ class DimensionalityReducer(Protocol):
 
 
 class ClustersFinder(Protocol):
-    def find_clusters(self, mat: Matrix) -> List[
+    def find_clusters(self, mat: Matrix) -> List[RawCluster]:
         ...
 
 
@@ -48,15 +49,16 @@ class PointCloud:
         Returns
         -------
         projections : dictionary
-            Projected vectors in the low
+            Projected vectors in the low dimensional space, mapped back to the
             input vectors' identifiers.
 
-        cluster_membership:
+        cluster_membership: dictionary
             Cluster membership by way of cluster_ids in the form of integers
             0,1,2,... mapped back to the input vectors' identifiers. Note that
             some vectors may not belong to any cluster and are excluded here.
 
         """
+
        if not data:
            return {}, {}
        identifiers, vectors = zip(*data.items())