arize-phoenix 0.0.2rc3__py3-none-any.whl → 0.0.2rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

arize_phoenix-0.0.2rc3.dist-info/METADATA → arize_phoenix-0.0.2rc5.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: arize-phoenix
- Version: 0.0.2rc3
+ Version: 0.0.2rc5
  Summary: ML Observability in your notebook
  Project-URL: Documentation, https://github.com/Arize-ai/phoenix#readme
  Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -65,12 +65,12 @@ Phoenix provides MLOps insights at lightning speed with zero-config observabilit

  **_Phoenix is under active development. APIs may change at any time._**

- - [Installation](#installation)
- - [Getting Started](#getting-started)
- - [Documentation](#documentation)
- - [Community](#community)
- - [Contributing](#contributing)
- - [License](#license)
+ - [Installation](#installation)
+ - [Getting Started](#getting-started)
+ - [Documentation](#documentation)
+ - [Community](#community)
+ - [Contributing](#contributing)
+ - [License](#license)

  ## Installation

@@ -87,8 +87,9 @@ After installing `arize-phoenix` in your Jupyter or Colab environment, open your
  ```python
  import phoenix as px

- train_ds, prod_ds = px.load_dataset("sentiment_classification_language_drift")
- px.launch_app(train_ds, prod_ds)
+ datasets = px.load_datasets("sentiment_classification_language_drift")
+ session = px.launch_app(datasets.primary, datasets.reference)
+ session.view()
  ```

  Next, visualize your embeddings and inspect problematic clusters of your production data.
@@ -96,6 +97,7 @@ Next, visualize your embeddings and inspect problematic clusters of your product
  TODO(#297): Include GIF where we navigate to embeddings, zoom in and rotate, and select a cluster.

  Don't forget to close the app when you're done.
+
  ```
  px.close_app()
  ```
@@ -109,21 +111,23 @@ For in-depth examples and explanations, read the [docs](https://docs.arize.com/p
  ## Community

  Join our community to connect with thousands of machine learning practitioners and ML observability enthusiasts.
- - 🌍 Join our [Slack community](https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q).
- - 💡 Ask questions and provide feedback in the *#phoenix-support* channel.
- - 🌟 Leave a star on our [GitHub](https://github.com/Arize-ai/phoenix).
- - 🐞 Report bugs with [GitHub Issues](https://github.com/Arize-ai/phoenix/issues).
- - 🗺️ Check out our [roadmap](https://github.com/orgs/Arize-ai/projects/45) to see where we're heading next.
- - 🎓 Learn the fundamentals of ML observability with our [introductory](https://arize.com/ml-observability-fundamentals/) and [advanced](https://arize.com/blog-course/) courses.
- - ✏️ Check out our [blog](https://arize.com/blog/). TODO(#291): Add blog filter for Phoenix
- - ✉️ Subscribe to our mailing list. TODO(#294): Add link
- - 🐦 Follow us on [Twitter](https://twitter.com/ArizePhoenix).
- - 👔 Check out our LinkedIn. TODO(#292): Add link, fix badge
+
+ - 🌍 Join our [Slack community](https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q).
+ - 💡 Ask questions and provide feedback in the _#phoenix-support_ channel.
+ - 🌟 Leave a star on our [GitHub](https://github.com/Arize-ai/phoenix).
+ - 🐞 Report bugs with [GitHub Issues](https://github.com/Arize-ai/phoenix/issues).
+ - 🗺️ Check out our [roadmap](https://github.com/orgs/Arize-ai/projects/45) to see where we're heading next.
+ - 🎓 Learn the fundamentals of ML observability with our [introductory](https://arize.com/ml-observability-fundamentals/) and [advanced](https://arize.com/blog-course/) courses.
+ - ✏️ Check out our [blog](https://arize.com/blog/). TODO(#291): Add blog filter for Phoenix
+ - ✉️ Subscribe to our mailing list. TODO(#294): Add link
+ - 🐦 Follow us on [Twitter](https://twitter.com/ArizePhoenix).
+ - 👔 Check out our LinkedIn. TODO(#292): Add link, fix badge

  ## Contributing

- - 💻 Read our [developer's guide](./DEVELOPMENT.md).
- - 🗣️ Join our [Slack community](https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q) and chat with us in the *#phoenix-devs* channel.
+ - 💻 Read our [developer's guide](./DEVELOPMENT.md).
+ - 🗣️ Join our [Slack community](https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q) and chat with us in the _#phoenix-devs_ channel.

  ## License
+
  Arize-Phoenix is licensed under the [Elastic License 2.0 (ELv2)](./LICENSE).
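
The quickstart change above replaces `px.load_dataset` (which returned a tuple of two datasets) with `px.load_datasets`, which returns a single object exposing `primary` and `reference` attributes, and `launch_app` now hands back a session object that can render the app inline. Putting the pieces of the updated README together into one runnable sketch (assuming a notebook environment with `arize-phoenix` installed):

```python
import phoenix as px

# load_datasets returns a DatasetDict with .primary and .reference attributes
datasets = px.load_datasets("sentiment_classification_language_drift")

# launch_app now returns a session object instead of being fire-and-forget
session = px.launch_app(datasets.primary, datasets.reference)

# render the app inline in the notebook
session.view()

# shut the app down when finished
px.close_app()
```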
arize_phoenix-0.0.2rc3.dist-info/RECORD → arize_phoenix-0.0.2rc5.dist-info/RECORD RENAMED
@@ -1,5 +1,5 @@
- phoenix/__about__.py,sha256=t_VuTRZc8mSmKgvF5Cg6y7s1LcUdbuFBxC9Xis1nJbc,25
- phoenix/__init__.py,sha256=kCljvlVCuFP3DDO4L-te0ei0PlhrORfnbHjISKw61ZE,111
+ phoenix/__about__.py,sha256=Rcreqov76fDT2KTP7xGIbEQJbsc9Ci3LJLC7G-moRXA,25
+ phoenix/__init__.py,sha256=DmsdM2c7lcyD2nFPzG0VBqr6SjwCh0PMkAj2dupbtGw,142
  phoenix/config.py,sha256=6QOq4xK3anOC1hZloymFfWzsts7SNFAJhtvmZVJem1k,1326
  phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
  phoenix/services.py,sha256=O53yauOjcc4ov_ihyHarsJoP9dity49JMc_Db3vX2nw,3091
@@ -7,55 +7,54 @@ phoenix/core/__init__.py,sha256=qleckHhpSoKy0hDR2X7zVYrV91WLFM_wOxS7_5Ar4i4,263
  phoenix/core/dimension.py,sha256=xBtQyQdZXr_hyU1e-bAGlyVinqjTB_2kVsmDdHZzv48,727
  phoenix/core/dimension_data_type.py,sha256=FEBp4p06LlFpAXq-ftxDAFymBtU_pYTFmJjFc6P3BPk,111
  phoenix/core/dimension_type.py,sha256=EKFmPzuGr1cn7t4vD-XDk9s836j6U3iRbSu3Z2sO8sM,171
- phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
+ phoenix/core/embedding_dimension.py,sha256=qYQMfOpDEfUfbOhi9MB1x5OnaYfEgBPE8bC6XKhHirQ,1017
  phoenix/core/model.py,sha256=nbyNFpqHp9GTpa4FHwmmt93df6XbKNJBDD__07Cypmc,7397
- phoenix/datasets/__init__.py,sha256=C34ouS52lAnAJtrsLRbHFpT-pZWGsIO_RZc6zrC4sjg,135
- phoenix/datasets/dataset.py,sha256=AW-Og97LR8Vdw3CVVi4dWx24dyXezcG9TwEyhfKQdm4,19013
+ phoenix/datasets/__init__.py,sha256=QEfV-u0qR9MZe5BTcxossAvvvQGjgCb_IVW5MANLtH0,188
+ phoenix/datasets/dataset.py,sha256=Nz-bfpp-uYLztx3k0HMzjwDCygpISTkQ16se9ab0qfA,20020
  phoenix/datasets/errors.py,sha256=8Z3jzNzFajki0dVFbDdiKB8EIqVC56csD6wKOguqh_c,7524
  phoenix/datasets/event.py,sha256=YtXb0PGKgorEHwVaoR8tQVE5TjXm1M1FmbbJO913Uno,266
+ phoenix/datasets/fixtures.py,sha256=vXyqUZRjLqEsq-Nhgl37bpIy3y1GN2LTYXY-3Nz8mmQ,9636
  phoenix/datasets/schema.py,sha256=y7811ReNAPag8ZAJzAVLA_gC4_j-M0NQCaCvttrfA-c,3041
- phoenix/datasets/validation.py,sha256=OTnr3BVKGUr0dH-3j5oDlFt2jnoa8UF1vCZnYBjtTIk,6490
+ phoenix/datasets/validation.py,sha256=jxjg5osBkCGhr0sPMV6CejL3TqdUPsxD5vgBCgysncs,6483
  phoenix/metrics/README.md,sha256=5gekqTU-5gGdMwvcfNp2Wlu8p1ul9kGY_jq0XXQusoI,1964
  phoenix/metrics/__init__.py,sha256=VxTJtaatJZBd1k0OGSOkvt2oKbtHmtD5e_qSq6Pt0TU,348
  phoenix/metrics/cardinality.py,sha256=WpZ4P0b3ZX2AQRNC_QZLgHCtl_IV7twv9Tg2VfmT358,799
  phoenix/metrics/embeddings.py,sha256=E_vyZu3fwyyh1Cnt23jDB7hJUMk-kj9WMnERi5Xy0Vc,370
  phoenix/metrics/median.py,sha256=M-d00yh-JVodi7QC-smA6jbAgjbWajYHlOEBKAWk0IY,119
- phoenix/metrics/metrics.py,sha256=hG1PxFQUoOLQ6dJWM-Gh7ZSYtJ9kzIJpSAo2G8qJCkk,2414
- phoenix/metrics/mixins.py,sha256=OPDNNTW3n2EvUCS-j0fpgMHuxWFCQlxRrL4kdu008S8,2553
+ phoenix/metrics/metrics.py,sha256=VjK0FqcrcT1qUdOvCSCVTES9b4xoNaUabo8Vc2tGeVM,3269
+ phoenix/metrics/mixins.py,sha256=YtwtREljfXO2xY4d-ihusPqFkguiRYSnuS3CKfPFRFg,2767
  phoenix/metrics/percent_empty.py,sha256=0pRA-_nFqGgYfTnxe_uIZX9RQV-O1ADzh6KQZIbsXnk,465
- phoenix/metrics/timeseries.py,sha256=QGKIHLtX4LSQWUaMcEReLOyiMyLuDQDmPAaesxGiYww,5180
+ phoenix/metrics/timeseries.py,sha256=mb7yYsns3ojq4QlqdTIJMDKXR6K6HHCbe4aVGlf68mY,5282
  phoenix/pointcloud/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
- phoenix/pointcloud/clustering.py,sha256=NaE7rs031hQ5Hk9wuXgL4cs6g02NFxnUFxpq_aN0b9s,768
- phoenix/pointcloud/pointcloud.py,sha256=_hV6PQ0D7uP32eTkxnwOK1pE7cDtw8dhnwQqKtcD-7U,2269
+ phoenix/pointcloud/clustering.py,sha256=q_r4Mmgq4Ntvk7XdvdtllPeEXWwFkn05OMlT7NLI-Bs,777
+ phoenix/pointcloud/pointcloud.py,sha256=als0aitTA__4PrSqBk6qPPKe8fIG-ZSnlVRVkfMorBU,2290
  phoenix/pointcloud/projectors.py,sha256=ekZvKYmb0BibaISytzmUgcDwrfW5Fk-kB52HEtnx7jo,557
  phoenix/server/__init__.py,sha256=jzUH8jjxFZJ59UympBQWpHXqWtF0kE5G7eBsc59y-9s,28
  phoenix/server/app.py,sha256=TqEbNgyb-bjxADUldJG2Unjs-wN-EdbsaBdighqlUT4,3434
- phoenix/server/fixtures.py,sha256=-fXc6vYwFx0SpbtqY75v5DXw4cI51M-vkzzb9Fennxw,8434
- phoenix/server/main.py,sha256=VpMidqR_jj-ghCKJklmFkaYnLPrfdcN8NfvnTOoimns,2542
+ phoenix/server/main.py,sha256=tIcPCx_WUHosNLpk3ecA2FaxyCoNV1Nx718eCQxhYr4,2535
  phoenix/server/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  phoenix/server/api/context.py,sha256=PqhhhDdbxFIAaI57dS1y_C-XwXTbkQ6Ny9Ui0fJ_Eo0,399
+ phoenix/server/api/interceptor.py,sha256=7LBUtCGHJVLKz8VXE-GPy--_toQ4kI3nXSdPGyNuWMY,872
  phoenix/server/api/loaders.py,sha256=wTtp4Bcv5AjdSF32HnHNfnFWrn67Zp9Cu-hitu-ZDIc,2107
  phoenix/server/api/schema.py,sha256=Pk8nhEFAbhTOD8VSdreVZQsroaU5lWkAKNdtcsA7VGA,1037
  phoenix/server/api/input_types/DimensionInput.py,sha256=Vfx5FmiMKey4-EHDQsQRPzSAMRJMN5oVMLDUl4NKAa8,164
- phoenix/server/api/input_types/Granularity.py,sha256=ve6AtPYmo4lq9Znq9_c-1K4L5rh1wT45xooPiGaGRrk,2216
+ phoenix/server/api/input_types/Granularity.py,sha256=zpVCc49t8wWV34AlMqntZ_3oxpBYngRt1oBxvG6bOqE,2281
  phoenix/server/api/input_types/TimeRange.py,sha256=8GhSVyFC3byuvpcOG2lhC5ZKXgXW0g_UtaVdDPAfxwk,334
  phoenix/server/api/input_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  phoenix/server/api/types/DataQualityMetric.py,sha256=eq2W8TutOdoeWcRWRUSTNBKR9eP0lfZUb1tlqYJ6flw,243
- phoenix/server/api/types/DataQualityTimeSeries.py,sha256=vT_lxtxeOVwq-v8hKctCPfSCW5I635Z1ctuAOMMU5ds,897
  phoenix/server/api/types/Dataset.py,sha256=AhtsvHB-6PtjdsRO43ZvLjtfkTxoKbKa8Z1tJyY5jKA,5710
- phoenix/server/api/types/Dimension.py,sha256=-LTEqPCVmzy5nmSC_YvyDKQEkGDHT2m188jaToy74w0,4205
+ phoenix/server/api/types/Dimension.py,sha256=B39ESy5BWNkDh2R0G5iMy8Kt0g7cd27fe9FnruKWdd4,3255
  phoenix/server/api/types/DimensionDataType.py,sha256=TwnepdoO-0kknxHXyO4G8YHZKDCCrCjEYXPYLyfPPww,147
  phoenix/server/api/types/DimensionType.py,sha256=sn_c-NsH04ZJbXAGlNURxgCNCFxvmDuZG7P8z1_rJn0,179
  phoenix/server/api/types/DimensionWithValue.py,sha256=fq975pbIBzPwW4dXF0f-s-FoqkLqoVirapZdymxyxYA,266
  phoenix/server/api/types/DriftMetric.py,sha256=xkJVWmwXNzaeAb7t-phbs7eIpUDT0QNZtJe6l3RBFa0,129
- phoenix/server/api/types/DriftTimeSeries.py,sha256=OqX6BInHMus0ToMlIJT79u_s-Aw2XQX8GcDQ-yuB9cw,158
- phoenix/server/api/types/EmbeddingDimension.py,sha256=fKvuag3Gm-SP22kof3TCc9qNBcub3xaojmkOkCWbKPw,15982
+ phoenix/server/api/types/EmbeddingDimension.py,sha256=391VLgmtz6_9oP8wqruvUwhmEDRKOS5A4XCIdnPZwXQ,12268
  phoenix/server/api/types/EmbeddingMetadata.py,sha256=_bsYv1GPBBPl39ffbDaTHcOcGWI_zY7IAbUZQqTTxsc,226
  phoenix/server/api/types/Event.py,sha256=Uq-RlzaAzgqPQ7pFYF1qXBnlIvcQ4R0wIxkMHK61dD0,264
  phoenix/server/api/types/EventMetadata.py,sha256=9L6D6twmDvNJ0C09euPrZF6ZE3nTkn2WmiunxhjK6jQ,306
  phoenix/server/api/types/Model.py,sha256=vlgBgp3XU4DJ1lbQc-2lDS7PNYVjyJhserctaSbqdp0,2858
- phoenix/server/api/types/TimeSeries.py,sha256=4BSes4jkK49rl3yHzhbkqzWJaZhk8FDctzAeIgunXA4,591
- phoenix/server/api/types/UMAPPoints.py,sha256=UNbHQ3fP2mqIXh3MW6eB8q6G-91jf3t1Rh2gqNumNeU,1555
+ phoenix/server/api/types/TimeSeries.py,sha256=3OyJ-HQ1m_FDBkFFOBFcTt6nXb4KWyzl7F6OEbsOMds,4364
+ phoenix/server/api/types/UMAPPoints.py,sha256=dPemcJ_afOsAneOI_aRnqgM9pGZYMOghaP5KsGqDWvE,3142
  phoenix/server/api/types/__init__.py,sha256=77AN3W0O7WVSxPUQEgASD-I2nkyoRcUvOTNxcRs66gU,332
  phoenix/server/api/types/node.py,sha256=b7WzOizw9RbidVaspMrEGe43wrCcwDmg6JrhM65styE,3687
  phoenix/server/api/types/pagination.py,sha256=pP0xyv1BCMCEzLTP7jDq7HAKFY0hPHUWr1KqSs8QZ7U,5229
@@ -70,12 +69,12 @@ phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZV
  phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
  phoenix/server/static/index.css,sha256=jeV8eWPiHUcUmb_0yp_rI1odc-RnxlXVgMT-x9HGWbo,1817
  phoenix/server/static/index.html,sha256=GxcHJSEWqjPiXM5ogPiAvZSiXBerEx-rVUYbtZEW184,661
- phoenix/server/static/index.js,sha256=wqFEiiGxxsb4ws2d9CvZr584k_L91QA9cOqvCcdGTAE,2100737
+ phoenix/server/static/index.js,sha256=vUOQV0qU836SP3inue2ekJddI5hcSLIBRGLWM_wAaLQ,2119681
  phoenix/session/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- phoenix/session/session.py,sha256=2Fq0CFGsv8XiY83pnO9Xz057JkI7SNWczp0mi-gf-zQ,3423
+ phoenix/session/session.py,sha256=9IftFku8qhpZfXuk55mA5Pp-fCwFTfg5DRKa0-c8Jo4,4848
  phoenix/utils/__init__.py,sha256=alIDGBnxWH4JvP-UW-7N99seBBi0r1GV1h8f1ERFBec,21
  phoenix/utils/utils.py,sha256=hZK3a_nLFYiQb1O6EcMF3mVmhTjnfuJ5WMcjhvOu7zk,427
- arize_phoenix-0.0.2rc3.dist-info/METADATA,sha256=02TyHimms_kaLnfNsZQeoJoRPp_Od4b18JgbooTS0WE,5200
- arize_phoenix-0.0.2rc3.dist-info/WHEEL,sha256=Fd6mP6ydyRguakwUJ05oBE7fh2IPxgtDN9IwHJ9OqJQ,87
- arize_phoenix-0.0.2rc3.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
- arize_phoenix-0.0.2rc3.dist-info/RECORD,,
+ arize_phoenix-0.0.2rc5.dist-info/METADATA,sha256=XO3GJezeR3QkhIUcZ2KkcDKC_0he7CmAn6jDuHUJ_GM,5275
+ arize_phoenix-0.0.2rc5.dist-info/WHEEL,sha256=Fd6mP6ydyRguakwUJ05oBE7fh2IPxgtDN9IwHJ9OqJQ,87
+ arize_phoenix-0.0.2rc5.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
+ arize_phoenix-0.0.2rc5.dist-info/RECORD,,
phoenix/__about__.py CHANGED
@@ -1 +1 @@
- __version__ = "0.0.2rc3"
+ __version__ = "0.0.2rc5"
phoenix/__init__.py CHANGED
@@ -1,2 +1,2 @@
- from .datasets import Dataset, EmbeddingColumnNames, Schema
- from .session.session import close_app, launch_app
+ from .datasets import Dataset, EmbeddingColumnNames, Schema, load_datasets
+ from .session.session import active_session, close_app, launch_app
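
The package root now also re-exports `load_datasets` and `active_session`. The diff does not show `active_session` itself; the sketch below assumes it returns the currently running session, or `None` when no app has been launched — treat that behavior as an assumption:

```python
import phoenix as px

# assumption: active_session() returns the running session, or None
session = px.active_session()
if session is None:
    datasets = px.load_datasets("sentiment_classification_language_drift")
    session = px.launch_app(datasets.primary, datasets.reference)
session.view()
```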
phoenix/core/embedding_dimension.py CHANGED
@@ -1,6 +1,39 @@
  from dataclasses import dataclass
+ from typing import Set
+
+ from phoenix.datasets.dataset import DatasetType
+ from phoenix.datasets.event import EventId


  @dataclass
  class EmbeddingDimension:
      name: str
+
+
+ def calculate_drift_ratio(events: Set[EventId]) -> float:
+     """
+     Calculates the drift score of the cluster. The score will be a value
+     representing the balance of points between the primary and the reference
+     datasets, and will be on a scale between 1 (all primary) and -1 (all
+     reference), with 0 being an even balance between the two datasets.
+
+     Returns
+     -------
+     drift_ratio : float
+
+     """
+     if not events:
+         return float("nan")
+
+     primary_point_count = 0
+     reference_point_count = 0
+
+     for event in events:
+         if event.dataset_id == DatasetType.PRIMARY:
+             primary_point_count += 1
+         else:
+             reference_point_count += 1
+
+     return (primary_point_count - reference_point_count) / (
+         primary_point_count + reference_point_count
+     )
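
The new `calculate_drift_ratio` reduces to the formula `(primary - reference) / (primary + reference)` over a cluster's events. A self-contained toy check of that arithmetic — `FakeEvent` and the `DatasetType` enum below are illustrative stand-ins, not the real `phoenix` classes:

```python
from dataclasses import dataclass
from enum import Enum


class DatasetType(Enum):  # stand-in for phoenix.datasets.dataset.DatasetType
    PRIMARY = 0
    REFERENCE = 1


@dataclass(frozen=True)
class FakeEvent:  # stand-in for phoenix.datasets.event.EventId
    row_id: int
    dataset_id: DatasetType


events = {
    FakeEvent(0, DatasetType.PRIMARY),
    FakeEvent(1, DatasetType.PRIMARY),
    FakeEvent(2, DatasetType.PRIMARY),
    FakeEvent(3, DatasetType.REFERENCE),
}
primary = sum(e.dataset_id == DatasetType.PRIMARY for e in events)
reference = len(events) - primary
print((primary - reference) / (primary + reference))  # (3 - 1) / 4 = 0.5
```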
phoenix/datasets/__init__.py CHANGED
@@ -1,4 +1,5 @@
  from .dataset import Dataset
+ from .fixtures import load_datasets
  from .schema import EmbeddingColumnNames, Schema

- __all__ = ["Dataset", "Schema", "EmbeddingColumnNames"]
+ __all__ = ["Dataset", "Schema", "EmbeddingColumnNames", "load_datasets"]
phoenix/datasets/dataset.py CHANGED
@@ -4,7 +4,7 @@ import sys
  import uuid
  from copy import deepcopy
  from dataclasses import fields, replace
- from datetime import datetime
+ from datetime import datetime, timedelta
  from enum import Enum
  from functools import cached_property
  from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast
@@ -37,7 +37,28 @@ if hasattr(sys, "ps1"):

  class Dataset:
      """
-     A dataset represents data for a set of inferences. It is represented as a dataframe + schema
+     A dataset to use for analysis using phoenix.
+     Used to construct a phoenix session via px.launch_app
+
+     Parameters
+     ----------
+     dataframe : pandas.DataFrame
+         The pandas dataframe containing the data to analyze
+     schema : phoenix.Schema
+         the schema of the dataset. Maps dataframe columns to the appropriate
+         model inference dimensions (features, predictions, actuals).
+     name : str, optional
+         The name of the dataset. If not provided, a random name will be generated.
+         Is helpful for identifying the dataset in the application.
+
+     Returns
+     -------
+     dataset : Session
+         The session object that can be used to view the application
+
+     Examples
+     --------
+     >>> primary_dataset = px.Dataset(dataframe=production_dataframe, schema=schema, name="primary")
      """

      _data_file_name: str = "data.parquet"
@@ -85,9 +106,15 @@ class Dataset:

      @cached_property
      def end_time(self) -> datetime:
-         """Returns the datetime of the latest inference in the dataset"""
+         """
+         Returns the datetime of the latest inference in the dataset.
+         end_datetime equals max(timestamp) + 1 microsecond, so that it can be
+         used as part of a right-open interval.
+         """
          timestamp_col_name: str = cast(str, self.schema.timestamp_column_name)
-         end_datetime: datetime = self.__dataframe[timestamp_col_name].max()
+         end_datetime: datetime = self.__dataframe[timestamp_col_name].max() + timedelta(
+             microseconds=1,
+         )  # adding a microsecond, so it can be used as part of a right open interval
          return end_datetime

      @property
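
The one-microsecond bump to `end_time` exists so that `[start_time, end_time)` is a right-open interval that still contains the latest inference. A small pandas sketch of why the bump is needed:

```python
from datetime import timedelta

import pandas as pd

timestamps = pd.Series(pd.to_datetime(["2023-01-01 00:00:00", "2023-01-01 12:00:00"]))

# without the bump, a right-open filter drops the latest inference
naive_end = timestamps.max()
print((timestamps < naive_end).sum())  # 1 -- the last row is excluded

# with the bump, the same right-open filter keeps every row
end_time = timestamps.max() + timedelta(microseconds=1)
print((timestamps < end_time).sum())  # 2
```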
phoenix/server/fixtures.py → phoenix/datasets/fixtures.py RENAMED
@@ -1,11 +1,12 @@
  import logging
  import os
  from dataclasses import dataclass, replace
- from typing import Tuple
+ from typing import Dict, Tuple

  from pandas import read_parquet

- from phoenix.datasets import Dataset, EmbeddingColumnNames, Schema
+ from .dataset import Dataset
+ from .schema import EmbeddingColumnNames, Schema

  logger = logging.getLogger(__name__)

@@ -189,23 +190,24 @@ FIXTURES: Tuple[Fixture, ...] = (
  NAME_TO_FIXTURE = {fixture.name: fixture for fixture in FIXTURES}


- def download_fixture_if_missing(fixture_name: str) -> None:
+ def download_fixture_if_missing(fixture_name: str) -> Tuple[Dataset, Dataset]:
      """
      Downloads primary and reference datasets for a fixture if they are not found
      locally.
      """
      fixture = _get_fixture_by_name(fixture_name=fixture_name)
      primary_dataset_name, reference_dataset_name = get_dataset_names_from_fixture_name(fixture_name)
-     _download_and_persist_dataset_if_missing(
+     primary_dataset = _download_and_persist_dataset_if_missing(
          dataset_name=primary_dataset_name,
          dataset_url=fixture.primary_dataset_url,
          schema=fixture.primary_schema,
      )
-     _download_and_persist_dataset_if_missing(
+     reference_dataset = _download_and_persist_dataset_if_missing(
          dataset_name=reference_dataset_name,
          dataset_url=fixture.reference_dataset_url,
          schema=fixture.reference_schema,
      )
+     return primary_dataset, reference_dataset


  def get_dataset_names_from_fixture_name(fixture_name: str) -> Tuple[str, str]:
@@ -223,27 +225,62 @@ def _get_fixture_by_name(fixture_name: str) -> Fixture:
      if the input fixture name does not match any known fixture names.
      """
      if fixture_name not in NAME_TO_FIXTURE:
-         raise ValueError(f'"{fixture_name}" is not a valid fixture name.')
+         valid_fixture_names = ", ".join(NAME_TO_FIXTURE.keys())
+         raise ValueError(f'"{fixture_name}" is invalid. Valid names are: {valid_fixture_names}')
      return NAME_TO_FIXTURE[fixture_name]


  def _download_and_persist_dataset_if_missing(
      dataset_name: str, dataset_url: str, schema: Schema
- ) -> None:
+ ) -> Dataset:
      """
      Downloads a dataset from the given URL if it is not found locally.
      """
      try:
-         Dataset.from_name(dataset_name)
-         return
+         return Dataset.from_name(dataset_name)
      except FileNotFoundError:
          pass

      logger.info(f'Downloading dataset: "{dataset_name}"')
-     Dataset(
+     dataset = Dataset(
          dataframe=read_parquet(dataset_url),
          schema=schema,
          name=dataset_name,
          persist_to_disc=True,
      )
      logger.info("Download complete.")
+     return dataset
+
+
+ @dataclass(frozen=True)
+ class DatasetDict(Dict[str, Dataset]):
+     """A dictionary of datasets, split out by dataset type (primary, reference)."""
+
+     primary: Dataset
+     reference: Dataset
+
+
+ def load_datasets(use_case: str) -> DatasetDict:
+     """
+     Loads the primary and reference datasets for a given use-case.
+
+     Parameters
+     ----------
+     use_case: str
+         Name of the phoenix supported use case
+         Valid values include:
+             - "sentiment_classification_language_drift"
+             - "fashion_mnist"
+             - "ner_token_drift"
+             - "credit_card_fraud"
+             - "click_through_rate"
+
+
+     Returns
+     _______
+     datasets: DatasetDict
+         A dictionary of datasets, split out by dataset type (primary, reference).
+
+     """
+     primary_dataset, reference_dataset = download_fixture_if_missing(use_case)
+     return DatasetDict(primary=primary_dataset, reference=reference_dataset)
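
`load_datasets` wraps the download-if-missing logic into a single call and returns a `DatasetDict` whose `primary` and `reference` attributes line up with the two positional arguments `launch_app` expects:

```python
from phoenix.datasets import load_datasets

# downloads the fixture on first use, then reads the cached copy from disk
datasets = load_datasets("sentiment_classification_language_drift")
primary, reference = datasets.primary, datasets.reference
```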
phoenix/datasets/validation.py CHANGED
@@ -69,7 +69,7 @@ def _check_valid_embedding_data(dataframe: DataFrame, schema: Schema) -> List[er
              embedding_errors.append(
                  err.InvalidEmbeddingVectorDataType(
                      embedding_feature_name=embedding_name,
-                     vector_column_type=str(type(vector_column)),
+                     vector_column_type=str(type(vector)),
                  )
              )
              break
phoenix/metrics/metrics.py CHANGED
@@ -1,13 +1,17 @@
+ import math
  import warnings
+ from functools import cached_property
  from typing import Union, cast

  import numpy as np
  import numpy.typing as npt
  import pandas as pd
  import sklearn  # type: ignore
+ from scipy.spatial.distance import euclidean  # type: ignore

  from .mixins import (
      BaseMetric,
+     DriftOperator,
      EvaluationMetric,
      OptionalUnaryOperator,
      UnaryOperator,
18
22
 
19
23
  class Count(OptionalUnaryOperator, ZeroInitialValue, BaseMetric):
20
24
  def calc(self, df: pd.DataFrame) -> int:
21
- return df.loc[:, self.operand].count() if self.operand else len(df)
25
+ return df.loc[:, self.operand].count() if self.operand else df.size
22
26
 
23
27
 
24
28
  class Sum(UnaryOperator, BaseMetric):
@@ -29,7 +33,7 @@ class Sum(UnaryOperator, BaseMetric):
  class VectorSum(UnaryOperator, VectorOperator, ZeroInitialValue, BaseMetric):
      def calc(self, df: pd.DataFrame) -> Union[float, npt.NDArray[np.float64]]:
          return np.sum(  # type: ignore
-             df.loc[:, self.operand].to_numpy(),
+             df.loc[:, self.operand].dropna().to_numpy(),
              initial=self.initial_value(),
          )

@@ -45,9 +49,7 @@ class VectorMean(UnaryOperator, VectorOperator, BaseMetric):
          warnings.simplefilter("ignore", category=RuntimeWarning)
          return cast(
              Union[float, npt.NDArray[np.float64]],
-             np.mean(
-                 df.loc[:, self.operand].to_numpy(),
-             ),
+             np.mean(df.loc[:, self.operand].dropna()),
          )

@@ -80,3 +82,25 @@ class AccuracyScore(EvaluationMetric):
          return cast(
              float, sklearn.metrics.accuracy_score(df.loc[:, self.actual], df.loc[:, self.predicted])
          )
+
+
+ class EuclideanDistance(DriftOperator, VectorOperator):
+     @cached_property
+     def ref_value(self) -> Union[float, npt.NDArray[np.float64]]:
+         if self.reference_data is None or self.reference_data.empty:
+             return float("nan")
+         return cast(
+             Union[float, npt.NDArray[np.float64]],
+             np.mean(self.reference_data.loc[:, self.operand].dropna()),
+         )
+
+     def calc(self, df: pd.DataFrame) -> float:
+         if df.empty or (isinstance(self.ref_value, float) and not math.isfinite(self.ref_value)):
+             return float("nan")
+         return cast(
+             float,
+             euclidean(
+                 np.mean(df.loc[:, self.operand].dropna()),
+                 self.ref_value,
+             ),
+         )
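
The new `EuclideanDistance` metric measures drift as the distance between the mean vector of the current data and the mean vector of the reference data. A standalone sketch of that computation on toy embedding columns (computing the centroids directly rather than through the metric class):

```python
import numpy as np
import pandas as pd
from scipy.spatial.distance import euclidean

reference = pd.DataFrame({"embedding": [np.array([0.0, 0.0]), np.array([2.0, 2.0])]})
current = pd.DataFrame({"embedding": [np.array([3.0, 4.0]), np.array([5.0, 4.0])]})

# centroid of each dataset's vectors, skipping missing rows as the metric does
ref_centroid = np.stack(reference["embedding"].dropna().to_numpy()).mean(axis=0)
cur_centroid = np.stack(current["embedding"].dropna().to_numpy()).mean(axis=0)

# distance between centroids [4, 4] and [1, 1] -> sqrt(18) ~ 4.243
print(euclidean(cur_centroid, ref_centroid))
```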
phoenix/metrics/mixins.py CHANGED
@@ -4,7 +4,7 @@ BaseMetric. Other mixins provide specialized functionalities. Mixins rely
  on cooperative multiple inheritance and method resolution order in Python.
  """
  from abc import ABC, abstractmethod
- from typing import Any, Mapping, Optional, Tuple, Union
+ from typing import Any, Mapping, Optional, Tuple

  import numpy as np
  import pandas as pd
@@ -21,9 +21,9 @@ class ZeroInitialValue(ABC):


  class VectorOperator(ABC):
-     shape: Union[int, Tuple[int, ...]]
+     shape: int

-     def __init__(self, shape: Union[int, Tuple[int, ...]], **kwargs: Any):
+     def __init__(self, shape: int = 0, **kwargs: Any):
          self.shape = shape
          super().__init__(**kwargs)

@@ -89,3 +89,11 @@ class EvaluationMetric(BaseMetric, ABC):

      def input_columns(self) -> Tuple[ColumnName, ...]:
          return (self.predicted, self.actual)
+
+
+ class DriftOperator(UnaryOperator, BaseMetric, ABC):
+     reference_data: Optional[pd.DataFrame]
+
+     def __init__(self, reference_data: Optional[pd.DataFrame] = None, **kwargs: Any):
+         self.reference_data = reference_data
+         super().__init__(**kwargs)
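
`DriftOperator` follows the same cooperative-multiple-inheritance convention described in the module docstring: each mixin's `__init__` consumes its own keyword argument and forwards the rest via `super().__init__(**kwargs)`, so any combination of mixins can be initialized with one flat set of keywords. A minimal standalone sketch of the pattern (simplified names, not the real mixin signatures):

```python
from typing import Any


class Base:
    def __init__(self, **kwargs: Any):
        super().__init__(**kwargs)  # object.__init__ ends the chain


class OperandMixin(Base):
    def __init__(self, operand: str = "", **kwargs: Any):
        self.operand = operand
        super().__init__(**kwargs)


class ReferenceMixin(Base):
    def __init__(self, reference_data: Any = None, **kwargs: Any):
        self.reference_data = reference_data
        super().__init__(**kwargs)


class Drift(ReferenceMixin, OperandMixin):
    pass


# each mixin in the MRO picks up its own keyword and passes the rest along
metric = Drift(operand="embedding", reference_data=[1, 2, 3])
print(metric.operand, metric.reference_data)
```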
phoenix/metrics/timeseries.py CHANGED
@@ -1,7 +1,7 @@
  from datetime import datetime, timedelta
  from functools import partial
  from itertools import accumulate, chain, repeat, takewhile
- from typing import Any, Callable, Generator, Iterable, List, Tuple, Union, cast
+ from typing import Any, Callable, Generator, Iterable, List, Tuple, cast

  import pandas as pd
  from typing_extensions import TypeAlias
@@ -66,20 +66,20 @@ def _aggregator(
      Calls groupby on the dataframe and apply metric calculations on each group.
      """
      calcs: Tuple[Metric, ...] = tuple(metrics)
-     columns: Union[List[int], slice] = list(
+     columns: List[int] = list(
          set(
              dataframe.columns.get_loc(column_name)
              for calc in calcs
              for column_name in calc.input_columns()
          ),
-     ) or slice(None)
+     )
      return pd.concat(
          chain(
              (pd.DataFrame(),),
              (
                  dataframe.iloc[
                      slice(*row_interval_from_sorted_time_index(dataframe.index, start, end)),
-                     columns,
+                     columns or [0],  # need at least one, so take the first one
                  ]
                  .groupby(group, group_keys=True)
                  .apply(partial(_calculate, calcs=calcs))
@@ -105,16 +105,20 @@ def _groupers(
      """
      Yields pandas.Groupers from time series parameters.
      """
+     if not sampling_interval:
+         return
      divisible = evaluation_window % sampling_interval == timedelta()
-     max_offset = evaluation_window if divisible else end_time - start_time
+     max_offset = end_time - start_time
+     if divisible and evaluation_window < max_offset:
+         max_offset = evaluation_window
      yield from (
          (
-             start_time if divisible else max(start_time, end_time - offset - evaluation_window),
+             (start_time if divisible else end_time - offset) - evaluation_window,
              end_time - offset,
              pd.Grouper(  # type: ignore # mypy finds the wrong Grouper
                  freq=evaluation_window,
                  origin=end_time,
-                 offset=offset,
+                 offset=-offset,
                  # Each point in timeseries will be labeled by the end instant of
                  # its evaluation window.
                  label="right",
phoenix/pointcloud/clustering.py CHANGED
@@ -7,7 +7,7 @@ from hdbscan import HDBSCAN
  from typing_extensions import TypeAlias

  RowIndex: TypeAlias = int
- Cluster: TypeAlias = Set[RowIndex]
+ RawCluster: TypeAlias = Set[RowIndex]
  Matrix: TypeAlias = npt.NDArray[np.float64]

@@ -16,9 +16,9 @@ class Hdbscan:
      min_cluster_size: int = 20
      min_samples: float = 1

-     def find_clusters(self, mat: Matrix) -> List[Cluster]:
+     def find_clusters(self, mat: Matrix) -> List[RawCluster]:
          cluster_ids: npt.NDArray[np.int_] = HDBSCAN(**asdict(self)).fit_predict(mat)
-         ans: List[Cluster] = [set() for _ in range(np.max(cluster_ids) + 1)]
+         ans: List[RawCluster] = [set() for _ in range(np.max(cluster_ids) + 1)]
          for row_idx, cluster_id in enumerate(cluster_ids):
              if cluster_id > -1:
                  ans[cluster_id].add(row_idx)
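
The `Cluster` → `RawCluster` rename leaves the conversion logic intact: HDBSCAN's flat label array (where `-1` marks noise) becomes one set of row indices per cluster. A numpy-only sketch of that conversion, with a hard-coded label array standing in for an actual HDBSCAN fit:

```python
from typing import List, Set

import numpy as np

RawCluster = Set[int]

# stand-in for HDBSCAN(...).fit_predict(mat); -1 marks noise points
cluster_ids = np.array([0, 0, 1, -1, 1, 0])

clusters: List[RawCluster] = [set() for _ in range(np.max(cluster_ids) + 1)]
for row_idx, cluster_id in enumerate(cluster_ids):
    if cluster_id > -1:  # noise points belong to no cluster
        clusters[cluster_id].add(row_idx)

print(clusters)  # [{0, 1, 5}, {2, 4}]
```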
phoenix/pointcloud/pointcloud.py CHANGED
@@ -1,16 +1,17 @@
  from dataclasses import dataclass
- from typing import Dict, Hashable, List, Mapping, Protocol, Set, Tuple, TypeVar
+ from typing import Dict, Hashable, List, Mapping, Protocol, Tuple, TypeVar

  import numpy as np
  import numpy.typing as npt
  from typing_extensions import TypeAlias

- Identifier = TypeVar("Identifier", bound=Hashable)
+ from phoenix.pointcloud.clustering import RawCluster
+
  Vector: TypeAlias = npt.NDArray[np.float64]
  Matrix: TypeAlias = npt.NDArray[np.float64]
- ClusterId: TypeAlias = int
  RowIndex: TypeAlias = int
- Cluster: TypeAlias = Set[RowIndex]
+ Identifier = TypeVar("Identifier", bound=Hashable)
+ ClusterId: TypeAlias = int


  class DimensionalityReducer(Protocol):
@@ -19,7 +20,7 @@ class DimensionalityReducer(Protocol):


  class ClustersFinder(Protocol):
-     def find_clusters(self, mat: Matrix) -> List[Cluster]:
+     def find_clusters(self, mat: Matrix) -> List[RawCluster]:
          ...

@@ -48,15 +49,16 @@ class PointCloud:
          Returns
          -------
          projections : dictionary
-             Projected vectors in the low demension space, mapped back to the
+             Projected vectors in the low dimensional space, mapped back to the
              input vectors' identifiers.

-         cluster_membership: dictinary
+         cluster_membership: dictionary
              Cluster membership by way of cluster_ids in the form of integers
              0,1,2,... mapped back to the input vectors' identifiers. Note that
              some vectors may not belong to any cluster and are excluded here.

          """
+
          if not data:
              return {}, {}
          identifiers, vectors = zip(*data.items())
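
`ClustersFinder` is a `typing.Protocol`, so `Hdbscan` satisfies it purely structurally — any object with a matching `find_clusters` method can be plugged into `PointCloud` without inheriting from anything. A minimal sketch of a swap-in finder (a trivial single-cluster stub, purely illustrative):

```python
from typing import List, Set

import numpy as np
import numpy.typing as npt

Matrix = npt.NDArray[np.float64]
RawCluster = Set[int]


class EveryoneTogether:
    """Toy finder: puts every row into one cluster. Matches ClustersFinder structurally."""

    def find_clusters(self, mat: Matrix) -> List[RawCluster]:
        return [set(range(len(mat)))]


finder = EveryoneTogether()
print(finder.find_clusters(np.zeros((4, 2))))  # [{0, 1, 2, 3}]
```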