arize-phoenix 0.0.2rc3__tar.gz → 0.0.2rc5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (90)
  1. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/PKG-INFO +25 -21
  2. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/README.md +24 -20
  3. arize_phoenix-0.0.2rc5/src/phoenix/__about__.py +1 -0
  4. arize_phoenix-0.0.2rc5/src/phoenix/__init__.py +2 -0
  5. arize_phoenix-0.0.2rc5/src/phoenix/core/embedding_dimension.py +39 -0
  6. arize_phoenix-0.0.2rc5/src/phoenix/datasets/__init__.py +5 -0
  7. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/datasets/dataset.py +31 -4
  8. {arize_phoenix-0.0.2rc3/src/phoenix/server → arize_phoenix-0.0.2rc5/src/phoenix/datasets}/fixtures.py +47 -10
  9. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/datasets/validation.py +1 -1
  10. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/metrics/metrics.py +29 -5
  11. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/metrics/mixins.py +11 -3
  12. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/metrics/timeseries.py +11 -7
  13. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/pointcloud/clustering.py +3 -3
  14. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/pointcloud/pointcloud.py +9 -7
  15. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/input_types/Granularity.py +2 -0
  16. arize_phoenix-0.0.2rc5/src/phoenix/server/api/interceptor.py +28 -0
  17. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/types/Dimension.py +23 -33
  18. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/types/EmbeddingDimension.py +39 -111
  19. arize_phoenix-0.0.2rc5/src/phoenix/server/api/types/TimeSeries.py +141 -0
  20. arize_phoenix-0.0.2rc5/src/phoenix/server/api/types/UMAPPoints.py +117 -0
  21. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/main.py +3 -3
  22. arize_phoenix-0.0.2rc5/src/phoenix/server/static/index.js +4581 -0
  23. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/session/session.py +48 -6
  24. arize_phoenix-0.0.2rc3/src/phoenix/__about__.py +0 -1
  25. arize_phoenix-0.0.2rc3/src/phoenix/__init__.py +0 -2
  26. arize_phoenix-0.0.2rc3/src/phoenix/core/embedding_dimension.py +0 -6
  27. arize_phoenix-0.0.2rc3/src/phoenix/datasets/__init__.py +0 -4
  28. arize_phoenix-0.0.2rc3/src/phoenix/server/api/types/DataQualityTimeSeries.py +0 -36
  29. arize_phoenix-0.0.2rc3/src/phoenix/server/api/types/DriftTimeSeries.py +0 -10
  30. arize_phoenix-0.0.2rc3/src/phoenix/server/api/types/TimeSeries.py +0 -27
  31. arize_phoenix-0.0.2rc3/src/phoenix/server/api/types/UMAPPoints.py +0 -69
  32. arize_phoenix-0.0.2rc3/src/phoenix/server/static/index.js +0 -4495
  33. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/.gitignore +0 -0
  34. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/LICENSE +0 -0
  35. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/pyproject.toml +0 -0
  36. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/config.py +0 -0
  37. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/core/__init__.py +0 -0
  38. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/core/dimension.py +0 -0
  39. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/core/dimension_data_type.py +0 -0
  40. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/core/dimension_type.py +0 -0
  41. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/core/model.py +0 -0
  42. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/datasets/errors.py +0 -0
  43. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/datasets/event.py +0 -0
  44. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/datasets/schema.py +0 -0
  45. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/metrics/README.md +0 -0
  46. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/metrics/__init__.py +0 -0
  47. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/metrics/cardinality.py +0 -0
  48. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/metrics/embeddings.py +0 -0
  49. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/metrics/median.py +0 -0
  50. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/metrics/percent_empty.py +0 -0
  51. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/pointcloud/__init__.py +0 -0
  52. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/pointcloud/projectors.py +0 -0
  53. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/py.typed +0 -0
  54. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/__init__.py +0 -0
  55. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/__init__.py +0 -0
  56. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/context.py +0 -0
  57. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/input_types/DimensionInput.py +0 -0
  58. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/input_types/TimeRange.py +0 -0
  59. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/input_types/__init__.py +0 -0
  60. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/loaders.py +0 -0
  61. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/schema.py +0 -0
  62. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/types/DataQualityMetric.py +0 -0
  63. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/types/Dataset.py +0 -0
  64. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/types/DimensionDataType.py +0 -0
  65. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/types/DimensionType.py +0 -0
  66. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/types/DimensionWithValue.py +0 -0
  67. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/types/DriftMetric.py +0 -0
  68. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/types/EmbeddingMetadata.py +0 -0
  69. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/types/Event.py +0 -0
  70. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/types/EventMetadata.py +0 -0
  71. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/types/Model.py +0 -0
  72. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/types/__init__.py +0 -0
  73. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/types/node.py +0 -0
  74. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/api/types/pagination.py +0 -0
  75. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/app.py +0 -0
  76. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/static/apple-touch-icon-114x114.png +0 -0
  77. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/static/apple-touch-icon-120x120.png +0 -0
  78. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/static/apple-touch-icon-144x144.png +0 -0
  79. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/static/apple-touch-icon-152x152.png +0 -0
  80. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/static/apple-touch-icon-180x180.png +0 -0
  81. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/static/apple-touch-icon-72x72.png +0 -0
  82. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/static/apple-touch-icon-76x76.png +0 -0
  83. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/static/apple-touch-icon.png +0 -0
  84. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/static/favicon.ico +0 -0
  85. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/static/index.css +0 -0
  86. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/server/static/index.html +0 -0
  87. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/services.py +0 -0
  88. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/session/__init__.py +0 -0
  89. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/utils/__init__.py +0 -0
  90. {arize_phoenix-0.0.2rc3 → arize_phoenix-0.0.2rc5}/src/phoenix/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arize-phoenix
3
- Version: 0.0.2rc3
3
+ Version: 0.0.2rc5
4
4
  Summary: ML Observability in your notebook
5
5
  Project-URL: Documentation, https://github.com/Arize-ai/phoenix#readme
6
6
  Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -65,12 +65,12 @@ Phoenix provides MLOps insights at lightning speed with zero-config observabilit
65
65
 
66
66
  **_Phoenix is under active development. APIs may change at any time._**
67
67
 
68
- - [Installation](#installation)
69
- - [Getting Started](#getting-started)
70
- - [Documentation](#documentation)
71
- - [Community](#community)
72
- - [Contributing](#contributing)
73
- - [License](#license)
68
+ - [Installation](#installation)
69
+ - [Getting Started](#getting-started)
70
+ - [Documentation](#documentation)
71
+ - [Community](#community)
72
+ - [Contributing](#contributing)
73
+ - [License](#license)
74
74
 
75
75
  ## Installation
76
76
 
@@ -87,8 +87,9 @@ After installing `arize-phoenix` in your Jupyter or Colab environment, open your
87
87
  ```python
88
88
  import phoenix as px
89
89
 
90
- train_ds, prod_ds = px.load_dataset("sentiment_classification_language_drift")
91
- px.launch_app(train_ds, prod_ds)
90
+ datasets = px.load_datasets("sentiment_classification_language_drift")
91
+ session = px.launch_app(datasets.primary, datasets.reference)
92
+ session.view()
92
93
  ```
93
94
 
94
95
  Next, visualize your embeddings and inspect problematic clusters of your production data.
@@ -96,6 +97,7 @@ Next, visualize your embeddings and inspect problematic clusters of your product
96
97
  TODO(#297): Include GIF where we navigate to embeddings, zoom in and rotate, and select a cluster.
97
98
 
98
99
  Don't forget to close the app when you're done.
100
+
99
101
  ```
100
102
  px.close_app()
101
103
  ```
@@ -109,21 +111,23 @@ For in-depth examples and explanations, read the [docs](https://docs.arize.com/p
109
111
  ## Community
110
112
 
111
113
  Join our community to connect with thousands of machine learning practitioners and ML observability enthusiasts.
112
- - 🌍 Join our [Slack community](https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q).
113
- - 💡 Ask questions and provide feedback in the *#phoenix-support* channel.
114
- - 🌟 Leave a star on our [GitHub](https://github.com/Arize-ai/phoenix).
115
- - 🐞 Report bugs with [GitHub Issues](https://github.com/Arize-ai/phoenix/issues).
116
- - 🗺️ Check out our [roadmap](https://github.com/orgs/Arize-ai/projects/45) to see where we're heading next.
117
- - 🎓 Learn the fundamentals of ML observability with our [introductory](https://arize.com/ml-observability-fundamentals/) and [advanced](https://arize.com/blog-course/) courses.
118
- - ✏️ Check out our [blog](https://arize.com/blog/). TODO(#291): Add blog filter for Phoenix
119
- - ✉️ Subscribe to our mailing list. TODO(#294): Add link
120
- - 🐦 Follow us on [Twitter](https://twitter.com/ArizePhoenix).
121
- - 👔 Check out our LinkedIn. TODO(#292): Add link, fix badge
114
+
115
+ - 🌍 Join our [Slack community](https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q).
116
+ - 💡 Ask questions and provide feedback in the _#phoenix-support_ channel.
117
+ - 🌟 Leave a star on our [GitHub](https://github.com/Arize-ai/phoenix).
118
+ - 🐞 Report bugs with [GitHub Issues](https://github.com/Arize-ai/phoenix/issues).
119
+ - 🗺️ Check out our [roadmap](https://github.com/orgs/Arize-ai/projects/45) to see where we're heading next.
120
+ - 🎓 Learn the fundamentals of ML observability with our [introductory](https://arize.com/ml-observability-fundamentals/) and [advanced](https://arize.com/blog-course/) courses.
121
+ - ✏️ Check out our [blog](https://arize.com/blog/). TODO(#291): Add blog filter for Phoenix
122
+ - ✉️ Subscribe to our mailing list. TODO(#294): Add link
123
+ - 🐦 Follow us on [Twitter](https://twitter.com/ArizePhoenix).
124
+ - 👔 Check out our LinkedIn. TODO(#292): Add link, fix badge
122
125
 
123
126
  ## Contributing
124
127
 
125
- - 💻 Read our [developer's guide](./DEVELOPMENT.md).
126
- - 🗣️ Join our [Slack community](https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q) and chat with us in the *#phoenix-devs* channel.
128
+ - 💻 Read our [developer's guide](./DEVELOPMENT.md).
129
+ - 🗣️ Join our [Slack community](https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q) and chat with us in the _#phoenix-devs_ channel.
127
130
 
128
131
  ## License
132
+
129
133
  Arize-Phoenix is licensed under the [Elastic License 2.0 (ELv2)](./LICENSE).
@@ -25,12 +25,12 @@ Phoenix provides MLOps insights at lightning speed with zero-config observabilit
25
25
 
26
26
  **_Phoenix is under active development. APIs may change at any time._**
27
27
 
28
- - [Installation](#installation)
29
- - [Getting Started](#getting-started)
30
- - [Documentation](#documentation)
31
- - [Community](#community)
32
- - [Contributing](#contributing)
33
- - [License](#license)
28
+ - [Installation](#installation)
29
+ - [Getting Started](#getting-started)
30
+ - [Documentation](#documentation)
31
+ - [Community](#community)
32
+ - [Contributing](#contributing)
33
+ - [License](#license)
34
34
 
35
35
  ## Installation
36
36
 
@@ -47,8 +47,9 @@ After installing `arize-phoenix` in your Jupyter or Colab environment, open your
47
47
  ```python
48
48
  import phoenix as px
49
49
 
50
- train_ds, prod_ds = px.load_dataset("sentiment_classification_language_drift")
51
- px.launch_app(train_ds, prod_ds)
50
+ datasets = px.load_datasets("sentiment_classification_language_drift")
51
+ session = px.launch_app(datasets.primary, datasets.reference)
52
+ session.view()
52
53
  ```
53
54
 
54
55
  Next, visualize your embeddings and inspect problematic clusters of your production data.
@@ -56,6 +57,7 @@ Next, visualize your embeddings and inspect problematic clusters of your product
56
57
  TODO(#297): Include GIF where we navigate to embeddings, zoom in and rotate, and select a cluster.
57
58
 
58
59
  Don't forget to close the app when you're done.
60
+
59
61
  ```
60
62
  px.close_app()
61
63
  ```
@@ -69,21 +71,23 @@ For in-depth examples and explanations, read the [docs](https://docs.arize.com/p
69
71
  ## Community
70
72
 
71
73
  Join our community to connect with thousands of machine learning practitioners and ML observability enthusiasts.
72
- - 🌍 Join our [Slack community](https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q).
73
- - 💡 Ask questions and provide feedback in the *#phoenix-support* channel.
74
- - 🌟 Leave a star on our [GitHub](https://github.com/Arize-ai/phoenix).
75
- - 🐞 Report bugs with [GitHub Issues](https://github.com/Arize-ai/phoenix/issues).
76
- - 🗺️ Check out our [roadmap](https://github.com/orgs/Arize-ai/projects/45) to see where we're heading next.
77
- - 🎓 Learn the fundamentals of ML observability with our [introductory](https://arize.com/ml-observability-fundamentals/) and [advanced](https://arize.com/blog-course/) courses.
78
- - ✏️ Check out our [blog](https://arize.com/blog/). TODO(#291): Add blog filter for Phoenix
79
- - ✉️ Subscribe to our mailing list. TODO(#294): Add link
80
- - 🐦 Follow us on [Twitter](https://twitter.com/ArizePhoenix).
81
- - 👔 Check out our LinkedIn. TODO(#292): Add link, fix badge
74
+
75
+ - 🌍 Join our [Slack community](https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q).
76
+ - 💡 Ask questions and provide feedback in the _#phoenix-support_ channel.
77
+ - 🌟 Leave a star on our [GitHub](https://github.com/Arize-ai/phoenix).
78
+ - 🐞 Report bugs with [GitHub Issues](https://github.com/Arize-ai/phoenix/issues).
79
+ - 🗺️ Check out our [roadmap](https://github.com/orgs/Arize-ai/projects/45) to see where we're heading next.
80
+ - 🎓 Learn the fundamentals of ML observability with our [introductory](https://arize.com/ml-observability-fundamentals/) and [advanced](https://arize.com/blog-course/) courses.
81
+ - ✏️ Check out our [blog](https://arize.com/blog/). TODO(#291): Add blog filter for Phoenix
82
+ - ✉️ Subscribe to our mailing list. TODO(#294): Add link
83
+ - 🐦 Follow us on [Twitter](https://twitter.com/ArizePhoenix).
84
+ - 👔 Check out our LinkedIn. TODO(#292): Add link, fix badge
82
85
 
83
86
  ## Contributing
84
87
 
85
- - 💻 Read our [developer's guide](./DEVELOPMENT.md).
86
- - 🗣️ Join our [Slack community](https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q) and chat with us in the *#phoenix-devs* channel.
88
+ - 💻 Read our [developer's guide](./DEVELOPMENT.md).
89
+ - 🗣️ Join our [Slack community](https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q) and chat with us in the _#phoenix-devs_ channel.
87
90
 
88
91
  ## License
92
+
89
93
  Arize-Phoenix is licensed under the [Elastic License 2.0 (ELv2)](./LICENSE).
@@ -0,0 +1 @@
1
+ __version__ = "0.0.2rc5"
@@ -0,0 +1,2 @@
1
+ from .datasets import Dataset, EmbeddingColumnNames, Schema, load_datasets
2
+ from .session.session import active_session, close_app, launch_app
@@ -0,0 +1,39 @@
1
+ from dataclasses import dataclass
2
+ from typing import Set
3
+
4
+ from phoenix.datasets.dataset import DatasetType
5
+ from phoenix.datasets.event import EventId
6
+
7
+
8
+ @dataclass
9
+ class EmbeddingDimension:
10
+ name: str
11
+
12
+
13
+ def calculate_drift_ratio(events: Set[EventId]) -> float:
14
+ """
15
+ Calculates the drift score of the cluster. The score will be a value
16
+ representing the balance of points between the primary and the reference
17
+ datasets, and will be on a scale between 1 (all primary) and -1 (all
18
+ reference), with 0 being an even balance between the two datasets.
19
+
20
+ Returns
21
+ -------
22
+ drift_ratio : float
23
+
24
+ """
25
+ if not events:
26
+ return float("nan")
27
+
28
+ primary_point_count = 0
29
+ reference_point_count = 0
30
+
31
+ for event in events:
32
+ if event.dataset_id == DatasetType.PRIMARY:
33
+ primary_point_count += 1
34
+ else:
35
+ reference_point_count += 1
36
+
37
+ return (primary_point_count - reference_point_count) / (
38
+ primary_point_count + reference_point_count
39
+ )
@@ -0,0 +1,5 @@
1
+ from .dataset import Dataset
2
+ from .fixtures import load_datasets
3
+ from .schema import EmbeddingColumnNames, Schema
4
+
5
+ __all__ = ["Dataset", "Schema", "EmbeddingColumnNames", "load_datasets"]
@@ -4,7 +4,7 @@ import sys
4
4
  import uuid
5
5
  from copy import deepcopy
6
6
  from dataclasses import fields, replace
7
- from datetime import datetime
7
+ from datetime import datetime, timedelta
8
8
  from enum import Enum
9
9
  from functools import cached_property
10
10
  from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast
@@ -37,7 +37,28 @@ if hasattr(sys, "ps1"):
37
37
 
38
38
  class Dataset:
39
39
  """
40
- A dataset represents data for a set of inferences. It is represented as a dataframe + schema
40
+ A dataset to use for analysis using phoenix.
41
+ Used to construct a phoenix session via px.launch_app
42
+
43
+ Parameters
44
+ ----------
45
+ dataframe : pandas.DataFrame
46
+ The pandas dataframe containing the data to analyze
47
+ schema : phoenix.Schema
48
+ the schema of the dataset. Maps dataframe columns to the appropriate
49
+ model inference dimensions (features, predictions, actuals).
50
+ name : str, optional
51
+ The name of the dataset. If not provided, a random name will be generated.
52
+ Is helpful for identifying the dataset in the application.
53
+
54
+ Returns
55
+ -------
56
+ dataset : Session
57
+ The session object that can be used to view the application
58
+
59
+ Examples
60
+ --------
61
+ >>> primary_dataset = px.Dataset(dataframe=production_dataframe, schema=schema, name="primary")
41
62
  """
42
63
 
43
64
  _data_file_name: str = "data.parquet"
@@ -85,9 +106,15 @@ class Dataset:
85
106
 
86
107
  @cached_property
87
108
  def end_time(self) -> datetime:
88
- """Returns the datetime of the latest inference in the dataset"""
109
+ """
110
+ Returns the datetime of the latest inference in the dataset.
111
+ end_datetime equals max(timestamp) + 1 microsecond, so that it can be
112
+ used as part of a right-open interval.
113
+ """
89
114
  timestamp_col_name: str = cast(str, self.schema.timestamp_column_name)
90
- end_datetime: datetime = self.__dataframe[timestamp_col_name].max()
115
+ end_datetime: datetime = self.__dataframe[timestamp_col_name].max() + timedelta(
116
+ microseconds=1,
117
+ ) # adding a microsecond, so it can be used as part of a right open interval
91
118
  return end_datetime
92
119
 
93
120
  @property
@@ -1,11 +1,12 @@
1
1
  import logging
2
2
  import os
3
3
  from dataclasses import dataclass, replace
4
- from typing import Tuple
4
+ from typing import Dict, Tuple
5
5
 
6
6
  from pandas import read_parquet
7
7
 
8
- from phoenix.datasets import Dataset, EmbeddingColumnNames, Schema
8
+ from .dataset import Dataset
9
+ from .schema import EmbeddingColumnNames, Schema
9
10
 
10
11
  logger = logging.getLogger(__name__)
11
12
 
@@ -189,23 +190,24 @@ FIXTURES: Tuple[Fixture, ...] = (
189
190
  NAME_TO_FIXTURE = {fixture.name: fixture for fixture in FIXTURES}
190
191
 
191
192
 
192
- def download_fixture_if_missing(fixture_name: str) -> None:
193
+ def download_fixture_if_missing(fixture_name: str) -> Tuple[Dataset, Dataset]:
193
194
  """
194
195
  Downloads primary and reference datasets for a fixture if they are not found
195
196
  locally.
196
197
  """
197
198
  fixture = _get_fixture_by_name(fixture_name=fixture_name)
198
199
  primary_dataset_name, reference_dataset_name = get_dataset_names_from_fixture_name(fixture_name)
199
- _download_and_persist_dataset_if_missing(
200
+ primary_dataset = _download_and_persist_dataset_if_missing(
200
201
  dataset_name=primary_dataset_name,
201
202
  dataset_url=fixture.primary_dataset_url,
202
203
  schema=fixture.primary_schema,
203
204
  )
204
- _download_and_persist_dataset_if_missing(
205
+ reference_dataset = _download_and_persist_dataset_if_missing(
205
206
  dataset_name=reference_dataset_name,
206
207
  dataset_url=fixture.reference_dataset_url,
207
208
  schema=fixture.reference_schema,
208
209
  )
210
+ return primary_dataset, reference_dataset
209
211
 
210
212
 
211
213
  def get_dataset_names_from_fixture_name(fixture_name: str) -> Tuple[str, str]:
@@ -223,27 +225,62 @@ def _get_fixture_by_name(fixture_name: str) -> Fixture:
223
225
  if the input fixture name does not match any known fixture names.
224
226
  """
225
227
  if fixture_name not in NAME_TO_FIXTURE:
226
- raise ValueError(f'"{fixture_name}" is not a valid fixture name.')
228
+ valid_fixture_names = ", ".join(NAME_TO_FIXTURE.keys())
229
+ raise ValueError(f'"{fixture_name}" is invalid. Valid names are: {valid_fixture_names}')
227
230
  return NAME_TO_FIXTURE[fixture_name]
228
231
 
229
232
 
230
233
  def _download_and_persist_dataset_if_missing(
231
234
  dataset_name: str, dataset_url: str, schema: Schema
232
- ) -> None:
235
+ ) -> Dataset:
233
236
  """
234
237
  Downloads a dataset from the given URL if it is not found locally.
235
238
  """
236
239
  try:
237
- Dataset.from_name(dataset_name)
238
- return
240
+ return Dataset.from_name(dataset_name)
239
241
  except FileNotFoundError:
240
242
  pass
241
243
 
242
244
  logger.info(f'Downloading dataset: "{dataset_name}"')
243
- Dataset(
245
+ dataset = Dataset(
244
246
  dataframe=read_parquet(dataset_url),
245
247
  schema=schema,
246
248
  name=dataset_name,
247
249
  persist_to_disc=True,
248
250
  )
249
251
  logger.info("Download complete.")
252
+ return dataset
253
+
254
+
255
+ @dataclass(frozen=True)
256
+ class DatasetDict(Dict[str, Dataset]):
257
+ """A dictionary of datasets, split out by dataset type (primary, reference)."""
258
+
259
+ primary: Dataset
260
+ reference: Dataset
261
+
262
+
263
+ def load_datasets(use_case: str) -> DatasetDict:
264
+ """
265
+ Loads the primary and reference datasets for a given use-case.
266
+
267
+ Parameters
268
+ ----------
269
+ use_case: str
270
+ Name of the phoenix supported use case
271
+ Valid values include:
272
+ - "sentiment_classification_language_drift"
273
+ - "fashion_mnist"
274
+ - "ner_token_drift"
275
+ - "credit_card_fraud"
276
+ - "click_through_rate"
277
+
278
+
279
+ Returns
280
+ _______
281
+ datasets: DatasetDict
282
+ A dictionary of datasets, split out by dataset type (primary, reference).
283
+
284
+ """
285
+ primary_dataset, reference_dataset = download_fixture_if_missing(use_case)
286
+ return DatasetDict(primary=primary_dataset, reference=reference_dataset)
@@ -69,7 +69,7 @@ def _check_valid_embedding_data(dataframe: DataFrame, schema: Schema) -> List[er
69
69
  embedding_errors.append(
70
70
  err.InvalidEmbeddingVectorDataType(
71
71
  embedding_feature_name=embedding_name,
72
- vector_column_type=str(type(vector_column)),
72
+ vector_column_type=str(type(vector)),
73
73
  )
74
74
  )
75
75
  break
@@ -1,13 +1,17 @@
1
+ import math
1
2
  import warnings
3
+ from functools import cached_property
2
4
  from typing import Union, cast
3
5
 
4
6
  import numpy as np
5
7
  import numpy.typing as npt
6
8
  import pandas as pd
7
9
  import sklearn # type: ignore
10
+ from scipy.spatial.distance import euclidean # type: ignore
8
11
 
9
12
  from .mixins import (
10
13
  BaseMetric,
14
+ DriftOperator,
11
15
  EvaluationMetric,
12
16
  OptionalUnaryOperator,
13
17
  UnaryOperator,
@@ -18,7 +22,7 @@ from .mixins import (
18
22
 
19
23
  class Count(OptionalUnaryOperator, ZeroInitialValue, BaseMetric):
20
24
  def calc(self, df: pd.DataFrame) -> int:
21
- return df.loc[:, self.operand].count() if self.operand else len(df)
25
+ return df.loc[:, self.operand].count() if self.operand else df.size
22
26
 
23
27
 
24
28
  class Sum(UnaryOperator, BaseMetric):
@@ -29,7 +33,7 @@ class Sum(UnaryOperator, BaseMetric):
29
33
  class VectorSum(UnaryOperator, VectorOperator, ZeroInitialValue, BaseMetric):
30
34
  def calc(self, df: pd.DataFrame) -> Union[float, npt.NDArray[np.float64]]:
31
35
  return np.sum( # type: ignore
32
- df.loc[:, self.operand].to_numpy(),
36
+ df.loc[:, self.operand].dropna().to_numpy(),
33
37
  initial=self.initial_value(),
34
38
  )
35
39
 
@@ -45,9 +49,7 @@ class VectorMean(UnaryOperator, VectorOperator, BaseMetric):
45
49
  warnings.simplefilter("ignore", category=RuntimeWarning)
46
50
  return cast(
47
51
  Union[float, npt.NDArray[np.float64]],
48
- np.mean(
49
- df.loc[:, self.operand].to_numpy(),
50
- ),
52
+ np.mean(df.loc[:, self.operand].dropna()),
51
53
  )
52
54
 
53
55
 
@@ -80,3 +82,25 @@ class AccuracyScore(EvaluationMetric):
80
82
  return cast(
81
83
  float, sklearn.metrics.accuracy_score(df.loc[:, self.actual], df.loc[:, self.predicted])
82
84
  )
85
+
86
+
87
+ class EuclideanDistance(DriftOperator, VectorOperator):
88
+ @cached_property
89
+ def ref_value(self) -> Union[float, npt.NDArray[np.float64]]:
90
+ if self.reference_data is None or self.reference_data.empty:
91
+ return float("nan")
92
+ return cast(
93
+ Union[float, npt.NDArray[np.float64]],
94
+ np.mean(self.reference_data.loc[:, self.operand].dropna()),
95
+ )
96
+
97
+ def calc(self, df: pd.DataFrame) -> float:
98
+ if df.empty or (isinstance(self.ref_value, float) and not math.isfinite(self.ref_value)):
99
+ return float("nan")
100
+ return cast(
101
+ float,
102
+ euclidean(
103
+ np.mean(df.loc[:, self.operand].dropna()),
104
+ self.ref_value,
105
+ ),
106
+ )
@@ -4,7 +4,7 @@ BaseMetric. Other mixins provide specialized functionalities. Mixins rely
4
4
  on cooperative multiple inheritance and method resolution order in Python.
5
5
  """
6
6
  from abc import ABC, abstractmethod
7
- from typing import Any, Mapping, Optional, Tuple, Union
7
+ from typing import Any, Mapping, Optional, Tuple
8
8
 
9
9
  import numpy as np
10
10
  import pandas as pd
@@ -21,9 +21,9 @@ class ZeroInitialValue(ABC):
21
21
 
22
22
 
23
23
  class VectorOperator(ABC):
24
- shape: Union[int, Tuple[int, ...]]
24
+ shape: int
25
25
 
26
- def __init__(self, shape: Union[int, Tuple[int, ...]], **kwargs: Any):
26
+ def __init__(self, shape: int = 0, **kwargs: Any):
27
27
  self.shape = shape
28
28
  super().__init__(**kwargs)
29
29
 
@@ -89,3 +89,11 @@ class EvaluationMetric(BaseMetric, ABC):
89
89
 
90
90
  def input_columns(self) -> Tuple[ColumnName, ...]:
91
91
  return (self.predicted, self.actual)
92
+
93
+
94
+ class DriftOperator(UnaryOperator, BaseMetric, ABC):
95
+ reference_data: Optional[pd.DataFrame]
96
+
97
+ def __init__(self, reference_data: Optional[pd.DataFrame] = None, **kwargs: Any):
98
+ self.reference_data = reference_data
99
+ super().__init__(**kwargs)
@@ -1,7 +1,7 @@
1
1
  from datetime import datetime, timedelta
2
2
  from functools import partial
3
3
  from itertools import accumulate, chain, repeat, takewhile
4
- from typing import Any, Callable, Generator, Iterable, List, Tuple, Union, cast
4
+ from typing import Any, Callable, Generator, Iterable, List, Tuple, cast
5
5
 
6
6
  import pandas as pd
7
7
  from typing_extensions import TypeAlias
@@ -66,20 +66,20 @@ def _aggregator(
66
66
  Calls groupby on the dataframe and apply metric calculations on each group.
67
67
  """
68
68
  calcs: Tuple[Metric, ...] = tuple(metrics)
69
- columns: Union[List[int], slice] = list(
69
+ columns: List[int] = list(
70
70
  set(
71
71
  dataframe.columns.get_loc(column_name)
72
72
  for calc in calcs
73
73
  for column_name in calc.input_columns()
74
74
  ),
75
- ) or slice(None)
75
+ )
76
76
  return pd.concat(
77
77
  chain(
78
78
  (pd.DataFrame(),),
79
79
  (
80
80
  dataframe.iloc[
81
81
  slice(*row_interval_from_sorted_time_index(dataframe.index, start, end)),
82
- columns,
82
+ columns or [0], # need at least one, so take the first one
83
83
  ]
84
84
  .groupby(group, group_keys=True)
85
85
  .apply(partial(_calculate, calcs=calcs))
@@ -105,16 +105,20 @@ def _groupers(
105
105
  """
106
106
  Yields pandas.Groupers from time series parameters.
107
107
  """
108
+ if not sampling_interval:
109
+ return
108
110
  divisible = evaluation_window % sampling_interval == timedelta()
109
- max_offset = evaluation_window if divisible else end_time - start_time
111
+ max_offset = end_time - start_time
112
+ if divisible and evaluation_window < max_offset:
113
+ max_offset = evaluation_window
110
114
  yield from (
111
115
  (
112
- start_time if divisible else max(start_time, end_time - offset - evaluation_window),
116
+ (start_time if divisible else end_time - offset) - evaluation_window,
113
117
  end_time - offset,
114
118
  pd.Grouper( # type: ignore # mypy finds the wrong Grouper
115
119
  freq=evaluation_window,
116
120
  origin=end_time,
117
- offset=offset,
121
+ offset=-offset,
118
122
  # Each point in timeseries will be labeled by the end instant of
119
123
  # its evaluation window.
120
124
  label="right",
@@ -7,7 +7,7 @@ from hdbscan import HDBSCAN
7
7
  from typing_extensions import TypeAlias
8
8
 
9
9
  RowIndex: TypeAlias = int
10
- Cluster: TypeAlias = Set[RowIndex]
10
+ RawCluster: TypeAlias = Set[RowIndex]
11
11
  Matrix: TypeAlias = npt.NDArray[np.float64]
12
12
 
13
13
 
@@ -16,9 +16,9 @@ class Hdbscan:
16
16
  min_cluster_size: int = 20
17
17
  min_samples: float = 1
18
18
 
19
- def find_clusters(self, mat: Matrix) -> List[Cluster]:
19
+ def find_clusters(self, mat: Matrix) -> List[RawCluster]:
20
20
  cluster_ids: npt.NDArray[np.int_] = HDBSCAN(**asdict(self)).fit_predict(mat)
21
- ans: List[Cluster] = [set() for _ in range(np.max(cluster_ids) + 1)]
21
+ ans: List[RawCluster] = [set() for _ in range(np.max(cluster_ids) + 1)]
22
22
  for row_idx, cluster_id in enumerate(cluster_ids):
23
23
  if cluster_id > -1:
24
24
  ans[cluster_id].add(row_idx)
@@ -1,16 +1,17 @@
1
1
  from dataclasses import dataclass
2
- from typing import Dict, Hashable, List, Mapping, Protocol, Set, Tuple, TypeVar
2
+ from typing import Dict, Hashable, List, Mapping, Protocol, Tuple, TypeVar
3
3
 
4
4
  import numpy as np
5
5
  import numpy.typing as npt
6
6
  from typing_extensions import TypeAlias
7
7
 
8
- Identifier = TypeVar("Identifier", bound=Hashable)
8
+ from phoenix.pointcloud.clustering import RawCluster
9
+
9
10
  Vector: TypeAlias = npt.NDArray[np.float64]
10
11
  Matrix: TypeAlias = npt.NDArray[np.float64]
11
- ClusterId: TypeAlias = int
12
12
  RowIndex: TypeAlias = int
13
- Cluster: TypeAlias = Set[RowIndex]
13
+ Identifier = TypeVar("Identifier", bound=Hashable)
14
+ ClusterId: TypeAlias = int
14
15
 
15
16
 
16
17
  class DimensionalityReducer(Protocol):
@@ -19,7 +20,7 @@ class DimensionalityReducer(Protocol):
19
20
 
20
21
 
21
22
  class ClustersFinder(Protocol):
22
- def find_clusters(self, mat: Matrix) -> List[Cluster]:
23
+ def find_clusters(self, mat: Matrix) -> List[RawCluster]:
23
24
  ...
24
25
 
25
26
 
@@ -48,15 +49,16 @@ class PointCloud:
48
49
  Returns
49
50
  -------
50
51
  projections : dictionary
51
- Projected vectors in the low demension space, mapped back to the
52
+ Projected vectors in the low dimensional space, mapped back to the
52
53
  input vectors' identifiers.
53
54
 
54
- cluster_membership: dictinary
55
+ cluster_membership: dictionary
55
56
  Cluster membership by way of cluster_ids in the form of integers
56
57
  0,1,2,... mapped back to the input vectors' identifiers. Note that
57
58
  some vectors may not belong to any cluster and are excluded here.
58
59
 
59
60
  """
61
+
60
62
  if not data:
61
63
  return {}, {}
62
64
  identifiers, vectors = zip(*data.items())
@@ -40,6 +40,8 @@ class Granularity:
40
40
  def to_timestamps(
41
41
  time_range: TimeRange, granularity: Granularity
42
42
  ) -> Generator[datetime, None, None]:
43
+ if not granularity.sampling_interval_minutes:
44
+ return
43
45
  yield from (
44
46
  takewhile(
45
47
  lambda t: time_range.start < t, # type: ignore