arize-phoenix 0.0.18__tar.gz → 0.0.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/PKG-INFO +12 -5
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/README.md +11 -4
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/__init__.py +1 -1
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/core/model_schema.py +2 -2
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/datasets/fixtures.py +119 -106
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/metrics/metrics.py +2 -2
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/metrics/timeseries.py +9 -3
- arize_phoenix-0.0.19/src/phoenix/server/api/input_types/TimeRange.py +28 -0
- arize_phoenix-0.0.19/src/phoenix/server/api/types/DatasetRole.py +11 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/Dimension.py +85 -12
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/DimensionDataType.py +2 -2
- arize_phoenix-0.0.19/src/phoenix/server/api/types/DimensionShape.py +21 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/DimensionType.py +1 -1
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/EmbeddingDimension.py +44 -8
- arize_phoenix-0.0.19/src/phoenix/server/api/types/NumericRange.py +10 -0
- arize_phoenix-0.0.19/src/phoenix/server/api/types/Segments.py +44 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/TimeSeries.py +21 -6
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/main.py +6 -2
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/static/index.js +493 -351
- arize_phoenix-0.0.18/src/phoenix/server/api/input_types/TimeRange.py +0 -12
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/.gitignore +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/IP_NOTICE +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/LICENSE +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/pyproject.toml +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/config.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/core/__init__.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/core/dimension.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/core/dimension_data_type.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/core/dimension_type.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/core/embedding_dimension.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/core/model.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/core/model_schema_adapter.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/datasets/__init__.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/datasets/dataset.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/datasets/errors.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/datasets/event.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/datasets/schema.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/datasets/validation.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/metrics/README.md +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/metrics/__init__.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/metrics/binning.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/metrics/cardinality.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/metrics/embeddings.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/metrics/median.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/metrics/mixins.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/metrics/percent_empty.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/pointcloud/__init__.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/pointcloud/clustering.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/pointcloud/pointcloud.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/pointcloud/projectors.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/py.typed +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/__init__.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/__init__.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/context.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/input_types/DimensionInput.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/input_types/Granularity.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/input_types/__init__.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/interceptor.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/schema.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/DataQualityMetric.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/Dataset.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/DimensionWithValue.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/EmbeddingMetadata.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/Event.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/EventMetadata.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/ExportEventsMutation.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/ExportedFile.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/Model.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/PromptResponse.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/ScalarDriftMetricEnum.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/UMAPPoints.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/VectorDriftMetricEnum.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/__init__.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/node.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/api/types/pagination.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/app.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/static/apple-touch-icon-114x114.png +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/static/apple-touch-icon-120x120.png +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/static/apple-touch-icon-144x144.png +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/static/apple-touch-icon-152x152.png +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/static/apple-touch-icon-180x180.png +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/static/apple-touch-icon-72x72.png +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/static/apple-touch-icon-76x76.png +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/static/apple-touch-icon.png +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/static/favicon.ico +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/static/index.css +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/static/index.html +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/static/modernizr.js +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/server/thread_server.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/services.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/session/__init__.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/session/session.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/utils/__init__.py +0 -0
- {arize_phoenix-0.0.18 → arize_phoenix-0.0.19}/src/phoenix/utils/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: arize-phoenix
|
|
3
|
-
Version: 0.0.18
|
|
3
|
+
Version: 0.0.19
|
|
4
4
|
Summary: ML Observability in your notebook
|
|
5
5
|
Project-URL: Documentation, https://docs.arize.com/phoenix/
|
|
6
6
|
Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
|
|
@@ -47,16 +47,22 @@ Description-Content-Type: text/markdown
|
|
|
47
47
|
</a>
|
|
48
48
|
<br/>
|
|
49
49
|
<br/>
|
|
50
|
-
<a href="https://
|
|
50
|
+
<a href="https://docs.arize.com/phoenix/">
|
|
51
|
+
<img src="https://img.shields.io/static/v1?message=Docs&logo=&labelColor=grey&color=blue&logoColor=white&label=%20"/>
|
|
52
|
+
</a>
|
|
53
|
+
<a target="_blank" href="https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q">
|
|
51
54
|
<img src="https://img.shields.io/static/v1?message=Community&logo=slack&labelColor=grey&color=blue&logoColor=white&label=%20"/>
|
|
52
55
|
</a>
|
|
53
|
-
<a href="https://
|
|
56
|
+
<a target="_blank" href="https://twitter.com/ArizePhoenix">
|
|
57
|
+
<img src="https://img.shields.io/badge/-ArizePhoenix-blue.svg?color=blue&labelColor=gray&logo=twitter">
|
|
58
|
+
</a>
|
|
59
|
+
<a target="_blank" href="https://pypi.org/project/arize-phoenix/">
|
|
54
60
|
<img src="https://img.shields.io/pypi/v/arize-phoenix?color=blue">
|
|
55
61
|
</a>
|
|
56
|
-
<a href="https://anaconda.org/conda-forge/arize-phoenix">
|
|
62
|
+
<a target="_blank" href="https://anaconda.org/conda-forge/arize-phoenix">
|
|
57
63
|
<img src="https://img.shields.io/conda/vn/conda-forge/arize-phoenix.svg?color=blue">
|
|
58
64
|
</a>
|
|
59
|
-
<a href="https://pypi.org/project/arize-phoenix/">
|
|
65
|
+
<a target="_blank" href="https://pypi.org/project/arize-phoenix/">
|
|
60
66
|
<img src="https://img.shields.io/pypi/pyversions/arize-phoenix">
|
|
61
67
|
</a>
|
|
62
68
|
</p>
|
|
@@ -183,6 +189,7 @@ Join our community to connect with thousands of machine learning practitioners a
|
|
|
183
189
|
- 💡 Ask questions and provide feedback in the _#phoenix-support_ channel.
|
|
184
190
|
- 🌟 Leave a star on our [GitHub](https://github.com/Arize-ai/phoenix).
|
|
185
191
|
- 🐞 Report bugs with [GitHub Issues](https://github.com/Arize-ai/phoenix/issues).
|
|
192
|
+
- 🐣 Follow us on [twitter](https://twitter.com/ArizePhoenix).
|
|
186
193
|
- 💌️ Sign up for our [mailing list](https://phoenix.arize.com/#updates).
|
|
187
194
|
- 🗺️ Check out our [roadmap](https://github.com/orgs/Arize-ai/projects/45) to see where we're heading next.
|
|
188
195
|
- 🎓 Learn the fundamentals of ML observability with our [introductory](https://arize.com/ml-observability-fundamentals/) and [advanced](https://arize.com/blog-course/) courses.
|
|
@@ -4,16 +4,22 @@
|
|
|
4
4
|
</a>
|
|
5
5
|
<br/>
|
|
6
6
|
<br/>
|
|
7
|
-
<a href="https://
|
|
7
|
+
<a href="https://docs.arize.com/phoenix/">
|
|
8
|
+
<img src="https://img.shields.io/static/v1?message=Docs&logo=&labelColor=grey&color=blue&logoColor=white&label=%20"/>
|
|
9
|
+
</a>
|
|
10
|
+
<a target="_blank" href="https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q">
|
|
8
11
|
<img src="https://img.shields.io/static/v1?message=Community&logo=slack&labelColor=grey&color=blue&logoColor=white&label=%20"/>
|
|
9
12
|
</a>
|
|
10
|
-
<a href="https://
|
|
13
|
+
<a target="_blank" href="https://twitter.com/ArizePhoenix">
|
|
14
|
+
<img src="https://img.shields.io/badge/-ArizePhoenix-blue.svg?color=blue&labelColor=gray&logo=twitter">
|
|
15
|
+
</a>
|
|
16
|
+
<a target="_blank" href="https://pypi.org/project/arize-phoenix/">
|
|
11
17
|
<img src="https://img.shields.io/pypi/v/arize-phoenix?color=blue">
|
|
12
18
|
</a>
|
|
13
|
-
<a href="https://anaconda.org/conda-forge/arize-phoenix">
|
|
19
|
+
<a target="_blank" href="https://anaconda.org/conda-forge/arize-phoenix">
|
|
14
20
|
<img src="https://img.shields.io/conda/vn/conda-forge/arize-phoenix.svg?color=blue">
|
|
15
21
|
</a>
|
|
16
|
-
<a href="https://pypi.org/project/arize-phoenix/">
|
|
22
|
+
<a target="_blank" href="https://pypi.org/project/arize-phoenix/">
|
|
17
23
|
<img src="https://img.shields.io/pypi/pyversions/arize-phoenix">
|
|
18
24
|
</a>
|
|
19
25
|
</p>
|
|
@@ -140,6 +146,7 @@ Join our community to connect with thousands of machine learning practitioners a
|
|
|
140
146
|
- 💡 Ask questions and provide feedback in the _#phoenix-support_ channel.
|
|
141
147
|
- 🌟 Leave a star on our [GitHub](https://github.com/Arize-ai/phoenix).
|
|
142
148
|
- 🐞 Report bugs with [GitHub Issues](https://github.com/Arize-ai/phoenix/issues).
|
|
149
|
+
- 🐣 Follow us on [twitter](https://twitter.com/ArizePhoenix).
|
|
143
150
|
- 💌️ Sign up for our [mailing list](https://phoenix.arize.com/#updates).
|
|
144
151
|
- 🗺️ Check out our [roadmap](https://github.com/orgs/Arize-ai/projects/45) to see where we're heading next.
|
|
145
152
|
- 🎓 Learn the fundamentals of ML observability with our [introductory](https://arize.com/ml-observability-fundamentals/) and [advanced](https://arize.com/blog-course/) courses.
|
|
@@ -331,7 +331,7 @@ class EmbeddingDimension(Dimension):
|
|
|
331
331
|
object.__setattr__(self, "display_name", self.name)
|
|
332
332
|
|
|
333
333
|
@classmethod
|
|
334
|
-
def
|
|
334
|
+
def from_dimension(cls, emb: Embedding, **kwargs: Any) -> "EmbeddingDimension":
|
|
335
335
|
"""Use `from_` instead of `__init__` because the latter is needed by
|
|
336
336
|
replace() and we don't want to clobber the generated version.
|
|
337
337
|
"""
|
|
@@ -981,7 +981,7 @@ class Schema(SchemaSpec):
|
|
|
981
981
|
else:
|
|
982
982
|
yield ScalarDimension(spec, role=role, data_type=data_type)
|
|
983
983
|
elif isinstance(spec, Embedding):
|
|
984
|
-
yield EmbeddingDimension.
|
|
984
|
+
yield EmbeddingDimension.from_dimension(spec, role=role, data_type=data_type)
|
|
985
985
|
else:
|
|
986
986
|
raise TypeError(f"{role} has unrecognized type: {type(spec)}")
|
|
987
987
|
|
|
@@ -1,12 +1,17 @@
|
|
|
1
|
+
import json
|
|
1
2
|
import logging
|
|
2
|
-
import os
|
|
3
3
|
from dataclasses import dataclass, replace
|
|
4
|
-
from
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Iterator, NamedTuple, Optional, Tuple
|
|
6
|
+
from urllib import request
|
|
7
|
+
from urllib.parse import quote, urljoin
|
|
5
8
|
|
|
6
9
|
from pandas import read_parquet
|
|
7
10
|
|
|
8
|
-
from .
|
|
9
|
-
from .
|
|
11
|
+
from phoenix.config import DATASET_DIR
|
|
12
|
+
from phoenix.core.model_schema import DatasetRole
|
|
13
|
+
from phoenix.datasets.dataset import Dataset
|
|
14
|
+
from phoenix.datasets.schema import EmbeddingColumnNames, Schema
|
|
10
15
|
|
|
11
16
|
logger = logging.getLogger(__name__)
|
|
12
17
|
|
|
@@ -15,13 +20,22 @@ logger = logging.getLogger(__name__)
|
|
|
15
20
|
class Fixture:
|
|
16
21
|
name: str
|
|
17
22
|
description: str
|
|
18
|
-
|
|
19
|
-
|
|
23
|
+
prefix: str
|
|
24
|
+
primary_file_name: str
|
|
25
|
+
reference_file_name: Optional[str]
|
|
20
26
|
primary_schema: Schema
|
|
21
27
|
reference_schema: Schema
|
|
22
28
|
|
|
29
|
+
def paths(self) -> Iterator[Tuple[DatasetRole, Path]]:
|
|
30
|
+
return (
|
|
31
|
+
(role, Path(self.prefix) / name)
|
|
32
|
+
for role, name in zip(
|
|
33
|
+
DatasetRole,
|
|
34
|
+
(self.primary_file_name, self.reference_file_name),
|
|
35
|
+
)
|
|
36
|
+
if name
|
|
37
|
+
)
|
|
23
38
|
|
|
24
|
-
FIXTURE_URL_PREFIX = "http://storage.googleapis.com/arize-assets/phoenix/datasets/"
|
|
25
39
|
|
|
26
40
|
sentiment_classification_language_drift_schema = Schema(
|
|
27
41
|
prediction_id_column_name="prediction_id",
|
|
@@ -53,16 +67,9 @@ sentiment_classification_language_drift_fixture = Fixture(
|
|
|
53
67
|
""",
|
|
54
68
|
primary_schema=sentiment_classification_language_drift_schema,
|
|
55
69
|
reference_schema=sentiment_classification_language_drift_schema,
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
"sentiment_classification_language_drift_production.parquet",
|
|
60
|
-
),
|
|
61
|
-
reference_dataset_url=os.path.join(
|
|
62
|
-
FIXTURE_URL_PREFIX,
|
|
63
|
-
"unstructured/nlp/sentiment-classification-language-drift",
|
|
64
|
-
"sentiment_classification_language_drift_training.parquet",
|
|
65
|
-
),
|
|
70
|
+
prefix="unstructured/nlp/sentiment-classification-language-drift",
|
|
71
|
+
primary_file_name="sentiment_classification_language_drift_production.parquet",
|
|
72
|
+
reference_file_name="sentiment_classification_language_drift_training.parquet",
|
|
66
73
|
)
|
|
67
74
|
|
|
68
75
|
image_classification_schema = Schema(
|
|
@@ -86,12 +93,9 @@ image_classification_fixture = Fixture(
|
|
|
86
93
|
""",
|
|
87
94
|
primary_schema=replace(image_classification_schema, actual_label_column_name=None),
|
|
88
95
|
reference_schema=image_classification_schema,
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
reference_dataset_url=os.path.join(
|
|
93
|
-
FIXTURE_URL_PREFIX, "unstructured/cv/human-actions/human_actions_training.parquet"
|
|
94
|
-
),
|
|
96
|
+
prefix="unstructured/cv/human-actions",
|
|
97
|
+
primary_file_name="human_actions_production.parquet",
|
|
98
|
+
reference_file_name="human_actions_training.parquet",
|
|
95
99
|
)
|
|
96
100
|
|
|
97
101
|
fashion_mnist_primary_schema = Schema(
|
|
@@ -117,14 +121,9 @@ fashion_mnist_fixture = Fixture(
|
|
|
117
121
|
""",
|
|
118
122
|
primary_schema=fashion_mnist_primary_schema,
|
|
119
123
|
reference_schema=fashion_mnist_reference_schema,
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
),
|
|
124
|
-
reference_dataset_url=os.path.join(
|
|
125
|
-
FIXTURE_URL_PREFIX,
|
|
126
|
-
"unstructured/cv/fashion-mnist/fashion_mnist_train.parquet",
|
|
127
|
-
),
|
|
124
|
+
prefix="unstructured/cv/fashion-mnist",
|
|
125
|
+
primary_file_name="fashion_mnist_production.parquet",
|
|
126
|
+
reference_file_name="fashion_mnist_train.parquet",
|
|
128
127
|
)
|
|
129
128
|
|
|
130
129
|
ner_token_drift_schema = Schema(
|
|
@@ -156,14 +155,9 @@ ner_token_drift_fixture = Fixture(
|
|
|
156
155
|
""",
|
|
157
156
|
primary_schema=ner_token_drift_schema,
|
|
158
157
|
reference_schema=ner_token_drift_schema,
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
),
|
|
163
|
-
reference_dataset_url=os.path.join(
|
|
164
|
-
FIXTURE_URL_PREFIX,
|
|
165
|
-
"unstructured/nlp/named-entity-recognition/ner_token_drift_train.parquet",
|
|
166
|
-
),
|
|
158
|
+
prefix="unstructured/nlp/named-entity-recognition",
|
|
159
|
+
primary_file_name="ner_token_drift_production.parquet",
|
|
160
|
+
reference_file_name="ner_token_drift_train.parquet",
|
|
167
161
|
)
|
|
168
162
|
|
|
169
163
|
credit_card_fraud_schema = Schema(
|
|
@@ -194,13 +188,9 @@ credit_card_fraud_fixture = Fixture(
|
|
|
194
188
|
""",
|
|
195
189
|
primary_schema=credit_card_fraud_schema,
|
|
196
190
|
reference_schema=credit_card_fraud_schema,
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
reference_dataset_url=os.path.join(
|
|
201
|
-
FIXTURE_URL_PREFIX,
|
|
202
|
-
"structured/credit-card-fraud/credit_card_fraud_train.parquet",
|
|
203
|
-
),
|
|
191
|
+
prefix="structured/credit-card-fraud",
|
|
192
|
+
primary_file_name="credit_card_fraud_production.parquet",
|
|
193
|
+
reference_file_name="credit_card_fraud_train.parquet",
|
|
204
194
|
)
|
|
205
195
|
|
|
206
196
|
click_through_rate_schema = Schema(
|
|
@@ -228,12 +218,9 @@ click_through_rate_fixture = Fixture(
|
|
|
228
218
|
""",
|
|
229
219
|
primary_schema=click_through_rate_schema,
|
|
230
220
|
reference_schema=click_through_rate_schema,
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
reference_dataset_url=os.path.join(
|
|
235
|
-
FIXTURE_URL_PREFIX, "structured/click-through-rate/click_through_rate_train.parquet"
|
|
236
|
-
),
|
|
221
|
+
prefix="structured/click-through-rate",
|
|
222
|
+
primary_file_name="click_through_rate_production.parquet",
|
|
223
|
+
reference_file_name="click_through_rate_train.parquet",
|
|
237
224
|
)
|
|
238
225
|
|
|
239
226
|
wide_data_primary_schema = Schema(
|
|
@@ -250,14 +237,9 @@ wide_data_fixture = Fixture(
|
|
|
250
237
|
""",
|
|
251
238
|
primary_schema=wide_data_primary_schema,
|
|
252
239
|
reference_schema=wide_data_reference_schema,
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
),
|
|
257
|
-
reference_dataset_url=os.path.join(
|
|
258
|
-
FIXTURE_URL_PREFIX,
|
|
259
|
-
"structured/wide-data/wide_data_train.parquet",
|
|
260
|
-
),
|
|
240
|
+
prefix="structured/wide-data",
|
|
241
|
+
primary_file_name="wide_data_production.parquet",
|
|
242
|
+
reference_file_name="wide_data_train.parquet",
|
|
261
243
|
)
|
|
262
244
|
|
|
263
245
|
deep_data_primary_schema = Schema(
|
|
@@ -274,14 +256,9 @@ deep_data_fixture = Fixture(
|
|
|
274
256
|
""",
|
|
275
257
|
primary_schema=deep_data_primary_schema,
|
|
276
258
|
reference_schema=deep_data_reference_schema,
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
),
|
|
281
|
-
reference_dataset_url=os.path.join(
|
|
282
|
-
FIXTURE_URL_PREFIX,
|
|
283
|
-
"structured/deep-data/deep_data_train.parquet",
|
|
284
|
-
),
|
|
259
|
+
prefix="structured/deep-data",
|
|
260
|
+
primary_file_name="deep_data_production.parquet",
|
|
261
|
+
reference_file_name="deep_data_train.parquet",
|
|
285
262
|
)
|
|
286
263
|
|
|
287
264
|
|
|
@@ -305,14 +282,9 @@ llm_summarization_fixture = Fixture(
|
|
|
305
282
|
""",
|
|
306
283
|
primary_schema=llm_summarization_schema,
|
|
307
284
|
reference_schema=llm_summarization_schema,
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
),
|
|
312
|
-
reference_dataset_url=os.path.join(
|
|
313
|
-
FIXTURE_URL_PREFIX,
|
|
314
|
-
"unstructured/llm/summarization/llm_summarization_baseline.parquet",
|
|
315
|
-
),
|
|
285
|
+
prefix="unstructured/llm/summarization",
|
|
286
|
+
primary_file_name="llm_summarization_prod.parquet",
|
|
287
|
+
reference_file_name="llm_summarization_baseline.parquet",
|
|
316
288
|
)
|
|
317
289
|
|
|
318
290
|
FIXTURES: Tuple[Fixture, ...] = (
|
|
@@ -329,23 +301,30 @@ FIXTURES: Tuple[Fixture, ...] = (
|
|
|
329
301
|
NAME_TO_FIXTURE = {fixture.name: fixture for fixture in FIXTURES}
|
|
330
302
|
|
|
331
303
|
|
|
332
|
-
def
|
|
304
|
+
def get_datasets(
|
|
305
|
+
fixture_name: str,
|
|
306
|
+
no_internet: bool = False,
|
|
307
|
+
) -> Tuple[Dataset, Optional[Dataset]]:
|
|
333
308
|
"""
|
|
334
309
|
Downloads primary and reference datasets for a fixture if they are not found
|
|
335
310
|
locally.
|
|
336
311
|
"""
|
|
337
312
|
fixture = _get_fixture_by_name(fixture_name=fixture_name)
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
313
|
+
if no_internet:
|
|
314
|
+
paths = {role: DATASET_DIR / path for role, path in fixture.paths()}
|
|
315
|
+
else:
|
|
316
|
+
paths = dict(_download(fixture, DATASET_DIR))
|
|
317
|
+
primary_dataset = Dataset(
|
|
318
|
+
read_parquet(paths[DatasetRole.PRIMARY]),
|
|
319
|
+
fixture.primary_schema,
|
|
320
|
+
"production",
|
|
342
321
|
)
|
|
343
322
|
reference_dataset = None
|
|
344
|
-
if fixture.
|
|
345
|
-
reference_dataset =
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
323
|
+
if fixture.reference_file_name is not None:
|
|
324
|
+
reference_dataset = Dataset(
|
|
325
|
+
read_parquet(paths[DatasetRole.REFERENCE]),
|
|
326
|
+
fixture.reference_schema,
|
|
327
|
+
"training",
|
|
349
328
|
)
|
|
350
329
|
return primary_dataset, reference_dataset
|
|
351
330
|
|
|
@@ -361,25 +340,6 @@ def _get_fixture_by_name(fixture_name: str) -> Fixture:
|
|
|
361
340
|
return NAME_TO_FIXTURE[fixture_name]
|
|
362
341
|
|
|
363
342
|
|
|
364
|
-
def _download_dataset_if_missing(dataset_name: str, dataset_url: str, schema: Schema) -> Dataset:
|
|
365
|
-
"""
|
|
366
|
-
Downloads a dataset from the given URL if it is not found locally.
|
|
367
|
-
"""
|
|
368
|
-
try:
|
|
369
|
-
return Dataset.from_name(dataset_name)
|
|
370
|
-
except FileNotFoundError:
|
|
371
|
-
pass
|
|
372
|
-
|
|
373
|
-
logger.info(f'Downloading dataset: "{dataset_name}"')
|
|
374
|
-
dataset = Dataset(
|
|
375
|
-
dataframe=read_parquet(dataset_url),
|
|
376
|
-
schema=schema,
|
|
377
|
-
name=dataset_name,
|
|
378
|
-
)
|
|
379
|
-
logger.info("Download complete.")
|
|
380
|
-
return dataset
|
|
381
|
-
|
|
382
|
-
|
|
383
343
|
@dataclass
|
|
384
344
|
class ExampleDatasets:
|
|
385
345
|
"""
|
|
@@ -414,8 +374,61 @@ def load_example(use_case: str) -> ExampleDatasets:
|
|
|
414
374
|
|
|
415
375
|
"""
|
|
416
376
|
fixture = _get_fixture_by_name(use_case)
|
|
417
|
-
primary_dataset, reference_dataset =
|
|
377
|
+
primary_dataset, reference_dataset = get_datasets(use_case)
|
|
418
378
|
print(f"📥 Loaded {use_case} example datasets.")
|
|
419
379
|
print("ℹ️ About this use-case:")
|
|
420
380
|
print(fixture.description)
|
|
421
381
|
return ExampleDatasets(primary=primary_dataset, reference=reference_dataset)
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
class Metadata(NamedTuple):
|
|
385
|
+
path: str
|
|
386
|
+
mediaLink: str
|
|
387
|
+
md5Hash: str
|
|
388
|
+
|
|
389
|
+
def save_artifact(self, location: Path) -> Path:
|
|
390
|
+
data_file_path = location / self.path
|
|
391
|
+
md5_file = data_file_path.with_name(data_file_path.stem + ".md5")
|
|
392
|
+
data_file_path.parents[0].mkdir(parents=True, exist_ok=True)
|
|
393
|
+
if data_file_path.is_file() and md5_file.is_file():
|
|
394
|
+
with open(md5_file, "r") as f:
|
|
395
|
+
if f.readline() == self.md5Hash:
|
|
396
|
+
return data_file_path
|
|
397
|
+
request.urlretrieve(self.mediaLink, data_file_path)
|
|
398
|
+
with open(md5_file, "w") as f:
|
|
399
|
+
f.write(self.md5Hash)
|
|
400
|
+
return data_file_path
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
class GCSAssets(NamedTuple):
|
|
404
|
+
host: str = "https://storage.googleapis.com/"
|
|
405
|
+
bucket: str = "arize-assets"
|
|
406
|
+
prefix: str = "phoenix/datasets/"
|
|
407
|
+
|
|
408
|
+
def metadata(self, path: Path) -> Metadata:
|
|
409
|
+
url = urljoin(
|
|
410
|
+
urljoin(self.host, f"storage/v1/b/{self.bucket}/o/"),
|
|
411
|
+
quote(urljoin(self.prefix, str(path)), safe=""),
|
|
412
|
+
)
|
|
413
|
+
resp = json.loads(request.urlopen(request.Request(url)).read())
|
|
414
|
+
return Metadata(
|
|
415
|
+
resp["name"][len(self.prefix) :],
|
|
416
|
+
resp["mediaLink"],
|
|
417
|
+
resp["md5Hash"],
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def _download(fixture: Fixture, location: Path) -> Iterator[Tuple[DatasetRole, Path]]:
|
|
422
|
+
for role, path in fixture.paths():
|
|
423
|
+
yield role, GCSAssets().metadata(path).save_artifact(location)
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
# Download all fixtures
|
|
427
|
+
if __name__ == "__main__":
|
|
428
|
+
import time
|
|
429
|
+
|
|
430
|
+
for fixture in FIXTURES:
|
|
431
|
+
start_time = time.time()
|
|
432
|
+
print(f"getting {fixture.name}", end="...")
|
|
433
|
+
dict(_download(fixture, DATASET_DIR))
|
|
434
|
+
print(f"done ({time.time() - start_time:.2f}s)")
|
|
@@ -59,7 +59,7 @@ class Mean(UnaryOperator, BaseMetric):
|
|
|
59
59
|
def calc(self, dataframe: pd.DataFrame) -> float:
|
|
60
60
|
data = self.get_operand_column(dataframe)
|
|
61
61
|
numeric_data = pd.to_numeric(data, errors="coerce")
|
|
62
|
-
return
|
|
62
|
+
return numeric_data.mean()
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
@dataclass
|
|
@@ -100,7 +100,7 @@ class Cardinality(UnaryOperator, BaseMetric):
|
|
|
100
100
|
class PercentEmpty(UnaryOperator, BaseMetric):
|
|
101
101
|
def calc(self, dataframe: pd.DataFrame) -> float:
|
|
102
102
|
data = self.get_operand_column(dataframe)
|
|
103
|
-
return
|
|
103
|
+
return data.isna().mean() * 100
|
|
104
104
|
|
|
105
105
|
|
|
106
106
|
@dataclass
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from datetime import datetime, timedelta
|
|
1
|
+
from datetime import datetime, timedelta, timezone
|
|
2
2
|
from functools import partial
|
|
3
3
|
from itertools import accumulate, repeat
|
|
4
4
|
from typing import Any, Callable, Iterable, Iterator, Tuple, cast
|
|
@@ -195,9 +195,15 @@ def _results(
|
|
|
195
195
|
# pandas row indexing is stop-exclusive
|
|
196
196
|
row_slice = slice(row_start, row_stop)
|
|
197
197
|
filtered = dataframe.iloc[row_slice, :]
|
|
198
|
-
|
|
198
|
+
res = filtered.groupby(
|
|
199
199
|
group,
|
|
200
200
|
group_keys=True,
|
|
201
201
|
).apply(
|
|
202
202
|
calculate_metrics,
|
|
203
|
-
)
|
|
203
|
+
)
|
|
204
|
+
|
|
205
|
+
# NB: on ubuntu, we lose the timezone information when there is no data
|
|
206
|
+
if res.index.tzinfo is None: # type: ignore
|
|
207
|
+
res = res.set_axis(res.index.tz_localize(timezone.utc), axis=0) # type: ignore
|
|
208
|
+
|
|
209
|
+
yield res.loc[result_slice, :]
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from datetime import datetime, timezone
|
|
2
|
+
|
|
3
|
+
import strawberry
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@strawberry.input
|
|
7
|
+
class TimeRange:
|
|
8
|
+
start: datetime = strawberry.field(
|
|
9
|
+
description="The start of the time range",
|
|
10
|
+
)
|
|
11
|
+
end: datetime = strawberry.field(
|
|
12
|
+
description="The end of the time range. Right exclusive.",
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
def __post_init__(self) -> None:
|
|
16
|
+
setattr(
|
|
17
|
+
self,
|
|
18
|
+
"start",
|
|
19
|
+
self.start.astimezone(timezone.utc),
|
|
20
|
+
)
|
|
21
|
+
setattr(
|
|
22
|
+
self,
|
|
23
|
+
"end",
|
|
24
|
+
self.end.astimezone(timezone.utc),
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
def is_valid(self) -> bool:
|
|
28
|
+
return self.start < self.end
|