arize-phoenix 0.0.18__py3-none-any.whl → 0.0.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release has been flagged as potentially problematic.


This version of arize-phoenix might be problematic; consult the registry's advisory page for this release for further details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arize-phoenix
3
- Version: 0.0.18
3
+ Version: 0.0.19
4
4
  Summary: ML Observability in your notebook
5
5
  Project-URL: Documentation, https://docs.arize.com/phoenix/
6
6
  Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -47,16 +47,22 @@ Description-Content-Type: text/markdown
47
47
  </a>
48
48
  <br/>
49
49
  <br/>
50
- <a href="https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q">
50
+ <a href="https://docs.arize.com/phoenix/">
51
+ <img src="https://img.shields.io/static/v1?message=Docs&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAIAAAACACAYAAADDPmHLAAAG4ElEQVR4nO2d4XHjNhCFcTf+b3ZgdWCmgmMqOKUC0xXYrsBOBVEqsFRB7ApCVRCygrMriFQBM7h5mNlwKBECARLg7jeDscamSQj7sFgsQfBL27ZK4MtXsT1vRADMEQEwRwTAHBEAc0QAzBEBMEcEwBwRAHNEAMwRATBnjAByFGE+MqVUMcYOY24GVUqpb/h8VErVKAf87QNFcEcbd4WSw+D6803njHscO5sATmGEURGBiCj6yUlv1uX2gv91FsDViArbcA2RUKF8QhAV8RQc0b15DcOt0VaTE1oAfWj3dYdCBfGGsmSM0XX5HsP3nEMAXbqCeCdiOERQPx9og5exGJ0S4zRQN9KrUupfpdQWjZciure/YIj7K0bjqwTyAHdovA805iqCOg2xgnB1nZ97IvaoSCURdIPG/IHGjTH/YAz/A8KdJai7lBQzgbpx/0Hg6DT18UzWMXxSjMkDrElPNEmKfAbl6znwI3IMU/OCa0/1nfckwWaSbvWYYDnEsvCMJDNckhqu7GCMKWYOBXp9yPGd5kvqUAKf6rkAk7M2SY9QDXdEr9wEOr9x96EiejMFnixBNteDISsyNw7hHRqc22evWcP4vt39O85bzZH30AKg4+eo8cQRI4bHAJ7hyYM3CNHrG9RrimSXuZmUkZjN/O6nAPpcwCcJNmipAle2QM/1GU3vITCXhvY91u9geN/jOY27VuTnYL1PCeAcRhwh7/Bl8Ai+IuxPiOCShtfX/sPDtY8w+sZjby86dw6dBeoigD7obd/Ko6fI4BF8DA9HnGdrcU0fLt+n4dfE6H5jpjYcVdu2L23b5lpjHoo+18FDbcszddF1rUee/4C6ZiO+80rHZmjDoIQUQLdRtm3brkcKIUPjjqVPBIUHgW1GGN4YfawAL2IqAVB8iEE31tvIelARlCPPVaFOLoIupzY6xVcM4MoRUyHXyHhslH6PaPl5RP1Lh4UsOeKR2e8dzC0Aiuvc2Nx3fwhfxf/hknouUYbWUk5GTAIwmOh5e+H0cor8vEL91hfOdEqINLq1AV+RKImJ6869f9tFIBVc6y7gd3lHfWyNX0LEr7EuDElhRdAlQjig0e/RU31xxDltM4pF7IY3pLIgxAhhgzF/iC2M0Hi4dkOGlyGMd/g7dsMbUlsR9ICe9WhxbA3DjRkSdjiHzQzlBSKNJsCzIcUlYdfI0dcWS8LMkPDkcJ0n/O+Qyy/IAtDkSPnp4Fu4WpthQR/zm2VcoI/51fI28iYld9/HEh4Pf7D0Bm845pwIPnHMUJSf45pT5x68s5T9AW6INzhHDeP1BYcNMew5SghkinWOwVnaBhHGG5ybMn70zBDe8buh8X6DqV0Sa/5tWOIOIbcWQ8KBiGBnMb/P0OuTd/lddCrY5jn/VLm3nL+fY4X4YREuv8vS9wh6HSkAExMs0viKySZRd44iyOH2FzPe98Fll7A7GNMmjay4GF9BAKGXesfCN0sRsDG+YrhP4O2ACFgZXzHdKPL2RMJoxc34ivFOod3AMMNUj5XxFfOtYrUIXvB5MandS+G+V/AzZ+MrEcBPlpoFtUIEwBwRAG+OIgDe1CIA5ogAmCMCYI4IgDkiAOaIAJgjAmCOCIA5IgDmiACYIwJgjgiAOSIA5ogAmCMCYI4IgDkiAOaIAJgjAmCOCIA5IgDmiACYIwJgjgiAOSIA5ogAmCMCYI4IgDkiAOaIAJgjAmDOVYBXvwvxQV8NWJOd0esvJ94babZaz7B5ovldxnlDpYhp0JFr/KTlLKcEMMQKpcDPXIQxGXsYmhZnXAXQh/EWBQrr3bc80mATyyrEvs4+BdBHgbdxFOIhrDkSg1/6Iu2LCS0AyoqI4ftUF00EY/Q3h1fRj2J
KAVCMGErmnsH1lfnemEsAlByvgl0z2qx5B8OPCuB8EIMADBlEEOV79j1whNE3c/X2PmISAGUNr7CEmUSUhjfEKgBDAY+QohCiNrwhdgEYzPv7UxkadvBg0RrekMrNoAozh3vLN4DPhc7S/WL52vkoSO1u4BZC+DOCulC0KJ/gqWaP7C8hlSGgjxyCmDuPsEePT/KuasrrAcyr4H+f6fq01yd7Sz1lD0CZ2hs06PVJufs+lrIiyLwufjfBtXYpjvWnWIoHoJSYe4dIK/t4HX1ULFEACkPCm8e8wXFJvZ6y1EWhJkDcWxw7RINzLc74auGrgg8e4oIm9Sh/CA7LwkvHqaIJ9pLI6Lmy1BigDy2EV8tjdzh+8XB6MGSLKH4INsZXDJ8MGhIBK+Mrpo+GnRIBO+MrZjFAFxoTNBwCvj6u4qvSZJiM3iNX4yvmHoA9Sh4PF0QAzBEBMEcEwBwRAHNEAMwRAXBGKfUfr5hKvglRfO4AAAAASUVORK5CYII=&labelColor=grey&color=blue&logoColor=white&label=%20"/>
52
+ </a>
53
+ <a target="_blank" href="https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q">
51
54
  <img src="https://img.shields.io/static/v1?message=Community&logo=slack&labelColor=grey&color=blue&logoColor=white&label=%20"/>
52
55
  </a>
53
- <a href="https://pypi.org/project/arize-phoenix/">
56
+ <a target="_blank" href="https://twitter.com/ArizePhoenix">
57
+ <img src="https://img.shields.io/badge/-ArizePhoenix-blue.svg?color=blue&labelColor=gray&logo=twitter">
58
+ </a>
59
+ <a target="_blank" href="https://pypi.org/project/arize-phoenix/">
54
60
  <img src="https://img.shields.io/pypi/v/arize-phoenix?color=blue">
55
61
  </a>
56
- <a href="https://anaconda.org/conda-forge/arize-phoenix">
62
+ <a target="_blank" href="https://anaconda.org/conda-forge/arize-phoenix">
57
63
  <img src="https://img.shields.io/conda/vn/conda-forge/arize-phoenix.svg?color=blue">
58
64
  </a>
59
- <a href="https://pypi.org/project/arize-phoenix/">
65
+ <a target="_blank" href="https://pypi.org/project/arize-phoenix/">
60
66
  <img src="https://img.shields.io/pypi/pyversions/arize-phoenix">
61
67
  </a>
62
68
  </p>
@@ -183,6 +189,7 @@ Join our community to connect with thousands of machine learning practitioners a
183
189
  - 💡 Ask questions and provide feedback in the _#phoenix-support_ channel.
184
190
  - 🌟 Leave a star on our [GitHub](https://github.com/Arize-ai/phoenix).
185
191
  - 🐞 Report bugs with [GitHub Issues](https://github.com/Arize-ai/phoenix/issues).
192
+ - 🐣 Follow us on [twitter](https://twitter.com/ArizePhoenix).
186
193
  - 💌️ Sign up for our [mailing list](https://phoenix.arize.com/#updates).
187
194
  - 🗺️ Check out our [roadmap](https://github.com/orgs/Arize-ai/projects/45) to see where we're heading next.
188
195
  - 🎓 Learn the fundamentals of ML observability with our [introductory](https://arize.com/ml-observability-fundamentals/) and [advanced](https://arize.com/blog-course/) courses.
@@ -1,4 +1,4 @@
1
- phoenix/__init__.py,sha256=sgC9MOWol4GHv7QTCMWDTueielw7Q_o1eSCLH6DrHU0,997
1
+ phoenix/__init__.py,sha256=MS-mFdJlXRrlUlXo7KWlnmNRpais546_SbhjGXBcCDU,997
2
2
  phoenix/config.py,sha256=tjNn9oqDxQmeO85sCchLlTsDiRJ6AoK0CTt_Uc_hrKM,1442
3
3
  phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
4
4
  phoenix/services.py,sha256=pamdcZ9hp9BqgRV0ZIcMsjBYp8HCpIQPdxaXQfQLxYU,3711
@@ -8,13 +8,13 @@ phoenix/core/dimension_data_type.py,sha256=FEBp4p06LlFpAXq-ftxDAFymBtU_pYTFmJjFc
8
8
  phoenix/core/dimension_type.py,sha256=EKFmPzuGr1cn7t4vD-XDk9s836j6U3iRbSu3Z2sO8sM,171
9
9
  phoenix/core/embedding_dimension.py,sha256=niOw0OBCJrCa4R9fyM6bocBz6KZf14XAcThhGrCC-qQ,1017
10
10
  phoenix/core/model.py,sha256=MSNqq3mHIIA-TF4OAjnO3i6Xmp04hREegXx4_RNTM_Q,9566
11
- phoenix/core/model_schema.py,sha256=G4gvSjJmu24Lt6cIirv5vZZXKU7sAklEW5FKu6XJZmQ,41141
11
+ phoenix/core/model_schema.py,sha256=KXOPW2IbqMopnQ4cEh2T4kSJQNugqAySeivor5m2bAY,41159
12
12
  phoenix/core/model_schema_adapter.py,sha256=iIjUK0gZdJ_wDnUxoIm7K1_8WyD8yXjulDx9n1Sz2AA,5118
13
13
  phoenix/datasets/__init__.py,sha256=rh4DZNaCKbXzeiUO-OTu67hpA0rK53_3LfV1oDi4NDI,245
14
14
  phoenix/datasets/dataset.py,sha256=CoKcfLQBxo-7_d93mhhBwvxOZ8Hl7ihlC7bnbYvrl-s,27142
15
15
  phoenix/datasets/errors.py,sha256=-Iyk8rsvP_KX-P4gOqjm26slkDq1-9CohK07_LkrYCI,8117
16
16
  phoenix/datasets/event.py,sha256=7n7QA8TvByKHby5JFPpFb-vk1XhlaD9rmatwsgIiYLI,266
17
- phoenix/datasets/fixtures.py,sha256=Wk0u9Avg87S-rICa75Qj7DdYgJ81BYcFrE4gjqzoCh0,15395
17
+ phoenix/datasets/fixtures.py,sha256=9rmJgsF50vmuwR7PavNf7kdFsLQiasta6iJEzh7GvkM,15954
18
18
  phoenix/datasets/schema.py,sha256=1sSdtKkN7L61rsb8FNeTxJZG-ysGR7JGgT08WKGuDFs,3658
19
19
  phoenix/datasets/validation.py,sha256=iiJu_Vn_NWgrHmg1vRax-QvCYjIylwl2cCUatCY5RZQ,8665
20
20
  phoenix/metrics/README.md,sha256=5gekqTU-5gGdMwvcfNp2Wlu8p1ul9kGY_jq0XXQusoI,1964
@@ -23,17 +23,17 @@ phoenix/metrics/binning.py,sha256=xaKkaHfrcNqJ2uzuiBSfyOJ1cS6vPKXHOKfSFjk80l4,89
23
23
  phoenix/metrics/cardinality.py,sha256=WpZ4P0b3ZX2AQRNC_QZLgHCtl_IV7twv9Tg2VfmT358,799
24
24
  phoenix/metrics/embeddings.py,sha256=X8J3CJWIW7OqDhzrenCanymkl1I356wIx687EY_a62Q,354
25
25
  phoenix/metrics/median.py,sha256=M-d00yh-JVodi7QC-smA6jbAgjbWajYHlOEBKAWk0IY,119
26
- phoenix/metrics/metrics.py,sha256=7WPOwAS2TueAxkvYe6uhBoXEZbJS5raQqEn1i_-2ZJs,7318
26
+ phoenix/metrics/metrics.py,sha256=3ggWwiVR5dmNqsEDuJvJiLgttkHlnHK3uB7IBqjLwwk,7292
27
27
  phoenix/metrics/mixins.py,sha256=zk-6Il1asTbMP_hwvfXABPu_8jYynQyEudDIcWRhU0c,5727
28
28
  phoenix/metrics/percent_empty.py,sha256=0pRA-_nFqGgYfTnxe_uIZX9RQV-O1ADzh6KQZIbsXnk,465
29
- phoenix/metrics/timeseries.py,sha256=bMshh7OJCD95VGelPz1MMj6KMePjKYkAuRsk4xtMu_A,6609
29
+ phoenix/metrics/timeseries.py,sha256=20WA1DN79NDc2iB7xm71XAK4Sn4pwXANEo1skr9pz_I,6864
30
30
  phoenix/pointcloud/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
31
31
  phoenix/pointcloud/clustering.py,sha256=IzcG67kJ2hPP7pcqVmKPSL_6gKRonKdOT3bCtbTOqnk,820
32
32
  phoenix/pointcloud/pointcloud.py,sha256=als0aitTA__4PrSqBk6qPPKe8fIG-ZSnlVRVkfMorBU,2290
33
33
  phoenix/pointcloud/projectors.py,sha256=ekZvKYmb0BibaISytzmUgcDwrfW5Fk-kB52HEtnx7jo,557
34
34
  phoenix/server/__init__.py,sha256=jzUH8jjxFZJ59UympBQWpHXqWtF0kE5G7eBsc59y-9s,28
35
35
  phoenix/server/app.py,sha256=6Uw-2IXZPNEUkTK9lR9XCanIoCJJzV4dHQoyeFbN51Y,3997
36
- phoenix/server/main.py,sha256=J9X1y1m4rLgPo8onj7jdyiP4H9hU7eVmf5F_N-UualI,2942
36
+ phoenix/server/main.py,sha256=QhfnLoomXbYbtbzX9XX4YmmILZ0GzRpDB0Ngo2XbkMk,3027
37
37
  phoenix/server/thread_server.py,sha256=Fu-bUcDcOpP4s3YE7pEwB1PieL1lchziKB5GozWcTKw,1236
38
38
  phoenix/server/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
39
  phoenix/server/api/context.py,sha256=5IVkq9x4GUZp5wZ50EcReYI5TDW6jWFB7XigXiUBYrA,402
@@ -41,24 +41,28 @@ phoenix/server/api/interceptor.py,sha256=pivESQdLQAWrMjToK7b8rl52NobiatqrWnK8Qe0
41
41
  phoenix/server/api/schema.py,sha256=QYONWcS3xNvqD6t2hp63WkoZIQVrW9kH59MkVFXf3_I,1143
42
42
  phoenix/server/api/input_types/DimensionInput.py,sha256=Vfx5FmiMKey4-EHDQsQRPzSAMRJMN5oVMLDUl4NKAa8,164
43
43
  phoenix/server/api/input_types/Granularity.py,sha256=6SVfZ5yTZYq1PI6vdpjfkBUc4YilLSkF-k6okuSNbbQ,2301
44
- phoenix/server/api/input_types/TimeRange.py,sha256=8GhSVyFC3byuvpcOG2lhC5ZKXgXW0g_UtaVdDPAfxwk,334
44
+ phoenix/server/api/input_types/TimeRange.py,sha256=cKXGQCuKtc-MwnaC_HBhfVnrM-f6i98JIiJPdQ2Gb64,638
45
45
  phoenix/server/api/input_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
46
  phoenix/server/api/types/DataQualityMetric.py,sha256=eq2W8TutOdoeWcRWRUSTNBKR9eP0lfZUb1tlqYJ6flw,243
47
47
  phoenix/server/api/types/Dataset.py,sha256=YIiy6sU6CnH8iiBZXJmMDjKVlSlupOysmFoLOnvzGYE,2928
48
- phoenix/server/api/types/Dimension.py,sha256=l2tvWVJXBpBAkpBTtFG2IDOMU76SoJyBtjrDqWkYzps,5396
49
- phoenix/server/api/types/DimensionDataType.py,sha256=a_3J8KkfapFDWrbTIE8JFtYSVdiJTAYofiSYQUb1SNM,420
50
- phoenix/server/api/types/DimensionType.py,sha256=Q9RxkXfu9-7WR0qqud0zRHf616RTGNpwgcmCsQUQBgg,792
48
+ phoenix/server/api/types/DatasetRole.py,sha256=dHjW-TDZwhgvSziFglaR6fu9PrnYqxX9HoNCFqKTeMo,191
49
+ phoenix/server/api/types/Dimension.py,sha256=Gvf5Esgl4jymckcZg5_ucjXlpsUUtiJ8oP7LTrkvft4,7450
50
+ phoenix/server/api/types/DimensionDataType.py,sha256=o0QQRpUzgXbhd10drR2LthxrjB8klQcELrIvsbp4l_g,441
51
+ phoenix/server/api/types/DimensionShape.py,sha256=LNsRt3Uyx1OmwMa5EXk1JGRXPLVV2EEgYXz-6wnLJn4,562
52
+ phoenix/server/api/types/DimensionType.py,sha256=JLikZUBVqbHlUWcYYd8d60gB1_hmAcuFuZZsjCXpIwc,801
51
53
  phoenix/server/api/types/DimensionWithValue.py,sha256=kBHNs3EYd-i9xB6-Pj5qmO_GlRZ6X-RHR5ByGfCjoNM,470
52
- phoenix/server/api/types/EmbeddingDimension.py,sha256=jP2vURnuYj9ib_MG_1iYj8HjFVNYE80aQXm04QDN7sI,10674
54
+ phoenix/server/api/types/EmbeddingDimension.py,sha256=bJmhaW9SoWEGr_pOc3G40OYLZcmDHuchd5MrXe8Ofbs,11495
53
55
  phoenix/server/api/types/EmbeddingMetadata.py,sha256=SDySL8yHfLUczVTUwJhvrTiiO-HjiQy4GjuXD34SfdE,433
54
56
  phoenix/server/api/types/Event.py,sha256=3DUbDDcIvxP4P0k9xiYQTZjlReOq9JT365wANWubEFA,2593
55
57
  phoenix/server/api/types/EventMetadata.py,sha256=-lwxvPT8cjq4_fMeMIqMfv7StlPjrm1ohRLJfa6eEHA,520
56
58
  phoenix/server/api/types/ExportEventsMutation.py,sha256=zar9sk1bh8xqqVIs0obh-1tIK5VjwablxfWC09Ge1OM,1477
57
59
  phoenix/server/api/types/ExportedFile.py,sha256=e3GTn7B5LgsTbqiwjhMCQH7VsiqXitrBO4aCMS1lHsg,163
58
60
  phoenix/server/api/types/Model.py,sha256=doJ3TIx--iJ0C28bwE9b3EumZyDB4P0BRfGDTmaLL-U,4003
61
+ phoenix/server/api/types/NumericRange.py,sha256=afEjgF97Go_OvmjMggbPBt-zGM8IONewAyEiKEHRds0,192
59
62
  phoenix/server/api/types/PromptResponse.py,sha256=EEF5HMmo3twGkbzIKhEeawp0q_J_zzSdsmQ5qjfKTmI,609
60
63
  phoenix/server/api/types/ScalarDriftMetricEnum.py,sha256=6CbRDHaUA3pnf5QfAwgMzPJ1Dn3BUsKn-eVeA8jY44Y,171
61
- phoenix/server/api/types/TimeSeries.py,sha256=mgioxxqUXilrkRK4XEGbf4kMd8mwDRv2bkOSeXlFLwY,4633
64
+ phoenix/server/api/types/Segments.py,sha256=KQXt3zePs6QZlV7m5OzILvZ9V-0SqdY1_Hxfv4zgfHk,883
65
+ phoenix/server/api/types/TimeSeries.py,sha256=LZ0mLEzSj-jeTCr85CEf9RAfNpAUjv4BcBXcXmqaJPg,4886
62
66
  phoenix/server/api/types/UMAPPoints.py,sha256=XwRa3VO6rZdnMR8LT5mh7mxBVrBG574qixmmK-qwaVo,3844
63
67
  phoenix/server/api/types/VectorDriftMetricEnum.py,sha256=fouPDC30n8F67dleK5lJv8Xec94ZJgDZP75C1V5ihUg,135
64
68
  phoenix/server/api/types/__init__.py,sha256=77AN3W0O7WVSxPUQEgASD-I2nkyoRcUvOTNxcRs66gU,332
@@ -75,14 +79,14 @@ phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZV
75
79
  phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
76
80
  phoenix/server/static/index.css,sha256=jeV8eWPiHUcUmb_0yp_rI1odc-RnxlXVgMT-x9HGWbo,1817
77
81
  phoenix/server/static/index.html,sha256=xPZZH-y4dWlbDutPEV1k0rhmWJtIV-Db9aYP-dEc7wM,703
78
- phoenix/server/static/index.js,sha256=kxdX_AwFkFV6m_0ZDYO-DzjZUukz0VWDZrN2xERvUbY,2248751
82
+ phoenix/server/static/index.js,sha256=UFHSzaHKl0TdmkqVhciU_7joPwDhrvzndOac1P54BsM,2268442
79
83
  phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
80
84
  phoenix/session/__init__.py,sha256=qFZaChQ3SwNAOR-LD4hzQQthe32haw1vU1JeBvgMnRQ,136
81
85
  phoenix/session/session.py,sha256=qVg8q6DG-YotNVWXIywdR2q0VgRYsV7GiDMyvE8rje4,8650
82
86
  phoenix/utils/__init__.py,sha256=alIDGBnxWH4JvP-UW-7N99seBBi0r1GV1h8f1ERFBec,21
83
87
  phoenix/utils/utils.py,sha256=hZK3a_nLFYiQb1O6EcMF3mVmhTjnfuJ5WMcjhvOu7zk,427
84
- arize_phoenix-0.0.18.dist-info/METADATA,sha256=c0q1gj26bTqVhsgs2B3ZQnm0HK_pa7atMLvBUE_S8Xo,7815
85
- arize_phoenix-0.0.18.dist-info/WHEEL,sha256=Fd6mP6ydyRguakwUJ05oBE7fh2IPxgtDN9IwHJ9OqJQ,87
86
- arize_phoenix-0.0.18.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
87
- arize_phoenix-0.0.18.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
88
- arize_phoenix-0.0.18.dist-info/RECORD,,
88
+ arize_phoenix-0.0.19.dist-info/METADATA,sha256=cj9jXkDT7cR0BX4SehkMuhTbWgYNDeJJaeVuXQpxyng,10759
89
+ arize_phoenix-0.0.19.dist-info/WHEEL,sha256=Fd6mP6ydyRguakwUJ05oBE7fh2IPxgtDN9IwHJ9OqJQ,87
90
+ arize_phoenix-0.0.19.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
91
+ arize_phoenix-0.0.19.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
92
+ arize_phoenix-0.0.19.dist-info/RECORD,,
phoenix/__init__.py CHANGED
@@ -7,7 +7,7 @@ from .datasets import (
7
7
  )
8
8
  from .session import Session, active_session, close_app, launch_app
9
9
 
10
- __version__ = "0.0.18"
10
+ __version__ = "0.0.19"
11
11
 
12
12
  # module level doc-string
13
13
  __doc__ = """
@@ -331,7 +331,7 @@ class EmbeddingDimension(Dimension):
331
331
  object.__setattr__(self, "display_name", self.name)
332
332
 
333
333
  @classmethod
334
- def from_(cls, emb: Embedding, **kwargs: Any) -> "EmbeddingDimension":
334
+ def from_dimension(cls, emb: Embedding, **kwargs: Any) -> "EmbeddingDimension":
335
335
  """Use `from_` instead of `__init__` because the latter is needed by
336
336
  replace() and we don't want to clobber the generated version.
337
337
  """
@@ -981,7 +981,7 @@ class Schema(SchemaSpec):
981
981
  else:
982
982
  yield ScalarDimension(spec, role=role, data_type=data_type)
983
983
  elif isinstance(spec, Embedding):
984
- yield EmbeddingDimension.from_(spec, role=role, data_type=data_type)
984
+ yield EmbeddingDimension.from_dimension(spec, role=role, data_type=data_type)
985
985
  else:
986
986
  raise TypeError(f"{role} has unrecognized type: {type(spec)}")
987
987
 
@@ -1,12 +1,17 @@
1
+ import json
1
2
  import logging
2
- import os
3
3
  from dataclasses import dataclass, replace
4
- from typing import Optional, Tuple
4
+ from pathlib import Path
5
+ from typing import Iterator, NamedTuple, Optional, Tuple
6
+ from urllib import request
7
+ from urllib.parse import quote, urljoin
5
8
 
6
9
  from pandas import read_parquet
7
10
 
8
- from .dataset import Dataset
9
- from .schema import EmbeddingColumnNames, Schema
11
+ from phoenix.config import DATASET_DIR
12
+ from phoenix.core.model_schema import DatasetRole
13
+ from phoenix.datasets.dataset import Dataset
14
+ from phoenix.datasets.schema import EmbeddingColumnNames, Schema
10
15
 
11
16
  logger = logging.getLogger(__name__)
12
17
 
@@ -15,13 +20,22 @@ logger = logging.getLogger(__name__)
15
20
  class Fixture:
16
21
  name: str
17
22
  description: str
18
- primary_dataset_url: str
19
- reference_dataset_url: Optional[str]
23
+ prefix: str
24
+ primary_file_name: str
25
+ reference_file_name: Optional[str]
20
26
  primary_schema: Schema
21
27
  reference_schema: Schema
22
28
 
29
+ def paths(self) -> Iterator[Tuple[DatasetRole, Path]]:
30
+ return (
31
+ (role, Path(self.prefix) / name)
32
+ for role, name in zip(
33
+ DatasetRole,
34
+ (self.primary_file_name, self.reference_file_name),
35
+ )
36
+ if name
37
+ )
23
38
 
24
- FIXTURE_URL_PREFIX = "http://storage.googleapis.com/arize-assets/phoenix/datasets/"
25
39
 
26
40
  sentiment_classification_language_drift_schema = Schema(
27
41
  prediction_id_column_name="prediction_id",
@@ -53,16 +67,9 @@ sentiment_classification_language_drift_fixture = Fixture(
53
67
  """,
54
68
  primary_schema=sentiment_classification_language_drift_schema,
55
69
  reference_schema=sentiment_classification_language_drift_schema,
56
- primary_dataset_url=os.path.join(
57
- FIXTURE_URL_PREFIX,
58
- "unstructured/nlp/sentiment-classification-language-drift",
59
- "sentiment_classification_language_drift_production.parquet",
60
- ),
61
- reference_dataset_url=os.path.join(
62
- FIXTURE_URL_PREFIX,
63
- "unstructured/nlp/sentiment-classification-language-drift",
64
- "sentiment_classification_language_drift_training.parquet",
65
- ),
70
+ prefix="unstructured/nlp/sentiment-classification-language-drift",
71
+ primary_file_name="sentiment_classification_language_drift_production.parquet",
72
+ reference_file_name="sentiment_classification_language_drift_training.parquet",
66
73
  )
67
74
 
68
75
  image_classification_schema = Schema(
@@ -86,12 +93,9 @@ image_classification_fixture = Fixture(
86
93
  """,
87
94
  primary_schema=replace(image_classification_schema, actual_label_column_name=None),
88
95
  reference_schema=image_classification_schema,
89
- primary_dataset_url=os.path.join(
90
- FIXTURE_URL_PREFIX, "unstructured/cv/human-actions/human_actions_production.parquet"
91
- ),
92
- reference_dataset_url=os.path.join(
93
- FIXTURE_URL_PREFIX, "unstructured/cv/human-actions/human_actions_training.parquet"
94
- ),
96
+ prefix="unstructured/cv/human-actions",
97
+ primary_file_name="human_actions_production.parquet",
98
+ reference_file_name="human_actions_training.parquet",
95
99
  )
96
100
 
97
101
  fashion_mnist_primary_schema = Schema(
@@ -117,14 +121,9 @@ fashion_mnist_fixture = Fixture(
117
121
  """,
118
122
  primary_schema=fashion_mnist_primary_schema,
119
123
  reference_schema=fashion_mnist_reference_schema,
120
- primary_dataset_url=os.path.join(
121
- FIXTURE_URL_PREFIX,
122
- "unstructured/cv/fashion-mnist/fashion_mnist_production.parquet",
123
- ),
124
- reference_dataset_url=os.path.join(
125
- FIXTURE_URL_PREFIX,
126
- "unstructured/cv/fashion-mnist/fashion_mnist_train.parquet",
127
- ),
124
+ prefix="unstructured/cv/fashion-mnist",
125
+ primary_file_name="fashion_mnist_production.parquet",
126
+ reference_file_name="fashion_mnist_train.parquet",
128
127
  )
129
128
 
130
129
  ner_token_drift_schema = Schema(
@@ -156,14 +155,9 @@ ner_token_drift_fixture = Fixture(
156
155
  """,
157
156
  primary_schema=ner_token_drift_schema,
158
157
  reference_schema=ner_token_drift_schema,
159
- primary_dataset_url=os.path.join(
160
- FIXTURE_URL_PREFIX,
161
- "unstructured/nlp/named-entity-recognition/ner_token_drift_production.parquet",
162
- ),
163
- reference_dataset_url=os.path.join(
164
- FIXTURE_URL_PREFIX,
165
- "unstructured/nlp/named-entity-recognition/ner_token_drift_train.parquet",
166
- ),
158
+ prefix="unstructured/nlp/named-entity-recognition",
159
+ primary_file_name="ner_token_drift_production.parquet",
160
+ reference_file_name="ner_token_drift_train.parquet",
167
161
  )
168
162
 
169
163
  credit_card_fraud_schema = Schema(
@@ -194,13 +188,9 @@ credit_card_fraud_fixture = Fixture(
194
188
  """,
195
189
  primary_schema=credit_card_fraud_schema,
196
190
  reference_schema=credit_card_fraud_schema,
197
- primary_dataset_url=os.path.join(
198
- FIXTURE_URL_PREFIX, "structured/credit-card-fraud/credit_card_fraud_production.parquet"
199
- ),
200
- reference_dataset_url=os.path.join(
201
- FIXTURE_URL_PREFIX,
202
- "structured/credit-card-fraud/credit_card_fraud_train.parquet",
203
- ),
191
+ prefix="structured/credit-card-fraud",
192
+ primary_file_name="credit_card_fraud_production.parquet",
193
+ reference_file_name="credit_card_fraud_train.parquet",
204
194
  )
205
195
 
206
196
  click_through_rate_schema = Schema(
@@ -228,12 +218,9 @@ click_through_rate_fixture = Fixture(
228
218
  """,
229
219
  primary_schema=click_through_rate_schema,
230
220
  reference_schema=click_through_rate_schema,
231
- primary_dataset_url=os.path.join(
232
- FIXTURE_URL_PREFIX, "structured/click-through-rate/click_through_rate_production.parquet"
233
- ),
234
- reference_dataset_url=os.path.join(
235
- FIXTURE_URL_PREFIX, "structured/click-through-rate/click_through_rate_train.parquet"
236
- ),
221
+ prefix="structured/click-through-rate",
222
+ primary_file_name="click_through_rate_production.parquet",
223
+ reference_file_name="click_through_rate_train.parquet",
237
224
  )
238
225
 
239
226
  wide_data_primary_schema = Schema(
@@ -250,14 +237,9 @@ wide_data_fixture = Fixture(
250
237
  """,
251
238
  primary_schema=wide_data_primary_schema,
252
239
  reference_schema=wide_data_reference_schema,
253
- primary_dataset_url=os.path.join(
254
- FIXTURE_URL_PREFIX,
255
- "structured/wide-data/wide_data_production.parquet",
256
- ),
257
- reference_dataset_url=os.path.join(
258
- FIXTURE_URL_PREFIX,
259
- "structured/wide-data/wide_data_train.parquet",
260
- ),
240
+ prefix="structured/wide-data",
241
+ primary_file_name="wide_data_production.parquet",
242
+ reference_file_name="wide_data_train.parquet",
261
243
  )
262
244
 
263
245
  deep_data_primary_schema = Schema(
@@ -274,14 +256,9 @@ deep_data_fixture = Fixture(
274
256
  """,
275
257
  primary_schema=deep_data_primary_schema,
276
258
  reference_schema=deep_data_reference_schema,
277
- primary_dataset_url=os.path.join(
278
- FIXTURE_URL_PREFIX,
279
- "structured/deep-data/deep_data_production.parquet",
280
- ),
281
- reference_dataset_url=os.path.join(
282
- FIXTURE_URL_PREFIX,
283
- "structured/deep-data/deep_data_train.parquet",
284
- ),
259
+ prefix="structured/deep-data",
260
+ primary_file_name="deep_data_production.parquet",
261
+ reference_file_name="deep_data_train.parquet",
285
262
  )
286
263
 
287
264
 
@@ -305,14 +282,9 @@ llm_summarization_fixture = Fixture(
305
282
  """,
306
283
  primary_schema=llm_summarization_schema,
307
284
  reference_schema=llm_summarization_schema,
308
- primary_dataset_url=os.path.join(
309
- FIXTURE_URL_PREFIX,
310
- "unstructured/llm/summarization/llm_summarization_prod.parquet",
311
- ),
312
- reference_dataset_url=os.path.join(
313
- FIXTURE_URL_PREFIX,
314
- "unstructured/llm/summarization/llm_summarization_baseline.parquet",
315
- ),
285
+ prefix="unstructured/llm/summarization",
286
+ primary_file_name="llm_summarization_prod.parquet",
287
+ reference_file_name="llm_summarization_baseline.parquet",
316
288
  )
317
289
 
318
290
  FIXTURES: Tuple[Fixture, ...] = (
@@ -329,23 +301,30 @@ FIXTURES: Tuple[Fixture, ...] = (
329
301
  NAME_TO_FIXTURE = {fixture.name: fixture for fixture in FIXTURES}
330
302
 
331
303
 
332
- def download_fixture_if_missing(fixture_name: str) -> Tuple[Dataset, Optional[Dataset]]:
304
+ def get_datasets(
305
+ fixture_name: str,
306
+ no_internet: bool = False,
307
+ ) -> Tuple[Dataset, Optional[Dataset]]:
333
308
  """
334
309
  Downloads primary and reference datasets for a fixture if they are not found
335
310
  locally.
336
311
  """
337
312
  fixture = _get_fixture_by_name(fixture_name=fixture_name)
338
- primary_dataset = _download_dataset_if_missing(
339
- dataset_name="production",
340
- dataset_url=fixture.primary_dataset_url,
341
- schema=fixture.primary_schema,
313
+ if no_internet:
314
+ paths = {role: DATASET_DIR / path for role, path in fixture.paths()}
315
+ else:
316
+ paths = dict(_download(fixture, DATASET_DIR))
317
+ primary_dataset = Dataset(
318
+ read_parquet(paths[DatasetRole.PRIMARY]),
319
+ fixture.primary_schema,
320
+ "production",
342
321
  )
343
322
  reference_dataset = None
344
- if fixture.reference_dataset_url is not None:
345
- reference_dataset = _download_dataset_if_missing(
346
- dataset_name="training",
347
- dataset_url=fixture.reference_dataset_url,
348
- schema=fixture.reference_schema,
323
+ if fixture.reference_file_name is not None:
324
+ reference_dataset = Dataset(
325
+ read_parquet(paths[DatasetRole.REFERENCE]),
326
+ fixture.reference_schema,
327
+ "training",
349
328
  )
350
329
  return primary_dataset, reference_dataset
351
330
 
@@ -361,25 +340,6 @@ def _get_fixture_by_name(fixture_name: str) -> Fixture:
361
340
  return NAME_TO_FIXTURE[fixture_name]
362
341
 
363
342
 
364
- def _download_dataset_if_missing(dataset_name: str, dataset_url: str, schema: Schema) -> Dataset:
365
- """
366
- Downloads a dataset from the given URL if it is not found locally.
367
- """
368
- try:
369
- return Dataset.from_name(dataset_name)
370
- except FileNotFoundError:
371
- pass
372
-
373
- logger.info(f'Downloading dataset: "{dataset_name}"')
374
- dataset = Dataset(
375
- dataframe=read_parquet(dataset_url),
376
- schema=schema,
377
- name=dataset_name,
378
- )
379
- logger.info("Download complete.")
380
- return dataset
381
-
382
-
383
343
  @dataclass
384
344
  class ExampleDatasets:
385
345
  """
@@ -414,8 +374,61 @@ def load_example(use_case: str) -> ExampleDatasets:
414
374
 
415
375
  """
416
376
  fixture = _get_fixture_by_name(use_case)
417
- primary_dataset, reference_dataset = download_fixture_if_missing(use_case)
377
+ primary_dataset, reference_dataset = get_datasets(use_case)
418
378
  print(f"📥 Loaded {use_case} example datasets.")
419
379
  print("ℹ️ About this use-case:")
420
380
  print(fixture.description)
421
381
  return ExampleDatasets(primary=primary_dataset, reference=reference_dataset)
382
+
383
+
384
+ class Metadata(NamedTuple):
385
+ path: str
386
+ mediaLink: str
387
+ md5Hash: str
388
+
389
+ def save_artifact(self, location: Path) -> Path:
390
+ data_file_path = location / self.path
391
+ md5_file = data_file_path.with_name(data_file_path.stem + ".md5")
392
+ data_file_path.parents[0].mkdir(parents=True, exist_ok=True)
393
+ if data_file_path.is_file() and md5_file.is_file():
394
+ with open(md5_file, "r") as f:
395
+ if f.readline() == self.md5Hash:
396
+ return data_file_path
397
+ request.urlretrieve(self.mediaLink, data_file_path)
398
+ with open(md5_file, "w") as f:
399
+ f.write(self.md5Hash)
400
+ return data_file_path
401
+
402
+
403
+ class GCSAssets(NamedTuple):
404
+ host: str = "https://storage.googleapis.com/"
405
+ bucket: str = "arize-assets"
406
+ prefix: str = "phoenix/datasets/"
407
+
408
+ def metadata(self, path: Path) -> Metadata:
409
+ url = urljoin(
410
+ urljoin(self.host, f"storage/v1/b/{self.bucket}/o/"),
411
+ quote(urljoin(self.prefix, str(path)), safe=""),
412
+ )
413
+ resp = json.loads(request.urlopen(request.Request(url)).read())
414
+ return Metadata(
415
+ resp["name"][len(self.prefix) :],
416
+ resp["mediaLink"],
417
+ resp["md5Hash"],
418
+ )
419
+
420
+
421
+ def _download(fixture: Fixture, location: Path) -> Iterator[Tuple[DatasetRole, Path]]:
422
+ for role, path in fixture.paths():
423
+ yield role, GCSAssets().metadata(path).save_artifact(location)
424
+
425
+
426
+ # Download all fixtures
427
+ if __name__ == "__main__":
428
+ import time
429
+
430
+ for fixture in FIXTURES:
431
+ start_time = time.time()
432
+ print(f"getting {fixture.name}", end="...")
433
+ dict(_download(fixture, DATASET_DIR))
434
+ print(f"done ({time.time() - start_time:.2f}s)")
@@ -59,7 +59,7 @@ class Mean(UnaryOperator, BaseMetric):
59
59
  def calc(self, dataframe: pd.DataFrame) -> float:
60
60
  data = self.get_operand_column(dataframe)
61
61
  numeric_data = pd.to_numeric(data, errors="coerce")
62
- return cast(float, numeric_data.mean())
62
+ return numeric_data.mean()
63
63
 
64
64
 
65
65
  @dataclass
@@ -100,7 +100,7 @@ class Cardinality(UnaryOperator, BaseMetric):
100
100
  class PercentEmpty(UnaryOperator, BaseMetric):
101
101
  def calc(self, dataframe: pd.DataFrame) -> float:
102
102
  data = self.get_operand_column(dataframe)
103
- return cast(float, data.isna().mean() * 100)
103
+ return data.isna().mean() * 100
104
104
 
105
105
 
106
106
  @dataclass
@@ -1,4 +1,4 @@
1
- from datetime import datetime, timedelta
1
+ from datetime import datetime, timedelta, timezone
2
2
  from functools import partial
3
3
  from itertools import accumulate, repeat
4
4
  from typing import Any, Callable, Iterable, Iterator, Tuple, cast
@@ -195,9 +195,15 @@ def _results(
195
195
  # pandas row indexing is stop-exclusive
196
196
  row_slice = slice(row_start, row_stop)
197
197
  filtered = dataframe.iloc[row_slice, :]
198
- yield filtered.groupby(
198
+ res = filtered.groupby(
199
199
  group,
200
200
  group_keys=True,
201
201
  ).apply(
202
202
  calculate_metrics,
203
- ).loc[result_slice, :]
203
+ )
204
+
205
+ # NB: on ubuntu, we lose the timezone information when there is no data
206
+ if res.index.tzinfo is None: # type: ignore
207
+ res = res.set_axis(res.index.tz_localize(timezone.utc), axis=0) # type: ignore
208
+
209
+ yield res.loc[result_slice, :]
@@ -1,12 +1,28 @@
1
- from datetime import datetime
1
+ from datetime import datetime, timezone
2
2
 
3
3
  import strawberry
4
4
 
5
5
 
6
6
  @strawberry.input
7
7
  class TimeRange:
8
- start: datetime = strawberry.field(description="The start of the time range")
9
- end: datetime = strawberry.field(description="The end of the time range. Right exclusive.")
8
+ start: datetime = strawberry.field(
9
+ description="The start of the time range",
10
+ )
11
+ end: datetime = strawberry.field(
12
+ description="The end of the time range. Right exclusive.",
13
+ )
14
+
15
+ def __post_init__(self) -> None:
16
+ setattr(
17
+ self,
18
+ "start",
19
+ self.start.astimezone(timezone.utc),
20
+ )
21
+ setattr(
22
+ self,
23
+ "end",
24
+ self.end.astimezone(timezone.utc),
25
+ )
10
26
 
11
27
  def is_valid(self) -> bool:
12
28
  return self.start < self.end
@@ -0,0 +1,11 @@
1
+ from enum import Enum
2
+
3
+ import strawberry
4
+
5
+ from phoenix.core.model_schema import PRIMARY, REFERENCE
6
+
7
+
8
+ @strawberry.enum
9
+ class DatasetRole(Enum):
10
+ primary = PRIMARY
11
+ reference = REFERENCE