flytekitplugins-xarray-zarr 1.16.0b5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,29 @@
1
+ Metadata-Version: 2.4
2
+ Name: flytekitplugins-xarray-zarr
3
+ Version: 1.16.0b5
4
+ Summary: Xarray Zarr plugin for flytekit
5
+ Author: flyteorg
6
+ Author-email: admin@flyte.org
7
+ License: apache2
8
+ Classifier: Intended Audience :: Science/Research
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Topic :: Scientific/Engineering
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Classifier: Topic :: Software Development
16
+ Classifier: Topic :: Software Development :: Libraries
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Requires-Python: >=3.9
19
+ Requires-Dist: dask[distributed]>=2022.10.2
20
+ Requires-Dist: flytekit<2.0.0,>=1.3.0b2
21
+ Requires-Dist: xarray
22
+ Requires-Dist: zarr
23
+ Dynamic: author
24
+ Dynamic: author-email
25
+ Dynamic: classifier
26
+ Dynamic: license
27
+ Dynamic: requires-dist
28
+ Dynamic: requires-python
29
+ Dynamic: summary
@@ -0,0 +1,48 @@
1
+ # Flytekit Xarray Zarr Plugin
2
+ The Xarray Zarr plugin adds support for persisting xarray datasets and dataarrays to zarr between tasks. If a dask cluster is present (see flytekitplugins-dask), the plugin attempts to connect to the distributed client before calling `.to_zarr(url)`. This removes the need to explicitly connect to a distributed client within the task.
3
+
4
+ If deck is enabled, the datasets/dataarrays are also rendered to HTML.
5
+
6
+ To install the plugin, run the following command:
7
+
8
+ ```bash
9
+ pip install flytekitplugins-xarray-zarr
10
+ ```
11
+
12
+ ## Example
13
+
14
+ ```python
15
+ import dask.array as da
16
+ import xarray as xr
17
+ from flytekit import task, workflow
18
+ from flytekitplugins.dask import Dask, WorkerGroup
19
+
20
+
21
+ @task(
22
+ task_config=Dask(workers=WorkerGroup(number_of_workers=6)),
23
+ enable_deck=True,
24
+ )
25
+ def generate_xarray_task() -> xr.Dataset:
26
+ return xr.Dataset(
27
+ {
28
+ "variable": (
29
+ ("time", "x", "y"),
30
+ da.random.uniform(size=(1024, 1024, 1024)),
31
+ )
32
+ },
33
+ )
34
+
35
+
36
+ @task(
37
+ task_config=Dask(workers=WorkerGroup(number_of_workers=6)),
38
+ enable_deck=True,
39
+ )
40
+ def preprocess_xarray_task(ds: xr.Dataset) -> xr.Dataset:
41
+ return ds * 2
42
+
43
+
44
+ @workflow
45
+ def xarray_workflow() -> xr.Dataset:
46
+ ds = generate_xarray_task()
47
+ return preprocess_xarray_task(ds=ds)
48
+ ```
@@ -0,0 +1,14 @@
1
+ """
2
+ .. currentmodule:: flytekitplugins.xarray
3
+
4
+ This package contains things that are useful when extending Flytekit.
5
+
6
+ .. autosummary::
7
+ :template: custom.rst
8
+ :toctree: generated/
9
+
10
+ XarrayDaZarrTypeTransformer
11
+ XarrayZarrTypeTransformer
12
+ """
13
+
14
+ from .xarray_transformers import XarrayDaZarrTypeTransformer, XarrayZarrTypeTransformer
@@ -0,0 +1,102 @@
1
+ import typing
2
+
3
+ import dask.distributed as dd
4
+
5
+ import xarray as xr
6
+ from flytekit import (
7
+ Blob,
8
+ BlobMetadata,
9
+ BlobType,
10
+ FlyteContext,
11
+ Literal,
12
+ LiteralType,
13
+ Scalar,
14
+ )
15
+ from flytekit.extend import TypeEngine, TypeTransformer
16
+
17
+
18
class XarrayZarrTypeTransformer(TypeTransformer[xr.Dataset]):
    """Type transformer that passes ``xr.Dataset`` values between tasks as zarr stores.

    A dataset is written with ``Dataset.to_zarr`` to a randomly generated
    remote path and represented by a multipart binary blob literal whose URI
    is that path. Reading the literal back opens the store with
    ``xr.open_zarr``.
    """

    # Blob type shared by every literal this transformer produces.
    _TYPE_INFO = BlobType(format="binary", dimensionality=BlobType.BlobDimensionality.MULTIPART)

    def __init__(self) -> None:
        super().__init__(name="Xarray Dataset", t=xr.Dataset)

    def get_literal_type(self, t: typing.Type[xr.Dataset]) -> LiteralType:
        """Return the blob literal type used for ``xr.Dataset`` values."""
        return LiteralType(blob=self._TYPE_INFO)

    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: xr.Dataset,
        python_type: typing.Type[xr.Dataset],
        expected: LiteralType,
    ) -> Literal:
        """Write ``python_val`` to a zarr store and return a blob literal pointing at it.

        Args:
            ctx: Flyte context; its ``file_access`` supplies the remote path.
            python_val: Dataset to persist.
            python_type: Declared Python type (unused).
            expected: Expected literal type (unused).

        Returns:
            A literal wrapping a blob whose URI is the zarr store location.
        """
        remote_dir = ctx.file_access.get_random_remote_path("data.zarr")
        # Opening the dask client here attaches it for the duration of the
        # write, eliminating the need for users to connect to the client
        # themselves when a task takes an xr.Dataset type.
        with dd.Client(timeout=120):
            python_val.to_zarr(remote_dir, mode="w")
        return Literal(scalar=Scalar(blob=Blob(uri=remote_dir, metadata=BlobMetadata(type=self._TYPE_INFO))))

    def to_python_value(
        self,
        ctx: FlyteContext,
        lv: Literal,
        expected_python_type: typing.Type[xr.Dataset],
    ) -> xr.Dataset:
        """Open the zarr store referenced by the literal's blob URI as a dataset."""
        return xr.open_zarr(lv.scalar.blob.uri)

    def to_html(
        self,
        ctx: FlyteContext,
        python_val: xr.Dataset,
        expected_python_type: typing.Type[xr.Dataset],
    ) -> str:
        """Return the dataset's own HTML representation."""
        return python_val._repr_html_()
57
+
58
+
59
class XarrayDaZarrTypeTransformer(TypeTransformer[xr.DataArray]):
    """Type transformer that passes ``xr.DataArray`` values between tasks as zarr stores.

    A data array is written with ``DataArray.to_zarr`` to a randomly generated
    remote path and represented by a multipart binary blob literal whose URI
    is that path. Reading the literal back opens the store as a dataset and
    returns its first data variable.
    """

    # Blob type shared by every literal this transformer produces.
    _TYPE_INFO = BlobType(format="binary", dimensionality=BlobType.BlobDimensionality.MULTIPART)

    def __init__(self) -> None:
        super().__init__(name="Xarray DataArray", t=xr.DataArray)

    def get_literal_type(self, t: typing.Type[xr.DataArray]) -> LiteralType:
        """Return the blob literal type used for ``xr.DataArray`` values."""
        return LiteralType(blob=self._TYPE_INFO)

    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: xr.DataArray,
        python_type: typing.Type[xr.DataArray],
        expected: LiteralType,
    ) -> Literal:
        """Write ``python_val`` to a zarr store and return a blob literal pointing at it.

        Args:
            ctx: Flyte context; its ``file_access`` supplies the remote path.
            python_val: Data array to persist.
            python_type: Declared Python type (unused).
            expected: Expected literal type (unused).

        Returns:
            A literal wrapping a blob whose URI is the zarr store location.
        """
        remote_dir = ctx.file_access.get_random_remote_path("data.zarr")
        # Opening the dask client here attaches it for the duration of the
        # write, eliminating the need for users to connect to the client
        # themselves when a task takes an xr.DataArray type.
        with dd.Client(timeout=120):
            python_val.to_zarr(remote_dir, mode="w")
        return Literal(scalar=Scalar(blob=Blob(uri=remote_dir, metadata=BlobMetadata(type=self._TYPE_INFO))))

    def to_python_value(
        self,
        ctx: FlyteContext,
        lv: Literal,
        expected_python_type: typing.Type[xr.DataArray],
    ) -> xr.DataArray:
        """Open the zarr store referenced by the literal and return its first data variable."""
        # xr.open_zarr always opens a dataset, so we take the first variable.
        # NOTE(review): a DataArray written without a name presumably comes
        # back carrying xarray's placeholder variable name -- verify whether
        # the original (missing) name should be restored here.
        return list(xr.open_zarr(lv.scalar.blob.uri).data_vars.values())[0]

    def to_html(
        self,
        ctx: FlyteContext,
        python_val: xr.DataArray,
        expected_python_type: typing.Type[xr.DataArray],
    ) -> str:
        """Return the data array's own HTML representation."""
        return python_val._repr_html_()
99
+
100
+
101
# Importing this module instantiates each transformer and registers it with
# the TypeEngine, Dataset transformer first.
for _transformer_cls in (XarrayZarrTypeTransformer, XarrayDaZarrTypeTransformer):
    TypeEngine.register(_transformer_cls())
@@ -0,0 +1,29 @@
1
+ Metadata-Version: 2.4
2
+ Name: flytekitplugins-xarray-zarr
3
+ Version: 1.16.0b5
4
+ Summary: Xarray Zarr plugin for flytekit
5
+ Author: flyteorg
6
+ Author-email: admin@flyte.org
7
+ License: apache2
8
+ Classifier: Intended Audience :: Science/Research
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Topic :: Scientific/Engineering
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Classifier: Topic :: Software Development
16
+ Classifier: Topic :: Software Development :: Libraries
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Requires-Python: >=3.9
19
+ Requires-Dist: dask[distributed]>=2022.10.2
20
+ Requires-Dist: flytekit<2.0.0,>=1.3.0b2
21
+ Requires-Dist: xarray
22
+ Requires-Dist: zarr
23
+ Dynamic: author
24
+ Dynamic: author-email
25
+ Dynamic: classifier
26
+ Dynamic: license
27
+ Dynamic: requires-dist
28
+ Dynamic: requires-python
29
+ Dynamic: summary
@@ -0,0 +1,12 @@
1
+ README.md
2
+ setup.py
3
+ flytekitplugins/xarray/__init__.py
4
+ flytekitplugins/xarray/xarray_transformers.py
5
+ flytekitplugins_xarray_zarr.egg-info/PKG-INFO
6
+ flytekitplugins_xarray_zarr.egg-info/SOURCES.txt
7
+ flytekitplugins_xarray_zarr.egg-info/dependency_links.txt
8
+ flytekitplugins_xarray_zarr.egg-info/entry_points.txt
9
+ flytekitplugins_xarray_zarr.egg-info/namespace_packages.txt
10
+ flytekitplugins_xarray_zarr.egg-info/requires.txt
11
+ flytekitplugins_xarray_zarr.egg-info/top_level.txt
12
+ tests/test_xarray_zarr_plugin.py
@@ -0,0 +1,2 @@
1
+ [flytekit.plugins]
2
+ xarray = flytekitplugins.xarray
@@ -0,0 +1,4 @@
1
+ dask[distributed]>=2022.10.2
2
+ flytekit<2.0.0,>=1.3.0b2
3
+ xarray
4
+ zarr
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,40 @@
1
+ from setuptools import setup
2
+
3
# Short plugin identifier, reused for the distribution name, the package path
# and the entry point below.
PLUGIN_NAME = "xarray"

microlib_name = f"flytekitplugins-{PLUGIN_NAME}-zarr"

# Runtime dependencies installed alongside the plugin.
plugin_requires = [
    "dask[distributed]>=2022.10.2",
    "flytekit>=1.3.0b2,<2.0.0",
    "xarray",
    "zarr",
]

__version__ = "1.16.0b5"

# Dotted path of the single package shipped by this distribution.
_plugin_package = f"flytekitplugins.{PLUGIN_NAME}"

_classifiers = [
    "Intended Audience :: Science/Research",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Topic :: Scientific/Engineering",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Software Development",
    "Topic :: Software Development :: Libraries",
    "Topic :: Software Development :: Libraries :: Python Modules",
]

# Entry point in the "flytekit.plugins" group that points at the plugin package.
_entry_points = {"flytekit.plugins": [f"{PLUGIN_NAME}={_plugin_package}"]}

setup(
    name=microlib_name,
    version=__version__,
    description="Xarray Zarr plugin for flytekit",
    author="flyteorg",
    author_email="admin@flyte.org",
    license="apache2",
    python_requires=">=3.9",
    namespace_packages=["flytekitplugins"],
    packages=[_plugin_package],
    install_requires=plugin_requires,
    classifiers=_classifiers,
    entry_points=_entry_points,
)
@@ -0,0 +1,48 @@
1
+ from flytekit import task, workflow
2
+ import numpy as np
3
+ import dask.array as da
4
+ import xarray as xr
5
+
6
+
7
def _sample_dataset() -> xr.Dataset:
    """Build a dataset with one dask-backed 32x32 variable named ``test`` over dims ``x`` and ``y``."""
    values = da.random.uniform(size=(32, 32))
    return xr.Dataset({"test": (("x", "y"), values)})
11
+
12
+
13
def test_xarray_zarr_dataarray_plugin():
    """Pass an ``xr.DataArray`` through a two-task workflow and check the result type."""

    @task
    def _generate_xarray() -> xr.DataArray:
        dataset = _sample_dataset()
        return dataset["test"]

    @task
    def _consume_xarray(ds: xr.DataArray) -> xr.DataArray:
        return ds

    @workflow
    def _xarray_wf() -> xr.DataArray:
        return _consume_xarray(ds=_generate_xarray())

    # Calling the workflow directly runs both tasks in this process.
    result = _xarray_wf()
    assert isinstance(result, xr.DataArray)
30
+
31
+
32
def test_xarray_zarr_dataset_plugin():
    """Pass an ``xr.Dataset`` through a two-task workflow and check the result type."""

    @task
    def _generate_xarray() -> xr.Dataset:
        dataset = _sample_dataset()
        return dataset

    @task
    def _consume_xarray(ds: xr.Dataset) -> xr.Dataset:
        return ds

    @workflow
    def _xarray_wf() -> xr.Dataset:
        return _consume_xarray(ds=_generate_xarray())

    # Calling the workflow directly runs both tasks in this process.
    result = _xarray_wf()
    assert isinstance(result, xr.Dataset)