flytekitplugins-xarray-zarr 1.16.0b5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flytekitplugins_xarray_zarr-1.16.0b5/PKG-INFO +29 -0
- flytekitplugins_xarray_zarr-1.16.0b5/README.md +48 -0
- flytekitplugins_xarray_zarr-1.16.0b5/flytekitplugins/xarray/__init__.py +14 -0
- flytekitplugins_xarray_zarr-1.16.0b5/flytekitplugins/xarray/xarray_transformers.py +102 -0
- flytekitplugins_xarray_zarr-1.16.0b5/flytekitplugins_xarray_zarr.egg-info/PKG-INFO +29 -0
- flytekitplugins_xarray_zarr-1.16.0b5/flytekitplugins_xarray_zarr.egg-info/SOURCES.txt +12 -0
- flytekitplugins_xarray_zarr-1.16.0b5/flytekitplugins_xarray_zarr.egg-info/dependency_links.txt +1 -0
- flytekitplugins_xarray_zarr-1.16.0b5/flytekitplugins_xarray_zarr.egg-info/entry_points.txt +2 -0
- flytekitplugins_xarray_zarr-1.16.0b5/flytekitplugins_xarray_zarr.egg-info/namespace_packages.txt +1 -0
- flytekitplugins_xarray_zarr-1.16.0b5/flytekitplugins_xarray_zarr.egg-info/requires.txt +4 -0
- flytekitplugins_xarray_zarr-1.16.0b5/flytekitplugins_xarray_zarr.egg-info/top_level.txt +1 -0
- flytekitplugins_xarray_zarr-1.16.0b5/setup.cfg +4 -0
- flytekitplugins_xarray_zarr-1.16.0b5/setup.py +40 -0
- flytekitplugins_xarray_zarr-1.16.0b5/tests/test_xarray_zarr_plugin.py +48 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: flytekitplugins-xarray-zarr
|
|
3
|
+
Version: 1.16.0b5
|
|
4
|
+
Summary: Xarray Zarr plugin for flytekit
|
|
5
|
+
Author: flyteorg
|
|
6
|
+
Author-email: admin@flyte.org
|
|
7
|
+
License: apache2
|
|
8
|
+
Classifier: Intended Audience :: Science/Research
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
|
+
Classifier: Topic :: Software Development
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Requires-Python: >=3.9
|
|
19
|
+
Requires-Dist: dask[distributed]>=2022.10.2
|
|
20
|
+
Requires-Dist: flytekit<2.0.0,>=1.3.0b2
|
|
21
|
+
Requires-Dist: xarray
|
|
22
|
+
Requires-Dist: zarr
|
|
23
|
+
Dynamic: author
|
|
24
|
+
Dynamic: author-email
|
|
25
|
+
Dynamic: classifier
|
|
26
|
+
Dynamic: license
|
|
27
|
+
Dynamic: requires-dist
|
|
28
|
+
Dynamic: requires-python
|
|
29
|
+
Dynamic: summary
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Flytekit Xarray Zarr Plugin
|
|
2
|
+
The Xarray Zarr plugin adds support for persisting xarray Datasets and DataArrays to Zarr between tasks. If a Dask cluster is present (see flytekitplugins-dask), the plugin attempts to connect to the distributed client before calling `.to_zarr(url)`. This removes the need to explicitly connect to a distributed client within the task.
|
|
3
|
+
|
|
4
|
+
If Flyte Deck is enabled, the Datasets/DataArrays are also rendered to HTML.
|
|
5
|
+
|
|
6
|
+
To install the plugin, run the following command:
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
pip install flytekitplugins-xarray-zarr
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
## Example
|
|
13
|
+
|
|
14
|
+
```python
|
|
15
|
+
import dask.array as da
|
|
16
|
+
import xarray as xr
|
|
17
|
+
from flytekit import task, workflow
|
|
18
|
+
from flytekitplugins.dask import Dask, WorkerGroup
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@task(
|
|
22
|
+
task_config=Dask(workers=WorkerGroup(number_of_workers=6)),
|
|
23
|
+
enable_deck=True,
|
|
24
|
+
)
|
|
25
|
+
def generate_xarray_task() -> xr.Dataset:
|
|
26
|
+
return xr.Dataset(
|
|
27
|
+
{
|
|
28
|
+
"variable": (
|
|
29
|
+
("time", "x", "y"),
|
|
30
|
+
da.random.uniform(size=(1024, 1024, 1024)),
|
|
31
|
+
)
|
|
32
|
+
},
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@task(
|
|
37
|
+
task_config=Dask(workers=WorkerGroup(number_of_workers=6)),
|
|
38
|
+
enable_deck=True,
|
|
39
|
+
)
|
|
40
|
+
def preprocess_xarray_task(ds: xr.Dataset) -> xr.Dataset:
|
|
41
|
+
return ds * 2
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@workflow
|
|
45
|
+
def xarray_workflow() -> xr.Dataset:
|
|
46
|
+
ds = generate_xarray_task()
|
|
47
|
+
return preprocess_xarray_task(ds=ds)
|
|
48
|
+
```
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""
|
|
2
|
+
.. currentmodule:: flytekitplugins.xarray
|
|
3
|
+
|
|
4
|
+
This package provides Flytekit type transformers that persist xarray Datasets and DataArrays as Zarr stores between tasks.
|
|
5
|
+
|
|
6
|
+
.. autosummary::
|
|
7
|
+
:template: custom.rst
|
|
8
|
+
:toctree: generated/
|
|
9
|
+
|
|
10
|
+
XarrayDaZarrTypeTransformer
|
|
11
|
+
XarrayZarrTypeTransformer
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from .xarray_transformers import XarrayDaZarrTypeTransformer, XarrayZarrTypeTransformer
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import typing
|
|
2
|
+
|
|
3
|
+
import dask.distributed as dd
|
|
4
|
+
|
|
5
|
+
import xarray as xr
|
|
6
|
+
from flytekit import (
|
|
7
|
+
Blob,
|
|
8
|
+
BlobMetadata,
|
|
9
|
+
BlobType,
|
|
10
|
+
FlyteContext,
|
|
11
|
+
Literal,
|
|
12
|
+
LiteralType,
|
|
13
|
+
Scalar,
|
|
14
|
+
)
|
|
15
|
+
from flytekit.extend import TypeEngine, TypeTransformer
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class XarrayZarrTypeTransformer(TypeTransformer[xr.Dataset]):
    """Type transformer that hands ``xr.Dataset`` values between tasks as Zarr stores.

    Datasets are written with ``Dataset.to_zarr`` to a remote path obtained
    from the Flyte context and are described by a multipart binary blob
    literal. They are read back with ``xr.open_zarr``.
    """

    # Blob description used both for the literal type and for the metadata of
    # every literal this transformer produces.
    _TYPE_INFO = BlobType(dimensionality=BlobType.BlobDimensionality.MULTIPART, format="binary")

    def __init__(self) -> None:
        super().__init__(t=xr.Dataset, name="Xarray Dataset")

    def get_literal_type(self, t: typing.Type[xr.Dataset]) -> LiteralType:
        """Return the blob literal type for datasets; ``t`` itself is not inspected."""
        return LiteralType(blob=self._TYPE_INFO)

    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: xr.Dataset,
        python_type: typing.Type[xr.Dataset],
        expected: LiteralType,
    ) -> Literal:
        """Write ``python_val`` to a fresh remote Zarr store and return a blob literal for it.

        ``python_type`` and ``expected`` are accepted for the transformer
        interface but are not used here.
        """
        zarr_uri = ctx.file_access.get_random_remote_path("data.zarr")
        # The write runs inside a dask.distributed client context so that a
        # task returning an xr.Dataset does not have to create a client itself.
        with dd.Client(timeout=120):
            python_val.to_zarr(zarr_uri, mode="w")
        blob_metadata = BlobMetadata(type=self._TYPE_INFO)
        blob = Blob(uri=zarr_uri, metadata=blob_metadata)
        return Literal(scalar=Scalar(blob=blob))

    def to_python_value(
        self,
        ctx: FlyteContext,
        lv: Literal,
        expected_python_type: typing.Type[xr.Dataset],
    ) -> xr.Dataset:
        """Open the Zarr store referenced by the blob URI of ``lv`` as a dataset."""
        store_uri = lv.scalar.blob.uri
        return xr.open_zarr(store_uri)

    def to_html(
        self,
        ctx: FlyteContext,
        python_val: xr.Dataset,
        # NOTE(review): annotated as LiteralType although the parameter name
        # suggests a Python type -- confirm against the base class signature.
        expected_python_type: LiteralType,
    ) -> str:
        """Return the HTML representation that xarray produces for ``python_val``."""
        html = python_val._repr_html_()
        return html
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class XarrayDaZarrTypeTransformer(TypeTransformer[xr.DataArray]):
    """Type transformer that hands ``xr.DataArray`` values between tasks as Zarr stores.

    Arrays are written with ``DataArray.to_zarr`` to a remote path obtained
    from the Flyte context and are described by a multipart binary blob
    literal. On the way back the store is opened as a dataset and its first
    data variable is returned.
    """

    _TYPE_INFO = BlobType(format="binary", dimensionality=BlobType.BlobDimensionality.MULTIPART)

    # xarray saves a DataArray through an intermediate Dataset. When the array
    # has no usable name it stores the variable under a placeholder name and,
    # if a real name had to be replaced, keeps that name in a dataset attribute.
    # Both are undone on read so the array's name survives the round trip.
    _PLACEHOLDER_VARIABLE = "__xarray_dataarray_variable__"
    _SAVED_NAME_ATTR = "__xarray_dataarray_name__"

    def __init__(self) -> None:
        super().__init__(name="Xarray DataArray", t=xr.DataArray)

    def get_literal_type(self, t: typing.Type[xr.DataArray]) -> LiteralType:
        """Return the blob literal type for data arrays; ``t`` itself is not inspected."""
        return LiteralType(blob=self._TYPE_INFO)

    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: xr.DataArray,
        python_type: typing.Type[xr.DataArray],
        expected: LiteralType,
    ) -> Literal:
        """Write ``python_val`` to a fresh remote Zarr store and return a blob literal for it.

        ``python_type`` and ``expected`` are accepted for the transformer
        interface but are not used here.
        """
        remote_dir = ctx.file_access.get_random_remote_path("data.zarr")
        # Opening the dask client here attaches it for the write, eliminating
        # the need for users to connect to the client themselves when a task
        # takes an xr.DataArray type.
        with dd.Client(timeout=120):
            python_val.to_zarr(remote_dir, mode="w")
        return Literal(scalar=Scalar(blob=Blob(uri=remote_dir, metadata=BlobMetadata(type=self._TYPE_INFO))))

    def to_python_value(
        self,
        ctx: FlyteContext,
        lv: Literal,
        expected_python_type: typing.Type[xr.DataArray],
    ) -> xr.DataArray:
        """Open the Zarr store referenced by ``lv`` and return its first data variable.

        Raises:
            IndexError: if the store contains no data variables.
        """
        uri = lv.scalar.blob.uri
        # xr.open_zarr always opens a dataset, so we take the first variable.
        dataset = xr.open_zarr(uri)
        if not dataset.data_vars:
            raise IndexError(f"Zarr store at {uri!r} contains no data variables to load as a DataArray")
        data_array = next(iter(dataset.data_vars.values()))

        # Undo the renaming xarray applies when saving a DataArray, so that an
        # unnamed array comes back unnamed and a replaced name is restored.
        if self._SAVED_NAME_ATTR in dataset.attrs:
            data_array.name = dataset.attrs[self._SAVED_NAME_ATTR]
        if data_array.name == self._PLACEHOLDER_VARIABLE:
            data_array.name = None
        return data_array

    def to_html(
        self,
        ctx: FlyteContext,
        python_val: xr.DataArray,
        expected_python_type: typing.Type[xr.DataArray],
    ) -> str:
        """Return the HTML representation that xarray produces for ``python_val``."""
        return python_val._repr_html_()
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# Register the xr.Dataset transformer with flytekit's TypeEngine when this module is imported.
TypeEngine.register(XarrayZarrTypeTransformer())
|
|
102
|
+
# Register the xr.DataArray transformer with flytekit's TypeEngine when this module is imported.
TypeEngine.register(XarrayDaZarrTypeTransformer())
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: flytekitplugins-xarray-zarr
|
|
3
|
+
Version: 1.16.0b5
|
|
4
|
+
Summary: Xarray Zarr plugin for flytekit
|
|
5
|
+
Author: flyteorg
|
|
6
|
+
Author-email: admin@flyte.org
|
|
7
|
+
License: apache2
|
|
8
|
+
Classifier: Intended Audience :: Science/Research
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
|
+
Classifier: Topic :: Software Development
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Requires-Python: >=3.9
|
|
19
|
+
Requires-Dist: dask[distributed]>=2022.10.2
|
|
20
|
+
Requires-Dist: flytekit<2.0.0,>=1.3.0b2
|
|
21
|
+
Requires-Dist: xarray
|
|
22
|
+
Requires-Dist: zarr
|
|
23
|
+
Dynamic: author
|
|
24
|
+
Dynamic: author-email
|
|
25
|
+
Dynamic: classifier
|
|
26
|
+
Dynamic: license
|
|
27
|
+
Dynamic: requires-dist
|
|
28
|
+
Dynamic: requires-python
|
|
29
|
+
Dynamic: summary
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
setup.py
|
|
3
|
+
flytekitplugins/xarray/__init__.py
|
|
4
|
+
flytekitplugins/xarray/xarray_transformers.py
|
|
5
|
+
flytekitplugins_xarray_zarr.egg-info/PKG-INFO
|
|
6
|
+
flytekitplugins_xarray_zarr.egg-info/SOURCES.txt
|
|
7
|
+
flytekitplugins_xarray_zarr.egg-info/dependency_links.txt
|
|
8
|
+
flytekitplugins_xarray_zarr.egg-info/entry_points.txt
|
|
9
|
+
flytekitplugins_xarray_zarr.egg-info/namespace_packages.txt
|
|
10
|
+
flytekitplugins_xarray_zarr.egg-info/requires.txt
|
|
11
|
+
flytekitplugins_xarray_zarr.egg-info/top_level.txt
|
|
12
|
+
tests/test_xarray_zarr_plugin.py
|
flytekitplugins_xarray_zarr-1.16.0b5/flytekitplugins_xarray_zarr.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
flytekitplugins_xarray_zarr-1.16.0b5/flytekitplugins_xarray_zarr.egg-info/namespace_packages.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
flytekitplugins
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
flytekitplugins
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from setuptools import setup
|
|
2
|
+
|
|
3
|
+
# Packaging script for the flytekit xarray/zarr plugin.
PLUGIN_NAME = "xarray"

# Distribution name published to the package index.
microlib_name = f"flytekitplugins-{PLUGIN_NAME}-zarr"

# Runtime dependencies installed together with the plugin.
plugin_requires = [
    "dask[distributed]>=2022.10.2",
    "flytekit>=1.3.0b2,<2.0.0",
    "xarray",
    "zarr",
]

__version__ = "1.16.0b5"

# Trove classifiers attached to the distribution metadata.
_classifiers = [
    "Intended Audience :: Science/Research",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Topic :: Scientific/Engineering",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Software Development",
    "Topic :: Software Development :: Libraries",
    "Topic :: Software Development :: Libraries :: Python Modules",
]

# Entry point in the "flytekit.plugins" group that points at the plugin package.
_entry_points = {
    "flytekit.plugins": [f"{PLUGIN_NAME}=flytekitplugins.{PLUGIN_NAME}"],
}

setup(
    name=microlib_name,
    version=__version__,
    author="flyteorg",
    author_email="admin@flyte.org",
    description="Xarray Zarr plugin for flytekit",
    namespace_packages=["flytekitplugins"],
    packages=[f"flytekitplugins.{PLUGIN_NAME}"],
    install_requires=plugin_requires,
    license="apache2",
    python_requires=">=3.9",
    classifiers=_classifiers,
    entry_points=_entry_points,
)
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from flytekit import task, workflow
|
|
2
|
+
import numpy as np
|
|
3
|
+
import dask.array as da
|
|
4
|
+
import xarray as xr
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _sample_dataset() -> xr.Dataset:
    """Build a dataset with one dask-backed 32x32 variable named ``test`` over dims ``x``, ``y``."""
    values = da.random.uniform(size=(32, 32))
    data_vars = {"test": (("x", "y"), values)}
    return xr.Dataset(data_vars)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_xarray_zarr_dataarray_plugin():
    """Pass a DataArray from one task to another in a workflow and check the result type."""

    @task
    def _generate_xarray() -> xr.DataArray:
        dataset = _sample_dataset()
        return dataset["test"]

    @task
    def _consume_xarray(ds: xr.DataArray) -> xr.DataArray:
        return ds

    @workflow
    def _xarray_wf() -> xr.DataArray:
        return _consume_xarray(ds=_generate_xarray())

    # Calling the workflow directly runs both tasks and yields the final output.
    result = _xarray_wf()
    assert isinstance(result, xr.DataArray)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_xarray_zarr_dataset_plugin():
    """Pass a Dataset from one task to another in a workflow and check the result type."""

    @task
    def _generate_xarray() -> xr.Dataset:
        dataset = _sample_dataset()
        return dataset

    @task
    def _consume_xarray(ds: xr.Dataset) -> xr.Dataset:
        return ds

    @workflow
    def _xarray_wf() -> xr.Dataset:
        return _consume_xarray(ds=_generate_xarray())

    # Calling the workflow directly runs both tasks and yields the final output.
    result = _xarray_wf()
    assert isinstance(result, xr.Dataset)
|