flytekitplugins-geopandas 1.16.0b3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flytekitplugins_geopandas-1.16.0b3/PKG-INFO +28 -0
- flytekitplugins_geopandas-1.16.0b3/README.md +10 -0
- flytekitplugins_geopandas-1.16.0b3/flytekitplugins/geopandas/__init__.py +14 -0
- flytekitplugins_geopandas-1.16.0b3/flytekitplugins/geopandas/gdf_transformers.py +73 -0
- flytekitplugins_geopandas-1.16.0b3/flytekitplugins_geopandas.egg-info/PKG-INFO +28 -0
- flytekitplugins_geopandas-1.16.0b3/flytekitplugins_geopandas.egg-info/SOURCES.txt +12 -0
- flytekitplugins_geopandas-1.16.0b3/flytekitplugins_geopandas.egg-info/dependency_links.txt +1 -0
- flytekitplugins_geopandas-1.16.0b3/flytekitplugins_geopandas.egg-info/entry_points.txt +2 -0
- flytekitplugins_geopandas-1.16.0b3/flytekitplugins_geopandas.egg-info/namespace_packages.txt +1 -0
- flytekitplugins_geopandas-1.16.0b3/flytekitplugins_geopandas.egg-info/requires.txt +3 -0
- flytekitplugins_geopandas-1.16.0b3/flytekitplugins_geopandas.egg-info/top_level.txt +1 -0
- flytekitplugins_geopandas-1.16.0b3/setup.cfg +4 -0
- flytekitplugins_geopandas-1.16.0b3/setup.py +35 -0
- flytekitplugins_geopandas-1.16.0b3/tests/test_geopandas_plugin.py +63 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: flytekitplugins-geopandas
|
|
3
|
+
Version: 1.16.0b3
|
|
4
|
+
Summary: Geopandas plugin for flytekit
|
|
5
|
+
Author: flyteorg
|
|
6
|
+
Author-email: admin@flyte.org
|
|
7
|
+
License: apache2
|
|
8
|
+
Classifier: Intended Audience :: Science/Research
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
|
+
Classifier: Topic :: Software Development
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Requires-Python: >=3.9
|
|
19
|
+
Requires-Dist: flytekit<2.0.0,>=1.3.0b2
|
|
20
|
+
Requires-Dist: geopandas<2.0.0,>=1.0.0
|
|
21
|
+
Requires-Dist: pandas
|
|
22
|
+
Dynamic: author
|
|
23
|
+
Dynamic: author-email
|
|
24
|
+
Dynamic: classifier
|
|
25
|
+
Dynamic: license
|
|
26
|
+
Dynamic: requires-dist
|
|
27
|
+
Dynamic: requires-python
|
|
28
|
+
Dynamic: summary
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Flytekit GeoPandas Plugin
|
|
2
|
+
[GeoPandas](https://geopandas.org/en/stable/) is an open source project that makes working with geospatial data in Python easier.
|
|
3
|
+
|
|
4
|
+
This plugin supports `gpd.GeoDataFrame` as a data type with [StructuredDataset](https://docs.flyte.org/en/latest/user_guide/data_types_and_io/structureddataset.html).
|
|
5
|
+
|
|
6
|
+
To install the plugin, run the following command:
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
pip install flytekitplugins-geopandas
|
|
10
|
+
```
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""
|
|
2
|
+
.. currentmodule:: flytekitplugins.geopandas
|
|
3
|
+
|
|
4
|
+
This package contains the StructuredDataset encoding and decoding handlers for GeoPandas ``GeoDataFrame`` objects.
|
|
5
|
+
|
|
6
|
+
.. autosummary::
|
|
7
|
+
:template: custom.rst
|
|
8
|
+
:toctree: generated/
|
|
9
|
+
|
|
10
|
+
GeoPandasDecodingHandler
|
|
11
|
+
GeoPandasEncodingHandler
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from .gdf_transformers import GeoPandasDecodingHandler, GeoPandasEncodingHandler
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import typing
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from flytekit import FlyteContext, lazy_module
|
|
6
|
+
from flytekit.models import literals
|
|
7
|
+
from flytekit.models.literals import StructuredDatasetMetadata
|
|
8
|
+
from flytekit.models.types import StructuredDatasetType
|
|
9
|
+
from flytekit.types.structured.structured_dataset import (
|
|
10
|
+
PARQUET,
|
|
11
|
+
StructuredDataset,
|
|
12
|
+
StructuredDatasetDecoder,
|
|
13
|
+
StructuredDatasetEncoder,
|
|
14
|
+
StructuredDatasetTransformerEngine,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
if typing.TYPE_CHECKING:
|
|
18
|
+
import pyarrow
|
|
19
|
+
|
|
20
|
+
import geopandas as gpd
|
|
21
|
+
else:
|
|
22
|
+
gpd = lazy_module("geopandas")
|
|
23
|
+
pyarrow = lazy_module("pyarrow")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class GeoPandasDataFrameRenderer:
    """
    Renderer that turns the summary statistics of a GeoPandas GeoDataFrame
    into an HTML table.
    """

    def to_html(self, df: gpd.GeoDataFrame) -> str:
        """
        Return the HTML representation of ``df.describe()``.

        :param df: the GeoDataFrame to summarise.
        :return: the string produced by ``_repr_html_()`` on the described frame.
        :raises AssertionError: if ``df`` is not a ``gpd.GeoDataFrame``
            (only while assertions are enabled).
        """
        assert isinstance(df, gpd.GeoDataFrame)
        summary = df.describe()
        return summary._repr_html_()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class GeoPandasEncodingHandler(StructuredDatasetEncoder):
    """
    StructuredDataset encoder that stores a ``gpd.GeoDataFrame`` as a parquet file.
    """

    def encode(
        self,
        ctx: FlyteContext,
        structured_dataset: StructuredDataset,
        structured_dataset_type: StructuredDatasetType,
    ) -> literals.StructuredDataset:
        """
        Write the wrapped GeoDataFrame to ``data.parquet`` and return its literal.

        The file is placed in a directory obtained from
        ``ctx.file_access.get_random_remote_directory()``.

        :param ctx: Flyte context; only its ``file_access`` is used here.
        :param structured_dataset: wrapper whose ``dataframe`` is written out.
        :param structured_dataset_type: dataset type; its ``format`` is set to
            ``PARQUET`` in place before being wrapped into the metadata.
        :return: a ``literals.StructuredDataset`` pointing at the written file.
        """
        file_access = ctx.file_access
        target_dir = file_access.get_random_remote_directory()
        # Create the directory only when file access reports it as not remote.
        if not file_access.is_remote(target_dir):
            Path(target_dir).mkdir(parents=True, exist_ok=True)
        parquet_uri = os.path.join(str(target_dir), "data.parquet")

        gdf = typing.cast(gpd.GeoDataFrame, structured_dataset.dataframe)
        gdf.to_parquet(parquet_uri)

        # The caller's type object is updated in place, not copied.
        structured_dataset_type.format = PARQUET
        metadata = StructuredDatasetMetadata(structured_dataset_type)
        return literals.StructuredDataset(uri=parquet_uri, metadata=metadata)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class GeoPandasDecodingHandler(StructuredDatasetDecoder):
    """
    StructuredDataset decoder that loads a ``gpd.GeoDataFrame`` from a URI.
    """

    def decode(
        self,
        ctx: FlyteContext,
        flyte_value: literals.StructuredDataset,
        current_task_metadata: StructuredDatasetMetadata,
    ) -> gpd.GeoDataFrame:
        """
        Read the dataset at ``flyte_value.uri`` into a GeoDataFrame.

        Parquet is attempted first. If pyarrow rejects the file with
        ``ArrowInvalid``, the URI is handed to ``gpd.read_file`` instead, so
        that a user can bring a non-parquet GeoDataFrame.

        :param ctx: Flyte context (not used by this implementation).
        :param flyte_value: literal whose ``uri`` locates the data.
        :param current_task_metadata: metadata of the current task (not used
            by this implementation).
        :return: the loaded GeoDataFrame.
        """
        source_uri = flyte_value.uri
        try:
            gdf = gpd.read_parquet(source_uri)
        except pyarrow.lib.ArrowInvalid:
            # Not a parquet file: use the generic geopandas reader.
            gdf = gpd.read_file(source_uri)
        return gdf
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Register the renderer that produces the HTML summary for GeoDataFrame values.
StructuredDatasetTransformerEngine.register_renderer(gpd.GeoDataFrame, GeoPandasDataFrameRenderer())
# Register the GeoPandas encoder for the parquet format so that GeoDataFrames
# can be passed to, from and between tasks / workflows.
StructuredDatasetTransformerEngine.register(GeoPandasEncodingHandler(gpd.GeoDataFrame, None, PARQUET))
# Register the decoder without pinning a format, so that it is also used when
# a user provides a non-parquet file such as a GeoPackage or GeoJSON.
StructuredDatasetTransformerEngine.register(GeoPandasDecodingHandler(gpd.GeoDataFrame, None, None))
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: flytekitplugins-geopandas
|
|
3
|
+
Version: 1.16.0b3
|
|
4
|
+
Summary: Geopandas plugin for flytekit
|
|
5
|
+
Author: flyteorg
|
|
6
|
+
Author-email: admin@flyte.org
|
|
7
|
+
License: apache2
|
|
8
|
+
Classifier: Intended Audience :: Science/Research
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
|
+
Classifier: Topic :: Software Development
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Requires-Python: >=3.9
|
|
19
|
+
Requires-Dist: flytekit<2.0.0,>=1.3.0b2
|
|
20
|
+
Requires-Dist: geopandas<2.0.0,>=1.0.0
|
|
21
|
+
Requires-Dist: pandas
|
|
22
|
+
Dynamic: author
|
|
23
|
+
Dynamic: author-email
|
|
24
|
+
Dynamic: classifier
|
|
25
|
+
Dynamic: license
|
|
26
|
+
Dynamic: requires-dist
|
|
27
|
+
Dynamic: requires-python
|
|
28
|
+
Dynamic: summary
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
setup.py
|
|
3
|
+
flytekitplugins/geopandas/__init__.py
|
|
4
|
+
flytekitplugins/geopandas/gdf_transformers.py
|
|
5
|
+
flytekitplugins_geopandas.egg-info/PKG-INFO
|
|
6
|
+
flytekitplugins_geopandas.egg-info/SOURCES.txt
|
|
7
|
+
flytekitplugins_geopandas.egg-info/dependency_links.txt
|
|
8
|
+
flytekitplugins_geopandas.egg-info/entry_points.txt
|
|
9
|
+
flytekitplugins_geopandas.egg-info/namespace_packages.txt
|
|
10
|
+
flytekitplugins_geopandas.egg-info/requires.txt
|
|
11
|
+
flytekitplugins_geopandas.egg-info/top_level.txt
|
|
12
|
+
tests/test_geopandas_plugin.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
flytekitplugins
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
flytekitplugins
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from setuptools import setup
|
|
2
|
+
|
|
3
|
+
# Short plugin name; reused below for the distribution name, the package path
# and the entry-point name.
PLUGIN_NAME = "geopandas"

# Distribution name passed to ``setup(name=...)``.
microlib_name = f"flytekitplugins-{PLUGIN_NAME}"

# Runtime dependencies passed to ``install_requires``.
plugin_requires = ["flytekit>=1.3.0b2,<2.0.0", "geopandas>=1.0.0,<2.0.0", "pandas"]

# Version string passed to ``setup(version=...)``.
__version__ = "v1.16.0b3"

setup(
    name=microlib_name,
    version=__version__,
    author="flyteorg",
    author_email="admin@flyte.org",
    description="Geopandas plugin for flytekit",
    # ``flytekitplugins`` is declared as a namespace package; only the
    # ``flytekitplugins.geopandas`` sub-package is shipped by this distribution.
    namespace_packages=["flytekitplugins"],
    packages=[f"flytekitplugins.{PLUGIN_NAME}"],
    install_requires=plugin_requires,
    license="apache2",
    python_requires=">=3.9",
    classifiers=[
        "Intended Audience :: Science/Research",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: Apache Software License",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Topic :: Scientific/Engineering",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Software Development",
        "Topic :: Software Development :: Libraries",
        "Topic :: Software Development :: Libraries :: Python Modules",
    ],
    # Advertise the plugin package under the "flytekit.plugins" entry-point group.
    entry_points={"flytekit.plugins": [f"{PLUGIN_NAME}=flytekitplugins.{PLUGIN_NAME}"]},
)
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import geopandas as gpd
|
|
2
|
+
from flytekitplugins.geopandas.gdf_transformers import GeoPandasDataFrameRenderer
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from flytekit import task
|
|
8
|
+
from flytekit.types.structured.structured_dataset import StructuredDataset
|
|
9
|
+
import numpy as np
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_geopandas_encodes_decodes():
    """An in-memory GeoDataFrame passed through a pass-through task comes back equal."""

    @task
    def _gdf_task(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
        return gdf

    columns = {"geometry": gpd.points_from_xy([0, 1], [0, 1]), "other_column": [1, 2]}
    original = gpd.GeoDataFrame(columns, crs="EPSG:4326")

    round_tripped = _gdf_task(original)

    assert round_tripped.equals(original)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@pytest.mark.parametrize("file_name", ["output.geojson", "output.gpkg"])
def test_geopandas_encodes_common_formats(tmp_path: Path, file_name: str):
    """A GeoDataFrame written with ``to_file`` can be fed to a task via a StructuredDataset URI."""

    @task
    def _gdf_task(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
        return gdf

    expected = gpd.GeoDataFrame(
        {"other": np.array([1.0, 2.0])},
        geometry=gpd.points_from_xy([0, 1], [0, 1]),
        crs="EPSG:4326",
    )
    # Persist the frame in the parametrized format under pytest's temp directory.
    file_uri = str(tmp_path / file_name)
    expected.to_file(file_uri)

    round_tripped = _gdf_task(gdf=StructuredDataset(uri=file_uri))

    assert round_tripped.equals(expected)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def test_geopandas_encodes_shp_not_yet_supported(tmp_path: Path):
    """Feeding a ``.shp`` file to a task via a StructuredDataset URI raises ``ValueError``."""

    @task
    def _gdf_task(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
        return gdf

    gdf = gpd.GeoDataFrame(
        {"geometry": gpd.points_from_xy([0, 1], [0, 1]), "other": [1, 2]},
        crs="EPSG:4326",
    )
    uri = str(tmp_path / "output.shp")
    gdf.to_file(uri)

    # The call is expected to raise, so its result is deliberately not bound.
    with pytest.raises(ValueError, match=r"Set SHAPE_RESTORE_SHX config option to YES"):
        _gdf_task(gdf=StructuredDataset(uri=uri))
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def test_gdf_renderer():
    """The renderer's HTML equals ``describe()._repr_html_()`` computed directly."""
    frame = gpd.GeoDataFrame(
        {"geometry": gpd.points_from_xy([0, 1], [0, 1]), "other_column": [1, 2]},
        crs="EPSG:4326",
    )
    expected_html = frame.describe()._repr_html_()

    rendered_html = GeoPandasDataFrameRenderer().to_html(frame)

    assert rendered_html == expected_html
|