flytekitplugins-geopandas 1.16.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
+ Metadata-Version: 2.4
2
+ Name: flytekitplugins-geopandas
3
+ Version: 1.16.0
4
+ Summary: Geopandas plugin for flytekit
5
+ Author: flyteorg
6
+ Author-email: admin@flyte.org
7
+ License: apache2
8
+ Classifier: Intended Audience :: Science/Research
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Topic :: Scientific/Engineering
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Classifier: Topic :: Software Development
16
+ Classifier: Topic :: Software Development :: Libraries
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Requires-Python: >=3.9
19
+ Requires-Dist: flytekit<2.0.0,>=1.3.0b2
20
+ Requires-Dist: geopandas<2.0.0,>=1.0.0
21
+ Requires-Dist: pandas
22
+ Dynamic: author
23
+ Dynamic: author-email
24
+ Dynamic: classifier
25
+ Dynamic: license
26
+ Dynamic: requires-dist
27
+ Dynamic: requires-python
28
+ Dynamic: summary
@@ -0,0 +1,10 @@
1
+ # Flytekit GeoPandas Plugin
2
+ [GeoPandas](https://geopandas.org/en/stable/) is an open source project that makes working with geospatial data in Python easier.
3
+
4
+ This plugin supports `gpd.GeoDataFrame` as a data type with [StructuredDataset](https://docs.flyte.org/en/latest/user_guide/data_types_and_io/structureddataset.html).
5
+
6
+ To install the plugin, run the following command:
7
+
8
+ ```bash
9
+ pip install flytekitplugins-geopandas
10
+ ```
@@ -0,0 +1,14 @@
1
+ """
2
+ .. currentmodule:: flytekitplugins.geopandas
3
+
4
+ This package contains the GeoPandas encoding and decoding handlers that let Flytekit pass `gpd.GeoDataFrame` values as StructuredDatasets.
5
+
6
+ .. autosummary::
7
+ :template: custom.rst
8
+ :toctree: generated/
9
+
10
+ GeoPandasDecodingHandler
11
+ GeoPandasEncodingHandler
12
+ """
13
+
14
+ from .gdf_transformers import GeoPandasDecodingHandler, GeoPandasEncodingHandler
@@ -0,0 +1,74 @@
1
+ import typing
2
+ from pathlib import Path
3
+
4
+ from flytekit import FlyteContext, lazy_module
5
+ from flytekit.models import literals
6
+ from flytekit.models.literals import StructuredDatasetMetadata
7
+ from flytekit.models.types import StructuredDatasetType
8
+ from flytekit.types.structured.structured_dataset import (
9
+ PARQUET,
10
+ StructuredDataset,
11
+ StructuredDatasetDecoder,
12
+ StructuredDatasetEncoder,
13
+ StructuredDatasetTransformerEngine,
14
+ )
15
+
16
+ if typing.TYPE_CHECKING:
17
+ import pyarrow
18
+
19
+ import geopandas as gpd
20
+ else:
21
+ gpd = lazy_module("geopandas")
22
+ pyarrow = lazy_module("pyarrow")
23
+
24
+
25
class GeoPandasDataFrameRenderer:
    """
    Renders the ``describe()`` summary of a GeoDataFrame as an HTML table.
    """

    def to_html(self, df: gpd.GeoDataFrame) -> str:
        """Return the HTML representation of ``df.describe()``.

        Fails with an ``AssertionError`` when ``df`` is not a ``gpd.GeoDataFrame``.
        """
        assert isinstance(df, gpd.GeoDataFrame)
        summary = df.describe()
        return summary._repr_html_()
33
+
34
+
35
class GeoPandasEncodingHandler(StructuredDatasetEncoder):
    """Encoder that writes a ``gpd.GeoDataFrame`` out as parquet."""

    def encode(
        self,
        ctx: FlyteContext,
        structured_dataset: StructuredDataset,
        structured_dataset_type: StructuredDatasetType,
    ) -> literals.StructuredDataset:
        """Write the dataset's dataframe to parquet and return its literal.

        The destination is ``structured_dataset.uri`` when that is set; otherwise
        a path is built from the context's raw output prefix and a random string.
        For a non-remote destination the path is created as a directory and the
        data is written to ``data.parquet`` inside it.

        Note that ``structured_dataset_type.format`` is set to ``PARQUET`` in
        place before the literal is built.
        """
        destination = typing.cast(str, structured_dataset.uri)
        if not destination:
            destination = ctx.file_access.join(
                ctx.file_access.raw_output_prefix,
                ctx.file_access.get_random_string(),
            )

        if not ctx.file_access.is_remote(destination):
            # Local target: treat the URI as a directory and write one file into it.
            local_dir = Path(destination)
            local_dir.mkdir(parents=True, exist_ok=True)
            destination = str(Path(destination) / "data.parquet")

        frame = typing.cast(gpd.GeoDataFrame, structured_dataset.dataframe)
        frame.to_parquet(destination)

        structured_dataset_type.format = PARQUET
        metadata = StructuredDatasetMetadata(structured_dataset_type)
        return literals.StructuredDataset(uri=destination, metadata=metadata)
52
+
53
+
54
class GeoPandasDecodingHandler(StructuredDatasetDecoder):
    """Decoder that loads a ``gpd.GeoDataFrame`` from a StructuredDataset literal."""

    def decode(
        self,
        ctx: FlyteContext,
        flyte_value: literals.StructuredDataset,
        current_task_metadata: StructuredDatasetMetadata,
    ) -> gpd.GeoDataFrame:
        """Read ``flyte_value.uri`` into a GeoDataFrame.

        Parquet is tried first. If pyarrow raises ``ArrowInvalid``, the URI is
        opened with ``gpd.read_file`` instead, since a user may bring a
        non-parquet dataset that needs a different opening method.
        """
        try:
            frame = gpd.read_parquet(flyte_value.uri)
        except pyarrow.lib.ArrowInvalid:
            frame = gpd.read_file(flyte_value.uri)
        return frame
67
+
68
+
69
# Renderer used for GeoDataFrame values.
StructuredDatasetTransformerEngine.register_renderer(
    gpd.GeoDataFrame,
    GeoPandasDataFrameRenderer(),
)

# The encoder is registered for parquet, the format used when GeoDataFrames are
# passed between tasks / workflows.
StructuredDatasetTransformerEngine.register(
    GeoPandasEncodingHandler(gpd.GeoDataFrame, None, PARQUET),
)

# The decoder is registered without a fixed format so that user-provided
# geopackage, shape file, parquet, etc. inputs are all routed to it.
StructuredDatasetTransformerEngine.register(
    GeoPandasDecodingHandler(gpd.GeoDataFrame, None, None),
)
@@ -0,0 +1,28 @@
1
+ Metadata-Version: 2.4
2
+ Name: flytekitplugins-geopandas
3
+ Version: 1.16.0
4
+ Summary: Geopandas plugin for flytekit
5
+ Author: flyteorg
6
+ Author-email: admin@flyte.org
7
+ License: apache2
8
+ Classifier: Intended Audience :: Science/Research
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Topic :: Scientific/Engineering
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Classifier: Topic :: Software Development
16
+ Classifier: Topic :: Software Development :: Libraries
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Requires-Python: >=3.9
19
+ Requires-Dist: flytekit<2.0.0,>=1.3.0b2
20
+ Requires-Dist: geopandas<2.0.0,>=1.0.0
21
+ Requires-Dist: pandas
22
+ Dynamic: author
23
+ Dynamic: author-email
24
+ Dynamic: classifier
25
+ Dynamic: license
26
+ Dynamic: requires-dist
27
+ Dynamic: requires-python
28
+ Dynamic: summary
@@ -0,0 +1,12 @@
1
+ README.md
2
+ setup.py
3
+ flytekitplugins/geopandas/__init__.py
4
+ flytekitplugins/geopandas/gdf_transformers.py
5
+ flytekitplugins_geopandas.egg-info/PKG-INFO
6
+ flytekitplugins_geopandas.egg-info/SOURCES.txt
7
+ flytekitplugins_geopandas.egg-info/dependency_links.txt
8
+ flytekitplugins_geopandas.egg-info/entry_points.txt
9
+ flytekitplugins_geopandas.egg-info/namespace_packages.txt
10
+ flytekitplugins_geopandas.egg-info/requires.txt
11
+ flytekitplugins_geopandas.egg-info/top_level.txt
12
+ tests/test_geopandas_plugin.py
@@ -0,0 +1,2 @@
1
+ [flytekit.plugins]
2
+ geopandas = flytekitplugins.geopandas
@@ -0,0 +1,3 @@
1
+ flytekit<2.0.0,>=1.3.0b2
2
+ geopandas<2.0.0,>=1.0.0
3
+ pandas
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,35 @@
1
+ from setuptools import setup
2
+
3
# Short plugin identifier, reused for the distribution name, the package path
# and the flytekit entry point below.
PLUGIN_NAME = "geopandas"

microlib_name = f"flytekitplugins-{PLUGIN_NAME}"

plugin_requires = [
    "flytekit>=1.3.0b2,<2.0.0",
    "geopandas>=1.0.0,<2.0.0",
    "pandas",
]

__version__ = "v1.16.0"

# Trove classifiers published with the distribution.
_CLASSIFIERS = [
    "Intended Audience :: Science/Research",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Topic :: Scientific/Engineering",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Software Development",
    "Topic :: Software Development :: Libraries",
    "Topic :: Software Development :: Libraries :: Python Modules",
]

# Declares the plugin package under the "flytekit.plugins" entry point group.
_ENTRY_POINTS = {
    "flytekit.plugins": [f"{PLUGIN_NAME}=flytekitplugins.{PLUGIN_NAME}"],
}

setup(
    name=microlib_name,
    version=__version__,
    description="Geopandas plugin for flytekit",
    author="flyteorg",
    author_email="admin@flyte.org",
    license="apache2",
    python_requires=">=3.9",
    install_requires=plugin_requires,
    namespace_packages=["flytekitplugins"],
    packages=[f"flytekitplugins.{PLUGIN_NAME}"],
    classifiers=_CLASSIFIERS,
    entry_points=_ENTRY_POINTS,
)
@@ -0,0 +1,63 @@
1
+ import geopandas as gpd
2
+ from flytekitplugins.geopandas.gdf_transformers import GeoPandasDataFrameRenderer
3
+ from pathlib import Path
4
+
5
+ import pytest
6
+
7
+ from flytekit import task
8
+ from flytekit.types.structured.structured_dataset import StructuredDataset
9
+ import numpy as np
10
+
11
+
12
def test_geopandas_encodes_decodes():
    """Pass a GeoDataFrame through an identity task and expect an equal frame back."""

    @task
    def _gdf_task(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
        return gdf

    columns = {
        "geometry": gpd.points_from_xy([0, 1], [0, 1]),
        "other_column": [1, 2],
    }
    original = gpd.GeoDataFrame(columns, crs="EPSG:4326")

    round_tripped = _gdf_task(original)

    assert round_tripped.equals(original)
23
+
24
+
25
@pytest.mark.parametrize("file_name", ["output.geojson", "output.gpkg"])
def test_geopandas_encodes_common_formats(tmp_path: Path, file_name: str):
    """Write a frame with ``to_file`` and feed the task a StructuredDataset pointing at it."""

    @task
    def _gdf_task(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
        return gdf

    original = gpd.GeoDataFrame(
        {"other": np.array([1.0, 2.0])},
        geometry=gpd.points_from_xy([0, 1], [0, 1]),
        crs="EPSG:4326",
    )
    file_uri = str(tmp_path / file_name)
    original.to_file(file_uri)

    # Only the URI is handed over; the task input has to be loaded from the file.
    loaded = _gdf_task(gdf=StructuredDataset(uri=file_uri))

    assert loaded.equals(original)
40
+
41
+
42
def test_geopandas_encodes_shp_not_yet_supported(tmp_path: Path):
    """Feeding the task a ``.shp`` URI is expected to fail.

    The frame is written with ``to_file`` to ``output.shp``; running the task on
    a StructuredDataset pointing at that file must raise a ``ValueError`` whose
    message mentions the ``SHAPE_RESTORE_SHX`` config option.
    """

    @task
    def _gdf_task(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
        return gdf

    gdf = gpd.GeoDataFrame(
        {"geometry": gpd.points_from_xy([0, 1], [0, 1]), "other": [1, 2]},
        crs="EPSG:4326",
    )
    uri = str(tmp_path / "output.shp")
    gdf.to_file(uri)
    with pytest.raises(ValueError, match=r"Set SHAPE_RESTORE_SHX config option to YES"):
        # The call is expected to raise, so its result is intentionally not bound.
        _gdf_task(gdf=StructuredDataset(uri=uri))
55
+
56
+
57
def test_gdf_renderer():
    """The renderer's HTML must equal the HTML of the frame's ``describe()`` output."""
    columns = {
        "geometry": gpd.points_from_xy([0, 1], [0, 1]),
        "other_column": [1, 2],
    }
    frame = gpd.GeoDataFrame(columns, crs="EPSG:4326")

    expected_html = frame.describe()._repr_html_()
    rendered_html = GeoPandasDataFrameRenderer().to_html(frame)

    assert rendered_html == expected_html