rusterize 0.1.0__cp310-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rusterize might be problematic. Click here for more details.

rusterize/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ import importlib.metadata
2
+ from .core import *
3
+
4
+ __version__ = importlib.metadata.version("rusterize")
rusterize/core.py ADDED
@@ -0,0 +1,80 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Dict, Optional, Tuple, Union
4
+
5
+ import polars as pl
6
+ from pandas import DataFrame
7
+ import rioxarray
8
+ from xarray import DataArray
9
+ from .rusterize import _rusterize
10
+
11
+
12
def rusterize(gdf: DataFrame,
              res: Union[Tuple[int, ...], Tuple[float, ...]],
              field: Optional[str] = None,
              by: Optional[str] = None,
              fun: str = "last",
              background: Optional[Union[int, float]] = None,
              ) -> "DataArray":
    """
    Fast geopandas rasterization into xarray.DataArray

    Args:
        :param gdf: geopandas dataframe to rasterize.
        :param res: tuple of (xres, yres) for rasterized data. Both values must be
            positive numbers of the same type.
        :param field: field to rasterize. Default is None.
        :param by: column to rasterize, assigns each unique value to a layer in the stack based on field.
            Requires `field`. Default is None.
        :param fun: pixel function to use, see fasterize for options. Default is `last`.
        :param background: background value in final raster. Default is None.

    Returns:
        xarray.DataArray built from the dictionary returned by the Rust backend, with the
        CRS of `gdf` written to it.

    Raises:
        TypeError: if any argument has an unsupported type.
        ValueError: if `by` is given without `field`, or if `res` is not a pair of
            positive numbers of consistent type.
    """
    import numbers  # local import: only needed to validate the entries of `res`

    # type checks
    # geopandas is not imported by this module, so a GeoDataFrame is recognised by the
    # class-level `total_bounds` attribute. Looking it up on the class (not the instance)
    # avoids computing the bounds just for validation.
    if not isinstance(gdf, DataFrame) or not hasattr(type(gdf), "total_bounds"):
        raise TypeError("Must pass a valid geopandas dataframe.")
    if field is not None and not isinstance(field, str):
        raise TypeError("Must pass a valid string to field.")
    if by is not None and not isinstance(by, str):
        raise TypeError("Must pass a valid string to by.")
    if not isinstance(res, tuple):
        raise TypeError("Must pass a valid resolution tuple (x, y).")
    if not isinstance(fun, str):
        raise TypeError("Must pass a valid string to fun. Select only one of sum, first, last, min, max, count, or any.")
    if background is not None and not isinstance(background, (int, float)):
        raise TypeError("Must pass a valid background type.")

    # value check
    if by and not field:
        raise ValueError("If by is specified, field must also be specified.")
    res_is_valid = (
        len(res) == 2
        # bool is a numbers.Real subtype but is never a meaningful resolution
        and all(isinstance(r, numbers.Real) and not isinstance(r, bool) for r in res)
        # `r > 0` is checked per element; it is also False for NaN, which is rejected too
        and all(r > 0 for r in res)
        and isinstance(res[0], type(res[1]))
    )
    if not res_is_valid:
        raise ValueError("Must pass valid resolution tuple of values of consistent dtype.")

    # RasterInfo
    bounds = gdf.total_bounds
    raster_info = {
        "xmin": bounds[0],
        "ymin": bounds[1],
        "xmax": bounds[2],
        "ymax": bounds[3],
        "xres": res[0],
        "yres": res[1],
        # NOTE(review): passed as placeholders; presumably filled in by the Rust backend - confirm
        "nrows": 0,
        "ncols": 0,
    }

    # extract columns of interest and convert to polars
    # dict.fromkeys drops a duplicate when field == by while keeping a stable column order
    cols = list(dict.fromkeys(col for col in (field, by) if col))
    df = pl.from_pandas(gdf[cols]) if cols else None

    # rusterize
    r = _rusterize(
        gdf.geometry,
        raster_info,
        fun,
        df,
        field,
        by,
        background,
    )
    # the `.rio` accessor is registered by the module-level `import rioxarray`
    return DataArray.from_dict(r).rio.write_crs(gdf.crs, inplace=True)
Binary file
@@ -0,0 +1,240 @@
1
+ Metadata-Version: 2.4
2
+ Name: rusterize
3
+ Version: 0.1.0
4
+ Classifier: License :: OSI Approved :: MIT License
5
+ Classifier: Operating System :: OS Independent
6
+ Classifier: Programming Language :: Rust
7
+ Classifier: Programming Language :: Python :: Implementation :: CPython
8
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
9
+ Requires-Dist: geopandas >=1.0.1
10
+ Requires-Dist: pandas >=2.2.3
11
+ Requires-Dist: pyarrow >=18.1.0
12
+ Requires-Dist: polars >=1.19.0
13
+ Requires-Dist: xarray >=2025.1.1
14
+ Requires-Dist: rioxarray >=0.18.2
15
+ License-File: LICENSE
16
+ Summary: High performance rasterization tool for Python built in Rust
17
+ Keywords: fast,raster
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
20
+ Project-URL: repository, https://github.com/ttrotto/rusterize
21
+
22
+ # rusterize
23
+
24
+ High performance rasterization tool for python built in Rust. This
25
+ repository is heavily based on the [**fasterize**](https://github.com/ecohealthalliance/fasterize.git) package built in C++
26
+ for R. This version ports it to Python with a Rust backend.
27
+
28
+ Functionally, it takes an input [geopandas](https://geopandas.org/en/stable/)
29
+ dataframe and returns an [xarray](https://docs.xarray.dev/en/stable/). It
30
+ tightly mirrors the processing routine of fasterize, so it works only on
31
+ (multi)polygon geometries at the moment.
32
+
33
+ # Installation
34
+
35
+ Install the current version with pip:
36
+
37
+ ``` {shell}
38
+ pip install rusterize
39
+ ```
40
+
41
+ # Contributing
42
+
43
+ Any contribution is welcome! You can install **rusterize** directly
44
+ from this repo using [maturin](https://www.maturin.rs/) as an editable
45
+ package. For this to work, you’ll need to have [Rust](https://www.rust-lang.org/tools/install) and
46
+ [cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html)
47
+ installed.
48
+
49
+ ``` {shell}
50
+ # Clone repo
51
+ git clone https://github.com/<username>/rusterize.git
52
+ cd rusterize
53
+
54
+ # Install the Rust nightly toolchain
55
+ rustup toolchain install nightly-2025-01-05
56
+
57
+ # Install maturin
58
+ pip install maturin
59
+
60
+ # Install editable version with optimized code
61
+ maturin develop --profile dist-release
62
+ ```
63
+
64
+ # API
65
+
66
+ This function has a simple API:
67
+
68
+ ``` python
69
+ from rusterize.core import rusterize
70
+
71
+ # gdf = <import datasets as needed>
72
+
73
+ # rusterize
74
+ rusterize(gdf,
75
+ (30, 30),
76
+ "field",
77
+ "by",
78
+ "sum",
79
+ 0)
80
+ ```
81
+
82
+ - `gdf`: geopandas dataframe to rasterize
83
+ - `res`: tuple of (xres, yres) for final resolution
84
+ - `field`: field to rasterize. Default is None (a value of `1` is rasterized).
85
+ - `by`: column to rasterize. Assigns each group to a band in the
86
+ stack. Values are taken from `field`. Default is None
87
+ - `fun`: pixel function to use when multiple values overlap. Default is
88
+ `last`. Available options are `sum`, `first`, `last`, `min`, `max`, `count`, or `any`
89
+ - `background`: background value in final raster. Default is None (NaN)
90
+
91
+ # Usage
92
+
93
+ **rusterize** consists of a single function `rusterize()`. The Rust implementation
94
+ returns an array that is then converted to a xarray on the Python side
95
+ for simplicity.
96
+
97
+ ``` python
98
+ from rusterize.core import rusterize
99
+ import geopandas as gpd
100
+ from shapely import wkt
101
+ import matplotlib.pyplot as plt
102
+
103
+ # example from fasterize
104
+ polygons = [
105
+ "POLYGON ((-180 -20, -140 55, 10 0, -140 -60, -180 -20), (-150 -20, -100 -10, -110 20, -150 -20))",
106
+ "POLYGON ((-10 0, 140 60, 160 0, 140 -55, -10 0))",
107
+ "POLYGON ((-125 0, 0 60, 40 5, 15 -45, -125 0))"
108
+ ]
109
+
110
+ # Convert WKT strings to Shapely geometries
111
+ geometries = [wkt.loads(polygon) for polygon in polygons]
112
+
113
+ # Create a GeoDataFrame
114
+ gdf = gpd.GeoDataFrame({'value': range(1, len(polygons) + 1)}, geometry=geometries, crs='EPSG:32619')
115
+
116
+ # rusterize
117
+ output = rusterize(
118
+ gdf,
119
+ res=(1, 1),
120
+ field="value",
121
+ fun="sum"
122
+ ).squeeze()
123
+
124
+ # plot it
125
+ fig, ax = plt.subplots(figsize=(12, 6))
126
+ output.plot.imshow(ax=ax)
127
+ plt.show()
128
+ ```
129
+
130
+ ![](README_files/figure-commonmark/cell-2-output-1.png)
131
+
132
+ # Benchmarks
133
+
134
+ **fasterize** is fast and so is **rusterize**! Let’s try it on small and large
135
+ datasets.
136
+
137
+ ``` python
138
+ from rusterize.core import rusterize
139
+ import geopandas as gpd
140
+ import requests
141
+ import zipfile
142
+ from io import BytesIO
143
+
144
+ # large dataset (~380 MB)
145
+ url = "https://s3.amazonaws.com/hp3-shapefiles/Mammals_Terrestrial.zip"
146
+ response = requests.get(url)
147
+
148
+ # unzip
149
+ with zipfile.ZipFile(BytesIO(response.content), 'r') as zip_ref:
150
+ zip_ref.extractall()
151
+
152
+ # read
153
+ gdf_large = gpd.read_file("Mammals_Terrestrial/Mammals_Terrestrial.shp")
154
+
155
+ # small dataset (first 1000 rows)
156
+ gdf_small = gdf_large.iloc[:1000, :]
157
+
158
+ # rusterize at 1/6 degree resolution
159
+ def test_large(benchmark):
160
+ benchmark(rusterize, gdf_large, (1/6, 1/6), fun="sum")
161
+
162
+ def test_small(benchmark):
163
+ benchmark(rusterize, gdf_small, (1/6, 1/6), fun="sum")
164
+ ```
165
+
166
+ Then you can run it with [pytest](https://docs.pytest.org/en/stable/)
167
+ and
168
+ [pytest-benchmark](https://pytest-benchmark.readthedocs.io/en/stable/):
169
+
170
+ ``` {shell}
171
+ pytest <python file> --benchmark-min-rounds=20 --benchmark-time-unit='s'
172
+
173
+ --------------------------------------------- benchmark: 1 tests --------------------------------------------
174
+ Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
175
+ -------------------------------------------------------------------------------------------------------------
176
+ test_large 10.5870 11.2302 10.8633 0.1508 10.8417 0.1594 4;1 0.0921 20 1
177
+ test_small 0.5083 0.6416 0.5265 0.0393 0.5120 0.0108 2;2 1.8995 20 1
178
+ -------------------------------------------------------------------------------------------------------------
179
+ ```
180
+
181
+ And fasterize:
182
+
183
+ ``` {r}
184
+ large <- st_read("Mammals_Terrestrial/Mammals_Terrestrial.shp", quiet = TRUE)
185
+ small <- large[1:1000, ]
186
+ fn <- function(v) {
187
+ r <- raster(v, res = 1/6)
188
+ return(fasterize(v, r, fun = "sum"))
189
+ }
190
+ microbenchmark(
191
+ fasterize_large = f <- fn(large),
192
+ fasterize_small = f <- fn(small),
193
+ times=20L,
194
+ unit='s'
195
+ )
196
+ ```
197
+
198
+ ``` {shell}
199
+ Unit: seconds
200
+ expr min lq mean median uq max neval
201
+ fasterize_large 9.565781 9.815375 10.02838 9.984965 10.18532 10.66656 20
202
+ fasterize_small 0.469389 0.500616 0.571851 0.558818 0.613419 0.795159 20
203
+ ```
204
+
205
+ And on even
206
+ [larger](https://open.canada.ca/data/en/dataset/fbf12500-bffe-4209-a1ae-fba86f154ebf/resource/cc90d77c-fba3-4f84-b30a-e684cfe0649a)
207
+ datasets? This is a benchmark with 350K+ geometries rasterized at 30
208
+ meters (20 rounds) with no field value and pixel function `sum`.
209
+
210
+ ``` {shell}
211
+ # rusterize
212
+ --------------------------------------------- benchmark: 1 tests --------------------------------------------
213
+ Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
214
+ -------------------------------------------------------------------------------------------------------------
215
+ test_sbw 46.5711 49.0212 48.4340 0.5504 48.5812 0.5054 3;1 0.0206 20 1
216
+ -------------------------------------------------------------------------------------------------------------
217
+
218
+ # fasterize
219
+ Unit: seconds
220
+ expr min lq mean median uq max neval
221
+ fasterize 62.12409 72.13832 74.53424 75.12375 77.72899 84.77415 20
222
+ ```
223
+
224
+ # Comparison with other tools
225
+
226
+ While `rusterize` is fast, there are other very fast solutions out there, including
227
+ - `GDAL`
228
+ - `rasterio`
229
+ - `geocube`
230
+
231
+ However, `rusterize` offers seamless, Rust-native processing with a similar or lower memory footprint that doesn't require you to leave Python, and it returns the geoinformation you need for downstream processing.
232
+
233
+ The following is a time comparison run on a dataset with 340K+ geometries, rasterized at 2m resolution.
234
+ ```
235
+ rusterize: 24 sec
236
+ fasterize: 47 sec
237
+ GDAL (cli): 40 sec (read from fast drive, write to fast drive)
238
+ rasterio: 20 sec (but no spatial information)
239
+ geocube: 42 sec (larger memory footprint)
240
+ ```
@@ -0,0 +1,7 @@
1
+ rusterize-0.1.0.dist-info/METADATA,sha256=vBY0cdi3hAcyq6AJ8Tb7F4wRf3ftfSVT3CAQTsZngoA,8156
2
+ rusterize-0.1.0.dist-info/WHEEL,sha256=OP9M1tCSrnmep3a1BuV3EynKyOuGWiyM2EjKtCEnCOs,95
3
+ rusterize-0.1.0.dist-info/licenses/LICENSE,sha256=FXkix0amECHul0Y2qWBXnEGNV2fd8GuVCIZuuzQwR-c,1130
4
+ rusterize/core.py,sha256=wcDZBkJOOCZz7zEAT3DvtKQx1NMQTb9IHhthYrCSqGE,2940
5
+ rusterize/__init__.py,sha256=rQSJ7V7ykrsuWz-cQK5Dm9E7usCYmCD3dIUrnosWABc,105
6
+ rusterize/rusterize.pyd,sha256=q8Fk-VlojeHk8N9-E8rhVc8ULdqSGkqQj4b4iLMjqnw,37707776
7
+ rusterize-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: maturin (1.8.1)
3
+ Root-Is-Purelib: false
4
+ Tag: cp310-abi3-win_amd64
@@ -0,0 +1,23 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Tommaso Trotto
4
+ Copyright (c) 2017 EcoHealth Alliance
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining
7
+ a copy of this software and associated documentation files (the
8
+ "Software"), to deal in the Software without restriction, including
9
+ without limitation the rights to use, copy, modify, merge, publish,
10
+ distribute, sublicense, and/or sell copies of the Software, and to
11
+ permit persons to whom the Software is furnished to do so, subject to
12
+ the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be
15
+ included in all copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.