rusterize 0.3.0__cp311-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rusterize might be problematic. Click here for more details.

rusterize/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ import importlib.metadata
2
+ from .core import *
3
+
4
+ __version__ = importlib.metadata.version("rusterize")
rusterize/core.py ADDED
@@ -0,0 +1,107 @@
1
+ from __future__ import annotations
2
+
3
+ from types import NoneType
4
+ from typing import Any, Dict, Tuple
5
+
6
+ import polars as pl
7
+ from geopandas import GeoDataFrame
8
+ import rioxarray
9
+ from xarray import DataArray
10
+ from .rusterize import _rusterize
11
+
12
+
13
def rusterize(gdf: GeoDataFrame,
              res: Tuple[int, ...] | Tuple[float, ...] | None = None,
              out_shape: Tuple[int, ...] | None = None,
              extent: Tuple[int, ...] | Tuple[float, ...] | None = None,
              field: str | None = None,
              by: str | None = None,
              fun: str = "last",
              background: int | float | None = None,
              ) -> DataArray:
    """
    Fast geopandas rasterization into xarray.DataArray

    Args:
        :param gdf: geopandas dataframe to rasterize.
        :param res: tuple of (xres, yres) for rasterized data.
        :param out_shape: tuple of (nrows, ncols) for regularized output shape.
        :param extent: tuple of (xmin, ymin, xmax, ymax) for regularized extent.
        :param field: field to rasterize. Default is None.
        :param by: column to rasterize, assigns each unique value to a layer in the stack based on field. Default is None.
        :param fun: pixel function to use. Available options are `sum`, `first`, `last`, `min`, `max`, `count`, or `any`. Default is `last`.
        :param background: background value in final raster. Default is None (NaN).

    Returns:
        Rasterized xarray.DataArray, with the CRS of `gdf` written to it.

    Raises:
        TypeError: if an argument is not of the expected type.
        ValueError: if none of `res`, `out_shape`, or `extent` is given, if `extent` is given
            without `res` or `out_shape`, if `res`/`out_shape`/`extent` have the wrong length or
            invalid values, or if `by` is given without `field`.

    Note:
        When any of `res`, `out_shape`, or `extent` is not provided, it is inferred from the other arguments when applicable.
        Unless `extent` is specified, a half-pixel buffer is applied to avoid missing points on the border.
        The logic dictating the final spatial properties of the rasterized geometries follows that of GDAL.
    """
    # type checks
    if not isinstance(gdf, GeoDataFrame):
        raise TypeError("Must pass a valid geopandas dataframe.")
    if not isinstance(res, (tuple, NoneType)):
        raise TypeError("Must pass a valid resolution tuple (x, y).")
    if not isinstance(out_shape, (tuple, NoneType)):
        raise TypeError("Must pass a valid output shape tuple (nrows, ncols).")
    if not isinstance(extent, (tuple, NoneType)):
        raise TypeError("Must pass a valid extent tuple (xmin, ymin, xmax, ymax).")
    if not isinstance(field, (str, NoneType)):
        raise TypeError("Must pass a valid string to field.")
    if not isinstance(by, (str, NoneType)):
        raise TypeError("Must pass a valid string to by.")
    if not isinstance(fun, str):
        raise TypeError("Must pass a valid string to fun. Select one of sum, first, last, min, max, count, or any.")
    if not isinstance(background, (int, float, NoneType)):
        raise TypeError("Must pass a valid background type.")

    # value checks
    if not res and not out_shape and not extent:
        raise ValueError("One of `res`, `out_shape`, or `extent` must be provided.")
    if extent and not res and not out_shape:
        raise ValueError("Must also specify `res` or `out_shape` with extent.")
    # The element type is verified BEFORE the `<= 0` comparison so that non-numeric
    # elements produce the ValueError below instead of an unrelated comparison TypeError.
    if res and (
        len(res) != 2
        or not all(isinstance(r, (int, float)) for r in res)
        or any(r <= 0 for r in res)
    ):
        raise ValueError("Resolution must be 2 positive numbers.")
    if out_shape and (
        len(out_shape) != 2
        or not all(isinstance(s, int) for s in out_shape)
        or any(s <= 0 for s in out_shape)
    ):
        raise ValueError("Output shape must be 2 positive integers.")
    if extent and len(extent) != 4:
        raise ValueError("Extent must be 4 numbers (xmin, ymin, xmax, ymax).")
    if by and not field:
        raise ValueError("If `by` is specified, `field` must also be specified.")

    # defaults: (0, 0) stands in for a resolution/shape that was not provided
    # (presumably the backend treats zeros as "infer this" -- confirm against the Rust side)
    _res = res if res else (0, 0)
    _shape = out_shape if out_shape else (0, 0)
    # fall back to the bounds of the geometries when no extent is requested
    (_bounds, has_extent) = (extent, True) if extent else (gdf.total_bounds, False)

    # RasterInfo
    raster_info = {
        "nrows": _shape[0],
        "ncols": _shape[1],
        "xmin": _bounds[0],
        "ymin": _bounds[1],
        "xmax": _bounds[2],
        "ymax": _bounds[3],
        "xres": _res[0],
        "yres": _res[1],
        "has_extent": has_extent,
    }

    # extract columns of interest and convert to polars;
    # dict.fromkeys de-duplicates (field == by) while keeping a deterministic order
    cols = list(dict.fromkeys(col for col in (field, by) if col))
    df = pl.from_pandas(gdf[cols]) if cols else None

    # rusterize
    r = _rusterize(
        gdf.geometry,
        raster_info,
        fun,
        df,
        field,
        by,
        background,
    )
    # build the DataArray from the returned dictionary and attach the CRS of the input
    return DataArray.from_dict(r).rio.write_crs(gdf.crs, inplace=True)
Binary file
@@ -0,0 +1,250 @@
1
+ Metadata-Version: 2.4
2
+ Name: rusterize
3
+ Version: 0.3.0
4
+ Classifier: License :: OSI Approved :: MIT License
5
+ Classifier: Operating System :: OS Independent
6
+ Classifier: Programming Language :: Rust
7
+ Classifier: Programming Language :: Python :: Implementation :: CPython
8
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
9
+ Requires-Dist: geopandas>=1.0.1
10
+ Requires-Dist: pandas>=2.2.3
11
+ Requires-Dist: pyarrow>=18.1.0
12
+ Requires-Dist: polars>=1.19.0
13
+ Requires-Dist: xarray>=2025.1.1
14
+ Requires-Dist: rioxarray>=0.18.2
15
+ License-File: LICENSE
16
+ Summary: High performance rasterization tool for Python built in Rust
17
+ Keywords: fast,raster,geopandas,xarray
18
+ Requires-Python: >=3.11
19
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
20
+ Project-URL: repository, https://github.com/ttrotto/rusterize
21
+
22
+ # rusterize
23
+
24
+ High performance rasterization tool for Python built in Rust. This
25
+ repository stems from the [fasterize](https://github.com/ecohealthalliance/fasterize.git) package built in C++
26
+ for R and ports parts of its logic into Python with a Rust backend, in addition to some useful improvements.
27
+
28
+ **rusterize** is designed to work on *(multi)polygons* and *(multi)linestrings*. Functionally, it takes an input [geopandas](https://geopandas.org/en/stable/) dataframe and returns an [xarray](https://docs.xarray.dev/en/stable/).
29
+
30
+ # Installation
31
+
32
+ Install the current version with pip:
33
+
34
+ ``` shell
35
+ pip install rusterize
36
+ ```
37
+
38
+ # Contributing
39
+
40
+ Any contribution is welcome! You can install **rusterize** directly
41
+ from this repo using [maturin](https://www.maturin.rs/) as an editable
42
+ package. For this to work, you’ll need to have [Rust](https://www.rust-lang.org/tools/install) and
43
+ [cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html)
44
+ installed.
45
+
46
+ ``` shell
47
+ # Clone repo
48
+ git clone https://github.com/<username>/rusterize.git
49
+ cd rusterize
50
+
51
+ # Install the Rust nightly toolchain
52
+ rustup toolchain install nightly-2025-01-05
53
+
54
+ # Install maturin
55
+ pip install maturin
56
+
57
+ # Install editable version with optimized code
58
+ maturin develop --profile dist-release
59
+ ```
60
+
61
+ # API
62
+
63
+ This package has a simple API:
64
+
65
+ ``` python
66
+ from rusterize.core import rusterize
67
+
68
+ # gdf = <import/modify dataframe as needed>
69
+
70
+ # rusterize
71
+ rusterize(gdf,
72
+ res=(30, 30),
73
+ out_shape=(10, 10),
74
+ extent=(0, 300, 0, 300),
75
+ field="field",
76
+ by="by",
77
+ fun="sum",
78
+ background=0)
79
+ ```
80
+
81
+ - `gdf`: geopandas dataframe to rasterize
82
+ - `res`: tuple of (xres, yres) for desired resolution (default: `None`)
83
+ - `out_shape`: tuple of (nrows, ncols) for desired output shape (default: `None`)
84
+ - `extent`: tuple of (xmin, ymin, xmax, ymax) for desired output extent (default: `None`)
85
+ - `field`: field to rasterize. (default: `None` -> a value of `1` is rasterized).
86
+ - `by`: column to rasterize. Assigns each group to a band in the stack. Values are taken from `field`. (default: `None` -> singleband raster)
87
+ - `fun`: pixel function to use when multiple values overlap. Available options are `sum`, `first`, `last`, `min`, `max`, `count`, or `any`. (default: `last`)
88
+ - `background`: background value in final raster. (default: `np.nan`)
89
+
90
+ Note that control over the desired extent is not as strict as for resolution and shape. That is,
91
+ when resolution, output shape, and extent are specified, priority is given to resolution and shape.
92
+ So, extent is not guaranteed, but resolution and shape are. If extent is not given, it is taken
93
+ from the polygons and is not modified, unless you specify a resolution value. If you only specify an output
94
+ shape, the extent is maintained. This mimics the logics of `gdal_rasterize`.
95
+
96
+ # Usage
97
+
98
+ **rusterize** consists of a single function `rusterize()`. The Rust implementation
99
+ returns a dictionary that is converted to an xarray on the Python side
100
+ for simplicity.
101
+
102
+ ``` python
103
+ from rusterize.core import rusterize
104
+ import geopandas as gpd
105
+ from shapely import wkt
106
+ import matplotlib.pyplot as plt
107
+
108
+ # Construct geometries
109
+ geoms = [
110
+ "POLYGON ((-180 -20, -140 55, 10 0, -140 -60, -180 -20), (-150 -20, -100 -10, -110 20, -150 -20))",
111
+ "POLYGON ((-10 0, 140 60, 160 0, 140 -55, -10 0))",
112
+ "POLYGON ((-125 0, 0 60, 40 5, 15 -45, -125 0))",
113
+ "MULTILINESTRING ((-180 -70, -140 -50), (-140 -50, -100 -70), (-100 -70, -60 -50), (-60 -50, -20 -70), (-20 -70, 20 -50), (20 -50, 60 -70), (60 -70, 100 -50), (100 -50, 140 -70), (140 -70, 180 -50))"
114
+ ]
115
+
116
+ # Convert WKT strings to Shapely geometries
117
+ geometries = [wkt.loads(geom) for geom in geoms]
118
+
119
+ # Create a GeoDataFrame
120
+ gdf = gpd.GeoDataFrame({'value': range(1, len(geoms) + 1)}, geometry=geometries, crs='EPSG:32619')
121
+
122
+ # rusterize
123
+ output = rusterize(
124
+ gdf,
125
+ res=(1, 1),
126
+ field="value",
127
+ fun="sum"
128
+ ).squeeze()
129
+
130
+ # plot it
131
+ fig, ax = plt.subplots(figsize=(12, 6))
132
+ output.plot.imshow(ax=ax)
133
+ plt.show()
134
+ ```
135
+
136
+ ![](img/plot.png)
137
+
138
+ # Benchmarks
139
+
140
+ **rusterize** is fast! Let’s try it on small and large datasets.
141
+
142
+ ``` python
143
+ from rusterize.core import rusterize
144
+ import geopandas as gpd
145
+ import requests
146
+ import zipfile
147
+ from io import BytesIO
148
+
149
+ # large dataset (~380 MB)
150
+ url = "https://s3.amazonaws.com/hp3-shapefiles/Mammals_Terrestrial.zip"
151
+ response = requests.get(url)
152
+
153
+ # unzip
154
+ with zipfile.ZipFile(BytesIO(response.content), 'r') as zip_ref:
155
+ zip_ref.extractall()
156
+
157
+ # read
158
+ gdf_large = gpd.read_file("Mammals_Terrestrial/Mammals_Terrestrial.shp")
159
+
160
+ # small dataset (first 1000 rows)
161
+ gdf_small = gdf_large.iloc[:1000, :]
162
+
163
+ # rusterize at 1/6 degree resolution
164
+ def test_large(benchmark):
165
+ benchmark(rusterize, gdf_large, res=(1/6, 1/6), fun="sum")
166
+
167
+ def test_small(benchmark):
168
+ benchmark(rusterize, gdf_small, res=(1/6, 1/6), fun="sum")
169
+ ```
170
+
171
+ Then you can run it with [pytest](https://docs.pytest.org/en/stable/) and [pytest-benchmark](https://pytest-benchmark.readthedocs.io/en/stable/):
172
+ ```
173
+ pytest <python file> --benchmark-min-rounds=20 --benchmark-time-unit='s'
174
+
175
+ --------------------------------------------- benchmark: 1 tests --------------------------------------------
176
+ Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
177
+ -------------------------------------------------------------------------------------------------------------
178
+ rusterize_large 1.6430 1.9249 1.7442 0.1024 1.6878 0.1974 6;0 0.5733 20 1
179
+ rusterize_small 0.0912 0.1194 0.1014 0.0113 0.0953 0.0223 7;0 9.8633 20 1
180
+ -------------------------------------------------------------------------------------------------------------
181
+ ```
182
+
183
+ And fasterize:
184
+ ``` r
185
+ library(sf)
186
+ library(raster)
187
+ library(fasterize)
188
+ library(microbenchmark)
189
+
190
+ large <- st_read("Mammals_Terrestrial/Mammals_Terrestrial.shp", quiet = TRUE)
191
+ small <- large[1:1000, ]
192
+ fn <- function(v) {
193
+ r <- raster(v, res = 1/6)
194
+ return(fasterize(v, r, fun = "sum"))
195
+ }
196
+ microbenchmark(
197
+ fasterize_large = f <- fn(large),
198
+ fasterize_small = f <- fn(small),
199
+ times=20L,
200
+ unit='s'
201
+ )
202
+ ```
203
+ ```
204
+ Unit: seconds
205
+ expr min lq mean median uq max neval
206
+ fasterize_large 9.9450280 10.6674467 10.8632224 10.9182963 11.1943478 11.3768210 20
207
+ fasterize_small 0.4906411 0.5140836 0.5581061 0.5320919 0.5603512 0.8750579 20
208
+ ```
209
+ And on even larger datasets? Here we use a layer from the province of Quebec, Canada representing ~2M polygons of forest stands, rasterized at 30 meters (20 rounds) with no field value and pixel function `any`. The comparison with `gdal_rasterize` was run with `hyperfine --runs 20 "gdal_rasterize -tr 30 30 -burn 1 <data_in> <data_out>"`.
210
+ ```
211
+ # rusterize
212
+ --------------------------------------------- benchmark: 1 tests --------------------------------------------
213
+ Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
214
+ -------------------------------------------------------------------------------------------------------------
215
+ rusterize 6.7270 7.0098 6.7824 0.0646 6.7686 0.0266 2;2 0.1474 20 1
216
+ -------------------------------------------------------------------------------------------------------------
217
+
218
+ # fasterize
219
+ Unit: seconds
220
+ expr min lq mean median uq max neval
221
+ fasterize 157.4734 177.2055 194.3222 194.6455 213.9195 230.6504 20
222
+
223
+ # gdal_rasterize (CLI) - read from fast drive, write to fast drive
224
+ Time (mean ± σ): 5.801 s ± 0.124 s [User: 4.381 s, System: 1.396 s]
225
+ Range (min … max): 5.649 s … 6.023 s 20 runs
226
+ ```
227
+ In terms of (multi)line rasterization speed, here's a benchmark against `gdal_rasterize` using a layer from the province of Quebec, Canada, representing a subset of the road network for a total of ~535K multilinestrings.
228
+ ```
229
+ # rusterize
230
+ --------------------------------------------- benchmark: 1 tests --------------------------------------------
231
+ Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
232
+ -------------------------------------------------------------------------------------------------------------
233
+ test 4.5272 5.9488 4.7171 0.3236 4.6360 0.1680 2;2 0.2120 20 1
234
+ -------------------------------------------------------------------------------------------------------------
235
+
236
+ # gdal_rasterize (CLI) - read from fast drive, write to fast drive
237
+ Time (mean ± σ): 8.719 s ± 0.063 s [User: 3.782 s, System: 4.917 s]
238
+ Range (min … max): 8.658 s … 8.874 s 20 runs
239
+ ```
240
+ # Comparison with other tools
241
+
242
+ While **rusterize** is fast, there are other fast alternatives out there, including `GDAL`, `rasterio` and `geocube`. However, **rusterize** allows for a seamless, Rust-native processing with similar or lower memory footprint that doesn't require you to leave Python, and returns the geoinformation you need for downstream processing with ample control over resolution, shape, and extent.
243
+
244
+ The following is a time comparison on a single run on the same forest stands dataset used earlier.
245
+ ```
246
+ rusterize: 6.7 sec
247
+ rasterio: 68 sec (but no spatial information)
248
+ fasterize: 157 sec (including raster creation)
249
+ geocube: 260 sec (larger memory footprint)
250
+ ```
@@ -0,0 +1,7 @@
1
+ rusterize-0.3.0.dist-info/METADATA,sha256=spu-itZ6aACvDps3MbfUaXPUKignwHjgr8_m9Ryd8b4,10435
2
+ rusterize-0.3.0.dist-info/WHEEL,sha256=4qYHF3r3_wk9215EqUuzc4CZ76XfRcqOTn7Cv3gIg80,95
3
+ rusterize-0.3.0.dist-info/licenses/LICENSE,sha256=FXkix0amECHul0Y2qWBXnEGNV2fd8GuVCIZuuzQwR-c,1130
4
+ rusterize/core.py,sha256=b6ciLMbrBCihdQdOVifAg9d2pE0gjX-aj08erPSSDBM,4694
5
+ rusterize/__init__.py,sha256=rQSJ7V7ykrsuWz-cQK5Dm9E7usCYmCD3dIUrnosWABc,105
6
+ rusterize/rusterize.pyd,sha256=-muUoytgSoapAGDQ64QvZptJHex0oYuDIwA0SiIa_X4,40093184
7
+ rusterize-0.3.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: maturin (1.8.3)
3
+ Root-Is-Purelib: false
4
+ Tag: cp311-abi3-win_amd64
@@ -0,0 +1,23 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Tommaso Trotto
4
+ Copyright (c) 2017 EcoHealth Alliance
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining
7
+ a copy of this software and associated documentation files (the
8
+ "Software"), to deal in the Software without restriction, including
9
+ without limitation the rights to use, copy, modify, merge, publish,
10
+ distribute, sublicense, and/or sell copies of the Software, and to
11
+ permit persons to whom the Software is furnished to do so, subject to
12
+ the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be
15
+ included in all copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.