rusterize 0.4.0__cp311-abi3-manylinux_2_28_ppc64le.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rusterize might be problematic. Click here for more details.

rusterize/__init__.py ADDED
@@ -0,0 +1,120 @@
1
+ from __future__ import annotations
2
+ import importlib.metadata
3
+
4
+ from types import NoneType
5
+ from typing import List, Tuple
6
+
7
+ import numpy as np
8
+ import polars as pl
9
+ from geopandas import GeoDataFrame
10
+ import rioxarray
11
+ from xarray import DataArray
12
+ from .rusterize import _rusterize
13
+
14
+ __version__ = importlib.metadata.version("rusterize")
15
+
16
+
17
def rusterize(
    gdf: GeoDataFrame,
    res: Tuple | List | None = None,
    out_shape: Tuple | List | None = None,
    extent: Tuple | List | None = None,
    field: str | None = None,
    by: str | None = None,
    burn: int | float | None = None,
    fun: str = "last",
    background: int | float | None = np.nan,
    dtype: str = "float64",
) -> DataArray:
    """
    Fast geopandas rasterization into xarray.DataArray

    Args:
        :param gdf: geopandas dataframe to rasterize.
        :param res: (xres, yres) for rasterized data.
        :param out_shape: (nrows, ncols) for regularized output shape.
        :param extent: (xmin, ymin, xmax, ymax) for regularized extent.
        :param field: field to rasterize, mutually exclusive with `burn`. Default is None.
        :param by: column to rasterize, assigns each unique value to a layer in the stack based on field. Default is None.
        :param burn: burn a value onto the raster, mutually exclusive with `field`. Default is None.
        :param fun: pixel function to use. Available options are `sum`, `first`, `last`, `min`, `max`, `count`, or `any`. Default is `last`.
        :param background: background value in final raster. Default is np.nan.
        :param dtype: specify the output dtype. Default is `float64`.

    Returns:
        Rasterized xarray.DataArray, with the CRS of `gdf` written onto it.

    Raises:
        TypeError: if an argument is not of the expected type.
        ValueError: if none of `res`, `out_shape`, `extent` is given, if `extent` is given without
            `res` or `out_shape`, if `res`/`out_shape`/`extent` have the wrong length or illegal values,
            or if both `field` and `burn` are specified.
        KeyError: if `field` or `by` is not a column of `gdf`.

    Notes:
        When any of `res`, `out_shape`, or `extent` is not provided, it is inferred from the other arguments when applicable.
        Unless `extent` is specified, a half-pixel buffer is applied to avoid missing points on the border.
        The logic dictating the final spatial properties of the rasterized geometries follows that of GDAL.

        If neither `field` nor `burn` is specified, a default `burn` value of 1 is rasterized
        (NOTE(review): this default is applied by the compiled backend, not in this function - confirm).

        A `None` value for `background` corresponds to the default of the specified dtype. An illegal value for a dtype will be
        replaced with the default of that dtype. For example, a `background=np.nan` for `dtype="uint8"` will become
        `background=0`, where `0` is the default for `uint8`.
    """
    # type checks
    if not isinstance(gdf, GeoDataFrame):
        raise TypeError("`gdf` must be a geopandas dataframe.")
    if not isinstance(res, (tuple, list, NoneType)):
        raise TypeError("`res` must be a tuple or list of (x, y).")
    if not isinstance(out_shape, (tuple, list, NoneType)):
        raise TypeError("`out_shape` must be a tuple or list of (nrows, ncols).")
    if not isinstance(extent, (tuple, list, NoneType)):
        raise TypeError("`extent` must be a tuple or list of (xmin, ymin, xmax, ymax).")
    if not isinstance(field, (str, NoneType)):
        raise TypeError("`field` must be a string column name.")
    if not isinstance(by, (str, NoneType)):
        raise TypeError("`by` must be a string column name.")
    if not isinstance(burn, (int, float, NoneType)):
        raise TypeError("`burn` must be an integer or float.")
    if not isinstance(fun, str):
        raise TypeError("`fun` must be one of sum, first, last, min, max, count, or any.")
    if not isinstance(background, (int, float, NoneType)):
        raise TypeError("`background` must be integer, float, or None.")
    if not isinstance(dtype, str):
        raise TypeError("`dtype` must be one of uint8, uint16, uint32, uint64, int8, int16, int32, int64, float32, float64")

    # value checks
    if not res and not out_shape and not extent:
        raise ValueError("One of `res`, `out_shape`, or `extent` must be provided.")
    if extent and not res and not out_shape:
        raise ValueError("Must also specify `res` or `out_shape` with extent.")
    # The isinstance test runs before the comparison so that a non-numeric element
    # yields the intended ValueError instead of a TypeError from `r <= 0`.
    if res and (len(res) != 2 or any(not isinstance(r, (int, float)) or r <= 0 for r in res)):
        raise ValueError("Resolution must be 2 positive numbers.")
    if out_shape and (len(out_shape) != 2 or any(not isinstance(s, int) or s <= 0 for s in out_shape)):
        raise ValueError("Output shape must be 2 positive integers.")
    if extent and len(extent) != 4:
        raise ValueError("Extent must be 4 numbers (xmin, ymin, xmax, ymax).")
    # `burn` is compared against None so that an explicit `burn=0` is still
    # recognised as conflicting with `field`.
    if field and burn is not None:
        raise ValueError("Only one of `field` or `burn` can be specified.")

    # defaults: zeros are passed for whichever of `res` / `out_shape` was not supplied
    _res = res if res else (0, 0)
    _shape = out_shape if out_shape else (0, 0)
    # without a user extent, fall back to the bounds of the geometries
    (_bounds, _has_extent) = (extent, True) if extent else (gdf.total_bounds, False)

    # RasterInfo
    raster_info = {
        "nrows": _shape[0],
        "ncols": _shape[1],
        "xmin": _bounds[0],
        "ymin": _bounds[1],
        "xmax": _bounds[2],
        "ymax": _bounds[3],
        "xres": _res[0],
        "yres": _res[1],
        "has_extent": _has_extent,
    }

    # extract columns of interest and convert to polars;
    # dict.fromkeys de-duplicates (field == by) while keeping a deterministic order
    cols = list(dict.fromkeys(col for col in (field, by) if col))
    try:
        df = pl.from_pandas(gdf[cols]) if cols else None
    except KeyError as e:
        raise KeyError("Column not found in GeoDataFrame") from e

    # rusterize
    r = _rusterize(gdf.geometry, raster_info, fun, df, field, by, burn, background, dtype)
    # the `.rio` accessor used here is registered by the module-level `import rioxarray`
    return DataArray.from_dict(r).rio.write_crs(gdf.crs, inplace=True)
Binary file
@@ -0,0 +1,254 @@
1
+ Metadata-Version: 2.4
2
+ Name: rusterize
3
+ Version: 0.4.0
4
+ Classifier: License :: OSI Approved :: MIT License
5
+ Classifier: Operating System :: OS Independent
6
+ Classifier: Programming Language :: Rust
7
+ Classifier: Programming Language :: Python :: Implementation :: CPython
8
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
9
+ Requires-Dist: geopandas>=1.0.1
10
+ Requires-Dist: pandas>=2.2.3
11
+ Requires-Dist: pyarrow>=18.1.0
12
+ Requires-Dist: polars>=1.19.0
13
+ Requires-Dist: xarray>=2025.1.1
14
+ Requires-Dist: rioxarray>=0.18.2
15
+ License-File: LICENSE
16
+ Summary: High performance rasterization tool for Python built in Rust
17
+ Keywords: rust,fast,raster,geometry,geopandas,xarray
18
+ Requires-Python: >=3.11
19
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
20
+ Project-URL: repository, https://github.com/ttrotto/rusterize
21
+
22
+ # rusterize
23
+
24
+ High performance rasterization tool for Python built in Rust. This
25
+ repository stems from the [fasterize](https://github.com/ecohealthalliance/fasterize.git) package built in C++
26
+ for R and ports parts of the logic into Python with a Rust backend, in addition to some useful improvements (see [API](#API)).
27
+
28
+ **rusterize** is designed to work on *(multi)polygons* and *(multi)linestrings*, even when they are nested inside complex geometry collections. Functionally, it takes an input [geopandas](https://geopandas.org/en/stable/) dataframe and returns an [xarray](https://docs.xarray.dev/en/stable/).
29
+
30
+ # Installation
31
+
32
+ Install the current version with pip:
33
+
34
+ ``` shell
35
+ pip install rusterize
36
+ ```
37
+
38
+ # Contributing
39
+
40
+ Any contribution is welcome! You can install **rusterize** directly
41
+ from this repo using [maturin](https://www.maturin.rs/) as an editable
42
+ package. For this to work, you’ll need to have [Rust](https://www.rust-lang.org/tools/install) and
43
+ [cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html)
44
+ installed.
45
+
46
+ ``` shell
47
+ # Clone repo
48
+ git clone https://github.com/<username>/rusterize.git
49
+ cd rusterize
50
+
51
+ # Install the Rust nightly toolchain
52
+ rustup toolchain install nightly-2025-07-31
53
+
54
+ # Install maturin
55
+ pip install maturin
56
+
57
+ # Install editable version with optimized code
58
+ maturin develop --profile dist-release
59
+ ```
60
+
61
+ # API
62
+
63
+ This package has a simple API:
64
+
65
+ ``` python
66
+ from rusterize import rusterize
67
+
68
+ # gdf = <import/modify dataframe as needed>
69
+
70
+ # rusterize
71
+ rusterize(gdf,
72
+ res=(30, 30),
73
+ out_shape=(10, 10),
74
+ extent=(0, 10, 10, 20),
75
+ field="field",
76
+ by="by",
77
+ burn=None,
78
+ fun="sum",
79
+ background=0,
80
+ dtype="uint8")
81
+ ```
82
+
83
+ - `gdf`: geopandas dataframe to rasterize
84
+ - `res`: (xres, yres) for desired resolution (default: `None`)
85
+ - `out_shape`: (nrows, ncols) for desired output shape (default: `None`)
86
+ - `extent`: (xmin, ymin, xmax, ymax) for desired output extent (default: `None`)
87
+ - `field`: column to rasterize. Mutually exclusive with `burn`. (default: `None` -> a value of `1` is rasterized)
88
+ - `by`: column for grouping. Assign each group to a band in the stack. Values are taken from `field` if specified, else `burn` is rasterized. (default: `None` -> singleband raster)
89
+ - `burn`: a single value to burn. Mutually exclusive with `field`. (default: `None`). If no field is found in `gdf` or if `field` is `None`, then `burn=1`
90
+ - `fun`: pixel function to use when multiple values overlap. Available options are `sum`, `first`, `last`, `min`, `max`, `count`, or `any`. (default: `last`)
91
+ - `background`: background value in final raster. (default: `np.nan`). A `None` value corresponds to the default of the specified dtype. An illegal value for a dtype will be replaced with the default of that dtype. For example, a `background=np.nan` for `dtype="uint8"` will become `background=0`, where `0` is the default for `uint8`.
92
+ - `dtype`: dtype of the final raster. Possible values are `uint8`, `uint16`, `uint32`, `uint64`, `int8`, `int16`, `int32`, `int64`, `float32`, `float64` (default: `float64`)
93
+
94
+ Note that control over the desired extent is not as strict as for resolution and shape. That is,
95
+ when resolution, output shape, and extent are specified, priority is given to resolution and shape.
96
+ So, extent is not guaranteed, but resolution and shape are. If extent is not given, it is taken
97
+ from the polygons and is not modified, unless you specify a resolution value. If you only specify an output
98
+ shape, the extent is maintained. This mimics the logic of `gdal_rasterize`.
99
+
100
+ # Usage
101
+
102
+ **rusterize** consists of a single function `rusterize()`. The Rust implementation
103
+ returns a dictionary that is converted to an xarray on the Python side
104
+ for simplicity.
105
+
106
+ ``` python
107
+ from rusterize import rusterize
108
+ import geopandas as gpd
109
+ from shapely import wkt
110
+ import matplotlib.pyplot as plt
111
+
112
+ # Construct geometries
113
+ geoms = [
114
+ "POLYGON ((-180 -20, -140 55, 10 0, -140 -60, -180 -20), (-150 -20, -100 -10, -110 20, -150 -20))",
115
+ "POLYGON ((-10 0, 140 60, 160 0, 140 -55, -10 0))",
116
+ "POLYGON ((-125 0, 0 60, 40 5, 15 -45, -125 0))",
117
+ "MULTILINESTRING ((-180 -70, -140 -50), (-140 -50, -100 -70), (-100 -70, -60 -50), (-60 -50, -20 -70), (-20 -70, 20 -50), (20 -50, 60 -70), (60 -70, 100 -50), (100 -50, 140 -70), (140 -70, 180 -50))",
118
+ "GEOMETRYCOLLECTION (POINT (50 -40), POLYGON ((75 -40, 75 -30, 100 -30, 100 -40, 75 -40)), LINESTRING (80 -40, 100 0), GEOMETRYCOLLECTION (POLYGON ((100 20, 100 30, 110 30, 110 20, 100 20))))"
119
+ ]
120
+
121
+ # Convert WKT strings to Shapely geometries
122
+ geometries = [wkt.loads(geom) for geom in geoms]
123
+
124
+ # Create a GeoDataFrame
125
+ gdf = gpd.GeoDataFrame({'value': range(1, len(geoms) + 1)}, geometry=geometries, crs='EPSG:32619')
126
+
127
+ # rusterize
128
+ output = rusterize(
129
+ gdf,
130
+ res=(1, 1),
131
+ field="value",
132
+ fun="sum",
133
+ ).squeeze()
134
+
135
+ # plot it
136
+ fig, ax = plt.subplots(figsize=(12, 6))
137
+ output.plot.imshow(ax=ax)
138
+ plt.show()
139
+ ```
140
+
141
+ ![](img/plot.png)
142
+
143
+ # Benchmarks
144
+
145
+ **rusterize** is fast! Let’s try it on small and large datasets.
146
+
147
+ ``` python
148
+ from rusterize import rusterize
149
+ import geopandas as gpd
150
+ import requests
151
+ import zipfile
152
+ from io import BytesIO
153
+
154
+ # large dataset (~380 MB)
155
+ url = "https://s3.amazonaws.com/hp3-shapefiles/Mammals_Terrestrial.zip"
156
+ response = requests.get(url)
157
+
158
+ # unzip
159
+ with zipfile.ZipFile(BytesIO(response.content), 'r') as zip_ref:
160
+ zip_ref.extractall()
161
+
162
+ # read
163
+ gdf_large = gpd.read_file("Mammals_Terrestrial/Mammals_Terrestrial.shp")
164
+
165
+ # small dataset (first 1000 rows)
166
+ gdf_small = gdf_large.iloc[:1000, :]
167
+
168
+ # rusterize at 1/6 degree resolution
169
+ def test_large(benchmark):
170
+ benchmark(rusterize, gdf_large, res=(1/6, 1/6), fun="sum")
171
+
172
+ def test_small(benchmark):
173
+ benchmark(rusterize, gdf_small, res=(1/6, 1/6), fun="sum")
174
+ ```
175
+
176
+ Then you can run it with [pytest](https://docs.pytest.org/en/stable/) and [pytest-benchmark](https://pytest-benchmark.readthedocs.io/en/stable/):
177
+ ```
178
+ pytest <python file> --benchmark-min-rounds=20 --benchmark-time-unit='s'
179
+
180
+ --------------------------------------------- benchmark: 1 tests --------------------------------------------
181
+ Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
182
+ -------------------------------------------------------------------------------------------------------------
183
+ rusterize_small 0.0791 0.0899 0.0812 0.0027 0.0803 0.0020 2;2 12.3214 20 1
184
+ rusterize_large 1.379545 1.4474 1.4006 0.0178 1.3966 0.0214 5;1 0.7140 20 1
185
+ -------------------------------------------------------------------------------------------------------------
186
+ ```
187
+ And fasterize:
188
+ ``` r
189
+ library(sf)
190
+ library(raster)
191
+ library(fasterize)
192
+ library(microbenchmark)
193
+
194
+ large <- st_read("Mammals_Terrestrial/Mammals_Terrestrial.shp", quiet = TRUE)
195
+ small <- large[1:1000, ]
196
+ fn <- function(v) {
197
+ r <- raster(v, res = 1/6)
198
+ return(fasterize(v, r, fun = "sum"))
199
+ }
200
+ microbenchmark(
201
+ fasterize_large = f <- fn(large),
202
+ fasterize_small = f <- fn(small),
203
+ times=20L,
204
+ unit='s'
205
+ )
206
+ ```
207
+ ```
208
+ Unit: seconds
209
+ expr min lq mean median uq max neval
210
+ fasterize_small 0.4741043 0.4926114 0.5191707 0.5193289 0.536741 0.5859029 20
211
+ fasterize_large 9.2199426 10.3595465 10.6653139 10.5369429 11.025771 11.7944567 20
212
+ ```
213
+ And on an even larger dataset? Here we use a layer from the province of Quebec, Canada representing ~2M polygons of forest stands, rasterized at 30 meters (20 rounds) with no field value and pixel function `any`. The comparison with `gdal_rasterize` was run with `hyperfine --runs 20 "gdal_rasterize -tr 30 30 -burn 1 <data_in> <data_out>"`.
214
+ ```
215
+ # rusterize
216
+ --------------------------------------------- benchmark: 1 tests --------------------------------------------
217
+ Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
218
+ -------------------------------------------------------------------------------------------------------------
219
+ rusterize 5.9331 7.2308 6.1302 0.3183 5.9903 0.1736 2;4 0.1631 20 1
220
+ -------------------------------------------------------------------------------------------------------------
221
+
222
+ # fasterize
223
+ Unit: seconds
224
+ expr min lq mean median uq max neval
225
+ fasterize 157.4734 177.2055 194.3222 194.6455 213.9195 230.6504 20
226
+
227
+ # gdal_rasterize (CLI) - read from fast drive, write to fast drive
228
+ Time (mean ± σ): 5.495 s ± 0.038 s [User: 4.268 s, System: 1.225 s]
229
+ Range (min … max): 5.452 s … 5.623 s 20 runs
230
+ ```
231
+ In terms of (multi)line rasterization speed, here's a benchmark against `gdal_rasterize` using a layer from the province of Quebec, Canada, representing a subset of the road network for a total of ~535K multilinestrings.
232
+ ```
233
+ # rusterize
234
+ --------------------------------------------- benchmark: 1 tests --------------------------------------------
235
+ Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
236
+ -------------------------------------------------------------------------------------------------------------
237
+ test 4.5272 5.9488 4.7171 0.3236 4.6360 0.1680 2;2 0.2120 20 1
238
+ -------------------------------------------------------------------------------------------------------------
239
+
240
+ # gdal_rasterize (CLI) - read from fast drive, write to fast drive
241
+ Time (mean ± σ): 8.719 s ± 0.063 s [User: 3.782 s, System: 4.917 s]
242
+ Range (min … max): 8.658 s … 8.874 s 20 runs
243
+ ```
244
+ # Comparison with other tools
245
+
246
+ While **rusterize** is fast, there are other fast alternatives out there, including `GDAL`, `rasterio` and `geocube`. However, **rusterize** allows for a seamless, Rust-native processing with similar or lower memory footprint that doesn't require you to leave Python, and returns the geoinformation you need for downstream processing with ample control over resolution, shape, extent, and data type.
247
+
248
+ The following is a time comparison on a single run on the same forest stands dataset used earlier.
249
+ ```
250
+ rusterize: 5.9 sec
251
+ rasterio: 68 sec (but no spatial information)
252
+ fasterize: 157 sec (including raster creation)
253
+ geocube: 260 sec (larger memory footprint)
254
+ ```
@@ -0,0 +1,6 @@
1
+ rusterize-0.4.0.dist-info/METADATA,sha256=VxdEZ9jhsBsBCzPY-yZiktAmNs4wuSWEe3GpU863CTQ,11176
2
+ rusterize-0.4.0.dist-info/WHEEL,sha256=MGaQjBHAphO6IVR063SFNWIT-ZlGUh0_fKN0-9trTkw,108
3
+ rusterize-0.4.0.dist-info/licenses/LICENSE,sha256=v-2DqBji_azGEWFDxBhw-CNIRu8450vBbloLx6UNqLU,1108
4
+ rusterize/__init__.py,sha256=TZvnGqurMBCNrnTfdtjkFhQofqUk-w7TO19JhB5m1OQ,5525
5
+ rusterize/rusterize.abi3.so,sha256=8XDkTy4fMGQeErO_QYQgg50UcmAgX_v-u54LHUMcgyo,48538216
6
+ rusterize-0.4.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: maturin (1.9.3)
3
+ Root-Is-Purelib: false
4
+ Tag: cp311-abi3-manylinux_2_28_ppc64le
@@ -0,0 +1,23 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Tommaso Trotto
4
+ Copyright (c) 2017 EcoHealth Alliance
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining
7
+ a copy of this software and associated documentation files (the
8
+ "Software"), to deal in the Software without restriction, including
9
+ without limitation the rights to use, copy, modify, merge, publish,
10
+ distribute, sublicense, and/or sell copies of the Software, and to
11
+ permit persons to whom the Software is furnished to do so, subject to
12
+ the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be
15
+ included in all copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.