rusterize 0.4.0__cp311-abi3-manylinux_2_28_armv7l.whl → 0.4.1__cp311-abi3-manylinux_2_28_armv7l.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rusterize might be problematic. Click here for more details.
- rusterize/__init__.py +41 -22
- rusterize/rusterize.abi3.so +0 -0
- {rusterize-0.4.0.dist-info → rusterize-0.4.1.dist-info}/METADATA +36 -21
- rusterize-0.4.1.dist-info/RECORD +6 -0
- {rusterize-0.4.0.dist-info → rusterize-0.4.1.dist-info}/WHEEL +1 -1
- rusterize-0.4.0.dist-info/RECORD +0 -6
- {rusterize-0.4.0.dist-info → rusterize-0.4.1.dist-info}/licenses/LICENSE +0 -0
rusterize/__init__.py
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
import importlib.metadata
|
|
3
2
|
|
|
3
|
+
import importlib.metadata
|
|
4
4
|
from types import NoneType
|
|
5
5
|
from typing import List, Tuple
|
|
6
6
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
import polars as pl
|
|
9
|
-
from geopandas import GeoDataFrame
|
|
10
9
|
import rioxarray
|
|
10
|
+
from geopandas import GeoDataFrame
|
|
11
11
|
from xarray import DataArray
|
|
12
|
+
|
|
12
13
|
from .rusterize import _rusterize
|
|
13
14
|
|
|
14
15
|
__version__ = importlib.metadata.version("rusterize")
|
|
@@ -16,6 +17,7 @@ __version__ = importlib.metadata.version("rusterize")
|
|
|
16
17
|
|
|
17
18
|
def rusterize(
|
|
18
19
|
gdf: GeoDataFrame,
|
|
20
|
+
like: DataArray | None = None,
|
|
19
21
|
res: Tuple | List | None = None,
|
|
20
22
|
out_shape: Tuple | List | None = None,
|
|
21
23
|
extent: Tuple | List | None = None,
|
|
@@ -31,6 +33,7 @@ def rusterize(
|
|
|
31
33
|
|
|
32
34
|
Args:
|
|
33
35
|
:param gdf: geopandas dataframe to rasterize.
|
|
36
|
+
:param like: array to use as blueprint for spatial matching (resolution, shape, extent). Mutually exclusive with res, out_shape, and extent.
|
|
34
37
|
:param res: (xres, yres) for rasterized data.
|
|
35
38
|
:param out_shape: (nrows, ncols) for regularized output shape.
|
|
36
39
|
:param extent: (xmin, ymin, xmax, ymax) for regularized extent.
|
|
@@ -46,17 +49,20 @@ def rusterize(
|
|
|
46
49
|
|
|
47
50
|
Notes:
|
|
48
51
|
When any of `res`, `out_shape`, or `extent` is not provided, it is inferred from the other arguments when applicable.
|
|
52
|
+
If `like` is specified, `res`, `out_shape`, and `extent` are inferred from the `like` DataArray.
|
|
49
53
|
Unless `extent` is specified, a half-pixel buffer is applied to avoid missing points on the border.
|
|
50
54
|
The logics dictating the final spatial properties of the rasterized geometries follow those of GDAL.
|
|
51
|
-
|
|
55
|
+
|
|
52
56
|
If `field` is not in `gdf`, then a default `burn` value of 1 is rasterized.
|
|
53
|
-
|
|
57
|
+
|
|
54
58
|
A `None` value for `dtype` corresponds to the default of that dtype. An illegal value for a dtype will be replaced with the default of
|
|
55
59
|
that dtype. For example, a `background=np.nan` for `dtype="uint8"` will become `background=0`, where `0` is the default for `uint8`.
|
|
56
60
|
"""
|
|
57
61
|
# type checks
|
|
58
62
|
if not isinstance(gdf, GeoDataFrame):
|
|
59
63
|
raise TypeError("`gdf` must be a geopandas dataframe.")
|
|
64
|
+
if not isinstance(like, (DataArray, NoneType)):
|
|
65
|
+
raise TypeError("`like` must be a xarray.DataArray")
|
|
60
66
|
if not isinstance(res, (tuple, list, NoneType)):
|
|
61
67
|
raise TypeError("`resolution` must be a tuple or list of (x, y).")
|
|
62
68
|
if not isinstance(out_shape, (tuple, list, NoneType)):
|
|
@@ -74,26 +80,39 @@ def rusterize(
|
|
|
74
80
|
if not isinstance(background, (int, float, NoneType)):
|
|
75
81
|
raise TypeError("`background` must be integer, float, or None.")
|
|
76
82
|
if not isinstance(dtype, str):
|
|
77
|
-
raise TypeError(
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
if extent and not res and not out_shape:
|
|
83
|
-
raise ValueError("Must also specify `res` or `out_shape` with extent.")
|
|
84
|
-
if res and (len(res) != 2 or any(r <= 0 for r in res) or any(not isinstance(r, (int, float)) for r in res)):
|
|
85
|
-
raise ValueError("Resolution must be 2 positive numbers.")
|
|
86
|
-
if out_shape and (len(out_shape) != 2 or any(s <= 0 for s in out_shape) or any(not isinstance(s, int) for s in out_shape)):
|
|
87
|
-
raise ValueError("Output shape must be 2 positive integers.")
|
|
88
|
-
if extent and len(extent) != 4:
|
|
89
|
-
raise ValueError("Extent must be 4 numbers (xmin, ymin, xmax, ymax).")
|
|
83
|
+
raise TypeError(
|
|
84
|
+
"`dtype` must be a one of uint8, uint16, uint32, uint64, int8, int16, int32, int64, float32, float64"
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
# value checks and defaults
|
|
90
88
|
if field and burn:
|
|
91
89
|
raise ValueError("Only one of `field` or `burn` can be specified.")
|
|
90
|
+
if like is not None:
|
|
91
|
+
if any((res, out_shape, extent)):
|
|
92
|
+
raise ValueError("`like` is mutually exclusive with `res`, `out_shape`, and `extent`.")
|
|
93
|
+
else:
|
|
94
|
+
affine = like.rio.transform()
|
|
95
|
+
_res = (affine.a, abs(affine.e))
|
|
96
|
+
_shape = like.squeeze().shape
|
|
97
|
+
_bounds, _has_extent = like.rio.bounds(), True
|
|
98
|
+
else:
|
|
99
|
+
if not res and not out_shape and not extent:
|
|
100
|
+
raise ValueError("One of `res`, `out_shape`, or `extent` must be provided.")
|
|
101
|
+
if extent and not res and not out_shape:
|
|
102
|
+
raise ValueError("Must also specify `res` or `out_shape` with extent.")
|
|
103
|
+
if res and (len(res) != 2 or any(r <= 0 for r in res) or any(not isinstance(r, (int, float)) for r in res)):
|
|
104
|
+
raise ValueError("`res` must be 2 positive numbers.")
|
|
105
|
+
if out_shape and (
|
|
106
|
+
len(out_shape) != 2 or any(s <= 0 for s in out_shape) or any(not isinstance(s, int) for s in out_shape)
|
|
107
|
+
):
|
|
108
|
+
raise ValueError("`out_shape` must be 2 positive integers.")
|
|
109
|
+
if extent and len(extent) != 4:
|
|
110
|
+
raise ValueError("`extent` must be a tuple or list of (xmin, ymin, xmax, ymax).")
|
|
92
111
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
112
|
+
# defaults
|
|
113
|
+
_res = res if res else (0, 0)
|
|
114
|
+
_shape = out_shape if out_shape else (0, 0)
|
|
115
|
+
(_bounds, _has_extent) = (extent, True) if extent else (gdf.total_bounds, False)
|
|
97
116
|
|
|
98
117
|
# RasterInfo
|
|
99
118
|
raster_info = {
|
|
@@ -113,7 +132,7 @@ def rusterize(
|
|
|
113
132
|
try:
|
|
114
133
|
df = pl.from_pandas(gdf[cols]) if cols else None
|
|
115
134
|
except KeyError as e:
|
|
116
|
-
raise KeyError("Column not found in GeoDataFrame") from e
|
|
135
|
+
raise KeyError("Column not found in GeoDataFrame.") from e
|
|
117
136
|
|
|
118
137
|
# rusterize
|
|
119
138
|
r = _rusterize(gdf.geometry, raster_info, fun, df, field, by, burn, background, dtype)
|
rusterize/rusterize.abi3.so
CHANGED
|
Binary file
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rusterize
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Classifier: License :: OSI Approved :: MIT License
|
|
5
5
|
Classifier: Operating System :: OS Independent
|
|
6
6
|
Classifier: Programming Language :: Rust
|
|
@@ -25,13 +25,13 @@ High performance rasterization tool for Python built in Rust. This
|
|
|
25
25
|
repository stems from the [fasterize](https://github.com/ecohealthalliance/fasterize.git) package built in C++
|
|
26
26
|
for R and ports parts of the logics into Python with a Rust backend, in addition to some useful improvements (see [API](#API)).
|
|
27
27
|
|
|
28
|
-
**rusterize** is designed to work on
|
|
28
|
+
**rusterize** is designed to work on _(multi)polygons_ and _(multi)linestrings_, even when they are nested inside complex geometry collections. Functionally, it takes an input [geopandas](https://geopandas.org/en/stable/) dataframe and returns a [xarray](https://docs.xarray.dev/en/stable/).
|
|
29
29
|
|
|
30
30
|
# Installation
|
|
31
31
|
|
|
32
32
|
Install the current version with pip:
|
|
33
33
|
|
|
34
|
-
```
|
|
34
|
+
```shell
|
|
35
35
|
pip install rusterize
|
|
36
36
|
```
|
|
37
37
|
|
|
@@ -43,7 +43,7 @@ package. For this to work, you’ll need to have [Rust](https://www.rust-lang.or
|
|
|
43
43
|
[cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html)
|
|
44
44
|
installed.
|
|
45
45
|
|
|
46
|
-
```
|
|
46
|
+
```shell
|
|
47
47
|
# Clone repo
|
|
48
48
|
git clone https://github.com/<username>/rusterize.git
|
|
49
49
|
cd rusterize
|
|
@@ -62,25 +62,29 @@ maturin develop --profile dist-release
|
|
|
62
62
|
|
|
63
63
|
This package has a simple API:
|
|
64
64
|
|
|
65
|
-
```
|
|
65
|
+
```python
|
|
66
66
|
from rusterize import rusterize
|
|
67
67
|
|
|
68
68
|
# gdf = <import/modify dataframe as needed>
|
|
69
69
|
|
|
70
70
|
# rusterize
|
|
71
|
-
rusterize(
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
71
|
+
rusterize(
|
|
72
|
+
gdf,
|
|
73
|
+
like=None,
|
|
74
|
+
res=(30, 30),
|
|
75
|
+
out_shape=(10, 10),
|
|
76
|
+
extent=(0, 10, 10, 20),
|
|
77
|
+
field="field",
|
|
78
|
+
by="by",
|
|
79
|
+
burn=None,
|
|
80
|
+
fun="sum",
|
|
81
|
+
background=0,
|
|
82
|
+
dtype="uint8"
|
|
83
|
+
)
|
|
81
84
|
```
|
|
82
85
|
|
|
83
86
|
- `gdf`: geopandas dataframe to rasterize
|
|
87
|
+
- `like`: xr.DataArray to use as template for `res`, `out_shape`, and `extent`. Mutually exclusive with these parameters (default: `None`)
|
|
84
88
|
- `res`: (xres, yres) for desired resolution (default: `None`)
|
|
85
89
|
- `out_shape`: (nrows, ncols) for desired output shape (default: `None`)
|
|
86
90
|
- `extent`: (xmin, ymin, xmax, ymax) for desired output extent (default: `None`)
|
|
@@ -103,7 +107,7 @@ shape, the extent is maintained. This mimics the logics of `gdal_rasterize`.
|
|
|
103
107
|
returns a dictionary that is converted to a xarray on the Python side
|
|
104
108
|
for simplicity.
|
|
105
109
|
|
|
106
|
-
```
|
|
110
|
+
```python
|
|
107
111
|
from rusterize import rusterize
|
|
108
112
|
import geopandas as gpd
|
|
109
113
|
from shapely import wkt
|
|
@@ -144,7 +148,7 @@ plt.show()
|
|
|
144
148
|
|
|
145
149
|
**rusterize** is fast! Let’s try it on small and large datasets.
|
|
146
150
|
|
|
147
|
-
```
|
|
151
|
+
```python
|
|
148
152
|
from rusterize import rusterize
|
|
149
153
|
import geopandas as gpd
|
|
150
154
|
import requests
|
|
@@ -158,7 +162,7 @@ response = requests.get(url)
|
|
|
158
162
|
# unzip
|
|
159
163
|
with zipfile.ZipFile(BytesIO(response.content), 'r') as zip_ref:
|
|
160
164
|
zip_ref.extractall()
|
|
161
|
-
|
|
165
|
+
|
|
162
166
|
# read
|
|
163
167
|
gdf_large = gpd.read_file("Mammals_Terrestrial/Mammals_Terrestrial.shp")
|
|
164
168
|
|
|
@@ -168,12 +172,13 @@ gdf_small = gdf_large.iloc[:1000, :]
|
|
|
168
172
|
# rusterize at 1/6 degree resolution
|
|
169
173
|
def test_large(benchmark):
|
|
170
174
|
benchmark(rusterize, gdf_large, res=(1/6, 1/6), fun="sum")
|
|
171
|
-
|
|
175
|
+
|
|
172
176
|
def test_small(benchmark):
|
|
173
|
-
benchmark(rusterize, gdf_small, res=(1/6, 1/6), fun="sum")
|
|
177
|
+
benchmark(rusterize, gdf_small, res=(1/6, 1/6), fun="sum")
|
|
174
178
|
```
|
|
175
179
|
|
|
176
180
|
Then you can run it with [pytest](https://docs.pytest.org/en/stable/) and [pytest-benchmark](https://pytest-benchmark.readthedocs.io/en/stable/):
|
|
181
|
+
|
|
177
182
|
```
|
|
178
183
|
pytest <python file> --benchmark-min-rounds=20 --benchmark-time-unit='s'
|
|
179
184
|
|
|
@@ -184,8 +189,10 @@ rusterize_small 0.0791 0.0899 0.0812 0.0027 0.0803 0.0020 2
|
|
|
184
189
|
rusterize_large 1.379545 1.4474 1.4006 0.0178 1.3966 0.0214 5;1 0.7140 20 1
|
|
185
190
|
-------------------------------------------------------------------------------------------------------------
|
|
186
191
|
```
|
|
192
|
+
|
|
187
193
|
And fasterize:
|
|
188
|
-
|
|
194
|
+
|
|
195
|
+
```r
|
|
189
196
|
library(sf)
|
|
190
197
|
library(raster)
|
|
191
198
|
library(fasterize)
|
|
@@ -204,13 +211,16 @@ microbenchmark(
|
|
|
204
211
|
unit='s'
|
|
205
212
|
)
|
|
206
213
|
```
|
|
214
|
+
|
|
207
215
|
```
|
|
208
216
|
Unit: seconds
|
|
209
217
|
expr min lq mean median uq max neval
|
|
210
218
|
fasterize_small 0.4741043 0.4926114 0.5191707 0.5193289 0.536741 0.5859029 20
|
|
211
219
|
fasterize_large 9.2199426 10.3595465 10.6653139 10.5369429 11.025771 11.7944567 20
|
|
212
220
|
```
|
|
221
|
+
|
|
213
222
|
And on even larger datasets? Here we use a layer from the province of Quebec, Canada representing ~2M polygons of forest stands, rasterized at 30 meters (20 rounds) with no field value and pixel function `any`. The comparison with `gdal_rasterize` was run with `hyperfine --runs 20 "gdal_rasterize -tr 30 30 -burn 1 <data_in> <data_out>"`.
|
|
223
|
+
|
|
214
224
|
```
|
|
215
225
|
# rusterize
|
|
216
226
|
--------------------------------------------- benchmark: 1 tests --------------------------------------------
|
|
@@ -228,7 +238,9 @@ Unit: seconds
|
|
|
228
238
|
Time (mean ± σ): 5.495 s ± 0.038 s [User: 4.268 s, System: 1.225 s]
|
|
229
239
|
Range (min … max): 5.452 s … 5.623 s 20 runs
|
|
230
240
|
```
|
|
241
|
+
|
|
231
242
|
In terms of (multi)line rasterization speed, here's a benchmark against `gdal_rasterize` using a layer from the province of Quebec, Canada, representing a subset of the road network for a total of ~535K multilinestrings.
|
|
243
|
+
|
|
232
244
|
```
|
|
233
245
|
# rusterize
|
|
234
246
|
--------------------------------------------- benchmark: 1 tests --------------------------------------------
|
|
@@ -241,14 +253,17 @@ test 4.5272 5.9488 4.7171 0.3236 4.6360 0.1680 2;
|
|
|
241
253
|
Time (mean ± σ): 8.719 s ± 0.063 s [User: 3.782 s, System: 4.917 s]
|
|
242
254
|
Range (min … max): 8.658 s … 8.874 s 20 runs
|
|
243
255
|
```
|
|
256
|
+
|
|
244
257
|
# Comparison with other tools
|
|
245
258
|
|
|
246
259
|
While **rusterize** is fast, there are other fast alternatives out there, including `GDAL`, `rasterio` and `geocube`. However, **rusterize** allows for seamless, Rust-native processing with a similar or lower memory footprint that doesn't require you to leave Python, and returns the geoinformation you need for downstream processing with ample control over resolution, shape, extent, and data type.
|
|
247
260
|
|
|
248
261
|
The following is a time comparison on a single run on the same forest stands dataset used earlier.
|
|
262
|
+
|
|
249
263
|
```
|
|
250
264
|
rusterize: 5.9 sec
|
|
251
265
|
rasterio: 68 sec (but no spatial information)
|
|
252
266
|
fasterize: 157 sec (including raster creation)
|
|
253
267
|
geocube: 260 sec (larger memory footprint)
|
|
254
268
|
```
|
|
269
|
+
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
rusterize-0.4.1.dist-info/METADATA,sha256=1kAxrvUCGH9PGUTwL0b40mas-5YyNN3w8QIIxaBtXW4,11278
|
|
2
|
+
rusterize-0.4.1.dist-info/WHEEL,sha256=fONWb1qLCPIEAjeedeEKnA5S_-BreZ13XfcyXg_9WBE,107
|
|
3
|
+
rusterize-0.4.1.dist-info/licenses/LICENSE,sha256=v-2DqBji_azGEWFDxBhw-CNIRu8450vBbloLx6UNqLU,1108
|
|
4
|
+
rusterize/__init__.py,sha256=u5ZbtujqvgUALd446NMUP28W7csKbgg-qb9B0GRt9xU,6409
|
|
5
|
+
rusterize/rusterize.abi3.so,sha256=9WBjR0zrxTqxh7SfOdwvHNhvB1C0HPqUSvWvWXOKkMg,44951268
|
|
6
|
+
rusterize-0.4.1.dist-info/RECORD,,
|
rusterize-0.4.0.dist-info/RECORD
DELETED
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
rusterize-0.4.0.dist-info/METADATA,sha256=VxdEZ9jhsBsBCzPY-yZiktAmNs4wuSWEe3GpU863CTQ,11176
|
|
2
|
-
rusterize-0.4.0.dist-info/WHEEL,sha256=p4amE_B4MOxnAxDu9Bb3ocyiWnFpjSV2f-bxUNWqaUk,107
|
|
3
|
-
rusterize-0.4.0.dist-info/licenses/LICENSE,sha256=v-2DqBji_azGEWFDxBhw-CNIRu8450vBbloLx6UNqLU,1108
|
|
4
|
-
rusterize/__init__.py,sha256=TZvnGqurMBCNrnTfdtjkFhQofqUk-w7TO19JhB5m1OQ,5525
|
|
5
|
-
rusterize/rusterize.abi3.so,sha256=oztD7bn_V8HoBA4a_eEJAymIz3dEmig9CkOb65Mwnaw,41802916
|
|
6
|
-
rusterize-0.4.0.dist-info/RECORD,,
|
|
File without changes
|