rusterize 0.1.0__cp310-abi3-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rusterize might be problematic. Click here for more details.
- rusterize/__init__.py +4 -0
- rusterize/core.py +80 -0
- rusterize/rusterize.pyd +0 -0
- rusterize-0.1.0.dist-info/METADATA +240 -0
- rusterize-0.1.0.dist-info/RECORD +7 -0
- rusterize-0.1.0.dist-info/WHEEL +4 -0
- rusterize-0.1.0.dist-info/licenses/LICENSE +23 -0
rusterize/__init__.py
ADDED
rusterize/core.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, Optional, Tuple, Union
|
|
4
|
+
|
|
5
|
+
import polars as pl
|
|
6
|
+
from pandas import DataFrame
|
|
7
|
+
import rioxarray
|
|
8
|
+
from xarray import DataArray
|
|
9
|
+
from .rusterize import _rusterize
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def rusterize(gdf: DataFrame,
              res: Union[Tuple[int, ...], Tuple[float, ...]],
              field: Optional[str] = None,
              by: Optional[str] = None,
              fun: str = "last",
              background: Optional[Union[int, float]] = None,
              ) -> DataArray:
    """
    Fast geopandas rasterization into xarray.DataArray

    Args:
        :param gdf: geopandas dataframe to rasterize.
        :param res: tuple of (xres, yres) for rasterized data. Both values must be strictly positive and of the same type.
        :param field: field to rasterize. Default is None.
        :param by: column to rasterize, assigns each unique value to a layer in the stack based on field. Default is None.
        :param fun: pixel function to use, see fasterize for options. Default is `last`.
        :param background: background value in final raster. Default is None.

    Returns:
        xarray.DataArray built from the rasterized output, with the CRS of `gdf` written to it.

    Raises:
        TypeError: if an argument is not of the expected type.
        ValueError: if `by` is given without `field`, or if `res` is not a pair of
            strictly positive values of consistent dtype.
    """
    # type checks
    if not isinstance(gdf, DataFrame):
        raise TypeError("Must pass a valid geopandas dataframe.")
    if not isinstance(field, (str, type(None))):
        raise TypeError("Must pass a valid string to field.")
    if not isinstance(by, (str, type(None))):
        raise TypeError("Must pass a valid string to by.")
    if not isinstance(res, tuple):
        raise TypeError("Must pass a valid resolution tuple (x, y).")
    if not isinstance(fun, str):
        raise TypeError("Must pass a valid string to fun. Select only one of sum, first, last, min, max, count, or any.")
    if not isinstance(background, (int, float, type(None))):
        raise TypeError("Must pass a valid background type.")

    # value check
    if by and not field:
        raise ValueError("If by is specified, field must also be specified.")
    # Each component is tested on its own: comparing the boolean result of
    # any(...) against 0 let negative values and a single zero slip through.
    # `not all(r > 0 ...)` also rejects NaN, which compares False to everything.
    if len(res) != 2 or not all(r > 0 for r in res) or not isinstance(res[0], type(res[1])):
        raise ValueError("Must pass valid resolution tuple of values of consistent dtype.")

    # RasterInfo
    bounds = gdf.total_bounds
    raster_info = {
        "xmin": bounds[0],
        "ymin": bounds[1],
        "xmax": bounds[2],
        "ymax": bounds[3],
        "xres": res[0],
        "yres": res[1],
        # NOTE(review): passed as 0 here; presumably computed by the Rust
        # backend from the bounds and resolution - confirm.
        "nrows": 0,
        "ncols": 0
    }

    # extract columns of interest and convert to polars
    # dict.fromkeys drops a duplicate when field == by while keeping a
    # deterministic column order (a set would not).
    cols = list(dict.fromkeys(col for col in (field, by) if col))
    df = pl.from_pandas(gdf[cols]) if cols else None

    # rusterize
    r = _rusterize(
        gdf.geometry,
        raster_info,
        fun,
        df,
        field,
        by,
        background
    )
    # the `.rio` accessor is registered by the module-level `import rioxarray`
    return DataArray.from_dict(r).rio.write_crs(gdf.crs, inplace=True)
|
rusterize/rusterize.pyd
ADDED
|
Binary file
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: rusterize
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
5
|
+
Classifier: Operating System :: OS Independent
|
|
6
|
+
Classifier: Programming Language :: Rust
|
|
7
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
8
|
+
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
9
|
+
Requires-Dist: geopandas >=1.0.1
|
|
10
|
+
Requires-Dist: pandas >=2.2.3
|
|
11
|
+
Requires-Dist: pyarrow >=18.1.0
|
|
12
|
+
Requires-Dist: polars >=1.19.0
|
|
13
|
+
Requires-Dist: xarray >=2025.1.1
|
|
14
|
+
Requires-Dist: rioxarray >=0.18.2
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Summary: High performance rasterization tool for Python built in Rust
|
|
17
|
+
Keywords: fast,raster
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
20
|
+
Project-URL: repository, https://github.com/ttrotto/rusterize
|
|
21
|
+
|
|
22
|
+
# rusterize
|
|
23
|
+
|
|
24
|
+
High performance rasterization tool for python built in Rust. This
|
|
25
|
+
repository is heavily based on the [**fasterize**](https://github.com/ecohealthalliance/fasterize.git) package built in C++
|
|
26
|
+
for R. This version ports it to Python with a Rust backend.
|
|
27
|
+
|
|
28
|
+
Functionally, it takes an input [geopandas](https://geopandas.org/en/stable/)
|
|
29
|
+
dataframe and returns an [xarray](https://docs.xarray.dev/en/stable/). It
|
|
30
|
+
tightly mirrors the processing routine of fasterize, so it works only on
|
|
31
|
+
(multi)polygon geometries at the moment.
|
|
32
|
+
|
|
33
|
+
# Installation
|
|
34
|
+
|
|
35
|
+
Install the current version with pip:
|
|
36
|
+
|
|
37
|
+
``` {shell}
|
|
38
|
+
pip install rusterize
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
# Contributing
|
|
42
|
+
|
|
43
|
+
Any contribution is welcome! You can install **rusterize** directly
|
|
44
|
+
from this repo using [maturin](https://www.maturin.rs/) as an editable
|
|
45
|
+
package. For this to work, you’ll need to have [Rust](https://www.rust-lang.org/tools/install) and
|
|
46
|
+
[cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html)
|
|
47
|
+
installed.
|
|
48
|
+
|
|
49
|
+
``` {shell}
|
|
50
|
+
# Clone repo
|
|
51
|
+
git clone https://github.com/<username>/rusterize.git
|
|
52
|
+
cd rusterize
|
|
53
|
+
|
|
54
|
+
# Install the Rust nightly toolchain
|
|
55
|
+
rustup toolchain install nightly-2025-01-05
|
|
56
|
+
|
|
57
|
+
# Install maturin
|
|
58
|
+
pip install maturin
|
|
59
|
+
|
|
60
|
+
# Install editable version with optimized code
|
|
61
|
+
maturin develop --profile dist-release
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
# API
|
|
65
|
+
|
|
66
|
+
This function has a simple API:
|
|
67
|
+
|
|
68
|
+
``` python
|
|
69
|
+
from rusterize.core import rusterize
|
|
70
|
+
|
|
71
|
+
# gdf = <import datasets as needed>
|
|
72
|
+
|
|
73
|
+
# rusterize
|
|
74
|
+
rusterize(gdf,
|
|
75
|
+
(30, 30),
|
|
76
|
+
"field",
|
|
77
|
+
"by",
|
|
78
|
+
"sum",
|
|
79
|
+
0)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
- `gdf`: geopandas dataframe to rasterize
|
|
83
|
+
- `res`: tuple of (xres, yres) for final resolution
|
|
84
|
+
- `field`: field to rasterize. Default is None (a value of `1` is rasterized).
|
|
85
|
+
- `by`: column to rasterize. Assigns each group to a band in the
|
|
86
|
+
stack. Values are taken from `field`. Default is None
|
|
87
|
+
- `fun`: pixel function to use when multiple values overlap. Default is
|
|
88
|
+
`last`. Available options are `sum`, `first`, `last`, `min`, `max`, `count`, or `any`
|
|
89
|
+
- `background`: background value in final raster. Default is None (NaN)
|
|
90
|
+
|
|
91
|
+
# Usage
|
|
92
|
+
|
|
93
|
+
**rusterize** consists of a single function `rusterize()`. The Rust implementation
|
|
94
|
+
returns an array that is then converted to an xarray on the Python side
|
|
95
|
+
for simplicity.
|
|
96
|
+
|
|
97
|
+
``` python
|
|
98
|
+
from rusterize.core import rusterize
|
|
99
|
+
import geopandas as gpd
|
|
100
|
+
from shapely import wkt
|
|
101
|
+
import matplotlib.pyplot as plt
|
|
102
|
+
|
|
103
|
+
# example from fasterize
|
|
104
|
+
polygons = [
|
|
105
|
+
"POLYGON ((-180 -20, -140 55, 10 0, -140 -60, -180 -20), (-150 -20, -100 -10, -110 20, -150 -20))",
|
|
106
|
+
"POLYGON ((-10 0, 140 60, 160 0, 140 -55, -10 0))",
|
|
107
|
+
"POLYGON ((-125 0, 0 60, 40 5, 15 -45, -125 0))"
|
|
108
|
+
]
|
|
109
|
+
|
|
110
|
+
# Convert WKT strings to Shapely geometries
|
|
111
|
+
geometries = [wkt.loads(polygon) for polygon in polygons]
|
|
112
|
+
|
|
113
|
+
# Create a GeoDataFrame
|
|
114
|
+
gdf = gpd.GeoDataFrame({'value': range(1, len(polygons) + 1)}, geometry=geometries, crs='EPSG:32619')
|
|
115
|
+
|
|
116
|
+
# rusterize
|
|
117
|
+
output = rusterize(
|
|
118
|
+
gdf,
|
|
119
|
+
res=(1, 1),
|
|
120
|
+
field="value",
|
|
121
|
+
fun="sum"
|
|
122
|
+
).squeeze()
|
|
123
|
+
|
|
124
|
+
# plot it
|
|
125
|
+
fig, ax = plt.subplots(figsize=(12, 6))
|
|
126
|
+
output.plot.imshow(ax=ax)
|
|
127
|
+
plt.show()
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+

|
|
131
|
+
|
|
132
|
+
# Benchmarks
|
|
133
|
+
|
|
134
|
+
**fasterize** is fast and so is **rusterize**! Let’s try it on small and large
|
|
135
|
+
datasets.
|
|
136
|
+
|
|
137
|
+
``` python
|
|
138
|
+
from rusterize.core import rusterize
|
|
139
|
+
import geopandas as gpd
|
|
140
|
+
import requests
|
|
141
|
+
import zipfile
|
|
142
|
+
from io import BytesIO
|
|
143
|
+
|
|
144
|
+
# large dataset (~380 MB)
|
|
145
|
+
url = "https://s3.amazonaws.com/hp3-shapefiles/Mammals_Terrestrial.zip"
|
|
146
|
+
response = requests.get(url)
|
|
147
|
+
|
|
148
|
+
# unzip
|
|
149
|
+
with zipfile.ZipFile(BytesIO(response.content), 'r') as zip_ref:
|
|
150
|
+
zip_ref.extractall()
|
|
151
|
+
|
|
152
|
+
# read
|
|
153
|
+
gdf_large = gpd.read_file("Mammals_Terrestrial/Mammals_Terrestrial.shp")
|
|
154
|
+
|
|
155
|
+
# small dataset (first 1000 rows)
|
|
156
|
+
gdf_small = gdf_large.iloc[:1000, :]
|
|
157
|
+
|
|
158
|
+
# rusterize at 1/6 degree resolution
|
|
159
|
+
def test_large(benchmark):
|
|
160
|
+
benchmark(rusterize, gdf_large, (1/6, 1/6), fun="sum")
|
|
161
|
+
|
|
162
|
+
def test_small(benchmark):
|
|
163
|
+
benchmark(rusterize, gdf_small, (1/6, 1/6), fun="sum")
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
Then you can run it with [pytest](https://docs.pytest.org/en/stable/)
|
|
167
|
+
and
|
|
168
|
+
[pytest-benchmark](https://pytest-benchmark.readthedocs.io/en/stable/):
|
|
169
|
+
|
|
170
|
+
``` {shell}
|
|
171
|
+
pytest <python file> --benchmark-min-rounds=20 --benchmark-time-unit='s'
|
|
172
|
+
|
|
173
|
+
--------------------------------------------- benchmark: 1 tests --------------------------------------------
|
|
174
|
+
Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
|
|
175
|
+
-------------------------------------------------------------------------------------------------------------
|
|
176
|
+
test_large 10.5870 11.2302 10.8633 0.1508 10.8417 0.1594 4;1 0.0921 20 1
|
|
177
|
+
test_small 0.5083 0.6416 0.5265 0.0393 0.5120 0.0108 2;2 1.8995 20 1
|
|
178
|
+
-------------------------------------------------------------------------------------------------------------
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
And fasterize:
|
|
182
|
+
|
|
183
|
+
``` {r}
|
|
184
|
+
large <- st_read("Mammals_Terrestrial/Mammals_Terrestrial.shp", quiet = TRUE)
|
|
185
|
+
small <- large[1:1000, ]
|
|
186
|
+
fn <- function(v) {
|
|
187
|
+
r <- raster(v, res = 1/6)
|
|
188
|
+
return(fasterize(v, r, fun = "sum"))
|
|
189
|
+
}
|
|
190
|
+
microbenchmark(
|
|
191
|
+
fasterize_large = f <- fn(large),
|
|
192
|
+
fasterize_small = f <- fn(small),
|
|
193
|
+
times=20L,
|
|
194
|
+
unit='s'
|
|
195
|
+
)
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
``` {shell}
|
|
199
|
+
Unit: seconds
|
|
200
|
+
expr min lq mean median uq max neval
|
|
201
|
+
fasterize_large 9.565781 9.815375 10.02838 9.984965 10.18532 10.66656 20
|
|
202
|
+
fasterize_small 0.469389 0.500616 0.571851 0.558818 0.613419 0.795159 20
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
And on even
|
|
206
|
+
[larger](https://open.canada.ca/data/en/dataset/fbf12500-bffe-4209-a1ae-fba86f154ebf/resource/cc90d77c-fba3-4f84-b30a-e684cfe0649a)
|
|
207
|
+
datasets? This is a benchmark with 350K+ geometries rasterized at 30
|
|
208
|
+
meters (20 rounds) with no field value and pixel function `sum`.
|
|
209
|
+
|
|
210
|
+
``` {shell}
|
|
211
|
+
# rusterize
|
|
212
|
+
--------------------------------------------- benchmark: 1 tests --------------------------------------------
|
|
213
|
+
Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
|
|
214
|
+
-------------------------------------------------------------------------------------------------------------
|
|
215
|
+
test_sbw 46.5711 49.0212 48.4340 0.5504 48.5812 0.5054 3;1 0.0206 20 1
|
|
216
|
+
-------------------------------------------------------------------------------------------------------------
|
|
217
|
+
|
|
218
|
+
# fasterize
|
|
219
|
+
Unit: seconds
|
|
220
|
+
expr min lq mean median uq max neval
|
|
221
|
+
fasterize 62.12409 72.13832 74.53424 75.12375 77.72899 84.77415 20
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
# Comparison with other tools
|
|
225
|
+
|
|
226
|
+
While `rusterize` is fast, there are other very fast solutions out there, including
|
|
227
|
+
- `GDAL`
|
|
228
|
+
- `rasterio`
|
|
229
|
+
- `geocube`
|
|
230
|
+
|
|
231
|
+
However, `rusterize` allows for a seamless, Rust-native processing with similar or lower memory footprint that doesn't require you to leave Python, and returns the geoinformation you need for downstream processing.
|
|
232
|
+
|
|
233
|
+
The following is a time comparison run on a dataset with 340K+ geometries, rasterized at 2m resolution.
|
|
234
|
+
```
|
|
235
|
+
rusterize: 24 sec
|
|
236
|
+
fasterize: 47 sec
|
|
237
|
+
GDAL (cli): 40 sec (read from fast drive, write to fast drive)
|
|
238
|
+
rasterio: 20 sec (but no spatial information)
|
|
239
|
+
geocube: 42 sec (larger memory footprint)
|
|
240
|
+
```
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
rusterize-0.1.0.dist-info/METADATA,sha256=vBY0cdi3hAcyq6AJ8Tb7F4wRf3ftfSVT3CAQTsZngoA,8156
|
|
2
|
+
rusterize-0.1.0.dist-info/WHEEL,sha256=OP9M1tCSrnmep3a1BuV3EynKyOuGWiyM2EjKtCEnCOs,95
|
|
3
|
+
rusterize-0.1.0.dist-info/licenses/LICENSE,sha256=FXkix0amECHul0Y2qWBXnEGNV2fd8GuVCIZuuzQwR-c,1130
|
|
4
|
+
rusterize/core.py,sha256=wcDZBkJOOCZz7zEAT3DvtKQx1NMQTb9IHhthYrCSqGE,2940
|
|
5
|
+
rusterize/__init__.py,sha256=rQSJ7V7ykrsuWz-cQK5Dm9E7usCYmCD3dIUrnosWABc,105
|
|
6
|
+
rusterize/rusterize.pyd,sha256=q8Fk-VlojeHk8N9-E8rhVc8ULdqSGkqQj4b4iLMjqnw,37707776
|
|
7
|
+
rusterize-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Tommaso Trotto
|
|
4
|
+
Copyright (c) 2017 EcoHealth Alliance
|
|
5
|
+
|
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
7
|
+
a copy of this software and associated documentation files (the
|
|
8
|
+
"Software"), to deal in the Software without restriction, including
|
|
9
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
10
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
11
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
12
|
+
the following conditions:
|
|
13
|
+
|
|
14
|
+
The above copyright notice and this permission notice shall be
|
|
15
|
+
included in all copies or substantial portions of the Software.
|
|
16
|
+
|
|
17
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
18
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
19
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
20
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
21
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
22
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
23
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|