rusterize 0.1.0__cp310-abi3-musllinux_1_2_x86_64.whl → 0.2.0__cp310-abi3-musllinux_1_2_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rusterize might be problematic. Click here for more details.
- rusterize/core.py +51 -24
- rusterize/rusterize.abi3.so +0 -0
- {rusterize-0.1.0.dist-info → rusterize-0.2.0.dist-info}/METADATA +49 -40
- rusterize-0.2.0.dist-info/RECORD +8 -0
- {rusterize-0.1.0.dist-info → rusterize-0.2.0.dist-info}/WHEEL +1 -1
- rusterize-0.1.0.dist-info/RECORD +0 -8
- {rusterize-0.1.0.dist-info → rusterize-0.2.0.dist-info}/licenses/LICENSE +0 -0
rusterize/core.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from types import NoneType
|
|
3
4
|
from typing import Any, Dict, Optional, Tuple, Union
|
|
4
5
|
|
|
5
6
|
import polars as pl
|
|
@@ -10,7 +11,9 @@ from .rusterize import _rusterize
|
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
def rusterize(gdf: DataFrame,
|
|
13
|
-
res: Union[Tuple[int, ...], Tuple[float, ...]],
|
|
14
|
+
res: Optional[Union[Tuple[int, ...], Tuple[float, ...]]] = None,
|
|
15
|
+
out_shape: Optional[Union[Tuple[int, ...]]] = None,
|
|
16
|
+
extent: Optional[Union[Tuple[int, ...], Tuple[float, ...]]] = None,
|
|
14
17
|
field: Optional[str] = None,
|
|
15
18
|
by: Optional[str] = None,
|
|
16
19
|
fun: str = "last",
|
|
@@ -20,47 +23,71 @@ def rusterize(gdf: DataFrame,
|
|
|
20
23
|
Fast geopandas rasterization into xarray.DataArray
|
|
21
24
|
|
|
22
25
|
Args:
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
26
|
+
:param gdf: geopandas dataframe to rasterize.
|
|
27
|
+
:param res: tuple of (xres, yres) for rasterized data.
|
|
28
|
+
:param out_shape: tuple of (nrows, ncols) for regularized output shape.
|
|
29
|
+
:param extent: tuple of (xmin, ymin, xmax, ymax) for regularized extent.
|
|
30
|
+
:param field: field to rasterize. Default is None.
|
|
31
|
+
:param by: column to rasterize, assigns each unique value to a layer in the stack based on field. Default is None.
|
|
32
|
+
:param fun: pixel function to use, see fasterize for options. Default is `last`.
|
|
33
|
+
:param background: background value in final raster. Default is None.
|
|
29
34
|
|
|
30
35
|
Returns:
|
|
31
|
-
|
|
36
|
+
Rasterized xarray.DataArray.
|
|
37
|
+
|
|
38
|
+
Note:
|
|
39
|
+
When any of `res`, `out_shape`, or `extent` is not provided, it is inferred from the other arguments when applicable.
|
|
40
|
+
Unless `extent` is specified, a half-pixel buffer is applied to avoid missing points on the border.
|
|
41
|
+
The logic dictating the final spatial properties of the rasterized geometries follows that of GDAL.
|
|
32
42
|
"""
|
|
33
43
|
# type checks
|
|
34
44
|
if not isinstance(gdf, DataFrame):
|
|
35
45
|
raise TypeError("Must pass a valid geopandas dataframe.")
|
|
36
|
-
if not isinstance(
|
|
46
|
+
if not isinstance(res, (tuple, NoneType)):
|
|
47
|
+
raise TypeError("Must pass a valid resolution tuple (x, y).")
|
|
48
|
+
if not isinstance(out_shape, (tuple, NoneType)):
|
|
49
|
+
raise TypeError("Must pass a valid output shape tuple (nrows, ncols).")
|
|
50
|
+
if not isinstance(extent, (tuple, NoneType)):
|
|
51
|
+
raise TypeError("Must pass a valid extent tuple (xmin, ymin, xmax, ymax).")
|
|
52
|
+
if not isinstance(field, (str, NoneType)):
|
|
37
53
|
raise TypeError("Must pass a valid string to field.")
|
|
38
|
-
if not isinstance(by, (str,
|
|
54
|
+
if not isinstance(by, (str, NoneType)):
|
|
39
55
|
raise TypeError("Must pass a valid string to by.")
|
|
40
|
-
if not isinstance(res, tuple):
|
|
41
|
-
raise TypeError("Must pass a valid resolution tuple (x, y).")
|
|
42
56
|
if not isinstance(fun, str):
|
|
43
57
|
raise TypeError("Must pass a valid string to pixel_fn. Select only of sum, first, last, min, max, count, or any.")
|
|
44
|
-
if not isinstance(background, (int, float,
|
|
58
|
+
if not isinstance(background, (int, float, NoneType)):
|
|
45
59
|
raise TypeError("Must pass a valid background type.")
|
|
46
60
|
|
|
47
61
|
# value check
|
|
62
|
+
if not res and not out_shape and not extent:
|
|
63
|
+
raise ValueError("One of `res`, `out_shape`, or `extent` must be provided.")
|
|
64
|
+
if extent and not res and not out_shape:
|
|
65
|
+
raise ValueError("Must also specify `res` or `out_shape` with extent.")
|
|
66
|
+
if res and (len(res) != 2 or any(r <= 0 for r in res) or any(not isinstance(r, (int, float)) for r in res)):
|
|
67
|
+
raise ValueError("Resolution must be 2 positive numbers.")
|
|
68
|
+
if out_shape and (len(out_shape) != 2 or any(s <= 0 for s in out_shape) or any(not isinstance(s, int) for s in out_shape)):
|
|
69
|
+
raise ValueError("Output shape must be 2 positive integers.")
|
|
70
|
+
if extent and len(extent) != 4:
|
|
71
|
+
raise ValueError("Extent must be 4 numbers (xmin, ymin, xmax, ymax).")
|
|
48
72
|
if by and not field:
|
|
49
73
|
raise ValueError("If by is specified, field must also be specified.")
|
|
50
|
-
|
|
51
|
-
|
|
74
|
+
|
|
75
|
+
# defaults
|
|
76
|
+
_res = res if res else (0, 0)
|
|
77
|
+
_shape = out_shape if out_shape else (0, 0)
|
|
78
|
+
(_bounds, has_extent) = (extent, True) if extent else (gdf.total_bounds, False)
|
|
52
79
|
|
|
53
80
|
# RasterInfo
|
|
54
|
-
bounds = gdf.total_bounds
|
|
55
81
|
raster_info = {
|
|
56
|
-
"xmin":
|
|
57
|
-
"ymin":
|
|
58
|
-
"xmax":
|
|
59
|
-
"ymax":
|
|
60
|
-
"xres":
|
|
61
|
-
"yres":
|
|
62
|
-
"nrows": 0,
|
|
63
|
-
"ncols":
|
|
82
|
+
"xmin": _bounds[0],
|
|
83
|
+
"ymin": _bounds[1],
|
|
84
|
+
"xmax": _bounds[2],
|
|
85
|
+
"ymax": _bounds[3],
|
|
86
|
+
"xres": _res[0],
|
|
87
|
+
"yres": _res[1],
|
|
88
|
+
"nrows": _shape[0],
|
|
89
|
+
"ncols": _shape[1],
|
|
90
|
+
"has_extent": has_extent
|
|
64
91
|
}
|
|
65
92
|
|
|
66
93
|
# extract columns of interest and convert to polars
|
rusterize/rusterize.abi3.so
CHANGED
|
Binary file
|
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rusterize
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Classifier: License :: OSI Approved :: MIT License
|
|
5
5
|
Classifier: Operating System :: OS Independent
|
|
6
6
|
Classifier: Programming Language :: Rust
|
|
7
7
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
8
8
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
9
|
-
Requires-Dist: geopandas
|
|
10
|
-
Requires-Dist: pandas
|
|
11
|
-
Requires-Dist: pyarrow
|
|
12
|
-
Requires-Dist: polars
|
|
13
|
-
Requires-Dist: xarray
|
|
14
|
-
Requires-Dist: rioxarray
|
|
9
|
+
Requires-Dist: geopandas>=1.0.1
|
|
10
|
+
Requires-Dist: pandas>=2.2.3
|
|
11
|
+
Requires-Dist: pyarrow>=18.1.0
|
|
12
|
+
Requires-Dist: polars>=1.19.0
|
|
13
|
+
Requires-Dist: xarray>=2025.1.1
|
|
14
|
+
Requires-Dist: rioxarray>=0.18.2
|
|
15
15
|
License-File: LICENSE
|
|
16
|
-
Summary: High performance rasterization tool for Python
|
|
16
|
+
Summary: High performance rasterization tool for Python built in Rust
|
|
17
17
|
Keywords: fast,raster
|
|
18
18
|
Requires-Python: >=3.10
|
|
19
19
|
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
@@ -21,20 +21,17 @@ Project-URL: repository, https://github.com/ttrotto/rusterize
|
|
|
21
21
|
|
|
22
22
|
# rusterize
|
|
23
23
|
|
|
24
|
-
High performance rasterization tool for
|
|
25
|
-
repository
|
|
26
|
-
for R
|
|
24
|
+
High performance rasterization tool for Python built in Rust. This
|
|
25
|
+
repository stems from the [fasterize](https://github.com/ecohealthalliance/fasterize.git) package built in C++
|
|
26
|
+
for R and ports parts of its logic into Python with a Rust backend, in addition to some useful improvements.
|
|
27
27
|
|
|
28
|
-
Functionally, it takes an input [geopandas](https://geopandas.org/en/stable/)
|
|
29
|
-
dataframes and returns a [xarray](https://docs.xarray.dev/en/stable/). It
|
|
30
|
-
tighly mirrors the processing routine of fasterize, so it works only on
|
|
31
|
-
(multi)polygon geometries at the moment.
|
|
28
|
+
**rusterize** is designed to work on *(multi)polygons* and *(multi)linestrings*. Functionally, it takes an input [geopandas](https://geopandas.org/en/stable/) dataframe and returns a [xarray](https://docs.xarray.dev/en/stable/).
|
|
32
29
|
|
|
33
30
|
# Installation
|
|
34
31
|
|
|
35
32
|
Install the current version with pip:
|
|
36
33
|
|
|
37
|
-
```
|
|
34
|
+
``` shell
|
|
38
35
|
pip install rusterize
|
|
39
36
|
```
|
|
40
37
|
|
|
@@ -46,7 +43,7 @@ package. For this to work, you’ll need to have [Rust](https://www.rust-lang.or
|
|
|
46
43
|
[cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html)
|
|
47
44
|
installed.
|
|
48
45
|
|
|
49
|
-
```
|
|
46
|
+
``` shell
|
|
50
47
|
# Clone repo
|
|
51
48
|
git clone https://github.com/<username>/rusterize.git
|
|
52
49
|
cd rusterize
|
|
@@ -65,33 +62,43 @@ maturin develop --profile dist-release
|
|
|
65
62
|
|
|
66
63
|
This function has a simple API:
|
|
67
64
|
|
|
68
|
-
```
|
|
65
|
+
``` python
|
|
69
66
|
from rusterize.core import rusterize
|
|
70
67
|
|
|
71
|
-
# gdf = <import
|
|
68
|
+
# gdf = <import/modify dataframe as needed>
|
|
72
69
|
|
|
73
70
|
# rusterize
|
|
74
71
|
rusterize(gdf,
|
|
75
|
-
(30, 30),
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
"
|
|
79
|
-
|
|
72
|
+
res=(30, 30),
|
|
73
|
+
out_shape=(10, 10),
|
|
74
|
+
extent=(0, 0, 300, 300),
|
|
75
|
+
field="field",
|
|
76
|
+
by="by",
|
|
77
|
+
fun="sum",
|
|
78
|
+
background=0)
|
|
80
79
|
```
|
|
81
80
|
|
|
82
81
|
- `gdf`: geopandas dataframe to rasterize
|
|
83
|
-
- `res`: tuple of (xres, yres) for
|
|
82
|
+
- `res`: tuple of (xres, yres) for desired resolution
|
|
83
|
+
- `out_shape`: tuple of (nrows, ncols) for desired output shape
|
|
84
|
+
- `extent`: tuple of (xmin, ymin, xmax, ymax) for desired output extent
|
|
84
85
|
- `field`: field to rasterize. Default is None (a value of `1` is rasterized).
|
|
85
86
|
- `by`: column to rasterize. Assigns each group to a band in the
|
|
86
|
-
stack. Values are taken from `field`. Default is None
|
|
87
|
+
stack. Values are taken from `field`. Default is None (singleband raster)
|
|
87
88
|
- `fun`: pixel function to use when multiple values overlap. Default is
|
|
88
89
|
`last`. Available options are `sum`, `first`, `last`, `min`, `max`, `count`, or `any`
|
|
89
90
|
- `background`: background value in final raster. Default is None (NaN)
|
|
90
91
|
|
|
92
|
+
Note that control over the desired extent is not as strict as for resolution and shape. That is,
|
|
93
|
+
when resolution, output shape, and extent are specified, priority is given to resolution and shape.
|
|
94
|
+
So, extent is not guaranteed, but resolution and shape are. If extent is not given, it is taken
|
|
95
|
+
from the polygons and is not modified, unless you specify a resolution value. If you only specify an output
|
|
96
|
+
shape, the extent is maintained. This mimics the logic of `gdal_rasterize`.
|
|
97
|
+
|
|
91
98
|
# Usage
|
|
92
99
|
|
|
93
100
|
**rusterize** consists of a single function `rusterize()`. The Rust implementation
|
|
94
|
-
returns an array that is
|
|
101
|
+
returns an array that is converted to an xarray on the Python side
|
|
95
102
|
for simplicity.
|
|
96
103
|
|
|
97
104
|
``` python
|
|
@@ -101,17 +108,18 @@ from shapely import wkt
|
|
|
101
108
|
import matplotlib.pyplot as plt
|
|
102
109
|
|
|
103
110
|
# example from fasterize
|
|
104
|
-
|
|
111
|
+
geoms = [
|
|
105
112
|
"POLYGON ((-180 -20, -140 55, 10 0, -140 -60, -180 -20), (-150 -20, -100 -10, -110 20, -150 -20))",
|
|
106
113
|
"POLYGON ((-10 0, 140 60, 160 0, 140 -55, -10 0))",
|
|
107
|
-
"POLYGON ((-125 0, 0 60, 40 5, 15 -45, -125 0))"
|
|
114
|
+
"POLYGON ((-125 0, 0 60, 40 5, 15 -45, -125 0))",
|
|
115
|
+
"MULTILINESTRING ((-180 -70, -140 -50), (-140 -50, -100 -70), (-100 -70, -60 -50), (-60 -50, -20 -70), (-20 -70, 20 -50), (20 -50, 60 -70), (60 -70, 100 -50), (100 -50, 140 -70), (140 -70, 180 -50))"
|
|
108
116
|
]
|
|
109
117
|
|
|
110
118
|
# Convert WKT strings to Shapely geometries
|
|
111
|
-
geometries = [wkt.loads(
|
|
119
|
+
geometries = [wkt.loads(geom) for geom in geoms]
|
|
112
120
|
|
|
113
121
|
# Create a GeoDataFrame
|
|
114
|
-
gdf = gpd.GeoDataFrame({'value': range(1, len(
|
|
122
|
+
gdf = gpd.GeoDataFrame({'value': range(1, len(geoms) + 1)}, geometry=geometries, crs='EPSG:32619')
|
|
115
123
|
|
|
116
124
|
# rusterize
|
|
117
125
|
output = rusterize(
|
|
@@ -127,12 +135,11 @@ output.plot.imshow(ax=ax)
|
|
|
127
135
|
plt.show()
|
|
128
136
|
```
|
|
129
137
|
|
|
130
|
-

|
|
131
139
|
|
|
132
140
|
# Benchmarks
|
|
133
141
|
|
|
134
|
-
**
|
|
135
|
-
datasets.
|
|
142
|
+
**rusterize** is fast! Let’s try it on small and large datasets.
|
|
136
143
|
|
|
137
144
|
``` python
|
|
138
145
|
from rusterize.core import rusterize
|
|
@@ -167,7 +174,7 @@ Then you can run it with [pytest](https://docs.pytest.org/en/stable/)
|
|
|
167
174
|
and
|
|
168
175
|
[pytest-benchmark](https://pytest-benchmark.readthedocs.io/en/stable/):
|
|
169
176
|
|
|
170
|
-
```
|
|
177
|
+
```
|
|
171
178
|
pytest <python file> --benchmark-min-rounds=20 --benchmark-time-unit='s'
|
|
172
179
|
|
|
173
180
|
--------------------------------------------- benchmark: 1 tests --------------------------------------------
|
|
@@ -180,7 +187,7 @@ test_small 0.5083 0.6416 0.5265 0.0393 0.5120 0.0108 2;
|
|
|
180
187
|
|
|
181
188
|
And fasterize:
|
|
182
189
|
|
|
183
|
-
```
|
|
190
|
+
``` r
|
|
184
191
|
large <- st_read("Mammals_Terrestrial/Mammals_Terrestrial.shp", quiet = TRUE)
|
|
185
192
|
small <- large[1:1000, ]
|
|
186
193
|
fn <- function(v) {
|
|
@@ -195,7 +202,7 @@ microbenchmark(
|
|
|
195
202
|
)
|
|
196
203
|
```
|
|
197
204
|
|
|
198
|
-
```
|
|
205
|
+
```
|
|
199
206
|
Unit: seconds
|
|
200
207
|
expr min lq mean median uq max neval
|
|
201
208
|
fasterize_large 9.565781 9.815375 10.02838 9.984965 10.18532 10.66656 20
|
|
@@ -207,7 +214,7 @@ And on even
|
|
|
207
214
|
datasets? This is a benchmark with 350K+ geometries rasterized at 30
|
|
208
215
|
meters (20 rounds) with no field value and pixel function `sum`.
|
|
209
216
|
|
|
210
|
-
```
|
|
217
|
+
```
|
|
211
218
|
# rusterize
|
|
212
219
|
--------------------------------------------- benchmark: 1 tests --------------------------------------------
|
|
213
220
|
Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
|
|
@@ -221,14 +228,16 @@ Unit: seconds
|
|
|
221
228
|
fasterize 62.12409 72.13832 74.53424 75.12375 77.72899 84.77415 20
|
|
222
229
|
```
|
|
223
230
|
|
|
231
|
+
In terms of (multi)line rasterization speed, here's a benchmark against `gdal_rasterize` using a layer from the province of Quebec, Canada, representing water courses for a total of ~4.5 million multilinestrings.
|
|
232
|
+
|
|
224
233
|
# Comparison with other tools
|
|
225
234
|
|
|
226
|
-
While
|
|
235
|
+
While **rusterize** is fast, there are other fast alternatives out there, including:
|
|
227
236
|
- `GDAL`
|
|
228
237
|
- `rasterio`
|
|
229
238
|
- `geocube`
|
|
230
239
|
|
|
231
|
-
However,
|
|
240
|
+
However, **rusterize** allows for seamless, Rust-native processing with a similar or lower memory footprint that doesn't require you to leave Python, and returns the geoinformation you need for downstream processing with ample control over resolution, shape, and extent.
|
|
232
241
|
|
|
233
242
|
The following is a time comparison run on a dataset with 340K+ geometries, rasterized at 2m resolution.
|
|
234
243
|
```
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
rusterize-0.2.0.dist-info/METADATA,sha256=m_0TCBYklNLCwsmLrN1Cd1lWP-zdxFikhL-HvxvsdL8,9035
|
|
2
|
+
rusterize-0.2.0.dist-info/WHEEL,sha256=PGZoSbdnEvAPi3yZfEu0AvlQPB3tzKKAO5uUe8Ufh0A,106
|
|
3
|
+
rusterize-0.2.0.dist-info/licenses/LICENSE,sha256=v-2DqBji_azGEWFDxBhw-CNIRu8450vBbloLx6UNqLU,1108
|
|
4
|
+
rusterize.libs/libgcc_s-1e52349c.so.1,sha256=Ani0u_W_tB8ESsFePO4D2mn2lrgWLhyFdw6RETxBTYM,437537
|
|
5
|
+
rusterize/__init__.py,sha256=OymrFdgWCN3VMuSM3oXoGKeagAd-5ayPsYyXnQ_HsDE,101
|
|
6
|
+
rusterize/core.py,sha256=9nsqL-v5Bw5DezS_kpnDx1eQHooJr6zK1TpWSKxJT94,4574
|
|
7
|
+
rusterize/rusterize.abi3.so,sha256=dHXG1AwCCNjRTdUHk6fa0Mr7sv-Lyzh-3aBVPKDurq8,35676345
|
|
8
|
+
rusterize-0.2.0.dist-info/RECORD,,
|
rusterize-0.1.0.dist-info/RECORD
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
rusterize-0.1.0.dist-info/METADATA,sha256=pS1UlizVrXEDCis4_jefaIP1bDrZH5lJGquhtWwM69g,7938
|
|
2
|
-
rusterize-0.1.0.dist-info/WHEEL,sha256=mUAOKVF4E29A5oud-Hdyi-U210B5mh3h4Z9sZAEXcHA,106
|
|
3
|
-
rusterize-0.1.0.dist-info/licenses/LICENSE,sha256=v-2DqBji_azGEWFDxBhw-CNIRu8450vBbloLx6UNqLU,1108
|
|
4
|
-
rusterize.libs/libgcc_s-1e52349c.so.1,sha256=Ani0u_W_tB8ESsFePO4D2mn2lrgWLhyFdw6RETxBTYM,437537
|
|
5
|
-
rusterize/__init__.py,sha256=OymrFdgWCN3VMuSM3oXoGKeagAd-5ayPsYyXnQ_HsDE,101
|
|
6
|
-
rusterize/core.py,sha256=eSBSQMebf3vELR6FHvmL7j-66mnWcRDXiow-flG5H8M,2861
|
|
7
|
-
rusterize/rusterize.abi3.so,sha256=QbeGUWs-adE5EkwW7_ztgiyOzZNv6k2zsSWOuJkz5nw,35573913
|
|
8
|
-
rusterize-0.1.0.dist-info/RECORD,,
|
|
File without changes
|