rusterize 0.4.0__cp311-abi3-manylinux_2_28_ppc64le.whl → 0.4.1__cp311-abi3-manylinux_2_28_ppc64le.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rusterize might be problematic. Click here for more details.

rusterize/__init__.py CHANGED
@@ -1,14 +1,15 @@
1
1
  from __future__ import annotations
2
- import importlib.metadata
3
2
 
3
+ import importlib.metadata
4
4
  from types import NoneType
5
5
  from typing import List, Tuple
6
6
 
7
7
  import numpy as np
8
8
  import polars as pl
9
- from geopandas import GeoDataFrame
10
9
  import rioxarray
10
+ from geopandas import GeoDataFrame
11
11
  from xarray import DataArray
12
+
12
13
  from .rusterize import _rusterize
13
14
 
14
15
  __version__ = importlib.metadata.version("rusterize")
@@ -16,6 +17,7 @@ __version__ = importlib.metadata.version("rusterize")
16
17
 
17
18
  def rusterize(
18
19
  gdf: GeoDataFrame,
20
+ like: DataArray | None = None,
19
21
  res: Tuple | List | None = None,
20
22
  out_shape: Tuple | List | None = None,
21
23
  extent: Tuple | List | None = None,
@@ -31,6 +33,7 @@ def rusterize(
31
33
 
32
34
  Args:
33
35
  :param gdf: geopandas dataframe to rasterize.
36
+ :param like: array to use as blueprint for spatial matching (resolution, shape, extent). Mutually exclusive with res, out_shape, and extent.
34
37
  :param res: (xres, yres) for rasterized data.
35
38
  :param out_shape: (nrows, ncols) for regularized output shape.
36
39
  :param extent: (xmin, ymin, xmax, ymax) for regularized extent.
@@ -46,17 +49,20 @@ def rusterize(
46
49
 
47
50
  Notes:
48
51
  When any of `res`, `out_shape`, or `extent` is not provided, it is inferred from the other arguments when applicable.
52
+ If `like` is specified, `res`, `out_shape`, and `extent` are inferred from the `like` DataArray.
49
53
  Unless `extent` is specified, a half-pixel buffer is applied to avoid missing points on the border.
50
54
  The logics dictating the final spatial properties of the rasterized geometries follow those of GDAL.
51
-
55
+
52
56
  If `field` is not in `gdf`, then a default `burn` value of 1 is rasterized.
53
-
57
+
54
58
  A `None` value for `dtype` corresponds to the default of that dtype. An illegal value for a dtype will be replaced with the default of
55
59
  that dtype. For example, a `background=np.nan` for `dtype="uint8"` will become `background=0`, where `0` is the default for `uint8`.
56
60
  """
57
61
  # type checks
58
62
  if not isinstance(gdf, GeoDataFrame):
59
63
  raise TypeError("`gdf` must be a geopandas dataframe.")
64
+ if not isinstance(like, (DataArray, NoneType)):
65
+ raise TypeError("`'ike' must be a xarray.DataArray")
60
66
  if not isinstance(res, (tuple, list, NoneType)):
61
67
  raise TypeError("`resolution` must be a tuple or list of (x, y).")
62
68
  if not isinstance(out_shape, (tuple, list, NoneType)):
@@ -74,26 +80,39 @@ def rusterize(
74
80
  if not isinstance(background, (int, float, NoneType)):
75
81
  raise TypeError("`background` must be integer, float, or None.")
76
82
  if not isinstance(dtype, str):
77
- raise TypeError("`dtype` must be a one of uint8, uint16, uint32, uint64, int8, int16, int32, int64, float32, float64")
78
-
79
- # value checks
80
- if not res and not out_shape and not extent:
81
- raise ValueError("One of `res`, `out_shape`, or `extent` must be provided.")
82
- if extent and not res and not out_shape:
83
- raise ValueError("Must also specify `res` or `out_shape` with extent.")
84
- if res and (len(res) != 2 or any(r <= 0 for r in res) or any(not isinstance(r, (int, float)) for r in res)):
85
- raise ValueError("Resolution must be 2 positive numbers.")
86
- if out_shape and (len(out_shape) != 2 or any(s <= 0 for s in out_shape) or any(not isinstance(s, int) for s in out_shape)):
87
- raise ValueError("Output shape must be 2 positive integers.")
88
- if extent and len(extent) != 4:
89
- raise ValueError("Extent must be 4 numbers (xmin, ymin, xmax, ymax).")
83
+ raise TypeError(
84
+ "`dtype` must be a one of uint8, uint16, uint32, uint64, int8, int16, int32, int64, float32, float64"
85
+ )
86
+
87
+ # value checks and defaults
90
88
  if field and burn:
91
89
  raise ValueError("Only one of `field` or `burn` can be specified.")
90
+ if like is not None:
91
+ if any((res, out_shape, extent)):
92
+ raise ValueError("`like` is mutually exclusive with `res`, `out_shape`, and `extent`.")
93
+ else:
94
+ affine = like.rio.transform()
95
+ _res = (affine.a, abs(affine.e))
96
+ _shape = like.squeeze().shape
97
+ _bounds, _has_extent = like.rio.bounds(), True
98
+ else:
99
+ if not res and not out_shape and not extent:
100
+ raise ValueError("One of `res`, `out_shape`, or `extent` must be provided.")
101
+ if extent and not res and not out_shape:
102
+ raise ValueError("Must also specify `res` or `out_shape` with extent.")
103
+ if res and (len(res) != 2 or any(r <= 0 for r in res) or any(not isinstance(r, (int, float)) for r in res)):
104
+ raise ValueError("`res` must be 2 positive numbers.")
105
+ if out_shape and (
106
+ len(out_shape) != 2 or any(s <= 0 for s in out_shape) or any(not isinstance(s, int) for s in out_shape)
107
+ ):
108
+ raise ValueError("`out_shape` must be 2 positive integers.")
109
+ if extent and len(extent) != 4:
110
+ raise ValueError("`extent` must be a tuple or list of (xmin, ymin, xmax, ymax).")
92
111
 
93
- # defaults
94
- _res = res if res else (0, 0)
95
- _shape = out_shape if out_shape else (0, 0)
96
- (_bounds, _has_extent) = (extent, True) if extent else (gdf.total_bounds, False)
112
+ # defaults
113
+ _res = res if res else (0, 0)
114
+ _shape = out_shape if out_shape else (0, 0)
115
+ (_bounds, _has_extent) = (extent, True) if extent else (gdf.total_bounds, False)
97
116
 
98
117
  # RasterInfo
99
118
  raster_info = {
@@ -113,7 +132,7 @@ def rusterize(
113
132
  try:
114
133
  df = pl.from_pandas(gdf[cols]) if cols else None
115
134
  except KeyError as e:
116
- raise KeyError("Column not found in GeoDataFrame") from e
135
+ raise KeyError("Column not found in GeoDataFrame.") from e
117
136
 
118
137
  # rusterize
119
138
  r = _rusterize(gdf.geometry, raster_info, fun, df, field, by, burn, background, dtype)
Binary file
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rusterize
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Classifier: License :: OSI Approved :: MIT License
5
5
  Classifier: Operating System :: OS Independent
6
6
  Classifier: Programming Language :: Rust
@@ -25,13 +25,13 @@ High performance rasterization tool for Python built in Rust. This
25
25
  repository stems from the [fasterize](https://github.com/ecohealthalliance/fasterize.git) package built in C++
26
26
  for R and ports parts of the logics into Python with a Rust backend, in addition to some useful improvements (see [API](#API)).
27
27
 
28
- **rusterize** is designed to work on *(multi)polygons* and *(multi)linestrings*, even when they are nested inside complex geometry collections. Functionally, it takes an input [geopandas](https://geopandas.org/en/stable/) dataframe and returns a [xarray](https://docs.xarray.dev/en/stable/).
28
+ **rusterize** is designed to work on _(multi)polygons_ and _(multi)linestrings_, even when they are nested inside complex geometry collections. Functionally, it takes an input [geopandas](https://geopandas.org/en/stable/) dataframe and returns a [xarray](https://docs.xarray.dev/en/stable/).
29
29
 
30
30
  # Installation
31
31
 
32
32
  Install the current version with pip:
33
33
 
34
- ``` shell
34
+ ```shell
35
35
  pip install rusterize
36
36
  ```
37
37
 
@@ -43,7 +43,7 @@ package. For this to work, you’ll need to have [Rust](https://www.rust-lang.or
43
43
  [cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html)
44
44
  installed.
45
45
 
46
- ``` shell
46
+ ```shell
47
47
  # Clone repo
48
48
  git clone https://github.com/<username>/rusterize.git
49
49
  cd rusterize
@@ -62,25 +62,29 @@ maturin develop --profile dist-release
62
62
 
63
63
  This package has a simple API:
64
64
 
65
- ``` python
65
+ ```python
66
66
  from rusterize import rusterize
67
67
 
68
68
  # gdf = <import/modify dataframe as needed>
69
69
 
70
70
  # rusterize
71
- rusterize(gdf,
72
- res=(30, 30),
73
- out_shape=(10, 10)
74
- extent=(0, 10, 10, 20)
75
- field="field",
76
- by="by",
77
- burn=None,
78
- fun="sum",
79
- background=0,
80
- dtype="uint8")
71
+ rusterize(
72
+ gdf,
73
+ like=None,
74
+ res=(30, 30),
75
+ out_shape=(10, 10),
76
+ extent=(0, 10, 10, 20),
77
+ field="field",
78
+ by="by",
79
+ burn=None,
80
+ fun="sum",
81
+ background=0,
82
+ dtype="uint8"
83
+ )
81
84
  ```
82
85
 
83
86
  - `gdf`: geopandas dataframe to rasterize
87
+ - `like`: xr.DataArray to use as template for `res`, `out_shape`, and `extent`. Mutually exclusive with these parameters (default: `None`)
84
88
  - `res`: (xres, yres) for desired resolution (default: `None`)
85
89
  - `out_shape`: (nrows, ncols) for desired output shape (default: `None`)
86
90
  - `extent`: (xmin, ymin, xmax, ymax) for desired output extent (default: `None`)
@@ -103,7 +107,7 @@ shape, the extent is maintained. This mimics the logics of `gdal_rasterize`.
103
107
  returns a dictionary that is converted to a xarray on the Python side
104
108
  for simplicity.
105
109
 
106
- ``` python
110
+ ```python
107
111
  from rusterize import rusterize
108
112
  import geopandas as gpd
109
113
  from shapely import wkt
@@ -144,7 +148,7 @@ plt.show()
144
148
 
145
149
  **rusterize** is fast! Let’s try it on small and large datasets.
146
150
 
147
- ``` python
151
+ ```python
148
152
  from rusterize import rusterize
149
153
  import geopandas as gpd
150
154
  import requests
@@ -158,7 +162,7 @@ response = requests.get(url)
158
162
  # unzip
159
163
  with zipfile.ZipFile(BytesIO(response.content), 'r') as zip_ref:
160
164
  zip_ref.extractall()
161
-
165
+
162
166
  # read
163
167
  gdf_large = gpd.read_file("Mammals_Terrestrial/Mammals_Terrestrial.shp")
164
168
 
@@ -168,12 +172,13 @@ gdf_small = gdf_large.iloc[:1000, :]
168
172
  # rusterize at 1/6 degree resolution
169
173
  def test_large(benchmark):
170
174
  benchmark(rusterize, gdf_large, res=(1/6, 1/6), fun="sum")
171
-
175
+
172
176
  def test_small(benchmark):
173
- benchmark(rusterize, gdf_small, res=(1/6, 1/6), fun="sum")
177
+ benchmark(rusterize, gdf_small, res=(1/6, 1/6), fun="sum")
174
178
  ```
175
179
 
176
180
  Then you can run it with [pytest](https://docs.pytest.org/en/stable/) and [pytest-benchmark](https://pytest-benchmark.readthedocs.io/en/stable/):
181
+
177
182
  ```
178
183
  pytest <python file> --benchmark-min-rounds=20 --benchmark-time-unit='s'
179
184
 
@@ -184,8 +189,10 @@ rusterize_small 0.0791 0.0899 0.0812 0.0027 0.0803 0.0020 2
184
189
  rusterize_large 1.379545 1.4474 1.4006 0.0178 1.3966 0.0214 5;1 0.7140 20 1
185
190
  -------------------------------------------------------------------------------------------------------------
186
191
  ```
192
+
187
193
  And fasterize:
188
- ``` r
194
+
195
+ ```r
189
196
  library(sf)
190
197
  library(raster)
191
198
  library(fasterize)
@@ -204,13 +211,16 @@ microbenchmark(
204
211
  unit='s'
205
212
  )
206
213
  ```
214
+
207
215
  ```
208
216
  Unit: seconds
209
217
  expr min lq mean median uq max neval
210
218
  fasterize_small 0.4741043 0.4926114 0.5191707 0.5193289 0.536741 0.5859029 20
211
219
  fasterize_large 9.2199426 10.3595465 10.6653139 10.5369429 11.025771 11.7944567 20
212
220
  ```
221
+
213
222
  And on an even larger dataset? Here we use a layer from the province of Quebec, Canada representing ~2M polygons of forest stands, rasterized at 30 meters (20 rounds) with no field value and pixel function `any`. The comparison with `gdal_rasterize` was run with `hyperfine --runs 20 "gdal_rasterize -tr 30 30 -burn 1 <data_in> <data_out>"`.
223
+
214
224
  ```
215
225
  # rusterize
216
226
  --------------------------------------------- benchmark: 1 tests --------------------------------------------
@@ -228,7 +238,9 @@ Unit: seconds
228
238
  Time (mean ± σ): 5.495 s ± 0.038 s [User: 4.268 s, System: 1.225 s]
229
239
  Range (min … max): 5.452 s … 5.623 s 20 runs
230
240
  ```
241
+
231
242
  In terms of (multi)line rasterization speed, here's a benchmark against `gdal_rasterize` using a layer from the province of Quebec, Canada, representing a subset of the road network for a total of ~535K multilinestrings.
243
+
232
244
  ```
233
245
  # rusterize
234
246
  --------------------------------------------- benchmark: 1 tests --------------------------------------------
@@ -241,14 +253,17 @@ test 4.5272 5.9488 4.7171 0.3236 4.6360 0.1680 2;
241
253
  Time (mean ± σ): 8.719 s ± 0.063 s [User: 3.782 s, System: 4.917 s]
242
254
  Range (min … max): 8.658 s … 8.874 s 20 runs
243
255
  ```
256
+
244
257
  # Comparison with other tools
245
258
 
246
259
  While **rusterize** is fast, there are other fast alternatives out there, including `GDAL`, `rasterio` and `geocube`. However, **rusterize** allows for a seamless, Rust-native processing with similar or lower memory footprint that doesn't require you to leave Python, and returns the geoinformation you need for downstream processing with ample control over resolution, shape, extent, and data type.
247
260
 
248
261
  The following is a time comparison on a single run on the same forest stands dataset used earlier.
262
+
249
263
  ```
250
264
  rusterize: 5.9 sec
251
265
  rasterio: 68 sec (but no spatial information)
252
266
  fasterize: 157 sec (including raster creation)
253
267
  geocube: 260 sec (larger memory footprint)
254
268
  ```
269
+
@@ -0,0 +1,6 @@
1
+ rusterize-0.4.1.dist-info/METADATA,sha256=1kAxrvUCGH9PGUTwL0b40mas-5YyNN3w8QIIxaBtXW4,11278
2
+ rusterize-0.4.1.dist-info/WHEEL,sha256=LyKrmraG_uSXY5vhpX0CJs5HieuGNOxWFsl0o305mFk,108
3
+ rusterize-0.4.1.dist-info/licenses/LICENSE,sha256=v-2DqBji_azGEWFDxBhw-CNIRu8450vBbloLx6UNqLU,1108
4
+ rusterize/__init__.py,sha256=u5ZbtujqvgUALd446NMUP28W7csKbgg-qb9B0GRt9xU,6409
5
+ rusterize/rusterize.abi3.so,sha256=gqoznfyypqLHCu_AvHpItWcDS51oiMVMvA68XPjThRQ,52167072
6
+ rusterize-0.4.1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: maturin (1.9.3)
2
+ Generator: maturin (1.9.4)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp311-abi3-manylinux_2_28_ppc64le
@@ -1,6 +0,0 @@
1
- rusterize-0.4.0.dist-info/METADATA,sha256=VxdEZ9jhsBsBCzPY-yZiktAmNs4wuSWEe3GpU863CTQ,11176
2
- rusterize-0.4.0.dist-info/WHEEL,sha256=MGaQjBHAphO6IVR063SFNWIT-ZlGUh0_fKN0-9trTkw,108
3
- rusterize-0.4.0.dist-info/licenses/LICENSE,sha256=v-2DqBji_azGEWFDxBhw-CNIRu8450vBbloLx6UNqLU,1108
4
- rusterize/__init__.py,sha256=TZvnGqurMBCNrnTfdtjkFhQofqUk-w7TO19JhB5m1OQ,5525
5
- rusterize/rusterize.abi3.so,sha256=8XDkTy4fMGQeErO_QYQgg50UcmAgX_v-u54LHUMcgyo,48538216
6
- rusterize-0.4.0.dist-info/RECORD,,