rusterize 0.5.0__cp311-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rusterize/__init__.py ADDED
@@ -0,0 +1,147 @@
1
+ from __future__ import annotations
2
+
3
+ import importlib.metadata
4
+ from types import NoneType
5
+ from typing import TYPE_CHECKING, List, Tuple
6
+
7
+ import numpy as np
8
+ import polars as pl
9
+ from geopandas import GeoDataFrame
10
+ from xarray import DataArray, Dataset
11
+
12
+ # if TYPE_CHECKING:
13
+ from .rusterize import _rusterize
14
+
15
+ if TYPE_CHECKING:
16
+ from .rusterize import SparseArray
17
+
18
# Package version, looked up from the installed distribution's metadata at import time.
__version__ = importlib.metadata.version("rusterize")
19
+
20
+
21
def rusterize(
    gdf: GeoDataFrame,
    like: DataArray | Dataset | None = None,
    res: Tuple | List | None = None,
    out_shape: Tuple | List | None = None,
    extent: Tuple | List | None = None,
    field: str | None = None,
    by: str | None = None,
    burn: int | float | None = None,
    fun: str = "last",
    background: int | float | None = np.nan,
    encoding: str = "dense",
    dtype: str = "float64",
) -> DataArray | SparseArray:
    """
    Fast geopandas rasterization into xarray.DataArray

    Args:
        :param gdf: geopandas dataframe to rasterize.
        :param like: array to use as blueprint for spatial matching (resolution, shape, extent). Mutually exclusive with res, out_shape, and extent.
        :param res: (xres, yres) for rasterized data.
        :param out_shape: (nrows, ncols) for regularized output shape.
        :param extent: (xmin, ymin, xmax, ymax) for regularized extent.
        :param field: field to rasterize, mutually exclusive with `burn`. Default is None.
        :param by: column to rasterize, assigns each unique value to a layer in the stack based on field. Default is None.
        :param burn: burn a value onto the raster, mutually exclusive with `field`. Default is None.
        :param fun: pixel function to use. Available options are `sum`, `first`, `last`, `min`, `max`, `count`, or `any`. Default is `last`.
        :param background: background value in final raster. Default is np.nan.
        :param encoding: return a dense array (burned geometries onto a raster) or a sparse array in COOrdinate format (coordinates and values of the rasterized geometries). Default is `dense`.
        :param dtype: specify the output dtype. Default is `float64`.

    Returns:
        Rasterized xarray.DataArray (`encoding="dense"`) or a SparseArray in COO format (`encoding="sparse"`).

    Raises:
        TypeError: if an argument has an unsupported type.
        ValueError: if arguments are mutually exclusive, missing, or malformed.
        KeyError: if `field` or `by` is not a column of `gdf`.

    Notes:
        When any of `res`, `out_shape`, or `extent` is not provided, it is inferred from the other arguments when applicable.
        If `like` is specified, `res`, `out_shape`, and `extent` are inferred from the `like` array.
        Unless `extent` is specified, a half-pixel buffer is applied to avoid missing points on the border.
        The logic dictating the final spatial properties of the rasterized geometries follows that of GDAL.

        If neither `field` nor `burn` is given, a value of 1 is rasterized (per the package README; this
        default is applied by the compiled backend, not in this wrapper).

        A `None` value for `background` corresponds to the default of the specified dtype. An illegal value for a dtype
        will be replaced with the default of that dtype. For example, a `background=np.nan` for `dtype="uint8"` will
        become `background=0`, where `0` is the default for `uint8`.
    """
    # type checks: (value, accepted types, error message), evaluated in order
    pair_or_none = (tuple, list, NoneType)
    type_checks = (
        (gdf, GeoDataFrame, "`gdf` must be a geopandas dataframe."),
        (like, (DataArray, Dataset, NoneType), "`like` must be an xarray.DataArray or xarray.Dataset."),
        (res, pair_or_none, "`res` must be a tuple or list of (xres, yres)."),
        (out_shape, pair_or_none, "`out_shape` must be a tuple or list of (nrows, ncols)."),
        (extent, pair_or_none, "`extent` must be a tuple or list of (xmin, ymin, xmax, ymax)."),
        (field, (str, NoneType), "`field` must be a string column name."),
        (by, (str, NoneType), "`by` must be a string column name."),
        (burn, (int, float, NoneType), "`burn` must be an integer or float."),
        (fun, str, "`fun` must be one of sum, first, last, min, max, count, or any."),
        (background, (int, float, NoneType), "`background` must be integer, float, or None."),
        (encoding, str, "`encoding` must be one of 'dense' or 'sparse'."),
        (
            dtype,
            str,
            "`dtype` must be one of 'uint8', 'uint16', 'uint32', 'uint64', 'int8', 'int16', 'int32', 'int64', "
            "'float32', 'float64'",
        ),
    )
    for value, accepted, message in type_checks:
        if not isinstance(value, accepted):
            raise TypeError(message)

    # value checks and defaults
    # `burn is not None` (rather than truthiness) so that `burn=0` is also rejected alongside `field`
    if field and burn is not None:
        raise ValueError("Only one of `field` or `burn` can be specified.")
    if encoding not in ("dense", "sparse"):
        raise ValueError("`encoding` must be one of `dense` or `sparse`.")

    if like is not None:
        if any((res, out_shape, extent)):
            raise ValueError("`like` is mutually exclusive with `res`, `out_shape`, and `extent`.")
        affine = like.rio.transform()
        _res = (affine.a, abs(affine.e))
        # Use the spatial dimensions reported by the rio accessor: `squeeze().shape` is not
        # available on a Dataset and puts the band count first for a multi-band DataArray.
        _shape = (like.rio.height, like.rio.width)
        _bounds, _has_extent = like.rio.bounds(), True
    else:
        if not res and not out_shape and not extent:
            raise ValueError("One of `res`, `out_shape`, or `extent` must be provided.")
        if extent and not res and not out_shape:
            raise ValueError("Must also specify `res` or `out_shape` with extent.")
        if res and not _is_positive_pair(res, (int, float)):
            raise ValueError("`res` must be 2 positive numbers.")
        if out_shape and not _is_positive_pair(out_shape, int):
            raise ValueError("`out_shape` must be 2 positive integers.")
        if extent and len(extent) != 4:
            raise ValueError("`extent` must be a tuple or list of (xmin, ymin, xmax, ymax).")

        # defaults: (0, 0) marks a property that was not supplied by the caller
        _res = res if res else (0, 0)
        _shape = out_shape if out_shape else (0, 0)
        if extent:
            _bounds, _has_extent = extent, True
        else:
            _bounds, _has_extent = gdf.total_bounds, False

    # RasterInfo
    raster_info = {
        "nrows": _shape[0],
        "ncols": _shape[1],
        "xmin": _bounds[0],
        "ymin": _bounds[1],
        "xmax": _bounds[2],
        "ymax": _bounds[3],
        "xres": _res[0],
        "yres": _res[1],
        "has_extent": _has_extent,
        # `to_epsg()` may itself return None, so a missing CRS is forwarded as None
        # instead of failing with AttributeError on `None.to_epsg()`.
        "epsg": gdf.crs.to_epsg() if gdf.crs is not None else None,
    }

    # extract columns of interest (deduplicated, in a stable order) and convert to polars
    cols = list(dict.fromkeys(col for col in (field, by) if col))
    df = None
    if cols:
        try:
            subset = gdf[cols]
        except KeyError as e:
            raise KeyError("Column not found in GeoDataFrame.") from e
        df = pl.from_pandas(subset)

    return _rusterize(gdf.geometry, raster_info, fun, df, field, by, burn, background, encoding, dtype)


def _is_positive_pair(values, accepted) -> bool:
    """Return True when `values` holds exactly two strictly positive items of the `accepted` type(s)."""
    # The type test runs before the comparison so non-numeric items never reach `> 0`.
    return len(values) == 2 and all(isinstance(v, accepted) and v > 0 for v in values)
rusterize/py.typed ADDED
File without changes
Binary file
@@ -0,0 +1,56 @@
1
+ from typing import List, Tuple
2
+
3
+ import numpy as np
4
+ from geopandas import GeoDataFrame
5
+ from polars import DataFrame
6
+ from xarray import DataArray, Dataset
7
+
8
def rusterize(
    gdf: GeoDataFrame,
    like: DataArray | Dataset | None = None,
    res: Tuple | List | None = None,
    out_shape: Tuple | List | None = None,
    extent: Tuple | List | None = None,
    field: str | None = None,
    by: str | None = None,
    burn: int | float | None = None,
    fun: str = "last",
    background: int | float | None = np.nan,
    encoding: str = "dense",
    dtype: str = "float64",
) -> DataArray | SparseArray:
    """
    Fast geopandas rasterization into xarray.DataArray

    Args:
        :param gdf: geopandas dataframe to rasterize.
        :param like: array to use as blueprint for spatial matching (resolution, shape, extent). Mutually exclusive with res, out_shape, and extent.
        :param res: (xres, yres) for rasterized data.
        :param out_shape: (nrows, ncols) for regularized output shape.
        :param extent: (xmin, ymin, xmax, ymax) for regularized extent.
        :param field: field to rasterize, mutually exclusive with `burn`. Default is None.
        :param by: column to rasterize, assigns each unique value to a layer in the stack based on field. Default is None.
        :param burn: burn a value onto the raster, mutually exclusive with `field`. Default is None.
        :param fun: pixel function to use. Available options are `sum`, `first`, `last`, `min`, `max`, `count`, or `any`. Default is `last`.
        :param background: background value in final raster. Default is np.nan.
        :param encoding: return a dense array (burned geometries onto a raster) or a sparse array in COOrdinate format (coordinates and values of the rasterized geometries). Default is `dense`.
        :param dtype: specify the output dtype. Default is `float64`.

    Returns:
        Rasterized xarray.DataArray (`encoding="dense"`) or a SparseArray in COO format (`encoding="sparse"`).

    Notes:
        When any of `res`, `out_shape`, or `extent` is not provided, it is inferred from the other arguments when applicable.
        If `like` is specified, `res`, `out_shape`, and `extent` are inferred from the `like` array.
        Unless `extent` is specified, a half-pixel buffer is applied to avoid missing points on the border.
        The logic dictating the final spatial properties of the rasterized geometries follows that of GDAL.

        If neither `field` nor `burn` is given, a value of 1 is rasterized (per the package README).
        A `field` or `by` name that is not a column of `gdf` raises a KeyError.

        A `None` value for `background` corresponds to the default of the specified dtype. An illegal value for a dtype
        will be replaced with the default of that dtype. For example, a `background=np.nan` for `dtype="uint8"` will
        become `background=0`, where `0` is the default for `uint8`.
    """
53
+
54
class SparseArray:
    """
    Sparse rasterization result in COOrdinate format, returned by `rusterize(..., encoding="sparse")`.

    Per the package README, it stores the coordinates and values of the rasterized geometries and
    defers allocating the full raster until `to_xarray()` is called.
    """

    def to_xarray(self) -> DataArray:
        """Materialize the sparse result into the final xarray.DataArray."""
        ...

    def to_frame(self) -> DataFrame:
        """Return a polars DataFrame with only the coordinates and values of the rasterized geometries."""
        ...
@@ -0,0 +1,313 @@
1
+ Metadata-Version: 2.4
2
+ Name: rusterize
3
+ Version: 0.5.0
4
+ Classifier: License :: OSI Approved :: MIT License
5
+ Classifier: Operating System :: OS Independent
6
+ Classifier: Programming Language :: Rust
7
+ Classifier: Programming Language :: Python :: Implementation :: CPython
8
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
9
+ Requires-Dist: geopandas>=1.0.1
10
+ Requires-Dist: pandas>=2.2.3
11
+ Requires-Dist: pyarrow>=18.1.0
12
+ Requires-Dist: polars>=1.19.0
13
+ Requires-Dist: xarray>=2025.1.1
14
+ Requires-Dist: rioxarray>=0.18.2
15
+ License-File: LICENSE
16
+ Summary: High performance rasterization tool for Python built in Rust
17
+ Keywords: rust,fast,raster,geometry,geopandas,xarray
18
+ Requires-Python: >=3.11
19
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
20
+ Project-URL: repository, https://github.com/ttrotto/rusterize
21
+
22
+ # rusterize
23
+
24
+ High performance rasterization tool for Python built in Rust. This
25
+ repository stems from the [fasterize](https://github.com/ecohealthalliance/fasterize.git) package built in C++
26
+ for R and ports parts of its logic into Python with a Rust backend, in addition to some useful improvements (see [API](#API)).
27
+
28
+ **rusterize** is designed to work on _(multi)polygons_ and _(multi)linestrings_, even when they are nested inside complex geometry collections. Functionally, it takes an input [geopandas](https://geopandas.org/en/stable/) dataframe and returns a [xarray](https://docs.xarray.dev/en/stable/) or a sparse array in COOrdinate format.
29
+
30
+ # Installation
31
+
32
+ Install the current version with pip:
33
+
34
+ ```shell
35
+ pip install rusterize
36
+ ```
37
+
38
+ # Contributing
39
+
40
+ Any contribution is welcome! You can install **rusterize** directly
41
+ from this repo using [maturin](https://www.maturin.rs/) as an editable
42
+ package. For this to work, you’ll need to have [Rust](https://www.rust-lang.org/tools/install) and
43
+ [cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html)
44
+ installed.
45
+
46
+ ```shell
47
+ # Clone repo
48
+ git clone https://github.com/<username>/rusterize.git
49
+ cd rusterize
50
+
51
+ # Install the Rust nightly toolchain
52
+ rustup toolchain install nightly-2025-07-31
53
+
54
+ # Install maturin
55
+ pip install maturin
56
+
57
+ # Install editable version with optimized code
58
+ maturin develop --profile dist-release
59
+ ```
60
+
61
+ # API
62
+
63
+ This package has a simple API:
64
+
65
+ ```python
66
+ from rusterize import rusterize
67
+
68
+ # gdf = <import/modify dataframe as needed>
69
+
70
+ # rusterize
71
+ rusterize(
72
+ gdf,
73
+ like=None,
74
+ res=(30, 30),
75
+ out_shape=(10, 10),
76
+ extent=(0, 10, 10, 20),
77
+ field="field",
78
+ by="by",
79
+ burn=None,
80
+ fun="sum",
81
+ background=0,
82
+ encoding="dense",
83
+ dtype="uint8"
84
+ )
85
+ ```
86
+
87
+ - `gdf`: geopandas dataframe to rasterize
88
+ - `like`: xr.DataArray to use as template for `res`, `out_shape`, and `extent`. Mutually exclusive with these parameters (default: `None`)
89
+ - `res`: (xres, yres) for desired resolution (default: `None`)
90
+ - `out_shape`: (nrows, ncols) for desired output shape (default: `None`)
91
+ - `extent`: (xmin, ymin, xmax, ymax) for desired output extent (default: `None`)
92
+ - `field`: column to rasterize. Mutually exclusive with `burn`. (default: `None` -> a value of `1` is rasterized)
93
+ - `by`: column for grouping. Assign each group to a band in the stack. Values are taken from `field` if specified, else `burn` is rasterized. (default: `None` -> singleband raster)
94
+ - `burn`: a single value to burn. Mutually exclusive with `field`. (default: `None`). If no field is found in `gdf` or if `field` is `None`, then `burn=1`
95
+ - `fun`: pixel function to use when multiple values overlap. Available options are `sum`, `first`, `last`, `min`, `max`, `count`, or `any`. (default: `last`)
96
+ - `background`: background value in final raster. (default: `np.nan`). A `None` value corresponds to the default of the specified dtype. An illegal value for a dtype will be replaced with the default of that dtype. For example, a `background=np.nan` for `dtype="uint8"` will become `background=0`, where `0` is the default for `uint8`.
97
+ - `encoding`: defines the output format of the rasterization. This is either a dense xarray representing the burned rasterized geometries, or a sparse array in COOrdinate format good for sparse observations and low memory consumption.
98
+ - `dtype`: dtype of the final raster. Possible values are `uint8`, `uint16`, `uint32`, `uint64`, `int8`, `int16`, `int32`, `int64`, `float32`, `float64` (default: `float64`)
99
+
100
+ Note that control over the desired extent is not as strict as for resolution and shape. That is,
101
+ when resolution, output shape, and extent are specified, priority is given to resolution and shape.
102
+ So, extent is not guaranteed, but resolution and shape are. If extent is not given, it is taken
103
+ from the polygons and is not modified, unless you specify a resolution value. If you only specify an output
104
+ shape, the extent is maintained. This mimics the logic of `gdal_rasterize`.
105
+
106
+ # Encoding
107
+
108
+ Version 0.5.0 introduces a new `encoding` parameter to control the output format of the rasterization. This means that you can return a xarray with the burned rasterized geometries, or a new structure `SparseArray`. This `SparseArray` structure stores the band/row/column triplets of where the geometries should be burned onto the final raster, as well as their corresponding values before applying any pixel function. This can be used as an intermediate output to avoid allocating memory before materializing the final raster, or as a final product. `SparseArray` has two convenience functions: `to_xarray()` and `to_frame()`. The first returns the final xarray, the second produces a polars dataframe with only the coordinates and values of the rasterized geometries. Note that `SparseArray` avoids allocating memory for the array during rasterization until it is actually needed (when calling `to_xarray()`). See below for an example.
109
+
110
+ # Usage
111
+
112
+ **rusterize** consists of a single function `rusterize()`.
113
+
114
+ ```python
115
+ from rusterize import rusterize
116
+ import geopandas as gpd
117
+ from shapely import wkt
118
+ import matplotlib.pyplot as plt
119
+
120
+ # Construct geometries
121
+ geoms = [
122
+ "POLYGON ((-180 -20, -140 55, 10 0, -140 -60, -180 -20), (-150 -20, -100 -10, -110 20, -150 -20))",
123
+ "POLYGON ((-10 0, 140 60, 160 0, 140 -55, -10 0))",
124
+ "POLYGON ((-125 0, 0 60, 40 5, 15 -45, -125 0))",
125
+ "MULTILINESTRING ((-180 -70, -140 -50), (-140 -50, -100 -70), (-100 -70, -60 -50), (-60 -50, -20 -70), (-20 -70, 20 -50), (20 -50, 60 -70), (60 -70, 100 -50), (100 -50, 140 -70), (140 -70, 180 -50))",
126
+ "GEOMETRYCOLLECTION (POINT (50 -40), POLYGON ((75 -40, 75 -30, 100 -30, 100 -40, 75 -40)), LINESTRING (60 -40, 80 0), GEOMETRYCOLLECTION (POLYGON ((100 20, 100 30, 110 30, 110 20, 100 20))))"
127
+ ]
128
+
129
+ # Convert WKT strings to Shapely geometries
130
+ geometries = [wkt.loads(geom) for geom in geoms]
131
+
132
+ # Create a GeoDataFrame
133
+ gdf = gpd.GeoDataFrame({'value': range(1, len(geoms) + 1)}, geometry=geometries, crs='EPSG:32619')
134
+
135
+ # rusterize to "dense" -> return a xarray with the burned geometries (default)
136
+ output = rusterize(
137
+ gdf,
138
+ res=(1, 1),
139
+ field="value",
140
+ fun="sum",
141
+ ).squeeze()
142
+
143
+ # plot it
144
+ fig, ax = plt.subplots(figsize=(12, 6))
145
+ output.plot.imshow(ax=ax)
146
+ plt.show()
147
+
148
+ # rusterize to "sparse" -> custom structure storing the coordinates and values of the rasterized geometries
149
+ output = rusterize(
150
+ gdf,
151
+ res=(1, 1),
152
+ field="value",
153
+ fun="sum",
154
+ encoding="sparse"
155
+ )
156
+ output
157
+ # SparseArray:
158
+ # - Shape: (131, 361)
159
+ # - Extent: (-180.5, -70.5, 180.5, 60.5)
160
+ # - Resolution: (1.0, 1.0)
161
+ # - EPSG: 32619
162
+ # - Estimated size: 369.46 KB
163
+
164
+ # materialize into xarray
165
+ array = output.to_xarray()
166
+
167
+ # get only coordinates and values
168
+ coo = output.to_frame()
169
+ # shape: (29_340, 3)
170
+ # ┌─────┬─────┬──────┐
171
+ # │ row ┆ col ┆ data │
172
+ # │ --- ┆ --- ┆ --- │
173
+ # │ u32 ┆ u32 ┆ f64 │
174
+ # ╞═════╪═════╪══════╡
175
+ # │ 6 ┆ 40 ┆ 1.0 │
176
+ # │ 6 ┆ 41 ┆ 1.0 │
177
+ # │ 6 ┆ 42 ┆ 1.0 │
178
+ # │ 7 ┆ 39 ┆ 1.0 │
179
+ # │ 7 ┆ 40 ┆ 1.0 │
180
+ # │ … ┆ … ┆ … │
181
+ # │ 64 ┆ 258 ┆ 1.0 │
182
+ # │ 63 ┆ 259 ┆ 1.0 │
183
+ # │ 62 ┆ 259 ┆ 1.0 │
184
+ # │ 61 ┆ 260 ┆ 1.0 │
185
+ # │ 60 ┆ 260 ┆ 1.0 │
186
+ # └─────┴─────┴──────┘
187
+ ```
188
+
189
+ ![](img/plot.png)
190
+
191
+ # Benchmarks
192
+
193
+ **rusterize** is fast! Let’s try it on small and large datasets.
194
+
195
+ ```python
196
+ from rusterize import rusterize
197
+ import geopandas as gpd
198
+ import requests
199
+ import zipfile
200
+ from io import BytesIO
201
+
202
+ # large dataset (~380 MB)
203
+ url = "https://s3.amazonaws.com/hp3-shapefiles/Mammals_Terrestrial.zip"
204
+ response = requests.get(url)
205
+
206
+ # unzip
207
+ with zipfile.ZipFile(BytesIO(response.content), 'r') as zip_ref:
208
+ zip_ref.extractall()
209
+
210
+ # read
211
+ gdf_large = gpd.read_file("Mammals_Terrestrial/Mammals_Terrestrial.shp")
212
+
213
+ # small dataset (first 1000 rows)
214
+ gdf_small = gdf_large.iloc[:1000, :]
215
+
216
+ # rusterize at 1/6 degree resolution
217
+ def test_large(benchmark):
218
+ benchmark(rusterize, gdf_large, res=(1/6, 1/6), fun="sum")
219
+
220
+ def test_small(benchmark):
221
+ benchmark(rusterize, gdf_small, res=(1/6, 1/6), fun="sum")
222
+ ```
223
+
224
+ Then you can run it with [pytest](https://docs.pytest.org/en/stable/) and [pytest-benchmark](https://pytest-benchmark.readthedocs.io/en/stable/):
225
+
226
+ ```
227
+ pytest <python file> --benchmark-min-rounds=20 --benchmark-time-unit='s'
228
+
229
+ --------------------------------------------- benchmark: 1 tests --------------------------------------------
230
+ Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
231
+ -------------------------------------------------------------------------------------------------------------
232
+ rusterize_small 0.0791 0.0899 0.0812 0.0027 0.0803 0.0020 2;2 12.3214 20 1
233
+ rusterize_large 1.379545 1.4474 1.4006 0.0178 1.3966 0.0214 5;1 0.7140 20 1
234
+ -------------------------------------------------------------------------------------------------------------
235
+ ```
236
+
237
+ And fasterize:
238
+
239
+ ```r
240
+ library(sf)
241
+ library(raster)
242
+ library(fasterize)
243
+ library(microbenchmark)
244
+
245
+ large <- st_read("Mammals_Terrestrial/Mammals_Terrestrial.shp", quiet = TRUE)
246
+ small <- large[1:1000, ]
247
+ fn <- function(v) {
248
+ r <- raster(v, res = 1/6)
249
+ return(fasterize(v, r, fun = "sum"))
250
+ }
251
+ microbenchmark(
252
+ fasterize_large = f <- fn(large),
253
+ fasterize_small = f <- fn(small),
254
+ times=20L,
255
+ unit='s'
256
+ )
257
+ ```
258
+
259
+ ```
260
+ Unit: seconds
261
+ expr min lq mean median uq max neval
262
+ fasterize_small 0.4741043 0.4926114 0.5191707 0.5193289 0.536741 0.5859029 20
263
+ fasterize_large 9.2199426 10.3595465 10.6653139 10.5369429 11.025771 11.7944567 20
264
+ ```
265
+
266
+ And on an even larger dataset? Here we use a layer from the province of Quebec, Canada representing ~2M polygons of forest stands, rasterized at 30 meters (20 rounds) with no field value, pixel function `any`, and `dense` encoding. The comparison with `gdal_rasterize` was run with `hyperfine --runs 20 "gdal_rasterize -tr 30 30 -burn 1 <data_in> <data_out>"`.
267
+
268
+ ```
269
+ # rusterize
270
+ --------------------------------------------- benchmark: 1 tests --------------------------------------------
271
+ Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
272
+ -------------------------------------------------------------------------------------------------------------
273
+ rusterize 5.9331 7.2308 6.1302 0.3183 5.9903 0.1736 2;4 0.1631 20 1
274
+ -------------------------------------------------------------------------------------------------------------
275
+
276
+ # fasterize
277
+ Unit: seconds
278
+ expr min lq mean median uq max neval
279
+ fasterize 157.4734 177.2055 194.3222 194.6455 213.9195 230.6504 20
280
+
281
+ # gdal_rasterize (CLI) - read from fast drive, write to fast drive
282
+ Time (mean ± σ): 5.495 s ± 0.038 s [User: 4.268 s, System: 1.225 s]
283
+ Range (min … max): 5.452 s … 5.623 s 20 runs
284
+ ```
285
+
286
+ In terms of (multi)line rasterization speed, here's a benchmark against `gdal_rasterize` using a layer from the province of Quebec, Canada, representing a subset of the road network for a total of ~535K multilinestrings.
287
+
288
+ ```
289
+ # rusterize
290
+ --------------------------------------------- benchmark: 1 tests --------------------------------------------
291
+ Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
292
+ -------------------------------------------------------------------------------------------------------------
293
+ test 4.5272 5.9488 4.7171 0.3236 4.6360 0.1680 2;2 0.2120 20 1
294
+ -------------------------------------------------------------------------------------------------------------
295
+
296
+ # gdal_rasterize (CLI) - read from fast drive, write to fast drive
297
+ Time (mean ± σ): 8.719 s ± 0.063 s [User: 3.782 s, System: 4.917 s]
298
+ Range (min … max): 8.658 s … 8.874 s 20 runs
299
+ ```
300
+
301
+ # Comparison with other tools
302
+
303
+ While **rusterize** is fast, there are other fast alternatives out there, including `GDAL`, `rasterio` and `geocube`. However, **rusterize** allows for a seamless, Rust-native processing with similar or lower memory footprint that doesn't require you to leave Python, and returns the geoinformation you need for downstream processing with ample control over resolution, shape, extent, and data type.
304
+
305
+ The following is a time comparison on a single run on the same forest stands dataset used earlier.
306
+
307
+ ```
308
+ rusterize: 5.9 sec
309
+ rasterio: 68 sec (but no spatial information)
310
+ fasterize: 157 sec (including raster creation)
311
+ geocube: 260 sec (larger memory footprint)
312
+ ```
313
+
@@ -0,0 +1,8 @@
1
+ rusterize-0.5.0.dist-info/METADATA,sha256=iYMUElyGvkPsYMNIWWJQvYOOsptwlcEZdrH6Fh-oLjY,13923
2
+ rusterize-0.5.0.dist-info/WHEEL,sha256=sgpeF0IXz3jzHg627c97rIV0EvAHwTqdmK1eoh6Pt68,96
3
+ rusterize-0.5.0.dist-info/licenses/LICENSE,sha256=FXkix0amECHul0Y2qWBXnEGNV2fd8GuVCIZuuzQwR-c,1130
4
+ rusterize/__init__.py,sha256=SvjfN61C_kO_4ZLoj3kRzDgJovTbkMBaRLkzsund8Qs,7157
5
+ rusterize/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ rusterize/rusterize.pyd,sha256=AWn4MdYY-aaGPgdGIwUnzEwKF1MATZr3tLU_Hdz6Y-Q,51045888
7
+ rusterize/rusterize.pyi,sha256=avY2KGWRXxBP09z__M0ZVsaAVtEFuLo3gGVgtVD5W-Q,2957
8
+ rusterize-0.5.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: maturin (1.10.0)
3
+ Root-Is-Purelib: false
4
+ Tag: cp311-abi3-win_amd64
@@ -0,0 +1,23 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Tommaso Trotto
4
+ Copyright (c) 2017 EcoHealth Alliance
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining
7
+ a copy of this software and associated documentation files (the
8
+ "Software"), to deal in the Software without restriction, including
9
+ without limitation the rights to use, copy, modify, merge, publish,
10
+ distribute, sublicense, and/or sell copies of the Software, and to
11
+ permit persons to whom the Software is furnished to do so, subject to
12
+ the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be
15
+ included in all copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.