rusterize 0.5.0__cp311-abi3-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rusterize/__init__.py +147 -0
- rusterize/py.typed +0 -0
- rusterize/rusterize.pyd +0 -0
- rusterize/rusterize.pyi +56 -0
- rusterize-0.5.0.dist-info/METADATA +313 -0
- rusterize-0.5.0.dist-info/RECORD +8 -0
- rusterize-0.5.0.dist-info/WHEEL +4 -0
- rusterize-0.5.0.dist-info/licenses/LICENSE +23 -0
rusterize/__init__.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import importlib.metadata
|
|
4
|
+
from types import NoneType
|
|
5
|
+
from typing import TYPE_CHECKING, List, Tuple
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import polars as pl
|
|
9
|
+
from geopandas import GeoDataFrame
|
|
10
|
+
from xarray import DataArray, Dataset
|
|
11
|
+
|
|
12
|
+
# runtime import (deliberately NOT guarded by TYPE_CHECKING): `_rusterize` is called by `rusterize()` below
|
|
13
|
+
from .rusterize import _rusterize
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from .rusterize import SparseArray
|
|
17
|
+
|
|
18
|
+
# Package version, looked up from the installed distribution metadata for "rusterize".
__version__ = importlib.metadata.version("rusterize")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def rusterize(
    gdf: GeoDataFrame,
    like: DataArray | Dataset | None = None,
    res: Tuple | List | None = None,
    out_shape: Tuple | List | None = None,
    extent: Tuple | List | None = None,
    field: str | None = None,
    by: str | None = None,
    burn: int | float | None = None,
    fun: str = "last",
    background: int | float | None = np.nan,
    encoding: str = "dense",
    dtype: str = "float64",
) -> DataArray | SparseArray:
    """
    Fast geopandas rasterization into xarray.DataArray

    Validates the arguments, assembles the raster description and hands the
    geometries over to the compiled `_rusterize` extension function.

    Args:
        :param gdf: geopandas dataframe to rasterize.
        :param like: array to use as blueprint for spatial matching (resolution, shape, extent). Mutually exclusive with res, out_shape, and extent.
        :param res: (xres, yres) for rasterized data. Must be 2 positive numbers.
        :param out_shape: (nrows, ncols) for regularized output shape. Must be 2 positive integers.
        :param extent: (xmin, ymin, xmax, ymax) for regularized extent. Requires `res` or `out_shape`.
        :param field: field to rasterize, mutually exclusive with `burn`. Default is None.
        :param by: column to rasterize, assigns each unique value to a layer in the stack based on field. Default is None.
        :param burn: burn a value onto the raster, mutually exclusive with `field`. Default is None.
        :param fun: pixel function to use. Available options are `sum`, `first`, `last`, `min`, `max`, `count`, or `any`. Default is `last`.
        :param background: background value in final raster. Default is np.nan.
        :param encoding: return a dense array (burned geometries onto a raster) or a sparse array in COOrdinate format (coordinates and values of the rasterized geometries). Default is `dense`.
        :param dtype: specify the output dtype. Default is `float64`.

    Returns:
        Whatever `_rusterize` returns for the requested `encoding`; per the annotation, an
        xarray.DataArray or a SparseArray.

    Raises:
        TypeError: if an argument has the wrong type.
        ValueError: if arguments are mutually exclusive, missing, or out of range.
        KeyError: if `field` or `by` is not a column of `gdf`.

    Notes:
        When any of `res`, `out_shape`, or `extent` is not provided, it is inferred from the other arguments when applicable.
        If `like` is specified, `res`, `out_shape`, and `extent` are inferred from the `like` object.
        Unless `extent` is specified, a half-pixel buffer is applied to avoid missing points on the border.
        The logic dictating the final spatial properties of the rasterized geometries follows that of GDAL.

        A `None` value for `background` corresponds to the default of the chosen dtype. An illegal value for a dtype will be replaced with the default of
        that dtype. For example, a `background=np.nan` for `dtype="uint8"` will become `background=0`, where `0` is the default for `uint8`.
    """
    # type checks
    if not isinstance(gdf, GeoDataFrame):
        raise TypeError("`gdf` must be a geopandas dataframe.")
    if not isinstance(like, (DataArray, Dataset, NoneType)):
        raise TypeError("`like` must be an xarray.DataArray or xarray.Dataset.")
    if not isinstance(res, (tuple, list, NoneType)):
        raise TypeError("`res` must be a tuple or list of (x, y).")
    if not isinstance(out_shape, (tuple, list, NoneType)):
        raise TypeError("`out_shape` must be a tuple or list of (nrows, ncols).")
    if not isinstance(extent, (tuple, list, NoneType)):
        raise TypeError("`extent` must be a tuple or list of (xmin, ymin, xmax, ymax).")
    if not isinstance(field, (str, NoneType)):
        raise TypeError("`field` must be a string column name.")
    if not isinstance(by, (str, NoneType)):
        raise TypeError("`by` must be a string column name.")
    if not isinstance(burn, (int, float, NoneType)):
        raise TypeError("`burn` must be an integer or float.")
    if not isinstance(fun, str):
        raise TypeError("`fun` must be one of sum, first, last, min, max, count, or any.")
    if not isinstance(background, (int, float, NoneType)):
        raise TypeError("`background` must be integer, float, or None.")
    if not isinstance(encoding, str):
        raise TypeError("`encoding` must be one of 'dense' or 'sparse'.")
    if not isinstance(dtype, str):
        raise TypeError(
            "`dtype` must be one of 'uint8', 'uint16', 'uint32', 'uint64', 'int8', 'int16', 'int32', 'int64', 'float32', 'float64'"
        )

    # value checks and defaults
    # `burn is not None` (rather than truthiness) so that an explicit burn=0 is still
    # recognised as "burn was specified".
    if field and burn is not None:
        raise ValueError("Only one of `field` or `burn` can be specified.")
    if encoding not in ("dense", "sparse"):
        raise ValueError("`encoding` must be one of `dense` or `sparse`.")

    if like is not None:
        if any((res, out_shape, extent)):
            raise ValueError("`like` is mutually exclusive with `res`, `out_shape`, and `extent`.")
        # NOTE(review): the `.rio` accessor is provided by rioxarray, which this module does
        # not import itself — presumably it is registered elsewhere; confirm.
        affine = like.rio.transform()
        _res = (affine.a, abs(affine.e))
        # (height, width) from the spatial dims: works for Dataset (which has no `.shape`)
        # and for multi-band arrays, where `squeeze().shape` would start with the band axis.
        _shape = like.rio.shape
        _bounds, _has_extent = like.rio.bounds(), True
    else:
        if not res and not out_shape and not extent:
            raise ValueError("One of `res`, `out_shape`, or `extent` must be provided.")
        if extent and not res and not out_shape:
            raise ValueError("Must also specify `res` or `out_shape` with extent.")
        # The type test runs before the `<= 0` comparison so that non-numeric items are
        # reported with the ValueError below instead of a bare comparison TypeError.
        if res and (
            len(res) != 2 or any(not isinstance(r, (int, float)) for r in res) or any(r <= 0 for r in res)
        ):
            raise ValueError("`res` must be 2 positive numbers.")
        if out_shape and (
            len(out_shape) != 2
            or any(not isinstance(s, int) for s in out_shape)
            or any(s <= 0 for s in out_shape)
        ):
            raise ValueError("`out_shape` must be 2 positive integers.")
        if extent and len(extent) != 4:
            raise ValueError("`extent` must be a tuple or list of (xmin, ymin, xmax, ymax).")

        # defaults: (0, 0) marks "not provided"
        _res = res if res else (0, 0)
        _shape = out_shape if out_shape else (0, 0)
        (_bounds, _has_extent) = (extent, True) if extent else (gdf.total_bounds, False)

    # A GeoDataFrame without a CRS has `crs is None`; avoid an AttributeError on it.
    # NOTE(review): assumes the extension tolerates a missing EPSG code (`to_epsg()` can
    # itself return None) — confirm against the Rust side.
    crs = gdf.crs
    epsg = crs.to_epsg() if crs is not None else None

    # RasterInfo
    raster_info = {
        "nrows": _shape[0],
        "ncols": _shape[1],
        "xmin": _bounds[0],
        "ymin": _bounds[1],
        "xmax": _bounds[2],
        "ymax": _bounds[3],
        "xres": _res[0],
        "yres": _res[1],
        "has_extent": _has_extent,
        "epsg": epsg,
    }

    # extract columns of interest and convert to polars
    # dict.fromkeys de-duplicates (field == by) while keeping a deterministic column order.
    cols = list(dict.fromkeys(col for col in (field, by) if col))
    try:
        df = pl.from_pandas(gdf[cols]) if cols else None
    except KeyError as e:
        raise KeyError("Column not found in GeoDataFrame.") from e

    return _rusterize(gdf.geometry, raster_info, fun, df, field, by, burn, background, encoding, dtype)
|
rusterize/py.typed
ADDED
|
File without changes
|
rusterize/rusterize.pyd
ADDED
|
Binary file
|
rusterize/rusterize.pyi
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from typing import List, Tuple
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from geopandas import GeoDataFrame
|
|
5
|
+
from polars import DataFrame
|
|
6
|
+
from xarray import DataArray, Dataset
|
|
7
|
+
|
|
8
|
+
def rusterize(
    gdf: GeoDataFrame,
    like: DataArray | Dataset | None = None,
    res: Tuple | List | None = None,
    out_shape: Tuple | List | None = None,
    extent: Tuple | List | None = None,
    field: str | None = None,
    by: str | None = None,
    burn: int | float | None = None,
    fun: str = "last",
    background: int | float | None = np.nan,
    encoding: str = "dense",
    dtype: str = "float64",
) -> DataArray | SparseArray:
    """
    Fast geopandas rasterization into xarray.DataArray

    Args:
        :param gdf: geopandas dataframe to rasterize.
        :param like: array to use as blueprint for spatial matching (resolution, shape, extent). Mutually exclusive with res, out_shape, and extent.
        :param res: (xres, yres) for rasterized data.
        :param out_shape: (nrows, ncols) for regularized output shape.
        :param extent: (xmin, ymin, xmax, ymax) for regularized extent.
        :param field: field to rasterize, mutually exclusive with `burn`. Default is None.
        :param by: column to rasterize, assigns each unique value to a layer in the stack based on field. Default is None.
        :param burn: burn a value onto the raster, mutually exclusive with `field`. Default is None.
        :param fun: pixel function to use. Available options are `sum`, `first`, `last`, `min`, `max`, `count`, or `any`. Default is `last`.
        :param background: background value in final raster. Default is np.nan.
        :param encoding: return a dense array (burned geometries onto a raster) or a sparse array in COOrdinate format (coordinates and values of the rasterized geometries). Default is `dense`.
        :param dtype: specify the output dtype. Default is `float64`.

    Returns:
        Rasterized xarray.DataArray (dense encoding) or a SparseArray in COO format (sparse encoding).

    Notes:
        When any of `res`, `out_shape`, or `extent` is not provided, it is inferred from the other arguments when applicable.
        If `like` is specified, `res`, `out_shape`, and `extent` are inferred from the `like` object.
        Unless `extent` is specified, a half-pixel buffer is applied to avoid missing points on the border.
        The logic dictating the final spatial properties of the rasterized geometries follows that of GDAL.

        According to the package README, if `field` is None a default `burn` value of 1 is rasterized.

        A `None` value for `background` corresponds to the default of the chosen dtype. An illegal value for a dtype will be replaced with the default of
        that dtype. For example, a `background=np.nan` for `dtype="uint8"` will become `background=0`, where `0` is the default for `uint8`.
    """
|
|
53
|
+
|
|
54
|
+
class SparseArray:
    """
    Sparse (COOrdinate format) result of a rasterization run with `encoding="sparse"`.

    Per the package README, it stores the band/row/column positions where geometries
    are burned, together with their values before any pixel function is applied, and
    defers allocating the dense raster until `to_xarray()` is called.
    """

    # Materialize the final raster as an xarray.DataArray.
    def to_xarray(self) -> DataArray: ...

    # Return a polars DataFrame holding only the coordinates and values of the rasterized geometries.
    def to_frame(self) -> DataFrame: ...
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: rusterize
|
|
3
|
+
Version: 0.5.0
|
|
4
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
5
|
+
Classifier: Operating System :: OS Independent
|
|
6
|
+
Classifier: Programming Language :: Rust
|
|
7
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
8
|
+
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
9
|
+
Requires-Dist: geopandas>=1.0.1
|
|
10
|
+
Requires-Dist: pandas>=2.2.3
|
|
11
|
+
Requires-Dist: pyarrow>=18.1.0
|
|
12
|
+
Requires-Dist: polars>=1.19.0
|
|
13
|
+
Requires-Dist: xarray>=2025.1.1
|
|
14
|
+
Requires-Dist: rioxarray>=0.18.2
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Summary: High performance rasterization tool for Python built in Rust
|
|
17
|
+
Keywords: rust,fast,raster,geometry,geopandas,xarray
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
20
|
+
Project-URL: repository, https://github.com/ttrotto/rusterize
|
|
21
|
+
|
|
22
|
+
# rusterize
|
|
23
|
+
|
|
24
|
+
High performance rasterization tool for Python built in Rust. This
|
|
25
|
+
repository stems from the [fasterize](https://github.com/ecohealthalliance/fasterize.git) package built in C++
|
|
26
|
+
for R and ports parts of its logic into Python with a Rust backend, in addition to some useful improvements (see [API](#API)).
|
|
27
|
+
|
|
28
|
+
**rusterize** is designed to work on _(multi)polygons_ and _(multi)linestrings_, even when they are nested inside complex geometry collections. Functionally, it takes an input [geopandas](https://geopandas.org/en/stable/) dataframe and returns a [xarray](https://docs.xarray.dev/en/stable/) or a sparse array in COOrdinate format.
|
|
29
|
+
|
|
30
|
+
# Installation
|
|
31
|
+
|
|
32
|
+
Install the current version with pip:
|
|
33
|
+
|
|
34
|
+
```shell
|
|
35
|
+
pip install rusterize
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
# Contributing
|
|
39
|
+
|
|
40
|
+
Any contribution is welcome! You can install **rusterize** directly
|
|
41
|
+
from this repo using [maturin](https://www.maturin.rs/) as an editable
|
|
42
|
+
package. For this to work, you’ll need to have [Rust](https://www.rust-lang.org/tools/install) and
|
|
43
|
+
[cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html)
|
|
44
|
+
installed.
|
|
45
|
+
|
|
46
|
+
```shell
|
|
47
|
+
# Clone repo
|
|
48
|
+
git clone https://github.com/<username>/rusterize.git
|
|
49
|
+
cd rusterize
|
|
50
|
+
|
|
51
|
+
# Install the Rust nightly toolchain
|
|
52
|
+
rustup toolchain install nightly-2025-07-31
|
|
53
|
+
|
|
54
|
+
# Install maturin
|
|
55
|
+
pip install maturin
|
|
56
|
+
|
|
57
|
+
# Install editable version with optimized code
|
|
58
|
+
maturin develop --profile dist-release
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
# API
|
|
62
|
+
|
|
63
|
+
This package has a simple API:
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from rusterize import rusterize
|
|
67
|
+
|
|
68
|
+
# gdf = <import/modify dataframe as needed>
|
|
69
|
+
|
|
70
|
+
# rusterize
|
|
71
|
+
rusterize(
|
|
72
|
+
gdf,
|
|
73
|
+
like=None,
|
|
74
|
+
res=(30, 30),
|
|
75
|
+
out_shape=(10, 10),
|
|
76
|
+
extent=(0, 10, 10, 20),
|
|
77
|
+
field="field",
|
|
78
|
+
by="by",
|
|
79
|
+
burn=None,
|
|
80
|
+
fun="sum",
|
|
81
|
+
background=0,
|
|
82
|
+
encoding="dense",
|
|
83
|
+
dtype="uint8"
|
|
84
|
+
)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
- `gdf`: geopandas dataframe to rasterize
|
|
88
|
+
- `like`: xr.DataArray to use as template for `res`, `out_shape`, and `extent`. Mutually exclusive with these parameters (default: `None`)
|
|
89
|
+
- `res`: (xres, yres) for desired resolution (default: `None`)
|
|
90
|
+
- `out_shape`: (nrows, ncols) for desired output shape (default: `None`)
|
|
91
|
+
- `extent`: (xmin, ymin, xmax, ymax) for desired output extent (default: `None`)
|
|
92
|
+
- `field`: column to rasterize. Mutually exclusive with `burn`. (default: `None` -> a value of `1` is rasterized)
|
|
93
|
+
- `by`: column for grouping. Assign each group to a band in the stack. Values are taken from `field` if specified, else `burn` is rasterized. (default: `None` -> singleband raster)
|
|
94
|
+
- `burn`: a single value to burn. Mutually exclusive with `field`. (default: `None`). If no field is found in `gdf` or if `field` is `None`, then `burn=1`
|
|
95
|
+
- `fun`: pixel function to use when multiple values overlap. Available options are `sum`, `first`, `last`, `min`, `max`, `count`, or `any`. (default: `last`)
|
|
96
|
+
- `background`: background value in final raster. (default: `np.nan`). A `None` value corresponds to the default of the specified dtype. An illegal value for a dtype will be replaced with the default of that dtype. For example, a `background=np.nan` for `dtype="uint8"` will become `background=0`, where `0` is the default for `uint8`.
|
|
97
|
+
- `encoding`: defines the output format of the rasterization. This is either a dense xarray representing the burned rasterized geometries, or a sparse array in COOrdinate format good for sparse observations and low memory consumption.
|
|
98
|
+
- `dtype`: dtype of the final raster. Possible values are `uint8`, `uint16`, `uint32`, `uint64`, `int8`, `int16`, `int32`, `int64`, `float32`, `float64` (default: `float64`)
|
|
99
|
+
|
|
100
|
+
Note that control over the desired extent is not as strict as for resolution and shape. That is,
|
|
101
|
+
when resolution, output shape, and extent are specified, priority is given to resolution and shape.
|
|
102
|
+
So, extent is not guaranteed, but resolution and shape are. If extent is not given, it is taken
|
|
103
|
+
from the polygons and is not modified, unless you specify a resolution value. If you only specify an output
|
|
104
|
+
shape, the extent is maintained. This mimics the logic of `gdal_rasterize`.
|
|
105
|
+
|
|
106
|
+
# Encoding
|
|
107
|
+
|
|
108
|
+
Version 0.5.0 introduces a new `encoding` parameter to control the output format of the rasterization. This means that you can return an xarray with the burned rasterized geometries, or a new structure, `SparseArray`. This `SparseArray` structure stores the band/row/column triplets of where the geometries should be burned onto the final raster, as well as their corresponding values before applying any pixel function. This can be used as an intermediate output to avoid allocating memory before materializing the final raster, or as a final product. `SparseArray` has two convenience methods: `to_xarray()` and `to_frame()`. The first returns the final xarray; the second produces a polars dataframe with only the coordinates and values of the rasterized geometries. Note that `SparseArray` avoids allocating memory for the array during rasterization until it is actually needed (i.e. when calling `to_xarray()`). See below for an example.
|
|
109
|
+
|
|
110
|
+
# Usage
|
|
111
|
+
|
|
112
|
+
**rusterize** consists of a single function `rusterize()`.
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
from rusterize import rusterize
|
|
116
|
+
import geopandas as gpd
|
|
117
|
+
from shapely import wkt
|
|
118
|
+
import matplotlib.pyplot as plt
|
|
119
|
+
|
|
120
|
+
# Construct geometries
|
|
121
|
+
geoms = [
|
|
122
|
+
"POLYGON ((-180 -20, -140 55, 10 0, -140 -60, -180 -20), (-150 -20, -100 -10, -110 20, -150 -20))",
|
|
123
|
+
"POLYGON ((-10 0, 140 60, 160 0, 140 -55, -10 0))",
|
|
124
|
+
"POLYGON ((-125 0, 0 60, 40 5, 15 -45, -125 0))",
|
|
125
|
+
"MULTILINESTRING ((-180 -70, -140 -50), (-140 -50, -100 -70), (-100 -70, -60 -50), (-60 -50, -20 -70), (-20 -70, 20 -50), (20 -50, 60 -70), (60 -70, 100 -50), (100 -50, 140 -70), (140 -70, 180 -50))",
|
|
126
|
+
"GEOMETRYCOLLECTION (POINT (50 -40), POLYGON ((75 -40, 75 -30, 100 -30, 100 -40, 75 -40)), LINESTRING (60 -40, 80 0), GEOMETRYCOLLECTION (POLYGON ((100 20, 100 30, 110 30, 110 20, 100 20))))"
|
|
127
|
+
]
|
|
128
|
+
|
|
129
|
+
# Convert WKT strings to Shapely geometries
|
|
130
|
+
geometries = [wkt.loads(geom) for geom in geoms]
|
|
131
|
+
|
|
132
|
+
# Create a GeoDataFrame
|
|
133
|
+
gdf = gpd.GeoDataFrame({'value': range(1, len(geoms) + 1)}, geometry=geometries, crs='EPSG:32619')
|
|
134
|
+
|
|
135
|
+
# rusterize to "dense" -> return a xarray with the burned geometries (default)
|
|
136
|
+
output = rusterize(
|
|
137
|
+
gdf,
|
|
138
|
+
res=(1, 1),
|
|
139
|
+
field="value",
|
|
140
|
+
fun="sum",
|
|
141
|
+
).squeeze()
|
|
142
|
+
|
|
143
|
+
# plot it
|
|
144
|
+
fig, ax = plt.subplots(figsize=(12, 6))
|
|
145
|
+
output.plot.imshow(ax=ax)
|
|
146
|
+
plt.show()
|
|
147
|
+
|
|
148
|
+
# rusterize to "sparse" -> custom structure storing the coordinates and values of the rasterized geometries
|
|
149
|
+
output = rusterize(
|
|
150
|
+
gdf,
|
|
151
|
+
res=(1, 1),
|
|
152
|
+
field="value",
|
|
153
|
+
fun="sum",
|
|
154
|
+
encoding="sparse"
|
|
155
|
+
)
|
|
156
|
+
output
|
|
157
|
+
# SparseArray:
|
|
158
|
+
# - Shape: (131, 361)
|
|
159
|
+
# - Extent: (-180.5, -70.5, 180.5, 60.5)
|
|
160
|
+
# - Resolution: (1.0, 1.0)
|
|
161
|
+
# - EPSG: 32619
|
|
162
|
+
# - Estimated size: 369.46 KB
|
|
163
|
+
|
|
164
|
+
# materialize into xarray
|
|
165
|
+
array = output.to_xarray()
|
|
166
|
+
|
|
167
|
+
# get only coordinates and values
|
|
168
|
+
coo = output.to_frame()
|
|
169
|
+
# shape: (29_340, 3)
|
|
170
|
+
# ┌─────┬─────┬──────┐
|
|
171
|
+
# │ row ┆ col ┆ data │
|
|
172
|
+
# │ --- ┆ --- ┆ --- │
|
|
173
|
+
# │ u32 ┆ u32 ┆ f64 │
|
|
174
|
+
# ╞═════╪═════╪══════╡
|
|
175
|
+
# │ 6 ┆ 40 ┆ 1.0 │
|
|
176
|
+
# │ 6 ┆ 41 ┆ 1.0 │
|
|
177
|
+
# │ 6 ┆ 42 ┆ 1.0 │
|
|
178
|
+
# │ 7 ┆ 39 ┆ 1.0 │
|
|
179
|
+
# │ 7 ┆ 40 ┆ 1.0 │
|
|
180
|
+
# │ … ┆ … ┆ … │
|
|
181
|
+
# │ 64 ┆ 258 ┆ 1.0 │
|
|
182
|
+
# │ 63 ┆ 259 ┆ 1.0 │
|
|
183
|
+
# │ 62 ┆ 259 ┆ 1.0 │
|
|
184
|
+
# │ 61 ┆ 260 ┆ 1.0 │
|
|
185
|
+
# │ 60 ┆ 260 ┆ 1.0 │
|
|
186
|
+
# └─────┴─────┴──────┘
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+

|
|
190
|
+
|
|
191
|
+
# Benchmarks
|
|
192
|
+
|
|
193
|
+
**rusterize** is fast! Let’s try it on small and large datasets.
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
from rusterize import rusterize
|
|
197
|
+
import geopandas as gpd
|
|
198
|
+
import requests
|
|
199
|
+
import zipfile
|
|
200
|
+
from io import BytesIO
|
|
201
|
+
|
|
202
|
+
# large dataset (~380 MB)
|
|
203
|
+
url = "https://s3.amazonaws.com/hp3-shapefiles/Mammals_Terrestrial.zip"
|
|
204
|
+
response = requests.get(url)
|
|
205
|
+
|
|
206
|
+
# unzip
|
|
207
|
+
with zipfile.ZipFile(BytesIO(response.content), 'r') as zip_ref:
|
|
208
|
+
zip_ref.extractall()
|
|
209
|
+
|
|
210
|
+
# read
|
|
211
|
+
gdf_large = gpd.read_file("Mammals_Terrestrial/Mammals_Terrestrial.shp")
|
|
212
|
+
|
|
213
|
+
# small dataset (first 1000 rows)
|
|
214
|
+
gdf_small = gdf_large.iloc[:1000, :]
|
|
215
|
+
|
|
216
|
+
# rusterize at 1/6 degree resolution
|
|
217
|
+
def test_large(benchmark):
|
|
218
|
+
benchmark(rusterize, gdf_large, res=(1/6, 1/6), fun="sum")
|
|
219
|
+
|
|
220
|
+
def test_small(benchmark):
|
|
221
|
+
benchmark(rusterize, gdf_small, res=(1/6, 1/6), fun="sum")
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
Then you can run it with [pytest](https://docs.pytest.org/en/stable/) and [pytest-benchmark](https://pytest-benchmark.readthedocs.io/en/stable/):
|
|
225
|
+
|
|
226
|
+
```
|
|
227
|
+
pytest <python file> --benchmark-min-rounds=20 --benchmark-time-unit='s'
|
|
228
|
+
|
|
229
|
+
--------------------------------------------- benchmark: 1 tests --------------------------------------------
|
|
230
|
+
Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
|
|
231
|
+
-------------------------------------------------------------------------------------------------------------
|
|
232
|
+
rusterize_small 0.0791 0.0899 0.0812 0.0027 0.0803 0.0020 2;2 12.3214 20 1
|
|
233
|
+
rusterize_large 1.379545 1.4474 1.4006 0.0178 1.3966 0.0214 5;1 0.7140 20 1
|
|
234
|
+
-------------------------------------------------------------------------------------------------------------
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
And fasterize:
|
|
238
|
+
|
|
239
|
+
```r
|
|
240
|
+
library(sf)
|
|
241
|
+
library(raster)
|
|
242
|
+
library(fasterize)
|
|
243
|
+
library(microbenchmark)
|
|
244
|
+
|
|
245
|
+
large <- st_read("Mammals_Terrestrial/Mammals_Terrestrial.shp", quiet = TRUE)
|
|
246
|
+
small <- large[1:1000, ]
|
|
247
|
+
fn <- function(v) {
|
|
248
|
+
r <- raster(v, res = 1/6)
|
|
249
|
+
return(fasterize(v, r, fun = "sum"))
|
|
250
|
+
}
|
|
251
|
+
microbenchmark(
|
|
252
|
+
fasterize_large = f <- fn(large),
|
|
253
|
+
fasterize_small = f <- fn(small),
|
|
254
|
+
times=20L,
|
|
255
|
+
unit='s'
|
|
256
|
+
)
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
```
|
|
260
|
+
Unit: seconds
|
|
261
|
+
expr min lq mean median uq max neval
|
|
262
|
+
fasterize_small 0.4741043 0.4926114 0.5191707 0.5193289 0.536741 0.5859029 20
|
|
263
|
+
fasterize_large 9.2199426 10.3595465 10.6653139 10.5369429 11.025771 11.7944567 20
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
And on an even larger dataset? Here we use a layer from the province of Quebec, Canada representing ~2M polygons of forest stands, rasterized at 30 meters (20 rounds) with no field value, pixel function `any`, and `dense` encoding. The comparison with `gdal_rasterize` was run with `hyperfine --runs 20 "gdal_rasterize -tr 30 30 -burn 1 <data_in> <data_out>"`.
|
|
267
|
+
|
|
268
|
+
```
|
|
269
|
+
# rusterize
|
|
270
|
+
--------------------------------------------- benchmark: 1 tests --------------------------------------------
|
|
271
|
+
Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
|
|
272
|
+
-------------------------------------------------------------------------------------------------------------
|
|
273
|
+
rusterize 5.9331 7.2308 6.1302 0.3183 5.9903 0.1736 2;4 0.1631 20 1
|
|
274
|
+
-------------------------------------------------------------------------------------------------------------
|
|
275
|
+
|
|
276
|
+
# fasterize
|
|
277
|
+
Unit: seconds
|
|
278
|
+
expr min lq mean median uq max neval
|
|
279
|
+
fasterize 157.4734 177.2055 194.3222 194.6455 213.9195 230.6504 20
|
|
280
|
+
|
|
281
|
+
# gdal_rasterize (CLI) - read from fast drive, write to fast drive
|
|
282
|
+
Time (mean ± σ): 5.495 s ± 0.038 s [User: 4.268 s, System: 1.225 s]
|
|
283
|
+
Range (min … max): 5.452 s … 5.623 s 20 runs
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
In terms of (multi)line rasterization speed, here's a benchmark against `gdal_rasterize` using a layer from the province of Quebec, Canada, representing a subset of the road network for a total of ~535K multilinestrings.
|
|
287
|
+
|
|
288
|
+
```
|
|
289
|
+
# rusterize
|
|
290
|
+
--------------------------------------------- benchmark: 1 tests --------------------------------------------
|
|
291
|
+
Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
|
|
292
|
+
-------------------------------------------------------------------------------------------------------------
|
|
293
|
+
test 4.5272 5.9488 4.7171 0.3236 4.6360 0.1680 2;2 0.2120 20 1
|
|
294
|
+
-------------------------------------------------------------------------------------------------------------
|
|
295
|
+
|
|
296
|
+
# gdal_rasterize (CLI) - read from fast drive, write to fast drive
|
|
297
|
+
Time (mean ± σ): 8.719 s ± 0.063 s [User: 3.782 s, System: 4.917 s]
|
|
298
|
+
Range (min … max): 8.658 s … 8.874 s 20 runs
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
# Comparison with other tools
|
|
302
|
+
|
|
303
|
+
While **rusterize** is fast, there are other fast alternatives out there, including `GDAL`, `rasterio` and `geocube`. However, **rusterize** allows for seamless, Rust-native processing with a similar or lower memory footprint that doesn't require you to leave Python, and returns the geoinformation you need for downstream processing with ample control over resolution, shape, extent, and data type.
|
|
304
|
+
|
|
305
|
+
The following is a time comparison on a single run on the same forest stands dataset used earlier.
|
|
306
|
+
|
|
307
|
+
```
|
|
308
|
+
rusterize: 5.9 sec
|
|
309
|
+
rasterio: 68 sec (but no spatial information)
|
|
310
|
+
fasterize: 157 sec (including raster creation)
|
|
311
|
+
geocube: 260 sec (larger memory footprint)
|
|
312
|
+
```
|
|
313
|
+
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
rusterize-0.5.0.dist-info/METADATA,sha256=iYMUElyGvkPsYMNIWWJQvYOOsptwlcEZdrH6Fh-oLjY,13923
|
|
2
|
+
rusterize-0.5.0.dist-info/WHEEL,sha256=sgpeF0IXz3jzHg627c97rIV0EvAHwTqdmK1eoh6Pt68,96
|
|
3
|
+
rusterize-0.5.0.dist-info/licenses/LICENSE,sha256=FXkix0amECHul0Y2qWBXnEGNV2fd8GuVCIZuuzQwR-c,1130
|
|
4
|
+
rusterize/__init__.py,sha256=SvjfN61C_kO_4ZLoj3kRzDgJovTbkMBaRLkzsund8Qs,7157
|
|
5
|
+
rusterize/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
rusterize/rusterize.pyd,sha256=AWn4MdYY-aaGPgdGIwUnzEwKF1MATZr3tLU_Hdz6Y-Q,51045888
|
|
7
|
+
rusterize/rusterize.pyi,sha256=avY2KGWRXxBP09z__M0ZVsaAVtEFuLo3gGVgtVD5W-Q,2957
|
|
8
|
+
rusterize-0.5.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Tommaso Trotto
|
|
4
|
+
Copyright (c) 2017 EcoHealth Alliance
|
|
5
|
+
|
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
7
|
+
a copy of this software and associated documentation files (the
|
|
8
|
+
"Software"), to deal in the Software without restriction, including
|
|
9
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
10
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
11
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
12
|
+
the following conditions:
|
|
13
|
+
|
|
14
|
+
The above copyright notice and this permission notice shall be
|
|
15
|
+
included in all copies or substantial portions of the Software.
|
|
16
|
+
|
|
17
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
18
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
19
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
20
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
21
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
22
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
23
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|