rusterize 0.1.0__cp310-abi3-macosx_10_12_x86_64.whl → 0.2.0__cp310-abi3-macosx_10_12_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rusterize might be problematic. Click here for more details.

rusterize/core.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from types import NoneType
3
4
  from typing import Any, Dict, Optional, Tuple, Union
4
5
 
5
6
  import polars as pl
@@ -10,7 +11,9 @@ from .rusterize import _rusterize
10
11
 
11
12
 
12
13
  def rusterize(gdf: DataFrame,
13
- res: Union[Tuple[int, ...], Tuple[float, ...]],
14
+ res: Optional[Union[Tuple[int, ...], Tuple[float, ...]]] = None,
15
+ out_shape: Optional[Union[Tuple[int, ...]]] = None,
16
+ extent: Optional[Union[Tuple[int, ...], Tuple[float, ...]]] = None,
14
17
  field: Optional[str] = None,
15
18
  by: Optional[str] = None,
16
19
  fun: str = "last",
@@ -20,47 +23,71 @@ def rusterize(gdf: DataFrame,
20
23
  Fast geopandas rasterization into xarray.DataArray
21
24
 
22
25
  Args:
23
- :param gdf: geopandas dataframe to rasterize.
24
- :param res: tuple of (xres, yres) for rasterized data.
25
- :param field: field to rasterize. Default is None.
26
- :param by: column to rasterize, assigns each unique value to a layer in the stack based on field. Default is None.
27
- :param fun: pixel function to use, see fasterize for options. Default is `last`.
28
- :param background: background value in final raster. Default is None.
26
+ :param gdf: geopandas dataframe to rasterize.
27
+ :param res: tuple of (xres, yres) for rasterized data.
28
+ :param out_shape: tuple of (nrows, ncols) for regularized output shape.
29
+ :param extent: tuple of (xmin, ymin, xmax, ymax) for regularized extent.
30
+ :param field: field to rasterize. Default is None.
31
+ :param by: column to rasterize, assigns each unique value to a layer in the stack based on field. Default is None.
32
+ :param fun: pixel function to use, see fasterize for options. Default is `last`.
33
+ :param background: background value in final raster. Default is None.
29
34
 
30
35
  Returns:
31
- Dictionary containing rasterized geometries and spatial attributes to build a xarray.DataArray.
36
+ Rasterized xarray.DataArray.
37
+
38
+ Note:
39
+ When any of `res`, `out_shape`, or `extent` is not provided, it is inferred from the other arguments when applicable.
40
+ Unless `extent` is specified, a half-pixel buffer is applied to avoid missing points on the border.
41
+ The logic dictating the final spatial properties of the rasterized geometries follows that of GDAL.
32
42
  """
33
43
  # type checks
34
44
  if not isinstance(gdf, DataFrame):
35
45
  raise TypeError("Must pass a valid geopandas dataframe.")
36
- if not isinstance(field, (str, type(None))):
46
+ if not isinstance(res, (tuple, NoneType)):
47
+ raise TypeError("Must pass a valid resolution tuple (x, y).")
48
+ if not isinstance(out_shape, (tuple, NoneType)):
49
+ raise TypeError("Must pass a valid output shape tuple (nrows, ncols).")
50
+ if not isinstance(extent, (tuple, NoneType)):
51
+ raise TypeError("Must pass a valid extent tuple (xmin, ymin, xmax, ymax).")
52
+ if not isinstance(field, (str, NoneType)):
37
53
  raise TypeError("Must pass a valid string to field.")
38
- if not isinstance(by, (str, type(None))):
54
+ if not isinstance(by, (str, NoneType)):
39
55
  raise TypeError("Must pass a valid string to by.")
40
- if not isinstance(res, tuple):
41
- raise TypeError("Must pass a valid resolution tuple (x, y).")
42
56
  if not isinstance(fun, str):
43
57
  raise TypeError("Must pass a valid string to pixel_fn. Select only of sum, first, last, min, max, count, or any.")
44
- if not isinstance(background, (int, float, type(None))):
58
+ if not isinstance(background, (int, float, NoneType)):
45
59
  raise TypeError("Must pass a valid background type.")
46
60
 
47
61
  # value check
62
+ if not res and not out_shape and not extent:
63
+ raise ValueError("One of `res`, `out_shape`, or `extent` must be provided.")
64
+ if extent and not res and not out_shape:
65
+ raise ValueError("Must also specify `res` or `out_shape` with extent.")
66
+ if res and (len(res) != 2 or any(r <= 0 for r in res) or any(not isinstance(r, (int, float)) for r in res)):
67
+ raise ValueError("Resolution must be 2 positive numbers.")
68
+ if out_shape and (len(out_shape) != 2 or any(s <= 0 for s in out_shape) or any(not isinstance(s, int) for s in out_shape)):
69
+ raise ValueError("Output shape must be 2 positive integers.")
70
+ if extent and len(extent) != 4:
71
+ raise ValueError("Extent must be 4 numbers (xmin, ymin, xmax, ymax).")
48
72
  if by and not field:
49
73
  raise ValueError("If by is specified, field must also be specified.")
50
- if len(res) != 2 or any((res[0], res[1])) <= 0 or not isinstance(res[0], type(res[1])):
51
- raise ValueError("Must pass valid resolution tuple of values of consistent dtype.")
74
+
75
+ # defaults
76
+ _res = res if res else (0, 0)
77
+ _shape = out_shape if out_shape else (0, 0)
78
+ (_bounds, has_extent) = (extent, True) if extent else (gdf.total_bounds, False)
52
79
 
53
80
  # RasterInfo
54
- bounds = gdf.total_bounds
55
81
  raster_info = {
56
- "xmin": bounds[0],
57
- "ymin": bounds[1],
58
- "xmax": bounds[2],
59
- "ymax": bounds[3],
60
- "xres": res[0],
61
- "yres": res[1],
62
- "nrows": 0,
63
- "ncols": 0
82
+ "xmin": _bounds[0],
83
+ "ymin": _bounds[1],
84
+ "xmax": _bounds[2],
85
+ "ymax": _bounds[3],
86
+ "xres": _res[0],
87
+ "yres": _res[1],
88
+ "nrows": _shape[0],
89
+ "ncols": _shape[1],
90
+ "has_extent": has_extent
64
91
  }
65
92
 
66
93
  # extract columns of interest and convert to polars
Binary file
@@ -1,19 +1,19 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rusterize
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Classifier: License :: OSI Approved :: MIT License
5
5
  Classifier: Operating System :: OS Independent
6
6
  Classifier: Programming Language :: Rust
7
7
  Classifier: Programming Language :: Python :: Implementation :: CPython
8
8
  Classifier: Programming Language :: Python :: Implementation :: PyPy
9
- Requires-Dist: geopandas >=1.0.1
10
- Requires-Dist: pandas >=2.2.3
11
- Requires-Dist: pyarrow >=18.1.0
12
- Requires-Dist: polars >=1.19.0
13
- Requires-Dist: xarray >=2025.1.1
14
- Requires-Dist: rioxarray >=0.18.2
9
+ Requires-Dist: geopandas>=1.0.1
10
+ Requires-Dist: pandas>=2.2.3
11
+ Requires-Dist: pyarrow>=18.1.0
12
+ Requires-Dist: polars>=1.19.0
13
+ Requires-Dist: xarray>=2025.1.1
14
+ Requires-Dist: rioxarray>=0.18.2
15
15
  License-File: LICENSE
16
- Summary: High performance rasterization tool for Python build in Rust
16
+ Summary: High performance rasterization tool for Python built in Rust
17
17
  Keywords: fast,raster
18
18
  Requires-Python: >=3.10
19
19
  Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
@@ -21,20 +21,17 @@ Project-URL: repository, https://github.com/ttrotto/rusterize
21
21
 
22
22
  # rusterize
23
23
 
24
- High performance rasterization tool for python built in Rust. This
25
- repository is heavily based on the [**fasterize**](https://github.com/ecohealthalliance/fasterize.git) package built in C++
26
- for R. This version ports it to Python with a Rust backend.
24
+ High performance rasterization tool for Python built in Rust. This
25
+ repository stems from the [fasterize](https://github.com/ecohealthalliance/fasterize.git) package built in C++
26
+ for R and ports parts of its logic to Python with a Rust backend, along with some useful improvements.
27
27
 
28
- Functionally, it takes an input [geopandas](https://geopandas.org/en/stable/)
29
- dataframes and returns a [xarray](https://docs.xarray.dev/en/stable/). It
30
- tighly mirrors the processing routine of fasterize, so it works only on
31
- (multi)polygon geometries at the moment.
28
+ **rusterize** is designed to work on *(multi)polygons* and *(multi)linestrings*. Functionally, it takes an input [geopandas](https://geopandas.org/en/stable/) dataframe and returns a [xarray](https://docs.xarray.dev/en/stable/).
32
29
 
33
30
  # Installation
34
31
 
35
32
  Install the current version with pip:
36
33
 
37
- ``` {shell}
34
+ ``` shell
38
35
  pip install rusterize
39
36
  ```
40
37
 
@@ -46,7 +43,7 @@ package. For this to work, you’ll need to have [Rust](https://www.rust-lang.or
46
43
  [cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html)
47
44
  installed.
48
45
 
49
- ``` {shell}
46
+ ``` shell
50
47
  # Clone repo
51
48
  git clone https://github.com/<username>/rusterize.git
52
49
  cd rusterize
@@ -65,33 +62,43 @@ maturin develop --profile dist-release
65
62
 
66
63
  This function has a simple API:
67
64
 
68
- ``` {shell}
65
+ ``` python
69
66
  from rusterize.core import rusterize
70
67
 
71
- # gdf = <import datasets as needed>
68
+ # gdf = <import/modify dataframe as needed>
72
69
 
73
70
  # rusterize
74
71
  rusterize(gdf,
75
- (30, 30),
76
- "field",
77
- "by",
78
- "sum",
79
- 0)
72
+ res=(30, 30),
73
+ out_shape=(10, 10),
74
+ extent=(0, 0, 300, 300),
75
+ field="field",
76
+ by="by",
77
+ fun="sum",
78
+ background=0)
80
79
  ```
81
80
 
82
81
  - `gdf`: geopandas dataframe to rasterize
83
- - `res`: tuple of (xres, yres) for final resolution
82
+ - `res`: tuple of (xres, yres) for desired resolution
83
+ - `out_shape`: tuple of (nrows, ncols) for desired output shape
84
+ - `extent`: tuple of (xmin, ymin, xmax, ymax) for desired output extent
84
85
  - `field`: field to rasterize. Default is None (a value of `1` is rasterized).
85
86
  - `by`: column to rasterize. Assigns each group to a band in the
86
- stack. Values are taken from `field`. Default is None
87
+ stack. Values are taken from `field`. Default is None (singleband raster)
87
88
  - `fun`: pixel function to use when multiple values overlap. Default is
88
89
  `last`. Available options are `sum`, `first`, `last`, `min`, `max`, `count`, or `any`
89
90
  - `background`: background value in final raster. Default is None (NaN)
90
91
 
92
+ Note that control over the desired extent is not as strict as for resolution and shape. That is,
93
+ when resolution, output shape, and extent are specified, priority is given to resolution and shape.
94
+ So, extent is not guaranteed, but resolution and shape are. If extent is not given, it is taken
95
+ from the polygons and is not modified, unless you specify a resolution value. If you only specify an output
96
+ shape, the extent is maintained. This mimics the logic of `gdal_rasterize`.
97
+
91
98
  # Usage
92
99
 
93
100
  **rusterize** consists of a single function `rusterize()`. The Rust implementation
94
- returns an array that is then converted to a xarray on the Python side
101
+ returns an array that is converted to a xarray on the Python side
95
102
  for simplicity.
96
103
 
97
104
  ``` python
@@ -101,17 +108,18 @@ from shapely import wkt
101
108
  import matplotlib.pyplot as plt
102
109
 
103
110
  # example from fasterize
104
- polygons = [
111
+ geoms = [
105
112
  "POLYGON ((-180 -20, -140 55, 10 0, -140 -60, -180 -20), (-150 -20, -100 -10, -110 20, -150 -20))",
106
113
  "POLYGON ((-10 0, 140 60, 160 0, 140 -55, -10 0))",
107
- "POLYGON ((-125 0, 0 60, 40 5, 15 -45, -125 0))"
114
+ "POLYGON ((-125 0, 0 60, 40 5, 15 -45, -125 0))",
115
+ "MULTILINESTRING ((-180 -70, -140 -50), (-140 -50, -100 -70), (-100 -70, -60 -50), (-60 -50, -20 -70), (-20 -70, 20 -50), (20 -50, 60 -70), (60 -70, 100 -50), (100 -50, 140 -70), (140 -70, 180 -50))"
108
116
  ]
109
117
 
110
118
  # Convert WKT strings to Shapely geometries
111
- geometries = [wkt.loads(polygon) for polygon in polygons]
119
+ geometries = [wkt.loads(geom) for geom in geoms]
112
120
 
113
121
  # Create a GeoDataFrame
114
- gdf = gpd.GeoDataFrame({'value': range(1, len(polygons) + 1)}, geometry=geometries, crs='EPSG:32619')
122
+ gdf = gpd.GeoDataFrame({'value': range(1, len(geoms) + 1)}, geometry=geometries, crs='EPSG:32619')
115
123
 
116
124
  # rusterize
117
125
  output = rusterize(
@@ -127,12 +135,11 @@ output.plot.imshow(ax=ax)
127
135
  plt.show()
128
136
  ```
129
137
 
130
- ![](README_files/figure-commonmark/cell-2-output-1.png)
138
+ ![](img/plot.png)
131
139
 
132
140
  # Benchmarks
133
141
 
134
- **fasterize** is fast and so is **rusterize**! Let’s try it on small and large
135
- datasets.
142
+ **rusterize** is fast! Let’s try it on small and large datasets.
136
143
 
137
144
  ``` python
138
145
  from rusterize.core import rusterize
@@ -167,7 +174,7 @@ Then you can run it with [pytest](https://docs.pytest.org/en/stable/)
167
174
  and
168
175
  [pytest-benchmark](https://pytest-benchmark.readthedocs.io/en/stable/):
169
176
 
170
- ``` {shell}
177
+ ```
171
178
  pytest <python file> --benchmark-min-rounds=20 --benchmark-time-unit='s'
172
179
 
173
180
  --------------------------------------------- benchmark: 1 tests --------------------------------------------
@@ -180,7 +187,7 @@ test_small 0.5083 0.6416 0.5265 0.0393 0.5120 0.0108 2;
180
187
 
181
188
  And fasterize:
182
189
 
183
- ``` {r}
190
+ ``` r
184
191
  large <- st_read("Mammals_Terrestrial/Mammals_Terrestrial.shp", quiet = TRUE)
185
192
  small <- large[1:1000, ]
186
193
  fn <- function(v) {
@@ -195,7 +202,7 @@ microbenchmark(
195
202
  )
196
203
  ```
197
204
 
198
- ``` {shell}
205
+ ```
199
206
  Unit: seconds
200
207
  expr min lq mean median uq max neval
201
208
  fasterize_large 9.565781 9.815375 10.02838 9.984965 10.18532 10.66656 20
@@ -207,7 +214,7 @@ And on even
207
214
  datasets? This is a benchmark with 350K+ geometries rasterized at 30
208
215
  meters (20 rounds) with no field value and pixel function `sum`.
209
216
 
210
- ``` {shell}
217
+ ```
211
218
  # rusterize
212
219
  --------------------------------------------- benchmark: 1 tests --------------------------------------------
213
220
  Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
@@ -221,14 +228,16 @@ Unit: seconds
221
228
  fasterize 62.12409 72.13832 74.53424 75.12375 77.72899 84.77415 20
222
229
  ```
223
230
 
231
+ In terms of (multi)line rasterization speed, here's a benchmark against `gdal_rasterize` using a layer from the province of Quebec, Canada, representing water courses for a total of ~4.5 million multilinestrings.
232
+
224
233
  # Comparison with other tools
225
234
 
226
- While `rusterize` is fast, there are other very fast solutions out there, including
235
+ While **rusterize** is fast, there are other fast alternatives out there, including:
227
236
  - `GDAL`
228
237
  - `rasterio`
229
238
  - `geocube`
230
239
 
231
- However, `rusterize` allows for a seamless, Rust-native processing with similar or lower memory footprint that doesn't require you to leave Python, and returns the geoinformation you need for downstream processing.
240
+ However, **rusterize** allows for seamless, Rust-native processing with a similar or lower memory footprint that doesn't require you to leave Python, and returns the geoinformation you need for downstream processing with ample control over resolution, shape, and extent.
232
241
 
233
242
  The following is a time comparison run on a dataset with 340K+ geometries, rasterized at 2m resolution.
234
243
  ```
@@ -0,0 +1,7 @@
1
+ rusterize-0.2.0.dist-info/METADATA,sha256=m_0TCBYklNLCwsmLrN1Cd1lWP-zdxFikhL-HvxvsdL8,9035
2
+ rusterize-0.2.0.dist-info/WHEEL,sha256=eNkHHVQ2ZVf7_h6IXFcTf7nuT468OLJNEY9FQRJLBcc,105
3
+ rusterize-0.2.0.dist-info/licenses/LICENSE,sha256=v-2DqBji_azGEWFDxBhw-CNIRu8450vBbloLx6UNqLU,1108
4
+ rusterize/__init__.py,sha256=OymrFdgWCN3VMuSM3oXoGKeagAd-5ayPsYyXnQ_HsDE,101
5
+ rusterize/core.py,sha256=9nsqL-v5Bw5DezS_kpnDx1eQHooJr6zK1TpWSKxJT94,4574
6
+ rusterize/rusterize.abi3.so,sha256=Q51iPEqNMkD9ifUI6tOWopqSxLXL4l122E-diJ1qEMk,33627216
7
+ rusterize-0.2.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: maturin (1.8.1)
2
+ Generator: maturin (1.8.3)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp310-abi3-macosx_10_12_x86_64
@@ -1,7 +0,0 @@
1
- rusterize-0.1.0.dist-info/METADATA,sha256=pS1UlizVrXEDCis4_jefaIP1bDrZH5lJGquhtWwM69g,7938
2
- rusterize-0.1.0.dist-info/WHEEL,sha256=o7axhT60SX08cKnffoNQ5wynMFwx6jcjMA-vg-8vXS0,105
3
- rusterize-0.1.0.dist-info/licenses/LICENSE,sha256=v-2DqBji_azGEWFDxBhw-CNIRu8450vBbloLx6UNqLU,1108
4
- rusterize/__init__.py,sha256=OymrFdgWCN3VMuSM3oXoGKeagAd-5ayPsYyXnQ_HsDE,101
5
- rusterize/core.py,sha256=eSBSQMebf3vELR6FHvmL7j-66mnWcRDXiow-flG5H8M,2861
6
- rusterize/rusterize.abi3.so,sha256=KjOv8BwV6Xs916FdPJxHMJMh3vleZ0yPPoM1594VUzk,33543688
7
- rusterize-0.1.0.dist-info/RECORD,,