gregor 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gregor/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from . import aggregate, disaggregate, raster
gregor/aggregate.py ADDED
@@ -0,0 +1,127 @@
1
+ from pathlib import Path
2
+
3
+ import geopandas as gpd
4
+ import rasterio as rio
5
+ import xarray as xr
6
+ from rasterstats import zonal_stats
7
+
8
+
9
def aggregate_raster_to_polygon(
    raster: str | Path | xr.DataArray,
    polygons: gpd.GeoSeries | gpd.GeoDataFrame,
    stats: str = "sum",
) -> gpd.GeoDataFrame:
    r"""
    Aggregate raster data to polygons.

    Dispatches on the type of `raster`: paths are handled by
    `_aggregate_file_to_polygon`, in-memory arrays by
    `_aggregate_xarray_to_polygon`.

    Parameters
    ----------
    raster : str | Path | xr.DataArray
        Path to the raster file or xarray DataArray.
    polygons : gpd.GeoSeries | gpd.GeoDataFrame
        GeoSeries or GeoDataFrame with the spatial units.
    stats : str, optional
        Statistics to compute, by default "sum".

    Returns
    -------
    gpd.GeoDataFrame
        GeoDataFrame containing the original geometries
        and the aggregated statistics.

    Raises
    ------
    TypeError
        If `raster` is neither a path nor an xarray DataArray.
    """
    if isinstance(raster, (str, Path)):
        return _aggregate_file_to_polygon(raster, polygons, stats)
    if isinstance(raster, xr.DataArray):
        return _aggregate_xarray_to_polygon(raster, polygons, stats)

    # Fail loudly on unsupported input instead of falling through both
    # branches without ever producing a result.
    raise TypeError(
        "`raster` should be a path (str or Path) or an xarray DataArray, "
        f"got {type(raster).__name__}."
    )
38
+
39
+
40
def _aggregate_file_to_polygon(raster, polygons, stats, nodata=0):
    r"""
    Compute zonal statistics of a raster file for each polygon.

    Only the first band of the file is read.

    Parameters
    ----------
    raster : str | Path
        Path of the raster file, opened with rasterio.
    polygons : gpd.GeoSeries | gpd.GeoDataFrame
        Spatial units to aggregate to.
    stats : str
        Statistics forwarded to `zonal_stats`.
    nodata : optional
        Value forwarded to `zonal_stats` as the no-data marker, by default 0.

    Returns
    -------
    gpd.GeoDataFrame
        Features returned by `zonal_stats`, in the CRS of `polygons`.
    """
    # Collect everything that is needed from the dataset while it is open.
    with rio.open(raster) as dataset:
        transform = dataset.transform
        band = dataset.read(1)
        raster_crs = dataset.crs

    # The statistics are computed in the raster's CRS.
    features = zonal_stats(
        polygons.to_crs(raster_crs),
        band,
        affine=transform,
        stats=stats,
        nodata=nodata,
        geojson_out=True,
    )

    # The features carry no CRS: label them with the raster CRS they were
    # computed in, then convert back to the CRS of the input polygons.
    return (
        gpd.GeoDataFrame.from_features(features)
        .set_crs(raster_crs)
        .to_crs(crs=polygons.crs)
    )
62
+
63
+
64
def _aggregate_xarray_to_polygon(raster, polygons, stats, nodata=0):
    r"""
    Compute zonal statistics of an in-memory raster for each polygon.

    Parameters
    ----------
    raster : xr.DataArray
        Raster whose CRS and affine transform are read through the
        `rio` accessor.
    polygons : gpd.GeoSeries | gpd.GeoDataFrame
        Spatial units to aggregate to.
    stats : str
        Statistics forwarded to `zonal_stats`.
    nodata : optional
        Value forwarded to `zonal_stats` as the no-data marker, by default 0.

    Returns
    -------
    gpd.GeoDataFrame
        One row per polygon (same index as `polygons`) holding the
        statistics and the geometry, in the CRS of `polygons`.
    """
    # zonal_stats receives a bare array plus an affine transform, so the
    # polygons have to be expressed in the raster's CRS first.
    zones = polygons.to_crs(raster.rio.crs)

    zone_stats = zonal_stats(
        zones,
        raster.values,
        affine=raster.rio.transform(),
        stats=stats,
        nodata=nodata,
    )

    # Re-attach index and geometry to the statistics.
    aggregated = gpd.GeoDataFrame(
        zone_stats,
        index=zones.index,
        crs=zones.crs,
        geometry=zones.geometry,
    )
    aggregated.index.name = zones.index.name

    # Hand the result back in the CRS the caller supplied.
    return aggregated.to_crs(crs=polygons.crs)
89
+
90
+
91
def aggregate_point_to_polygon(
    points: gpd.GeoDataFrame, polygons: gpd.GeoSeries | gpd.GeoDataFrame, aggfunc="sum"
):
    r"""
    Aggregate point data to polygons.

    Every point is assigned to the polygon it lies within; the point
    attributes are then grouped per polygon and reduced with `aggfunc`.

    Parameters
    ----------
    points : gpd.GeoDataFrame
        GeoDataFrame containing data defined on point geometries.
    polygons : gpd.GeoSeries | gpd.GeoDataFrame
        GeoSeries or GeoDataFrame of polygon geometries.
    aggfunc : str, optional
        Aggregation function, by default "sum".

    Returns
    -------
    gpd.GeoDataFrame
        GeoDataFrame containing the original geometries
        and the aggregated statistics.

    Raises
    ------
    ValueError
        If `polygons` is neither a GeoSeries nor a GeoDataFrame.
    """
    if isinstance(polygons, gpd.GeoSeries):
        _polygons = polygons.to_frame()
    elif isinstance(polygons, gpd.GeoDataFrame):
        _polygons = polygons
    else:
        raise ValueError("`polygons` should be either a GeoSeries or a GeoDataFrame.")

    # `predicate` is the current name of the keyword formerly called `op`,
    # which geopandas deprecated and later removed.
    joined_data = gpd.sjoin(
        points, _polygons, how="inner", predicate="within"
    ).drop(columns="geometry")

    # The matched polygon index is normally stored in "index_right".
    # NOTE(review): newer geopandas releases appear to reuse the name of the
    # polygon index when it has one -- confirm against the installed version.
    group_column = "index_right"
    if group_column not in joined_data.columns and _polygons.index.name is not None:
        group_column = _polygons.index.name

    aggregated_data = joined_data.groupby(group_column).agg(aggfunc)

    # Join on the polygon index so every polygon is kept in the result.
    return _polygons.join(aggregated_data)
gregor/cli.py ADDED
@@ -0,0 +1,55 @@
1
+ from pathlib import Path
2
+
3
+ import click
4
+ import geopandas as gpd
5
+ import rioxarray as rxr
6
+
7
+ import gregor
8
+
9
+
10
@click.command(help="Aggregate raster data to polygon boundaries.")
@click.argument("raster", type=click.STRING)
@click.argument("polygons", type=click.STRING)
@click.argument("destination", type=click.STRING)
@click.argument("stats", type=click.STRING, default="sum")
def agg(raster, polygons, destination, stats):
    # Refuse to overwrite an existing output file.
    target = Path(destination)
    if target.exists():
        raise ValueError("Destination file already exists.")

    # Load both inputs; squeeze() drops length-one dimensions of the raster.
    raster_data = rxr.open_rasterio(raster).squeeze()
    zones = gpd.read_file(polygons)

    result = gregor.aggregate.aggregate_raster_to_polygon(raster_data, zones, stats)
    result.to_file(destination)
24
+
25
+
26
@click.command(help="Disaggregate polygon data to raster data using proxy.")
@click.argument("data", type=click.STRING)
@click.argument("column", type=click.STRING)
@click.argument("proxy", type=click.STRING)
@click.argument("destination", type=click.STRING)
@click.option("--to-data-crs", default=False, type=click.BOOL)
def disagg(data, column, proxy, destination, to_data_crs):
    # Refuse to overwrite an existing output file.
    target = Path(destination)
    if target.exists():
        raise ValueError("Destination file already exists.")

    polygons = gpd.read_file(data)
    proxy_raster = rxr.open_rasterio(proxy)

    # Restrict the proxy to the extent of the data (expressed in the proxy's
    # CRS) for better performance; total_bounds is (minx, miny, maxx, maxy).
    bounds = polygons.to_crs(proxy_raster.rio.crs).total_bounds
    proxy_raster = gregor.raster.clip(proxy_raster, *bounds).squeeze()

    result = gregor.disaggregate.disaggregate_polygon_to_raster(
        polygons, column, proxy_raster, to_data_crs
    )
    result.rio.to_raster(destination)
47
+
48
+
49
@click.group()
def cli():
    # Root command group; it does no work itself. Deliberately documented
    # with a comment rather than a docstring so the help output is unchanged.
    return None


# Attach the sub-commands to the root group.
cli.add_command(agg)
cli.add_command(disagg)
gregor/disaggregate.py ADDED
@@ -0,0 +1,146 @@
1
+ import geopandas as gpd
2
+ import numpy as np
3
+ import xarray as xr
4
+ from rasterio.features import geometry_mask
5
+
6
+
7
def disaggregate_polygon_to_raster(
    data: gpd.GeoDataFrame,
    column: str,
    proxy: xr.Dataset,
    to_data_crs: bool = False,
) -> xr.Dataset:
    r"""
    Disaggregate polygon data to raster data using proxy.
    Normalization of the proxy happens internally.

    Each raster cell receives the share of its polygon's value that
    corresponds to the cell's share of the proxy summed over that polygon.

    Parameters
    ----------
    data : gpd.GeoDataFrame
        Data to be disaggregated.
    column : str
        Column name of the data to be disaggregated.
    proxy : xr.Dataset
        Proxy data for disaggregation. It is passed to
        `get_belongs_to_matrix`, which requires exactly two dimensions
        and reads `.shape`, `.x` and `.y`.
    to_data_crs : bool, optional
        Whether to reproject the result to `data`'s CRS or keep it in `proxy`'s CRS. Default is False.

    Returns
    -------
    xr.Dataset
        Disaggregated raster data.
    """
    # Importing rioxarray registers the `.rio` accessor used below; nothing
    # else in this module imports it.
    import rioxarray  # noqa: F401

    _data = data.copy()
    index_name = _data.index.name
    if index_name is None:
        # An index name is needed to label the polygon dimension in xarray.
        index_name = "id"
        _data.index.name = index_name

    if not proxy.rio.crs == data.crs:
        print(
            f"CRS of `proxy` ({proxy.rio.crs}) does not match CRS of `data` ({data.crs}). Reprojecting CRS of `data` to `proxy`'s CRS."
        )
        _data = _data.to_crs(proxy.rio.crs)

    # Each raster point belongs to one spatial_unit
    belongs_to = get_belongs_to_matrix(proxy, _data.geometry)
    _data = _data[[column]].to_xarray()
    # Sum of the proxy per polygon; the unnamed grouper yields a dimension
    # called "group", which is renamed to the polygon index name.
    normalization = proxy.groupby(belongs_to).sum().rename(group=index_name)

    # Remove regions that do not belong to any geometry
    _data = _data.sel({index_name: normalization.coords[index_name]})

    # Disaggregate data to raster using proxy
    # raster_{x,y} = 1/normalization_{id} * _data_{id} * belongs_to_{id,x,y} * proxy_{x,y}
    # NOTE(review): a polygon whose proxy sums to zero makes
    # `1 / normalization` non-finite -- confirm how that should be handled.
    raster = xr.DataArray(data=0, dims=["y", "x"], coords={"y": proxy.y, "x": proxy.x})
    for region_id in normalization.coords[index_name]:
        raster_id = (
            1
            / normalization.sel({index_name: region_id})
            * _data.sel({index_name: region_id})
            * (belongs_to == region_id)
            * proxy
        )
        raster = raster + raster_id

    if to_data_crs:
        print(f"Reprojecting results to `data`'s CRS {data.crs}.")
        raster = raster.rio.reproject(data.crs)

    return raster
71
+
72
+
73
def get_uniform_proxy(
    polygons: gpd.GeoSeries, raster_resolution: tuple[int, int]
) -> xr.Dataset:
    r"""
    Build the coordinate grid of a uniform proxy covering `polygons`.

    The proxy values are not implemented yet: the returned Dataset has no
    data variables. It only carries evenly spaced `x` and `y` coordinates
    spanning the total bounds of `polygons`, tagged with their CRS.

    Parameters
    ----------
    polygons : gpd.GeoSeries
        Polygons to compute the proxy for.
    raster_resolution : tuple[int, int]
        Number of grid points along x and along y, in that order.

    Returns
    -------
    xr.Dataset
        Dataset with `x` and `y` coordinates and the CRS of `polygons`.
    """
    # Importing rioxarray registers the `.rio` accessor used below; nothing
    # else in this module imports it.
    import rioxarray  # noqa: F401

    # get spatial extent of spatial_units
    x_min, y_min, x_max, y_max = polygons.total_bounds

    # define coords; both bounds are included as grid points
    x_coords = np.linspace(x_min, x_max, raster_resolution[0])
    y_coords = np.linspace(y_min, y_max, raster_resolution[1])

    # create raster Dataset
    uniform_proxy = xr.Dataset(
        data_vars={}, coords={"x": ("x", x_coords), "y": ("y", y_coords)}
    )

    # TODO Set spatial dims and transform as well (e.g. with
    # `rio.set_spatial_dims("x", "y")` and `rio.write_transform()`), and fill
    # in proxy values that sum to one per region.
    uniform_proxy = uniform_proxy.rio.set_crs(polygons.crs)

    return uniform_proxy
109
+
110
+
111
def get_belongs_to_matrix(raster: xr.Dataset, polygons: gpd.GeoSeries) -> xr.Dataset:
    r"""
    Get a matrix which indicates which polygon each raster point belongs to.

    Parameters
    ----------
    raster : xr.Dataset
        Raster data to get the matrix for. Must have exactly two dimensions
        and provide `.shape`, `.x` and `.y`.
    polygons : gpd.GeoSeries
        Polygons to compute the matrix for. The index values are used as
        the polygon identifiers written into the matrix.

    Returns
    -------
    xr.Dataset
        Matrix which indicates which polygon each raster point belongs to.
        Cells that are covered by no polygon keep the initial empty value.
    """
    # Importing rioxarray registers the `.rio` accessor used below; nothing
    # else in this module imports it.
    import rioxarray  # noqa: F401

    assert len(raster.dims) == 2, "Raster data should have 2 dimensions."

    # Evaluate the affine transform once: it is stored on the result and
    # reused for every polygon.
    transform = raster.rio.transform()

    # create an empty dataarray with the coords matching raster and spatial_units
    belongs_to_matrix = xr.DataArray(
        data=None, dims=["y", "x"], coords={"y": raster.y, "x": raster.x}
    )
    # Store the transform itself, not the accessor method that computes it.
    belongs_to_matrix.attrs["transform"] = transform
    belongs_to_matrix.attrs["crs"] = raster.rio.crs

    for polygon_id, geometry in polygons.items():
        # With invert=True the mask is True for cells inside the geometry.
        mask = geometry_mask(
            [geometry],
            out_shape=raster.shape,
            transform=transform,
            invert=True,
        )
        mask = xr.DataArray(mask, coords=raster.coords, dims=raster.dims)
        # Cells inside the geometry get this polygon's identifier. There is no
        # overlap check, so a later polygon overwrites an earlier one.
        belongs_to_matrix = belongs_to_matrix.where(~mask, polygon_id)

    return belongs_to_matrix
gregor/raster.py ADDED
@@ -0,0 +1,74 @@
1
+ from pathlib import Path
2
+
3
+ import geopandas as gpd
4
+ import rasterio as rio
5
+ import xarray as xr
6
+ from shapely.geometry import box
7
+
8
+
9
+ # TODO read the docs first
10
+ # https://corteva.github.io/rioxarray/html/examples/clip_geom.html#Clipping-larger-rasters
11
+ # can use from_disk=True
12
+
13
+
14
def clip(raster: str | Path | xr.DataArray, minx, miny, maxx, maxy, destination=None):
    r"""
    Clip raster to bounding box.

    A DataArray is clipped in memory and returned. A raster file is clipped
    and the result is written to `destination`.

    Parameters
    ----------
    raster : str | Path | xr.DataArray
        Path to the raster file or xarray DataArray.
    minx : float
        Minimum x-coordinate of the bounding box.
    miny : float
        Minimum y-coordinate of the bounding box.
    maxx : float
        Maximum x-coordinate of the bounding box.
    maxy : float
        Maximum y-coordinate of the bounding box.
    destination : str | Path, optional
        Path to save the clipped raster, by default None.
        Required when `raster` is a path.

    Returns
    -------
    xr.DataArray or None
        Clipped raster when `raster` is a DataArray; None when `raster`
        is a path, in which case the result is only written to disk.

    Raises
    ------
    ValueError
        If `raster` is a path and no `destination` is given.
    TypeError
        If `raster` is neither a path nor an xarray DataArray.

    Notes
    -----
    For raster files the bounding box is interpreted as EPSG:4326
    coordinates by `_clip_file`. For a DataArray the bounds are passed
    to `rio.clip_box` unchanged.
    """
    if isinstance(raster, xr.DataArray):
        return _clip_xarray(raster, minx, miny, maxx, maxy)

    if isinstance(raster, (str, Path)):
        if destination is None:
            raise ValueError("Destination must be provided when clipping a file.")
        _clip_file(raster, minx, miny, maxx, maxy, destination)
        return None

    # Fail loudly on unsupported input instead of silently returning None.
    raise TypeError(
        "`raster` should be a path (str or Path) or an xarray DataArray, "
        f"got {type(raster).__name__}."
    )
44
+
45
+
46
def _clip_xarray(raster, minx, miny, maxx, maxy):
    r"""
    Clip an in-memory raster to a bounding box through the `rio` accessor.

    The four bounds are forwarded unchanged to `rio.clip_box`, whose result
    is returned.
    """
    bounds = {"minx": minx, "miny": miny, "maxx": maxx, "maxy": maxy}
    return raster.rio.clip_box(**bounds)
48
+
49
+
50
def _clip_file(raster, minx, miny, maxx, maxy, destination):
    r"""
    Clip a raster file to a bounding box and write the result to a GeoTIFF.

    Parameters
    ----------
    raster : str | Path
        Path of the raster file to clip.
    minx, miny, maxx, maxy : float
        Bounding box, interpreted as EPSG:4326 coordinates and reprojected
        to the raster's CRS before clipping.
    destination : str | Path
        Path the clipped raster is written to.
    """
    # Import the function explicitly: `rasterio.mask` is a submodule and is
    # only reachable as `rio.mask` if something else has already imported it.
    from rasterio.mask import mask as mask_raster

    bbox = gpd.GeoDataFrame(
        {"geometry": box(minx, miny, maxx, maxy)}, index=[0], crs="EPSG:4326"
    )

    with rio.open(raster) as src:
        # Reproject the bounding box to the raster's CRS
        bbox = bbox.to_crs(src.crs)

        # Clip raster; crop=True shrinks the output to the extent of the box
        out_raster, out_transform = mask_raster(src, bbox.geometry, crop=True)

        # Update metadata; out_raster is indexed as (band, row, column)
        out_meta = src.meta.copy()
        out_meta.update(
            {
                "driver": "GTiff",
                "height": out_raster.shape[1],
                "width": out_raster.shape[2],
                "transform": out_transform,
            }
        )

        # Save the clipped raster to a new file
        with rio.open(destination, "w", **out_meta) as dest:
            dest.write(out_raster)
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright 2024 Jann Launer
4
+
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the “Software”), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
@@ -0,0 +1,51 @@
1
+ Metadata-Version: 2.1
2
+ Name: gregor
3
+ Version: 0.0.1
4
+ Summary: A library for spatial aggregation and disaggregation
5
+ Author-email: Jann Launer <j.a.c.launer@tudelft.nl>
6
+ Project-URL: changelog, https://github.com/jnnr/gregor/blob/main/CHANGELOG.md
7
+ Project-URL: homepage, https://gregor.readthedocs.io/en/latest/
8
+ Project-URL: repository, https://github.com/jnnr/gregor.git
9
+ Keywords: gis
10
+ Classifier: Programming Language :: Python :: 3.8
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Requires-Python: <4,>=3.8
15
+ Description-Content-Type: text/markdown
16
+ License-File: LICENSE.txt
17
+ Requires-Dist: pandas >=1.2
18
+ Requires-Dist: numpy
19
+ Requires-Dist: matplotlib
20
+ Requires-Dist: geopandas
21
+ Requires-Dist: xarray
22
+ Requires-Dist: rasterio
23
+ Requires-Dist: rioxarray
24
+ Requires-Dist: rasterstats
25
+ Requires-Dist: click
26
+
27
+ # Gregor: Aggregation and disaggregation of spatial data
28
+
29
+ Gregor is a tool that makes your life easier when aggregating and dis-aggregating spatial data. It has been developed in the context of preparing data for energy system modeling, but can be applied in any situation involving spatial data.
30
+
31
+ ## Installation
32
+
33
+ Install the latest release using conda.
34
+
35
+ conda install gregor
36
+
37
+ Or install it from PyPI using pip.
38
+
39
+ pip install gregor
40
+
41
+ Alternatively, install an editable local version in an environment by cloning the repository and running:
42
+
43
+ pip install -e <path-to-repo>
44
+
45
+ ## Usage
46
+
47
+ Please have a look at the examples presented in the [documentation](https://gregor.readthedocs.io/en/latest/).
48
+
49
+ ## Development
50
+
51
+ If you encounter a bug, consider opening an issue on [GitHub](https://github.com/jnnr/gregor/issues).
@@ -0,0 +1,11 @@
1
+ gregor/__init__.py,sha256=wdAMuMvxGyJ5zqOJ0wxGfqYslw54F0UMzGXwWkHYqiE,46
2
+ gregor/aggregate.py,sha256=1yPDt5EUJhpRXkvSBDoIqUKT61gv1Pajs3KsZldbHsA,3542
3
+ gregor/cli.py,sha256=HV1XE_zdt-DQ_bwOU0kX_0TkXM-vSUX3sdIDvJGlDWw,1738
4
+ gregor/disaggregate.py,sha256=gh0Znm3FfbNVlBKFaTrt4XIo1br86o8bUES5MJoMrxQ,4714
5
+ gregor/raster.py,sha256=UamcoJpjTUXt3mhVYEkPelssqtRO8OzyYJNm6_5uvS0,2283
6
+ gregor-0.0.1.dist-info/LICENSE.txt,sha256=DiSkTik3mrx7H70-KVdfpcQA99Z1B020afGwIVqQrqk,1082
7
+ gregor-0.0.1.dist-info/METADATA,sha256=kIBjXqz1uSGkg6MhL40hZA_EjV__Ynum94D5rzxqGd4,1709
8
+ gregor-0.0.1.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
9
+ gregor-0.0.1.dist-info/entry_points.txt,sha256=xcbxY9eHnsM1tk0V70JOMfeEHF9RBBl-zb3BAmRZENE,42
10
+ gregor-0.0.1.dist-info/top_level.txt,sha256=SABPp7uleCLafjHIZ7ap6XlYNzsK5gIpG12BsNX8RnM,7
11
+ gregor-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (72.1.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ gregor = gregor.cli:cli
@@ -0,0 +1 @@
1
+ gregor