giga-spatial 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- giga_spatial-0.6.0.dist-info/METADATA +141 -0
- giga_spatial-0.6.0.dist-info/RECORD +47 -0
- giga_spatial-0.6.0.dist-info/WHEEL +5 -0
- giga_spatial-0.6.0.dist-info/licenses/LICENSE +661 -0
- giga_spatial-0.6.0.dist-info/top_level.txt +1 -0
- gigaspatial/__init__.py +1 -0
- gigaspatial/config.py +226 -0
- gigaspatial/core/__init__.py +0 -0
- gigaspatial/core/io/__init__.py +5 -0
- gigaspatial/core/io/adls_data_store.py +325 -0
- gigaspatial/core/io/data_api.py +113 -0
- gigaspatial/core/io/data_store.py +147 -0
- gigaspatial/core/io/local_data_store.py +92 -0
- gigaspatial/core/io/readers.py +265 -0
- gigaspatial/core/io/writers.py +128 -0
- gigaspatial/core/schemas/__init__.py +0 -0
- gigaspatial/core/schemas/entity.py +244 -0
- gigaspatial/generators/__init__.py +2 -0
- gigaspatial/generators/poi.py +636 -0
- gigaspatial/generators/zonal/__init__.py +3 -0
- gigaspatial/generators/zonal/base.py +370 -0
- gigaspatial/generators/zonal/geometry.py +439 -0
- gigaspatial/generators/zonal/mercator.py +78 -0
- gigaspatial/grid/__init__.py +1 -0
- gigaspatial/grid/mercator_tiles.py +286 -0
- gigaspatial/handlers/__init__.py +40 -0
- gigaspatial/handlers/base.py +761 -0
- gigaspatial/handlers/boundaries.py +305 -0
- gigaspatial/handlers/ghsl.py +772 -0
- gigaspatial/handlers/giga.py +145 -0
- gigaspatial/handlers/google_open_buildings.py +472 -0
- gigaspatial/handlers/hdx.py +241 -0
- gigaspatial/handlers/mapbox_image.py +208 -0
- gigaspatial/handlers/maxar_image.py +291 -0
- gigaspatial/handlers/microsoft_global_buildings.py +548 -0
- gigaspatial/handlers/ookla_speedtest.py +199 -0
- gigaspatial/handlers/opencellid.py +290 -0
- gigaspatial/handlers/osm.py +356 -0
- gigaspatial/handlers/overture.py +126 -0
- gigaspatial/handlers/rwi.py +157 -0
- gigaspatial/handlers/unicef_georepo.py +806 -0
- gigaspatial/handlers/worldpop.py +266 -0
- gigaspatial/processing/__init__.py +4 -0
- gigaspatial/processing/geo.py +1054 -0
- gigaspatial/processing/sat_images.py +39 -0
- gigaspatial/processing/tif_processor.py +477 -0
- gigaspatial/processing/utils.py +49 -0
@@ -0,0 +1,370 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from pydantic import BaseModel, Field
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import Dict, List, Optional, Union, Callable, TypeVar, Generic
|
5
|
+
from shapely.geometry import Polygon
|
6
|
+
|
7
|
+
import geopandas as gpd
|
8
|
+
import pandas as pd
|
9
|
+
import numpy as np
|
10
|
+
|
11
|
+
from gigaspatial.core.io.data_store import DataStore
|
12
|
+
from gigaspatial.core.io.local_data_store import LocalDataStore
|
13
|
+
from gigaspatial.core.io.writers import write_dataset
|
14
|
+
from gigaspatial.config import config as global_config
|
15
|
+
from gigaspatial.processing.geo import (
|
16
|
+
convert_to_geodataframe,
|
17
|
+
aggregate_polygons_to_zones,
|
18
|
+
aggregate_points_to_zones,
|
19
|
+
)
|
20
|
+
from gigaspatial.processing.tif_processor import (
|
21
|
+
TifProcessor,
|
22
|
+
sample_multiple_tifs_by_polygons,
|
23
|
+
)
|
24
|
+
from functools import lru_cache
|
25
|
+
import logging
|
26
|
+
|
27
|
+
|
28
|
+
class ZonalViewGeneratorConfig(BaseModel):
    """Configuration for zonal view generation.

    Attributes:
        base_path (Path): Base directory path for storing zonal views. Defaults to
            the path returned by ``global_config.get_path("zonal", "views")``.
        output_format (str): Default output format for saved views. Defaults to "parquet".
        ensure_available (bool): Defaults to True. It is not read by the code
            visible in this module. NOTE(review): presumably consumed by
            subclasses or callers -- confirm its intended meaning.
    """

    # The default is computed once, when this class body is executed.
    base_path: Path = Field(default=global_config.get_path("zonal", "views"))
    output_format: str = "parquet"
    ensure_available: bool = True
|
40
|
+
|
41
|
+
|
42
|
+
T = TypeVar("T")  # Type of a single zone identifier (see get_zone_identifiers() -> List[T])
|
43
|
+
|
44
|
+
|
45
|
+
class ZonalViewGenerator(ABC, Generic[T]):
    """Base class for mapping data to zonal datasets.

    This class provides the framework for mapping various data sources (points,
    polygons, rasters) to zonal geometries like grid tiles or catchment areas. It
    is an abstract base class: subclasses implement ``get_zonal_geometries`` and
    ``get_zone_identifiers`` to define a specific zonal system.

    The class supports three main types of data mapping:
        - Point data aggregation to zones (``map_points``)
        - Polygon data aggregation with optional area weighting (``map_polygons``)
        - Raster data sampling and statistics (``map_rasters``)

    Attributes:
        config (ZonalViewGeneratorConfig): Configuration for the generator.
        data_store (DataStore): The data store passed to ``write_dataset`` when
            saving views.
        logger: Logger instance for this class.
    """

    def __init__(
        self,
        config: Optional[ZonalViewGeneratorConfig] = None,
        data_store: Optional[DataStore] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """Initialize the ZonalViewGenerator.

        Args:
            config (ZonalViewGeneratorConfig, optional): Configuration for the
                generator. If None, a default ``ZonalViewGeneratorConfig()`` is used.
            data_store (DataStore, optional): The data store to use.
                If None, a ``LocalDataStore()`` is created.
            logger (logging.Logger, optional): Logger to use. If None, one is
                obtained from the global config, named after the concrete class.
        """
        self.config = config or ZonalViewGeneratorConfig()
        self.data_store = data_store or LocalDataStore()
        self.logger = logger or global_config.get_logger(self.__class__.__name__)

    @abstractmethod
    def get_zonal_geometries(self) -> List[Polygon]:
        """Get the geometries of the zones.

        This method must be implemented by subclasses to return the actual geometric
        shapes of the zones (e.g., grid tiles, catchment boundaries, administrative areas).

        Returns:
            List[Polygon]: A list of Shapely Polygon objects representing zone geometries.
        """
        pass

    @abstractmethod
    def get_zone_identifiers(self) -> List[T]:
        """Get unique identifiers for each zone.

        This method must be implemented by subclasses to return identifiers that
        correspond one-to-one with the geometries returned by get_zonal_geometries().

        Returns:
            List[T]: A list of zone identifiers (e.g., quadkeys, H3 indices, tile IDs).
                The type T is determined by the specific zonal system implementation.
        """
        pass

    def to_geodataframe(self) -> gpd.GeoDataFrame:
        """Convert zones to a GeoDataFrame.

        Builds a new GeoDataFrame from the zone identifiers and geometries and
        labels it with the WGS84 (EPSG:4326) coordinate reference system. The
        geometries are not reprojected here, so subclasses are expected to
        return them already in EPSG:4326.

        Returns:
            gpd.GeoDataFrame: A GeoDataFrame with 'zone_id' and 'geometry' columns.
        """
        return gpd.GeoDataFrame(
            {
                "zone_id": self.get_zone_identifiers(),
                "geometry": self.get_zonal_geometries(),
            },
            crs="EPSG:4326",
        )

    @property
    def zone_gdf(self) -> gpd.GeoDataFrame:
        """Cached GeoDataFrame of zones.

        Returns:
            gpd.GeoDataFrame: Result of ``to_geodataframe()``, computed on first
                access and stored on the instance as ``_zone_gdf``.
        """
        if not hasattr(self, "_zone_gdf"):
            self._zone_gdf = self.to_geodataframe()
        return self._zone_gdf

    def map_points(
        self,
        points: Union[pd.DataFrame, gpd.GeoDataFrame],
        value_columns: Optional[Union[str, List[str]]] = None,
        aggregation: Union[str, Dict[str, str]] = "count",
        predicate: str = "within",
        output_suffix: str = "",
        mapping_function: Optional[Callable] = None,
        **mapping_kwargs,
    ) -> Dict:
        """Map point data to zones with spatial aggregation.

        Unless a custom ``mapping_function`` is supplied, the work is delegated to
        ``aggregate_points_to_zones`` using ``self.zone_gdf`` as the zones.

        Args:
            points (Union[pd.DataFrame, gpd.GeoDataFrame]): The point data to map.
            value_columns (Union[str, List[str]], optional): Column name(s) containing
                values to aggregate. If None (or empty), only point counts are returned.
            aggregation (Union[str, Dict[str, str]]): Aggregation method(s), either a
                single name or a mapping of column name to method. Defaults to "count".
            predicate (str): Spatial predicate for the point-to-zone relationship,
                forwarded as ``point_zone_predicate``. Defaults to "within".
            output_suffix (str): Suffix forwarded to the aggregation helper for its
                output column names. Defaults to empty string.
            mapping_function (Callable, optional): Custom function for mapping points
                to zones, called as ``mapping_function(self, points, **mapping_kwargs)``.
                When given, its result is returned directly and all other parameters
                except ``mapping_kwargs`` are ignored.
            **mapping_kwargs: Keyword arguments passed only to ``mapping_function``.

        Returns:
            Dict: ``result["point_count"].to_dict()`` when ``value_columns`` is
                falsy, otherwise ``result[value_columns].to_dict()``, where
                ``result`` is the output of ``aggregate_points_to_zones``.
                NOTE(review): the keys are the index labels of that result; they are
                zone IDs only if the helper indexes its output by zone ID -- confirm.
        """
        if mapping_function is not None:
            return mapping_function(self, points, **mapping_kwargs)

        self.logger.warning(
            "Using default points mapping implementation. Consider creating a specialized mapping function."
        )
        result = aggregate_points_to_zones(
            points=points,
            zones=self.zone_gdf,
            value_columns=value_columns,
            aggregation=aggregation,
            point_zone_predicate=predicate,
            zone_id_column="zone_id",
            output_suffix=output_suffix,
        )

        if not value_columns:
            return result["point_count"].to_dict()

        return result[value_columns].to_dict()

    def map_polygons(
        self,
        polygons: Union[pd.DataFrame, gpd.GeoDataFrame],
        value_columns: Optional[Union[str, List[str]]] = None,
        aggregation: Union[str, Dict[str, str]] = "sum",
        area_weighted: bool = False,
        area_column: str = "area_in_meters",
        mapping_function: Optional[Callable] = None,
        **mapping_kwargs,
    ) -> Dict:
        """Map polygon data to zones with optional area weighting.

        Unless a custom ``mapping_function`` is supplied, the input is normalised to
        a GeoDataFrame, an area column is added if it is missing, and the work is
        delegated to ``aggregate_polygons_to_zones`` using ``self.zone_gdf``.

        Args:
            polygons (Union[pd.DataFrame, gpd.GeoDataFrame]): The polygon data to map.
                A plain DataFrame is passed through ``convert_to_geodataframe``.
            value_columns (Union[str, List[str]], optional): Column name(s) to aggregate.
                If None, ``area_column`` is aggregated instead.
            aggregation (Union[str, Dict[str, str]]): Aggregation method(s), either a
                single name or a mapping of column name to method. Defaults to "sum".
            area_weighted (bool): Forwarded to the aggregation helper to request
                weighting by fractional intersection area. Defaults to False.
            area_column (str): Name of the column holding polygon areas. If the input
                lacks it, it is computed from the geometry in the estimated UTM CRS.
                Defaults to "area_in_meters".
            mapping_function (Callable, optional): Custom function for mapping polygons
                to zones, called as ``mapping_function(self, polygons, **mapping_kwargs)``.
                When given, its result is returned directly and all other parameters
                except ``mapping_kwargs`` are ignored.
            **mapping_kwargs: Keyword arguments passed only to ``mapping_function``.

        Returns:
            Dict: ``result[value_columns].to_dict()`` where ``result`` is the output
                of ``aggregate_polygons_to_zones``.
                NOTE(review): the keys are the index labels of that result; they are
                zone IDs only if the helper indexes its output by zone ID -- confirm.

        Raises:
            TypeError: If polygons cannot be converted to a GeoDataFrame.
        """
        if mapping_function is not None:
            return mapping_function(self, polygons, **mapping_kwargs)

        # Normalise the input first: the previous implementation inspected
        # `polygons_gdf` before it was ever assigned, so this path always failed.
        if isinstance(polygons, gpd.GeoDataFrame):
            polygons_gdf = polygons
        else:
            try:
                polygons_gdf = convert_to_geodataframe(polygons)
            except Exception as exc:
                raise TypeError(
                    "polygons must be a GeoDataFrame or convertible to one"
                ) from exc

        if area_column not in polygons_gdf.columns:
            if polygons_gdf is polygons:
                # Work on a copy so the caller's frame does not gain a column.
                polygons_gdf = polygons.copy()
            # Areas are measured in a projected (UTM) CRS estimated from the data.
            polygons_gdf[area_column] = polygons_gdf.to_crs(
                polygons_gdf.estimate_utm_crs()
            ).geometry.area

        if value_columns is None:
            self.logger.warning(
                "Using default polygon mapping implementation. Consider providing value_columns."
            )
            value_columns = area_column

        result = aggregate_polygons_to_zones(
            polygons=polygons_gdf,
            zones=self.zone_gdf,
            value_columns=value_columns,
            aggregation=aggregation,
            area_weighted=area_weighted,
            zone_id_column="zone_id",
        )

        return result[value_columns].to_dict()

    def map_rasters(
        self,
        tif_processors: List[TifProcessor],
        mapping_function: Optional[Callable] = None,
        stat: str = "mean",
        **mapping_kwargs,
    ) -> Union[np.ndarray, Dict]:
        """Map raster data to zones using zonal statistics.

        Unless a custom ``mapping_function`` is supplied, the zone geometries are
        expressed in the CRS of the first raster and handed to
        ``sample_multiple_tifs_by_polygons``.

        Args:
            tif_processors (List[TifProcessor]): TifProcessor objects for accessing
                raster data. Only the CRS of the first processor is inspected, so all
                processors should share the same CRS. An empty list raises
                ``IndexError`` on the default path.
            mapping_function (Callable, optional): Custom function for mapping rasters
                to zones, called as ``mapping_function(self, tif_processors, **mapping_kwargs)``.
                When given, its result is returned directly and ``stat`` is ignored.
            stat (str): Statistic forwarded to the sampling helper. Defaults to "mean".
            **mapping_kwargs: Keyword arguments passed only to ``mapping_function``.

        Returns:
            Union[np.ndarray, Dict]: On the default path, whatever
                ``sample_multiple_tifs_by_polygons`` returns. Custom mapping
                functions may return different data structures.

        Note:
            If the raster CRS differs from the CRS of ``zone_gdf``, the zone
            geometries are reprojected to the raster CRS before sampling; otherwise
            ``get_zonal_geometries()`` is used as is.
        """
        if mapping_function is not None:
            return mapping_function(self, tif_processors, **mapping_kwargs)

        self.logger.warning(
            "Using default raster mapping implementation. Consider creating a specialized mapping function."
        )

        raster_crs = tif_processors[0].crs

        if raster_crs != self.zone_gdf.crs:
            self.logger.info(f"Projecting zones to raster CRS: {raster_crs}")
            zone_geoms = self._get_transformed_geometries(raster_crs)
        else:
            zone_geoms = self.get_zonal_geometries()

        # Sample raster values
        sampled_values = sample_multiple_tifs_by_polygons(
            tif_processors=tif_processors, polygon_list=zone_geoms, stat=stat
        )

        return sampled_values

    def _get_transformed_geometries(self, target_crs):
        """Get zone geometries transformed to target coordinate reference system.

        Results are memoised per instance and per target CRS. The cache lives on
        the instance (not on the class, as an ``lru_cache``-decorated method would)
        so that it is released together with the generator.

        Args:
            target_crs: Target coordinate reference system for transformation.
                Must be hashable, since it is used as the cache key.

        Returns:
            List[Polygon]: List of zone geometries transformed to the target CRS.
                Repeated calls with the same CRS return the same list object.
        """
        cache = getattr(self, "_transformed_geometries", None)
        if cache is None:
            cache = self._transformed_geometries = {}
        if target_crs not in cache:
            cache[target_crs] = self.zone_gdf.to_crs(target_crs).geometry.tolist()
        return cache[target_crs]

    def save_view(
        self,
        view_data: gpd.GeoDataFrame,
        name: str,
        output_format: Optional[str] = None,
    ) -> Path:
        """Save the generated zonal view through ``write_dataset``.

        Args:
            view_data (gpd.GeoDataFrame): The zonal view data to save.
            name (str): Base name for the output file (without extension).
            output_format (str, optional): File format to save in (e.g., "parquet").
                If None or empty, ``self.config.output_format`` is used.

        Returns:
            Path: ``self.config.base_path / f"{name}.{format}"``, the path that was
                handed to ``write_dataset``.

        Note:
            The output directory comes from ``self.config.base_path``; the format
            name is appended as the file extension.
        """
        format_to_use = output_format or self.config.output_format
        output_path = self.config.base_path / f"{name}.{format_to_use}"

        self.logger.info(f"Saving zonal view to {output_path}")
        write_dataset(
            df=view_data,
            path=str(output_path),
            data_store=self.data_store,
            format=format_to_use,
        )

        return output_path
|