giga-spatial 0.6.3__py3-none-any.whl → 0.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {giga_spatial-0.6.3.dist-info → giga_spatial-0.6.5.dist-info}/METADATA +2 -1
- giga_spatial-0.6.5.dist-info/RECORD +50 -0
- gigaspatial/__init__.py +1 -1
- gigaspatial/config.py +35 -4
- gigaspatial/core/io/__init__.py +1 -0
- gigaspatial/core/io/database.py +316 -0
- gigaspatial/generators/__init__.py +5 -1
- gigaspatial/generators/poi.py +228 -43
- gigaspatial/generators/zonal/__init__.py +2 -1
- gigaspatial/generators/zonal/admin.py +84 -0
- gigaspatial/generators/zonal/base.py +221 -64
- gigaspatial/generators/zonal/geometry.py +74 -31
- gigaspatial/generators/zonal/mercator.py +50 -19
- gigaspatial/grid/__init__.py +1 -1
- gigaspatial/grid/mercator_tiles.py +33 -10
- gigaspatial/handlers/__init__.py +5 -1
- gigaspatial/handlers/boundaries.py +226 -48
- gigaspatial/handlers/ghsl.py +79 -14
- gigaspatial/handlers/giga.py +641 -0
- gigaspatial/handlers/hdx.py +50 -51
- gigaspatial/handlers/maxar_image.py +1 -2
- gigaspatial/handlers/rwi.py +5 -2
- gigaspatial/processing/algorithms.py +188 -0
- gigaspatial/processing/geo.py +87 -25
- gigaspatial/processing/tif_processor.py +220 -45
- giga_spatial-0.6.3.dist-info/RECORD +0 -47
- {giga_spatial-0.6.3.dist-info → giga_spatial-0.6.5.dist-info}/WHEEL +0 -0
- {giga_spatial-0.6.3.dist-info → giga_spatial-0.6.5.dist-info}/licenses/LICENSE +0 -0
- {giga_spatial-0.6.3.dist-info → giga_spatial-0.6.5.dist-info}/top_level.txt +0 -0
@@ -77,6 +77,7 @@ class ZonalViewGenerator(ABC, Generic[T]):
|
|
77
77
|
self.config = config or ZonalViewGeneratorConfig()
|
78
78
|
self.data_store = data_store or LocalDataStore()
|
79
79
|
self.logger = logger or global_config.get_logger(self.__class__.__name__)
|
80
|
+
self._view: Optional[pd.DataFrame] = None
|
80
81
|
|
81
82
|
@abstractmethod
|
82
83
|
def get_zonal_geometries(self) -> List[Polygon]:
|
@@ -103,7 +104,7 @@ class ZonalViewGenerator(ABC, Generic[T]):
|
|
103
104
|
"""
|
104
105
|
pass
|
105
106
|
|
106
|
-
def
|
107
|
+
def get_zone_geodataframe(self) -> gpd.GeoDataFrame:
|
107
108
|
"""Convert zones to a GeoDataFrame.
|
108
109
|
|
109
110
|
Creates a GeoDataFrame containing zone identifiers and their corresponding
|
@@ -131,9 +132,77 @@ class ZonalViewGenerator(ABC, Generic[T]):
|
|
131
132
|
and identifiers.
|
132
133
|
"""
|
133
134
|
if not hasattr(self, "_zone_gdf"):
|
134
|
-
self._zone_gdf = self.
|
135
|
+
self._zone_gdf = self.get_zone_geodataframe()
|
135
136
|
return self._zone_gdf
|
136
137
|
|
138
|
+
@property
def view(self) -> pd.DataFrame:
    """Return the tabular zonal view, building it on first access.

    The view is cached in ``self._view``. When nothing has been cached yet,
    it is initialised from ``zone_gdf`` with the ``geometry`` column dropped.

    Returns:
        pd.DataFrame: The cached zonal view, i.e. the zone identifiers plus
            any variables that have been added so far.
    """
    cached = self._view
    if cached is not None:
        return cached

    # First access: seed the view from the zone table, minus geometries.
    self._view = self.zone_gdf.drop(columns="geometry")
    return self._view
|
150
|
+
|
151
|
+
def add_variable_to_view(self, data_dict: Dict, column_name: str) -> None:
    """Add a new variable (column) to the zonal view.

    The dictionary (typically the result of ``map_points`` or
    ``map_polygons``) is turned into a Series and left-joined onto the
    internal ``_view`` by ``zone_id``. If the view has not been initialised
    yet, it is first created from ``zone_gdf`` without the geometry column.

    Mismatched keys are tolerated: zones of the view that are absent from
    ``data_dict`` end up with NaN in the new column, and keys of
    ``data_dict`` that are not zones of the view are ignored. Both cases are
    reported through ``self.logger.warning``.

    Args:
        data_dict (Dict): Mapping of ``zone_id`` to the value to store.
        column_name (str): Name of the column to add to the view.

    Raises:
        ValueError: If ``column_name`` already exists in the zonal view.
    """

    def _preview(zone_ids, limit: int = 5) -> str:
        # Sets are unordered; sort (by string form, so mixed key types are
        # safe) to make the log output stable, and only add an ellipsis when
        # something was actually cut off.
        ordered = sorted(zone_ids, key=str)
        suffix = "..." if len(ordered) > limit else ""
        return f"{ordered[:limit]}{suffix}"

    if self._view is None:
        self._view = self.zone_gdf.drop(columns="geometry")

    if column_name in self._view.columns:
        raise ValueError(
            f"Column '{column_name}' already exists in the zonal view."
        )

    # Series indexed by zone_id so the join below aligns on zone identifiers.
    new_series = pd.Series(data_dict, name=column_name)

    view_zone_ids = set(self._view["zone_id"])
    data_zone_ids = set(new_series.index)
    missing_zones_in_data = view_zone_ids - data_zone_ids
    extra_zones_in_data = data_zone_ids - view_zone_ids

    if missing_zones_in_data:
        self.logger.warning(
            f"Warning: {len(missing_zones_in_data)} zone(s) from the zonal view "
            f"are missing in the provided data_dict for column '{column_name}'. "
            f"These zones will have NaN values for '{column_name}'. "
            f"Missing: {_preview(missing_zones_in_data)}"
        )
    if extra_zones_in_data:
        self.logger.warning(
            f"Warning: {len(extra_zones_in_data)} zone(s) in the provided data_dict "
            f"are not present in the zonal view for column '{column_name}'. "
            f"These will be ignored. Extra: {_preview(extra_zones_in_data)}"
        )

    # Left join on zone_id: keeps every zone of the view, drops extra keys.
    original_index_name = self._view.index.name
    self._view = self._view.set_index("zone_id").join(new_series).reset_index()
    # reset_index() yields a fresh index; carry over the previous index name
    # if there was one, otherwise leave it unnamed.
    self._view.index.name = original_index_name if original_index_name else None

    self.logger.info(f"Added variable '{column_name}' to the zonal view.")
|
205
|
+
|
137
206
|
def map_points(
|
138
207
|
self,
|
139
208
|
points: Union[pd.DataFrame, gpd.GeoDataFrame],
|
@@ -187,84 +256,129 @@ class ZonalViewGenerator(ABC, Generic[T]):
|
|
187
256
|
output_suffix=output_suffix,
|
188
257
|
)
|
189
258
|
|
190
|
-
if
|
191
|
-
return result
|
192
|
-
|
193
|
-
|
259
|
+
if isinstance(value_columns, str):
|
260
|
+
return result.set_index("zone_id")[value_columns].to_dict()
|
261
|
+
elif isinstance(value_columns, list):
|
262
|
+
# If multiple value columns, return a dictionary of dictionaries
|
263
|
+
# Or, if preferred, a dictionary where values are lists/tuples of results
|
264
|
+
# For now, let's return a dict of series, which is common.
|
265
|
+
# The previous version implied a single dictionary result from map_points/polygons
|
266
|
+
# but with multiple columns, it's usually {zone_id: {col1: val1, col2: val2}}
|
267
|
+
# or {col_name: {zone_id: val}}
|
268
|
+
# In this version, it'll return a dictionary for each column.
|
269
|
+
return {
|
270
|
+
col: result.set_index("zone_id")[col].to_dict()
|
271
|
+
for col in value_columns
|
272
|
+
}
|
273
|
+
else: # If value_columns is None, it should return point_count
|
274
|
+
return result.set_index("zone_id")["point_count"].to_dict()
|
194
275
|
|
195
276
|
def map_polygons(
    self,
    polygons: "Union[pd.DataFrame, gpd.GeoDataFrame]",
    value_columns: Optional[Union[str, List[str]]] = None,
    aggregation: Union[str, Dict[str, str]] = "count",
    predicate: str = "intersects",
    **kwargs,
) -> Dict:
    """
    Maps polygon data to the instance's zones and aggregates values.

    This method delegates to `aggregate_polygons_to_zones` to aggregate the
    polygon data onto `self.zone_gdf`, then converts the result into plain
    dictionaries keyed by zone ID.

    Args:
        polygons (Union[pd.DataFrame, gpd.GeoDataFrame]):
            The polygon data to map. Must contain geometry information if a
            DataFrame. The input is never modified by this method.
        value_columns (Union[str, List[str]], optional):
            The column name(s) from the `polygons` data to aggregate. If `None`,
            the polygons matching the given `predicate` are counted per zone.
        aggregation (Union[str, Dict[str, str]], optional):
            The aggregation method(s) to use. Can be a single string (e.g., "sum",
            "mean", "max") or a dictionary mapping column names to specific
            aggregation methods. This is ignored and set to "count" if
            `value_columns` is `None`. Defaults to "count".
        predicate (str, optional):
            The spatial relationship handed to `aggregate_polygons_to_zones`:
            - "intersects": Counts or aggregates values for any polygon that
              intersects a zone.
            - "within": Counts or aggregates values for polygons that are
              entirely contained within a zone.
            - "fractional": Performs area-weighted aggregation. The value of a
              polygon is distributed proportionally to the area of its overlap
              with each zone.
            Defaults to "intersects".
        **kwargs:
            Accepted for interface compatibility. They are not forwarded to
            `aggregate_polygons_to_zones` by this implementation.

    Returns:
        Dict:
            If `value_columns` is a single string or `None`, a dictionary
            mapping zone ID to the aggregated value (the polygon count when
            `value_columns` is `None`). If `value_columns` is a list, a nested
            dictionary mapping each column name to its own
            `{zone_id: value}` dictionary.

    Raises:
        ValueError: If `value_columns` is neither `None`, a string nor a list.

    Example:
        >>> # Count all land parcels that intersect each zone
        >>> parcel_counts = self.map_polygons(landuse_polygons)
        >>>
        >>> # Aggregate total population within zones using area weighting
        >>> population_by_zone = self.map_polygons(
        ...     landuse_polygons,
        ...     value_columns="population",
        ...     predicate="fractional",
        ...     aggregation="sum"
        ... )
    """
    # Fail fast on an unsupported type instead of after the aggregation.
    if value_columns is not None and not isinstance(value_columns, (str, list)):
        raise ValueError("Unexpected type for actual_value_columns.")

    if value_columns is None:
        self.logger.warning(
            f"No value_columns specified. Defaulting to counting polygons with {predicate} predicate."
        )
        temp_value_col = "_temp_polygon_count_dummy"
        # assign() returns a new frame, so the caller's data does not gain
        # the temporary counting column.
        polygons = polygons.assign(**{temp_value_col: 1})
        actual_value_columns = temp_value_col
        aggregation = "count"  # Force count if no value columns
    else:
        actual_value_columns = value_columns

    result = aggregate_polygons_to_zones(
        polygons=polygons,
        zones=self.zone_gdf,
        value_columns=actual_value_columns,
        aggregation=aggregation,
        predicate=predicate,
        zone_id_column="zone_id",
    )

    # Convert the aggregated frame to the dictionary format callers expect.
    by_zone = result.set_index("zone_id")
    if isinstance(actual_value_columns, str):
        return by_zone[actual_value_columns].to_dict()
    return {col: by_zone[col].to_dict() for col in actual_value_columns}
|
268
382
|
|
269
383
|
def map_rasters(
|
270
384
|
self,
|
@@ -291,7 +405,7 @@ class ZonalViewGenerator(ABC, Generic[T]):
|
|
291
405
|
|
292
406
|
Returns:
|
293
407
|
Union[np.ndarray, Dict]: By default, returns a NumPy array of sampled values
|
294
|
-
with shape (n_zones,
|
408
|
+
with shape (n_zones, 1), taking the first non-nodata value encountered.
|
295
409
|
Custom mapping functions may return different data structures.
|
296
410
|
|
297
411
|
Note:
|
@@ -318,7 +432,9 @@ class ZonalViewGenerator(ABC, Generic[T]):
|
|
318
432
|
tif_processors=tif_processors, polygon_list=zone_geoms, stat=stat
|
319
433
|
)
|
320
434
|
|
321
|
-
|
435
|
+
zone_ids = self.get_zone_identifiers()
|
436
|
+
|
437
|
+
return {zone_id: value for zone_id, value in zip(zone_ids, sampled_values)}
|
322
438
|
|
323
439
|
@lru_cache(maxsize=32)
|
324
440
|
def _get_transformed_geometries(self, target_crs):
|
@@ -337,34 +453,75 @@ class ZonalViewGenerator(ABC, Generic[T]):
|
|
337
453
|
|
338
454
|
def save_view(
    self,
    name: str,
    output_format: Optional[str] = None,
) -> Path:
    """Save the generated zonal view to disk.

    Args:
        name (str): Base name for the output file (without extension).
        output_format (str, optional): File format to save in (e.g., "parquet",
            "geojson", "shp"). If None, uses the format specified in config.

    Returns:
        Path: The full path where the view was saved.

    Note:
        The output directory is determined by the config.base_path setting.
        The file extension is automatically added based on the output format.
        This method saves the internal `self.view`; when no variable has been
        added yet, the base `zone_gdf` is saved instead. For "geojson", "shp"
        and "gpkg" the zone geometries are re-attached and the data is written
        as a GeoDataFrame.
    """
    if self._view is None:
        self.logger.warning(
            "No variables have been added to the zonal view. Saving the base zone_gdf."
        )
        view_to_save = self.zone_gdf
    else:
        view_to_save = self._view

    format_to_use = output_format or self.config.output_format
    output_path = self.config.base_path / f"{name}.{format_to_use}"

    self.logger.info(f"Saving zonal view to {output_path}")

    if format_to_use in ["geojson", "shp", "gpkg"]:
        self.logger.warning(
            f"Saving to {format_to_use} requires converting back to GeoDataFrame. Geometry column will be re-added."
        )
        zone_gdf = self.zone_gdf
        merged = self.view.merge(
            zone_gdf[["zone_id", "geometry"]], on="zone_id", how="left"
        )
        # A merge with a plain DataFrame on the left returns a plain
        # DataFrame, so rebuild the GeoDataFrame explicitly (keeping the CRS).
        view_to_save = gpd.GeoDataFrame(
            merged, geometry="geometry", crs=zone_gdf.crs
        )

    write_dataset(
        data=view_to_save,
        path=str(output_path),
        data_store=self.data_store,
    )

    return output_path
|
503
|
+
|
504
|
+
def to_dataframe(self) -> pd.DataFrame:
|
505
|
+
"""
|
506
|
+
Returns the current zonal view as a DataFrame.
|
507
|
+
|
508
|
+
This returns the accumulated variables without zone geometries; it is equivalent to reading the `view` property.
|
509
|
+
|
510
|
+
Returns:
|
511
|
+
pd.DataFrame: The current view.
|
512
|
+
"""
|
513
|
+
return self.view
|
514
|
+
|
515
|
+
def to_geodataframe(self) -> gpd.GeoDataFrame:
    """
    Returns the current zonal view merged with zone geometries as a GeoDataFrame.

    The accumulated variables in `self.view` are left-joined with the
    `zone_id`/`geometry` columns of `self.zone_gdf`, so every row of the view
    is kept and gains its zone geometry.

    Returns:
        gpd.GeoDataFrame: The current view with a `geometry` column, using the
            CRS of `self.zone_gdf`.
    """
    zone_gdf = self.zone_gdf
    merged = self.view.merge(
        zone_gdf[["zone_id", "geometry"]], on="zone_id", how="left"
    )
    # The view is a plain DataFrame, and a merge with it on the left yields a
    # plain DataFrame; wrap the result so callers really get a GeoDataFrame.
    return gpd.GeoDataFrame(merged, geometry="geometry", crs=zone_gdf.crs)
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import Dict, List, Optional, Union
|
1
|
+
from typing import Dict, List, Optional, Union, Literal
|
2
2
|
from shapely.geometry import Polygon, MultiPolygon
|
3
3
|
|
4
4
|
import geopandas as gpd
|
@@ -136,9 +136,9 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
136
136
|
List[T]: A list of zone identifiers in the order they appear in the
|
137
137
|
underlying GeoDataFrame.
|
138
138
|
"""
|
139
|
-
return self._zone_gdf
|
139
|
+
return self._zone_gdf.zone_id.tolist()
|
140
140
|
|
141
|
-
def
|
141
|
+
def get_zone_geodataframe(self) -> gpd.GeoDataFrame:
|
142
142
|
"""Convert zones to a GeoDataFrame with standardized column names.
|
143
143
|
|
144
144
|
Returns:
|
@@ -158,7 +158,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
158
158
|
stat: str = "sum",
|
159
159
|
name_prefix: str = "built_surface_m2_",
|
160
160
|
**kwargs,
|
161
|
-
) ->
|
161
|
+
) -> pd.DataFrame:
|
162
162
|
"""Map GHSL Built-up Surface data to zones.
|
163
163
|
|
164
164
|
Convenience method for mapping Global Human Settlement Layer Built-up Surface
|
@@ -172,7 +172,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
172
172
|
name_prefix (str): Prefix for the output column name. Defaults to "built_surface_m2_".
|
173
173
|
|
174
174
|
Returns:
|
175
|
-
|
175
|
+
pd.DataFrame: Updated DataFrame with zones and built surface metrics.
|
176
176
|
Adds a column named "{name_prefix}{stat}" containing the aggregated values.
|
177
177
|
"""
|
178
178
|
handler = GHSLDataHandler(
|
@@ -190,11 +190,11 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
190
190
|
def map_smod(
|
191
191
|
self,
|
192
192
|
year=2020,
|
193
|
-
resolution=
|
193
|
+
resolution=1000,
|
194
194
|
stat: str = "median",
|
195
195
|
name_prefix: str = "smod_class_",
|
196
196
|
**kwargs,
|
197
|
-
) ->
|
197
|
+
) -> pd.DataFrame:
|
198
198
|
"""Map GHSL Settlement Model data to zones.
|
199
199
|
|
200
200
|
Convenience method for mapping Global Human Settlement Layer Settlement Model
|
@@ -208,7 +208,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
208
208
|
name_prefix (str): Prefix for the output column name. Defaults to "smod_class_".
|
209
209
|
|
210
210
|
Returns:
|
211
|
-
|
211
|
+
pd.DataFrame: Updated DataFrame with zones and settlement classification.
|
212
212
|
Adds a column named "{name_prefix}{stat}" containing the aggregated values.
|
213
213
|
"""
|
214
214
|
handler = GHSLDataHandler(
|
@@ -230,7 +230,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
230
230
|
stat: str,
|
231
231
|
name_prefix: Optional[str] = None,
|
232
232
|
**kwargs,
|
233
|
-
) ->
|
233
|
+
) -> pd.DataFrame:
|
234
234
|
"""Map Global Human Settlement Layer data to zones.
|
235
235
|
|
236
236
|
Loads and processes GHSL raster data for the intersecting tiles, then samples
|
@@ -245,7 +245,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
245
245
|
If None, uses the GHSL product name in lowercase followed by underscore.
|
246
246
|
|
247
247
|
Returns:
|
248
|
-
|
248
|
+
pd.DataFrame: Updated DataFrame with GHSL metrics.
|
249
249
|
Adds a column named "{name_prefix}{stat}" containing the sampled values.
|
250
250
|
|
251
251
|
Note:
|
@@ -269,17 +269,15 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
269
269
|
name_prefix if name_prefix else handler.config.product.lower() + "_"
|
270
270
|
)
|
271
271
|
column_name = f"{name_prefix}{stat}"
|
272
|
-
self.
|
272
|
+
self.add_variable_to_view(sampled_values, column_name)
|
273
273
|
|
274
|
-
self.
|
275
|
-
|
276
|
-
return self._zone_gdf.copy()
|
274
|
+
return self.view
|
277
275
|
|
278
276
|
def map_google_buildings(
|
279
277
|
self,
|
280
278
|
handler: Optional[GoogleOpenBuildingsHandler] = None,
|
281
279
|
use_polygons: bool = False,
|
282
|
-
) ->
|
280
|
+
) -> pd.DataFrame:
|
283
281
|
"""Map Google Open Buildings data to zones.
|
284
282
|
|
285
283
|
Processes Google Open Buildings dataset to calculate building counts and total
|
@@ -295,7 +293,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
295
293
|
area values from attributes for faster processing. Defaults to False.
|
296
294
|
|
297
295
|
Returns:
|
298
|
-
|
296
|
+
pd.DataFrame: Updated DataFrame with building metrics.
|
299
297
|
Adds columns:
|
300
298
|
- 'google_buildings_count': Number of buildings in each zone
|
301
299
|
- 'google_buildings_area_in_meters': Total building area in square meters
|
@@ -341,19 +339,20 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
341
339
|
self.logger.info(
|
342
340
|
"Calculating building areas with area-weighted aggregation"
|
343
341
|
)
|
344
|
-
area_result = self.map_polygons(
|
342
|
+
area_result = self.map_polygons(
|
343
|
+
buildings_gdf,
|
344
|
+
value_columns="area_in_meters",
|
345
|
+
aggregation="sum",
|
346
|
+
predicate="fractional",
|
347
|
+
)
|
345
348
|
|
346
349
|
self.logger.info("Counting buildings using points data")
|
347
350
|
count_result = self.map_points(points=buildings_df, predicate="within")
|
348
351
|
|
349
|
-
self.
|
350
|
-
self.
|
351
|
-
area_result
|
352
|
-
)
|
353
|
-
|
354
|
-
self.logger.info(f"Added Google building data")
|
352
|
+
self.add_variable_to_view(count_result, "google_buildings_count")
|
353
|
+
self.add_variable_to_view(area_result, "google_buildings_area_in_meters")
|
355
354
|
|
356
|
-
return self.
|
355
|
+
return self.view
|
357
356
|
|
358
357
|
def map_ms_buildings(
|
359
358
|
self,
|
@@ -400,7 +399,9 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
400
399
|
)
|
401
400
|
return self._zone_gdf.copy()
|
402
401
|
|
403
|
-
buildings_gdf = add_area_in_meters(
|
402
|
+
buildings_gdf = add_area_in_meters(
|
403
|
+
buildings_gdf, area_column_name="area_in_meters"
|
404
|
+
)
|
404
405
|
|
405
406
|
building_centroids = get_centroids(buildings_gdf)
|
406
407
|
|
@@ -421,7 +422,12 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
421
422
|
self.logger.info(
|
422
423
|
"Calculating building areas with area-weighted aggregation"
|
423
424
|
)
|
424
|
-
area_result = self.map_polygons(
|
425
|
+
area_result = self.map_polygons(
|
426
|
+
buildings_gdf,
|
427
|
+
value_columns="area_in_meters",
|
428
|
+
aggregation="sum",
|
429
|
+
predicate="fractional",
|
430
|
+
)
|
425
431
|
|
426
432
|
self.logger.info("Counting Microsoft buildings per zone")
|
427
433
|
|
@@ -429,11 +435,48 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
429
435
|
points=building_centroids, predicate="within"
|
430
436
|
)
|
431
437
|
|
432
|
-
self.
|
433
|
-
self.
|
434
|
-
|
438
|
+
self.add_variable_to_view(count_result, "ms_buildings_count")
|
439
|
+
self.add_variable_to_view(area_result, "ms_buildings_area_in_meters")
|
440
|
+
|
441
|
+
return self.view
|
442
|
+
|
443
|
+
def map_ghsl_pop(
    self,
    year=2020,
    resolution=100,
    stat: str = "sum",
    name_prefix: str = "ghsl_pop_",
    predicate: Literal["intersects", "fractional"] = "intersects",
    **kwargs,
) -> pd.DataFrame:
    """Map GHSL population (GHS_POP) data to zones.

    With `predicate="fractional"` and a 1000m dataset, the raster is loaded as
    polygons via the handler and aggregated with area weighting through
    `map_polygons`. In every other case the work is delegated to `map_ghsl`.

    Args:
        year: GHSL reference year passed to `GHSLDataHandler`. Defaults to 2020.
        resolution: Dataset resolution passed to `GHSLDataHandler`.
            Defaults to 100.
        stat (str): Aggregation statistic; also the suffix of the output
            column name. Defaults to "sum".
        name_prefix (str): Prefix for the output column name.
            Defaults to "ghsl_pop_".
        predicate: "intersects" (default) or "fractional". "fractional" is
            only honoured for `resolution=1000`; otherwise a warning is logged
            and the "intersects" behaviour is used.
        **kwargs: Forwarded to both `GHSLDataHandler` and `map_ghsl`.

    Returns:
        pd.DataFrame: The zonal view, with a column named
            "{name_prefix}{stat}" added.
    """
    handler = GHSLDataHandler(
        product="GHS_POP",
        year=year,
        resolution=resolution,
        data_store=self.data_store,
        **kwargs,
    )

    use_fractional = predicate == "fractional"
    if use_fractional and resolution != 1000:
        # The message states the rule: only 1000m data may be aggregated
        # fractionally, so fall back for every other resolution, not just 100.
        self.logger.warning(
            "Fractional aggregations only supported for datasets with 1000m resolution. Using `intersects` as predicate"
        )
        use_fractional = False

    if use_fractional:
        gdf_pop = handler.load_into_geodataframe()

        # Aggregate with the requested statistic so the values match the
        # "{name_prefix}{stat}" column they are stored under.
        result = self.map_polygons(
            gdf_pop,
            value_columns="pixel_value",
            aggregation=stat,
            predicate="fractional",
        )

        column_name = f"{name_prefix}{stat}"
        self.add_variable_to_view(result, column_name)
        return self.view

    return self.map_ghsl(
        handler=handler, stat=stat, name_prefix=name_prefix, **kwargs
    )
|