giga-spatial 0.6.3__py3-none-any.whl → 0.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -77,6 +77,7 @@ class ZonalViewGenerator(ABC, Generic[T]):
         self.config = config or ZonalViewGeneratorConfig()
         self.data_store = data_store or LocalDataStore()
         self.logger = logger or global_config.get_logger(self.__class__.__name__)
+        self._view: Optional[pd.DataFrame] = None

     @abstractmethod
     def get_zonal_geometries(self) -> List[Polygon]:
@@ -103,7 +104,7 @@ class ZonalViewGenerator(ABC, Generic[T]):
         """
         pass

-    def to_geodataframe(self) -> gpd.GeoDataFrame:
+    def get_zone_geodataframe(self) -> gpd.GeoDataFrame:
         """Convert zones to a GeoDataFrame.

         Creates a GeoDataFrame containing zone identifiers and their corresponding
@@ -131,9 +132,77 @@ class ZonalViewGenerator(ABC, Generic[T]):
             and identifiers.
         """
         if not hasattr(self, "_zone_gdf"):
-            self._zone_gdf = self.to_geodataframe()
+            self._zone_gdf = self.get_zone_geodataframe()
         return self._zone_gdf

+    @property
+    def view(self) -> pd.DataFrame:
+        """The DataFrame representing the current zonal view.
+
+        Returns:
+            pd.DataFrame: The DataFrame containing zone IDs, and
+                any added variables. If no variables have been added,
+                it returns the base `zone_gdf` without geometries.
+        """
+        if self._view is None:
+            self._view = self.zone_gdf.drop(columns="geometry")
+        return self._view
+
+    def add_variable_to_view(self, data_dict: Dict, column_name: str) -> None:
+        """
+        Adds a new variable (column) to the zonal view GeoDataFrame.
+
+        This method takes a dictionary (typically the result of map_points or map_polygons)
+        and adds its values as a new column to the internal `_view` (or `zone_gdf` if not yet initialized).
+        The dictionary keys are expected to be the `zone_id` values.
+
+        Args:
+            data_dict (Dict): A dictionary where keys are `zone_id`s and values are
+                the data to be added.
+            column_name (str): The name of the new column to be added to the GeoDataFrame.
+        Raises:
+            ValueError: If the `data_dict` keys do not match the `zone_id`s in the zonal view.
+                If the `column_name` already exists in the zonal view.
+        """
+        if self._view is None:
+            self._view = self.zone_gdf.drop(columns="geometry")
+
+        if column_name in self._view.columns:
+            raise ValueError(
+                f"Column '{column_name}' already exists in the zonal view."
+            )
+
+        # Create a pandas Series from the dictionary, aligning by index (zone_id)
+        new_series = pd.Series(data_dict, name=column_name)
+
+        # Before merging, ensure the zone_ids in data_dict match those in _view
+        missing_zones_in_data = set(self._view["zone_id"]) - set(new_series.index)
+        extra_zones_in_data = set(new_series.index) - set(self._view["zone_id"])
+
+        if missing_zones_in_data:
+            self.logger.warning(
+                f"Warning: {len(missing_zones_in_data)} zone(s) from the zonal view "
+                f"are missing in the provided data_dict for column '{column_name}'. "
+                f"These zones will have NaN values for '{column_name}'. Missing: {list(missing_zones_in_data)[:5]}..."
+            )
+        if extra_zones_in_data:
+            self.logger.warning(
+                f"Warning: {len(extra_zones_in_data)} zone(s) in the provided data_dict "
+                f"are not present in the zonal view for column '{column_name}'. "
+                f"These will be ignored. Extra: {list(extra_zones_in_data)[:5]}..."
+            )
+
+        # Merge the new series with the _view based on 'zone_id'
+        # Using .set_index() for efficient alignment
+        original_index_name = self._view.index.name
+        self._view = self._view.set_index("zone_id").join(new_series).reset_index()
+        if original_index_name:  # Restore original index name if it existed
+            self._view.index.name = original_index_name
+        else:  # If it was a default integer index, ensure it's not named 'index'
+            self._view.index.name = None
+
+        self.logger.info(f"Added variable '{column_name}' to the zonal view.")
+
     def map_points(
         self,
         points: Union[pd.DataFrame, gpd.GeoDataFrame],
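
For orientation, a minimal sketch of how the new `view` / `add_variable_to_view` workflow introduced above is meant to be used from a concrete generator (the generator instance, data, and column names here are illustrative, not part of the package):

    # `generator` is any concrete ZonalViewGenerator subclass instance
    counts = generator.map_points(points_gdf)            # value_columns=None -> {zone_id: point_count}
    generator.add_variable_to_view(counts, "poi_count")  # attach the result as a named column
    print(generator.view.head())                         # plain DataFrame keyed by zone_id, no geometry
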
@@ -187,84 +256,129 @@ class ZonalViewGenerator(ABC, Generic[T]):
             output_suffix=output_suffix,
         )

-        if not value_columns:
-            return result["point_count"].to_dict()
-
-        return result[value_columns].to_dict()
+        if isinstance(value_columns, str):
+            return result.set_index("zone_id")[value_columns].to_dict()
+        elif isinstance(value_columns, list):
+            # If multiple value columns, return a dictionary of dictionaries
+            # Or, if preferred, a dictionary where values are lists/tuples of results
+            # For now, let's return a dict of series, which is common.
+            # The previous version implied a single dictionary result from map_points/polygons
+            # but with multiple columns, it's usually {zone_id: {col1: val1, col2: val2}}
+            # or {col_name: {zone_id: val}}
+            # In this version, it'll return a dictionary for each column.
+            return {
+                col: result.set_index("zone_id")[col].to_dict()
+                for col in value_columns
+            }
+        else:  # If value_columns is None, it should return point_count
+            return result.set_index("zone_id")["point_count"].to_dict()

     def map_polygons(
         self,
-        polygons: Union[pd.DataFrame, gpd.GeoDataFrame],
+        polygons,
         value_columns: Optional[Union[str, List[str]]] = None,
-        aggregation: Union[str, Dict[str, str]] = "sum",
-        area_weighted: bool = False,
-        area_column: str = "area_in_meters",
-        mapping_function: Optional[Callable] = None,
-        **mapping_kwargs,
+        aggregation: Union[str, Dict[str, str]] = "count",
+        predicate: str = "intersects",
+        **kwargs,
     ) -> Dict:
-        """Map polygon data to zones with optional area weighting.
+        """
+        Maps polygon data to the instance's zones and aggregates values.

-        Aggregates polygon data to zones based on spatial intersections. Values can be
-        weighted by the fractional area of intersection between polygons and zones.
+        This method leverages `aggregate_polygons_to_zones` to perform a spatial
+        aggregation of polygon data onto the zones stored within this object instance.
+        It can count polygons, or aggregate their values, based on different spatial
+        relationships defined by the `predicate`.

         Args:
-            polygons (Union[pd.DataFrame, gpd.GeoDataFrame]): The polygon data to map.
-                Must contain geometry information if DataFrame.
-            value_columns (Union[str, List[str]], optional): Column name(s) to aggregate.
-                If None, only intersection areas will be calculated.
-            aggregation (Union[str, Dict[str, str]]): Aggregation method(s) to use.
-                Can be a single string ("sum", "mean", "max", "min") or a dictionary
-                mapping column names to specific aggregation methods. Defaults to "sum".
-            area_weighted (bool): Whether to weight values by fractional area of
-                intersection. Defaults to False.
-            area_column (str): Name of column to store calculated areas. Only used
-                if area calculation is needed. Defaults to "area_in_meters".
-            mapping_function (Callable, optional): Custom function for mapping polygons
-                to zones. If provided, signature should be mapping_function(self, polygons, **mapping_kwargs).
-                When used, all other parameters except mapping_kwargs are ignored.
-            **mapping_kwargs: Additional keyword arguments passed to the mapping function.
+            polygons (Union[pd.DataFrame, gpd.GeoDataFrame]):
+                The polygon data to map. Must contain geometry information if a
+                DataFrame.
+            value_columns (Union[str, List[str]], optional):
+                The column name(s) from the `polygons` data to aggregate. If `None`,
+                the method will automatically count the number of polygons that
+                match the given `predicate` for each zone.
+            aggregation (Union[str, Dict[str, str]], optional):
+                The aggregation method(s) to use. Can be a single string (e.g., "sum",
+                "mean", "max") or a dictionary mapping column names to specific
+                aggregation methods. This is ignored and set to "count" if
+                `value_columns` is `None`. Defaults to "count".
+            predicate (Literal["intersects", "within", "fractional"], optional):
+                The spatial relationship to use for aggregation:
+                - "intersects": Counts or aggregates values for any polygon that
+                  intersects a zone.
+                - "within": Counts or aggregates values for polygons that are
+                  entirely contained within a zone.
+                - "fractional": Performs area-weighted aggregation. The value of a
+                  polygon is distributed proportionally to the area of its overlap
+                  with each zone.
+                Defaults to "intersects".
+            **kwargs:
+                Additional keyword arguments to be passed to the underlying
+                `aggregate_polygons_to_zones_new` function.

         Returns:
-            Dict: Dictionary with zone IDs as keys and aggregated values as values.
-                Returns aggregated values for the specified value_columns.
+            Dict:
+                A dictionary or a nested dictionary containing the aggregated values,
+                with zone IDs as keys. If `value_columns` is a single string, the
+                return value is a dictionary mapping zone ID to the aggregated value.
+                If `value_columns` is a list, the return value is a nested dictionary
+                mapping each column name to its own dictionary of aggregated values.

         Raises:
-            TypeError: If polygons cannot be converted to a GeoDataFrame.
+            ValueError: If `value_columns` is of an unexpected type after processing.
+
+        Example:
+            >>> # Assuming 'self' is an object with a 'zone_gdf' attribute
+            >>> # Count all land parcels that intersect each zone
+            >>> parcel_counts = self.map_polygons(landuse_polygons)
+            >>>
+            >>> # Aggregate total population within zones using area weighting
+            >>> population_by_zone = self.map_polygons(
+            ...     landuse_polygons,
+            ...     value_columns="population",
+            ...     predicate="fractional",
+            ...     aggregation="sum"
+            ... )
+            >>>
+            >>> # Get the sum of residential area and count of buildings within each zone
+            >>> residential_stats = self.map_polygons(
+            ...     building_polygons,
+            ...     value_columns=["residential_area_sqm", "building_id"],
+            ...     aggregation={"residential_area_sqm": "sum", "building_id": "count"},
+            ...     predicate="intersects"
+            ... )
         """
-        if mapping_function is not None:
-            return mapping_function(self, polygons, **mapping_kwargs)
-
-        if area_column not in polygons_gdf:
-            if not isinstance(polygons, gpd.GeoDataFrame):
-                try:
-                    polygons_gdf = convert_to_geodataframe(polygons)
-                except:
-                    raise TypeError(
-                        "polygons must be a GeoDataFrame or convertible to one"
-                    )
-            else:
-                polygons_gdf = polygons.copy()
-
-            polygons_gdf[area_column] = polygons_gdf.to_crs(
-                polygons_gdf.estimate_utm_crs()
-            ).geometry.area

         if value_columns is None:
             self.logger.warning(
-                "Using default polygon mapping implementation. Consider providing value_columns."
+                f"No value_columns specified. Defaulting to counting polygons with {predicate} predicate."
             )
-            value_columns = area_column
+            temp_value_col = "_temp_polygon_count_dummy"
+            polygons[temp_value_col] = 1
+            actual_value_columns = temp_value_col
+            aggregation = "count"  # Force count if no value columns
+        else:
+            actual_value_columns = value_columns

         result = aggregate_polygons_to_zones(
-            polygons=polygons_gdf,
+            polygons=polygons,
             zones=self.zone_gdf,
-            value_columns=value_columns,
+            value_columns=actual_value_columns,
             aggregation=aggregation,
-            area_weighted=area_weighted,
+            predicate=predicate,
             zone_id_column="zone_id",
         )

-        return result[value_columns].to_dict()
+        # Convert the result GeoDataFrame to the expected dictionary format
+        if isinstance(actual_value_columns, str):
+            return result.set_index("zone_id")[actual_value_columns].to_dict()
+        elif isinstance(actual_value_columns, list):
+            return {
+                col: result.set_index("zone_id")[col].to_dict()
+                for col in actual_value_columns
+            }
+        else:
+            raise ValueError("Unexpected type for actual_value_columns.")

     def map_rasters(
         self,
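
The return shape of `map_points` and `map_polygons` now depends on `value_columns`; a rough sketch of what a caller can expect (data and column names are illustrative):

    counts = generator.map_points(points_gdf)                       # value_columns=None -> {zone_id: point_count}
    pop = generator.map_polygons(polys_gdf, value_columns="population",
                                 aggregation="sum", predicate="fractional")   # {zone_id: value}
    stats = generator.map_polygons(polys_gdf, value_columns=["area", "floors"],
                                   aggregation="sum")                # {column: {zone_id: value}}
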
@@ -291,7 +405,7 @@ class ZonalViewGenerator(ABC, Generic[T]):
 
         Returns:
             Union[np.ndarray, Dict]: By default, returns a NumPy array of sampled values
-                with shape (n_zones, n_rasters), taking the first non-nodata value encountered.
+                with shape (n_zones, 1), taking the first non-nodata value encountered.
                 Custom mapping functions may return different data structures.

         Note:
@@ -318,7 +432,9 @@ class ZonalViewGenerator(ABC, Generic[T]):
             tif_processors=tif_processors, polygon_list=zone_geoms, stat=stat
         )

-        return sampled_values
+        zone_ids = self.get_zone_identifiers()
+
+        return {zone_id: value for zone_id, value in zip(zone_ids, sampled_values)}

     @lru_cache(maxsize=32)
     def _get_transformed_geometries(self, target_crs):
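
Since `map_rasters` now returns a dictionary keyed by zone id rather than a bare array, its output can be fed straight into `add_variable_to_view`; a hedged sketch, assuming the method is still called with a list of TifProcessor objects as in earlier releases and using an illustrative column name:

    sampled = generator.map_rasters(tif_processors)             # {zone_id: sampled_value}
    generator.add_variable_to_view(sampled, "elevation_mean")   # column name is illustrative
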
@@ -337,34 +453,75 @@ class ZonalViewGenerator(ABC, Generic[T]):
 
     def save_view(
         self,
-        view_data: gpd.GeoDataFrame,
         name: str,
         output_format: Optional[str] = None,
     ) -> Path:
         """Save the generated zonal view to disk.

         Args:
-            view_data (gpd.GeoDataFrame): The zonal view data to save.
             name (str): Base name for the output file (without extension).
             output_format (str, optional): File format to save in (e.g., "parquet",
-                "geojson", "shp"). If None, uses the format specified in generator_config.
+                "geojson", "shp"). If None, uses the format specified in config.

         Returns:
             Path: The full path where the view was saved.

         Note:
-            The output directory is determined by the generator_config.base_path setting.
+            The output directory is determined by the config.base_path setting.
             The file extension is automatically added based on the output format.
+            This method now saves the internal `self.view`.
         """
+        if self._view is None:
+            self.logger.warning(
+                "No variables have been added to the zonal view. Saving the base zone_gdf."
+            )
+            view_to_save = self.zone_gdf
+        else:
+            view_to_save = self._view
+
         format_to_use = output_format or self.config.output_format
         output_path = self.config.base_path / f"{name}.{format_to_use}"

         self.logger.info(f"Saving zonal view to {output_path}")
+
+        if format_to_use in ["geojson", "shp", "gpkg"]:
+            self.logger.warning(
+                f"Saving to {format_to_use} requires converting back to GeoDataFrame. Geometry column will be re-added."
+            )
+            # Re-add geometry for saving to geospatial formats
+            view_to_save = self.view.merge(
+                self.zone_gdf[["zone_id", "geometry"]], on="zone_id", how="left"
+            )
+
         write_dataset(
-            df=view_data,
+            data=view_to_save,
             path=str(output_path),
             data_store=self.data_store,
-            format=format_to_use,
         )

         return output_path
+
+    def to_dataframe(self) -> pd.DataFrame:
+        """
+        Returns the current zonal view as a DataFrame.
+
+        This method combines all accumulated variables in the view
+
+        Returns:
+            pd.DataFrame: The current view.
+        """
+        return self.view
+
+    def to_geodataframe(self) -> gpd.GeoDataFrame:
+        """
+        Returns the current zonal view merged with zone geometries as a GeoDataFrame.
+
+        This method combines all accumulated variables in the view with the corresponding
+        zone geometries, providing a spatially-enabled DataFrame for further analysis or export.
+
+        Returns:
+            gpd.GeoDataFrame: The current view merged with zone geometries.
+        """
+        return self.view.merge(
+            self.zone_gdf[["zone_id", "geometry"]], on="zone_id", how="left"
+        )
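
A short usage sketch of the reworked export path (names are illustrative; per the code above, `save_view` now persists the accumulated `self.view` and re-adds geometry only for geospatial output formats):

    df = generator.to_dataframe()        # tabular view, one row per zone_id
    gdf = generator.to_geodataframe()    # view joined back onto zone geometries
    path = generator.save_view("zonal_view", output_format="geojson")
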
@@ -1,4 +1,4 @@
-from typing import Dict, List, Optional, Union
+from typing import Dict, List, Optional, Union, Literal
 from shapely.geometry import Polygon, MultiPolygon

 import geopandas as gpd
@@ -136,9 +136,9 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
             List[T]: A list of zone identifiers in the order they appear in the
                 underlying GeoDataFrame.
         """
-        return self._zone_gdf[self.zone_id_column].tolist()
+        return self._zone_gdf.zone_id.tolist()

-    def to_geodataframe(self) -> gpd.GeoDataFrame:
+    def get_zone_geodataframe(self) -> gpd.GeoDataFrame:
         """Convert zones to a GeoDataFrame with standardized column names.

         Returns:
@@ -158,7 +158,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
         stat: str = "sum",
         name_prefix: str = "built_surface_m2_",
         **kwargs,
-    ) -> gpd.GeoDataFrame:
+    ) -> pd.DataFrame:
         """Map GHSL Built-up Surface data to zones.

         Convenience method for mapping Global Human Settlement Layer Built-up Surface
@@ -172,7 +172,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
             name_prefix (str): Prefix for the output column name. Defaults to "built_surface_m2_".

         Returns:
-            gpd.GeoDataFrame: Updated GeoDataFrame with zones and built surface metrics.
+            pd.DataFrame: Updated GeoDataFrame with zones and built surface metrics.
                 Adds a column named "{name_prefix}{stat}" containing the aggregated values.
         """
         handler = GHSLDataHandler(
@@ -190,11 +190,11 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
     def map_smod(
         self,
         year=2020,
-        resolution=100,
+        resolution=1000,
         stat: str = "median",
         name_prefix: str = "smod_class_",
         **kwargs,
-    ) -> gpd.GeoDataFrame:
+    ) -> pd.DataFrame:
         """Map GHSL Settlement Model data to zones.

         Convenience method for mapping Global Human Settlement Layer Settlement Model
@@ -208,7 +208,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
             name_prefix (str): Prefix for the output column name. Defaults to "smod_class_".

         Returns:
-            gpd.GeoDataFrame: Updated GeoDataFrame with zones and settlement classification.
+            pd.DataFrame: Updated DataFrame with zones and settlement classification.
                 Adds a column named "{name_prefix}{stat}" containing the aggregated values.
         """
         handler = GHSLDataHandler(
@@ -230,7 +230,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
         stat: str,
         name_prefix: Optional[str] = None,
         **kwargs,
-    ) -> gpd.GeoDataFrame:
+    ) -> pd.DataFrame:
         """Map Global Human Settlement Layer data to zones.

         Loads and processes GHSL raster data for the intersecting tiles, then samples
@@ -245,7 +245,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
                 If None, uses the GHSL product name in lowercase followed by underscore.

         Returns:
-            gpd.GeoDataFrame: Updated GeoDataFrame with zones and GHSL metrics.
+            pd.DataFrame: Updated DataFrame with GHSL metrics.
                 Adds a column named "{name_prefix}{stat}" containing the sampled values.

         Note:
@@ -269,17 +269,15 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
             name_prefix if name_prefix else handler.config.product.lower() + "_"
         )
         column_name = f"{name_prefix}{stat}"
-        self._zone_gdf[column_name] = sampled_values
+        self.add_variable_to_view(sampled_values, column_name)

-        self.logger.info(f"Added {column_name} column")
-
-        return self._zone_gdf.copy()
+        return self.view

     def map_google_buildings(
         self,
         handler: Optional[GoogleOpenBuildingsHandler] = None,
         use_polygons: bool = False,
-    ) -> gpd.GeoDataFrame:
+    ) -> pd.DataFrame:
         """Map Google Open Buildings data to zones.

         Processes Google Open Buildings dataset to calculate building counts and total
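
Because the convenience mappers now write into the shared view and return it as a pd.DataFrame, calls can be chained on one generator; a sketch assuming an existing GeometryBasedZonalViewGenerator instance (parameter values are illustrative):

    generator.map_built_s(stat="sum")      # adds built_surface_m2_sum
    generator.map_smod(stat="median")      # adds smod_class_median
    view = generator.view                  # one DataFrame holding both new columns
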
@@ -295,7 +293,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
                 area values from attributes for faster processing. Defaults to False.

         Returns:
-            gpd.GeoDataFrame: Updated GeoDataFrame with zones and building metrics.
+            pd.DataFrame: Updated DataFrame with building metrics.
                 Adds columns:
                 - 'google_buildings_count': Number of buildings in each zone
                 - 'google_buildings_area_in_meters': Total building area in square meters
@@ -341,19 +339,20 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
         self.logger.info(
             "Calculating building areas with area-weighted aggregation"
         )
-        area_result = self.map_polygons(buildings_gdf, area_weighted=True)
+        area_result = self.map_polygons(
+            buildings_gdf,
+            value_columns="area_in_meters",
+            aggregation="sum",
+            predicate="fractional",
+        )

         self.logger.info("Counting buildings using points data")
         count_result = self.map_points(points=buildings_df, predicate="within")

-        self._zone_gdf["google_buildings_count"] = self.zone_gdf.index.map(count_result)
-        self._zone_gdf["google_buildings_area_in_meters"] = self.zone_gdf.index.map(
-            area_result
-        )
-
-        self.logger.info(f"Added Google building data")
+        self.add_variable_to_view(count_result, "google_buildings_count")
+        self.add_variable_to_view(area_result, "google_buildings_area_in_meters")

-        return self._zone_gdf.copy()
+        return self.view

     def map_ms_buildings(
         self,
@@ -400,7 +399,9 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
             )
             return self._zone_gdf.copy()

-        buildings_gdf = add_area_in_meters(buildings_gdf)
+        buildings_gdf = add_area_in_meters(
+            buildings_gdf, area_column_name="area_in_meters"
+        )

         building_centroids = get_centroids(buildings_gdf)

@@ -421,7 +422,12 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
         self.logger.info(
             "Calculating building areas with area-weighted aggregation"
         )
-        area_result = self.map_polygons(buildings_gdf, area_weighted=True)
+        area_result = self.map_polygons(
+            buildings_gdf,
+            value_columns="area_in_meters",
+            aggregation="sum",
+            predicate="fractional",
+        )

         self.logger.info("Counting Microsoft buildings per zone")

@@ -429,11 +435,48 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
             points=building_centroids, predicate="within"
         )

-        self._zone_gdf["ms_buildings_count"] = self.zone_gdf.index.map(count_result)
-        self._zone_gdf["ms_buildings_area_in_meters"] = self.zone_gdf.index.map(
-            area_result
+        self.add_variable_to_view(count_result, "ms_buildings_count")
+        self.add_variable_to_view(area_result, "ms_buildings_area_in_meters")
+
+        return self.view
+
+    def map_ghsl_pop(
+        self,
+        year=2020,
+        resolution=100,
+        stat: str = "sum",
+        name_prefix: str = "ghsl_pop_",
+        predicate: Literal["intersects", "fractional"] = "intersects",
+        **kwargs,
+    ):
+        handler = GHSLDataHandler(
+            product="GHS_POP",
+            year=year,
+            resolution=resolution,
+            data_store=self.data_store,
+            **kwargs,
         )

-        self.logger.info(f"Added Microsoft building data")
+        if predicate == "fractional":
+            if resolution == 100:
+                self.logger.warning(
+                    "Fractional aggregations only supported for datasets with 1000m resolution. Using `intersects` as predicate"
+                )
+                predicate = "intersects"
+            else:
+                gdf_pop = handler.load_into_geodataframe()
+
+                result = self.map_polygons(
+                    gdf_pop,
+                    value_columns="pixel_value",
+                    aggregation="sum",
+                    predicate="fractional",
+                )
+
+                column_name = f"{name_prefix}{stat}"
+                self.add_variable_to_view(result, column_name)
+                return self.view

-        return self._zone_gdf.copy()
+        return self.map_ghsl(
+            handler=handler, stat=stat, name_prefix=name_prefix, **kwargs
+        )
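
A hedged sketch of calling the new `map_ghsl_pop` helper (parameter values are illustrative; per the code above, `predicate="fractional"` is honored only at 1000 m resolution, while at 100 m it falls back to `intersects` and raster sampling via `map_ghsl`):

    # area-weighted population sums from 1000 m GHS-POP polygons
    view = generator.map_ghsl_pop(year=2020, resolution=1000, predicate="fractional")

    # default path: zonal statistics on the 100 m raster via map_ghsl
    view = generator.map_ghsl_pop(year=2020, resolution=100, stat="sum")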