giga-spatial 0.6.5__py3-none-any.whl → 0.6.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: giga-spatial
3
- Version: 0.6.5
3
+ Version: 0.6.7
4
4
  Summary: A package for spatial data download & processing
5
5
  Home-page: https://github.com/unicef/giga-spatial
6
6
  Author: Utku Can Ozturk
@@ -32,6 +32,7 @@ Requires-Dist: pydantic-settings>=2.7.1
32
32
  Requires-Dist: hdx-python-api>=6.3.8
33
33
  Requires-Dist: bs4==0.0.2
34
34
  Requires-Dist: sqlalchemy-trino==0.5.0
35
+ Requires-Dist: dask>=2024.12.1
35
36
  Dynamic: author
36
37
  Dynamic: author-email
37
38
  Dynamic: classifier
@@ -1,30 +1,30 @@
1
- giga_spatial-0.6.5.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
2
- gigaspatial/__init__.py,sha256=KDgkBrBsBSUzbLgrOZ89YsNN06fU4j5bmcuEwo6q5pg,22
1
+ giga_spatial-0.6.7.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
2
+ gigaspatial/__init__.py,sha256=F6dMrYXtnIxoVQIhQnq6i1IIuwiHqeZxZMHThUjr2vM,22
3
3
  gigaspatial/config.py,sha256=pLbxGc08OHT2IfTBzZVuIJTPR2vvg3KTFfvciOtRswk,9304
4
4
  gigaspatial/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  gigaspatial/core/io/__init__.py,sha256=stlpgEeHf5KIb2ZW8yEbdJK5iq6n_wX4DPmKyR9PK-w,317
6
6
  gigaspatial/core/io/adls_data_store.py,sha256=Zv-D_8d_2h57HnCUTJb0JWWjXqR_0XH4F8Nu_UFZK9E,11975
7
- gigaspatial/core/io/data_api.py,sha256=3HMstau3zH3JPRUW0t83DZt74N39bt-jsfAyrUUFMoc,3944
7
+ gigaspatial/core/io/data_api.py,sha256=0TtXEfqIz9m1uC8ktcUhZZtx6ZIPY_SIXNnlIodcry8,3970
8
8
  gigaspatial/core/io/data_store.py,sha256=mi8fy78Dtwj4dpKkyDM6kTlna1lfCQ5ro2hUAOFr83A,3223
9
- gigaspatial/core/io/database.py,sha256=zoOQ1j6bNarngQL8vS8adrWYi9P1NRUytZEzHd08F30,11303
9
+ gigaspatial/core/io/database.py,sha256=kR9ZHuIpZEjXmyj_PnMn6ManQ504kl_f1kJRjHuNWwk,11378
10
10
  gigaspatial/core/io/local_data_store.py,sha256=hcu7DNYa3AL6sEPMqguzxWal_bnP7CIpbwpoiyf5TCw,2933
11
11
  gigaspatial/core/io/readers.py,sha256=gqFKGRCsAP_EBXipqGtT8MEV-x0u6SrCqaSiOC5YPTA,9284
12
12
  gigaspatial/core/io/writers.py,sha256=asb56ZHQEWO2rdilIq7QywDRk8yfebecWv1KwzUpaXI,4367
13
13
  gigaspatial/core/schemas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  gigaspatial/core/schemas/entity.py,sha256=QAhEW0-JgdWh9pjKGbB5ArvqtVK85ayYZJPgjdb-IKw,8590
15
15
  gigaspatial/generators/__init__.py,sha256=cKbMTW7Eh-oDPtM9OfGP14_ekVwc2_7Je7n_pr_anig,223
16
- gigaspatial/generators/poi.py,sha256=cPMH1-0V_P7orGpIcsarZ6rWZiGOfcSxQNxvQJgAuM8,33628
16
+ gigaspatial/generators/poi.py,sha256=FMYH2yPFuCeYiwj-pHYe83bAxXPkQP28VDQx5L2eEnQ,35869
17
17
  gigaspatial/generators/zonal/__init__.py,sha256=egnpvGVeIOS2Zg516AT84tJnIqS4owxmMLLmBQJmK7Y,301
18
18
  gigaspatial/generators/zonal/admin.py,sha256=rgOyQX3f_g9qnXqrf-NkR2GEdwOqjNuPNe1H7AUVsfg,3698
19
- gigaspatial/generators/zonal/base.py,sha256=DzIXcOMnUhlCZQ8AxCyGvVuTY8oTCw66xhyWn7tSO-U,22862
20
- gigaspatial/generators/zonal/geometry.py,sha256=P1vKJNvYpBQj_g-B-OzeJy4KCt8f_SI7h3H4WKTWXiU,19605
19
+ gigaspatial/generators/zonal/base.py,sha256=aoq9AmawPnxu04YI7hPmJzvBTyuUqBQuMZAEUdiVStg,22818
20
+ gigaspatial/generators/zonal/geometry.py,sha256=JbaQ4WS6g45g3uBaATfJd7DS3wW-GPW0GW9FtsvGH-c,21284
21
21
  gigaspatial/generators/zonal/mercator.py,sha256=fA02j30PWB5BVjrbNGCMjiOw-ds182yK7R27z8mWFug,5291
22
22
  gigaspatial/grid/__init__.py,sha256=ypSSyZ4fYtMNc4IG7chSD7NkUfS2bv9KWRsKR1D9pDI,80
23
23
  gigaspatial/grid/mercator_tiles.py,sha256=mAYZDBJ1U0l3z9i4rh5OqiPhOGWcBYzUOI1cvQG_Ff4,11240
24
- gigaspatial/handlers/__init__.py,sha256=R2rugXR5kF4lLkSO1fjpVDYK_jWdD8U2NbXbW71Ezv8,1523
25
- gigaspatial/handlers/base.py,sha256=rL94c3wDjsqzLp4na8FfYXW6tNjVGX6v4M-Ce4LrAro,26413
26
- gigaspatial/handlers/boundaries.py,sha256=TfqjtLE4VdJlUt7APLX16hzGuR1EH2MuRBn8u6R0k9A,18705
27
- gigaspatial/handlers/ghsl.py,sha256=NFjUSQrv-YrlfnX2erzd7r88PZhhyezLg3HzIikZwaM,30170
24
+ gigaspatial/handlers/__init__.py,sha256=T0_6OxXQ59yTu9g2P6P9vnepudOWp_85R4WQKxRF94c,1618
25
+ gigaspatial/handlers/base.py,sha256=ZcahOEMmS_uECBqOhEEdCoDcPCbVItA5mRS5zEUqR-s,27194
26
+ gigaspatial/handlers/boundaries.py,sha256=jtWyQt3iAzS77mbAOi7mjh3cv_YCV3uB_r1h56gCfeY,20729
27
+ gigaspatial/handlers/ghsl.py,sha256=aSEVQVANzJf8O8TiQYmfwyeM43ZaO65VJHmiuLSQfLs,30524
28
28
  gigaspatial/handlers/giga.py,sha256=F5ZfcE37a24X-c6Xhyt72C9eZZbyN_gV7w_InxKFMQQ,28348
29
29
  gigaspatial/handlers/google_open_buildings.py,sha256=Liqk7qJhDtB4Ia4uhBe44LFcf-XVKBjRfj-pWlE5erY,16594
30
30
  gigaspatial/handlers/hdx.py,sha256=LTEs_xZF1yPhD8dAdZ_YN8Vcan7iB5_tZ8NjF_ip6u0,18001
@@ -37,14 +37,14 @@ gigaspatial/handlers/osm.py,sha256=sLNMkOVh1v50jrWw7Z0-HILY5QTQjgKCHCeAfXj5jA8,1
37
37
  gigaspatial/handlers/overture.py,sha256=lKeNw00v5Qia7LdWORuYihnlKEqxE9m38tdeRrvag9k,4218
38
38
  gigaspatial/handlers/rwi.py,sha256=eAaplDysVeBhghJusYUKZYbKL5hW-klWvi8pWhILQkY,4962
39
39
  gigaspatial/handlers/unicef_georepo.py,sha256=ODYNvkU_UKgOHXT--0MqmJ4Uk6U1_mp9xgehbTzKpX8,31924
40
- gigaspatial/handlers/worldpop.py,sha256=oJ39NGajXi0rn829ZoFiaeG4_wavyPvljdActpxs12I,9850
40
+ gigaspatial/handlers/worldpop.py,sha256=jV166EP02Xdj8jiT8aQi4sexds8Qd3KRGHXqq70_Sdk,30161
41
41
  gigaspatial/processing/__init__.py,sha256=QDVL-QbLCrIb19lrajP7LrHNdGdnsLeGcvAs_jQpdRM,183
42
42
  gigaspatial/processing/algorithms.py,sha256=6fBCwbZrI_ISWJ7UpkH6moq1vw-7dBy14yXSLHZprqY,6591
43
- gigaspatial/processing/geo.py,sha256=tAykXH5UwrXtfnyZ9CClvejo9Ae7Yw6ij7EbF-7WLhQ,40091
43
+ gigaspatial/processing/geo.py,sha256=8kD7-LQdGzKVfuZDWr3zK5uQhPzgxbZ3JBPosLRBJ5M,41390
44
44
  gigaspatial/processing/sat_images.py,sha256=YUbH5MFNzl6NX49Obk14WaFcr1s3SyGJIOk-kRpbBNg,1429
45
45
  gigaspatial/processing/tif_processor.py,sha256=QLln9D-_zBhdYQL9NAL_bmo0bmmxE3sxDUQEglYQK94,27490
46
46
  gigaspatial/processing/utils.py,sha256=HC85vGKQakxlkoQAkZmeAXWHsenAwTIRn7jPKUA7x20,1500
47
- giga_spatial-0.6.5.dist-info/METADATA,sha256=eZHvzPKaNla-npj8PwfruUNp0chqxxst_mugbde6OL8,7506
48
- giga_spatial-0.6.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
49
- giga_spatial-0.6.5.dist-info/top_level.txt,sha256=LZsccgw6H4zXT7m6Y4XChm-Y5LjHAwZ2hkGN_B3ExmI,12
50
- giga_spatial-0.6.5.dist-info/RECORD,,
47
+ giga_spatial-0.6.7.dist-info/METADATA,sha256=Ra3gVT9Y_Zncp8qCLzQYnMLKF_Ff_MSShPKUsUtGSUE,7537
48
+ giga_spatial-0.6.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
49
+ giga_spatial-0.6.7.dist-info/top_level.txt,sha256=LZsccgw6H4zXT7m6Y4XChm-Y5LjHAwZ2hkGN_B3ExmI,12
50
+ giga_spatial-0.6.7.dist-info/RECORD,,
gigaspatial/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.6.5"
1
+ __version__ = "0.6.7"
@@ -35,7 +35,9 @@ class GigaDataAPI:
35
35
  :param sort: Whether to sort the country list alphabetically (default is True).
36
36
  """
37
37
  country_list = [
38
- t.name for t in self.client.list_all_tables() if t.share == self.share_name
38
+ t.name
39
+ for t in self.client.list_all_tables()
40
+ if t.schema == self.schema_name
39
41
  ]
40
42
  if sort:
41
43
  country_list.sort()
@@ -272,6 +272,7 @@ class DBConnection:
272
272
  def read_sql_to_dask_dataframe(
273
273
  self,
274
274
  table_name: str,
275
+ index_col: str,
275
276
  columns: Optional[List[str]] = None,
276
277
  limit: Optional[int] = None,
277
278
  **kwargs,
@@ -310,7 +311,9 @@ class DBConnection:
310
311
  if limit:
311
312
  query = query.limit(limit)
312
313
 
313
- return dd.read_sql_query(sql=query, con=connection_string, **kwargs)
314
+ return dd.read_sql_query(
315
+ sql=query, con=connection_string, index_col=index_col, **kwargs
316
+ )
314
317
  except Exception as e:
315
318
  print(f"Error reading SQL to Dask DataFrame: {e}")
316
319
  raise ValueError(f"Failed to read SQL to Dask DataFrame: {e}") from e
@@ -1,5 +1,5 @@
1
1
  from pathlib import Path
2
- from typing import List, Optional, Union, Tuple
2
+ from typing import List, Optional, Union, Tuple, Literal
3
3
  from pydantic.dataclasses import dataclass, Field
4
4
 
5
5
  import geopandas as gpd
@@ -13,6 +13,7 @@ from gigaspatial.config import config as global_config
13
13
  from gigaspatial.handlers.google_open_buildings import GoogleOpenBuildingsHandler
14
14
  from gigaspatial.handlers.microsoft_global_buildings import MSBuildingsHandler
15
15
  from gigaspatial.handlers.ghsl import GHSLDataHandler
16
+ from gigaspatial.handlers.worldpop import WPPopulationHandler
16
17
  from gigaspatial.processing.geo import (
17
18
  convert_to_geodataframe,
18
19
  buffer_geodataframe,
@@ -468,7 +469,7 @@ class PoiViewGenerator:
468
469
  map_radius_meters: Optional[float] = None,
469
470
  output_column: str = "zonal_stat",
470
471
  value_column: Optional[str] = None,
471
- area_weighted: bool = False,
472
+ predicate: Literal["intersects", "within", "fractional"] = "intersects",
472
473
  **kwargs,
473
474
  ) -> pd.DataFrame:
474
475
  """
@@ -496,9 +497,8 @@ class PoiViewGenerator:
496
497
  value_column (str, optional):
497
498
  For polygon data: Name of the column to aggregate. Required for polygon data.
498
499
  Not used for raster data.
499
- area_weighted (bool, optional):
500
- For polygon data: Whether to weight values by fractional area of
501
- intersection. Defaults to False.
500
+ predicate (Literal["intersects", "within", "fractional"], optional):
501
+ The spatial relationship to use for aggregation. Defaults to "intersects".
502
502
  **kwargs:
503
503
  Additional keyword arguments passed to the sampling/aggregation functions.
504
504
 
@@ -575,7 +575,7 @@ class PoiViewGenerator:
575
575
  )
576
576
 
577
577
  self.logger.info(
578
- f"Aggregating {value_column} within {map_radius_meters}m buffers around POIs"
578
+ f"Aggregating {value_column} within {map_radius_meters}m buffers around POIs using predicate '{predicate}'"
579
579
  )
580
580
 
581
581
  # Create buffers around POIs
@@ -591,12 +591,17 @@ class PoiViewGenerator:
591
591
  zones=buffer_gdf,
592
592
  value_columns=value_column,
593
593
  aggregation=stat,
594
- area_weighted=area_weighted,
594
+ predicate=predicate,
595
595
  zone_id_column="poi_id",
596
+ output_suffix="",
597
+ drop_geometry=True,
596
598
  **kwargs,
597
599
  )
598
600
 
599
- results_df = aggregation_result_gdf[["poi_id", value_column]].copy()
601
+ results_df = aggregation_result_gdf[["poi_id", value_column]]
602
+
603
+ if output_column != "zonal_stat":
604
+ results_df = results_df.rename(columns={value_column: output_column})
600
605
 
601
606
  else:
602
607
  raise ValueError(
@@ -662,7 +667,6 @@ class PoiViewGenerator:
662
667
 
663
668
  def map_smod(
664
669
  self,
665
- stat="median",
666
670
  dataset_year=2020,
667
671
  dataset_resolution=1000,
668
672
  output_column="smod_class",
@@ -703,11 +707,67 @@ class PoiViewGenerator:
703
707
 
704
708
  return self.map_zonal_stats(
705
709
  data=tif_processors,
706
- stat=stat, # Use median for categorical data
707
710
  output_column=output_column,
708
711
  **kwargs,
709
712
  )
710
713
 
714
+ def map_wp_pop(
715
+ self,
716
+ country: Union[str, List[str]],
717
+ map_radius_meters: float,
718
+ resolution=1000,
719
+ predicate: Literal[
720
+ "centroid_within", "intersects", "fractional", "within"
721
+ ] = "fractional",
722
+ output_column: str = "population",
723
+ **kwargs,
724
+ ):
725
+ if isinstance(country, str):
726
+ country = [country]
727
+
728
+ handler = WPPopulationHandler(
729
+ project="pop", resolution=resolution, data_store=self.data_store, **kwargs
730
+ )
731
+
732
+ self.logger.info(
733
+ f"Mapping WorldPop Population data (year: {handler.config.year}, resolution: {handler.config.resolution}m)"
734
+ )
735
+
736
+ if predicate == "fractional" and resolution == 100:
737
+ self.logger.warning(
738
+ "Fractional aggregations only supported for datasets with 1000m resolution. Using `intersects` as predicate"
739
+ )
740
+ predicate = "intersects"
741
+
742
+ if predicate == "centroid_within":
743
+ data = []
744
+ for c in country:
745
+ data.extend(
746
+ handler.load_data(c, ensure_available=self.config.ensure_available)
747
+ )
748
+ else:
749
+ data = pd.concat(
750
+ [
751
+ handler.load_into_geodataframe(
752
+ c, ensure_available=self.config.ensure_available
753
+ )
754
+ for c in country
755
+ ],
756
+ ignore_index=True,
757
+ )
758
+
759
+ self.logger.info(f"Mapping WorldPop Population data into {map_radius_meters}m zones around POIs using 'sum' statistic")
760
+
761
+ return self.map_zonal_stats(
762
+ data,
763
+ stat="sum",
764
+ map_radius_meters=map_radius_meters,
765
+ value_column="pixel_value",
766
+ predicate=predicate,
767
+ output_column=output_column,
768
+ **kwargs
769
+ )
770
+
711
771
  def save_view(
712
772
  self,
713
773
  name: str,
@@ -778,8 +838,11 @@ class PoiViewGenerator:
778
838
  Returns:
779
839
  gpd.GeoDataFrame: The current view merged with point geometries.
780
840
  """
781
- return self.view.merge(
782
- self.points_gdf[["poi_id", "geometry"]], on="poi_id", how="left"
841
+ return gpd.GeoDataFrame(
842
+ self.view.merge(
843
+ self.points_gdf[["poi_id", "geometry"]], on="poi_id", how="left"
844
+ ),
845
+ crs="EPSG:4326",
783
846
  )
784
847
 
785
848
  def chain_operations(self, operations: List[dict]) -> "PoiViewGenerator":
@@ -13,7 +13,6 @@ from gigaspatial.core.io.local_data_store import LocalDataStore
13
13
  from gigaspatial.core.io.writers import write_dataset
14
14
  from gigaspatial.config import config as global_config
15
15
  from gigaspatial.processing.geo import (
16
- convert_to_geodataframe,
17
16
  aggregate_polygons_to_zones,
18
17
  aggregate_points_to_zones,
19
18
  )
@@ -242,36 +241,37 @@ class ZonalViewGenerator(ABC, Generic[T]):
242
241
  if mapping_function is not None:
243
242
  return mapping_function(self, points, **mapping_kwargs)
244
243
 
245
- else:
244
+ self.logger.warning(
245
+ "Using default points mapping implementation. Consider creating a specialized mapping function."
246
+ )
247
+ result = aggregate_points_to_zones(
248
+ points=points,
249
+ zones=self.zone_gdf,
250
+ value_columns=value_columns,
251
+ aggregation=aggregation,
252
+ point_zone_predicate=predicate,
253
+ zone_id_column="zone_id",
254
+ output_suffix=output_suffix,
255
+ )
256
+
257
+ if isinstance(value_columns, str):
258
+ return result.set_index("zone_id")[value_columns].to_dict()
259
+ elif isinstance(value_columns, list):
260
+ # If multiple value columns, return a dictionary of dictionaries
261
+ # Or, if preferred, a dictionary where values are lists/tuples of results
262
+ # For now, let's return a dict of series, which is common.
263
+ # The previous version implied a single dictionary result from map_points/polygons
264
+ # but with multiple columns, it's usually {zone_id: {col1: val1, col2: val2}}
265
+ # or {col_name: {zone_id: val}}
266
+ # In this version, it'll return a dictionary for each column.
267
+ return {
268
+ col: result.set_index("zone_id")[col].to_dict() for col in value_columns
269
+ }
270
+ else: # If value_columns is None, it should return point_count
246
271
  self.logger.warning(
247
- "Using default points mapping implementation. Consider creating a specialized mapping function."
248
- )
249
- result = aggregate_points_to_zones(
250
- points=points,
251
- zones=self.zone_gdf,
252
- value_columns=value_columns,
253
- aggregation=aggregation,
254
- point_zone_predicate=predicate,
255
- zone_id_column="zone_id",
256
- output_suffix=output_suffix,
272
+ "No `value_columns` provided. Mapping point counts. Consider passing `value_columns` and `aggregation` or `mapping_function`."
257
273
  )
258
-
259
- if isinstance(value_columns, str):
260
- return result.set_index("zone_id")[value_columns].to_dict()
261
- elif isinstance(value_columns, list):
262
- # If multiple value columns, return a dictionary of dictionaries
263
- # Or, if preferred, a dictionary where values are lists/tuples of results
264
- # For now, let's return a dict of series, which is common.
265
- # The previous version implied a single dictionary result from map_points/polygons
266
- # but with multiple columns, it's usually {zone_id: {col1: val1, col2: val2}}
267
- # or {col_name: {zone_id: val}}
268
- # In this version, it'll return a dictionary for each column.
269
- return {
270
- col: result.set_index("zone_id")[col].to_dict()
271
- for col in value_columns
272
- }
273
- else: # If value_columns is None, it should return point_count
274
- return result.set_index("zone_id")["point_count"].to_dict()
274
+ return result.set_index("zone_id")["point_count"].to_dict()
275
275
 
276
276
  def map_polygons(
277
277
  self,
@@ -415,10 +415,6 @@ class ZonalViewGenerator(ABC, Generic[T]):
415
415
  if mapping_function is not None:
416
416
  return mapping_function(self, tif_processors, **mapping_kwargs)
417
417
 
418
- self.logger.warning(
419
- "Using default raster mapping implementation. Consider creating a specialized mapping function."
420
- )
421
-
422
418
  raster_crs = tif_processors[0].crs
423
419
 
424
420
  if raster_crs != self.zone_gdf.crs:
@@ -522,6 +518,9 @@ class ZonalViewGenerator(ABC, Generic[T]):
522
518
  Returns:
523
519
  gpd.GeoDataFrame: The current view merged with zone geometries.
524
520
  """
525
- return self.view.merge(
526
- self.zone_gdf[["zone_id", "geometry"]], on="zone_id", how="left"
521
+ return gpd.GeoDataFrame(
522
+ (self.view).merge(
523
+ self.zone_gdf[["zone_id", "geometry"]], on="zone_id", how="left"
524
+ ),
525
+ crs=self.zone_gdf.crs,
527
526
  )
@@ -6,7 +6,6 @@ import pandas as pd
6
6
  import logging
7
7
 
8
8
  from gigaspatial.core.io.data_store import DataStore
9
- from gigaspatial.config import config as global_config
10
9
  from gigaspatial.processing.geo import (
11
10
  add_area_in_meters,
12
11
  get_centroids,
@@ -14,6 +13,7 @@ from gigaspatial.processing.geo import (
14
13
  from gigaspatial.handlers.ghsl import GHSLDataHandler
15
14
  from gigaspatial.handlers.google_open_buildings import GoogleOpenBuildingsHandler
16
15
  from gigaspatial.handlers.microsoft_global_buildings import MSBuildingsHandler
16
+ from gigaspatial.handlers.worldpop import WPPopulationHandler
17
17
  from gigaspatial.generators.zonal.base import (
18
18
  ZonalViewGenerator,
19
19
  ZonalViewGeneratorConfig,
@@ -156,7 +156,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
156
156
  year=2020,
157
157
  resolution=100,
158
158
  stat: str = "sum",
159
- name_prefix: str = "built_surface_m2_",
159
+ output_column: str = "built_surface_m2",
160
160
  **kwargs,
161
161
  ) -> pd.DataFrame:
162
162
  """Map GHSL Built-up Surface data to zones.
@@ -165,15 +165,14 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
165
165
  data using appropriate default parameters for built surface analysis.
166
166
 
167
167
  Args:
168
- ghsl_data_config (GHSLDataConfig): Configuration for GHSL Built-up Surface data.
169
- Defaults to GHS_BUILT_S product for 2020 at 100m resolution.
168
+ year: The year of the data (default: 2020)
169
+ resolution: The resolution in meters (default: 100)
170
170
  stat (str): Statistic to calculate for built surface values within each zone.
171
171
  Defaults to "sum" which gives total built surface area.
172
- name_prefix (str): Prefix for the output column name. Defaults to "built_surface_m2_".
173
-
172
+ output_column (str): The output column name. Defaults to "built_surface_m2".
174
173
  Returns:
175
- pd.DataFrame: Updated GeoDataFrame with zones and built surface metrics.
176
- Adds a column named "{name_prefix}{stat}" containing the aggregated values.
174
+ pd.DataFrame: Updated view DataFrame and settlement classification.
175
+ Adds a column with `output_column` containing the aggregated values.
177
176
  """
178
177
  handler = GHSLDataHandler(
179
178
  product="GHS_BUILT_S",
@@ -184,7 +183,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
184
183
  )
185
184
 
186
185
  return self.map_ghsl(
187
- handler=handler, stat=stat, name_prefix=name_prefix, **kwargs
186
+ handler=handler, stat=stat, output_column=output_column, **kwargs
188
187
  )
189
188
 
190
189
  def map_smod(
@@ -192,7 +191,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
192
191
  year=2020,
193
192
  resolution=1000,
194
193
  stat: str = "median",
195
- name_prefix: str = "smod_class_",
194
+ output_column: str = "smod_class",
196
195
  **kwargs,
197
196
  ) -> pd.DataFrame:
198
197
  """Map GHSL Settlement Model data to zones.
@@ -201,15 +200,14 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
201
200
  data using appropriate default parameters for settlement classification analysis.
202
201
 
203
202
  Args:
204
- ghsl_data_config (GHSLDataConfig): Configuration for GHSL Settlement Model data.
205
- Defaults to GHS_SMOD product for 2020 at 1000m resolution in Mollweide projection.
203
+ year: The year of the data (default: 2020)
204
+ resolution: The resolution in meters (default: 1000)
206
205
  stat (str): Statistic to calculate for settlement class values within each zone.
207
206
  Defaults to "median" which gives the predominant settlement class.
208
- name_prefix (str): Prefix for the output column name. Defaults to "smod_class_".
209
-
207
+ output_column (str): The output column name. Defaults to "smod_class".
210
208
  Returns:
211
- pd.DataFrame: Updated DataFrame with zones and settlement classification.
212
- Adds a column named "{name_prefix}{stat}" containing the aggregated values.
209
+ pd.DataFrame: Updated view DataFrame and settlement classification.
210
+ Adds a column with `output_column` containing the aggregated values.
213
211
  """
214
212
  handler = GHSLDataHandler(
215
213
  product="GHS_SMOD",
@@ -221,14 +219,14 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
221
219
  )
222
220
 
223
221
  return self.map_ghsl(
224
- handler=handler, stat=stat, name_prefix=name_prefix, **kwargs
222
+ handler=handler, stat=stat, output_column=output_column, **kwargs
225
223
  )
226
224
 
227
225
  def map_ghsl(
228
226
  self,
229
227
  handler: GHSLDataHandler,
230
228
  stat: str,
231
- name_prefix: Optional[str] = None,
229
+ output_column: Optional[str] = None,
232
230
  **kwargs,
233
231
  ) -> pd.DataFrame:
234
232
  """Map Global Human Settlement Layer data to zones.
@@ -237,16 +235,15 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
237
235
  the raster values within each zone using the specified statistic.
238
236
 
239
237
  Args:
240
- ghsl_data_config (GHSLDataConfig): Configuration specifying which GHSL
241
- product, year, resolution, and coordinate system to use.
238
+ hander (GHSLDataHandler): Handler for the GHSL data.
242
239
  stat (str): Statistic to calculate for raster values within each zone.
243
240
  Common options: "mean", "sum", "median", "min", "max".
244
- name_prefix (str, optional): Prefix for the output column name.
241
+ output_column (str): The output column name.
245
242
  If None, uses the GHSL product name in lowercase followed by underscore.
246
243
 
247
244
  Returns:
248
245
  pd.DataFrame: Updated DataFrame with GHSL metrics.
249
- Adds a column named "{name_prefix}{stat}" containing the sampled values.
246
+ Adds a column named as `output_column` containing the sampled values.
250
247
 
251
248
  Note:
252
249
  The method automatically determines which GHSL tiles intersect with the zones
@@ -265,10 +262,12 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
265
262
  )
266
263
  sampled_values = self.map_rasters(tif_processors=tif_processors, stat=stat)
267
264
 
268
- name_prefix = (
269
- name_prefix if name_prefix else handler.config.product.lower() + "_"
265
+ column_name = (
266
+ output_column
267
+ if output_column
268
+ else f"{handler.config.product.lower()}_{stat}"
270
269
  )
271
- column_name = f"{name_prefix}{stat}"
270
+
272
271
  self.add_variable_to_view(sampled_values, column_name)
273
272
 
274
273
  return self.view
@@ -442,16 +441,14 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
442
441
 
443
442
  def map_ghsl_pop(
444
443
  self,
445
- year=2020,
446
444
  resolution=100,
447
445
  stat: str = "sum",
448
- name_prefix: str = "ghsl_pop_",
446
+ output_column: str = "ghsl_pop",
449
447
  predicate: Literal["intersects", "fractional"] = "intersects",
450
448
  **kwargs,
451
449
  ):
452
450
  handler = GHSLDataHandler(
453
451
  product="GHS_POP",
454
- year=year,
455
452
  resolution=resolution,
456
453
  data_store=self.data_store,
457
454
  **kwargs,
@@ -464,7 +461,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
464
461
  )
465
462
  predicate = "intersects"
466
463
  else:
467
- gdf_pop = handler.load_into_geodataframe()
464
+ gdf_pop = handler.load_into_geodataframe(self.zone_gdf)
468
465
 
469
466
  result = self.map_polygons(
470
467
  gdf_pop,
@@ -473,10 +470,68 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
473
470
  predicate="fractional",
474
471
  )
475
472
 
476
- column_name = f"{name_prefix}{stat}"
477
- self.add_variable_to_view(result, column_name)
473
+ self.add_variable_to_view(result, output_column)
478
474
  return self.view
479
475
 
480
476
  return self.map_ghsl(
481
- handler=handler, stat=stat, name_prefix=name_prefix, **kwargs
477
+ handler=handler, stat=stat, output_column=output_column, **kwargs
478
+ )
479
+
480
+ def map_wp_pop(
481
+ self,
482
+ country: Union[str, List[str]],
483
+ resolution=1000,
484
+ predicate: Literal["intersects", "fractional"] = "intersects",
485
+ output_column: str = "population",
486
+ **kwargs,
487
+ ):
488
+ if isinstance(country, str):
489
+ country = [country]
490
+
491
+ handler = WPPopulationHandler(
492
+ project="pop", resolution=resolution, data_store=self.data_store, **kwargs
493
+ )
494
+
495
+ self.logger.info(
496
+ f"Mapping WorldPop Population data (year: {handler.config.year}, resolution: {handler.config.resolution}m)"
482
497
  )
498
+
499
+ if predicate == "fractional":
500
+ if resolution == 100:
501
+ self.logger.warning(
502
+ "Fractional aggregations only supported for datasets with 1000m resolution. Using `intersects` as predicate"
503
+ )
504
+ predicate = "intersects"
505
+ else:
506
+ gdf_pop = pd.concat(
507
+ [
508
+ handler.load_into_geodataframe(
509
+ c, ensure_available=self.config.ensure_available
510
+ )
511
+ for c in country
512
+ ],
513
+ ignore_index=True,
514
+ )
515
+
516
+ result = self.map_polygons(
517
+ gdf_pop,
518
+ value_columns="pixel_value",
519
+ aggregation="sum",
520
+ predicate=predicate,
521
+ )
522
+
523
+ self.add_variable_to_view(result, output_column)
524
+ return self.view
525
+
526
+ tif_processors = []
527
+ for c in country:
528
+ tif_processors.extend(
529
+ handler.load_data(c, ensure_available=self.config.ensure_available)
530
+ )
531
+
532
+ self.logger.info(f"Sampling WorldPop Population data using 'sum' statistic")
533
+ sampled_values = self.map_rasters(tif_processors=tif_processors, stat="sum")
534
+
535
+ self.add_variable_to_view(sampled_values, output_column)
536
+
537
+ return self.view
@@ -21,7 +21,14 @@ from gigaspatial.handlers.osm import OSMLocationFetcher
21
21
  from gigaspatial.handlers.overture import OvertureAmenityFetcher
22
22
  from gigaspatial.handlers.mapbox_image import MapboxImageDownloader
23
23
  from gigaspatial.handlers.maxar_image import MaxarConfig, MaxarImageDownloader
24
- from gigaspatial.handlers.worldpop import WorldPopConfig, WorldPopDownloader
24
+
25
+ from gigaspatial.handlers.worldpop import (
26
+ WPPopulationConfig,
27
+ WPPopulationReader,
28
+ WPPopulationDownloader,
29
+ WPPopulationHandler,
30
+ WorldPopRestClient,
31
+ )
25
32
  from gigaspatial.handlers.ookla_speedtest import (
26
33
  OoklaSpeedtestTileConfig,
27
34
  OoklaSpeedtestConfig,