giga-spatial 0.6.5__py3-none-any.whl → 0.6.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {giga_spatial-0.6.5.dist-info → giga_spatial-0.6.6.dist-info}/METADATA +2 -1
- {giga_spatial-0.6.5.dist-info → giga_spatial-0.6.6.dist-info}/RECORD +17 -17
- gigaspatial/__init__.py +1 -1
- gigaspatial/core/io/data_api.py +3 -1
- gigaspatial/core/io/database.py +4 -1
- gigaspatial/generators/poi.py +75 -12
- gigaspatial/generators/zonal/base.py +34 -35
- gigaspatial/generators/zonal/geometry.py +87 -32
- gigaspatial/handlers/__init__.py +8 -1
- gigaspatial/handlers/base.py +26 -6
- gigaspatial/handlers/boundaries.py +50 -0
- gigaspatial/handlers/ghsl.py +15 -3
- gigaspatial/handlers/worldpop.py +771 -186
- gigaspatial/processing/geo.py +127 -87
- {giga_spatial-0.6.5.dist-info → giga_spatial-0.6.6.dist-info}/WHEEL +0 -0
- {giga_spatial-0.6.5.dist-info → giga_spatial-0.6.6.dist-info}/licenses/LICENSE +0 -0
- {giga_spatial-0.6.5.dist-info → giga_spatial-0.6.6.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: giga-spatial
|
3
|
-
Version: 0.6.5
|
3
|
+
Version: 0.6.6
|
4
4
|
Summary: A package for spatial data download & processing
|
5
5
|
Home-page: https://github.com/unicef/giga-spatial
|
6
6
|
Author: Utku Can Ozturk
|
@@ -32,6 +32,7 @@ Requires-Dist: pydantic-settings>=2.7.1
|
|
32
32
|
Requires-Dist: hdx-python-api>=6.3.8
|
33
33
|
Requires-Dist: bs4==0.0.2
|
34
34
|
Requires-Dist: sqlalchemy-trino==0.5.0
|
35
|
+
Requires-Dist: dask>=2024.12.1
|
35
36
|
Dynamic: author
|
36
37
|
Dynamic: author-email
|
37
38
|
Dynamic: classifier
|
@@ -1,30 +1,30 @@
|
|
1
|
-
giga_spatial-0.6.
|
2
|
-
gigaspatial/__init__.py,sha256=
|
1
|
+
giga_spatial-0.6.6.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
2
|
+
gigaspatial/__init__.py,sha256=I3h5MyD10PkOUQEBnR6L9ja7s4WeTEg8rRjRKTCWYWQ,22
|
3
3
|
gigaspatial/config.py,sha256=pLbxGc08OHT2IfTBzZVuIJTPR2vvg3KTFfvciOtRswk,9304
|
4
4
|
gigaspatial/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
gigaspatial/core/io/__init__.py,sha256=stlpgEeHf5KIb2ZW8yEbdJK5iq6n_wX4DPmKyR9PK-w,317
|
6
6
|
gigaspatial/core/io/adls_data_store.py,sha256=Zv-D_8d_2h57HnCUTJb0JWWjXqR_0XH4F8Nu_UFZK9E,11975
|
7
|
-
gigaspatial/core/io/data_api.py,sha256=
|
7
|
+
gigaspatial/core/io/data_api.py,sha256=0TtXEfqIz9m1uC8ktcUhZZtx6ZIPY_SIXNnlIodcry8,3970
|
8
8
|
gigaspatial/core/io/data_store.py,sha256=mi8fy78Dtwj4dpKkyDM6kTlna1lfCQ5ro2hUAOFr83A,3223
|
9
|
-
gigaspatial/core/io/database.py,sha256=
|
9
|
+
gigaspatial/core/io/database.py,sha256=kR9ZHuIpZEjXmyj_PnMn6ManQ504kl_f1kJRjHuNWwk,11378
|
10
10
|
gigaspatial/core/io/local_data_store.py,sha256=hcu7DNYa3AL6sEPMqguzxWal_bnP7CIpbwpoiyf5TCw,2933
|
11
11
|
gigaspatial/core/io/readers.py,sha256=gqFKGRCsAP_EBXipqGtT8MEV-x0u6SrCqaSiOC5YPTA,9284
|
12
12
|
gigaspatial/core/io/writers.py,sha256=asb56ZHQEWO2rdilIq7QywDRk8yfebecWv1KwzUpaXI,4367
|
13
13
|
gigaspatial/core/schemas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
14
|
gigaspatial/core/schemas/entity.py,sha256=QAhEW0-JgdWh9pjKGbB5ArvqtVK85ayYZJPgjdb-IKw,8590
|
15
15
|
gigaspatial/generators/__init__.py,sha256=cKbMTW7Eh-oDPtM9OfGP14_ekVwc2_7Je7n_pr_anig,223
|
16
|
-
gigaspatial/generators/poi.py,sha256=
|
16
|
+
gigaspatial/generators/poi.py,sha256=FMYH2yPFuCeYiwj-pHYe83bAxXPkQP28VDQx5L2eEnQ,35869
|
17
17
|
gigaspatial/generators/zonal/__init__.py,sha256=egnpvGVeIOS2Zg516AT84tJnIqS4owxmMLLmBQJmK7Y,301
|
18
18
|
gigaspatial/generators/zonal/admin.py,sha256=rgOyQX3f_g9qnXqrf-NkR2GEdwOqjNuPNe1H7AUVsfg,3698
|
19
|
-
gigaspatial/generators/zonal/base.py,sha256=
|
20
|
-
gigaspatial/generators/zonal/geometry.py,sha256=
|
19
|
+
gigaspatial/generators/zonal/base.py,sha256=aoq9AmawPnxu04YI7hPmJzvBTyuUqBQuMZAEUdiVStg,22818
|
20
|
+
gigaspatial/generators/zonal/geometry.py,sha256=JbaQ4WS6g45g3uBaATfJd7DS3wW-GPW0GW9FtsvGH-c,21284
|
21
21
|
gigaspatial/generators/zonal/mercator.py,sha256=fA02j30PWB5BVjrbNGCMjiOw-ds182yK7R27z8mWFug,5291
|
22
22
|
gigaspatial/grid/__init__.py,sha256=ypSSyZ4fYtMNc4IG7chSD7NkUfS2bv9KWRsKR1D9pDI,80
|
23
23
|
gigaspatial/grid/mercator_tiles.py,sha256=mAYZDBJ1U0l3z9i4rh5OqiPhOGWcBYzUOI1cvQG_Ff4,11240
|
24
|
-
gigaspatial/handlers/__init__.py,sha256=
|
25
|
-
gigaspatial/handlers/base.py,sha256=
|
26
|
-
gigaspatial/handlers/boundaries.py,sha256=
|
27
|
-
gigaspatial/handlers/ghsl.py,sha256=
|
24
|
+
gigaspatial/handlers/__init__.py,sha256=T0_6OxXQ59yTu9g2P6P9vnepudOWp_85R4WQKxRF94c,1618
|
25
|
+
gigaspatial/handlers/base.py,sha256=ZcahOEMmS_uECBqOhEEdCoDcPCbVItA5mRS5zEUqR-s,27194
|
26
|
+
gigaspatial/handlers/boundaries.py,sha256=jtWyQt3iAzS77mbAOi7mjh3cv_YCV3uB_r1h56gCfeY,20729
|
27
|
+
gigaspatial/handlers/ghsl.py,sha256=aSEVQVANzJf8O8TiQYmfwyeM43ZaO65VJHmiuLSQfLs,30524
|
28
28
|
gigaspatial/handlers/giga.py,sha256=F5ZfcE37a24X-c6Xhyt72C9eZZbyN_gV7w_InxKFMQQ,28348
|
29
29
|
gigaspatial/handlers/google_open_buildings.py,sha256=Liqk7qJhDtB4Ia4uhBe44LFcf-XVKBjRfj-pWlE5erY,16594
|
30
30
|
gigaspatial/handlers/hdx.py,sha256=LTEs_xZF1yPhD8dAdZ_YN8Vcan7iB5_tZ8NjF_ip6u0,18001
|
@@ -37,14 +37,14 @@ gigaspatial/handlers/osm.py,sha256=sLNMkOVh1v50jrWw7Z0-HILY5QTQjgKCHCeAfXj5jA8,1
|
|
37
37
|
gigaspatial/handlers/overture.py,sha256=lKeNw00v5Qia7LdWORuYihnlKEqxE9m38tdeRrvag9k,4218
|
38
38
|
gigaspatial/handlers/rwi.py,sha256=eAaplDysVeBhghJusYUKZYbKL5hW-klWvi8pWhILQkY,4962
|
39
39
|
gigaspatial/handlers/unicef_georepo.py,sha256=ODYNvkU_UKgOHXT--0MqmJ4Uk6U1_mp9xgehbTzKpX8,31924
|
40
|
-
gigaspatial/handlers/worldpop.py,sha256=
|
40
|
+
gigaspatial/handlers/worldpop.py,sha256=pkTmqb0k0vpa58t6tM3jfcpMHt1YuayLPFEFEULlrLs,30156
|
41
41
|
gigaspatial/processing/__init__.py,sha256=QDVL-QbLCrIb19lrajP7LrHNdGdnsLeGcvAs_jQpdRM,183
|
42
42
|
gigaspatial/processing/algorithms.py,sha256=6fBCwbZrI_ISWJ7UpkH6moq1vw-7dBy14yXSLHZprqY,6591
|
43
|
-
gigaspatial/processing/geo.py,sha256=
|
43
|
+
gigaspatial/processing/geo.py,sha256=8kD7-LQdGzKVfuZDWr3zK5uQhPzgxbZ3JBPosLRBJ5M,41390
|
44
44
|
gigaspatial/processing/sat_images.py,sha256=YUbH5MFNzl6NX49Obk14WaFcr1s3SyGJIOk-kRpbBNg,1429
|
45
45
|
gigaspatial/processing/tif_processor.py,sha256=QLln9D-_zBhdYQL9NAL_bmo0bmmxE3sxDUQEglYQK94,27490
|
46
46
|
gigaspatial/processing/utils.py,sha256=HC85vGKQakxlkoQAkZmeAXWHsenAwTIRn7jPKUA7x20,1500
|
47
|
-
giga_spatial-0.6.
|
48
|
-
giga_spatial-0.6.
|
49
|
-
giga_spatial-0.6.
|
50
|
-
giga_spatial-0.6.
|
47
|
+
giga_spatial-0.6.6.dist-info/METADATA,sha256=ZKoXmthabbL_5xJYHdQfk3ev4Dz02tWU6RAtpv0vWSU,7537
|
48
|
+
giga_spatial-0.6.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
49
|
+
giga_spatial-0.6.6.dist-info/top_level.txt,sha256=LZsccgw6H4zXT7m6Y4XChm-Y5LjHAwZ2hkGN_B3ExmI,12
|
50
|
+
giga_spatial-0.6.6.dist-info/RECORD,,
|
gigaspatial/__init__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.6.5"
|
1
|
+
__version__ = "0.6.6"
|
gigaspatial/core/io/data_api.py
CHANGED
@@ -35,7 +35,9 @@ class GigaDataAPI:
|
|
35
35
|
:param sort: Whether to sort the country list alphabetically (default is True).
|
36
36
|
"""
|
37
37
|
country_list = [
|
38
|
-
t.name
|
38
|
+
t.name
|
39
|
+
for t in self.client.list_all_tables()
|
40
|
+
if t.schema == self.schema_name
|
39
41
|
]
|
40
42
|
if sort:
|
41
43
|
country_list.sort()
|
gigaspatial/core/io/database.py
CHANGED
@@ -272,6 +272,7 @@ class DBConnection:
|
|
272
272
|
def read_sql_to_dask_dataframe(
|
273
273
|
self,
|
274
274
|
table_name: str,
|
275
|
+
index_col: str,
|
275
276
|
columns: Optional[List[str]] = None,
|
276
277
|
limit: Optional[int] = None,
|
277
278
|
**kwargs,
|
@@ -310,7 +311,9 @@ class DBConnection:
|
|
310
311
|
if limit:
|
311
312
|
query = query.limit(limit)
|
312
313
|
|
313
|
-
return dd.read_sql_query(
|
314
|
+
return dd.read_sql_query(
|
315
|
+
sql=query, con=connection_string, index_col=index_col, **kwargs
|
316
|
+
)
|
314
317
|
except Exception as e:
|
315
318
|
print(f"Error reading SQL to Dask DataFrame: {e}")
|
316
319
|
raise ValueError(f"Failed to read SQL to Dask DataFrame: {e}") from e
|
gigaspatial/generators/poi.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
from pathlib import Path
|
2
|
-
from typing import List, Optional, Union, Tuple
|
2
|
+
from typing import List, Optional, Union, Tuple, Literal
|
3
3
|
from pydantic.dataclasses import dataclass, Field
|
4
4
|
|
5
5
|
import geopandas as gpd
|
@@ -13,6 +13,7 @@ from gigaspatial.config import config as global_config
|
|
13
13
|
from gigaspatial.handlers.google_open_buildings import GoogleOpenBuildingsHandler
|
14
14
|
from gigaspatial.handlers.microsoft_global_buildings import MSBuildingsHandler
|
15
15
|
from gigaspatial.handlers.ghsl import GHSLDataHandler
|
16
|
+
from gigaspatial.handlers.worldpop import WPPopulationHandler
|
16
17
|
from gigaspatial.processing.geo import (
|
17
18
|
convert_to_geodataframe,
|
18
19
|
buffer_geodataframe,
|
@@ -468,7 +469,7 @@ class PoiViewGenerator:
|
|
468
469
|
map_radius_meters: Optional[float] = None,
|
469
470
|
output_column: str = "zonal_stat",
|
470
471
|
value_column: Optional[str] = None,
|
471
|
-
|
472
|
+
predicate: Literal["intersects", "within", "fractional"] = "intersects",
|
472
473
|
**kwargs,
|
473
474
|
) -> pd.DataFrame:
|
474
475
|
"""
|
@@ -496,9 +497,8 @@ class PoiViewGenerator:
|
|
496
497
|
value_column (str, optional):
|
497
498
|
For polygon data: Name of the column to aggregate. Required for polygon data.
|
498
499
|
Not used for raster data.
|
499
|
-
|
500
|
-
|
501
|
-
intersection. Defaults to False.
|
500
|
+
predicate (Literal["intersects", "within", "fractional"], optional):
|
501
|
+
The spatial relationship to use for aggregation. Defaults to "intersects".
|
502
502
|
**kwargs:
|
503
503
|
Additional keyword arguments passed to the sampling/aggregation functions.
|
504
504
|
|
@@ -575,7 +575,7 @@ class PoiViewGenerator:
|
|
575
575
|
)
|
576
576
|
|
577
577
|
self.logger.info(
|
578
|
-
f"Aggregating {value_column} within {map_radius_meters}m buffers around POIs"
|
578
|
+
f"Aggregating {value_column} within {map_radius_meters}m buffers around POIs using predicate '{predicate}'"
|
579
579
|
)
|
580
580
|
|
581
581
|
# Create buffers around POIs
|
@@ -591,12 +591,17 @@ class PoiViewGenerator:
|
|
591
591
|
zones=buffer_gdf,
|
592
592
|
value_columns=value_column,
|
593
593
|
aggregation=stat,
|
594
|
-
|
594
|
+
predicate=predicate,
|
595
595
|
zone_id_column="poi_id",
|
596
|
+
output_suffix="",
|
597
|
+
drop_geometry=True,
|
596
598
|
**kwargs,
|
597
599
|
)
|
598
600
|
|
599
|
-
results_df = aggregation_result_gdf[["poi_id", value_column]]
|
601
|
+
results_df = aggregation_result_gdf[["poi_id", value_column]]
|
602
|
+
|
603
|
+
if output_column != "zonal_stat":
|
604
|
+
results_df = results_df.rename(columns={value_column: output_column})
|
600
605
|
|
601
606
|
else:
|
602
607
|
raise ValueError(
|
@@ -662,7 +667,6 @@ class PoiViewGenerator:
|
|
662
667
|
|
663
668
|
def map_smod(
|
664
669
|
self,
|
665
|
-
stat="median",
|
666
670
|
dataset_year=2020,
|
667
671
|
dataset_resolution=1000,
|
668
672
|
output_column="smod_class",
|
@@ -703,11 +707,67 @@ class PoiViewGenerator:
|
|
703
707
|
|
704
708
|
return self.map_zonal_stats(
|
705
709
|
data=tif_processors,
|
706
|
-
stat=stat, # Use median for categorical data
|
707
710
|
output_column=output_column,
|
708
711
|
**kwargs,
|
709
712
|
)
|
710
713
|
|
714
|
+
def map_wp_pop(
|
715
|
+
self,
|
716
|
+
country: Union[str, List[str]],
|
717
|
+
map_radius_meters: float,
|
718
|
+
resolution=1000,
|
719
|
+
predicate: Literal[
|
720
|
+
"centroid_within", "intersects", "fractional", "within"
|
721
|
+
] = "fractional",
|
722
|
+
output_column: str = "population",
|
723
|
+
**kwargs,
|
724
|
+
):
|
725
|
+
if isinstance(country, str):
|
726
|
+
country = [country]
|
727
|
+
|
728
|
+
handler = WPPopulationHandler(
|
729
|
+
project="pop", resolution=resolution, data_store=self.data_store, **kwargs
|
730
|
+
)
|
731
|
+
|
732
|
+
self.logger.info(
|
733
|
+
f"Mapping WorldPop Population data (year: {handler.config.year}, resolution: {handler.config.resolution}m)"
|
734
|
+
)
|
735
|
+
|
736
|
+
if predicate == "fractional" and resolution == 100:
|
737
|
+
self.logger.warning(
|
738
|
+
"Fractional aggregations only supported for datasets with 1000m resolution. Using `intersects` as predicate"
|
739
|
+
)
|
740
|
+
predicate = "intersects"
|
741
|
+
|
742
|
+
if predicate == "centroid_within":
|
743
|
+
data = []
|
744
|
+
for c in country:
|
745
|
+
data.extend(
|
746
|
+
handler.load_data(c, ensure_available=self.config.ensure_available)
|
747
|
+
)
|
748
|
+
else:
|
749
|
+
data = pd.concat(
|
750
|
+
[
|
751
|
+
handler.load_into_geodataframe(
|
752
|
+
c, ensure_available=self.config.ensure_available
|
753
|
+
)
|
754
|
+
for c in country
|
755
|
+
],
|
756
|
+
ignore_index=True,
|
757
|
+
)
|
758
|
+
|
759
|
+
self.logger.info(f"Mapping WorldPop Population data into {map_radius_meters}m zones around POIs using 'sum' statistic")
|
760
|
+
|
761
|
+
return self.map_zonal_stats(
|
762
|
+
data,
|
763
|
+
stat="sum",
|
764
|
+
map_radius_meters=map_radius_meters,
|
765
|
+
value_column="pixel_value",
|
766
|
+
predicate=predicate,
|
767
|
+
output_column=output_column,
|
768
|
+
**kwargs
|
769
|
+
)
|
770
|
+
|
711
771
|
def save_view(
|
712
772
|
self,
|
713
773
|
name: str,
|
@@ -778,8 +838,11 @@ class PoiViewGenerator:
|
|
778
838
|
Returns:
|
779
839
|
gpd.GeoDataFrame: The current view merged with point geometries.
|
780
840
|
"""
|
781
|
-
return
|
782
|
-
self.
|
841
|
+
return gpd.GeoDataFrame(
|
842
|
+
self.view.merge(
|
843
|
+
self.points_gdf[["poi_id", "geometry"]], on="poi_id", how="left"
|
844
|
+
),
|
845
|
+
crs="EPSG:4326",
|
783
846
|
)
|
784
847
|
|
785
848
|
def chain_operations(self, operations: List[dict]) -> "PoiViewGenerator":
|
@@ -13,7 +13,6 @@ from gigaspatial.core.io.local_data_store import LocalDataStore
|
|
13
13
|
from gigaspatial.core.io.writers import write_dataset
|
14
14
|
from gigaspatial.config import config as global_config
|
15
15
|
from gigaspatial.processing.geo import (
|
16
|
-
convert_to_geodataframe,
|
17
16
|
aggregate_polygons_to_zones,
|
18
17
|
aggregate_points_to_zones,
|
19
18
|
)
|
@@ -242,36 +241,37 @@ class ZonalViewGenerator(ABC, Generic[T]):
|
|
242
241
|
if mapping_function is not None:
|
243
242
|
return mapping_function(self, points, **mapping_kwargs)
|
244
243
|
|
245
|
-
|
244
|
+
self.logger.warning(
|
245
|
+
"Using default points mapping implementation. Consider creating a specialized mapping function."
|
246
|
+
)
|
247
|
+
result = aggregate_points_to_zones(
|
248
|
+
points=points,
|
249
|
+
zones=self.zone_gdf,
|
250
|
+
value_columns=value_columns,
|
251
|
+
aggregation=aggregation,
|
252
|
+
point_zone_predicate=predicate,
|
253
|
+
zone_id_column="zone_id",
|
254
|
+
output_suffix=output_suffix,
|
255
|
+
)
|
256
|
+
|
257
|
+
if isinstance(value_columns, str):
|
258
|
+
return result.set_index("zone_id")[value_columns].to_dict()
|
259
|
+
elif isinstance(value_columns, list):
|
260
|
+
# If multiple value columns, return a dictionary of dictionaries
|
261
|
+
# Or, if preferred, a dictionary where values are lists/tuples of results
|
262
|
+
# For now, let's return a dict of series, which is common.
|
263
|
+
# The previous version implied a single dictionary result from map_points/polygons
|
264
|
+
# but with multiple columns, it's usually {zone_id: {col1: val1, col2: val2}}
|
265
|
+
# or {col_name: {zone_id: val}}
|
266
|
+
# In this version, it'll return a dictionary for each column.
|
267
|
+
return {
|
268
|
+
col: result.set_index("zone_id")[col].to_dict() for col in value_columns
|
269
|
+
}
|
270
|
+
else: # If value_columns is None, it should return point_count
|
246
271
|
self.logger.warning(
|
247
|
-
"
|
248
|
-
)
|
249
|
-
result = aggregate_points_to_zones(
|
250
|
-
points=points,
|
251
|
-
zones=self.zone_gdf,
|
252
|
-
value_columns=value_columns,
|
253
|
-
aggregation=aggregation,
|
254
|
-
point_zone_predicate=predicate,
|
255
|
-
zone_id_column="zone_id",
|
256
|
-
output_suffix=output_suffix,
|
272
|
+
"No `value_columns` provided. Mapping point counts. Consider passing `value_columns` and `aggregation` or `mapping_function`."
|
257
273
|
)
|
258
|
-
|
259
|
-
if isinstance(value_columns, str):
|
260
|
-
return result.set_index("zone_id")[value_columns].to_dict()
|
261
|
-
elif isinstance(value_columns, list):
|
262
|
-
# If multiple value columns, return a dictionary of dictionaries
|
263
|
-
# Or, if preferred, a dictionary where values are lists/tuples of results
|
264
|
-
# For now, let's return a dict of series, which is common.
|
265
|
-
# The previous version implied a single dictionary result from map_points/polygons
|
266
|
-
# but with multiple columns, it's usually {zone_id: {col1: val1, col2: val2}}
|
267
|
-
# or {col_name: {zone_id: val}}
|
268
|
-
# In this version, it'll return a dictionary for each column.
|
269
|
-
return {
|
270
|
-
col: result.set_index("zone_id")[col].to_dict()
|
271
|
-
for col in value_columns
|
272
|
-
}
|
273
|
-
else: # If value_columns is None, it should return point_count
|
274
|
-
return result.set_index("zone_id")["point_count"].to_dict()
|
274
|
+
return result.set_index("zone_id")["point_count"].to_dict()
|
275
275
|
|
276
276
|
def map_polygons(
|
277
277
|
self,
|
@@ -415,10 +415,6 @@ class ZonalViewGenerator(ABC, Generic[T]):
|
|
415
415
|
if mapping_function is not None:
|
416
416
|
return mapping_function(self, tif_processors, **mapping_kwargs)
|
417
417
|
|
418
|
-
self.logger.warning(
|
419
|
-
"Using default raster mapping implementation. Consider creating a specialized mapping function."
|
420
|
-
)
|
421
|
-
|
422
418
|
raster_crs = tif_processors[0].crs
|
423
419
|
|
424
420
|
if raster_crs != self.zone_gdf.crs:
|
@@ -522,6 +518,9 @@ class ZonalViewGenerator(ABC, Generic[T]):
|
|
522
518
|
Returns:
|
523
519
|
gpd.GeoDataFrame: The current view merged with zone geometries.
|
524
520
|
"""
|
525
|
-
return
|
526
|
-
self.
|
521
|
+
return gpd.GeoDataFrame(
|
522
|
+
(self.view).merge(
|
523
|
+
self.zone_gdf[["zone_id", "geometry"]], on="zone_id", how="left"
|
524
|
+
),
|
525
|
+
crs=self.zone_gdf.crs,
|
527
526
|
)
|
@@ -6,7 +6,6 @@ import pandas as pd
|
|
6
6
|
import logging
|
7
7
|
|
8
8
|
from gigaspatial.core.io.data_store import DataStore
|
9
|
-
from gigaspatial.config import config as global_config
|
10
9
|
from gigaspatial.processing.geo import (
|
11
10
|
add_area_in_meters,
|
12
11
|
get_centroids,
|
@@ -14,6 +13,7 @@ from gigaspatial.processing.geo import (
|
|
14
13
|
from gigaspatial.handlers.ghsl import GHSLDataHandler
|
15
14
|
from gigaspatial.handlers.google_open_buildings import GoogleOpenBuildingsHandler
|
16
15
|
from gigaspatial.handlers.microsoft_global_buildings import MSBuildingsHandler
|
16
|
+
from gigaspatial.handlers.worldpop import WPPopulationHandler
|
17
17
|
from gigaspatial.generators.zonal.base import (
|
18
18
|
ZonalViewGenerator,
|
19
19
|
ZonalViewGeneratorConfig,
|
@@ -156,7 +156,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
156
156
|
year=2020,
|
157
157
|
resolution=100,
|
158
158
|
stat: str = "sum",
|
159
|
-
|
159
|
+
output_column: str = "built_surface_m2",
|
160
160
|
**kwargs,
|
161
161
|
) -> pd.DataFrame:
|
162
162
|
"""Map GHSL Built-up Surface data to zones.
|
@@ -165,15 +165,14 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
165
165
|
data using appropriate default parameters for built surface analysis.
|
166
166
|
|
167
167
|
Args:
|
168
|
-
|
169
|
-
|
168
|
+
year: The year of the data (default: 2020)
|
169
|
+
resolution: The resolution in meters (default: 100)
|
170
170
|
stat (str): Statistic to calculate for built surface values within each zone.
|
171
171
|
Defaults to "sum" which gives total built surface area.
|
172
|
-
|
173
|
-
|
172
|
+
output_column (str): The output column name. Defaults to "built_surface_m2".
|
174
173
|
Returns:
|
175
|
-
pd.DataFrame: Updated
|
176
|
-
Adds a column
|
174
|
+
pd.DataFrame: Updated view DataFrame and settlement classification.
|
175
|
+
Adds a column with `output_column` containing the aggregated values.
|
177
176
|
"""
|
178
177
|
handler = GHSLDataHandler(
|
179
178
|
product="GHS_BUILT_S",
|
@@ -184,7 +183,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
184
183
|
)
|
185
184
|
|
186
185
|
return self.map_ghsl(
|
187
|
-
handler=handler, stat=stat,
|
186
|
+
handler=handler, stat=stat, output_column=output_column, **kwargs
|
188
187
|
)
|
189
188
|
|
190
189
|
def map_smod(
|
@@ -192,7 +191,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
192
191
|
year=2020,
|
193
192
|
resolution=1000,
|
194
193
|
stat: str = "median",
|
195
|
-
|
194
|
+
output_column: str = "smod_class",
|
196
195
|
**kwargs,
|
197
196
|
) -> pd.DataFrame:
|
198
197
|
"""Map GHSL Settlement Model data to zones.
|
@@ -201,15 +200,14 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
201
200
|
data using appropriate default parameters for settlement classification analysis.
|
202
201
|
|
203
202
|
Args:
|
204
|
-
|
205
|
-
|
203
|
+
year: The year of the data (default: 2020)
|
204
|
+
resolution: The resolution in meters (default: 1000)
|
206
205
|
stat (str): Statistic to calculate for settlement class values within each zone.
|
207
206
|
Defaults to "median" which gives the predominant settlement class.
|
208
|
-
|
209
|
-
|
207
|
+
output_column (str): The output column name. Defaults to "smod_class".
|
210
208
|
Returns:
|
211
|
-
pd.DataFrame: Updated DataFrame
|
212
|
-
Adds a column
|
209
|
+
pd.DataFrame: Updated view DataFrame and settlement classification.
|
210
|
+
Adds a column with `output_column` containing the aggregated values.
|
213
211
|
"""
|
214
212
|
handler = GHSLDataHandler(
|
215
213
|
product="GHS_SMOD",
|
@@ -221,14 +219,14 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
221
219
|
)
|
222
220
|
|
223
221
|
return self.map_ghsl(
|
224
|
-
handler=handler, stat=stat,
|
222
|
+
handler=handler, stat=stat, output_column=output_column, **kwargs
|
225
223
|
)
|
226
224
|
|
227
225
|
def map_ghsl(
|
228
226
|
self,
|
229
227
|
handler: GHSLDataHandler,
|
230
228
|
stat: str,
|
231
|
-
|
229
|
+
output_column: Optional[str] = None,
|
232
230
|
**kwargs,
|
233
231
|
) -> pd.DataFrame:
|
234
232
|
"""Map Global Human Settlement Layer data to zones.
|
@@ -237,16 +235,15 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
237
235
|
the raster values within each zone using the specified statistic.
|
238
236
|
|
239
237
|
Args:
|
240
|
-
|
241
|
-
product, year, resolution, and coordinate system to use.
|
238
|
+
hander (GHSLDataHandler): Handler for the GHSL data.
|
242
239
|
stat (str): Statistic to calculate for raster values within each zone.
|
243
240
|
Common options: "mean", "sum", "median", "min", "max".
|
244
|
-
|
241
|
+
output_column (str): The output column name.
|
245
242
|
If None, uses the GHSL product name in lowercase followed by underscore.
|
246
243
|
|
247
244
|
Returns:
|
248
245
|
pd.DataFrame: Updated DataFrame with GHSL metrics.
|
249
|
-
Adds a column named
|
246
|
+
Adds a column named as `output_column` containing the sampled values.
|
250
247
|
|
251
248
|
Note:
|
252
249
|
The method automatically determines which GHSL tiles intersect with the zones
|
@@ -265,10 +262,12 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
265
262
|
)
|
266
263
|
sampled_values = self.map_rasters(tif_processors=tif_processors, stat=stat)
|
267
264
|
|
268
|
-
|
269
|
-
|
265
|
+
column_name = (
|
266
|
+
output_column
|
267
|
+
if output_column
|
268
|
+
else f"{handler.config.product.lower()}_{stat}"
|
270
269
|
)
|
271
|
-
|
270
|
+
|
272
271
|
self.add_variable_to_view(sampled_values, column_name)
|
273
272
|
|
274
273
|
return self.view
|
@@ -442,16 +441,14 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
442
441
|
|
443
442
|
def map_ghsl_pop(
|
444
443
|
self,
|
445
|
-
year=2020,
|
446
444
|
resolution=100,
|
447
445
|
stat: str = "sum",
|
448
|
-
|
446
|
+
output_column: str = "ghsl_pop",
|
449
447
|
predicate: Literal["intersects", "fractional"] = "intersects",
|
450
448
|
**kwargs,
|
451
449
|
):
|
452
450
|
handler = GHSLDataHandler(
|
453
451
|
product="GHS_POP",
|
454
|
-
year=year,
|
455
452
|
resolution=resolution,
|
456
453
|
data_store=self.data_store,
|
457
454
|
**kwargs,
|
@@ -464,7 +461,7 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
464
461
|
)
|
465
462
|
predicate = "intersects"
|
466
463
|
else:
|
467
|
-
gdf_pop = handler.load_into_geodataframe()
|
464
|
+
gdf_pop = handler.load_into_geodataframe(self.zone_gdf)
|
468
465
|
|
469
466
|
result = self.map_polygons(
|
470
467
|
gdf_pop,
|
@@ -473,10 +470,68 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
473
470
|
predicate="fractional",
|
474
471
|
)
|
475
472
|
|
476
|
-
|
477
|
-
self.add_variable_to_view(result, column_name)
|
473
|
+
self.add_variable_to_view(result, output_column)
|
478
474
|
return self.view
|
479
475
|
|
480
476
|
return self.map_ghsl(
|
481
|
-
handler=handler, stat=stat,
|
477
|
+
handler=handler, stat=stat, output_column=output_column, **kwargs
|
478
|
+
)
|
479
|
+
|
480
|
+
def map_wp_pop(
|
481
|
+
self,
|
482
|
+
country: Union[str, List[str]],
|
483
|
+
resolution=1000,
|
484
|
+
predicate: Literal["intersects", "fractional"] = "intersects",
|
485
|
+
output_column: str = "population",
|
486
|
+
**kwargs,
|
487
|
+
):
|
488
|
+
if isinstance(country, str):
|
489
|
+
country = [country]
|
490
|
+
|
491
|
+
handler = WPPopulationHandler(
|
492
|
+
project="pop", resolution=resolution, data_store=self.data_store, **kwargs
|
493
|
+
)
|
494
|
+
|
495
|
+
self.logger.info(
|
496
|
+
f"Mapping WorldPop Population data (year: {handler.config.year}, resolution: {handler.config.resolution}m)"
|
482
497
|
)
|
498
|
+
|
499
|
+
if predicate == "fractional":
|
500
|
+
if resolution == 100:
|
501
|
+
self.logger.warning(
|
502
|
+
"Fractional aggregations only supported for datasets with 1000m resolution. Using `intersects` as predicate"
|
503
|
+
)
|
504
|
+
predicate = "intersects"
|
505
|
+
else:
|
506
|
+
gdf_pop = pd.concat(
|
507
|
+
[
|
508
|
+
handler.load_into_geodataframe(
|
509
|
+
c, ensure_available=self.config.ensure_available
|
510
|
+
)
|
511
|
+
for c in country
|
512
|
+
],
|
513
|
+
ignore_index=True,
|
514
|
+
)
|
515
|
+
|
516
|
+
result = self.map_polygons(
|
517
|
+
gdf_pop,
|
518
|
+
value_columns="pixel_value",
|
519
|
+
aggregation="sum",
|
520
|
+
predicate=predicate,
|
521
|
+
)
|
522
|
+
|
523
|
+
self.add_variable_to_view(result, output_column)
|
524
|
+
return self.view
|
525
|
+
|
526
|
+
tif_processors = []
|
527
|
+
for c in country:
|
528
|
+
tif_processors.extend(
|
529
|
+
handler.load_data(c, ensure_available=self.config.ensure_available)
|
530
|
+
)
|
531
|
+
|
532
|
+
self.logger.info(f"Sampling WorldPop Population data using 'sum' statistic")
|
533
|
+
sampled_values = self.map_rasters(tif_processors=tif_processors, stat="sum")
|
534
|
+
|
535
|
+
self.add_variable_to_view(sampled_values, output_column)
|
536
|
+
|
537
|
+
return self.view
|
gigaspatial/handlers/__init__.py
CHANGED
@@ -21,7 +21,14 @@ from gigaspatial.handlers.osm import OSMLocationFetcher
|
|
21
21
|
from gigaspatial.handlers.overture import OvertureAmenityFetcher
|
22
22
|
from gigaspatial.handlers.mapbox_image import MapboxImageDownloader
|
23
23
|
from gigaspatial.handlers.maxar_image import MaxarConfig, MaxarImageDownloader
|
24
|
-
|
24
|
+
|
25
|
+
from gigaspatial.handlers.worldpop import (
|
26
|
+
WPPopulationConfig,
|
27
|
+
WPPopulationReader,
|
28
|
+
WPPopulationDownloader,
|
29
|
+
WPPopulationHandler,
|
30
|
+
WorldPopRestClient,
|
31
|
+
)
|
25
32
|
from gigaspatial.handlers.ookla_speedtest import (
|
26
33
|
OoklaSpeedtestTileConfig,
|
27
34
|
OoklaSpeedtestConfig,
|