giga-spatial 0.7.0-py3-none-any.whl → 0.7.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {giga_spatial-0.7.0.dist-info → giga_spatial-0.7.1.dist-info}/METADATA +1 -1
- {giga_spatial-0.7.0.dist-info → giga_spatial-0.7.1.dist-info}/RECORD +18 -17
- gigaspatial/__init__.py +1 -1
- gigaspatial/config.py +1 -0
- gigaspatial/generators/poi.py +226 -82
- gigaspatial/generators/zonal/base.py +41 -28
- gigaspatial/generators/zonal/geometry.py +79 -36
- gigaspatial/handlers/base.py +22 -8
- gigaspatial/handlers/ghsl.py +22 -8
- gigaspatial/handlers/giga.py +9 -4
- gigaspatial/handlers/healthsites.py +350 -0
- gigaspatial/handlers/osm.py +325 -105
- gigaspatial/handlers/worldpop.py +228 -9
- gigaspatial/processing/geo.py +1 -1
- gigaspatial/processing/tif_processor.py +831 -300
- {giga_spatial-0.7.0.dist-info → giga_spatial-0.7.1.dist-info}/WHEEL +0 -0
- {giga_spatial-0.7.0.dist-info → giga_spatial-0.7.1.dist-info}/licenses/LICENSE +0 -0
- {giga_spatial-0.7.0.dist-info → giga_spatial-0.7.1.dist-info}/top_level.txt +0 -0
{giga_spatial-0.7.0.dist-info → giga_spatial-0.7.1.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
-giga_spatial-0.7.
-gigaspatial/__init__.py,sha256=
-gigaspatial/config.py,sha256=
+giga_spatial-0.7.1.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
+gigaspatial/__init__.py,sha256=2KJZDSMOG7KS82AxYOrZ4ZihYxX0wjfUjDsIZh3L024,22
+gigaspatial/config.py,sha256=xqP800jaDMYE7-cgHXSGGwuF1fmo5Q56-DqpJ61p8a0,9382
 gigaspatial/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 gigaspatial/core/io/__init__.py,sha256=stlpgEeHf5KIb2ZW8yEbdJK5iq6n_wX4DPmKyR9PK-w,317
 gigaspatial/core/io/adls_data_store.py,sha256=Pr1FKpzFb2TSeidMSGhhNSDk1tFxAi4G1aPZ_No_xUs,15783
@@ -13,39 +13,40 @@ gigaspatial/core/io/writers.py,sha256=asb56ZHQEWO2rdilIq7QywDRk8yfebecWv1KwzUpaX
 gigaspatial/core/schemas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 gigaspatial/core/schemas/entity.py,sha256=QAhEW0-JgdWh9pjKGbB5ArvqtVK85ayYZJPgjdb-IKw,8590
 gigaspatial/generators/__init__.py,sha256=cKbMTW7Eh-oDPtM9OfGP14_ekVwc2_7Je7n_pr_anig,223
-gigaspatial/generators/poi.py,sha256=
+gigaspatial/generators/poi.py,sha256=tlLqVsXOjl83OL3Z6yajz9BLtZVbhZF67A-el-F2oU4,42356
 gigaspatial/generators/zonal/__init__.py,sha256=egnpvGVeIOS2Zg516AT84tJnIqS4owxmMLLmBQJmK7Y,301
 gigaspatial/generators/zonal/admin.py,sha256=rgOyQX3f_g9qnXqrf-NkR2GEdwOqjNuPNe1H7AUVsfg,3698
-gigaspatial/generators/zonal/base.py,sha256=
-gigaspatial/generators/zonal/geometry.py,sha256=
+gigaspatial/generators/zonal/base.py,sha256=byyfv4dNpOMzszBMMbKQlZuHMlq2e84Hboh0W58f-8k,22959
+gigaspatial/generators/zonal/geometry.py,sha256=0EF3VXAVOcj5AtcubGWU325biLaZ76zt3Sf1uyKEE88,23358
 gigaspatial/generators/zonal/mercator.py,sha256=fA02j30PWB5BVjrbNGCMjiOw-ds182yK7R27z8mWFug,5291
 gigaspatial/grid/__init__.py,sha256=ypSSyZ4fYtMNc4IG7chSD7NkUfS2bv9KWRsKR1D9pDI,80
 gigaspatial/grid/h3.py,sha256=0d_rNNvwxc0LDD2qKCd2gTy_16_5sgPwJwk29z5pE9U,15310
 gigaspatial/grid/mercator_tiles.py,sha256=dzUDbXe0QHEC4s334IVT76d41D35dbd-QiN2LnkDdvA,11240
 gigaspatial/handlers/__init__.py,sha256=T0_6OxXQ59yTu9g2P6P9vnepudOWp_85R4WQKxRF94c,1618
-gigaspatial/handlers/base.py,sha256=
+gigaspatial/handlers/base.py,sha256=JSZ64eWRoF17bKvVj2Bq3lFA00uJ4i6qlwp72O4HSv4,27885
 gigaspatial/handlers/boundaries.py,sha256=jtWyQt3iAzS77mbAOi7mjh3cv_YCV3uB_r1h56gCfeY,20729
-gigaspatial/handlers/ghsl.py,sha256=
-gigaspatial/handlers/giga.py,sha256=
+gigaspatial/handlers/ghsl.py,sha256=QvW7GeHWQSrPdxXxmR_61UcfBIHziNpCHCxRXg1nnug,31285
+gigaspatial/handlers/giga.py,sha256=tNy2O0YAdy4iYWjMNKUadV_W2HxcTsb-aG4te6wvMOk,28660
 gigaspatial/handlers/google_open_buildings.py,sha256=Liqk7qJhDtB4Ia4uhBe44LFcf-XVKBjRfj-pWlE5erY,16594
 gigaspatial/handlers/hdx.py,sha256=1m6oG1DeEC_RLFtb6CrTReWpbQ5uG2e8EIt-IUkZbaI,18122
+gigaspatial/handlers/healthsites.py,sha256=gJCnKDXE4Gu-3Z6Io0-EknXfSdOxBHdr4-JV0z9bul0,12829
 gigaspatial/handlers/mapbox_image.py,sha256=M_nkJ_b1PD8FG1ajVgSycCb0NRTAI_SLpHdzszNetKA,7786
 gigaspatial/handlers/maxar_image.py,sha256=kcc8uGljQB0Yh0MKBA7lT7KwBbNZwFzuyBklR3db1P4,10204
 gigaspatial/handlers/microsoft_global_buildings.py,sha256=bQ5WHIv3v0wWrZZUbZkKPRjgdlqIxlK7CV_0zSvdrTw,20292
 gigaspatial/handlers/ookla_speedtest.py,sha256=EcvSAxJZ9GPfzYnT_C85Qgy2ecc9ndf70Pklk53OdC8,6506
 gigaspatial/handlers/opencellid.py,sha256=KuJqd-5-RO5ZzyDaBSrTgCK2ib5N_m3RUcPlX5heWwI,10683
-gigaspatial/handlers/osm.py,sha256=
+gigaspatial/handlers/osm.py,sha256=gyY_a2OoRZjL14iOTEKLCPbKTcmSDHc72Q0hluaENrY,25464
 gigaspatial/handlers/overture.py,sha256=lKeNw00v5Qia7LdWORuYihnlKEqxE9m38tdeRrvag9k,4218
 gigaspatial/handlers/rwi.py,sha256=eAaplDysVeBhghJusYUKZYbKL5hW-klWvi8pWhILQkY,4962
 gigaspatial/handlers/unicef_georepo.py,sha256=ODYNvkU_UKgOHXT--0MqmJ4Uk6U1_mp9xgehbTzKpX8,31924
-gigaspatial/handlers/worldpop.py,sha256=
+gigaspatial/handlers/worldpop.py,sha256=5gSAhTPs3l8cVflx_Vp4ZxhvcUKKb_r2hGmc_sXCgio,39893
 gigaspatial/processing/__init__.py,sha256=QDVL-QbLCrIb19lrajP7LrHNdGdnsLeGcvAs_jQpdRM,183
 gigaspatial/processing/algorithms.py,sha256=6fBCwbZrI_ISWJ7UpkH6moq1vw-7dBy14yXSLHZprqY,6591
-gigaspatial/processing/geo.py,sha256=
+gigaspatial/processing/geo.py,sha256=ekGbU1sEn10zX3esYIEba6eZyhpx9WOjPlVEkspqcNk,41674
 gigaspatial/processing/sat_images.py,sha256=YUbH5MFNzl6NX49Obk14WaFcr1s3SyGJIOk-kRpbBNg,1429
-gigaspatial/processing/tif_processor.py,sha256=
+gigaspatial/processing/tif_processor.py,sha256=WzD3inLUdr50fl11WPF6czvG1d8s7y2qdTBOc5Yo2yw,66400
 gigaspatial/processing/utils.py,sha256=HC85vGKQakxlkoQAkZmeAXWHsenAwTIRn7jPKUA7x20,1500
-giga_spatial-0.7.
-giga_spatial-0.7.
-giga_spatial-0.7.
-giga_spatial-0.7.
+giga_spatial-0.7.1.dist-info/METADATA,sha256=Fo_X_aPJg9Mv1GfX4esviiljEhcK4n7imbACWKrNo6s,9277
+giga_spatial-0.7.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+giga_spatial-0.7.1.dist-info/top_level.txt,sha256=LZsccgw6H4zXT7m6Y4XChm-Y5LjHAwZ2hkGN_B3ExmI,12
+giga_spatial-0.7.1.dist-info/RECORD,,
gigaspatial/__init__.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.7.0"
+__version__ = "0.7.1"
gigaspatial/config.py
CHANGED
@@ -38,6 +38,7 @@ class Config(BaseSettings):
     GIGA_SCHOOL_MEASUREMENTS_API_KEY: str = Field(
         default="", alias="GIGA_SCHOOL_MEASUREMENTS_API_KEY"
     )
+    HEALTHSITES_API_KEY: str = Field(default="", alias="HEALTHSITES_API_KEY")

     ROOT_DATA_DIR: Path = Field(
         default=Path("."),
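The new HEALTHSITES_API_KEY setting follows the same pattern as the existing API-key fields: it defaults to an empty string and is read from the environment via its alias. A minimal standalone sketch of that behavior, assuming pydantic-settings v2 (this is not gigaspatial's actual config module, only the same field shape):

```python
import os

from pydantic import Field
from pydantic_settings import BaseSettings


class Config(BaseSettings):
    # Mirrors the field added in gigaspatial/config.py above
    HEALTHSITES_API_KEY: str = Field(default="", alias="HEALTHSITES_API_KEY")


os.environ["HEALTHSITES_API_KEY"] = "demo-key"  # placeholder value
print(Config().HEALTHSITES_API_KEY)             # -> "demo-key"
```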
gigaspatial/generators/poi.py
CHANGED
@@ -18,14 +18,11 @@ from gigaspatial.processing.geo import (
     convert_to_geodataframe,
     buffer_geodataframe,
     detect_coordinate_columns,
+    aggregate_points_to_zones,
     aggregate_polygons_to_zones,
     get_centroids,
 )
-from gigaspatial.processing.tif_processor import (
-    sample_multiple_tifs_by_polygons,
-    sample_multiple_tifs_by_coordinates,
-    TifProcessor,
-)
+from gigaspatial.processing.tif_processor import TifProcessor
 from scipy.spatial import cKDTree


@@ -165,7 +162,9 @@ class PoiViewGenerator:
                 raise ValueError(
                     f"Column '{poi_id_column}' provided as 'poi_id_column' contains duplicate values."
                 )
-            return convert_to_geodataframe(
+            return convert_to_geodataframe(
+                points, lat_col="latitude", lon_col="longitude"
+            )
         except ValueError as e:
             raise ValueError(
                 f"Could not detect coordinate columns in DataFrame: {str(e)}"
@@ -202,7 +201,9 @@ class PoiViewGenerator:
                 raise ValueError(
                     f"Column '{poi_id_column}' provided as 'poi_id_column' contains duplicate values."
                 )
-            return convert_to_geodataframe(
+            return convert_to_geodataframe(
+                df, lat_col="latitude", lon_col="longitude"
+            )
         except ValueError as e:
             raise ValueError(
                 f"Could not detect coordinate columns in dictionary list: {str(e)}"
@@ -365,11 +366,11 @@ class PoiViewGenerator:
                 f"{output_prefix}_distance": dist,
             }
         )
-        self._update_view(temp_result_df)
+        # self._update_view(temp_result_df) # Removed direct view update
         self.logger.info(
             f"Nearest points mapping complete with prefix '{output_prefix}'"
         )
-        return
+        return temp_result_df  # Return the DataFrame

     def map_google_buildings(
         self,
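map_nearest_points now returns the per-POI result DataFrame instead of writing it into the view; callers are expected to merge it themselves, which is exactly what the building-mapping methods below now do. A rough usage sketch under that assumption (the PoiViewGenerator constructor call, the input files and column names are placeholders, not taken from this diff):

```python
import geopandas as gpd

from gigaspatial.generators import PoiViewGenerator  # import path assumed

# Hypothetical POI input; the generator accepts tabular lat/lon records per the hunks above
poi_points = [
    {"latitude": -1.29, "longitude": 36.82},
    {"latitude": -1.30, "longitude": 36.80},
]
generator = PoiViewGenerator(poi_points)        # constructor signature assumed

sites = gpd.read_file("sites.geojson")          # hypothetical point layer with an "id" column

nearest_df = generator.map_nearest_points(
    points_df=sites,
    id_column="id",
    output_prefix="nearest_site",
)
# The mapping is no longer written into the view automatically; merge it explicitly:
generator._update_view(nearest_df)
```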
@@ -405,12 +406,14 @@ class PoiViewGenerator:
             self.logger.info("No Google buildings data found for the provided POIs")
             return self.view

-
+        mapped_data = self.map_nearest_points(
             points_df=buildings_df,
             id_column="full_plus_code",
             output_prefix="nearest_google_building",
             **kwargs,
         )
+        self._update_view(mapped_data)
+        return self.view

     def map_ms_buildings(
         self,
|
|
455
458
|
axis=1,
|
456
459
|
)
|
457
460
|
|
458
|
-
|
461
|
+
mapped_data = self.map_nearest_points(
|
459
462
|
points_df=building_centroids,
|
460
463
|
id_column="building_id",
|
461
464
|
output_prefix="nearest_ms_building",
|
462
465
|
**kwargs,
|
463
466
|
)
|
467
|
+
self._update_view(mapped_data)
|
468
|
+
return self.view
|
464
469
|
|
465
470
|
def map_zonal_stats(
|
466
471
|
self,
|
@@ -481,9 +486,10 @@ class PoiViewGenerator:
         3. Polygon aggregation: Aggregates polygon data to POI buffers with optional area weighting

         Args:
-            data (Union[List[TifProcessor], gpd.GeoDataFrame]):
-                Either a list of TifProcessor objects
-                or a GeoDataFrame containing polygon
+            data (Union[TifProcessor, List[TifProcessor], gpd.GeoDataFrame]):
+                Either a TifProcessor object, a list of TifProcessor objects (which will be merged
+                into a single TifProcessor for processing), or a GeoDataFrame containing polygon
+                data to aggregate.
             stat (str, optional):
                 For raster data: Statistic to calculate ("sum", "mean", "median", "min", "max").
                 For polygon data: Aggregation method to use.
@@ -512,20 +518,32 @@ class PoiViewGenerator:
             or if required parameters (value_column) are missing for polygon data.
         """

-
-        results_df = pd.DataFrame({"poi_id": self.points_gdf["poi_id"]})
+        raster_processor: Optional[TifProcessor] = None

-
+        if isinstance(data, TifProcessor):
+            raster_processor = data
+        elif isinstance(data, list) and all(isinstance(x, TifProcessor) for x in data):
             if not data:
-                self.logger.info("No valid raster data
+                self.logger.info("No valid raster data provided")
                 return self.view

-
+            if len(data) > 1:
+                all_source_paths = [tp.dataset_path for tp in data]

-
-
-
+                self.logger.info(
+                    f"Merging {len(all_source_paths)} rasters into a single TifProcessor for zonal statistics."
+                )
+                raster_processor = TifProcessor(
+                    dataset_path=all_source_paths,
+                    data_store=self.data_store,
+                    **kwargs,
                 )
+            else:
+                raster_processor = data[0]
+
+        if raster_processor:
+            results_df = pd.DataFrame({"poi_id": self.points_gdf["poi_id"]})
+            raster_crs = raster_processor.crs

             if map_radius_meters is not None:
                 self.logger.info(
@@ -539,11 +557,9 @@ class PoiViewGenerator:
                 )

                 # Calculate zonal statistics
-                sampled_values =
-                    tif_processors=data,
+                sampled_values = raster_processor.sample_by_polygons(
                     polygon_list=buffers_gdf.to_crs(raster_crs).geometry,
                     stat=stat,
-                    **kwargs,
                 )
             else:
@@ -551,8 +567,8 @@ class PoiViewGenerator:
                 coord_list = (
                     self.points_gdf.to_crs(raster_crs).get_coordinates().to_numpy()
                 )
-                sampled_values =
-
+                sampled_values = raster_processor.sample_by_coordinates(
+                    coordinate_list=coord_list, **kwargs
                 )

             results_df[output_column] = sampled_values
@@ -560,24 +576,16 @@ class PoiViewGenerator:
         elif isinstance(data, gpd.GeoDataFrame):
             # Handle polygon data
             if data.empty:
-                self.logger.info("No valid
-                return
+                self.logger.info("No valid GeoDataFrame data provided")
+                return pd.DataFrame(
+                    columns=["poi_id", output_column]
+                )  # Return empty DataFrame

             if map_radius_meters is None:
-                raise ValueError("map_radius_meters must be provided for polygon data")
-
-            if value_column is None:
-                raise ValueError("value_column must be provided for polygon data")
-
-            if value_column not in data.columns:
                 raise ValueError(
-
+                    "map_radius_meters must be provided for for GeoDataFrame data"
                 )

-            self.logger.info(
-                f"Aggregating {value_column} within {map_radius_meters}m buffers around POIs using predicate '{predicate}'"
-            )
-
             # Create buffers around POIs
             buffer_gdf = buffer_geodataframe(
                 self.points_gdf,
@@ -585,34 +593,92 @@ class PoiViewGenerator:
                 cap_style="round",
             )

-
-
-
-
-
-
-
-
-
-
-
-
+            if any(data.geom_type.isin(["MultiPoint", "Point"])):
+
+                self.logger.info(
+                    f"Aggregating point data within {map_radius_meters}m buffers around POIs using predicate '{predicate}'"
+                )
+
+                # If no value_column, default to 'count'
+                if value_column is None:
+                    actual_stat = "count"
+                    self.logger.warning(
+                        "No value_column provided for point data. Defaulting to 'count' aggregation."
+                    )
+                else:
+                    actual_stat = stat
+                    if value_column not in data.columns:
+                        raise ValueError(
+                            f"Value column '{value_column}' not found in input GeoDataFrame."
+                        )
+
+                aggregation_result_gdf = aggregate_points_to_zones(
+                    points=data,
+                    zones=buffer_gdf,
+                    value_columns=value_column,
+                    aggregation=actual_stat,
+                    point_zone_predicate=predicate,  # can't be `fractional`
+                    zone_id_column="poi_id",
+                    output_suffix="",
+                    drop_geometry=True,
+                    **kwargs,
+                )
+
+                output_col_from_agg = (
+                    f"{value_column}_{actual_stat}" if value_column else "point_count"
+                )
+                results_df = aggregation_result_gdf[["poi_id", output_col_from_agg]]
+
+                if output_column != "zonal_stat":
+                    results_df = results_df.rename(
+                        columns={output_col_from_agg: output_column}
+                    )
+
+            else:
+                if value_column is None:
+                    raise ValueError(
+                        "value_column must be provided for polygon data aggregation."
+                    )
+                if value_column not in data.columns:
+                    raise ValueError(
+                        f"Value column '{value_column}' not found in input GeoDataFrame."
+                    )
+                self.logger.info(
+                    f"Aggregating polygon data within {map_radius_meters}m buffers around POIs using predicate '{predicate}'"
+                )

-
+                # Aggregate polygons to buffers
+                aggregation_result_gdf = aggregate_polygons_to_zones(
+                    polygons=data,
+                    zones=buffer_gdf,
+                    value_columns=value_column,
+                    aggregation=stat,
+                    predicate=predicate,
+                    zone_id_column="poi_id",
+                    output_suffix="",
+                    drop_geometry=True,
+                    **kwargs,
+                )
+
+                output_col_from_agg = value_column
+
+                results_df = aggregation_result_gdf[["poi_id", output_col_from_agg]]

                 if output_column != "zonal_stat":
-            results_df = results_df.rename(
+                    results_df = results_df.rename(
+                        columns={output_col_from_agg: output_column}
+                    )

         else:
             raise ValueError(
                 "data must be either a list of TifProcessor objects or a GeoDataFrame"
             )

-        self._update_view(results_df)
+        # self._update_view(results_df) # Removed direct view update
         self.logger.info(
             f"Zonal statistics mapping complete for column(s) derived from '{output_column}' or '{value_column}'"
         )
-        return
+        return results_df  # Return the DataFrame

     def map_built_s(
         self,
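Taken together, map_zonal_stats now accepts a single TifProcessor, a list of TifProcessors (merged into one internally), or a GeoDataFrame of point or polygon features, and it returns the per-POI results instead of updating the view. A hedged usage sketch, reusing the generator from the earlier sketch (file paths, the clinics GeoDataFrame and column names are placeholders, not from this diff):

```python
from gigaspatial.processing.tif_processor import TifProcessor

# Raster input: a single TifProcessor (a list would be merged into one internally)
tif = TifProcessor(dataset_path="population.tif")  # hypothetical local raster
pop_df = generator.map_zonal_stats(
    data=tif,
    stat="sum",
    map_radius_meters=500,
    output_column="pop_500m",
)
generator._update_view(pop_df)  # results are returned, not merged automatically

# Vector input: point features default to a per-buffer count when value_column is omitted
clinic_count_df = generator.map_zonal_stats(
    data=clinics_gdf,            # hypothetical point GeoDataFrame
    map_radius_meters=1000,
    output_column="clinic_count",
)
generator._update_view(clinic_count_df)
```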
@@ -654,16 +720,20 @@ class PoiViewGenerator:
         )
         self.logger.info("Loading GHSL Built Surface raster tiles")
         tif_processors = handler.load_data(
-            self.points_gdf.copy(),
+            self.points_gdf.copy(),
+            ensure_available=self.config.ensure_available,
+            merge_rasters=True,
         )

-
+        mapped_data = self.map_zonal_stats(
             data=tif_processors,
             stat=stat,
             map_radius_meters=map_radius_meters,
             output_column=output_column,
             **kwargs,
         )
+        self._update_view(mapped_data)
+        return self.view

     def map_smod(
         self,
@@ -702,14 +772,18 @@ class PoiViewGenerator:

         self.logger.info("Loading GHSL SMOD raster tiles")
         tif_processors = handler.load_data(
-            self.points_gdf.copy(),
+            self.points_gdf.copy(),
+            ensure_available=self.config.ensure_available,
+            merge_rasters=True,
        )

-
+        mapped_data = self.map_zonal_stats(
             data=tif_processors,
             output_column=output_column,
             **kwargs,
         )
+        self._update_view(mapped_data)
+        return self.view

     def map_wp_pop(
         self,
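map_built_s and map_smod now request merged rasters from the GHSL handler (merge_rasters=True) and update the view themselves before returning it. A rough call sketch under those assumptions, with placeholder values and the generator from the earlier sketches:

```python
# Built-up surface aggregated over 150 m buffers; GHSL tiles are merged before sampling
view = generator.map_built_s(map_radius_meters=150, stat="sum")

# Degree-of-urbanisation class sampled at each POI location
view = generator.map_smod(output_column="smod_class")  # output column name is a placeholder
```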
@@ -718,17 +792,25 @@ class PoiViewGenerator:
         resolution=1000,
         predicate: Literal[
             "centroid_within", "intersects", "fractional", "within"
-        ] = "
+        ] = "intersects",
         output_column: str = "population",
         **kwargs,
     ):
-
-
+        # Ensure country is always a list for consistent handling
+        countries_list = [country] if isinstance(country, str) else country

         handler = WPPopulationHandler(
-
+            resolution=resolution,
+            data_store=self.data_store,
+            **kwargs,
         )

+        # Restrict to single country for age_structures project
+        if handler.config.project == "age_structures" and len(countries_list) > 1:
+            raise ValueError(
+                "For 'age_structures' project, only a single country can be processed at a time."
+            )
+
         self.logger.info(
             f"Mapping WorldPop Population data (year: {handler.config.year}, resolution: {handler.config.resolution}m)"
         )
@@ -738,36 +820,98 @@ class PoiViewGenerator:
                 "Fractional aggregations only supported for datasets with 1000m resolution. Using `intersects` as predicate"
             )
             predicate = "intersects"
-
+
+        data_to_process: Union[List[TifProcessor], gpd.GeoDataFrame, pd.DataFrame]
+
         if predicate == "centroid_within":
-
-
-
-
+            if handler.config.project == "age_structures":
+                # Load individual tif processors for the single country
+                all_tif_processors = handler.load_data(
+                    countries_list[0],
+                    ensure_available=self.config.ensure_available,
+                    **kwargs,
                 )
+
+                # Sum results from each tif_processor separately
+                summed_results_by_poi = {
+                    poi_id: 0.0 for poi_id in self.points_gdf["poi_id"].unique()
+                }
+
+                self.logger.info(
+                    f"Sampling individual age_structures rasters using 'sum' statistic and summing per POI."
+                )
+                for tif_processor in all_tif_processors:
+                    single_raster_df = self.map_zonal_stats(
+                        data=tif_processor,
+                        stat="sum",
+                        map_radius_meters=map_radius_meters,
+                        value_column="pixel_value",
+                        predicate=predicate,
+                        output_column=output_column,  # This output_column will be in the temporary DF
+                        **kwargs,
+                    )
+                    # Add values from this single raster to the cumulative sum
+                    for _, row in single_raster_df.iterrows():
+                        summed_results_by_poi[row["poi_id"]] += row[output_column]
+
+                # Convert the summed dictionary back to a DataFrame
+                data_to_process = pd.DataFrame(
+                    list(summed_results_by_poi.items()),
+                    columns=["poi_id", output_column],
+                )
+
+            else:
+                # Existing behavior for non-age_structures projects or if merging is fine
+                # 'data_to_process' will be a list of TifProcessor objects, which map_zonal_stats will merge
+                data_to_process = []
+                for c in countries_list:
+                    data_to_process.extend(
+                        handler.load_data(
+                            c, ensure_available=self.config.ensure_available, **kwargs
+                        )
+                    )
         else:
-
+            # 'data_to_process' will be a GeoDataFrame
+            data_to_process = pd.concat(
                 [
                     handler.load_into_geodataframe(
-                    c, ensure_available=self.config.ensure_available
+                        c, ensure_available=self.config.ensure_available, **kwargs
                     )
-                for c in
+                    for c in countries_list  # Original iteration over countries_list
                 ],
                 ignore_index=True,
             )

-        self.logger.info(
-
-        return self.map_zonal_stats(
-            data,
-            stat="sum",
-            map_radius_meters=map_radius_meters,
-            value_column="pixel_value",
-            predicate=predicate,
-            output_column=output_column,
-            **kwargs
+        self.logger.info(
+            f"Mapping WorldPop Population data into {map_radius_meters}m zones around POIs using 'sum' statistic"
         )

+        final_mapped_df: pd.DataFrame
+
+        # If 'data_to_process' is already the summed DataFrame (from age_structures/centroid_within branch),
+        # use it directly.
+        if (
+            isinstance(data_to_process, pd.DataFrame)
+            and output_column in data_to_process.columns
+            and "poi_id" in data_to_process.columns
+        ):
+            final_mapped_df = data_to_process
+        else:
+            # For other cases, proceed with the original call to map_zonal_stats
+            final_mapped_df = self.map_zonal_stats(
+                data=data_to_process,
+                stat="sum",
+                map_radius_meters=map_radius_meters,
+                value_column="pixel_value",
+                predicate=predicate,
+                output_column=output_column,
+                **kwargs,
+            )
+        self._update_view(
+            final_mapped_df
+        )  # Update the view with the final mapped DataFrame
+        return self.view
+
     def save_view(
         self,
         name: str,