giga-spatial 0.7.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: giga-spatial
3
- Version: 0.7.0
3
+ Version: 0.7.1
4
4
  Summary: A package for spatial data download & processing
5
5
  Author: Utku Can Ozturk
6
6
  Author-email: utkucanozturk@gmail.com
@@ -1,6 +1,6 @@
1
- giga_spatial-0.7.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
2
- gigaspatial/__init__.py,sha256=RaANGbRu5e-vehwXI1-Qe2ggPPfs1TQaZj072JdbLk4,22
3
- gigaspatial/config.py,sha256=pLbxGc08OHT2IfTBzZVuIJTPR2vvg3KTFfvciOtRswk,9304
1
+ giga_spatial-0.7.1.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
2
+ gigaspatial/__init__.py,sha256=2KJZDSMOG7KS82AxYOrZ4ZihYxX0wjfUjDsIZh3L024,22
3
+ gigaspatial/config.py,sha256=xqP800jaDMYE7-cgHXSGGwuF1fmo5Q56-DqpJ61p8a0,9382
4
4
  gigaspatial/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  gigaspatial/core/io/__init__.py,sha256=stlpgEeHf5KIb2ZW8yEbdJK5iq6n_wX4DPmKyR9PK-w,317
6
6
  gigaspatial/core/io/adls_data_store.py,sha256=Pr1FKpzFb2TSeidMSGhhNSDk1tFxAi4G1aPZ_No_xUs,15783
@@ -13,39 +13,40 @@ gigaspatial/core/io/writers.py,sha256=asb56ZHQEWO2rdilIq7QywDRk8yfebecWv1KwzUpaX
13
13
  gigaspatial/core/schemas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  gigaspatial/core/schemas/entity.py,sha256=QAhEW0-JgdWh9pjKGbB5ArvqtVK85ayYZJPgjdb-IKw,8590
15
15
  gigaspatial/generators/__init__.py,sha256=cKbMTW7Eh-oDPtM9OfGP14_ekVwc2_7Je7n_pr_anig,223
16
- gigaspatial/generators/poi.py,sha256=FMYH2yPFuCeYiwj-pHYe83bAxXPkQP28VDQx5L2eEnQ,35869
16
+ gigaspatial/generators/poi.py,sha256=tlLqVsXOjl83OL3Z6yajz9BLtZVbhZF67A-el-F2oU4,42356
17
17
  gigaspatial/generators/zonal/__init__.py,sha256=egnpvGVeIOS2Zg516AT84tJnIqS4owxmMLLmBQJmK7Y,301
18
18
  gigaspatial/generators/zonal/admin.py,sha256=rgOyQX3f_g9qnXqrf-NkR2GEdwOqjNuPNe1H7AUVsfg,3698
19
- gigaspatial/generators/zonal/base.py,sha256=aoq9AmawPnxu04YI7hPmJzvBTyuUqBQuMZAEUdiVStg,22818
20
- gigaspatial/generators/zonal/geometry.py,sha256=iDpITqTTU_Tzd0eEO9suuoOA9Dpc2kbtwRgOX8C5zAE,21485
19
+ gigaspatial/generators/zonal/base.py,sha256=byyfv4dNpOMzszBMMbKQlZuHMlq2e84Hboh0W58f-8k,22959
20
+ gigaspatial/generators/zonal/geometry.py,sha256=0EF3VXAVOcj5AtcubGWU325biLaZ76zt3Sf1uyKEE88,23358
21
21
  gigaspatial/generators/zonal/mercator.py,sha256=fA02j30PWB5BVjrbNGCMjiOw-ds182yK7R27z8mWFug,5291
22
22
  gigaspatial/grid/__init__.py,sha256=ypSSyZ4fYtMNc4IG7chSD7NkUfS2bv9KWRsKR1D9pDI,80
23
23
  gigaspatial/grid/h3.py,sha256=0d_rNNvwxc0LDD2qKCd2gTy_16_5sgPwJwk29z5pE9U,15310
24
24
  gigaspatial/grid/mercator_tiles.py,sha256=dzUDbXe0QHEC4s334IVT76d41D35dbd-QiN2LnkDdvA,11240
25
25
  gigaspatial/handlers/__init__.py,sha256=T0_6OxXQ59yTu9g2P6P9vnepudOWp_85R4WQKxRF94c,1618
26
- gigaspatial/handlers/base.py,sha256=wxkB8fniOIzgsg0WF9WkaZbAmFhNkqtZ_LGD8Ch53Mw,27238
26
+ gigaspatial/handlers/base.py,sha256=JSZ64eWRoF17bKvVj2Bq3lFA00uJ4i6qlwp72O4HSv4,27885
27
27
  gigaspatial/handlers/boundaries.py,sha256=jtWyQt3iAzS77mbAOi7mjh3cv_YCV3uB_r1h56gCfeY,20729
28
- gigaspatial/handlers/ghsl.py,sha256=aSEVQVANzJf8O8TiQYmfwyeM43ZaO65VJHmiuLSQfLs,30524
29
- gigaspatial/handlers/giga.py,sha256=F5ZfcE37a24X-c6Xhyt72C9eZZbyN_gV7w_InxKFMQQ,28348
28
+ gigaspatial/handlers/ghsl.py,sha256=QvW7GeHWQSrPdxXxmR_61UcfBIHziNpCHCxRXg1nnug,31285
29
+ gigaspatial/handlers/giga.py,sha256=tNy2O0YAdy4iYWjMNKUadV_W2HxcTsb-aG4te6wvMOk,28660
30
30
  gigaspatial/handlers/google_open_buildings.py,sha256=Liqk7qJhDtB4Ia4uhBe44LFcf-XVKBjRfj-pWlE5erY,16594
31
31
  gigaspatial/handlers/hdx.py,sha256=1m6oG1DeEC_RLFtb6CrTReWpbQ5uG2e8EIt-IUkZbaI,18122
32
+ gigaspatial/handlers/healthsites.py,sha256=gJCnKDXE4Gu-3Z6Io0-EknXfSdOxBHdr4-JV0z9bul0,12829
32
33
  gigaspatial/handlers/mapbox_image.py,sha256=M_nkJ_b1PD8FG1ajVgSycCb0NRTAI_SLpHdzszNetKA,7786
33
34
  gigaspatial/handlers/maxar_image.py,sha256=kcc8uGljQB0Yh0MKBA7lT7KwBbNZwFzuyBklR3db1P4,10204
34
35
  gigaspatial/handlers/microsoft_global_buildings.py,sha256=bQ5WHIv3v0wWrZZUbZkKPRjgdlqIxlK7CV_0zSvdrTw,20292
35
36
  gigaspatial/handlers/ookla_speedtest.py,sha256=EcvSAxJZ9GPfzYnT_C85Qgy2ecc9ndf70Pklk53OdC8,6506
36
37
  gigaspatial/handlers/opencellid.py,sha256=KuJqd-5-RO5ZzyDaBSrTgCK2ib5N_m3RUcPlX5heWwI,10683
37
- gigaspatial/handlers/osm.py,sha256=vUbdUm6lO2f8YyU7o4qUSkWMxlZElp7EPBFlneRaeo0,16641
38
+ gigaspatial/handlers/osm.py,sha256=gyY_a2OoRZjL14iOTEKLCPbKTcmSDHc72Q0hluaENrY,25464
38
39
  gigaspatial/handlers/overture.py,sha256=lKeNw00v5Qia7LdWORuYihnlKEqxE9m38tdeRrvag9k,4218
39
40
  gigaspatial/handlers/rwi.py,sha256=eAaplDysVeBhghJusYUKZYbKL5hW-klWvi8pWhILQkY,4962
40
41
  gigaspatial/handlers/unicef_georepo.py,sha256=ODYNvkU_UKgOHXT--0MqmJ4Uk6U1_mp9xgehbTzKpX8,31924
41
- gigaspatial/handlers/worldpop.py,sha256=jV166EP02Xdj8jiT8aQi4sexds8Qd3KRGHXqq70_Sdk,30161
42
+ gigaspatial/handlers/worldpop.py,sha256=5gSAhTPs3l8cVflx_Vp4ZxhvcUKKb_r2hGmc_sXCgio,39893
42
43
  gigaspatial/processing/__init__.py,sha256=QDVL-QbLCrIb19lrajP7LrHNdGdnsLeGcvAs_jQpdRM,183
43
44
  gigaspatial/processing/algorithms.py,sha256=6fBCwbZrI_ISWJ7UpkH6moq1vw-7dBy14yXSLHZprqY,6591
44
- gigaspatial/processing/geo.py,sha256=i8MMXrf05dik-LkmeSZ45siWLT9Dgk0M-tjWKx5jVdk,41686
45
+ gigaspatial/processing/geo.py,sha256=ekGbU1sEn10zX3esYIEba6eZyhpx9WOjPlVEkspqcNk,41674
45
46
  gigaspatial/processing/sat_images.py,sha256=YUbH5MFNzl6NX49Obk14WaFcr1s3SyGJIOk-kRpbBNg,1429
46
- gigaspatial/processing/tif_processor.py,sha256=NSTxxeRtLS70Ki_AjaOfSGYpAFevkQnM-ZfYaE1RF7M,46793
47
+ gigaspatial/processing/tif_processor.py,sha256=WzD3inLUdr50fl11WPF6czvG1d8s7y2qdTBOc5Yo2yw,66400
47
48
  gigaspatial/processing/utils.py,sha256=HC85vGKQakxlkoQAkZmeAXWHsenAwTIRn7jPKUA7x20,1500
48
- giga_spatial-0.7.0.dist-info/METADATA,sha256=PZ1IWjRm10ol2wfFifI1Sp5tm2v-dhdr-ip0NiRKB_s,9277
49
- giga_spatial-0.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
50
- giga_spatial-0.7.0.dist-info/top_level.txt,sha256=LZsccgw6H4zXT7m6Y4XChm-Y5LjHAwZ2hkGN_B3ExmI,12
51
- giga_spatial-0.7.0.dist-info/RECORD,,
49
+ giga_spatial-0.7.1.dist-info/METADATA,sha256=Fo_X_aPJg9Mv1GfX4esviiljEhcK4n7imbACWKrNo6s,9277
50
+ giga_spatial-0.7.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
51
+ giga_spatial-0.7.1.dist-info/top_level.txt,sha256=LZsccgw6H4zXT7m6Y4XChm-Y5LjHAwZ2hkGN_B3ExmI,12
52
+ giga_spatial-0.7.1.dist-info/RECORD,,
gigaspatial/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.7.0"
1
+ __version__ = "0.7.1"
gigaspatial/config.py CHANGED
@@ -38,6 +38,7 @@ class Config(BaseSettings):
38
38
  GIGA_SCHOOL_MEASUREMENTS_API_KEY: str = Field(
39
39
  default="", alias="GIGA_SCHOOL_MEASUREMENTS_API_KEY"
40
40
  )
41
+ HEALTHSITES_API_KEY: str = Field(default="", alias="HEALTHSITES_API_KEY")
41
42
 
42
43
  ROOT_DATA_DIR: Path = Field(
43
44
  default=Path("."),
@@ -18,14 +18,11 @@ from gigaspatial.processing.geo import (
18
18
  convert_to_geodataframe,
19
19
  buffer_geodataframe,
20
20
  detect_coordinate_columns,
21
+ aggregate_points_to_zones,
21
22
  aggregate_polygons_to_zones,
22
23
  get_centroids,
23
24
  )
24
- from gigaspatial.processing.tif_processor import (
25
- sample_multiple_tifs_by_polygons,
26
- sample_multiple_tifs_by_coordinates,
27
- TifProcessor,
28
- )
25
+ from gigaspatial.processing.tif_processor import TifProcessor
29
26
  from scipy.spatial import cKDTree
30
27
 
31
28
 
@@ -165,7 +162,9 @@ class PoiViewGenerator:
165
162
  raise ValueError(
166
163
  f"Column '{poi_id_column}' provided as 'poi_id_column' contains duplicate values."
167
164
  )
168
- return convert_to_geodataframe(points)
165
+ return convert_to_geodataframe(
166
+ points, lat_col="latitude", lon_col="longitude"
167
+ )
169
168
  except ValueError as e:
170
169
  raise ValueError(
171
170
  f"Could not detect coordinate columns in DataFrame: {str(e)}"
@@ -202,7 +201,9 @@ class PoiViewGenerator:
202
201
  raise ValueError(
203
202
  f"Column '{poi_id_column}' provided as 'poi_id_column' contains duplicate values."
204
203
  )
205
- return convert_to_geodataframe(df)
204
+ return convert_to_geodataframe(
205
+ df, lat_col="latitude", lon_col="longitude"
206
+ )
206
207
  except ValueError as e:
207
208
  raise ValueError(
208
209
  f"Could not detect coordinate columns in dictionary list: {str(e)}"
@@ -365,11 +366,11 @@ class PoiViewGenerator:
365
366
  f"{output_prefix}_distance": dist,
366
367
  }
367
368
  )
368
- self._update_view(temp_result_df)
369
+ # self._update_view(temp_result_df) # Removed direct view update
369
370
  self.logger.info(
370
371
  f"Nearest points mapping complete with prefix '{output_prefix}'"
371
372
  )
372
- return self.view
373
+ return temp_result_df # Return the DataFrame
373
374
 
374
375
  def map_google_buildings(
375
376
  self,
@@ -405,12 +406,14 @@ class PoiViewGenerator:
405
406
  self.logger.info("No Google buildings data found for the provided POIs")
406
407
  return self.view
407
408
 
408
- return self.map_nearest_points(
409
+ mapped_data = self.map_nearest_points(
409
410
  points_df=buildings_df,
410
411
  id_column="full_plus_code",
411
412
  output_prefix="nearest_google_building",
412
413
  **kwargs,
413
414
  )
415
+ self._update_view(mapped_data)
416
+ return self.view
414
417
 
415
418
  def map_ms_buildings(
416
419
  self,
@@ -455,12 +458,14 @@ class PoiViewGenerator:
455
458
  axis=1,
456
459
  )
457
460
 
458
- return self.map_nearest_points(
461
+ mapped_data = self.map_nearest_points(
459
462
  points_df=building_centroids,
460
463
  id_column="building_id",
461
464
  output_prefix="nearest_ms_building",
462
465
  **kwargs,
463
466
  )
467
+ self._update_view(mapped_data)
468
+ return self.view
464
469
 
465
470
  def map_zonal_stats(
466
471
  self,
@@ -481,9 +486,10 @@ class PoiViewGenerator:
481
486
  3. Polygon aggregation: Aggregates polygon data to POI buffers with optional area weighting
482
487
 
483
488
  Args:
484
- data (Union[List[TifProcessor], gpd.GeoDataFrame]):
485
- Either a list of TifProcessor objects containing raster data to sample,
486
- or a GeoDataFrame containing polygon data to aggregate.
489
+ data (Union[TifProcessor, List[TifProcessor], gpd.GeoDataFrame]):
490
+ Either a TifProcessor object, a list of TifProcessor objects (which will be merged
491
+ into a single TifProcessor for processing), or a GeoDataFrame containing polygon
492
+ data to aggregate.
487
493
  stat (str, optional):
488
494
  For raster data: Statistic to calculate ("sum", "mean", "median", "min", "max").
489
495
  For polygon data: Aggregation method to use.
@@ -512,20 +518,32 @@ class PoiViewGenerator:
512
518
  or if required parameters (value_column) are missing for polygon data.
513
519
  """
514
520
 
515
- if isinstance(data, list) and all(isinstance(x, TifProcessor) for x in data):
516
- results_df = pd.DataFrame({"poi_id": self.points_gdf["poi_id"]})
521
+ raster_processor: Optional[TifProcessor] = None
517
522
 
518
- # Handle raster data
523
+ if isinstance(data, TifProcessor):
524
+ raster_processor = data
525
+ elif isinstance(data, list) and all(isinstance(x, TifProcessor) for x in data):
519
526
  if not data:
520
- self.logger.info("No valid raster data found for the provided POIs")
527
+ self.logger.info("No valid raster data provided")
521
528
  return self.view
522
529
 
523
- raster_crs = data[0].crs
530
+ if len(data) > 1:
531
+ all_source_paths = [tp.dataset_path for tp in data]
524
532
 
525
- if not all(tp.crs == raster_crs for tp in data):
526
- raise ValueError(
527
- "All TifProcessors must have the same CRS for zonal statistics."
533
+ self.logger.info(
534
+ f"Merging {len(all_source_paths)} rasters into a single TifProcessor for zonal statistics."
535
+ )
536
+ raster_processor = TifProcessor(
537
+ dataset_path=all_source_paths,
538
+ data_store=self.data_store,
539
+ **kwargs,
528
540
  )
541
+ else:
542
+ raster_processor = data[0]
543
+
544
+ if raster_processor:
545
+ results_df = pd.DataFrame({"poi_id": self.points_gdf["poi_id"]})
546
+ raster_crs = raster_processor.crs
529
547
 
530
548
  if map_radius_meters is not None:
531
549
  self.logger.info(
@@ -539,11 +557,9 @@ class PoiViewGenerator:
539
557
  )
540
558
 
541
559
  # Calculate zonal statistics
542
- sampled_values = sample_multiple_tifs_by_polygons(
543
- tif_processors=data,
560
+ sampled_values = raster_processor.sample_by_polygons(
544
561
  polygon_list=buffers_gdf.to_crs(raster_crs).geometry,
545
562
  stat=stat,
546
- **kwargs,
547
563
  )
548
564
  else:
549
565
  self.logger.info(f"Sampling {stat} at POI locations")
@@ -551,8 +567,8 @@ class PoiViewGenerator:
551
567
  coord_list = (
552
568
  self.points_gdf.to_crs(raster_crs).get_coordinates().to_numpy()
553
569
  )
554
- sampled_values = sample_multiple_tifs_by_coordinates(
555
- tif_processors=data, coordinate_list=coord_list, **kwargs
570
+ sampled_values = raster_processor.sample_by_coordinates(
571
+ coordinate_list=coord_list, **kwargs
556
572
  )
557
573
 
558
574
  results_df[output_column] = sampled_values
@@ -560,24 +576,16 @@ class PoiViewGenerator:
560
576
  elif isinstance(data, gpd.GeoDataFrame):
561
577
  # Handle polygon data
562
578
  if data.empty:
563
- self.logger.info("No valid polygon data found for the provided POIs")
564
- return self.points_gdf.copy()
579
+ self.logger.info("No valid GeoDataFrame data provided")
580
+ return pd.DataFrame(
581
+ columns=["poi_id", output_column]
582
+ ) # Return empty DataFrame
565
583
 
566
584
  if map_radius_meters is None:
567
- raise ValueError("map_radius_meters must be provided for polygon data")
568
-
569
- if value_column is None:
570
- raise ValueError("value_column must be provided for polygon data")
571
-
572
- if value_column not in data.columns:
573
585
  raise ValueError(
574
- f"Value column '{value_column}' not found in input polygon GeoDataFrame."
586
+ "map_radius_meters must be provided for for GeoDataFrame data"
575
587
  )
576
588
 
577
- self.logger.info(
578
- f"Aggregating {value_column} within {map_radius_meters}m buffers around POIs using predicate '{predicate}'"
579
- )
580
-
581
589
  # Create buffers around POIs
582
590
  buffer_gdf = buffer_geodataframe(
583
591
  self.points_gdf,
@@ -585,34 +593,92 @@ class PoiViewGenerator:
585
593
  cap_style="round",
586
594
  )
587
595
 
588
- # Aggregate polygons to buffers
589
- aggregation_result_gdf = aggregate_polygons_to_zones(
590
- polygons=data,
591
- zones=buffer_gdf,
592
- value_columns=value_column,
593
- aggregation=stat,
594
- predicate=predicate,
595
- zone_id_column="poi_id",
596
- output_suffix="",
597
- drop_geometry=True,
598
- **kwargs,
599
- )
596
+ if any(data.geom_type.isin(["MultiPoint", "Point"])):
597
+
598
+ self.logger.info(
599
+ f"Aggregating point data within {map_radius_meters}m buffers around POIs using predicate '{predicate}'"
600
+ )
601
+
602
+ # If no value_column, default to 'count'
603
+ if value_column is None:
604
+ actual_stat = "count"
605
+ self.logger.warning(
606
+ "No value_column provided for point data. Defaulting to 'count' aggregation."
607
+ )
608
+ else:
609
+ actual_stat = stat
610
+ if value_column not in data.columns:
611
+ raise ValueError(
612
+ f"Value column '{value_column}' not found in input GeoDataFrame."
613
+ )
614
+
615
+ aggregation_result_gdf = aggregate_points_to_zones(
616
+ points=data,
617
+ zones=buffer_gdf,
618
+ value_columns=value_column,
619
+ aggregation=actual_stat,
620
+ point_zone_predicate=predicate, # can't be `fractional``
621
+ zone_id_column="poi_id",
622
+ output_suffix="",
623
+ drop_geometry=True,
624
+ **kwargs,
625
+ )
626
+
627
+ output_col_from_agg = (
628
+ f"{value_column}_{actual_stat}" if value_column else "point_count"
629
+ )
630
+ results_df = aggregation_result_gdf[["poi_id", output_col_from_agg]]
631
+
632
+ if output_column != "zonal_stat":
633
+ results_df = results_df.rename(
634
+ columns={output_col_from_agg: output_column}
635
+ )
636
+
637
+ else:
638
+ if value_column is None:
639
+ raise ValueError(
640
+ "value_column must be provided for polygon data aggregation."
641
+ )
642
+ if value_column not in data.columns:
643
+ raise ValueError(
644
+ f"Value column '{value_column}' not found in input GeoDataFrame."
645
+ )
646
+ self.logger.info(
647
+ f"Aggregating polygon data within {map_radius_meters}m buffers around POIs using predicate '{predicate}'"
648
+ )
600
649
 
601
- results_df = aggregation_result_gdf[["poi_id", value_column]]
650
+ # Aggregate polygons to buffers
651
+ aggregation_result_gdf = aggregate_polygons_to_zones(
652
+ polygons=data,
653
+ zones=buffer_gdf,
654
+ value_columns=value_column,
655
+ aggregation=stat,
656
+ predicate=predicate,
657
+ zone_id_column="poi_id",
658
+ output_suffix="",
659
+ drop_geometry=True,
660
+ **kwargs,
661
+ )
662
+
663
+ output_col_from_agg = value_column
664
+
665
+ results_df = aggregation_result_gdf[["poi_id", output_col_from_agg]]
602
666
 
603
667
  if output_column != "zonal_stat":
604
- results_df = results_df.rename(columns={value_column: output_column})
668
+ results_df = results_df.rename(
669
+ columns={output_col_from_agg: output_column}
670
+ )
605
671
 
606
672
  else:
607
673
  raise ValueError(
608
674
  "data must be either a list of TifProcessor objects or a GeoDataFrame"
609
675
  )
610
676
 
611
- self._update_view(results_df)
677
+ # self._update_view(results_df) # Removed direct view update
612
678
  self.logger.info(
613
679
  f"Zonal statistics mapping complete for column(s) derived from '{output_column}' or '{value_column}'"
614
680
  )
615
- return self.view
681
+ return results_df # Return the DataFrame
616
682
 
617
683
  def map_built_s(
618
684
  self,
@@ -654,16 +720,20 @@ class PoiViewGenerator:
654
720
  )
655
721
  self.logger.info("Loading GHSL Built Surface raster tiles")
656
722
  tif_processors = handler.load_data(
657
- self.points_gdf.copy(), ensure_available=self.config.ensure_available
723
+ self.points_gdf.copy(),
724
+ ensure_available=self.config.ensure_available,
725
+ merge_rasters=True,
658
726
  )
659
727
 
660
- return self.map_zonal_stats(
728
+ mapped_data = self.map_zonal_stats(
661
729
  data=tif_processors,
662
730
  stat=stat,
663
731
  map_radius_meters=map_radius_meters,
664
732
  output_column=output_column,
665
733
  **kwargs,
666
734
  )
735
+ self._update_view(mapped_data)
736
+ return self.view
667
737
 
668
738
  def map_smod(
669
739
  self,
@@ -702,14 +772,18 @@ class PoiViewGenerator:
702
772
 
703
773
  self.logger.info("Loading GHSL SMOD raster tiles")
704
774
  tif_processors = handler.load_data(
705
- self.points_gdf.copy(), ensure_available=self.config.ensure_available
775
+ self.points_gdf.copy(),
776
+ ensure_available=self.config.ensure_available,
777
+ merge_rasters=True,
706
778
  )
707
779
 
708
- return self.map_zonal_stats(
780
+ mapped_data = self.map_zonal_stats(
709
781
  data=tif_processors,
710
782
  output_column=output_column,
711
783
  **kwargs,
712
784
  )
785
+ self._update_view(mapped_data)
786
+ return self.view
713
787
 
714
788
  def map_wp_pop(
715
789
  self,
@@ -718,17 +792,25 @@ class PoiViewGenerator:
718
792
  resolution=1000,
719
793
  predicate: Literal[
720
794
  "centroid_within", "intersects", "fractional", "within"
721
- ] = "fractional",
795
+ ] = "intersects",
722
796
  output_column: str = "population",
723
797
  **kwargs,
724
798
  ):
725
- if isinstance(country, str):
726
- country = [country]
799
+ # Ensure country is always a list for consistent handling
800
+ countries_list = [country] if isinstance(country, str) else country
727
801
 
728
802
  handler = WPPopulationHandler(
729
- project="pop", resolution=resolution, data_store=self.data_store, **kwargs
803
+ resolution=resolution,
804
+ data_store=self.data_store,
805
+ **kwargs,
730
806
  )
731
807
 
808
+ # Restrict to single country for age_structures project
809
+ if handler.config.project == "age_structures" and len(countries_list) > 1:
810
+ raise ValueError(
811
+ "For 'age_structures' project, only a single country can be processed at a time."
812
+ )
813
+
732
814
  self.logger.info(
733
815
  f"Mapping WorldPop Population data (year: {handler.config.year}, resolution: {handler.config.resolution}m)"
734
816
  )
@@ -738,36 +820,98 @@ class PoiViewGenerator:
738
820
  "Fractional aggregations only supported for datasets with 1000m resolution. Using `intersects` as predicate"
739
821
  )
740
822
  predicate = "intersects"
741
-
823
+
824
+ data_to_process: Union[List[TifProcessor], gpd.GeoDataFrame, pd.DataFrame]
825
+
742
826
  if predicate == "centroid_within":
743
- data = []
744
- for c in country:
745
- data.extend(
746
- handler.load_data(c, ensure_available=self.config.ensure_available)
827
+ if handler.config.project == "age_structures":
828
+ # Load individual tif processors for the single country
829
+ all_tif_processors = handler.load_data(
830
+ countries_list[0],
831
+ ensure_available=self.config.ensure_available,
832
+ **kwargs,
747
833
  )
834
+
835
+ # Sum results from each tif_processor separately
836
+ summed_results_by_poi = {
837
+ poi_id: 0.0 for poi_id in self.points_gdf["poi_id"].unique()
838
+ }
839
+
840
+ self.logger.info(
841
+ f"Sampling individual age_structures rasters using 'sum' statistic and summing per POI."
842
+ )
843
+ for tif_processor in all_tif_processors:
844
+ single_raster_df = self.map_zonal_stats(
845
+ data=tif_processor,
846
+ stat="sum",
847
+ map_radius_meters=map_radius_meters,
848
+ value_column="pixel_value",
849
+ predicate=predicate,
850
+ output_column=output_column, # This output_column will be in the temporary DF
851
+ **kwargs,
852
+ )
853
+ # Add values from this single raster to the cumulative sum
854
+ for _, row in single_raster_df.iterrows():
855
+ summed_results_by_poi[row["poi_id"]] += row[output_column]
856
+
857
+ # Convert the summed dictionary back to a DataFrame
858
+ data_to_process = pd.DataFrame(
859
+ list(summed_results_by_poi.items()),
860
+ columns=["poi_id", output_column],
861
+ )
862
+
863
+ else:
864
+ # Existing behavior for non-age_structures projects or if merging is fine
865
+ # 'data_to_process' will be a list of TifProcessor objects, which map_zonal_stats will merge
866
+ data_to_process = []
867
+ for c in countries_list:
868
+ data_to_process.extend(
869
+ handler.load_data(
870
+ c, ensure_available=self.config.ensure_available, **kwargs
871
+ )
872
+ )
748
873
  else:
749
- data = pd.concat(
874
+ # 'data_to_process' will be a GeoDataFrame
875
+ data_to_process = pd.concat(
750
876
  [
751
877
  handler.load_into_geodataframe(
752
- c, ensure_available=self.config.ensure_available
878
+ c, ensure_available=self.config.ensure_available, **kwargs
753
879
  )
754
- for c in country
880
+ for c in countries_list # Original iteration over countries_list
755
881
  ],
756
882
  ignore_index=True,
757
883
  )
758
884
 
759
- self.logger.info(f"Mapping WorldPop Population data into {map_radius_meters}m zones around POIs using 'sum' statistic")
760
-
761
- return self.map_zonal_stats(
762
- data,
763
- stat="sum",
764
- map_radius_meters=map_radius_meters,
765
- value_column="pixel_value",
766
- predicate=predicate,
767
- output_column=output_column,
768
- **kwargs
885
+ self.logger.info(
886
+ f"Mapping WorldPop Population data into {map_radius_meters}m zones around POIs using 'sum' statistic"
769
887
  )
770
888
 
889
+ final_mapped_df: pd.DataFrame
890
+
891
+ # If 'data_to_process' is already the summed DataFrame (from age_structures/centroid_within branch),
892
+ # use it directly.
893
+ if (
894
+ isinstance(data_to_process, pd.DataFrame)
895
+ and output_column in data_to_process.columns
896
+ and "poi_id" in data_to_process.columns
897
+ ):
898
+ final_mapped_df = data_to_process
899
+ else:
900
+ # For other cases, proceed with the original call to map_zonal_stats
901
+ final_mapped_df = self.map_zonal_stats(
902
+ data=data_to_process,
903
+ stat="sum",
904
+ map_radius_meters=map_radius_meters,
905
+ value_column="pixel_value",
906
+ predicate=predicate,
907
+ output_column=output_column,
908
+ **kwargs,
909
+ )
910
+ self._update_view(
911
+ final_mapped_df
912
+ ) # Update the view with the final mapped DataFrame
913
+ return self.view
914
+
771
915
  def save_view(
772
916
  self,
773
917
  name: str,