giga-spatial 0.6.9__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {giga_spatial-0.6.9.dist-info → giga_spatial-0.7.1.dist-info}/METADATA +30 -4
- {giga_spatial-0.6.9.dist-info → giga_spatial-0.7.1.dist-info}/RECORD +22 -20
- gigaspatial/__init__.py +1 -1
- gigaspatial/config.py +1 -0
- gigaspatial/core/io/adls_data_store.py +104 -11
- gigaspatial/core/io/local_data_store.py +8 -0
- gigaspatial/generators/poi.py +226 -82
- gigaspatial/generators/zonal/base.py +41 -28
- gigaspatial/generators/zonal/geometry.py +91 -41
- gigaspatial/grid/h3.py +417 -0
- gigaspatial/grid/mercator_tiles.py +1 -1
- gigaspatial/handlers/base.py +22 -8
- gigaspatial/handlers/ghsl.py +22 -8
- gigaspatial/handlers/giga.py +9 -4
- gigaspatial/handlers/healthsites.py +350 -0
- gigaspatial/handlers/osm.py +325 -105
- gigaspatial/handlers/worldpop.py +228 -9
- gigaspatial/processing/geo.py +11 -6
- gigaspatial/processing/tif_processor.py +1183 -496
- {giga_spatial-0.6.9.dist-info → giga_spatial-0.7.1.dist-info}/WHEEL +0 -0
- {giga_spatial-0.6.9.dist-info → giga_spatial-0.7.1.dist-info}/licenses/LICENSE +0 -0
- {giga_spatial-0.6.9.dist-info → giga_spatial-0.7.1.dist-info}/top_level.txt +0 -0
gigaspatial/generators/poi.py
CHANGED
@@ -18,14 +18,11 @@ from gigaspatial.processing.geo import (
|
|
18
18
|
convert_to_geodataframe,
|
19
19
|
buffer_geodataframe,
|
20
20
|
detect_coordinate_columns,
|
21
|
+
aggregate_points_to_zones,
|
21
22
|
aggregate_polygons_to_zones,
|
22
23
|
get_centroids,
|
23
24
|
)
|
24
|
-
from gigaspatial.processing.tif_processor import
|
25
|
-
sample_multiple_tifs_by_polygons,
|
26
|
-
sample_multiple_tifs_by_coordinates,
|
27
|
-
TifProcessor,
|
28
|
-
)
|
25
|
+
from gigaspatial.processing.tif_processor import TifProcessor
|
29
26
|
from scipy.spatial import cKDTree
|
30
27
|
|
31
28
|
|
@@ -165,7 +162,9 @@ class PoiViewGenerator:
|
|
165
162
|
raise ValueError(
|
166
163
|
f"Column '{poi_id_column}' provided as 'poi_id_column' contains duplicate values."
|
167
164
|
)
|
168
|
-
return convert_to_geodataframe(
|
165
|
+
return convert_to_geodataframe(
|
166
|
+
points, lat_col="latitude", lon_col="longitude"
|
167
|
+
)
|
169
168
|
except ValueError as e:
|
170
169
|
raise ValueError(
|
171
170
|
f"Could not detect coordinate columns in DataFrame: {str(e)}"
|
@@ -202,7 +201,9 @@ class PoiViewGenerator:
|
|
202
201
|
raise ValueError(
|
203
202
|
f"Column '{poi_id_column}' provided as 'poi_id_column' contains duplicate values."
|
204
203
|
)
|
205
|
-
return convert_to_geodataframe(
|
204
|
+
return convert_to_geodataframe(
|
205
|
+
df, lat_col="latitude", lon_col="longitude"
|
206
|
+
)
|
206
207
|
except ValueError as e:
|
207
208
|
raise ValueError(
|
208
209
|
f"Could not detect coordinate columns in dictionary list: {str(e)}"
|
@@ -365,11 +366,11 @@ class PoiViewGenerator:
|
|
365
366
|
f"{output_prefix}_distance": dist,
|
366
367
|
}
|
367
368
|
)
|
368
|
-
self._update_view(temp_result_df)
|
369
|
+
# self._update_view(temp_result_df) # Removed direct view update
|
369
370
|
self.logger.info(
|
370
371
|
f"Nearest points mapping complete with prefix '{output_prefix}'"
|
371
372
|
)
|
372
|
-
return
|
373
|
+
return temp_result_df # Return the DataFrame
|
373
374
|
|
374
375
|
def map_google_buildings(
|
375
376
|
self,
|
@@ -405,12 +406,14 @@ class PoiViewGenerator:
|
|
405
406
|
self.logger.info("No Google buildings data found for the provided POIs")
|
406
407
|
return self.view
|
407
408
|
|
408
|
-
|
409
|
+
mapped_data = self.map_nearest_points(
|
409
410
|
points_df=buildings_df,
|
410
411
|
id_column="full_plus_code",
|
411
412
|
output_prefix="nearest_google_building",
|
412
413
|
**kwargs,
|
413
414
|
)
|
415
|
+
self._update_view(mapped_data)
|
416
|
+
return self.view
|
414
417
|
|
415
418
|
def map_ms_buildings(
|
416
419
|
self,
|
@@ -455,12 +458,14 @@ class PoiViewGenerator:
|
|
455
458
|
axis=1,
|
456
459
|
)
|
457
460
|
|
458
|
-
|
461
|
+
mapped_data = self.map_nearest_points(
|
459
462
|
points_df=building_centroids,
|
460
463
|
id_column="building_id",
|
461
464
|
output_prefix="nearest_ms_building",
|
462
465
|
**kwargs,
|
463
466
|
)
|
467
|
+
self._update_view(mapped_data)
|
468
|
+
return self.view
|
464
469
|
|
465
470
|
def map_zonal_stats(
|
466
471
|
self,
|
@@ -481,9 +486,10 @@ class PoiViewGenerator:
|
|
481
486
|
3. Polygon aggregation: Aggregates polygon data to POI buffers with optional area weighting
|
482
487
|
|
483
488
|
Args:
|
484
|
-
data (Union[List[TifProcessor], gpd.GeoDataFrame]):
|
485
|
-
Either a list of TifProcessor objects
|
486
|
-
or a GeoDataFrame containing polygon
|
489
|
+
data (Union[TifProcessor, List[TifProcessor], gpd.GeoDataFrame]):
|
490
|
+
Either a TifProcessor object, a list of TifProcessor objects (which will be merged
|
491
|
+
into a single TifProcessor for processing), or a GeoDataFrame containing polygon
|
492
|
+
data to aggregate.
|
487
493
|
stat (str, optional):
|
488
494
|
For raster data: Statistic to calculate ("sum", "mean", "median", "min", "max").
|
489
495
|
For polygon data: Aggregation method to use.
|
@@ -512,20 +518,32 @@ class PoiViewGenerator:
|
|
512
518
|
or if required parameters (value_column) are missing for polygon data.
|
513
519
|
"""
|
514
520
|
|
515
|
-
|
516
|
-
results_df = pd.DataFrame({"poi_id": self.points_gdf["poi_id"]})
|
521
|
+
raster_processor: Optional[TifProcessor] = None
|
517
522
|
|
518
|
-
|
523
|
+
if isinstance(data, TifProcessor):
|
524
|
+
raster_processor = data
|
525
|
+
elif isinstance(data, list) and all(isinstance(x, TifProcessor) for x in data):
|
519
526
|
if not data:
|
520
|
-
self.logger.info("No valid raster data
|
527
|
+
self.logger.info("No valid raster data provided")
|
521
528
|
return self.view
|
522
529
|
|
523
|
-
|
530
|
+
if len(data) > 1:
|
531
|
+
all_source_paths = [tp.dataset_path for tp in data]
|
524
532
|
|
525
|
-
|
526
|
-
|
527
|
-
|
533
|
+
self.logger.info(
|
534
|
+
f"Merging {len(all_source_paths)} rasters into a single TifProcessor for zonal statistics."
|
535
|
+
)
|
536
|
+
raster_processor = TifProcessor(
|
537
|
+
dataset_path=all_source_paths,
|
538
|
+
data_store=self.data_store,
|
539
|
+
**kwargs,
|
528
540
|
)
|
541
|
+
else:
|
542
|
+
raster_processor = data[0]
|
543
|
+
|
544
|
+
if raster_processor:
|
545
|
+
results_df = pd.DataFrame({"poi_id": self.points_gdf["poi_id"]})
|
546
|
+
raster_crs = raster_processor.crs
|
529
547
|
|
530
548
|
if map_radius_meters is not None:
|
531
549
|
self.logger.info(
|
@@ -539,11 +557,9 @@ class PoiViewGenerator:
|
|
539
557
|
)
|
540
558
|
|
541
559
|
# Calculate zonal statistics
|
542
|
-
sampled_values =
|
543
|
-
tif_processors=data,
|
560
|
+
sampled_values = raster_processor.sample_by_polygons(
|
544
561
|
polygon_list=buffers_gdf.to_crs(raster_crs).geometry,
|
545
562
|
stat=stat,
|
546
|
-
**kwargs,
|
547
563
|
)
|
548
564
|
else:
|
549
565
|
self.logger.info(f"Sampling {stat} at POI locations")
|
@@ -551,8 +567,8 @@ class PoiViewGenerator:
|
|
551
567
|
coord_list = (
|
552
568
|
self.points_gdf.to_crs(raster_crs).get_coordinates().to_numpy()
|
553
569
|
)
|
554
|
-
sampled_values =
|
555
|
-
|
570
|
+
sampled_values = raster_processor.sample_by_coordinates(
|
571
|
+
coordinate_list=coord_list, **kwargs
|
556
572
|
)
|
557
573
|
|
558
574
|
results_df[output_column] = sampled_values
|
@@ -560,24 +576,16 @@ class PoiViewGenerator:
|
|
560
576
|
elif isinstance(data, gpd.GeoDataFrame):
|
561
577
|
# Handle polygon data
|
562
578
|
if data.empty:
|
563
|
-
self.logger.info("No valid
|
564
|
-
return
|
579
|
+
self.logger.info("No valid GeoDataFrame data provided")
|
580
|
+
return pd.DataFrame(
|
581
|
+
columns=["poi_id", output_column]
|
582
|
+
) # Return empty DataFrame
|
565
583
|
|
566
584
|
if map_radius_meters is None:
|
567
|
-
raise ValueError("map_radius_meters must be provided for polygon data")
|
568
|
-
|
569
|
-
if value_column is None:
|
570
|
-
raise ValueError("value_column must be provided for polygon data")
|
571
|
-
|
572
|
-
if value_column not in data.columns:
|
573
585
|
raise ValueError(
|
574
|
-
|
586
|
+
"map_radius_meters must be provided for for GeoDataFrame data"
|
575
587
|
)
|
576
588
|
|
577
|
-
self.logger.info(
|
578
|
-
f"Aggregating {value_column} within {map_radius_meters}m buffers around POIs using predicate '{predicate}'"
|
579
|
-
)
|
580
|
-
|
581
589
|
# Create buffers around POIs
|
582
590
|
buffer_gdf = buffer_geodataframe(
|
583
591
|
self.points_gdf,
|
@@ -585,34 +593,92 @@ class PoiViewGenerator:
|
|
585
593
|
cap_style="round",
|
586
594
|
)
|
587
595
|
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
596
|
+
if any(data.geom_type.isin(["MultiPoint", "Point"])):
|
597
|
+
|
598
|
+
self.logger.info(
|
599
|
+
f"Aggregating point data within {map_radius_meters}m buffers around POIs using predicate '{predicate}'"
|
600
|
+
)
|
601
|
+
|
602
|
+
# If no value_column, default to 'count'
|
603
|
+
if value_column is None:
|
604
|
+
actual_stat = "count"
|
605
|
+
self.logger.warning(
|
606
|
+
"No value_column provided for point data. Defaulting to 'count' aggregation."
|
607
|
+
)
|
608
|
+
else:
|
609
|
+
actual_stat = stat
|
610
|
+
if value_column not in data.columns:
|
611
|
+
raise ValueError(
|
612
|
+
f"Value column '{value_column}' not found in input GeoDataFrame."
|
613
|
+
)
|
614
|
+
|
615
|
+
aggregation_result_gdf = aggregate_points_to_zones(
|
616
|
+
points=data,
|
617
|
+
zones=buffer_gdf,
|
618
|
+
value_columns=value_column,
|
619
|
+
aggregation=actual_stat,
|
620
|
+
point_zone_predicate=predicate, # can't be `fractional``
|
621
|
+
zone_id_column="poi_id",
|
622
|
+
output_suffix="",
|
623
|
+
drop_geometry=True,
|
624
|
+
**kwargs,
|
625
|
+
)
|
626
|
+
|
627
|
+
output_col_from_agg = (
|
628
|
+
f"{value_column}_{actual_stat}" if value_column else "point_count"
|
629
|
+
)
|
630
|
+
results_df = aggregation_result_gdf[["poi_id", output_col_from_agg]]
|
631
|
+
|
632
|
+
if output_column != "zonal_stat":
|
633
|
+
results_df = results_df.rename(
|
634
|
+
columns={output_col_from_agg: output_column}
|
635
|
+
)
|
636
|
+
|
637
|
+
else:
|
638
|
+
if value_column is None:
|
639
|
+
raise ValueError(
|
640
|
+
"value_column must be provided for polygon data aggregation."
|
641
|
+
)
|
642
|
+
if value_column not in data.columns:
|
643
|
+
raise ValueError(
|
644
|
+
f"Value column '{value_column}' not found in input GeoDataFrame."
|
645
|
+
)
|
646
|
+
self.logger.info(
|
647
|
+
f"Aggregating polygon data within {map_radius_meters}m buffers around POIs using predicate '{predicate}'"
|
648
|
+
)
|
600
649
|
|
601
|
-
|
650
|
+
# Aggregate polygons to buffers
|
651
|
+
aggregation_result_gdf = aggregate_polygons_to_zones(
|
652
|
+
polygons=data,
|
653
|
+
zones=buffer_gdf,
|
654
|
+
value_columns=value_column,
|
655
|
+
aggregation=stat,
|
656
|
+
predicate=predicate,
|
657
|
+
zone_id_column="poi_id",
|
658
|
+
output_suffix="",
|
659
|
+
drop_geometry=True,
|
660
|
+
**kwargs,
|
661
|
+
)
|
662
|
+
|
663
|
+
output_col_from_agg = value_column
|
664
|
+
|
665
|
+
results_df = aggregation_result_gdf[["poi_id", output_col_from_agg]]
|
602
666
|
|
603
667
|
if output_column != "zonal_stat":
|
604
|
-
results_df = results_df.rename(
|
668
|
+
results_df = results_df.rename(
|
669
|
+
columns={output_col_from_agg: output_column}
|
670
|
+
)
|
605
671
|
|
606
672
|
else:
|
607
673
|
raise ValueError(
|
608
674
|
"data must be either a list of TifProcessor objects or a GeoDataFrame"
|
609
675
|
)
|
610
676
|
|
611
|
-
self._update_view(results_df)
|
677
|
+
# self._update_view(results_df) # Removed direct view update
|
612
678
|
self.logger.info(
|
613
679
|
f"Zonal statistics mapping complete for column(s) derived from '{output_column}' or '{value_column}'"
|
614
680
|
)
|
615
|
-
return
|
681
|
+
return results_df # Return the DataFrame
|
616
682
|
|
617
683
|
def map_built_s(
|
618
684
|
self,
|
@@ -654,16 +720,20 @@ class PoiViewGenerator:
|
|
654
720
|
)
|
655
721
|
self.logger.info("Loading GHSL Built Surface raster tiles")
|
656
722
|
tif_processors = handler.load_data(
|
657
|
-
self.points_gdf.copy(),
|
723
|
+
self.points_gdf.copy(),
|
724
|
+
ensure_available=self.config.ensure_available,
|
725
|
+
merge_rasters=True,
|
658
726
|
)
|
659
727
|
|
660
|
-
|
728
|
+
mapped_data = self.map_zonal_stats(
|
661
729
|
data=tif_processors,
|
662
730
|
stat=stat,
|
663
731
|
map_radius_meters=map_radius_meters,
|
664
732
|
output_column=output_column,
|
665
733
|
**kwargs,
|
666
734
|
)
|
735
|
+
self._update_view(mapped_data)
|
736
|
+
return self.view
|
667
737
|
|
668
738
|
def map_smod(
|
669
739
|
self,
|
@@ -702,14 +772,18 @@ class PoiViewGenerator:
|
|
702
772
|
|
703
773
|
self.logger.info("Loading GHSL SMOD raster tiles")
|
704
774
|
tif_processors = handler.load_data(
|
705
|
-
self.points_gdf.copy(),
|
775
|
+
self.points_gdf.copy(),
|
776
|
+
ensure_available=self.config.ensure_available,
|
777
|
+
merge_rasters=True,
|
706
778
|
)
|
707
779
|
|
708
|
-
|
780
|
+
mapped_data = self.map_zonal_stats(
|
709
781
|
data=tif_processors,
|
710
782
|
output_column=output_column,
|
711
783
|
**kwargs,
|
712
784
|
)
|
785
|
+
self._update_view(mapped_data)
|
786
|
+
return self.view
|
713
787
|
|
714
788
|
def map_wp_pop(
|
715
789
|
self,
|
@@ -718,17 +792,25 @@ class PoiViewGenerator:
|
|
718
792
|
resolution=1000,
|
719
793
|
predicate: Literal[
|
720
794
|
"centroid_within", "intersects", "fractional", "within"
|
721
|
-
] = "
|
795
|
+
] = "intersects",
|
722
796
|
output_column: str = "population",
|
723
797
|
**kwargs,
|
724
798
|
):
|
725
|
-
|
726
|
-
|
799
|
+
# Ensure country is always a list for consistent handling
|
800
|
+
countries_list = [country] if isinstance(country, str) else country
|
727
801
|
|
728
802
|
handler = WPPopulationHandler(
|
729
|
-
|
803
|
+
resolution=resolution,
|
804
|
+
data_store=self.data_store,
|
805
|
+
**kwargs,
|
730
806
|
)
|
731
807
|
|
808
|
+
# Restrict to single country for age_structures project
|
809
|
+
if handler.config.project == "age_structures" and len(countries_list) > 1:
|
810
|
+
raise ValueError(
|
811
|
+
"For 'age_structures' project, only a single country can be processed at a time."
|
812
|
+
)
|
813
|
+
|
732
814
|
self.logger.info(
|
733
815
|
f"Mapping WorldPop Population data (year: {handler.config.year}, resolution: {handler.config.resolution}m)"
|
734
816
|
)
|
@@ -738,36 +820,98 @@ class PoiViewGenerator:
|
|
738
820
|
"Fractional aggregations only supported for datasets with 1000m resolution. Using `intersects` as predicate"
|
739
821
|
)
|
740
822
|
predicate = "intersects"
|
741
|
-
|
823
|
+
|
824
|
+
data_to_process: Union[List[TifProcessor], gpd.GeoDataFrame, pd.DataFrame]
|
825
|
+
|
742
826
|
if predicate == "centroid_within":
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
827
|
+
if handler.config.project == "age_structures":
|
828
|
+
# Load individual tif processors for the single country
|
829
|
+
all_tif_processors = handler.load_data(
|
830
|
+
countries_list[0],
|
831
|
+
ensure_available=self.config.ensure_available,
|
832
|
+
**kwargs,
|
747
833
|
)
|
834
|
+
|
835
|
+
# Sum results from each tif_processor separately
|
836
|
+
summed_results_by_poi = {
|
837
|
+
poi_id: 0.0 for poi_id in self.points_gdf["poi_id"].unique()
|
838
|
+
}
|
839
|
+
|
840
|
+
self.logger.info(
|
841
|
+
f"Sampling individual age_structures rasters using 'sum' statistic and summing per POI."
|
842
|
+
)
|
843
|
+
for tif_processor in all_tif_processors:
|
844
|
+
single_raster_df = self.map_zonal_stats(
|
845
|
+
data=tif_processor,
|
846
|
+
stat="sum",
|
847
|
+
map_radius_meters=map_radius_meters,
|
848
|
+
value_column="pixel_value",
|
849
|
+
predicate=predicate,
|
850
|
+
output_column=output_column, # This output_column will be in the temporary DF
|
851
|
+
**kwargs,
|
852
|
+
)
|
853
|
+
# Add values from this single raster to the cumulative sum
|
854
|
+
for _, row in single_raster_df.iterrows():
|
855
|
+
summed_results_by_poi[row["poi_id"]] += row[output_column]
|
856
|
+
|
857
|
+
# Convert the summed dictionary back to a DataFrame
|
858
|
+
data_to_process = pd.DataFrame(
|
859
|
+
list(summed_results_by_poi.items()),
|
860
|
+
columns=["poi_id", output_column],
|
861
|
+
)
|
862
|
+
|
863
|
+
else:
|
864
|
+
# Existing behavior for non-age_structures projects or if merging is fine
|
865
|
+
# 'data_to_process' will be a list of TifProcessor objects, which map_zonal_stats will merge
|
866
|
+
data_to_process = []
|
867
|
+
for c in countries_list:
|
868
|
+
data_to_process.extend(
|
869
|
+
handler.load_data(
|
870
|
+
c, ensure_available=self.config.ensure_available, **kwargs
|
871
|
+
)
|
872
|
+
)
|
748
873
|
else:
|
749
|
-
|
874
|
+
# 'data_to_process' will be a GeoDataFrame
|
875
|
+
data_to_process = pd.concat(
|
750
876
|
[
|
751
877
|
handler.load_into_geodataframe(
|
752
|
-
c, ensure_available=self.config.ensure_available
|
878
|
+
c, ensure_available=self.config.ensure_available, **kwargs
|
753
879
|
)
|
754
|
-
for c in
|
880
|
+
for c in countries_list # Original iteration over countries_list
|
755
881
|
],
|
756
882
|
ignore_index=True,
|
757
883
|
)
|
758
884
|
|
759
|
-
self.logger.info(
|
760
|
-
|
761
|
-
return self.map_zonal_stats(
|
762
|
-
data,
|
763
|
-
stat="sum",
|
764
|
-
map_radius_meters=map_radius_meters,
|
765
|
-
value_column="pixel_value",
|
766
|
-
predicate=predicate,
|
767
|
-
output_column=output_column,
|
768
|
-
**kwargs
|
885
|
+
self.logger.info(
|
886
|
+
f"Mapping WorldPop Population data into {map_radius_meters}m zones around POIs using 'sum' statistic"
|
769
887
|
)
|
770
888
|
|
889
|
+
final_mapped_df: pd.DataFrame
|
890
|
+
|
891
|
+
# If 'data_to_process' is already the summed DataFrame (from age_structures/centroid_within branch),
|
892
|
+
# use it directly.
|
893
|
+
if (
|
894
|
+
isinstance(data_to_process, pd.DataFrame)
|
895
|
+
and output_column in data_to_process.columns
|
896
|
+
and "poi_id" in data_to_process.columns
|
897
|
+
):
|
898
|
+
final_mapped_df = data_to_process
|
899
|
+
else:
|
900
|
+
# For other cases, proceed with the original call to map_zonal_stats
|
901
|
+
final_mapped_df = self.map_zonal_stats(
|
902
|
+
data=data_to_process,
|
903
|
+
stat="sum",
|
904
|
+
map_radius_meters=map_radius_meters,
|
905
|
+
value_column="pixel_value",
|
906
|
+
predicate=predicate,
|
907
|
+
output_column=output_column,
|
908
|
+
**kwargs,
|
909
|
+
)
|
910
|
+
self._update_view(
|
911
|
+
final_mapped_df
|
912
|
+
) # Update the view with the final mapped DataFrame
|
913
|
+
return self.view
|
914
|
+
|
771
915
|
def save_view(
|
772
916
|
self,
|
773
917
|
name: str,
|
@@ -1,7 +1,7 @@
|
|
1
1
|
from abc import ABC, abstractmethod
|
2
2
|
from pydantic import BaseModel, Field
|
3
3
|
from pathlib import Path
|
4
|
-
from typing import Dict, List, Optional, Union,
|
4
|
+
from typing import Dict, List, Optional, Union, TypeVar, Generic
|
5
5
|
from shapely.geometry import Polygon
|
6
6
|
|
7
7
|
import geopandas as gpd
|
@@ -16,10 +16,8 @@ from gigaspatial.processing.geo import (
|
|
16
16
|
aggregate_polygons_to_zones,
|
17
17
|
aggregate_points_to_zones,
|
18
18
|
)
|
19
|
-
from gigaspatial.processing.tif_processor import
|
20
|
-
|
21
|
-
sample_multiple_tifs_by_polygons,
|
22
|
-
)
|
19
|
+
from gigaspatial.processing.tif_processor import TifProcessor
|
20
|
+
|
23
21
|
from functools import lru_cache
|
24
22
|
import logging
|
25
23
|
|
@@ -209,8 +207,6 @@ class ZonalViewGenerator(ABC, Generic[T]):
|
|
209
207
|
aggregation: Union[str, Dict[str, str]] = "count",
|
210
208
|
predicate: str = "within",
|
211
209
|
output_suffix: str = "",
|
212
|
-
mapping_function: Optional[Callable] = None,
|
213
|
-
**mapping_kwargs,
|
214
210
|
) -> Dict:
|
215
211
|
"""Map point data to zones with spatial aggregation.
|
216
212
|
|
@@ -228,18 +224,12 @@ class ZonalViewGenerator(ABC, Generic[T]):
|
|
228
224
|
predicate (str): Spatial predicate for point-to-zone relationship.
|
229
225
|
Options include "within", "intersects", "contains". Defaults to "within".
|
230
226
|
output_suffix (str): Suffix to add to output column names. Defaults to empty string.
|
231
|
-
mapping_function (Callable, optional): Custom function for mapping points to zones.
|
232
|
-
If provided, signature should be mapping_function(self, points, **mapping_kwargs).
|
233
|
-
When used, all other parameters except mapping_kwargs are ignored.
|
234
|
-
**mapping_kwargs: Additional keyword arguments passed to the mapping function.
|
235
227
|
|
236
228
|
Returns:
|
237
229
|
Dict: Dictionary with zone IDs as keys and aggregated values as values.
|
238
230
|
If value_columns is None, returns point counts per zone.
|
239
231
|
If value_columns is specified, returns aggregated values per zone.
|
240
232
|
"""
|
241
|
-
if mapping_function is not None:
|
242
|
-
return mapping_function(self, points, **mapping_kwargs)
|
243
233
|
|
244
234
|
self.logger.warning(
|
245
235
|
"Using default points mapping implementation. Consider creating a specialized mapping function."
|
@@ -382,40 +372,63 @@ class ZonalViewGenerator(ABC, Generic[T]):
|
|
382
372
|
|
383
373
|
def map_rasters(
|
384
374
|
self,
|
385
|
-
|
386
|
-
mapping_function: Optional[Callable] = None,
|
375
|
+
raster_data: Union[TifProcessor, List[TifProcessor]],
|
387
376
|
stat: str = "mean",
|
388
|
-
**
|
389
|
-
) ->
|
377
|
+
**kwargs,
|
378
|
+
) -> Dict:
|
390
379
|
"""Map raster data to zones using zonal statistics.
|
391
380
|
|
392
381
|
Samples raster values within each zone and computes statistics. Automatically
|
393
382
|
handles coordinate reference system transformations between raster and zone data.
|
394
383
|
|
395
384
|
Args:
|
396
|
-
|
397
|
-
|
385
|
+
raster_data (Union[TifProcessor, List[TifProcessor]]):
|
386
|
+
Either a TifProcessor object or a list of TifProcessor objects (which will be merged
|
387
|
+
into a single TifProcessor for processing).
|
398
388
|
mapping_function (Callable, optional): Custom function for mapping rasters
|
399
389
|
to zones. If provided, signature should be mapping_function(self, tif_processors, **mapping_kwargs).
|
400
390
|
When used, stat and other parameters except mapping_kwargs are ignored.
|
401
391
|
stat (str): Statistic to calculate when aggregating raster values within
|
402
392
|
each zone. Options include "mean", "sum", "min", "max", "std", etc.
|
403
393
|
Defaults to "mean".
|
404
|
-
**mapping_kwargs: Additional keyword arguments
|
394
|
+
**mapping_kwargs: Additional keyword arguments for raster data.
|
405
395
|
|
406
396
|
Returns:
|
407
|
-
|
408
|
-
with
|
409
|
-
Custom mapping functions may return different data structures.
|
397
|
+
Dict: By default, returns a dictionary of sampled values
|
398
|
+
with zone IDs as keys.
|
410
399
|
|
411
400
|
Note:
|
412
401
|
If the coordinate reference system of the rasters differs from the zones,
|
413
402
|
the zone geometries will be automatically transformed to match the raster CRS.
|
414
403
|
"""
|
415
|
-
|
416
|
-
|
404
|
+
raster_processor: Optional[TifProcessor] = None
|
405
|
+
|
406
|
+
if isinstance(raster_data, TifProcessor):
|
407
|
+
raster_processor = raster_data
|
408
|
+
elif isinstance(raster_data, list) and all(
|
409
|
+
isinstance(x, TifProcessor) for x in raster_data
|
410
|
+
):
|
411
|
+
if not raster_data:
|
412
|
+
self.logger.info("No valid raster data provided")
|
413
|
+
return self.view
|
414
|
+
|
415
|
+
if len(raster_data) > 1:
|
416
|
+
all_source_paths = [tp.dataset_path for tp in raster_data]
|
417
|
+
|
418
|
+
self.logger.info(
|
419
|
+
f"Merging {len(all_source_paths)} rasters into a single TifProcessor for zonal statistics."
|
420
|
+
)
|
421
|
+
raster_processor = TifProcessor(
|
422
|
+
dataset_path=all_source_paths, data_store=self.data_store, **kwargs
|
423
|
+
)
|
424
|
+
else:
|
425
|
+
raster_processor = raster_data[0]
|
426
|
+
else:
|
427
|
+
raise ValueError(
|
428
|
+
"raster_data must be a TifProcessor object or a list of TifProcessor objects."
|
429
|
+
)
|
417
430
|
|
418
|
-
raster_crs =
|
431
|
+
raster_crs = raster_processor.crs
|
419
432
|
|
420
433
|
if raster_crs != self.zone_gdf.crs:
|
421
434
|
self.logger.info(f"Projecting zones to raster CRS: {raster_crs}")
|
@@ -424,8 +437,8 @@ class ZonalViewGenerator(ABC, Generic[T]):
|
|
424
437
|
zone_geoms = self.get_zonal_geometries()
|
425
438
|
|
426
439
|
# Sample raster values
|
427
|
-
sampled_values =
|
428
|
-
|
440
|
+
sampled_values = raster_processor.sample_by_polygons(
|
441
|
+
polygon_list=zone_geoms, stat=stat
|
429
442
|
)
|
430
443
|
|
431
444
|
zone_ids = self.get_zone_identifiers()
|