giga-spatial 0.6.9__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {giga_spatial-0.6.9.dist-info → giga_spatial-0.7.1.dist-info}/METADATA +30 -4
- {giga_spatial-0.6.9.dist-info → giga_spatial-0.7.1.dist-info}/RECORD +22 -20
- gigaspatial/__init__.py +1 -1
- gigaspatial/config.py +1 -0
- gigaspatial/core/io/adls_data_store.py +104 -11
- gigaspatial/core/io/local_data_store.py +8 -0
- gigaspatial/generators/poi.py +226 -82
- gigaspatial/generators/zonal/base.py +41 -28
- gigaspatial/generators/zonal/geometry.py +91 -41
- gigaspatial/grid/h3.py +417 -0
- gigaspatial/grid/mercator_tiles.py +1 -1
- gigaspatial/handlers/base.py +22 -8
- gigaspatial/handlers/ghsl.py +22 -8
- gigaspatial/handlers/giga.py +9 -4
- gigaspatial/handlers/healthsites.py +350 -0
- gigaspatial/handlers/osm.py +325 -105
- gigaspatial/handlers/worldpop.py +228 -9
- gigaspatial/processing/geo.py +11 -6
- gigaspatial/processing/tif_processor.py +1183 -496
- {giga_spatial-0.6.9.dist-info → giga_spatial-0.7.1.dist-info}/WHEEL +0 -0
- {giga_spatial-0.6.9.dist-info → giga_spatial-0.7.1.dist-info}/licenses/LICENSE +0 -0
- {giga_spatial-0.6.9.dist-info → giga_spatial-0.7.1.dist-info}/top_level.txt +0 -0
gigaspatial/handlers/worldpop.py
CHANGED
@@ -19,6 +19,8 @@ from shapely.geometry.base import BaseGeometry
|
|
19
19
|
from shapely.geometry import Point
|
20
20
|
from tqdm import tqdm
|
21
21
|
import logging
|
22
|
+
import zipfile
|
23
|
+
import tempfile
|
22
24
|
|
23
25
|
from gigaspatial.core.io.data_store import DataStore
|
24
26
|
from gigaspatial.processing.tif_processor import TifProcessor
|
@@ -563,6 +565,132 @@ class WPPopulationConfig(BaseHandlerConfig):
|
|
563
565
|
"""
|
564
566
|
return self.base_path / unit.split("GIS/")[1]
|
565
567
|
|
568
|
+
def get_data_unit_paths(self, units: Union[List[str], str], **kwargs) -> list:
|
569
|
+
"""
|
570
|
+
Given WP file url(s), return the corresponding local file paths.
|
571
|
+
|
572
|
+
- For school_age age_structures (zip resources), if extracted .tif files are present
|
573
|
+
in the target directory, return those; otherwise, return the zip path(s) to allow
|
574
|
+
the downloader to fetch and extract them.
|
575
|
+
- For non-school_age age_structures (individual .tif URLs), you can filter by sex and age
|
576
|
+
using kwargs: sex, ages, min_age, max_age.
|
577
|
+
"""
|
578
|
+
if not isinstance(units, list):
|
579
|
+
units = [units]
|
580
|
+
|
581
|
+
# Extract optional filters
|
582
|
+
sex = kwargs.get("sex")
|
583
|
+
education_level = kwargs.get("education_level") or kwargs.get("level")
|
584
|
+
|
585
|
+
def _to_set(v):
|
586
|
+
if v is None:
|
587
|
+
return None
|
588
|
+
if isinstance(v, (list, tuple, set)):
|
589
|
+
return {str(x).upper() for x in v}
|
590
|
+
return {str(v).upper()}
|
591
|
+
|
592
|
+
sex_filters = _to_set(sex)
|
593
|
+
level_filters = _to_set(education_level)
|
594
|
+
|
595
|
+
# 1) School-age branch (zip → extracted tifs)
|
596
|
+
if self.project == "age_structures" and self.school_age:
|
597
|
+
resolved_paths: List[Path] = []
|
598
|
+
for url in units:
|
599
|
+
output_dir = self.get_data_unit_path(url).parent
|
600
|
+
|
601
|
+
if self.data_store.is_dir(str(output_dir)):
|
602
|
+
try:
|
603
|
+
for f in self.data_store.list_files(str(output_dir)):
|
604
|
+
if f.lower().endswith(".tif"):
|
605
|
+
p = Path(f)
|
606
|
+
name = p.name.upper()
|
607
|
+
# Apply filters on extracted tif names
|
608
|
+
if sex_filters:
|
609
|
+
# Explicit matching: F matches only F-only; M matches only M-only;
|
610
|
+
# F_M matches only combined. No implicit inclusion of combined for F or M.
|
611
|
+
is_combined = "_F_M_" in name
|
612
|
+
is_f_only = ("_F_" in name) and not is_combined
|
613
|
+
is_m_only = ("_M_" in name) and not is_combined
|
614
|
+
|
615
|
+
wants_f = "F" in sex_filters
|
616
|
+
wants_m = "M" in sex_filters
|
617
|
+
wants_both = "F_M" in sex_filters
|
618
|
+
|
619
|
+
sex_ok = (
|
620
|
+
(wants_both and is_combined)
|
621
|
+
or (wants_f and is_f_only)
|
622
|
+
or (wants_m and is_m_only)
|
623
|
+
)
|
624
|
+
if not sex_ok:
|
625
|
+
continue
|
626
|
+
if level_filters:
|
627
|
+
if not any(lvl in name for lvl in level_filters):
|
628
|
+
continue
|
629
|
+
resolved_paths.append(p)
|
630
|
+
except Exception:
|
631
|
+
resolved_paths.append(self.get_data_unit_path(url))
|
632
|
+
else:
|
633
|
+
resolved_paths.append(self.get_data_unit_path(url))
|
634
|
+
|
635
|
+
return resolved_paths
|
636
|
+
|
637
|
+
# 2) Non-school_age age_structures (individual tif URLs) with sex/age filters
|
638
|
+
if self.project == "age_structures" and not self.school_age:
|
639
|
+
# optional filters
|
640
|
+
sex_filters = _to_set(kwargs.get("sex"))
|
641
|
+
ages_filter = kwargs.get("ages")
|
642
|
+
min_age = kwargs.get("min_age")
|
643
|
+
max_age = kwargs.get("max_age")
|
644
|
+
|
645
|
+
if ages_filter is not None and not isinstance(
|
646
|
+
ages_filter, (list, tuple, set)
|
647
|
+
):
|
648
|
+
ages_filter = {int(ages_filter)}
|
649
|
+
elif isinstance(ages_filter, (list, tuple, set)):
|
650
|
+
ages_filter = {int(x) for x in ages_filter}
|
651
|
+
|
652
|
+
def _parse_meta(u: str):
|
653
|
+
# Expected basename pattern: ISO3_SEX_AGE_YEAR.tif
|
654
|
+
# e.g., RWA_F_25_2020.tif (case-insensitive possible)
|
655
|
+
bn = os.path.basename(u)
|
656
|
+
stem = os.path.splitext(bn)[0]
|
657
|
+
parts = stem.split("_")
|
658
|
+
# Be defensive about various casings/orderings
|
659
|
+
# Heuristic: country(0), sex(1), age(2), year(3+)
|
660
|
+
if len(parts) >= 4:
|
661
|
+
sex_val = parts[1].upper()
|
662
|
+
try:
|
663
|
+
age_val = int(parts[2])
|
664
|
+
except Exception:
|
665
|
+
age_val = None
|
666
|
+
else:
|
667
|
+
sex_val, age_val = None, None
|
668
|
+
return sex_val, age_val
|
669
|
+
|
670
|
+
filtered_units = []
|
671
|
+
for u in units:
|
672
|
+
sex_val, age_val = _parse_meta(u)
|
673
|
+
|
674
|
+
# sex filter
|
675
|
+
if sex_filters and sex_val not in sex_filters:
|
676
|
+
continue
|
677
|
+
|
678
|
+
# age filters: ages exact, or min/max bounds
|
679
|
+
if age_val is not None:
|
680
|
+
if ages_filter is not None and age_val not in ages_filter:
|
681
|
+
continue
|
682
|
+
if min_age is not None and age_val < int(min_age):
|
683
|
+
continue
|
684
|
+
if max_age is not None and age_val > int(max_age):
|
685
|
+
continue
|
686
|
+
|
687
|
+
filtered_units.append(u)
|
688
|
+
|
689
|
+
return [self.get_data_unit_path(unit) for unit in filtered_units]
|
690
|
+
|
691
|
+
# Default behavior
|
692
|
+
return [self.get_data_unit_path(unit) for unit in units]
|
693
|
+
|
566
694
|
def __repr__(self) -> str:
|
567
695
|
|
568
696
|
return (
|
@@ -601,7 +729,70 @@ class WPPopulationDownloader(BaseHandlerDownloader):
|
|
601
729
|
super().__init__(config=config, data_store=data_store, logger=logger)
|
602
730
|
|
603
731
|
def download_data_unit(self, url, **kwargs):
|
604
|
-
"""Download data file for a url."""
|
732
|
+
"""Download data file for a url. If a zip, extract contained .tif files."""
|
733
|
+
# If the resource is a zip (e.g., school age datasets), download to temp and extract .tif files
|
734
|
+
if url.lower().endswith(".zip"):
|
735
|
+
temp_downloaded_path: Optional[Path] = None
|
736
|
+
try:
|
737
|
+
with tempfile.NamedTemporaryFile(
|
738
|
+
delete=False, suffix=".zip"
|
739
|
+
) as temp_file:
|
740
|
+
temp_downloaded_path = Path(temp_file.name)
|
741
|
+
response = self.config.client.session.get(
|
742
|
+
url, stream=True, timeout=self.config.client.timeout
|
743
|
+
)
|
744
|
+
response.raise_for_status()
|
745
|
+
|
746
|
+
total_size = int(response.headers.get("content-length", 0))
|
747
|
+
|
748
|
+
with tqdm(
|
749
|
+
total=total_size,
|
750
|
+
unit="B",
|
751
|
+
unit_scale=True,
|
752
|
+
desc=f"Downloading {os.path.basename(temp_downloaded_path)}",
|
753
|
+
) as pbar:
|
754
|
+
for chunk in response.iter_content(chunk_size=8192):
|
755
|
+
if chunk:
|
756
|
+
temp_file.write(chunk)
|
757
|
+
pbar.update(len(chunk))
|
758
|
+
|
759
|
+
extracted_files: List[Path] = []
|
760
|
+
output_dir = self.config.get_data_unit_path(url).parent
|
761
|
+
with zipfile.ZipFile(str(temp_downloaded_path), "r") as zip_ref:
|
762
|
+
members = [
|
763
|
+
m for m in zip_ref.namelist() if m.lower().endswith(".tif")
|
764
|
+
]
|
765
|
+
for member in members:
|
766
|
+
extracted_path = output_dir / Path(member).name
|
767
|
+
with zip_ref.open(member) as source:
|
768
|
+
file_content = source.read()
|
769
|
+
self.data_store.write_file(
|
770
|
+
str(extracted_path), file_content
|
771
|
+
)
|
772
|
+
extracted_files.append(extracted_path)
|
773
|
+
self.logger.info(f"Extracted {member} to {extracted_path}")
|
774
|
+
|
775
|
+
return extracted_files
|
776
|
+
|
777
|
+
except requests.RequestException as e:
|
778
|
+
self.logger.error(f"Failed to download {url}: {e}")
|
779
|
+
return None
|
780
|
+
except zipfile.BadZipFile:
|
781
|
+
self.logger.error("Downloaded file is not a valid zip archive.")
|
782
|
+
return None
|
783
|
+
except Exception as e:
|
784
|
+
self.logger.error(f"Unexpected error processing zip for {url}: {e}")
|
785
|
+
return None
|
786
|
+
finally:
|
787
|
+
if temp_downloaded_path and temp_downloaded_path.exists():
|
788
|
+
try:
|
789
|
+
temp_downloaded_path.unlink()
|
790
|
+
except OSError as e:
|
791
|
+
self.logger.warning(
|
792
|
+
f"Could not delete temporary file {temp_downloaded_path}: {e}"
|
793
|
+
)
|
794
|
+
|
795
|
+
# Otherwise, download as a regular file (e.g., .tif)
|
605
796
|
try:
|
606
797
|
response = self.config.client.session.get(
|
607
798
|
url, stream=True, timeout=self.config.client.timeout
|
@@ -636,12 +827,13 @@ class WPPopulationDownloader(BaseHandlerDownloader):
|
|
636
827
|
def download_data_units(
|
637
828
|
self,
|
638
829
|
urls: List[str],
|
830
|
+
**kwargs,
|
639
831
|
) -> List[str]:
|
640
832
|
"""Download data files for multiple urls."""
|
641
833
|
|
642
834
|
with multiprocessing.Pool(self.config.n_workers) as pool:
|
643
835
|
download_func = functools.partial(self.download_data_unit)
|
644
|
-
|
836
|
+
results = list(
|
645
837
|
tqdm(
|
646
838
|
pool.imap(download_func, urls),
|
647
839
|
total=len(urls),
|
@@ -649,7 +841,17 @@ class WPPopulationDownloader(BaseHandlerDownloader):
|
|
649
841
|
)
|
650
842
|
)
|
651
843
|
|
652
|
-
|
844
|
+
# Flatten results and filter out None
|
845
|
+
flattened: List[Path] = []
|
846
|
+
for item in results:
|
847
|
+
if item is None:
|
848
|
+
continue
|
849
|
+
if isinstance(item, list):
|
850
|
+
flattened.extend(item)
|
851
|
+
else:
|
852
|
+
flattened.append(item)
|
853
|
+
|
854
|
+
return flattened
|
653
855
|
|
654
856
|
def download(self, source: str, **kwargs) -> List[str]:
|
655
857
|
"""Download data for a source"""
|
@@ -681,16 +883,27 @@ class WPPopulationReader(BaseHandlerReader):
|
|
681
883
|
super().__init__(config=config, data_store=data_store, logger=logger)
|
682
884
|
|
683
885
|
def load_from_paths(
|
684
|
-
self,
|
685
|
-
|
886
|
+
self,
|
887
|
+
source_data_path: List[Union[str, Path]],
|
888
|
+
merge_rasters: bool = False,
|
889
|
+
**kwargs,
|
890
|
+
) -> Union[List[TifProcessor], TifProcessor]:
|
686
891
|
"""
|
687
892
|
Load TifProcessors of WP datasets.
|
688
893
|
Args:
|
689
894
|
source_data_path: List of file paths to load
|
895
|
+
merge_rasters: If True, all rasters will be merged into a single TifProcessor.
|
896
|
+
Defaults to False.
|
690
897
|
Returns:
|
691
|
-
List[TifProcessor]: List of TifProcessor objects for accessing the raster data
|
898
|
+
Union[List[TifProcessor], TifProcessor]: List of TifProcessor objects for accessing the raster data or a single
|
899
|
+
TifProcessor if merge_rasters is True.
|
692
900
|
"""
|
693
|
-
return self._load_raster_data(
|
901
|
+
return self._load_raster_data(
|
902
|
+
raster_paths=source_data_path, merge_rasters=merge_rasters
|
903
|
+
)
|
904
|
+
|
905
|
+
def load(self, source, merge_rasters: bool = False, **kwargs):
|
906
|
+
return super().load(source=source, merge_rasters=merge_rasters, **kwargs)
|
694
907
|
|
695
908
|
|
696
909
|
class WPPopulationHandler(BaseHandler):
|
@@ -822,8 +1035,11 @@ class WPPopulationHandler(BaseHandler):
|
|
822
1035
|
tif_processors = self.load_data(
|
823
1036
|
source=source, ensure_available=ensure_available, **kwargs
|
824
1037
|
)
|
1038
|
+
if isinstance(tif_processors, TifProcessor):
|
1039
|
+
return tif_processors.to_dataframe(**kwargs)
|
1040
|
+
|
825
1041
|
return pd.concat(
|
826
|
-
[tp.to_dataframe() for tp in tif_processors], ignore_index=True
|
1042
|
+
[tp.to_dataframe(**kwargs) for tp in tif_processors], ignore_index=True
|
827
1043
|
)
|
828
1044
|
|
829
1045
|
def load_into_geodataframe(
|
@@ -846,6 +1062,9 @@ class WPPopulationHandler(BaseHandler):
|
|
846
1062
|
tif_processors = self.load_data(
|
847
1063
|
source=source, ensure_available=ensure_available, **kwargs
|
848
1064
|
)
|
1065
|
+
if isinstance(tif_processors, TifProcessor):
|
1066
|
+
return tif_processors.to_geodataframe(**kwargs)
|
1067
|
+
|
849
1068
|
return pd.concat(
|
850
|
-
[tp.to_geodataframe() for tp in tif_processors], ignore_index=True
|
1069
|
+
[tp.to_geodataframe(**kwargs) for tp in tif_processors], ignore_index=True
|
851
1070
|
)
|
gigaspatial/processing/geo.py
CHANGED
@@ -232,7 +232,7 @@ def convert_to_geodataframe(
|
|
232
232
|
|
233
233
|
def buffer_geodataframe(
|
234
234
|
gdf: gpd.GeoDataFrame,
|
235
|
-
buffer_distance_meters: float,
|
235
|
+
buffer_distance_meters: Union[float, np.array, pd.Series],
|
236
236
|
cap_style: Literal["round", "square", "flat"] = "round",
|
237
237
|
copy=True,
|
238
238
|
) -> gpd.GeoDataFrame:
|
@@ -256,9 +256,6 @@ def buffer_geodataframe(
|
|
256
256
|
if not isinstance(gdf, gpd.GeoDataFrame):
|
257
257
|
raise TypeError("Input must be a GeoDataFrame")
|
258
258
|
|
259
|
-
if not isinstance(buffer_distance_meters, (float, int)):
|
260
|
-
raise TypeError("Buffer distance must be a number")
|
261
|
-
|
262
259
|
if cap_style not in ["round", "square", "flat"]:
|
263
260
|
raise ValueError("cap_style must be round, flat or square.")
|
264
261
|
|
@@ -283,7 +280,7 @@ def buffer_geodataframe(
|
|
283
280
|
# Transform to UTM, create buffer, and transform back
|
284
281
|
gdf_work = gdf_work.to_crs(utm_crs)
|
285
282
|
gdf_work["geometry"] = gdf_work["geometry"].buffer(
|
286
|
-
buffer_distance_meters, cap_style=cap_style
|
283
|
+
distance=buffer_distance_meters, cap_style=cap_style
|
287
284
|
)
|
288
285
|
gdf_work = gdf_work.to_crs(input_crs)
|
289
286
|
|
@@ -607,7 +604,7 @@ def aggregate_points_to_zones(
|
|
607
604
|
- Single string: Use same method for all columns ("count", "mean", "sum", "min", "max")
|
608
605
|
- Dict: Map column names to aggregation methods
|
609
606
|
point_zone_predicate (str): Spatial predicate for point-to-zone relationship
|
610
|
-
Options: "within", "intersects"
|
607
|
+
Options: "within", "intersects"
|
611
608
|
zone_id_column (str): Column in zones containing zone identifiers
|
612
609
|
output_suffix (str): Suffix to add to output column names
|
613
610
|
drop_geometry (bool): Whether to drop the geometry column from output
|
@@ -995,6 +992,14 @@ def aggregate_polygons_to_zones(
|
|
995
992
|
if missing_cols:
|
996
993
|
raise ValueError(f"Value columns not found in polygons data: {missing_cols}")
|
997
994
|
|
995
|
+
# Check for column name conflicts with zone_id_column
|
996
|
+
if zone_id_column in polygons_gdf.columns:
|
997
|
+
raise ValueError(
|
998
|
+
f"Column name conflict: polygons DataFrame contains column '{zone_id_column}' "
|
999
|
+
f"which conflicts with the zone identifier column. Please rename this column "
|
1000
|
+
f"in the polygons data to avoid confusion."
|
1001
|
+
)
|
1002
|
+
|
998
1003
|
# Ensure CRS match
|
999
1004
|
if polygons_gdf.crs != zones.crs:
|
1000
1005
|
polygons_gdf = polygons_gdf.to_crs(zones.crs)
|