giga-spatial 0.6.5__py3-none-any.whl → 0.6.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {giga_spatial-0.6.5.dist-info → giga_spatial-0.6.6.dist-info}/METADATA +2 -1
- {giga_spatial-0.6.5.dist-info → giga_spatial-0.6.6.dist-info}/RECORD +17 -17
- gigaspatial/__init__.py +1 -1
- gigaspatial/core/io/data_api.py +3 -1
- gigaspatial/core/io/database.py +4 -1
- gigaspatial/generators/poi.py +75 -12
- gigaspatial/generators/zonal/base.py +34 -35
- gigaspatial/generators/zonal/geometry.py +87 -32
- gigaspatial/handlers/__init__.py +8 -1
- gigaspatial/handlers/base.py +26 -6
- gigaspatial/handlers/boundaries.py +50 -0
- gigaspatial/handlers/ghsl.py +15 -3
- gigaspatial/handlers/worldpop.py +771 -186
- gigaspatial/processing/geo.py +127 -87
- {giga_spatial-0.6.5.dist-info → giga_spatial-0.6.6.dist-info}/WHEEL +0 -0
- {giga_spatial-0.6.5.dist-info → giga_spatial-0.6.6.dist-info}/licenses/LICENSE +0 -0
- {giga_spatial-0.6.5.dist-info → giga_spatial-0.6.6.dist-info}/top_level.txt +0 -0
gigaspatial/handlers/base.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
from abc import ABC, abstractmethod
|
2
|
+
from dataclasses import dataclass, field
|
2
3
|
from pathlib import Path
|
3
4
|
from typing import Any, List, Optional, Union, Tuple, Callable, Iterable
|
4
5
|
import pandas as pd
|
@@ -13,7 +14,6 @@ from gigaspatial.core.io.data_store import DataStore
|
|
13
14
|
from gigaspatial.core.io.local_data_store import LocalDataStore
|
14
15
|
from gigaspatial.core.io.readers import read_dataset
|
15
16
|
from gigaspatial.processing.tif_processor import TifProcessor
|
16
|
-
from dataclasses import dataclass, field
|
17
17
|
|
18
18
|
|
19
19
|
@dataclass
|
@@ -584,6 +584,8 @@ class BaseHandler(ABC):
|
|
584
584
|
bool: True if data is available after this operation
|
585
585
|
"""
|
586
586
|
try:
|
587
|
+
data_units = None
|
588
|
+
data_paths = None
|
587
589
|
# Resolve what data units are needed
|
588
590
|
if hasattr(self.config, "get_relevant_data_units"):
|
589
591
|
data_units = self.config.get_relevant_data_units(source, **kwargs)
|
@@ -606,11 +608,29 @@ class BaseHandler(ABC):
|
|
606
608
|
if not missing_paths:
|
607
609
|
self.logger.info("All required data is already available")
|
608
610
|
return True
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
611
|
+
else:
|
612
|
+
# If force_download, treat all as missing
|
613
|
+
missing_paths = data_paths
|
614
|
+
|
615
|
+
if not missing_paths:
|
616
|
+
self.logger.info("No missing data to download.")
|
617
|
+
return True
|
618
|
+
|
619
|
+
# Download logic
|
620
|
+
if data_units is not None:
|
621
|
+
# Map data_units to their paths and select only those that are missing
|
622
|
+
unit_to_path = dict(zip(data_units, data_paths))
|
623
|
+
if force_download:
|
624
|
+
# Download all units if force_download
|
625
|
+
self.downloader.download_data_units(data_units, **kwargs)
|
626
|
+
else:
|
627
|
+
missing_units = [
|
628
|
+
unit
|
629
|
+
for unit, path in unit_to_path.items()
|
630
|
+
if path in missing_paths
|
631
|
+
]
|
632
|
+
if missing_units:
|
633
|
+
self.downloader.download_data_units(missing_units, **kwargs)
|
614
634
|
else:
|
615
635
|
self.downloader.download(source, **kwargs)
|
616
636
|
|
gigaspatial/handlers/boundaries.py
CHANGED
@@ -291,6 +291,56 @@ class AdminBoundaries(BaseModel):
|
|
291
291
|
country_code, admin_level, "geoBoundaries"
|
292
292
|
)
|
293
293
|
|
294
|
+
@classmethod
|
295
|
+
def from_global_country_boundaries(cls, scale: str = "medium") -> "AdminBoundaries":
|
296
|
+
"""
|
297
|
+
Load global country boundaries from Natural Earth Data.
|
298
|
+
|
299
|
+
Args:
|
300
|
+
scale (str): One of 'large', 'medium', 'small'.
|
301
|
+
- 'large' -> 10m
|
302
|
+
- 'medium' -> 50m
|
303
|
+
- 'small' -> 110m
|
304
|
+
Returns:
|
305
|
+
AdminBoundaries: All country boundaries at admin_level=0
|
306
|
+
"""
|
307
|
+
scale_map = {
|
308
|
+
"large": "10m",
|
309
|
+
"medium": "50m",
|
310
|
+
"small": "110m",
|
311
|
+
}
|
312
|
+
if scale not in scale_map:
|
313
|
+
raise ValueError(
|
314
|
+
f"Invalid scale '{scale}'. Choose from 'large', 'medium', 'small'."
|
315
|
+
)
|
316
|
+
scale_folder = scale_map[scale]
|
317
|
+
url = f"https://naciscdn.org/naturalearth/{scale_folder}/cultural/ne_{scale_folder}_admin_0_countries.zip"
|
318
|
+
cls.logger.info(f"Loading Natural Earth global country boundaries from {url}")
|
319
|
+
try:
|
320
|
+
gdf = gpd.read_file(url)
|
321
|
+
# Map fields to AdminBoundary schema
|
322
|
+
boundaries = []
|
323
|
+
for _, row in gdf.iterrows():
|
324
|
+
iso_a3 = row.get("ISO_A3_EH") or row.get("ISO_A3") or row.get("ADM0_A3")
|
325
|
+
name = row.get("NAME") or row.get("ADMIN") or row.get("SOVEREIGNT")
|
326
|
+
geometry = row.get("geometry")
|
327
|
+
if not iso_a3 or not name or geometry is None:
|
328
|
+
continue
|
329
|
+
boundary = AdminBoundary(
|
330
|
+
id=iso_a3,
|
331
|
+
name=name,
|
332
|
+
geometry=geometry,
|
333
|
+
country_code=iso_a3,
|
334
|
+
)
|
335
|
+
boundaries.append(boundary)
|
336
|
+
cls.logger.info(
|
337
|
+
f"Loaded {len(boundaries)} country boundaries from Natural Earth."
|
338
|
+
)
|
339
|
+
return cls(boundaries=boundaries, level=0)
|
340
|
+
except Exception as e:
|
341
|
+
cls.logger.error(f"Failed to load Natural Earth global boundaries: {e}")
|
342
|
+
raise
|
343
|
+
|
294
344
|
@classmethod
|
295
345
|
def create(
|
296
346
|
cls,
|
gigaspatial/handlers/ghsl.py
CHANGED
@@ -597,7 +597,7 @@ class GHSLDataReader(BaseHandlerReader):
|
|
597
597
|
logger: Optional[logging.Logger] = None,
|
598
598
|
):
|
599
599
|
"""
|
600
|
-
Initialize the
|
600
|
+
Initialize the reader.
|
601
601
|
|
602
602
|
Args:
|
603
603
|
config: Configuration for the GHSL dataset, either as a GHSLDataConfig object or a dictionary of parameters
|
@@ -804,7 +804,7 @@ class GHSLDataHandler(BaseHandler):
|
|
804
804
|
return pd.concat(
|
805
805
|
[tp.to_dataframe() for tp in tif_processors], ignore_index=True
|
806
806
|
)
|
807
|
-
|
807
|
+
|
808
808
|
def load_into_geodataframe(
|
809
809
|
self,
|
810
810
|
source: Union[
|
@@ -817,7 +817,7 @@ class GHSLDataHandler(BaseHandler):
|
|
817
817
|
],
|
818
818
|
ensure_available: bool = True,
|
819
819
|
**kwargs,
|
820
|
-
) ->
|
820
|
+
) -> gpd.GeoDataFrame:
|
821
821
|
"""
|
822
822
|
Load GHSL data into a geopandas GeoDataFrame.
|
823
823
|
|
@@ -835,3 +835,15 @@ class GHSLDataHandler(BaseHandler):
|
|
835
835
|
return pd.concat(
|
836
836
|
[tp.to_geodataframe() for tp in tif_processors], ignore_index=True
|
837
837
|
)
|
838
|
+
|
839
|
+
def get_available_data_info(
|
840
|
+
self,
|
841
|
+
source: Union[
|
842
|
+
str, # country
|
843
|
+
List[Union[tuple, Point]], # points
|
844
|
+
BaseGeometry, # geometry
|
845
|
+
gpd.GeoDataFrame, # geodataframe
|
846
|
+
],
|
847
|
+
**kwargs,
|
848
|
+
) -> dict:
|
849
|
+
return super().get_available_data_info(source, file_ext=".tif", **kwargs)
|