giga-spatial 0.6.5__py3-none-any.whl → 0.6.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  from abc import ABC, abstractmethod
2
+ from dataclasses import dataclass, field
2
3
  from pathlib import Path
3
4
  from typing import Any, List, Optional, Union, Tuple, Callable, Iterable
4
5
  import pandas as pd
@@ -13,7 +14,6 @@ from gigaspatial.core.io.data_store import DataStore
13
14
  from gigaspatial.core.io.local_data_store import LocalDataStore
14
15
  from gigaspatial.core.io.readers import read_dataset
15
16
  from gigaspatial.processing.tif_processor import TifProcessor
16
- from dataclasses import dataclass, field
17
17
 
18
18
 
19
19
  @dataclass
@@ -584,6 +584,8 @@ class BaseHandler(ABC):
584
584
  bool: True if data is available after this operation
585
585
  """
586
586
  try:
587
+ data_units = None
588
+ data_paths = None
587
589
  # Resolve what data units are needed
588
590
  if hasattr(self.config, "get_relevant_data_units"):
589
591
  data_units = self.config.get_relevant_data_units(source, **kwargs)
@@ -606,11 +608,29 @@ class BaseHandler(ABC):
606
608
  if not missing_paths:
607
609
  self.logger.info("All required data is already available")
608
610
  return True
609
-
610
- # Download missing or all data
611
- if hasattr(self.config, "get_relevant_data_units"):
612
- data_units = self.config.get_relevant_data_units(source, **kwargs)
613
- self.downloader.download_data_units(data_units, **kwargs)
611
+ else:
612
+ # If force_download, treat all as missing
613
+ missing_paths = data_paths
614
+
615
+ if not missing_paths:
616
+ self.logger.info("No missing data to download.")
617
+ return True
618
+
619
+ # Download logic
620
+ if data_units is not None:
621
+ # Map data_units to their paths and select only those that are missing
622
+ unit_to_path = dict(zip(data_units, data_paths))
623
+ if force_download:
624
+ # Download all units if force_download
625
+ self.downloader.download_data_units(data_units, **kwargs)
626
+ else:
627
+ missing_units = [
628
+ unit
629
+ for unit, path in unit_to_path.items()
630
+ if path in missing_paths
631
+ ]
632
+ if missing_units:
633
+ self.downloader.download_data_units(missing_units, **kwargs)
614
634
  else:
615
635
  self.downloader.download(source, **kwargs)
616
636
 
@@ -291,6 +291,56 @@ class AdminBoundaries(BaseModel):
291
291
  country_code, admin_level, "geoBoundaries"
292
292
  )
293
293
 
294
+ @classmethod
295
+ def from_global_country_boundaries(cls, scale: str = "medium") -> "AdminBoundaries":
296
+ """
297
+ Load global country boundaries from Natural Earth Data.
298
+
299
+ Args:
300
+ scale (str): One of 'large', 'medium', 'small'.
301
+ - 'large' -> 10m
302
+ - 'medium' -> 50m
303
+ - 'small' -> 110m
304
+ Returns:
305
+ AdminBoundaries: All country boundaries at admin_level=0
306
+ """
307
+ scale_map = {
308
+ "large": "10m",
309
+ "medium": "50m",
310
+ "small": "110m",
311
+ }
312
+ if scale not in scale_map:
313
+ raise ValueError(
314
+ f"Invalid scale '{scale}'. Choose from 'large', 'medium', 'small'."
315
+ )
316
+ scale_folder = scale_map[scale]
317
+ url = f"https://naciscdn.org/naturalearth/{scale_folder}/cultural/ne_{scale_folder}_admin_0_countries.zip"
318
+ cls.logger.info(f"Loading Natural Earth global country boundaries from {url}")
319
+ try:
320
+ gdf = gpd.read_file(url)
321
+ # Map fields to AdminBoundary schema
322
+ boundaries = []
323
+ for _, row in gdf.iterrows():
324
+ iso_a3 = row.get("ISO_A3_EH") or row.get("ISO_A3") or row.get("ADM0_A3")
325
+ name = row.get("NAME") or row.get("ADMIN") or row.get("SOVEREIGNT")
326
+ geometry = row.get("geometry")
327
+ if not iso_a3 or not name or geometry is None:
328
+ continue
329
+ boundary = AdminBoundary(
330
+ id=iso_a3,
331
+ name=name,
332
+ geometry=geometry,
333
+ country_code=iso_a3,
334
+ )
335
+ boundaries.append(boundary)
336
+ cls.logger.info(
337
+ f"Loaded {len(boundaries)} country boundaries from Natural Earth."
338
+ )
339
+ return cls(boundaries=boundaries, level=0)
340
+ except Exception as e:
341
+ cls.logger.error(f"Failed to load Natural Earth global boundaries: {e}")
342
+ raise
343
+
294
344
  @classmethod
295
345
  def create(
296
346
  cls,
@@ -597,7 +597,7 @@ class GHSLDataReader(BaseHandlerReader):
597
597
  logger: Optional[logging.Logger] = None,
598
598
  ):
599
599
  """
600
- Initialize the downloader.
600
+ Initialize the reader.
601
601
 
602
602
  Args:
603
603
  config: Configuration for the GHSL dataset, either as a GHSLDataConfig object or a dictionary of parameters
@@ -804,7 +804,7 @@ class GHSLDataHandler(BaseHandler):
804
804
  return pd.concat(
805
805
  [tp.to_dataframe() for tp in tif_processors], ignore_index=True
806
806
  )
807
-
807
+
808
808
  def load_into_geodataframe(
809
809
  self,
810
810
  source: Union[
@@ -817,7 +817,7 @@ class GHSLDataHandler(BaseHandler):
817
817
  ],
818
818
  ensure_available: bool = True,
819
819
  **kwargs,
820
- ) -> pd.DataFrame:
820
+ ) -> gpd.GeoDataFrame:
821
821
  """
822
822
  Load GHSL data into a geopandas GeoDataFrame.
823
823
 
@@ -835,3 +835,15 @@ class GHSLDataHandler(BaseHandler):
835
835
  return pd.concat(
836
836
  [tp.to_geodataframe() for tp in tif_processors], ignore_index=True
837
837
  )
838
+
839
+ def get_available_data_info(
840
+ self,
841
+ source: Union[
842
+ str, # country
843
+ List[Union[tuple, Point]], # points
844
+ BaseGeometry, # geometry
845
+ gpd.GeoDataFrame, # geodataframe
846
+ ],
847
+ **kwargs,
848
+ ) -> dict:
849
+ return super().get_available_data_info(source, file_ext=".tif", **kwargs)