giga-spatial 0.6.4-py3-none-any.whl → 0.6.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {giga_spatial-0.6.4.dist-info → giga_spatial-0.6.6.dist-info}/METADATA +3 -1
- giga_spatial-0.6.6.dist-info/RECORD +50 -0
- gigaspatial/__init__.py +1 -1
- gigaspatial/config.py +29 -4
- gigaspatial/core/io/__init__.py +1 -0
- gigaspatial/core/io/data_api.py +3 -1
- gigaspatial/core/io/database.py +319 -0
- gigaspatial/generators/__init__.py +5 -1
- gigaspatial/generators/poi.py +300 -52
- gigaspatial/generators/zonal/__init__.py +2 -1
- gigaspatial/generators/zonal/admin.py +84 -0
- gigaspatial/generators/zonal/base.py +237 -81
- gigaspatial/generators/zonal/geometry.py +151 -53
- gigaspatial/generators/zonal/mercator.py +50 -19
- gigaspatial/grid/__init__.py +1 -1
- gigaspatial/grid/mercator_tiles.py +33 -10
- gigaspatial/handlers/__init__.py +8 -1
- gigaspatial/handlers/base.py +26 -6
- gigaspatial/handlers/boundaries.py +93 -18
- gigaspatial/handlers/ghsl.py +92 -15
- gigaspatial/handlers/rwi.py +5 -2
- gigaspatial/handlers/worldpop.py +771 -186
- gigaspatial/processing/algorithms.py +188 -0
- gigaspatial/processing/geo.py +204 -102
- gigaspatial/processing/tif_processor.py +220 -45
- giga_spatial-0.6.4.dist-info/RECORD +0 -47
- {giga_spatial-0.6.4.dist-info → giga_spatial-0.6.6.dist-info}/WHEEL +0 -0
- {giga_spatial-0.6.4.dist-info → giga_spatial-0.6.6.dist-info}/licenses/LICENSE +0 -0
- {giga_spatial-0.6.4.dist-info → giga_spatial-0.6.6.dist-info}/top_level.txt +0 -0
gigaspatial/handlers/boundaries.py CHANGED

```diff
@@ -10,7 +10,7 @@ import pycountry
 from gigaspatial.core.io.data_store import DataStore
 from gigaspatial.core.io.readers import read_dataset
 from gigaspatial.handlers.hdx import HDXConfig
-from gigaspatial.config import config
+from gigaspatial.config import config as global_config


 class AdminBoundary(BaseModel):
```
```diff
@@ -33,7 +33,6 @@ class AdminBoundary(BaseModel):
     )

     class Config:
-        # extra = "allow"
         arbitrary_types_allowed = True


```
```diff
@@ -48,7 +47,7 @@ class AdminBoundaries(BaseModel):
         description="Administrative level (e.g., 0=country, 1=state, etc.)",
     )

-    logger: ClassVar = config.get_logger("AdminBoundaries")
+    logger: ClassVar = global_config.get_logger("AdminBoundaries")

     _schema_config: ClassVar[Dict[str, Dict[str, str]]] = {
         "gadm": {
```
```diff
@@ -292,6 +291,56 @@ class AdminBoundaries(BaseModel):
             country_code, admin_level, "geoBoundaries"
         )

+    @classmethod
+    def from_global_country_boundaries(cls, scale: str = "medium") -> "AdminBoundaries":
+        """
+        Load global country boundaries from Natural Earth Data.
+
+        Args:
+            scale (str): One of 'large', 'medium', 'small'.
+                - 'large' -> 10m
+                - 'medium' -> 50m
+                - 'small' -> 110m
+        Returns:
+            AdminBoundaries: All country boundaries at admin_level=0
+        """
+        scale_map = {
+            "large": "10m",
+            "medium": "50m",
+            "small": "110m",
+        }
+        if scale not in scale_map:
+            raise ValueError(
+                f"Invalid scale '{scale}'. Choose from 'large', 'medium', 'small'."
+            )
+        scale_folder = scale_map[scale]
+        url = f"https://naciscdn.org/naturalearth/{scale_folder}/cultural/ne_{scale_folder}_admin_0_countries.zip"
+        cls.logger.info(f"Loading Natural Earth global country boundaries from {url}")
+        try:
+            gdf = gpd.read_file(url)
+            # Map fields to AdminBoundary schema
+            boundaries = []
+            for _, row in gdf.iterrows():
+                iso_a3 = row.get("ISO_A3_EH") or row.get("ISO_A3") or row.get("ADM0_A3")
+                name = row.get("NAME") or row.get("ADMIN") or row.get("SOVEREIGNT")
+                geometry = row.get("geometry")
+                if not iso_a3 or not name or geometry is None:
+                    continue
+                boundary = AdminBoundary(
+                    id=iso_a3,
+                    name=name,
+                    geometry=geometry,
+                    country_code=iso_a3,
+                )
+                boundaries.append(boundary)
+            cls.logger.info(
+                f"Loaded {len(boundaries)} country boundaries from Natural Earth."
+            )
+            return cls(boundaries=boundaries, level=0)
+        except Exception as e:
+            cls.logger.error(f"Failed to load Natural Earth global boundaries: {e}")
+            raise
+
     @classmethod
     def create(
         cls,
```
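The new classmethod can be exercised on its own; a minimal usage sketch (requires network access to naciscdn.org; the `boundaries` and `level` attributes are the model fields visible in the `cls(boundaries=..., level=0)` call above):

```python
from gigaspatial.handlers.boundaries import AdminBoundaries

# "medium" resolves to the 50m Natural Earth dataset via scale_map.
world = AdminBoundaries.from_global_country_boundaries(scale="medium")

print(world.level)               # 0 — country level
print(len(world.boundaries))     # number of AdminBoundary records loaded
print(world.boundaries[0].name)  # name of the first country
```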
```diff
@@ -301,28 +350,50 @@ class AdminBoundaries(BaseModel):
         path: Optional[Union[str, "Path"]] = None,
         **kwargs,
     ) -> "AdminBoundaries":
-        """
+        """
+        Factory method to create an AdminBoundaries instance using various data sources,
+        depending on the provided parameters and global configuration.
+
+        Loading Logic:
+            1. If a `data_store` is provided and either a `path` is given or
+               `global_config.ADMIN_BOUNDARIES_DATA_DIR` is set:
+                - If `path` is not provided but `country_code` is, the path is constructed
+                  using `global_config.get_admin_path()`.
+                - Loads boundaries from the specified data store and path.
+
+            2. If only `country_code` is provided (no data_store):
+                - Attempts to load boundaries from GeoRepo (if available).
+                - If GeoRepo is unavailable, attempts to load from GADM.
+                - If GADM fails, falls back to geoBoundaries.
+                - Raises an error if all sources fail.
+
+            3. If neither `country_code` nor `data_store` is provided:
+                - Raises a ValueError.

         Args:
-            country_code: ISO country code (2 or 3 letter) or country name
-            admin_level: Administrative level (0=country, 1=state/province, etc.)
-            data_store: Optional data store instance for loading from existing data
-            path: Optional path to data file (used with data_store)
-            **kwargs: Additional arguments passed to the underlying creation methods
+            country_code (Optional[str]): ISO country code (2 or 3 letter) or country name.
+            admin_level (int): Administrative level (0=country, 1=state/province, etc.).
+            data_store (Optional[DataStore]): Optional data store instance for loading from existing data.
+            path (Optional[Union[str, Path]]): Optional path to data file (used with data_store).
+            **kwargs: Additional arguments passed to the underlying creation methods.

         Returns:
-            AdminBoundaries: Configured instance
+            AdminBoundaries: Configured instance.

         Raises:
             ValueError: If neither country_code nor (data_store, path) are provided,
-
+                or if country_code lookup fails.
+            RuntimeError: If all data sources fail to load boundaries.

-
-            #
-            boundaries = AdminBoundaries.create(country_code="USA", admin_level=1)
+        Examples:
+            # Load from a data store (path auto-generated if not provided)
+            boundaries = AdminBoundaries.create(country_code="USA", admin_level=1, data_store=store)

-            #
+            # Load from a specific file in a data store
             boundaries = AdminBoundaries.create(data_store=store, path="data.shp")
+
+            # Load from online sources (GeoRepo, GADM, geoBoundaries)
+            boundaries = AdminBoundaries.create(country_code="USA", admin_level=1)
         """
         cls.logger.info(
             f"Creating AdminBoundaries instance. Country: {country_code}, "
```
```diff
@@ -330,17 +401,21 @@ class AdminBoundaries(BaseModel):
             f"path provided: {path is not None}"
         )

+        from_data_store = data_store is not None and (
+            global_config.ADMIN_BOUNDARIES_DATA_DIR is not None or path is not None
+        )
+
         # Validate input parameters
         if not country_code and not data_store:
             raise ValueError("Either country_code or data_store must be provided.")

-        if data_store and not path and not country_code:
+        if from_data_store and not path and not country_code:
             raise ValueError(
                 "If data_store is provided, either path or country_code must also be specified."
             )

         # Handle data store path first
-        if data_store:
+        if from_data_store:
             iso3_code = None
             if country_code:
                 try:
```
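A sketch of the three call patterns the new `from_data_store` flag distinguishes; `LocalDataStore` is imported from the path this package uses elsewhere (see the removed import in the ghsl.py diff below), and its no-argument construction is an assumption:

```python
from gigaspatial.handlers.boundaries import AdminBoundaries
from gigaspatial.core.io.local_data_store import LocalDataStore

store = LocalDataStore()  # assumed default construction

# 1. Explicit path: loads "data.shp" directly from the store.
b1 = AdminBoundaries.create(data_store=store, path="data.shp")

# 2. Store + country: the path comes from global_config.get_admin_path(),
#    but only when ADMIN_BOUNDARIES_DATA_DIR is set or a path is given;
#    otherwise from_data_store is False and the online chain is used.
b2 = AdminBoundaries.create(country_code="KEN", admin_level=1, data_store=store)

# 3. Country only: GeoRepo -> GADM -> geoBoundaries fallback chain.
b3 = AdminBoundaries.create(country_code="KEN", admin_level=1)
```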
```diff
@@ -350,7 +425,7 @@ class AdminBoundaries(BaseModel):

             # Generate path if not provided
             if path is None and iso3_code:
-                path = config.get_admin_path(
+                path = global_config.get_admin_path(
                     country_code=iso3_code,
                     admin_level=admin_level,
                 )
```
gigaspatial/handlers/ghsl.py CHANGED

```diff
@@ -14,7 +14,6 @@ import requests
 from tqdm import tqdm
 import zipfile
 import tempfile
-import shutil
 from pydantic import (
     HttpUrl,
     Field,
```
```diff
@@ -25,8 +24,6 @@ from pydantic import (
 import logging

 from gigaspatial.core.io.data_store import DataStore
-from gigaspatial.core.io.local_data_store import LocalDataStore
-from gigaspatial.handlers.boundaries import AdminBoundaries
 from gigaspatial.processing.tif_processor import TifProcessor
 from gigaspatial.handlers.base import (
     BaseHandlerConfig,
```
```diff
@@ -241,8 +238,8 @@ class GHSLDataConfig(BaseHandlerConfig):
             ValueError: If the input `source` is not one of the supported types.
         """
         if isinstance(source, gpd.GeoDataFrame):
-
-
+            if source.crs != crs:
+                source = source.to_crs(crs)
             search_geom = source.geometry.unary_union
         elif isinstance(
             source,
```
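The added guard is the standard geopandas idiom for normalizing an input frame's CRS before geometry operations; a self-contained illustration:

```python
import geopandas as gpd
from shapely.geometry import Point

crs = "EPSG:4326"
source = gpd.GeoDataFrame(geometry=[Point(3637000, 35000)], crs="EPSG:3857")

# Reproject only when the CRS differs; to_crs returns a new frame,
# so skipping it avoids copying every geometry unnecessarily.
if source.crs != crs:
    source = source.to_crs(crs)

# One (multi)geometry usable for fast intersection tests downstream.
search_geom = source.geometry.unary_union
print(source.crs, search_geom.geom_type)
```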
```diff
@@ -273,7 +270,9 @@ class GHSLDataConfig(BaseHandlerConfig):
             tile_geom.intersects(search_geom) for tile_geom in self.tiles_gdf.geometry
         )

-
+        intersecting_tiles = self.tiles_gdf.loc[mask, "tile_id"].to_list()
+
+        return intersecting_tiles

     def _get_product_info(self) -> dict:
         """Generate and return common product information used in multiple methods."""
```
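The completed selection is the usual mask-then-`.loc` pattern over a tile index. An equivalent standalone sketch using the vectorized `intersects` (the diff builds the mask from a per-tile generator, which yields the same result; tile IDs here are illustrative):

```python
import geopandas as gpd
from shapely.geometry import box

tiles_gdf = gpd.GeoDataFrame(
    {"tile_id": ["R3_C10", "R3_C11"]},  # illustrative tile IDs
    geometry=[box(0, 0, 10, 10), box(10, 0, 20, 10)],
    crs="EPSG:4326",
)
search_geom = box(2, 2, 5, 5)

# Boolean mask over tile geometries, then select the matching IDs.
mask = tiles_gdf.geometry.intersects(search_geom)
intersecting_tiles = tiles_gdf.loc[mask, "tile_id"].to_list()
print(intersecting_tiles)  # ['R3_C10']
```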
```diff
@@ -340,7 +339,7 @@ class GHSLDataDownloader(BaseHandlerDownloader):

         Args:
             tile_id: tile ID to process.
-            extract: If True and the downloaded file is a zip, extract its contents. Defaults to
+            extract: If True and the downloaded file is a zip, extract its contents. Defaults to True.
             file_pattern: Optional regex pattern to filter extracted files (if extract=True).
             **kwargs: Additional parameters passed to download methods

```
```diff
@@ -356,14 +355,34 @@ class GHSLDataDownloader(BaseHandlerDownloader):
             return self._download_file(url, output_path)

         extracted_files: List[Path] = []
+        temp_downloaded_path: Optional[Path] = None

         try:
             with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as temp_file:
-
-
-
+                temp_downloaded_path = Path(temp_file.name)
+                self.logger.debug(
+                    f"Downloading {url} to temporary file: {temp_downloaded_path}"
+                )
+
+                response = requests.get(url, stream=True)
+                response.raise_for_status()
+
+                total_size = int(response.headers.get("content-length", 0))
+
+                with tqdm(
+                    total=total_size,
+                    unit="B",
+                    unit_scale=True,
+                    desc=f"Downloading {tile_id}",
+                ) as pbar:
+                    for chunk in response.iter_content(chunk_size=8192):
+                        if chunk:
+                            temp_file.write(chunk)
+                            pbar.update(len(chunk))
+
+                self.logger.info(f"Successfully downloaded temporary file!")

-            with zipfile.ZipFile(str(
+            with zipfile.ZipFile(str(temp_downloaded_path), "r") as zip_ref:
                 if file_pattern:
                     import re

```
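The new download loop is the common requests-plus-tqdm streaming pattern; extracted as a standalone sketch (the function name is hypothetical, the URL a placeholder):

```python
import tempfile
from pathlib import Path

import requests
from tqdm import tqdm


def download_to_temp(url: str, desc: str = "download") -> Path:
    """Stream a URL into a temp file with a byte-accurate progress bar."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as temp_file:
        response = requests.get(url, stream=True)
        response.raise_for_status()
        # content-length can be missing; tqdm then shows rate without a total.
        total_size = int(response.headers.get("content-length", 0))
        with tqdm(total=total_size, unit="B", unit_scale=True, desc=desc) as pbar:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # filter out keep-alive chunks
                    temp_file.write(chunk)
                    pbar.update(len(chunk))
        return Path(temp_file.name)
```

`delete=False` keeps the file alive after the context manager exits, which is why the diff pairs it with a `finally` block that unlinks the temp file (next hunk).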
```diff
@@ -385,9 +404,24 @@ class GHSLDataDownloader(BaseHandlerDownloader):
                     Path(temp_file.name).unlink()
             return extracted_files

+        except requests.exceptions.RequestException as e:
+            self.logger.error(f"Failed to download {url} to temporary file: {e}")
+            return None
+        except zipfile.BadZipFile:
+            self.logger.error(f"Downloaded file for {tile_id} is not a valid zip file.")
+            return None
         except Exception as e:
             self.logger.error(f"Error downloading/extracting tile {tile_id}: {e}")
             return None
+        finally:
+            if temp_downloaded_path and temp_downloaded_path.exists():
+                try:
+                    temp_downloaded_path.unlink()
+                    self.logger.debug(f"Deleted temporary file: {temp_downloaded_path}")
+                except OSError as e:
+                    self.logger.warning(
+                        f"Could not delete temporary file {temp_downloaded_path}: {e}"
+                    )

     def download_data_units(
         self,
```
```diff
@@ -401,7 +435,7 @@ class GHSLDataDownloader(BaseHandlerDownloader):

         Args:
             tile_ids: A list of tile IDs to download.
-            extract: If True and the downloaded files are zips, extract their contents. Defaults to
+            extract: If True and the downloaded files are zips, extract their contents. Defaults to True.
             file_pattern: Optional regex pattern to filter extracted files (if extract=True).
             **kwargs: Additional parameters passed to download methods

```
```diff
@@ -456,7 +490,7 @@ class GHSLDataDownloader(BaseHandlerDownloader):
               - A list of (latitude, longitude) tuples or Shapely Point objects.
               - A Shapely BaseGeometry object (e.g., Polygon, MultiPolygon).
               - A GeoDataFrame with geometry column in EPSG:4326.
-            extract: If True and the downloaded files are zips, extract their contents. Defaults to
+            extract: If True and the downloaded files are zips, extract their contents. Defaults to True.
             file_pattern: Optional regex pattern to filter extracted files (if extract=True).
             **kwargs: Additional keyword arguments. These will be passed down to
                 `AdminBoundaries.create()` (if `source` is a country)
```
```diff
@@ -496,7 +530,7 @@ class GHSLDataDownloader(BaseHandlerDownloader):
             country_geom_path: Optional path to a GeoJSON file containing the
                                country boundary. If provided, this boundary is used
                                instead of the default from `AdminBoundaries`.
-            extract: If True and the downloaded files are zips, extract their contents. Defaults to
+            extract: If True and the downloaded files are zips, extract their contents. Defaults to True.
             file_pattern: Optional regex pattern to filter extracted files (if extract=True).
             **kwargs: Additional keyword arguments that are passed to
                 `download_data_units`. For example, `extract` to download and extract.
```
```diff
@@ -563,7 +597,7 @@ class GHSLDataReader(BaseHandlerReader):
         logger: Optional[logging.Logger] = None,
     ):
         """
-        Initialize the
+        Initialize the reader.

         Args:
             config: Configuration for the GHSL dataset, either as a GHSLDataConfig object or a dictionary of parameters
```
```diff
@@ -770,3 +804,46 @@ class GHSLDataHandler(BaseHandler):
         return pd.concat(
             [tp.to_dataframe() for tp in tif_processors], ignore_index=True
         )
+
+    def load_into_geodataframe(
+        self,
+        source: Union[
+            str,  # country
+            List[Union[tuple, Point]],  # points
+            BaseGeometry,  # geometry
+            gpd.GeoDataFrame,  # geodataframe
+            Path,  # path
+            List[Union[str, Path]],  # list of paths
+        ],
+        ensure_available: bool = True,
+        **kwargs,
+    ) -> gpd.GeoDataFrame:
+        """
+        Load GHSL data into a geopandas GeoDataFrame.
+
+        Args:
+            source: The data source specification
+            ensure_available: If True, ensure data is downloaded before loading
+            **kwargs: Additional parameters passed to load methods
+
+        Returns:
+            GeoDataFrame containing the GHSL data
+        """
+        tif_processors = self.load_data(
+            source=source, ensure_available=ensure_available, **kwargs
+        )
+        return pd.concat(
+            [tp.to_geodataframe() for tp in tif_processors], ignore_index=True
+        )
+
+    def get_available_data_info(
+        self,
+        source: Union[
+            str,  # country
+            List[Union[tuple, Point]],  # points
+            BaseGeometry,  # geometry
+            gpd.GeoDataFrame,  # geodataframe
+        ],
+        **kwargs,
+    ) -> dict:
+        return super().get_available_data_info(source, file_ext=".tif", **kwargs)
```
gigaspatial/handlers/rwi.py CHANGED

```diff
@@ -2,6 +2,7 @@ import logging
 from typing import List, Optional, Union, Literal
 from pydantic.dataclasses import dataclass
 from datetime import datetime
+import pycountry

 from hdx.data.resource import Resource

```
```diff
@@ -36,8 +37,10 @@ class RWIConfig(HDXConfig):
         self, country: str, **kwargs
     ) -> List[Resource]:
         """Get relevant data units for a country, optionally filtering for latest version"""
-
-
+        country = pycountry.countries.lookup(country)
+        values = [country.alpha_3]
+        resources = self.get_dataset_resources(
+            filter={"url": values},
         )

         if self.latest_only and len(resources) > 1:
```
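`pycountry.countries.lookup` is what lets the rewritten filter accept country names as well as ISO codes; a quick illustration:

```python
import pycountry

# lookup() is case-insensitive and matches names, alpha-2, or alpha-3 codes;
# it raises LookupError when nothing matches.
for query in ("Kenya", "KE", "ken"):
    print(query, "->", pycountry.countries.lookup(query).alpha_3)  # all print 'KEN'
```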
|