giga-spatial 0.6.4__py3-none-any.whl → 0.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,25 +15,48 @@ from gigaspatial.generators.zonal.geometry import GeometryBasedZonalViewGenerato
15
15
 
16
16
 
17
17
  class MercatorViewGenerator(GeometryBasedZonalViewGenerator[T]):
18
- """Mid-level class for zonal view generation based on geometries with identifiers.
18
+ """
19
+ Generates zonal views using Mercator tiles as the zones.
19
20
 
20
- This class serves as an intermediate between the abstract ZonalViewGenerator and specific
21
- implementations like MercatorViewGenerator or H3ViewGenerator. It handles the common case
22
- where zones are defined by a mapping between zone identifiers and geometries, either
23
- provided as a dictionary or as a GeoDataFrame.
21
+ This class specializes in creating zonal views where the zones are defined by
22
+ Mercator tiles. It extends the `GeometryBasedZonalViewGenerator` and leverages
23
+ the `MercatorTiles` and `CountryMercatorTiles` classes to generate tiles based on
24
+ various input sources.
24
25
 
25
- The class extends the base functionality with methods for mapping common geospatial
26
- datasets including GHSL (Global Human Settlement Layer), Google Open Buildings,
27
- and Microsoft Global Buildings data.
26
+ The primary input source defines the geographical area of interest. This can be
27
+ a country, a specific geometry, a set of points, or even a list of predefined
28
+ quadkeys. The `zoom_level` determines the granularity of the Mercator tiles.
28
29
 
29
30
  Attributes:
30
- zone_dict (Dict[T, Polygon]): Mapping of zone identifiers to geometries.
31
- zone_id_column (str): Name of the column containing zone identifiers.
32
- zone_data_crs (str): Coordinate reference system of the zone data.
33
- _zone_gdf (gpd.GeoDataFrame): Cached GeoDataFrame representation of zones.
34
- data_store (DataStore): For accessing input data.
35
- generator_config (ZonalViewGeneratorConfig): Configuration for view generation.
36
- logger: Logger instance for this class.
31
+ source (Union[str, BaseGeometry, gpd.GeoDataFrame, List[Union[Point, Tuple[float, float]]], List[str]]):
32
+ Specifies the geographic area or specific tiles to use. Can be:
33
+ - A country name (str): Uses `CountryMercatorTiles` to generate tiles covering the country.
34
+ - A Shapely geometry (BaseGeometry): Uses `MercatorTiles.from_spatial` to create tiles intersecting the geometry.
35
+ - A GeoDataFrame (gpd.GeoDataFrame): Uses `MercatorTiles.from_spatial` to create tiles intersecting the geometries.
36
+ - A list of points (List[Union[Point, Tuple[float, float]]]): Uses `MercatorTiles.from_spatial` to create tiles containing the points.
37
+ - A list of quadkeys (List[str]): Uses `MercatorTiles.from_quadkeys` to use the specified tiles directly.
38
+ zoom_level (int): The zoom level of the Mercator tiles. Higher zoom levels result in smaller, more detailed tiles.
39
+ predicate (str): The spatial predicate used when filtering tiles based on a spatial source (e.g., "intersects", "contains"). Defaults to "intersects".
40
+ config (Optional[ZonalViewGeneratorConfig]): Configuration for the zonal view generation process.
41
+ data_store (Optional[DataStore]): A DataStore instance for accessing data.
42
+ logger (Optional[logging.Logger]): A logger instance for logging.
43
+
44
+ Methods:
45
+ _init_zone_data(source, zoom_level, predicate): Initializes the Mercator tile GeoDataFrame based on the input source.
46
+ # Inherits other methods from GeometryBasedZonalViewGenerator, such as:
47
+ # map_ghsl(), map_google_buildings(), map_ms_buildings(), aggregate_data(), save_view()
48
+
49
+ Example:
50
+ # Create a MercatorViewGenerator for tiles covering Germany at zoom level 6
51
+ generator = MercatorViewGenerator(source="Germany", zoom_level=6)
52
+
53
+ # Create a MercatorViewGenerator for tiles intersecting a specific polygon
54
+ polygon = ... # Define a Shapely Polygon
55
+ generator = MercatorViewGenerator(source=polygon, zoom_level=8)
56
+
57
+ # Create a MercatorViewGenerator from a list of quadkeys
58
+ quadkeys = ["0020023131023032", "0020023131023033"]
59
+ generator = MercatorViewGenerator(source=quadkeys, zoom_level=12)
37
60
  """
38
61
 
39
62
  def __init__(
@@ -53,16 +76,19 @@ class MercatorViewGenerator(GeometryBasedZonalViewGenerator[T]):
53
76
  ):
54
77
 
55
78
  super().__init__(
56
- zone_data=self._init_zone_data(source, zoom_level, predicate),
79
+ zone_data=self._init_zone_data(source, zoom_level, predicate, data_store),
57
80
  zone_id_column="quadkey",
58
81
  config=config,
59
82
  data_store=data_store,
60
83
  logger=logger,
61
84
  )
85
+ self.logger.info(f"Initialized MercatorViewGenerator")
62
86
 
63
- def _init_zone_data(self, source, zoom_level, predicate):
87
+ def _init_zone_data(self, source, zoom_level, predicate, data_store=None):
64
88
  if isinstance(source, str):
65
- tiles = CountryMercatorTiles.create(country=source, zoom_level=zoom_level)
89
+ tiles = CountryMercatorTiles.create(
90
+ country=source, zoom_level=zoom_level, data_store=data_store
91
+ )
66
92
  elif isinstance(source, (BaseGeometry, Iterable)):
67
93
  if isinstance(source, Iterable) and all(
68
94
  isinstance(qk, str) for qk in source
@@ -73,6 +99,11 @@ class MercatorViewGenerator(GeometryBasedZonalViewGenerator[T]):
73
99
  source=source, zoom_level=zoom_level, predicate=predicate
74
100
  )
75
101
  else:
76
- raise ValueError("sadadasfasfkasmf")
102
+ raise TypeError(
103
+ f"Unsupported source type for MercatorViewGenerator. 'source' must be "
104
+ f"a country name (str), a Shapely geometry, a GeoDataFrame, "
105
+ f"a list of quadkeys (str), or a list of (lon, lat) tuples/Shapely Point objects. "
106
+ f"Received type: {type(source)}."
107
+ )
77
108
 
78
109
  return tiles.to_geodataframe()
@@ -1 +1 @@
1
- from gigaspatial.grid.mercator_tiles import *
1
+ from gigaspatial.grid.mercator_tiles import MercatorTiles, CountryMercatorTiles
@@ -4,10 +4,10 @@ import mercantile
4
4
  from shapely.geometry import box
5
5
  from shapely.geometry.base import BaseGeometry
6
6
  from shapely.strtree import STRtree
7
- from shapely import MultiPolygon, Polygon, Point
7
+ from shapely import Point
8
8
  import json
9
9
  from pathlib import Path
10
- from pydantic import BaseModel, Field, PrivateAttr
10
+ from pydantic import BaseModel, Field
11
11
  from typing import List, Union, Iterable, Optional, Tuple, ClassVar
12
12
  import pycountry
13
13
 
@@ -31,6 +31,9 @@ class MercatorTiles(BaseModel):
31
31
  if not quadkeys:
32
32
  cls.logger.warning("No quadkeys provided to from_quadkeys.")
33
33
  return cls(zoom_level=0, quadkeys=[])
34
+ cls.logger.info(
35
+ f"Initializing MercatorTiles from {len(quadkeys)} provided quadkeys."
36
+ )
34
37
  return cls(zoom_level=len(quadkeys[0]), quadkeys=set(quadkeys))
35
38
 
36
39
  @classmethod
@@ -120,14 +123,7 @@ class MercatorTiles(BaseModel):
120
123
  cls.logger.info(
121
124
  f"Creating MercatorTiles from {len(points)} points at zoom level: {zoom_level}"
122
125
  )
123
- quadkeys = {
124
- (
125
- mercantile.quadkey(mercantile.tile(p.x, p.y, zoom_level))
126
- if isinstance(p, Point)
127
- else mercantile.quadkey(mercantile.tile(p[1], p[0], zoom_level))
128
- )
129
- for p in points
130
- }
126
+ quadkeys = set(cls.get_quadkeys_from_points(points, zoom_level))
131
127
  cls.logger.info(f"Generated {len(quadkeys)} unique quadkeys from points.")
132
128
  return cls(zoom_level=zoom_level, quadkeys=list(quadkeys), **kwargs)
133
129
 
@@ -219,6 +215,29 @@ class MercatorTiles(BaseModel):
219
215
  {"quadkey": self.quadkeys, "geometry": self.to_geoms()}, crs="EPSG:4326"
220
216
  )
221
217
 
218
+ @staticmethod
219
+ def get_quadkeys_from_points(
220
+ points: List[Union[Point, Tuple[float, float]]], zoom_level: int
221
+ ) -> List[str]:
222
+ """Get list of quadkeys for the provided points at specified zoom level.
223
+
224
+ Args:
225
+ points: List of points as either shapely Points or (lat, lon) tuples
226
+ zoom_level: Zoom level for the quadkeys
227
+
228
+ Returns:
229
+ List of quadkey strings
230
+ """
231
+ quadkeys = [
232
+ (
233
+ mercantile.quadkey(mercantile.tile(p.x, p.y, zoom_level))
234
+ if isinstance(p, Point)
235
+ else mercantile.quadkey(mercantile.tile(p[1], p[0], zoom_level))
236
+ )
237
+ for p in points
238
+ ]
239
+ return quadkeys
240
+
222
241
  def save(self, file: Union[str, Path], format: str = "json") -> None:
223
242
  """Save MercatorTiles to file in specified format."""
224
243
  with self.data_store.open(str(file), "wb" if format == "parquet" else "w") as f:
@@ -270,6 +289,10 @@ class CountryMercatorTiles(MercatorTiles):
270
289
  country=pycountry.countries.lookup(country).alpha_3,
271
290
  )
272
291
 
292
+ cls.logger.info(
293
+ f"Initializing Mercator zones for country: {country} at zoom level {zoom_level}"
294
+ )
295
+
273
296
  country_geom = (
274
297
  AdminBoundaries.create(
275
298
  country_code=country,
@@ -10,7 +10,7 @@ import pycountry
10
10
  from gigaspatial.core.io.data_store import DataStore
11
11
  from gigaspatial.core.io.readers import read_dataset
12
12
  from gigaspatial.handlers.hdx import HDXConfig
13
- from gigaspatial.config import config
13
+ from gigaspatial.config import config as global_config
14
14
 
15
15
 
16
16
  class AdminBoundary(BaseModel):
@@ -33,7 +33,6 @@ class AdminBoundary(BaseModel):
33
33
  )
34
34
 
35
35
  class Config:
36
- # extra = "allow"
37
36
  arbitrary_types_allowed = True
38
37
 
39
38
 
@@ -48,7 +47,7 @@ class AdminBoundaries(BaseModel):
48
47
  description="Administrative level (e.g., 0=country, 1=state, etc.)",
49
48
  )
50
49
 
51
- logger: ClassVar = config.get_logger("AdminBoundaries")
50
+ logger: ClassVar = global_config.get_logger("AdminBoundaries")
52
51
 
53
52
  _schema_config: ClassVar[Dict[str, Dict[str, str]]] = {
54
53
  "gadm": {
@@ -301,28 +300,50 @@ class AdminBoundaries(BaseModel):
301
300
  path: Optional[Union[str, "Path"]] = None,
302
301
  **kwargs,
303
302
  ) -> "AdminBoundaries":
304
- """Factory method to create AdminBoundaries instance from either GADM or data store.
303
+ """
304
+ Factory method to create an AdminBoundaries instance using various data sources,
305
+ depending on the provided parameters and global configuration.
306
+
307
+ Loading Logic:
308
+ 1. If a `data_store` is provided and either a `path` is given or
309
+ `global_config.ADMIN_BOUNDARIES_DATA_DIR` is set:
310
+ - If `path` is not provided but `country_code` is, the path is constructed
311
+ using `global_config.get_admin_path()`.
312
+ - Loads boundaries from the specified data store and path.
313
+
314
+ 2. If only `country_code` is provided (no data_store):
315
+ - Attempts to load boundaries from GeoRepo (if available).
316
+ - If GeoRepo is unavailable, attempts to load from GADM.
317
+ - If GADM fails, falls back to geoBoundaries.
318
+ - Raises an error if all sources fail.
319
+
320
+ 3. If neither `country_code` nor `data_store` is provided:
321
+ - Raises a ValueError.
305
322
 
306
323
  Args:
307
- country_code: ISO country code (2 or 3 letter) or country name
308
- admin_level: Administrative level (0=country, 1=state/province, etc.)
309
- data_store: Optional data store instance for loading from existing data
310
- path: Optional path to data file (used with data_store)
311
- **kwargs: Additional arguments passed to the underlying creation methods
324
+ country_code (Optional[str]): ISO country code (2 or 3 letter) or country name.
325
+ admin_level (int): Administrative level (0=country, 1=state/province, etc.).
326
+ data_store (Optional[DataStore]): Optional data store instance for loading from existing data.
327
+ path (Optional[Union[str, Path]]): Optional path to data file (used with data_store).
328
+ **kwargs: Additional arguments passed to the underlying creation methods.
312
329
 
313
330
  Returns:
314
- AdminBoundaries: Configured instance
331
+ AdminBoundaries: Configured instance.
315
332
 
316
333
  Raises:
317
334
  ValueError: If neither country_code nor (data_store, path) are provided,
318
- or if country_code lookup fails
335
+ or if country_code lookup fails.
336
+ RuntimeError: If all data sources fail to load boundaries.
319
337
 
320
- Example:
321
- # From country code
322
- boundaries = AdminBoundaries.create(country_code="USA", admin_level=1)
338
+ Examples:
339
+ # Load from a data store (path auto-generated if not provided)
340
+ boundaries = AdminBoundaries.create(country_code="USA", admin_level=1, data_store=store)
323
341
 
324
- # From data store
342
+ # Load from a specific file in a data store
325
343
  boundaries = AdminBoundaries.create(data_store=store, path="data.shp")
344
+
345
+ # Load from online sources (GeoRepo, GADM, geoBoundaries)
346
+ boundaries = AdminBoundaries.create(country_code="USA", admin_level=1)
326
347
  """
327
348
  cls.logger.info(
328
349
  f"Creating AdminBoundaries instance. Country: {country_code}, "
@@ -330,17 +351,21 @@ class AdminBoundaries(BaseModel):
330
351
  f"path provided: {path is not None}"
331
352
  )
332
353
 
354
+ from_data_store = data_store is not None and (
355
+ global_config.ADMIN_BOUNDARIES_DATA_DIR is not None or path is not None
356
+ )
357
+
333
358
  # Validate input parameters
334
359
  if not country_code and not data_store:
335
360
  raise ValueError("Either country_code or data_store must be provided.")
336
361
 
337
- if data_store and not path and not country_code:
362
+ if from_data_store and not path and not country_code:
338
363
  raise ValueError(
339
364
  "If data_store is provided, either path or country_code must also be specified."
340
365
  )
341
366
 
342
367
  # Handle data store path first
343
- if data_store is not None:
368
+ if from_data_store:
344
369
  iso3_code = None
345
370
  if country_code:
346
371
  try:
@@ -350,7 +375,7 @@ class AdminBoundaries(BaseModel):
350
375
 
351
376
  # Generate path if not provided
352
377
  if path is None and iso3_code:
353
- path = config.get_admin_path(
378
+ path = global_config.get_admin_path(
354
379
  country_code=iso3_code,
355
380
  admin_level=admin_level,
356
381
  )
@@ -14,7 +14,6 @@ import requests
14
14
  from tqdm import tqdm
15
15
  import zipfile
16
16
  import tempfile
17
- import shutil
18
17
  from pydantic import (
19
18
  HttpUrl,
20
19
  Field,
@@ -25,8 +24,6 @@ from pydantic import (
25
24
  import logging
26
25
 
27
26
  from gigaspatial.core.io.data_store import DataStore
28
- from gigaspatial.core.io.local_data_store import LocalDataStore
29
- from gigaspatial.handlers.boundaries import AdminBoundaries
30
27
  from gigaspatial.processing.tif_processor import TifProcessor
31
28
  from gigaspatial.handlers.base import (
32
29
  BaseHandlerConfig,
@@ -241,8 +238,8 @@ class GHSLDataConfig(BaseHandlerConfig):
241
238
  ValueError: If the input `source` is not one of the supported types.
242
239
  """
243
240
  if isinstance(source, gpd.GeoDataFrame):
244
- # if source.crs != "EPSG:4326":
245
- # source = source.to_crs("EPSG:4326")
241
+ if source.crs != crs:
242
+ source = source.to_crs(crs)
246
243
  search_geom = source.geometry.unary_union
247
244
  elif isinstance(
248
245
  source,
@@ -273,7 +270,9 @@ class GHSLDataConfig(BaseHandlerConfig):
273
270
  tile_geom.intersects(search_geom) for tile_geom in self.tiles_gdf.geometry
274
271
  )
275
272
 
276
- return self.tiles_gdf.loc[mask, "tile_id"].to_list()
273
+ intersecting_tiles = self.tiles_gdf.loc[mask, "tile_id"].to_list()
274
+
275
+ return intersecting_tiles
277
276
 
278
277
  def _get_product_info(self) -> dict:
279
278
  """Generate and return common product information used in multiple methods."""
@@ -340,7 +339,7 @@ class GHSLDataDownloader(BaseHandlerDownloader):
340
339
 
341
340
  Args:
342
341
  tile_id: tile ID to process.
343
- extract: If True and the downloaded file is a zip, extract its contents. Defaults to False.
342
+ extract: If True and the downloaded file is a zip, extract its contents. Defaults to True.
344
343
  file_pattern: Optional regex pattern to filter extracted files (if extract=True).
345
344
  **kwargs: Additional parameters passed to download methods
346
345
 
@@ -356,14 +355,34 @@ class GHSLDataDownloader(BaseHandlerDownloader):
356
355
  return self._download_file(url, output_path)
357
356
 
358
357
  extracted_files: List[Path] = []
358
+ temp_downloaded_path: Optional[Path] = None
359
359
 
360
360
  try:
361
361
  with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as temp_file:
362
- downloaded_path = self._download_file(url, Path(temp_file.name))
363
- if not downloaded_path:
364
- return None
362
+ temp_downloaded_path = Path(temp_file.name)
363
+ self.logger.debug(
364
+ f"Downloading {url} to temporary file: {temp_downloaded_path}"
365
+ )
366
+
367
+ response = requests.get(url, stream=True)
368
+ response.raise_for_status()
369
+
370
+ total_size = int(response.headers.get("content-length", 0))
371
+
372
+ with tqdm(
373
+ total=total_size,
374
+ unit="B",
375
+ unit_scale=True,
376
+ desc=f"Downloading {tile_id}",
377
+ ) as pbar:
378
+ for chunk in response.iter_content(chunk_size=8192):
379
+ if chunk:
380
+ temp_file.write(chunk)
381
+ pbar.update(len(chunk))
382
+
383
+ self.logger.info(f"Successfully downloaded temporary file!")
365
384
 
366
- with zipfile.ZipFile(str(downloaded_path), "r") as zip_ref:
385
+ with zipfile.ZipFile(str(temp_downloaded_path), "r") as zip_ref:
367
386
  if file_pattern:
368
387
  import re
369
388
 
@@ -385,9 +404,24 @@ class GHSLDataDownloader(BaseHandlerDownloader):
385
404
  Path(temp_file.name).unlink()
386
405
  return extracted_files
387
406
 
407
+ except requests.exceptions.RequestException as e:
408
+ self.logger.error(f"Failed to download {url} to temporary file: {e}")
409
+ return None
410
+ except zipfile.BadZipFile:
411
+ self.logger.error(f"Downloaded file for {tile_id} is not a valid zip file.")
412
+ return None
388
413
  except Exception as e:
389
414
  self.logger.error(f"Error downloading/extracting tile {tile_id}: {e}")
390
415
  return None
416
+ finally:
417
+ if temp_downloaded_path and temp_downloaded_path.exists():
418
+ try:
419
+ temp_downloaded_path.unlink()
420
+ self.logger.debug(f"Deleted temporary file: {temp_downloaded_path}")
421
+ except OSError as e:
422
+ self.logger.warning(
423
+ f"Could not delete temporary file {temp_downloaded_path}: {e}"
424
+ )
391
425
 
392
426
  def download_data_units(
393
427
  self,
@@ -401,7 +435,7 @@ class GHSLDataDownloader(BaseHandlerDownloader):
401
435
 
402
436
  Args:
403
437
  tile_ids: A list of tile IDs to download.
404
- extract: If True and the downloaded files are zips, extract their contents. Defaults to False.
438
+ extract: If True and the downloaded files are zips, extract their contents. Defaults to True.
405
439
  file_pattern: Optional regex pattern to filter extracted files (if extract=True).
406
440
  **kwargs: Additional parameters passed to download methods
407
441
 
@@ -456,7 +490,7 @@ class GHSLDataDownloader(BaseHandlerDownloader):
456
490
  - A list of (latitude, longitude) tuples or Shapely Point objects.
457
491
  - A Shapely BaseGeometry object (e.g., Polygon, MultiPolygon).
458
492
  - A GeoDataFrame with geometry column in EPSG:4326.
459
- extract: If True and the downloaded files are zips, extract their contents. Defaults to False.
493
+ extract: If True and the downloaded files are zips, extract their contents. Defaults to True.
460
494
  file_pattern: Optional regex pattern to filter extracted files (if extract=True).
461
495
  **kwargs: Additional keyword arguments. These will be passed down to
462
496
  `AdminBoundaries.create()` (if `source` is a country)
@@ -496,7 +530,7 @@ class GHSLDataDownloader(BaseHandlerDownloader):
496
530
  country_geom_path: Optional path to a GeoJSON file containing the
497
531
  country boundary. If provided, this boundary is used
498
532
  instead of the default from `AdminBoundaries`.
499
- extract: If True and the downloaded files are zips, extract their contents. Defaults to False.
533
+ extract: If True and the downloaded files are zips, extract their contents. Defaults to True.
500
534
  file_pattern: Optional regex pattern to filter extracted files (if extract=True).
501
535
  **kwargs: Additional keyword arguments that are passed to
502
536
  `download_data_units`. For example, `extract` to download and extract.
@@ -770,3 +804,34 @@ class GHSLDataHandler(BaseHandler):
770
804
  return pd.concat(
771
805
  [tp.to_dataframe() for tp in tif_processors], ignore_index=True
772
806
  )
807
+
808
+ def load_into_geodataframe(
809
+ self,
810
+ source: Union[
811
+ str, # country
812
+ List[Union[tuple, Point]], # points
813
+ BaseGeometry, # geometry
814
+ gpd.GeoDataFrame, # geodataframe
815
+ Path, # path
816
+ List[Union[str, Path]], # list of paths
817
+ ],
818
+ ensure_available: bool = True,
819
+ **kwargs,
820
+ ) -> pd.DataFrame:
821
+ """
822
+ Load GHSL data into a geopandas GeoDataFrame.
823
+
824
+ Args:
825
+ source: The data source specification
826
+ ensure_available: If True, ensure data is downloaded before loading
827
+ **kwargs: Additional parameters passed to load methods
828
+
829
+ Returns:
830
+ GeoDataFrame containing the GHSL data
831
+ """
832
+ tif_processors = self.load_data(
833
+ source=source, ensure_available=ensure_available, **kwargs
834
+ )
835
+ return pd.concat(
836
+ [tp.to_geodataframe() for tp in tif_processors], ignore_index=True
837
+ )
@@ -2,6 +2,7 @@ import logging
2
2
  from typing import List, Optional, Union, Literal
3
3
  from pydantic.dataclasses import dataclass
4
4
  from datetime import datetime
5
+ import pycountry
5
6
 
6
7
  from hdx.data.resource import Resource
7
8
 
@@ -36,8 +37,10 @@ class RWIConfig(HDXConfig):
36
37
  self, country: str, **kwargs
37
38
  ) -> List[Resource]:
38
39
  """Get relevant data units for a country, optionally filtering for latest version"""
39
- resources = super().get_relevant_data_units_by_country(
40
- country=country, key="url"
40
+ country = pycountry.countries.lookup(country)
41
+ values = [country.alpha_3]
42
+ resources = self.get_dataset_resources(
43
+ filter={"url": values},
41
44
  )
42
45
 
43
46
  if self.latest_only and len(resources) > 1: