giga-spatial 0.6.3__py3-none-any.whl → 0.6.5__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -14,7 +14,6 @@ import requests
14
14
  from tqdm import tqdm
15
15
  import zipfile
16
16
  import tempfile
17
- import shutil
18
17
  from pydantic import (
19
18
  HttpUrl,
20
19
  Field,
@@ -25,8 +24,6 @@ from pydantic import (
25
24
  import logging
26
25
 
27
26
  from gigaspatial.core.io.data_store import DataStore
28
- from gigaspatial.core.io.local_data_store import LocalDataStore
29
- from gigaspatial.handlers.boundaries import AdminBoundaries
30
27
  from gigaspatial.processing.tif_processor import TifProcessor
31
28
  from gigaspatial.handlers.base import (
32
29
  BaseHandlerConfig,
@@ -241,8 +238,8 @@ class GHSLDataConfig(BaseHandlerConfig):
241
238
  ValueError: If the input `source` is not one of the supported types.
242
239
  """
243
240
  if isinstance(source, gpd.GeoDataFrame):
244
- # if source.crs != "EPSG:4326":
245
- # source = source.to_crs("EPSG:4326")
241
+ if source.crs != crs:
242
+ source = source.to_crs(crs)
246
243
  search_geom = source.geometry.unary_union
247
244
  elif isinstance(
248
245
  source,
@@ -273,7 +270,9 @@ class GHSLDataConfig(BaseHandlerConfig):
273
270
  tile_geom.intersects(search_geom) for tile_geom in self.tiles_gdf.geometry
274
271
  )
275
272
 
276
- return self.tiles_gdf.loc[mask, "tile_id"].to_list()
273
+ intersecting_tiles = self.tiles_gdf.loc[mask, "tile_id"].to_list()
274
+
275
+ return intersecting_tiles
277
276
 
278
277
  def _get_product_info(self) -> dict:
279
278
  """Generate and return common product information used in multiple methods."""
@@ -340,7 +339,7 @@ class GHSLDataDownloader(BaseHandlerDownloader):
340
339
 
341
340
  Args:
342
341
  tile_id: tile ID to process.
343
- extract: If True and the downloaded file is a zip, extract its contents. Defaults to False.
342
+ extract: If True and the downloaded file is a zip, extract its contents. Defaults to True.
344
343
  file_pattern: Optional regex pattern to filter extracted files (if extract=True).
345
344
  **kwargs: Additional parameters passed to download methods
346
345
 
@@ -356,14 +355,34 @@ class GHSLDataDownloader(BaseHandlerDownloader):
356
355
  return self._download_file(url, output_path)
357
356
 
358
357
  extracted_files: List[Path] = []
358
+ temp_downloaded_path: Optional[Path] = None
359
359
 
360
360
  try:
361
361
  with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as temp_file:
362
- downloaded_path = self._download_file(url, Path(temp_file.name))
363
- if not downloaded_path:
364
- return None
362
+ temp_downloaded_path = Path(temp_file.name)
363
+ self.logger.debug(
364
+ f"Downloading {url} to temporary file: {temp_downloaded_path}"
365
+ )
366
+
367
+ response = requests.get(url, stream=True)
368
+ response.raise_for_status()
369
+
370
+ total_size = int(response.headers.get("content-length", 0))
371
+
372
+ with tqdm(
373
+ total=total_size,
374
+ unit="B",
375
+ unit_scale=True,
376
+ desc=f"Downloading {tile_id}",
377
+ ) as pbar:
378
+ for chunk in response.iter_content(chunk_size=8192):
379
+ if chunk:
380
+ temp_file.write(chunk)
381
+ pbar.update(len(chunk))
382
+
383
+ self.logger.info(f"Successfully downloaded temporary file!")
365
384
 
366
- with zipfile.ZipFile(str(downloaded_path), "r") as zip_ref:
385
+ with zipfile.ZipFile(str(temp_downloaded_path), "r") as zip_ref:
367
386
  if file_pattern:
368
387
  import re
369
388
 
@@ -385,9 +404,24 @@ class GHSLDataDownloader(BaseHandlerDownloader):
385
404
  Path(temp_file.name).unlink()
386
405
  return extracted_files
387
406
 
407
+ except requests.exceptions.RequestException as e:
408
+ self.logger.error(f"Failed to download {url} to temporary file: {e}")
409
+ return None
410
+ except zipfile.BadZipFile:
411
+ self.logger.error(f"Downloaded file for {tile_id} is not a valid zip file.")
412
+ return None
388
413
  except Exception as e:
389
414
  self.logger.error(f"Error downloading/extracting tile {tile_id}: {e}")
390
415
  return None
416
+ finally:
417
+ if temp_downloaded_path and temp_downloaded_path.exists():
418
+ try:
419
+ temp_downloaded_path.unlink()
420
+ self.logger.debug(f"Deleted temporary file: {temp_downloaded_path}")
421
+ except OSError as e:
422
+ self.logger.warning(
423
+ f"Could not delete temporary file {temp_downloaded_path}: {e}"
424
+ )
391
425
 
392
426
  def download_data_units(
393
427
  self,
@@ -401,7 +435,7 @@ class GHSLDataDownloader(BaseHandlerDownloader):
401
435
 
402
436
  Args:
403
437
  tile_ids: A list of tile IDs to download.
404
- extract: If True and the downloaded files are zips, extract their contents. Defaults to False.
438
+ extract: If True and the downloaded files are zips, extract their contents. Defaults to True.
405
439
  file_pattern: Optional regex pattern to filter extracted files (if extract=True).
406
440
  **kwargs: Additional parameters passed to download methods
407
441
 
@@ -456,7 +490,7 @@ class GHSLDataDownloader(BaseHandlerDownloader):
456
490
  - A list of (latitude, longitude) tuples or Shapely Point objects.
457
491
  - A Shapely BaseGeometry object (e.g., Polygon, MultiPolygon).
458
492
  - A GeoDataFrame with geometry column in EPSG:4326.
459
- extract: If True and the downloaded files are zips, extract their contents. Defaults to False.
493
+ extract: If True and the downloaded files are zips, extract their contents. Defaults to True.
460
494
  file_pattern: Optional regex pattern to filter extracted files (if extract=True).
461
495
  **kwargs: Additional keyword arguments. These will be passed down to
462
496
  `AdminBoundaries.create()` (if `source` is a country)
@@ -496,7 +530,7 @@ class GHSLDataDownloader(BaseHandlerDownloader):
496
530
  country_geom_path: Optional path to a GeoJSON file containing the
497
531
  country boundary. If provided, this boundary is used
498
532
  instead of the default from `AdminBoundaries`.
499
- extract: If True and the downloaded files are zips, extract their contents. Defaults to False.
533
+ extract: If True and the downloaded files are zips, extract their contents. Defaults to True.
500
534
  file_pattern: Optional regex pattern to filter extracted files (if extract=True).
501
535
  **kwargs: Additional keyword arguments that are passed to
502
536
  `download_data_units`. For example, `extract` to download and extract.
@@ -770,3 +804,34 @@ class GHSLDataHandler(BaseHandler):
770
804
  return pd.concat(
771
805
  [tp.to_dataframe() for tp in tif_processors], ignore_index=True
772
806
  )
807
+
808
+ def load_into_geodataframe(
809
+ self,
810
+ source: Union[
811
+ str, # country
812
+ List[Union[tuple, Point]], # points
813
+ BaseGeometry, # geometry
814
+ gpd.GeoDataFrame, # geodataframe
815
+ Path, # path
816
+ List[Union[str, Path]], # list of paths
817
+ ],
818
+ ensure_available: bool = True,
819
+ **kwargs,
820
+ ) -> pd.DataFrame:
821
+ """
822
+ Load GHSL data into a geopandas GeoDataFrame.
823
+
824
+ Args:
825
+ source: The data source specification
826
+ ensure_available: If True, ensure data is downloaded before loading
827
+ **kwargs: Additional parameters passed to load methods
828
+
829
+ Returns:
830
+ GeoDataFrame containing the GHSL data
831
+ """
832
+ tif_processors = self.load_data(
833
+ source=source, ensure_available=ensure_available, **kwargs
834
+ )
835
+ return pd.concat(
836
+ [tp.to_geodataframe() for tp in tif_processors], ignore_index=True
837
+ )