giga-spatial 0.6.2-py3-none-any.whl → 0.6.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,17 @@
  import logging
- from pathlib import Path
- from typing import List, Optional, Union, Dict, Any, Literal
- import pycountry
- import tempfile
+ from typing import List, Optional, Union, Literal
+ from pydantic.dataclasses import dataclass
+ from datetime import datetime
 
- from pydantic import Field, field_validator
+ from hdx.data.resource import Resource
+
+ from pydantic import Field, ConfigDict
 
  from gigaspatial.core.io.data_store import DataStore
- from gigaspatial.handlers.hdx import HDXConfig, HDXDownloader
+ from gigaspatial.handlers.hdx import HDXConfig, HDXDownloader, HDXReader, HDXHandler
 
 
+ @dataclass(config=ConfigDict(arbitrary_types_allowed=True))
  class RWIConfig(HDXConfig):
      """Configuration for Relative Wealth Index data access"""
 
@@ -22,16 +24,49 @@ class RWIConfig(HDXConfig):
      country: Optional[str] = Field(
          default=None, description="Country ISO code to filter data for"
      )
+     latest_only: bool = Field(
+         default=True,
+         description="If True, only get the latest resource for each country",
+     )
+
+     def __post_init__(self):
+         super().__post_init__()
+
+     def get_relevant_data_units_by_country(
+         self, country: str, **kwargs
+     ) -> List[Resource]:
+         """Get relevant data units for a country, optionally filtering for the latest version"""
+         resources = super().get_relevant_data_units_by_country(
+             country=country, key="url"
+         )
+
+         if self.latest_only and len(resources) > 1:
+             # Find the resource with the latest creation date
+             latest_resource = None
+             latest_date = None
+
+             for resource in resources:
+                 created = resource.get("created")
+                 if created:
+                     try:
+                         created_dt = datetime.fromisoformat(
+                             created.replace("Z", "+00:00")
+                         )
+                         if latest_date is None or created_dt > latest_date:
+                             latest_date = created_dt
+                             latest_resource = resource
+                     except ValueError:
+                         self.logger.warning(
+                             f"Could not parse creation date for resource: {created}"
+                         )
+
+             if latest_resource:
+                 resources = [latest_resource]
 
-     @field_validator("country")
-     def validate_country(cls, value: str) -> str:
-         try:
-             return pycountry.countries.lookup(value).alpha_3
-         except LookupError:
-             raise ValueError(f"Invalid country code provided: {value}")
+         return resources
 
 
- class RelativeWealthIndexDownloader(HDXDownloader):
+ class RWIDownloader(HDXDownloader):
      """Specialized downloader for the Relative Wealth Index dataset from HDX"""
 
      def __init__(
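
Two things happen in this hunk: the pycountry-based `validate_country` hook disappears from `RWIConfig` (its removal suggests country validation now happens upstream in `HDXConfig`, though this diff does not show that), and resource selection gains a `latest_only` switch that keeps only the resource with the newest HDX `created` timestamp, keeping all resources when no date can be parsed. A minimal usage sketch, assuming the import path `gigaspatial.handlers.rwi` and an ISO3 country code; the call site is illustrative, not taken from the wheel:

    from gigaspatial.handlers.rwi import RWIConfig

    # 0.6.4 default: at most one resource per country, the newest upload.
    config = RWIConfig(country="KEN")  # latest_only=True by default
    latest = config.get_relevant_data_units_by_country(country="KEN")

    # 0.6.2-like behaviour: return every matching resource.
    config_all = RWIConfig(country="KEN", latest_only=False)
    every_upload = config_all.get_relevant_data_units_by_country(country="KEN")
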
@@ -40,118 +75,81 @@ class RelativeWealthIndexDownloader(HDXDownloader):
          data_store: Optional[DataStore] = None,
          logger: Optional[logging.Logger] = None,
      ):
-         if config is None:
-             config = RWIConfig()
-         elif isinstance(config, dict):
-             config = RWIConfig(**config)
-
+         config = config if isinstance(config, RWIConfig) else RWIConfig(**config)
          super().__init__(config=config, data_store=data_store, logger=logger)
 
-     @classmethod
-     def from_config(
-         cls,
-         country: Optional[str] = None,
-         **kwargs,
-     ):
-         """Create a downloader with RWI-specific configurations"""
-         config = RWIConfig(country=country, **kwargs)
-         return cls(config=config)
-
-     def download_dataset(self) -> List[str]:
-         """Download RWI dataset, optionally filtering for a specific country.
-
-         If country is specified, attempts to find and download only the resources
-         relevant to that country. Otherwise, downloads all RWI resources.
-
-         Returns:
-             List of paths to the downloaded files
-         """
-         # If no country specified, download all resources
-         if self.config.country is None:
-             return super().download_dataset()
-
-         # Get all resources from the dataset
-         try:
-             resources = self.get_dataset_resources()
-             if not resources:
-                 self.logger.warning(f"No resources found for RWI dataset")
-                 return []
-
-             # Prepare country identifiers for matching
-             country_code = self.config.country.lower()
-             country_name = pycountry.countries.lookup(self.config.country).name.lower()
-             country_alpha2 = pycountry.countries.lookup(
-                 self.config.country
-             ).alpha_2.lower()
-
-             # Try different matching patterns
-             country_patterns = [
-                 f"/{country_code}_",  # URL path with ISO3 prefix
-                 f"/{country_code}.",  # URL path with ISO3 followed by extension
-                 f"_{country_code}_",  # Filename with ISO3 in middle
-                 f"_{country_code}.",  # Filename with ISO3 at end
-                 f"/{country_name.replace(' ', '')}_",  # URL with no spaces
-                 f"/{country_name.replace(' ', '-')}_",  # URL with hyphens
-                 f"/{country_alpha2}_",  # URL with ISO2 code
-                 country_name,  # Country name anywhere in URL
-             ]
-
-             # Find matching resources
-             matching_resources = []
-             for resource in resources:
-                 # Get the URL safely
-                 resource_url = resource.get("url", "")
-                 if not resource_url:
-                     continue
-
-                 resource_url = resource_url.lower()
-
-                 # Check for matches with our patterns
-                 if any(pattern in resource_url for pattern in country_patterns):
-                     matching_resources.append(resource)
-
-             if not matching_resources:
-                 self.logger.warning(
-                     f"No resources matching country '{self.config.country}' were found. "
-                     f"Consider downloading the full dataset with country=None and filtering afterwards."
-                 )
-                 return []
-
-             # Download the matching resources
-             downloaded_paths = []
-             for res in matching_resources:
-                 try:
-                     resource_name = res.get("name", "Unknown")
-                     self.logger.info(f"Downloading resource: {resource_name}")
-
-                     # Download to a temporary directory
-                     with tempfile.TemporaryDirectory() as tmpdir:
-                         url, local_path = res.download(folder=tmpdir)
-                         # Read the file and write to the DataStore
-                         with open(local_path, "rb") as f:
-                             data = f.read()
-                         # Compose the target path in the DataStore
-                         target_path = str(
-                             self.config.output_dir_path / Path(local_path).name
-                         )
-                         self.data_store.write_file(target_path, data)
-                         downloaded_paths.append(target_path)
 
-                     self.logger.info(
-                         f"Downloaded resource: {resource_name} to {target_path}"
-                     )
+ class RWIReader(HDXReader):
+     """Specialized reader for the Relative Wealth Index dataset from HDX"""
 
-                 except Exception as e:
-                     resource_name = res.get("name", "Unknown")
-                     self.logger.error(
-                         f"Error downloading resource {resource_name}: {str(e)}"
-                     )
+     def __init__(
+         self,
+         config: Union[RWIConfig, dict] = None,
+         data_store: Optional[DataStore] = None,
+         logger: Optional[logging.Logger] = None,
+     ):
+         config = config if isinstance(config, RWIConfig) else RWIConfig(**config)
+         super().__init__(config=config, data_store=data_store, logger=logger)
 
-             return downloaded_paths
 
-         except Exception as e:
-             self.logger.error(f"Error during country-filtered download: {str(e)}")
+ class RWIHandler(HDXHandler):
+     """Handler for Relative Wealth Index dataset"""
 
-             # Fall back to downloading all resources
-             self.logger.info("Falling back to downloading all RWI resources")
-             return super().download_dataset()
+     def __init__(
+         self,
+         config: Optional[RWIConfig] = None,
+         downloader: Optional[RWIDownloader] = None,
+         reader: Optional[RWIReader] = None,
+         data_store: Optional[DataStore] = None,
+         logger: Optional[logging.Logger] = None,
+         **kwargs,
+     ):
+         super().__init__(
+             dataset_name="relative-wealth-index",
+             config=config,
+             downloader=downloader,
+             reader=reader,
+             data_store=data_store,
+             logger=logger,
+             **kwargs,
+         )
+
+     def create_config(
+         self, data_store: DataStore, logger: logging.Logger, **kwargs
+     ) -> RWIConfig:
+         """Create and return a RWIConfig instance"""
+         return RWIConfig(
+             data_store=data_store,
+             logger=logger,
+             **kwargs,
+         )
+
+     def create_downloader(
+         self,
+         config: RWIConfig,
+         data_store: DataStore,
+         logger: logging.Logger,
+         **kwargs,
+     ) -> RWIDownloader:
+         """Create and return a RWIDownloader instance"""
+         return RWIDownloader(
+             config=config,
+             data_store=data_store,
+             logger=logger,
+             **kwargs,
+         )
+
+     def create_reader(
+         self,
+         config: RWIConfig,
+         data_store: DataStore,
+         logger: logging.Logger,
+         **kwargs,
+     ) -> RWIReader:
+         """Create and return a RWIReader instance"""
+         return RWIReader(
+             config=config,
+             data_store=data_store,
+             logger=logger,
+             **kwargs,
+         )
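
The 0.6.2 downloader's hundred-line `download_dataset` override, with its URL pattern matching and tempfile copying, is deleted outright; country filtering now lives in `RWIConfig.get_relevant_data_units_by_country`, and the download/read/orchestrate split follows the `HDXDownloader`/`HDXReader`/`HDXHandler` template. One behavioural edge worth noting: the new one-line coercion means `config=None` falls through to `RWIConfig(**None)` and raises a `TypeError`, so callers should now pass an instance or a (possibly empty) dict. A sketch of the two ways to wire things up, assuming the import path `gigaspatial.handlers.rwi`; whether extra kwargs reach `create_config` depends on the `HDXHandler` base class, which this diff does not show:

    from gigaspatial.handlers.rwi import RWIConfig, RWIDownloader, RWIReader, RWIHandler

    # Explicit wiring, using only constructors shown in this diff. An empty
    # dict would also work; config=None would raise (see note above).
    config = RWIConfig(country="KEN", latest_only=True)
    downloader = RWIDownloader(config=config)
    reader = RWIReader(config=config)

    # Or let the handler build all three via its create_* factories
    # (assumed base-class behaviour, not shown in this diff).
    handler = RWIHandler()
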
@@ -18,12 +18,12 @@ from gigaspatial.config import config
  @dataclass(config=ConfigDict(arbitrary_types_allowed=True))
  class TifProcessor:
      """
-     A class to handle tif data processing, supporting single-band, RGB, and RGBA data.
+     A class to handle tif data processing, supporting single-band, RGB, RGBA, and multi-band data.
      """
 
      dataset_path: Union[Path, str]
      data_store: Optional[DataStore] = None
-     mode: Literal["single", "rgb", "rgba"] = "single"
+     mode: Literal["single", "rgb", "rgba", "multi"] = "single"
 
      def __post_init__(self):
          """Validate inputs and set up logging."""
@@ -36,10 +36,15 @@ class TifProcessor:
 
          self._load_metadata()
 
+         # Validate mode and band count
          if self.mode == "rgba" and self.count != 4:
              raise ValueError("RGBA mode requires a 4-band TIF file")
          if self.mode == "rgb" and self.count != 3:
              raise ValueError("RGB mode requires a 3-band TIF file")
+         if self.mode == "single" and self.count != 1:
+             raise ValueError("Single mode requires a 1-band TIF file")
+         if self.mode == "multi" and self.count < 2:
+             raise ValueError("Multi mode requires a TIF file with 2 or more bands")
 
      @contextmanager
      def open_dataset(self):
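
With these checks, a mode/band-count mismatch now fails at construction time for all four modes, rather than only for rgb and rgba as in 0.6.2. A quick illustration (file name invented; the import path `gigaspatial.processing` is an assumption):

    from gigaspatial.processing import TifProcessor

    # A single-band raster opened in "multi" mode now fails immediately:
    TifProcessor(dataset_path="one_band.tif", mode="multi")
    # ValueError: Multi mode requires a TIF file with 2 or more bands
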
@@ -118,6 +123,16 @@ class TifProcessor:
                      self._tabular = self._to_rgb_dataframe(drop_nodata=True)
                  elif self.mode == "rgba":
                      self._tabular = self._to_rgba_dataframe(drop_transparent=True)
+                 elif self.mode == "multi":
+                     self._tabular = self._to_multi_band_dataframe(
+                         drop_nodata=True,
+                         drop_values=[],
+                         band_names=None,  # Use default band naming
+                     )
+                 else:
+                     raise ValueError(
+                         f"Invalid mode: {self.mode}. Must be one of: single, rgb, rgba, multi"
+                     )
              except Exception as e:
                  raise ValueError(
                      f"Failed to process TIF file in mode '{self.mode}'. "
@@ -393,6 +408,77 @@ class TifProcessor:
          self.logger.info("Dataset is processed!")
          return data
 
+     def _to_multi_band_dataframe(
+         self,
+         drop_nodata: bool = True,
+         drop_values: list = [],
+         band_names: Optional[List[str]] = None,
+     ) -> pd.DataFrame:
+         """
+         Process multi-band TIF to DataFrame with all bands included.
+
+         Args:
+             drop_nodata (bool): Whether to drop nodata values. Defaults to True.
+             drop_values (list): Additional values to drop from the dataset. Defaults to empty list.
+             band_names (Optional[List[str]]): Custom names for the bands. If None, bands will be
+                 named using the band descriptions from the GeoTIFF metadata if available,
+                 otherwise 'band_1', 'band_2', etc.
+
+         Returns:
+             pd.DataFrame: DataFrame containing coordinates and all band values
+         """
+         self.logger.info("Processing multi-band dataset...")
+
+         with self.open_dataset() as src:
+             # Read all bands
+             stack = src.read()
+
+             x_coords, y_coords = self._get_pixel_coordinates()
+
+             # Initialize dictionary with coordinates
+             data_dict = {"lon": x_coords.flatten(), "lat": y_coords.flatten()}
+
+             # Get band descriptions from metadata if available
+             if band_names is None and hasattr(src, "descriptions") and src.descriptions:
+                 band_names = [
+                     desc if desc else f"band_{i+1}"
+                     for i, desc in enumerate(src.descriptions)
+                 ]
+
+             # Process each band
+             for band_idx in range(self.count):
+                 band_data = stack[band_idx]
+
+                 # Handle nodata and other values to drop
+                 if drop_nodata or drop_values:
+                     values_to_mask = []
+                     if drop_nodata and src.nodata is not None:
+                         values_to_mask.append(src.nodata)
+                     if drop_values:
+                         values_to_mask.extend(drop_values)
+
+                     if values_to_mask:
+                         data_mask = ~np.isin(band_data, values_to_mask)
+                         band_values = np.extract(data_mask, band_data)
+                         if band_idx == 0:  # Only need to mask coordinates once
+                             data_dict["lon"] = np.extract(data_mask, x_coords)
+                             data_dict["lat"] = np.extract(data_mask, y_coords)
+                     else:
+                         band_values = band_data.flatten()
+                 else:
+                     band_values = band_data.flatten()
+
+                 # Use custom band names if provided, otherwise use descriptions or default naming
+                 band_name = (
+                     band_names[band_idx]
+                     if band_names and len(band_names) > band_idx
+                     else f"band_{band_idx + 1}"
+                 )
+                 data_dict[band_name] = band_values
+
+         self.logger.info("Multi-band dataset is processed!")
+         return pd.DataFrame(data_dict)
+
 
      def _get_pixel_coordinates(self):
          """Helper method to generate coordinate arrays for all pixels"""
          if "pixel_coords" not in self._cache: