giga-spatial 0.6.6-py3-none-any.whl → 0.6.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

giga_spatial-0.6.8.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: giga-spatial
- Version: 0.6.6
+ Version: 0.6.8
  Summary: A package for spatial data download & processing
  Home-page: https://github.com/unicef/giga-spatial
  Author: Utku Can Ozturk

giga_spatial-0.6.8.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
- giga_spatial-0.6.6.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
- gigaspatial/__init__.py,sha256=I3h5MyD10PkOUQEBnR6L9ja7s4WeTEg8rRjRKTCWYWQ,22
+ giga_spatial-0.6.8.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
+ gigaspatial/__init__.py,sha256=wwbrOIx2rQA0YHGob_KGFY89qGDsh20rh2M3y3Ua458,22
  gigaspatial/config.py,sha256=pLbxGc08OHT2IfTBzZVuIJTPR2vvg3KTFfvciOtRswk,9304
  gigaspatial/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  gigaspatial/core/io/__init__.py,sha256=stlpgEeHf5KIb2ZW8yEbdJK5iq6n_wX4DPmKyR9PK-w,317
@@ -27,24 +27,24 @@ gigaspatial/handlers/boundaries.py,sha256=jtWyQt3iAzS77mbAOi7mjh3cv_YCV3uB_r1h56
  gigaspatial/handlers/ghsl.py,sha256=aSEVQVANzJf8O8TiQYmfwyeM43ZaO65VJHmiuLSQfLs,30524
  gigaspatial/handlers/giga.py,sha256=F5ZfcE37a24X-c6Xhyt72C9eZZbyN_gV7w_InxKFMQQ,28348
  gigaspatial/handlers/google_open_buildings.py,sha256=Liqk7qJhDtB4Ia4uhBe44LFcf-XVKBjRfj-pWlE5erY,16594
- gigaspatial/handlers/hdx.py,sha256=LTEs_xZF1yPhD8dAdZ_YN8Vcan7iB5_tZ8NjF_ip6u0,18001
+ gigaspatial/handlers/hdx.py,sha256=1m6oG1DeEC_RLFtb6CrTReWpbQ5uG2e8EIt-IUkZbaI,18122
  gigaspatial/handlers/mapbox_image.py,sha256=M_nkJ_b1PD8FG1ajVgSycCb0NRTAI_SLpHdzszNetKA,7786
  gigaspatial/handlers/maxar_image.py,sha256=kcc8uGljQB0Yh0MKBA7lT7KwBbNZwFzuyBklR3db1P4,10204
  gigaspatial/handlers/microsoft_global_buildings.py,sha256=bQ5WHIv3v0wWrZZUbZkKPRjgdlqIxlK7CV_0zSvdrTw,20292
  gigaspatial/handlers/ookla_speedtest.py,sha256=EcvSAxJZ9GPfzYnT_C85Qgy2ecc9ndf70Pklk53OdC8,6506
  gigaspatial/handlers/opencellid.py,sha256=KuJqd-5-RO5ZzyDaBSrTgCK2ib5N_m3RUcPlX5heWwI,10683
- gigaspatial/handlers/osm.py,sha256=sLNMkOVh1v50jrWw7Z0-HILY5QTQjgKCHCeAfXj5jA8,14084
+ gigaspatial/handlers/osm.py,sha256=vUbdUm6lO2f8YyU7o4qUSkWMxlZElp7EPBFlneRaeo0,16641
  gigaspatial/handlers/overture.py,sha256=lKeNw00v5Qia7LdWORuYihnlKEqxE9m38tdeRrvag9k,4218
  gigaspatial/handlers/rwi.py,sha256=eAaplDysVeBhghJusYUKZYbKL5hW-klWvi8pWhILQkY,4962
  gigaspatial/handlers/unicef_georepo.py,sha256=ODYNvkU_UKgOHXT--0MqmJ4Uk6U1_mp9xgehbTzKpX8,31924
- gigaspatial/handlers/worldpop.py,sha256=pkTmqb0k0vpa58t6tM3jfcpMHt1YuayLPFEFEULlrLs,30156
+ gigaspatial/handlers/worldpop.py,sha256=jV166EP02Xdj8jiT8aQi4sexds8Qd3KRGHXqq70_Sdk,30161
  gigaspatial/processing/__init__.py,sha256=QDVL-QbLCrIb19lrajP7LrHNdGdnsLeGcvAs_jQpdRM,183
  gigaspatial/processing/algorithms.py,sha256=6fBCwbZrI_ISWJ7UpkH6moq1vw-7dBy14yXSLHZprqY,6591
  gigaspatial/processing/geo.py,sha256=8kD7-LQdGzKVfuZDWr3zK5uQhPzgxbZ3JBPosLRBJ5M,41390
  gigaspatial/processing/sat_images.py,sha256=YUbH5MFNzl6NX49Obk14WaFcr1s3SyGJIOk-kRpbBNg,1429
- gigaspatial/processing/tif_processor.py,sha256=QLln9D-_zBhdYQL9NAL_bmo0bmmxE3sxDUQEglYQK94,27490
+ gigaspatial/processing/tif_processor.py,sha256=dZRhMGj5r7DIu8Bop31NPbN1IdOK1syIlCOFTjTiiyo,40024
  gigaspatial/processing/utils.py,sha256=HC85vGKQakxlkoQAkZmeAXWHsenAwTIRn7jPKUA7x20,1500
- giga_spatial-0.6.6.dist-info/METADATA,sha256=ZKoXmthabbL_5xJYHdQfk3ev4Dz02tWU6RAtpv0vWSU,7537
- giga_spatial-0.6.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- giga_spatial-0.6.6.dist-info/top_level.txt,sha256=LZsccgw6H4zXT7m6Y4XChm-Y5LjHAwZ2hkGN_B3ExmI,12
- giga_spatial-0.6.6.dist-info/RECORD,,
+ giga_spatial-0.6.8.dist-info/METADATA,sha256=f9MSxVRX6yhfkeoGhrsO5CdbAmVVHfhq9T4Ip7CRac4,7537
+ giga_spatial-0.6.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ giga_spatial-0.6.8.dist-info/top_level.txt,sha256=LZsccgw6H4zXT7m6Y4XChm-Y5LjHAwZ2hkGN_B3ExmI,12
+ giga_spatial-0.6.8.dist-info/RECORD,,

gigaspatial/__init__.py CHANGED
@@ -1 +1 @@
- __version__ = "0.6.6"
+ __version__ = "0.6.8"

gigaspatial/handlers/hdx.py CHANGED
@@ -247,7 +247,10 @@ class HDXConfig(BaseHandlerConfig):
              # If source is a dict, use it directly as a filter
              return self.get_dataset_resources(filter=source, **kwargs)
          else:
-             raise ValueError(f"Unsupported source type: {type(source)}")
+             raise ValueError(
+                 f"Unsupported source type: {type(source)}"
+                 "Please use country-based filtering or direct resource filtering instead."
+             )

      def get_relevant_data_units_by_geometry(
          self, geometry: Union[BaseGeometry, gpd.GeoDataFrame], **kwargs
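
The dict branch shown in the context lines forwards the mapping verbatim to get_dataset_resources as a resource filter; anything else now raises the more descriptive ValueError. A minimal sketch of that call path, assuming `config` is an already-constructed HDXConfig instance (its construction arguments are not shown in this diff):

    # `config` is an existing HDXConfig; a dict source acts as a direct resource filter.
    filters = {"format": "geojson"}
    resources = config.get_dataset_resources(filter=filters)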

gigaspatial/handlers/osm.py CHANGED
@@ -1,7 +1,8 @@
  import requests
  import pandas as pd
  from typing import List, Dict, Union, Optional, Literal
- from dataclasses import dataclass
+ from pydantic.dataclasses import dataclass
+ from pydantic import Field
  from time import sleep
  from concurrent.futures import ThreadPoolExecutor
  from requests.exceptions import RequestException
@@ -20,8 +21,10 @@ class OSMLocationFetcher:
      shops, and other POI categories.
      """

-     country: str
-     location_types: Union[List[str], Dict[str, List[str]]]
+     country: Optional[str] = None
+     admin_level: Optional[int] = None
+     admin_value: Optional[str] = None
+     location_types: Union[List[str], Dict[str, List[str]]] = Field(...)
      base_url: str = "http://overpass-api.de/api/interpreter"
      timeout: int = 600
      max_retries: int = 3
@@ -29,10 +32,6 @@ class OSMLocationFetcher:

      def __post_init__(self):
          """Validate inputs, normalize location_types, and set up logging."""
-         try:
-             self.country = pycountry.countries.lookup(self.country).alpha_2
-         except LookupError:
-             raise ValueError(f"Invalid country code provided: {self.country}")

          # Normalize location_types to always be a dictionary
          if isinstance(self.location_types, list):
@@ -44,6 +43,75 @@ class OSMLocationFetcher:

          self.logger = config.get_logger(self.__class__.__name__)

+         # Validate area selection
+         if self.admin_level is not None and self.admin_value is not None:
+             self.area_query = f'area["admin_level"={self.admin_level}]["name"="{self.admin_value}"]->.searchArea;'
+             self.logger.info(
+                 f"Using admin_level={self.admin_level}, name={self.admin_value} for area selection."
+             )
+         elif self.country is not None:
+             try:
+                 self.country = pycountry.countries.lookup(self.country).alpha_2
+             except LookupError:
+                 raise ValueError(f"Invalid country code provided: {self.country}")
+             self.area_query = f'area["ISO3166-1"={self.country}]->.searchArea;'
+             self.logger.info(f"Using country={self.country} for area selection.")
+         else:
+             raise ValueError(
+                 "Either country or both admin_level and admin_value must be provided."
+             )
+
+     @staticmethod
+     def get_admin_names(
+         admin_level: int, country: Optional[str] = None, timeout: int = 120
+     ) -> List[str]:
+         """
+         Fetch all admin area names for a given admin_level (optionally within a country).
+
+         Args:
+             admin_level (int): The OSM admin_level to search for (e.g., 4 for states, 6 for counties).
+             country (str, optional): Country name or ISO code to filter within.
+             timeout (int): Timeout for the Overpass API request.
+
+         Returns:
+             List[str]: List of admin area names.
+         """
+
+         # Build area filter for country if provided
+         if country:
+             try:
+                 country_code = pycountry.countries.lookup(country).alpha_2
+             except LookupError:
+                 raise ValueError(f"Invalid country code or name: {country}")
+             area_filter = f'area["ISO3166-1"="{country_code}"]->.countryArea;'
+             area_ref = "(area.countryArea)"
+         else:
+             area_filter = ""
+             area_ref = ""
+
+         # Overpass QL to get all admin areas at the specified level
+         query = f"""
+         [out:json][timeout:{timeout}];
+         {area_filter}
+         (
+             relation["admin_level"="{admin_level}"]{area_ref};
+         );
+         out tags;
+         """
+
+         url = "http://overpass-api.de/api/interpreter"
+         response = requests.get(url, params={"data": query}, timeout=timeout)
+         response.raise_for_status()
+         data = response.json()
+
+         names = []
+         for el in data.get("elements", []):
+             tags = el.get("tags", {})
+             name = tags.get("name")
+             if name:
+                 names.append(name)
+         return sorted(set(names))
+
      def _build_queries(self, since_year: Optional[int] = None) -> List[str]:
          """
          Construct separate Overpass QL queries for different element types and categories.
@@ -68,7 +136,7 @@ class OSMLocationFetcher:

          nodes_relations_query = f"""
          [out:json][timeout:{self.timeout}];
-         area["ISO3166-1"={self.country}]->.searchArea;
+         {self.area_query}
          (
              {nodes_relations_queries}
          );
@@ -86,7 +154,7 @@ class OSMLocationFetcher:

          ways_query = f"""
          [out:json][timeout:{self.timeout}];
-         area["ISO3166-1"={self.country}]->.searchArea;
+         {self.area_query}
          (
              {ways_queries}
          );
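
Taken together, the osm.py changes let the fetcher target either a whole country or a single administrative area, and add a static helper for discovering admin area names. A usage sketch based only on the fields and signatures visible in this diff; the admin name and location types are illustrative values, not package defaults:

    from gigaspatial.handlers.osm import OSMLocationFetcher

    # Country-wide fetcher: the country is resolved to an ISO alpha-2 code.
    country_fetcher = OSMLocationFetcher(country="Kenya", location_types=["school"])

    # Sub-national fetcher: select an Overpass area by admin_level + name instead.
    county_fetcher = OSMLocationFetcher(
        admin_level=6, admin_value="Nairobi", location_types=["school"]
    )

    # New static helper: list candidate admin area names before choosing one.
    names = OSMLocationFetcher.get_admin_names(admin_level=6, country="Kenya")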

gigaspatial/handlers/worldpop.py CHANGED
@@ -611,7 +611,7 @@ class WPPopulationDownloader(BaseHandlerDownloader):
          total_size = int(response.headers.get("content-length", 0))
          file_path = self.config.get_data_unit_path(url)

-         with self.data_store.open(file_path, "wb") as file:
+         with self.data_store.open(str(file_path), "wb") as file:
              with tqdm(
                  total=total_size,
                  unit="B",

gigaspatial/processing/tif_processor.py CHANGED
@@ -9,9 +9,13 @@ from shapely.geometry import box, Polygon, MultiPolygon
  from pathlib import Path
  import rasterio
  from rasterio.mask import mask
+ from rasterio.merge import merge
+ from rasterio.warp import calculate_default_transform, reproject, Resampling
  from functools import partial
  import multiprocessing
  from tqdm import tqdm
+ import tempfile
+ import os

  from gigaspatial.core.io.data_store import DataStore
  from gigaspatial.core.io.local_data_store import LocalDataStore
@@ -22,20 +26,34 @@ from gigaspatial.config import config
  class TifProcessor:
      """
      A class to handle tif data processing, supporting single-band, RGB, RGBA, and multi-band data.
+     Can merge multiple rasters into one during initialization.
      """

-     dataset_path: Union[Path, str]
+     dataset_path: Union[Path, str, List[Union[Path, str]]]
      data_store: Optional[DataStore] = None
      mode: Literal["single", "rgb", "rgba", "multi"] = "single"
+     merge_method: Literal["first", "last", "min", "max", "mean"] = "first"
+     target_crs: Optional[str] = None  # For reprojection if needed
+     resampling_method: Resampling = Resampling.nearest

      def __post_init__(self):
-         """Validate inputs and set up logging."""
+         """Validate inputs, merge rasters if needed, and set up logging."""
          self.data_store = self.data_store or LocalDataStore()
          self.logger = config.get_logger(self.__class__.__name__)
          self._cache = {}
-
-         if not self.data_store.file_exists(self.dataset_path):
-             raise FileNotFoundError(f"Dataset not found at {self.dataset_path}")
+         self._merged_file_path = None
+         self._temp_dir = None
+
+         # Handle multiple dataset paths
+         if isinstance(self.dataset_path, list):
+             self.dataset_paths = [Path(p) for p in self.dataset_path]
+             self._validate_multiple_datasets()
+             self._merge_rasters()
+             self.dataset_path = self._merged_file_path
+         else:
+             self.dataset_paths = [Path(self.dataset_path)]
+             if not self.data_store.file_exists(self.dataset_path):
+                 raise FileNotFoundError(f"Dataset not found at {self.dataset_path}")

          self._load_metadata()
@@ -49,13 +67,298 @@ class TifProcessor:
          if self.mode == "multi" and self.count < 2:
              raise ValueError("Multi mode requires a TIF file with 2 or more bands")

+     def _validate_multiple_datasets(self):
+         """Validate that all datasets exist and have compatible properties."""
+         if len(self.dataset_paths) < 2:
+             raise ValueError("Multiple dataset paths required for merging")
+
+         # Check if all files exist
+         for path in self.dataset_paths:
+             if not self.data_store.file_exists(path):
+                 raise FileNotFoundError(f"Dataset not found at {path}")
+
+         # Load first dataset to get reference properties
+         with self.data_store.open(self.dataset_paths[0], "rb") as f:
+             with rasterio.MemoryFile(f.read()) as memfile:
+                 with memfile.open() as ref_src:
+                     ref_count = ref_src.count
+                     ref_dtype = ref_src.dtypes[0]
+                     ref_crs = ref_src.crs
+                     ref_transform = ref_src.transform
+                     ref_nodata = ref_src.nodata
+
+         # Validate all other datasets against reference
+         for i, path in enumerate(self.dataset_paths[1:], 1):
+             with self.data_store.open(path, "rb") as f:
+                 with rasterio.MemoryFile(f.read()) as memfile:
+                     with memfile.open() as src:
+                         if src.count != ref_count:
+                             raise ValueError(
+                                 f"Dataset {i} has {src.count} bands, expected {ref_count}"
+                             )
+                         if src.dtypes[0] != ref_dtype:
+                             raise ValueError(
+                                 f"Dataset {i} has dtype {src.dtypes[0]}, expected {ref_dtype}"
+                             )
+                         if self.target_crs is None and src.crs != ref_crs:
+                             raise ValueError(
+                                 f"Dataset {i} has CRS {src.crs}, expected {ref_crs}. Consider setting target_crs parameter."
+                             )
+                         if self.target_crs is None and not self._transforms_compatible(
+                             src.transform, ref_transform
+                         ):
+                             self.logger.warning(
+                                 f"Dataset {i} has different resolution. Resampling may be needed."
+                             )
+                         if src.nodata != ref_nodata:
+                             self.logger.warning(
+                                 f"Dataset {i} has different nodata value: {src.nodata} vs {ref_nodata}"
+                             )
+
+     def _transforms_compatible(self, transform1, transform2, tolerance=1e-6):
+         """Check if two transforms have compatible pixel sizes."""
+         return (
+             abs(transform1.a - transform2.a) < tolerance
+             and abs(transform1.e - transform2.e) < tolerance
+         )
+
+     def _merge_rasters(self):
+         """Merge multiple rasters into a single raster."""
+         self.logger.info(f"Merging {len(self.dataset_paths)} rasters...")
+
+         # Create temporary directory for merged file
+         self._temp_dir = tempfile.mkdtemp()
+         merged_filename = "merged_raster.tif"
+         self._merged_file_path = os.path.join(self._temp_dir, merged_filename)
+
+         # Open all datasets and handle reprojection if needed
+         src_files = []
+         reprojected_files = []
+
+         try:
+             for path in self.dataset_paths:
+                 with self.data_store.open(path, "rb") as f:
+                     # Create temporary file for each dataset
+                     temp_file = tempfile.NamedTemporaryFile(suffix=".tif", delete=False)
+                     temp_file.write(f.read())
+                     temp_file.close()
+                     src_files.append(rasterio.open(temp_file.name))
+
+             # Handle reprojection if target_crs is specified
+             if self.target_crs:
+                 self.logger.info(f"Reprojecting rasters to {self.target_crs}...")
+                 processed_files = self._reproject_rasters(src_files, self.target_crs)
+                 reprojected_files = processed_files
+             else:
+                 processed_files = src_files
+
+             if self.merge_method == "mean":
+                 # For mean, we need to handle it manually
+                 merged_array, merged_transform = self._merge_with_mean(src_files)
+
+                 # Use first source as reference for metadata
+                 ref_src = src_files[0]
+                 profile = ref_src.profile.copy()
+                 profile.update(
+                     {
+                         "height": merged_array.shape[-2],
+                         "width": merged_array.shape[-1],
+                         "transform": merged_transform,
+                     }
+                 )
+
+                 # Write merged raster
+                 with rasterio.open(self._merged_file_path, "w", **profile) as dst:
+                     dst.write(merged_array)
+
+             else:
+                 # Use rasterio's merge function
+                 merged_array, merged_transform = merge(
+                     src_files,
+                     method=self.merge_method,
+                     resampling=self.resampling_method,
+                 )
+
+                 # Use first source as reference for metadata
+                 ref_src = src_files[0]
+                 profile = ref_src.profile.copy()
+                 profile.update(
+                     {
+                         "height": merged_array.shape[-2],
+                         "width": merged_array.shape[-1],
+                         "transform": merged_transform,
+                     }
+                 )
+
+                 if self.target_crs:
+                     profile["crs"] = self.target_crs
+
+                 # Write merged raster
+                 with rasterio.open(self._merged_file_path, "w", **profile) as dst:
+                     dst.write(merged_array)
+
+         finally:
+             # Clean up source files
+             for src in src_files:
+                 temp_path = src.name
+                 src.close()
+                 try:
+                     os.unlink(temp_path)
+                 except:
+                     pass
+
+             # Clean up reprojected files
+             for src in reprojected_files:
+                 if src not in src_files:  # Don't double-close
+                     temp_path = src.name
+                     src.close()
+                     try:
+                         os.unlink(temp_path)
+                     except:
+                         pass
+
+         self.logger.info("Raster merging completed!")
+
+     def _reproject_rasters(self, src_files, target_crs):
+         """Reproject all rasters to a common CRS before merging."""
+         reprojected_files = []
+
+         for i, src in enumerate(src_files):
+             if src.crs.to_string() == target_crs:
+                 # No reprojection needed
+                 reprojected_files.append(src)
+                 continue
+
+             # Calculate transform and dimensions for reprojection
+             transform, width, height = calculate_default_transform(
+                 src.crs,
+                 target_crs,
+                 src.width,
+                 src.height,
+                 *src.bounds,
+                 resolution=self.resolution if hasattr(self, "resolution") else None,
+             )
+
+             # Create temporary file for reprojected raster
+             temp_file = tempfile.NamedTemporaryFile(suffix=".tif", delete=False)
+             temp_file.close()
+
+             # Set up profile for reprojected raster
+             profile = src.profile.copy()
+             profile.update(
+                 {
+                     "crs": target_crs,
+                     "transform": transform,
+                     "width": width,
+                     "height": height,
+                 }
+             )
+
+             # Reproject and write to temporary file
+             with rasterio.open(temp_file.name, "w", **profile) as dst:
+                 for band_idx in range(1, src.count + 1):
+                     reproject(
+                         source=rasterio.band(src, band_idx),
+                         destination=rasterio.band(dst, band_idx),
+                         src_transform=src.transform,
+                         src_crs=src.crs,
+                         dst_transform=transform,
+                         dst_crs=target_crs,
+                         resampling=self.resampling_method,
+                     )
+
+             # Open reprojected file
+             reprojected_files.append(rasterio.open(temp_file.name))
+
+         return reprojected_files
+
+     def _merge_with_mean(self, src_files):
+         """Merge rasters using mean aggregation."""
+         # Get bounds and resolution for merged raster
+         bounds = src_files[0].bounds
+         transform = src_files[0].transform
+
+         for src in src_files[1:]:
+             bounds = rasterio.coords.BoundingBox(
+                 min(bounds.left, src.bounds.left),
+                 min(bounds.bottom, src.bounds.bottom),
+                 max(bounds.right, src.bounds.right),
+                 max(bounds.top, src.bounds.top),
+             )
+
+         # Calculate dimensions for merged raster
+         width = int((bounds.right - bounds.left) / abs(transform.a))
+         height = int((bounds.top - bounds.bottom) / abs(transform.e))
+
+         # Create new transform for merged bounds
+         merged_transform = rasterio.transform.from_bounds(
+             bounds.left, bounds.bottom, bounds.right, bounds.top, width, height
+         )
+
+         # Initialize arrays for sum and count
+         sum_array = np.zeros((src_files[0].count, height, width), dtype=np.float64)
+         count_array = np.zeros((height, width), dtype=np.int32)
+
+         # Process each source file
+         for src in src_files:
+             # Read data
+             data = src.read()
+
+             # Calculate offset in merged raster
+             src_bounds = src.bounds
+             col_off = int((src_bounds.left - bounds.left) / abs(transform.a))
+             row_off = int((bounds.top - src_bounds.top) / abs(transform.e))
+
+             # Get valid data mask
+             if src.nodata is not None:
+                 valid_mask = data[0] != src.nodata
+             else:
+                 valid_mask = np.ones(data[0].shape, dtype=bool)
+
+             # Add to sum and count arrays
+             end_row = row_off + data.shape[1]
+             end_col = col_off + data.shape[2]
+
+             sum_array[:, row_off:end_row, col_off:end_col] += np.where(
+                 valid_mask, data, 0
+             )
+             count_array[row_off:end_row, col_off:end_col] += valid_mask.astype(np.int32)
+
+         # Calculate mean
+         mean_array = np.divide(
+             sum_array,
+             count_array,
+             out=np.full_like(
+                 sum_array, src_files[0].nodata or 0, dtype=sum_array.dtype
+             ),
+             where=count_array > 0,
+         )
+
+         return mean_array.astype(src_files[0].dtypes[0]), merged_transform
+
+     def __del__(self):
+         """Cleanup temporary files."""
+         if self._temp_dir and os.path.exists(self._temp_dir):
+             try:
+                 import shutil
+
+                 shutil.rmtree(self._temp_dir)
+             except:
+                 pass
+
      @contextmanager
      def open_dataset(self):
          """Context manager for accessing the dataset"""
-         with self.data_store.open(self.dataset_path, "rb") as f:
-             with rasterio.MemoryFile(f.read()) as memfile:
-                 with memfile.open() as src:
-                     yield src
+         if self._merged_file_path:
+             # Open merged file directly
+             with rasterio.open(self._merged_file_path) as src:
+                 yield src
+         else:
+             # Original single file logic
+             with self.data_store.open(self.dataset_path, "rb") as f:
+                 with rasterio.MemoryFile(f.read()) as memfile:
+                     with memfile.open() as src:
+                         yield src

      def _load_metadata(self):
          """Load metadata from the TIF file if not already cached"""
@@ -73,6 +376,17 @@ class TifProcessor:
          self._cache["count"] = src.count
          self._cache["dtype"] = src.dtypes[0]

+     @property
+     def is_merged(self) -> bool:
+         """Check if this processor was created from multiple rasters."""
+         return len(self.dataset_paths) > 1
+
+     @property
+     def source_count(self) -> int:
+         """Get the number of source rasters."""
+         return len(self.dataset_paths)
+
+     # All other methods remain the same...
      @property
      def transform(self):
          """Get the transform from the TIF file"""
@@ -380,7 +694,7 @@ class TifProcessor:
          results = [item for sublist in batched_results for item in sublist]

          return np.array(results)
-
+ 
      def _initializer_worker(self):
          """
          Initializer function for each worker process.
@@ -727,9 +1041,7 @@ def sample_multiple_tifs_by_polygons(
      sampled_values = np.full(len(polygon_list), np.nan, dtype=np.float32)

      for tp in tif_processors:
-         values = tp.sample_by_polygons(
-             polygon_list=polygon_list, stat=stat
-         )
+         values = tp.sample_by_polygons(polygon_list=polygon_list, stat=stat)

          mask = np.isnan(sampled_values)  # replace all NaNs
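
For context, a usage sketch of the helper reformatted above: each processor samples the polygons in turn, and NaNs left by earlier processors are filled by later ones. The keyword names come from the hunk itself, while the file paths, polygon, and stat value are illustrative assumptions:

    from shapely.geometry import box
    from gigaspatial.processing.tif_processor import (
        TifProcessor,
        sample_multiple_tifs_by_polygons,
    )

    tps = [TifProcessor(dataset_path=p) for p in ("north.tif", "south.tif")]
    values = sample_multiple_tifs_by_polygons(
        tif_processors=tps,
        polygon_list=[box(36.7, -1.4, 36.9, -1.2)],
        stat="mean",  # assumed value; the accepted stats are not shown in this diff
    )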