giga-spatial 0.6.7.tar.gz → 0.6.9.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/CHANGELOG.md +49 -1
  2. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/PKG-INFO +1 -1
  3. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/giga_spatial.egg-info/PKG-INFO +1 -1
  4. giga_spatial-0.6.9/gigaspatial/__init__.py +1 -0
  5. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/handlers/base.py +2 -2
  6. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/handlers/hdx.py +4 -1
  7. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/handlers/osm.py +77 -9
  8. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/processing/tif_processor.py +325 -13
  9. giga_spatial-0.6.7/gigaspatial/__init__.py +0 -1
  10. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/.env_sample +0 -0
  11. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/CODE_OF_CONDUCT.md +0 -0
  12. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/CONTRIBUTING.md +0 -0
  13. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/LICENSE +0 -0
  14. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/MANIFEST.in +0 -0
  15. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/PULL_REQUEST_TEMPLATE.md +0 -0
  16. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/README.md +0 -0
  17. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/.DS_Store +0 -0
  18. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/api/core.md +0 -0
  19. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/api/generators.md +0 -0
  20. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/api/grid.md +0 -0
  21. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/api/handlers.md +0 -0
  22. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/api/index.md +0 -0
  23. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/api/processing.md +0 -0
  24. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/assets/GIGA_horizontal_notext_white.webp +0 -0
  25. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/assets/datasets.png +0 -0
  26. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/assets/logo.png +0 -0
  27. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/changelog.md +0 -0
  28. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/contributing.md +0 -0
  29. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/examples/advanced.md +0 -0
  30. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/examples/basic.md +0 -0
  31. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/examples/downloading/ghsl.md +0 -0
  32. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/examples/downloading/osm.md +0 -0
  33. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/examples/index.md +0 -0
  34. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/examples/processing/tif.md +0 -0
  35. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/examples/use-cases.md +0 -0
  36. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/getting-started/installation.md +0 -0
  37. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/getting-started/quickstart.md +0 -0
  38. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/index.md +0 -0
  39. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/license.md +0 -0
  40. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/stylesheets/extra.css +0 -0
  41. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/user-guide/configuration.md +0 -0
  42. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/docs/user-guide/index.md +0 -0
  43. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/giga_spatial.egg-info/SOURCES.txt +0 -0
  44. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/giga_spatial.egg-info/dependency_links.txt +0 -0
  45. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/giga_spatial.egg-info/requires.txt +0 -0
  46. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/giga_spatial.egg-info/top_level.txt +0 -0
  47. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/config.py +0 -0
  48. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/core/__init__.py +0 -0
  49. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/core/io/__init__.py +0 -0
  50. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/core/io/adls_data_store.py +0 -0
  51. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/core/io/data_api.py +0 -0
  52. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/core/io/data_store.py +0 -0
  53. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/core/io/database.py +0 -0
  54. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/core/io/local_data_store.py +0 -0
  55. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/core/io/readers.py +0 -0
  56. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/core/io/writers.py +0 -0
  57. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/core/schemas/__init__.py +0 -0
  58. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/core/schemas/entity.py +0 -0
  59. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/generators/__init__.py +0 -0
  60. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/generators/poi.py +0 -0
  61. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/generators/zonal/__init__.py +0 -0
  62. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/generators/zonal/admin.py +0 -0
  63. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/generators/zonal/base.py +0 -0
  64. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/generators/zonal/geometry.py +0 -0
  65. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/generators/zonal/mercator.py +0 -0
  66. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/grid/__init__.py +0 -0
  67. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/grid/mercator_tiles.py +0 -0
  68. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/handlers/__init__.py +0 -0
  69. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/handlers/boundaries.py +0 -0
  70. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/handlers/ghsl.py +0 -0
  71. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/handlers/giga.py +0 -0
  72. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/handlers/google_open_buildings.py +0 -0
  73. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/handlers/mapbox_image.py +0 -0
  74. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/handlers/maxar_image.py +0 -0
  75. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/handlers/microsoft_global_buildings.py +0 -0
  76. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/handlers/ookla_speedtest.py +0 -0
  77. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/handlers/opencellid.py +0 -0
  78. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/handlers/overture.py +0 -0
  79. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/handlers/rwi.py +0 -0
  80. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/handlers/unicef_georepo.py +0 -0
  81. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/handlers/worldpop.py +0 -0
  82. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/processing/__init__.py +0 -0
  83. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/processing/algorithms.py +0 -0
  84. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/processing/geo.py +0 -0
  85. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/processing/sat_images.py +0 -0
  86. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/gigaspatial/processing/utils.py +0 -0
  87. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/pyproject.toml +0 -0
  88. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/requirements.txt +0 -0
  89. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/setup.cfg +0 -0
  90. {giga_spatial-0.6.7 → giga_spatial-0.6.9}/setup.py +0 -0
@@ -2,7 +2,55 @@
 
  All notable changes to this project will be documented in this file.
 
- ## [v0.6.7] - 2024-07-16
+ ## [v0.6.9] - 2025-07-26
+
+ ### Fixed
+
+ - Resolved a bug in the handler base class where non-hashable types (dicts) were incorrectly used as dictionary keys in the `unit_to_path` mapping; the fix prevents potential runtime errors during data availability checks.
+
+ ## [v0.6.8] - 2025-07-26
+
+ ### Added
+
+ - **OSMLocationFetcher Enhancements**
+   - Support for querying OSM locations by arbitrary administrative levels (e.g., states, provinces, cities), in addition to country-level queries.
+   - New optional parameters:
+     - `admin_level`: Specify the OSM administrative level (e.g., 4 for states, 6 for counties).
+     - `admin_value`: Name of the administrative area to query (e.g., "California").
+   - New static method `get_admin_names(admin_level, country=None)`:
+     - Fetch all administrative area names for a given `admin_level`, optionally filtered by country.
+     - Helps users discover valid admin area names for constructing precise queries.
+
+ - **Multi-Raster Merging Support in TifProcessor**
+   - Added the ability to initialize `TifProcessor` with **multiple raster datasets**.
+   - Merges rasters on load with configurable strategies:
+     - Supported `merge_method` options: `first`, `last`, `min`, `max`, `mean`.
+   - Supports **on-the-fly reprojection** for rasters with differing coordinate reference systems via `target_crs`.
+   - Handles **resampling** using `resampling_method` (default: `nearest`).
+   - Comprehensive validation to ensure compatibility of input rasters (e.g., resolution, nodata, dtype).
+   - Temporary file management for the merged output, with automatic cleanup.
+   - Backward compatible with single-raster use cases.
+
+   **New TifProcessor Parameters:**
+   - `merge_method` (default: `first`) – How to combine pixel values across rasters.
+   - `target_crs` (optional) – CRS to reproject rasters to before merging.
+   - `resampling_method` – Resampling method used for reprojection.
+
+   **New Properties:**
+   - `is_merged`: Indicates whether the current instance represents merged rasters.
+   - `source_count`: Number of source raster datasets merged.
+
+ ### Changed
+
+ - **OSMLocationFetcher Overpass Query Logic**
+   - Refactored the Overpass QL query builder to support **subnational queries** using `admin_level` and `admin_value`.
+   - Improved flexibility and precision for spatial data collection across different administrative hierarchies.
+
+ ### Breaking Changes
+
+ - None. All changes are fully backward compatible.
+
+ ## [v0.6.7] - 2025-07-16
 
  ### Fixed
 
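To make the new OSM options above concrete, here is a minimal, hypothetical usage sketch (the parameter and method names come from this release; the argument values are illustrative):

```python
from gigaspatial.handlers.osm import OSMLocationFetcher

# Discover valid admin area names first (admin_level=4 roughly corresponds to states):
state_names = OSMLocationFetcher.get_admin_names(admin_level=4, country="US")

# Query a single state instead of a whole country:
fetcher = OSMLocationFetcher(
    admin_level=4,
    admin_value="California",
    location_types=["school"],  # still required, as before
)
```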
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: giga-spatial
- Version: 0.6.7
+ Version: 0.6.9
  Summary: A package for spatial data download & processing
  Home-page: https://github.com/unicef/giga-spatial
  Author: Utku Can Ozturk
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: giga-spatial
- Version: 0.6.7
+ Version: 0.6.9
  Summary: A package for spatial data download & processing
  Home-page: https://github.com/unicef/giga-spatial
  Author: Utku Can Ozturk
@@ -0,0 +1 @@
+ __version__ = "0.6.9"
@@ -619,14 +619,14 @@ class BaseHandler(ABC):
          # Download logic
          if data_units is not None:
              # Map data_units to their paths and select only those that are missing
-             unit_to_path = dict(zip(data_units, data_paths))
+             unit_to_path = dict(zip(data_paths, data_units))  # units might be dicts, cannot be used as key
              if force_download:
                  # Download all units if force_download
                  self.downloader.download_data_units(data_units, **kwargs)
              else:
                  missing_units = [
                      unit
-                     for unit, path in unit_to_path.items()
+                     for path, unit in unit_to_path.items()
                      if path in missing_paths
                  ]
                  if missing_units:
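For readers skimming the hunk above: the swap works because paths are hashable strings while data units may be dicts, which Python cannot use as dictionary keys (note that the released code keeps the name `unit_to_path` even though the mapping is now path → unit). A minimal, self-contained illustration with hypothetical data:

```python
data_units = [{"id": 1}, {"id": 2}]   # units may be dicts (unhashable)
data_paths = ["a.tif", "b.tif"]       # paths are hashable strings

# dict(zip(data_units, data_paths))   # would raise TypeError: unhashable type: 'dict'

unit_to_path = dict(zip(data_paths, data_units))  # keyed by path instead
missing_paths = {"b.tif"}
missing_units = [unit for path, unit in unit_to_path.items() if path in missing_paths]
print(missing_units)  # [{'id': 2}]
```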
@@ -247,7 +247,10 @@ class HDXConfig(BaseHandlerConfig):
              # If source is a dict, use it directly as a filter
              return self.get_dataset_resources(filter=source, **kwargs)
          else:
-             raise ValueError(f"Unsupported source type: {type(source)}")
+             raise ValueError(
+                 f"Unsupported source type: {type(source)}"
+                 "Please use country-based filtering or direct resource filtering instead."
+             )
 
      def get_relevant_data_units_by_geometry(
          self, geometry: Union[BaseGeometry, gpd.GeoDataFrame], **kwargs
@@ -1,7 +1,8 @@
  import requests
  import pandas as pd
  from typing import List, Dict, Union, Optional, Literal
- from dataclasses import dataclass
+ from pydantic.dataclasses import dataclass
+ from pydantic import Field
  from time import sleep
  from concurrent.futures import ThreadPoolExecutor
  from requests.exceptions import RequestException
@@ -20,8 +21,10 @@ class OSMLocationFetcher:
      shops, and other POI categories.
      """
 
-     country: str
-     location_types: Union[List[str], Dict[str, List[str]]]
+     country: Optional[str] = None
+     admin_level: Optional[int] = None
+     admin_value: Optional[str] = None
+     location_types: Union[List[str], Dict[str, List[str]]] = Field(...)
      base_url: str = "http://overpass-api.de/api/interpreter"
      timeout: int = 600
      max_retries: int = 3
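The switch from the stdlib `dataclass` to pydantic's (previous hunk) is what lets the required `location_types` field follow the new optional fields above: the stdlib decorator forbids a field without a default after defaulted ones, while `Field(...)` gives the field a syntactic default yet keeps it required. A minimal sketch of the pattern, assuming pydantic's usual `Field(...)` semantics (the class name is illustrative):

```python
from typing import Dict, List, Optional, Union
from pydantic.dataclasses import dataclass
from pydantic import Field

@dataclass
class Example:
    country: Optional[str] = None
    location_types: Union[List[str], Dict[str, List[str]]] = Field(...)  # still required

Example(location_types=["school"])  # ok
# Example()                         # raises a validation error: location_types is required
```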
@@ -29,10 +32,6 @@ class OSMLocationFetcher:
 
      def __post_init__(self):
          """Validate inputs, normalize location_types, and set up logging."""
-         try:
-             self.country = pycountry.countries.lookup(self.country).alpha_2
-         except LookupError:
-             raise ValueError(f"Invalid country code provided: {self.country}")
 
          # Normalize location_types to always be a dictionary
          if isinstance(self.location_types, list):
@@ -44,6 +43,75 @@ class OSMLocationFetcher:
 
          self.logger = config.get_logger(self.__class__.__name__)
 
+         # Validate area selection
+         if self.admin_level is not None and self.admin_value is not None:
+             self.area_query = f'area["admin_level"={self.admin_level}]["name"="{self.admin_value}"]->.searchArea;'
+             self.logger.info(
+                 f"Using admin_level={self.admin_level}, name={self.admin_value} for area selection."
+             )
+         elif self.country is not None:
+             try:
+                 self.country = pycountry.countries.lookup(self.country).alpha_2
+             except LookupError:
+                 raise ValueError(f"Invalid country code provided: {self.country}")
+             self.area_query = f'area["ISO3166-1"={self.country}]->.searchArea;'
+             self.logger.info(f"Using country={self.country} for area selection.")
+         else:
+             raise ValueError(
+                 "Either country or both admin_level and admin_value must be provided."
+             )
+
+     @staticmethod
+     def get_admin_names(
+         admin_level: int, country: Optional[str] = None, timeout: int = 120
+     ) -> List[str]:
+         """
+         Fetch all admin area names for a given admin_level (optionally within a country).
+
+         Args:
+             admin_level (int): The OSM admin_level to search for (e.g., 4 for states, 6 for counties).
+             country (str, optional): Country name or ISO code to filter within.
+             timeout (int): Timeout for the Overpass API request.
+
+         Returns:
+             List[str]: List of admin area names.
+         """
+
+         # Build area filter for country if provided
+         if country:
+             try:
+                 country_code = pycountry.countries.lookup(country).alpha_2
+             except LookupError:
+                 raise ValueError(f"Invalid country code or name: {country}")
+             area_filter = f'area["ISO3166-1"="{country_code}"]->.countryArea;'
+             area_ref = "(area.countryArea)"
+         else:
+             area_filter = ""
+             area_ref = ""
+
+         # Overpass QL to get all admin areas at the specified level
+         query = f"""
+         [out:json][timeout:{timeout}];
+         {area_filter}
+         (
+             relation["admin_level"="{admin_level}"]{area_ref};
+         );
+         out tags;
+         """
+
+         url = "http://overpass-api.de/api/interpreter"
+         response = requests.get(url, params={"data": query}, timeout=timeout)
+         response.raise_for_status()
+         data = response.json()
+
+         names = []
+         for el in data.get("elements", []):
+             tags = el.get("tags", {})
+             name = tags.get("name")
+             if name:
+                 names.append(name)
+         return sorted(set(names))
+
      def _build_queries(self, since_year: Optional[int] = None) -> List[str]:
          """
          Construct separate Overpass QL queries for different element types and categories.
@@ -68,7 +136,7 @@ class OSMLocationFetcher:
 
          nodes_relations_query = f"""
          [out:json][timeout:{self.timeout}];
-         area["ISO3166-1"={self.country}]->.searchArea;
+         {self.area_query}
          (
              {nodes_relations_queries}
          );
@@ -86,7 +154,7 @@ class OSMLocationFetcher:
 
          ways_query = f"""
          [out:json][timeout:{self.timeout}];
-         area["ISO3166-1"={self.country}]->.searchArea;
+         {self.area_query}
          (
              {ways_queries}
          );
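Rendered with illustrative values, the two selectors that can now be substituted for `{self.area_query}` in the hunks above look like this (the f-string templates are taken from this diff; the values are hypothetical):

```python
admin_level, admin_value, country = 4, "California", "US"

subnational = f'area["admin_level"={admin_level}]["name"="{admin_value}"]->.searchArea;'
national = f'area["ISO3166-1"={country}]->.searchArea;'

print(subnational)  # area["admin_level"=4]["name"="California"]->.searchArea;
print(national)     # area["ISO3166-1"=US]->.searchArea;
```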
@@ -9,9 +9,13 @@ from shapely.geometry import box, Polygon, MultiPolygon
  from pathlib import Path
  import rasterio
  from rasterio.mask import mask
+ from rasterio.merge import merge
+ from rasterio.warp import calculate_default_transform, reproject, Resampling
  from functools import partial
  import multiprocessing
  from tqdm import tqdm
+ import tempfile
+ import os
 
  from gigaspatial.core.io.data_store import DataStore
  from gigaspatial.core.io.local_data_store import LocalDataStore
@@ -22,20 +26,34 @@ from gigaspatial.config import config
  class TifProcessor:
      """
      A class to handle tif data processing, supporting single-band, RGB, RGBA, and multi-band data.
+     Can merge multiple rasters into one during initialization.
      """
 
-     dataset_path: Union[Path, str]
+     dataset_path: Union[Path, str, List[Union[Path, str]]]
      data_store: Optional[DataStore] = None
      mode: Literal["single", "rgb", "rgba", "multi"] = "single"
+     merge_method: Literal["first", "last", "min", "max", "mean"] = "first"
+     target_crs: Optional[str] = None  # For reprojection if needed
+     resampling_method: Resampling = Resampling.nearest
 
      def __post_init__(self):
-         """Validate inputs and set up logging."""
+         """Validate inputs, merge rasters if needed, and set up logging."""
          self.data_store = self.data_store or LocalDataStore()
          self.logger = config.get_logger(self.__class__.__name__)
          self._cache = {}
-
-         if not self.data_store.file_exists(self.dataset_path):
-             raise FileNotFoundError(f"Dataset not found at {self.dataset_path}")
+         self._merged_file_path = None
+         self._temp_dir = None
+
+         # Handle multiple dataset paths
+         if isinstance(self.dataset_path, list):
+             self.dataset_paths = [Path(p) for p in self.dataset_path]
+             self._validate_multiple_datasets()
+             self._merge_rasters()
+             self.dataset_path = self._merged_file_path
+         else:
+             self.dataset_paths = [Path(self.dataset_path)]
+             if not self.data_store.file_exists(self.dataset_path):
+                 raise FileNotFoundError(f"Dataset not found at {self.dataset_path}")
 
          self._load_metadata()
 
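A hypothetical construction sketch for the multi-raster path introduced above (file names and argument values are illustrative; the parameter names and defaults come from this diff, and the import path follows the file list):

```python
from rasterio.warp import Resampling
from gigaspatial.processing.tif_processor import TifProcessor

tp = TifProcessor(
    dataset_path=["tile_a.tif", "tile_b.tif"],  # a list triggers validation + merge
    merge_method="mean",                        # first | last | min | max | mean
    target_crs="EPSG:4326",                     # reproject before merging if CRSs differ
    resampling_method=Resampling.bilinear,
)
print(tp.is_merged, tp.source_count)  # True 2
```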
@@ -49,13 +67,298 @@ class TifProcessor:
          if self.mode == "multi" and self.count < 2:
              raise ValueError("Multi mode requires a TIF file with 2 or more bands")
 
+     def _validate_multiple_datasets(self):
+         """Validate that all datasets exist and have compatible properties."""
+         if len(self.dataset_paths) < 2:
+             raise ValueError("Multiple dataset paths required for merging")
+
+         # Check if all files exist
+         for path in self.dataset_paths:
+             if not self.data_store.file_exists(path):
+                 raise FileNotFoundError(f"Dataset not found at {path}")
+
+         # Load first dataset to get reference properties
+         with self.data_store.open(self.dataset_paths[0], "rb") as f:
+             with rasterio.MemoryFile(f.read()) as memfile:
+                 with memfile.open() as ref_src:
+                     ref_count = ref_src.count
+                     ref_dtype = ref_src.dtypes[0]
+                     ref_crs = ref_src.crs
+                     ref_transform = ref_src.transform
+                     ref_nodata = ref_src.nodata
+
+         # Validate all other datasets against reference
+         for i, path in enumerate(self.dataset_paths[1:], 1):
+             with self.data_store.open(path, "rb") as f:
+                 with rasterio.MemoryFile(f.read()) as memfile:
+                     with memfile.open() as src:
+                         if src.count != ref_count:
+                             raise ValueError(
+                                 f"Dataset {i} has {src.count} bands, expected {ref_count}"
+                             )
+                         if src.dtypes[0] != ref_dtype:
+                             raise ValueError(
+                                 f"Dataset {i} has dtype {src.dtypes[0]}, expected {ref_dtype}"
+                             )
+                         if self.target_crs is None and src.crs != ref_crs:
+                             raise ValueError(
+                                 f"Dataset {i} has CRS {src.crs}, expected {ref_crs}. Consider setting target_crs parameter."
+                             )
+                         if self.target_crs is None and not self._transforms_compatible(
+                             src.transform, ref_transform
+                         ):
+                             self.logger.warning(
+                                 f"Dataset {i} has different resolution. Resampling may be needed."
+                             )
+                         if src.nodata != ref_nodata:
+                             self.logger.warning(
+                                 f"Dataset {i} has different nodata value: {src.nodata} vs {ref_nodata}"
+                             )
+
+     def _transforms_compatible(self, transform1, transform2, tolerance=1e-6):
+         """Check if two transforms have compatible pixel sizes."""
+         return (
+             abs(transform1.a - transform2.a) < tolerance
+             and abs(transform1.e - transform2.e) < tolerance
+         )
+
+     def _merge_rasters(self):
+         """Merge multiple rasters into a single raster."""
+         self.logger.info(f"Merging {len(self.dataset_paths)} rasters...")
+
+         # Create temporary directory for merged file
+         self._temp_dir = tempfile.mkdtemp()
+         merged_filename = "merged_raster.tif"
+         self._merged_file_path = os.path.join(self._temp_dir, merged_filename)
+
+         # Open all datasets and handle reprojection if needed
+         src_files = []
+         reprojected_files = []
+
+         try:
+             for path in self.dataset_paths:
+                 with self.data_store.open(path, "rb") as f:
+                     # Create temporary file for each dataset
+                     temp_file = tempfile.NamedTemporaryFile(suffix=".tif", delete=False)
+                     temp_file.write(f.read())
+                     temp_file.close()
+                     src_files.append(rasterio.open(temp_file.name))
+
+             # Handle reprojection if target_crs is specified
+             if self.target_crs:
+                 self.logger.info(f"Reprojecting rasters to {self.target_crs}...")
+                 processed_files = self._reproject_rasters(src_files, self.target_crs)
+                 reprojected_files = processed_files
+             else:
+                 processed_files = src_files
+
+             if self.merge_method == "mean":
+                 # For mean, we need to handle it manually
+                 merged_array, merged_transform = self._merge_with_mean(src_files)
+
+                 # Use first source as reference for metadata
+                 ref_src = src_files[0]
+                 profile = ref_src.profile.copy()
+                 profile.update(
+                     {
+                         "height": merged_array.shape[-2],
+                         "width": merged_array.shape[-1],
+                         "transform": merged_transform,
+                     }
+                 )
+
+                 # Write merged raster
+                 with rasterio.open(self._merged_file_path, "w", **profile) as dst:
+                     dst.write(merged_array)
+
+             else:
+                 # Use rasterio's merge function
+                 merged_array, merged_transform = merge(
+                     src_files,
+                     method=self.merge_method,
+                     resampling=self.resampling_method,
+                 )
+
+                 # Use first source as reference for metadata
+                 ref_src = src_files[0]
+                 profile = ref_src.profile.copy()
+                 profile.update(
+                     {
+                         "height": merged_array.shape[-2],
+                         "width": merged_array.shape[-1],
+                         "transform": merged_transform,
+                     }
+                 )
+
+                 if self.target_crs:
+                     profile["crs"] = self.target_crs
+
+                 # Write merged raster
+                 with rasterio.open(self._merged_file_path, "w", **profile) as dst:
+                     dst.write(merged_array)
+
+         finally:
+             # Clean up source files
+             for src in src_files:
+                 temp_path = src.name
+                 src.close()
+                 try:
+                     os.unlink(temp_path)
+                 except:
+                     pass
+
+             # Clean up reprojected files
+             for src in reprojected_files:
+                 if src not in src_files:  # Don't double-close
+                     temp_path = src.name
+                     src.close()
+                     try:
+                         os.unlink(temp_path)
+                     except:
+                         pass
+
+         self.logger.info("Raster merging completed!")
+
+     def _reproject_rasters(self, src_files, target_crs):
+         """Reproject all rasters to a common CRS before merging."""
+         reprojected_files = []
+
+         for i, src in enumerate(src_files):
+             if src.crs.to_string() == target_crs:
+                 # No reprojection needed
+                 reprojected_files.append(src)
+                 continue
+
+             # Calculate transform and dimensions for reprojection
+             transform, width, height = calculate_default_transform(
+                 src.crs,
+                 target_crs,
+                 src.width,
+                 src.height,
+                 *src.bounds,
+                 resolution=self.resolution if hasattr(self, "resolution") else None,
+             )
+
+             # Create temporary file for reprojected raster
+             temp_file = tempfile.NamedTemporaryFile(suffix=".tif", delete=False)
+             temp_file.close()
+
+             # Set up profile for reprojected raster
+             profile = src.profile.copy()
+             profile.update(
+                 {
+                     "crs": target_crs,
+                     "transform": transform,
+                     "width": width,
+                     "height": height,
+                 }
+             )
+
+             # Reproject and write to temporary file
+             with rasterio.open(temp_file.name, "w", **profile) as dst:
+                 for band_idx in range(1, src.count + 1):
+                     reproject(
+                         source=rasterio.band(src, band_idx),
+                         destination=rasterio.band(dst, band_idx),
+                         src_transform=src.transform,
+                         src_crs=src.crs,
+                         dst_transform=transform,
+                         dst_crs=target_crs,
+                         resampling=self.resampling_method,
+                     )
+
+             # Open reprojected file
+             reprojected_files.append(rasterio.open(temp_file.name))
+
+         return reprojected_files
+
+     def _merge_with_mean(self, src_files):
+         """Merge rasters using mean aggregation."""
+         # Get bounds and resolution for merged raster
+         bounds = src_files[0].bounds
+         transform = src_files[0].transform
+
+         for src in src_files[1:]:
+             bounds = rasterio.coords.BoundingBox(
+                 min(bounds.left, src.bounds.left),
+                 min(bounds.bottom, src.bounds.bottom),
+                 max(bounds.right, src.bounds.right),
+                 max(bounds.top, src.bounds.top),
+             )
+
+         # Calculate dimensions for merged raster
+         width = int((bounds.right - bounds.left) / abs(transform.a))
+         height = int((bounds.top - bounds.bottom) / abs(transform.e))
+
+         # Create new transform for merged bounds
+         merged_transform = rasterio.transform.from_bounds(
+             bounds.left, bounds.bottom, bounds.right, bounds.top, width, height
+         )
+
+         # Initialize arrays for sum and count
+         sum_array = np.zeros((src_files[0].count, height, width), dtype=np.float64)
+         count_array = np.zeros((height, width), dtype=np.int32)
+
+         # Process each source file
+         for src in src_files:
+             # Read data
+             data = src.read()
+
+             # Calculate offset in merged raster
+             src_bounds = src.bounds
+             col_off = int((src_bounds.left - bounds.left) / abs(transform.a))
+             row_off = int((bounds.top - src_bounds.top) / abs(transform.e))
+
+             # Get valid data mask
+             if src.nodata is not None:
+                 valid_mask = data[0] != src.nodata
+             else:
+                 valid_mask = np.ones(data[0].shape, dtype=bool)
+
+             # Add to sum and count arrays
+             end_row = row_off + data.shape[1]
+             end_col = col_off + data.shape[2]
+
+             sum_array[:, row_off:end_row, col_off:end_col] += np.where(
+                 valid_mask, data, 0
+             )
+             count_array[row_off:end_row, col_off:end_col] += valid_mask.astype(np.int32)
+
+         # Calculate mean
+         mean_array = np.divide(
+             sum_array,
+             count_array,
+             out=np.full_like(
+                 sum_array, src_files[0].nodata or 0, dtype=sum_array.dtype
+             ),
+             where=count_array > 0,
+         )
+
+         return mean_array.astype(src_files[0].dtypes[0]), merged_transform
+
+     def __del__(self):
+         """Cleanup temporary files."""
+         if self._temp_dir and os.path.exists(self._temp_dir):
+             try:
+                 import shutil
+
+                 shutil.rmtree(self._temp_dir)
+             except:
+                 pass
+
      @contextmanager
      def open_dataset(self):
          """Context manager for accessing the dataset"""
-         with self.data_store.open(self.dataset_path, "rb") as f:
-             with rasterio.MemoryFile(f.read()) as memfile:
-                 with memfile.open() as src:
-                     yield src
+         if self._merged_file_path:
+             # Open merged file directly
+             with rasterio.open(self._merged_file_path) as src:
+                 yield src
+         else:
+             # Original single file logic
+             with self.data_store.open(self.dataset_path, "rb") as f:
+                 with rasterio.MemoryFile(f.read()) as memfile:
+                     with memfile.open() as src:
+                         yield src
 
      def _load_metadata(self):
          """Load metadata from the TIF file if not already cached"""
@@ -73,6 +376,17 @@ class TifProcessor:
          self._cache["count"] = src.count
          self._cache["dtype"] = src.dtypes[0]
 
+     @property
+     def is_merged(self) -> bool:
+         """Check if this processor was created from multiple rasters."""
+         return len(self.dataset_paths) > 1
+
+     @property
+     def source_count(self) -> int:
+         """Get the number of source rasters."""
+         return len(self.dataset_paths)
+
+     # All other methods remain the same...
      @property
      def transform(self):
          """Get the transform from the TIF file"""
@@ -380,7 +694,7 @@ class TifProcessor:
          results = [item for sublist in batched_results for item in sublist]
 
          return np.array(results)
-
+
      def _initializer_worker(self):
          """
          Initializer function for each worker process.
@@ -727,9 +1041,7 @@ def sample_multiple_tifs_by_polygons(
      sampled_values = np.full(len(polygon_list), np.nan, dtype=np.float32)
 
      for tp in tif_processors:
-         values = tp.sample_by_polygons(
-             polygon_list=polygon_list, stat=stat
-         )
+         values = tp.sample_by_polygons(polygon_list=polygon_list, stat=stat)
 
          mask = np.isnan(sampled_values)  # replace all NaNs
 
@@ -1 +0,0 @@
- __version__ = "0.6.7"