giga-spatial 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. giga_spatial-0.6.0.dist-info/METADATA +141 -0
  2. giga_spatial-0.6.0.dist-info/RECORD +47 -0
  3. giga_spatial-0.6.0.dist-info/WHEEL +5 -0
  4. giga_spatial-0.6.0.dist-info/licenses/LICENSE +661 -0
  5. giga_spatial-0.6.0.dist-info/top_level.txt +1 -0
  6. gigaspatial/__init__.py +1 -0
  7. gigaspatial/config.py +226 -0
  8. gigaspatial/core/__init__.py +0 -0
  9. gigaspatial/core/io/__init__.py +5 -0
  10. gigaspatial/core/io/adls_data_store.py +325 -0
  11. gigaspatial/core/io/data_api.py +113 -0
  12. gigaspatial/core/io/data_store.py +147 -0
  13. gigaspatial/core/io/local_data_store.py +92 -0
  14. gigaspatial/core/io/readers.py +265 -0
  15. gigaspatial/core/io/writers.py +128 -0
  16. gigaspatial/core/schemas/__init__.py +0 -0
  17. gigaspatial/core/schemas/entity.py +244 -0
  18. gigaspatial/generators/__init__.py +2 -0
  19. gigaspatial/generators/poi.py +636 -0
  20. gigaspatial/generators/zonal/__init__.py +3 -0
  21. gigaspatial/generators/zonal/base.py +370 -0
  22. gigaspatial/generators/zonal/geometry.py +439 -0
  23. gigaspatial/generators/zonal/mercator.py +78 -0
  24. gigaspatial/grid/__init__.py +1 -0
  25. gigaspatial/grid/mercator_tiles.py +286 -0
  26. gigaspatial/handlers/__init__.py +40 -0
  27. gigaspatial/handlers/base.py +761 -0
  28. gigaspatial/handlers/boundaries.py +305 -0
  29. gigaspatial/handlers/ghsl.py +772 -0
  30. gigaspatial/handlers/giga.py +145 -0
  31. gigaspatial/handlers/google_open_buildings.py +472 -0
  32. gigaspatial/handlers/hdx.py +241 -0
  33. gigaspatial/handlers/mapbox_image.py +208 -0
  34. gigaspatial/handlers/maxar_image.py +291 -0
  35. gigaspatial/handlers/microsoft_global_buildings.py +548 -0
  36. gigaspatial/handlers/ookla_speedtest.py +199 -0
  37. gigaspatial/handlers/opencellid.py +290 -0
  38. gigaspatial/handlers/osm.py +356 -0
  39. gigaspatial/handlers/overture.py +126 -0
  40. gigaspatial/handlers/rwi.py +157 -0
  41. gigaspatial/handlers/unicef_georepo.py +806 -0
  42. gigaspatial/handlers/worldpop.py +266 -0
  43. gigaspatial/processing/__init__.py +4 -0
  44. gigaspatial/processing/geo.py +1054 -0
  45. gigaspatial/processing/sat_images.py +39 -0
  46. gigaspatial/processing/tif_processor.py +477 -0
  47. gigaspatial/processing/utils.py +49 -0
@@ -0,0 +1,266 @@
1
+ from pydantic import BaseModel, Field, HttpUrl, field_validator, model_validator
2
+ from pathlib import Path
3
+ import os
4
+ from typing import Optional, Union, Literal, ClassVar
5
+ import pandas as pd
6
+ import pycountry
7
+ import requests
8
+ from tqdm import tqdm
9
+ from urllib.error import URLError
10
+ import logging
11
+
12
+ from gigaspatial.core.io.readers import *
13
+ from gigaspatial.core.io.writers import *
14
+ from gigaspatial.core.io.data_store import DataStore
15
+ from gigaspatial.core.io.local_data_store import LocalDataStore
16
+ from gigaspatial.config import config as global_config
17
+
18
+
19
class WorldPopConfig(BaseModel):
    """Configuration describing a single WorldPop population raster.

    The model validates the requested combination of country, year,
    resolution and adjustment flags, and derives both the remote download
    URL (``dataset_url``) and the storage path (``dataset_path``) for the
    matching GeoTIFF.
    """

    # class variables
    # Datasets metadata table shared by all instances so the CSV is read at
    # most once per process (see ``_load_datasets_metadata``).
    _metadata_cache: ClassVar[Optional[pd.DataFrame]] = None

    # constants
    # NOTE: these are declared as ordinary annotated fields (not ClassVar),
    # so they are part of the model and keep their defaults unless overridden.
    CURRENT_MAX_YEAR: int = 2022
    EARLIEST_YEAR: int = 2000
    SCHOOL_AGE_YEAR: int = 2020

    # base config
    WORLDPOP_DB_BASE_URL: HttpUrl = Field(default="https://data.worldpop.org/")
    SCHOOL_AGE_POPULATION_PATH: str = Field(
        default="GIS/AgeSex_structures/school_age_population/v1/2020/"
    )
    PPP_2021_2022_PATH: str = Field(
        default="GIS/Population/Global_2021_2022_1km_UNadj/"
    )
    DATASETS_METADATA_PATH: str = Field(default="assets/wpgpDatasets.csv")

    # user config
    base_path: Path = Field(default=global_config.get_path("worldpop", "bronze"))
    country: str = Field(...)
    year: int = Field(..., ge=EARLIEST_YEAR, le=CURRENT_MAX_YEAR)
    resolution: Literal["HIGH", "LOW"] = Field(
        default="LOW",
        description="Spatial resolution of the population grid: HIGH (100m) or LOW (1km)",
    )
    un_adjusted: bool = True
    constrained: bool = False
    school_age: Optional[Literal["PRIMARY", "SECONDARY"]] = None
    gender: Literal["F", "M", "F_M"] = "F_M"

    @field_validator("country")
    @classmethod
    def validate_country(cls, value: str) -> str:
        """Normalise ``country`` to its ISO alpha-3 code via ``pycountry``.

        Raises:
            ValueError: If ``pycountry`` cannot resolve ``value``.
        """
        try:
            return pycountry.countries.lookup(value).alpha_3
        except LookupError as err:
            raise ValueError(f"Invalid country code provided: {value}") from err

    @model_validator(mode="after")
    def validate_configuration(self):
        """
        Validate that the configuration is valid based on dataset availability constraints.

        Specific rules:
        - Post-2020 data is only available at 1km resolution with UN adjustment
        - School age population data is only available for 2020 at 1km resolution

        Raises:
            ValueError: If any of the rules above is violated.
        """
        if self.year > self.SCHOOL_AGE_YEAR:
            if self.resolution != "LOW":
                raise ValueError(
                    f"Data for year {self.year} is only available at LOW (1km) resolution"
                )

            if not self.un_adjusted:
                raise ValueError(
                    f"Data for year {self.year} is only available with UN adjustment"
                )

        if self.school_age:
            if self.resolution != "LOW":
                raise ValueError(
                    "School age data is only available at LOW (1km) resolution"
                )

            # The request is rejected outright; the previous code also
            # overwrote ``self.year`` right before raising, which had no
            # observable effect and has been dropped.
            if self.year != self.SCHOOL_AGE_YEAR:
                raise ValueError("School age data is only available for 2020")

        return self

    @property
    def dataset_url(self) -> str:
        """Get the URL for the configured dataset. The URL is computed on first access and then cached for subsequent calls."""
        if not hasattr(self, "_dataset_url"):
            self._dataset_url = self._compute_dataset_url()
        return self._dataset_url

    @property
    def dataset_path(self) -> Path:
        """Construct and return the path for the configured dataset.

        The path is ``base_path`` joined with selected segments of
        ``dataset_url`` (split on ``/``), the country code and the file name.
        """
        url_parts = self.dataset_url.split("/")
        file_name = url_parts[-1]

        if self.school_age:
            segments = [url_parts[4], url_parts[5], url_parts[7], self.country, file_name]
        else:
            segments = [url_parts[4], url_parts[6], self.country, file_name]

        return self.base_path / "/".join(segments)

    def _load_datasets_metadata(self) -> pd.DataFrame:
        """Load and return the WorldPop datasets metadata, using cache if available.

        Raises:
            RuntimeError: If the metadata CSV cannot be fetched or is empty.
        """
        if WorldPopConfig._metadata_cache is not None:
            return WorldPopConfig._metadata_cache

        metadata_url = str(self.WORLDPOP_DB_BASE_URL) + self.DATASETS_METADATA_PATH
        try:
            WorldPopConfig._metadata_cache = pd.read_csv(metadata_url)
        except (URLError, pd.errors.EmptyDataError) as e:
            raise RuntimeError(f"Failed to load WorldPop datasets metadata: {e}") from e

        return WorldPopConfig._metadata_cache

    def _compute_dataset_url(self) -> str:
        """Construct and return the URL for the configured dataset.

        Raises:
            ValueError: If the metadata table has no row for the requested
                country / year / UN-adjustment combination.
        """
        # HttpUrl is not a plain ``str``; convert once so that ``+`` and
        # ``.split`` below operate on strings in every branch.
        base_url = str(self.WORLDPOP_DB_BASE_URL)

        # handle post-2020 datasets
        if self.year > self.SCHOOL_AGE_YEAR:
            folder = "constrained" if self.constrained else "unconstrained"
            suffix = "_constrained" if self.constrained else ""
            return (
                base_url
                + self.PPP_2021_2022_PATH
                + f"{folder}/{self.year}/{self.country}/{self.country.lower()}_ppp_{self.year}_1km_UNadj{suffix}.tif"
            )

        # handle school-age population datasets
        if self.school_age:
            return (
                base_url
                + self.SCHOOL_AGE_POPULATION_PATH
                + f"{self.country}/{self.country}_SAP_1km_2020/{self.country}_{self.gender}_{self.school_age}_2020_1km.tif"
            )

        # handle standard population datasets
        wp_metadata = self._load_datasets_metadata()
        covariate = "ppp_" + str(self.year) + ("_UNadj" if self.un_adjusted else "")
        matches = wp_metadata[
            (wp_metadata.ISO3 == self.country) & (wp_metadata.Covariate == covariate)
        ]

        try:
            dataset_url = base_url + matches.PathToRaster.values[0]
        except IndexError:
            raise ValueError(
                f"No dataset found for country={self.country}, year={self.year}, un_adjusted={self.un_adjusted}"
            ) from None

        # handle resolution conversion if needed
        if self.resolution == "HIGH":
            return dataset_url

        # The metadata row points at the HIGH resolution raster; rewrite the
        # folder and file name segments to address the 1km aggregate instead.
        url_parts = dataset_url.split("/")
        url_parts[5] = (
            url_parts[5] + "_1km" + ("_UNadj" if self.un_adjusted else "")
        )  # get 1km folder with UNadj specification
        url_parts[8] = url_parts[8].replace(
            str(self.year), str(self.year) + "_1km_Aggregated"
        )  # get filename with 1km res

        return "/".join(url_parts)

    def __repr__(self) -> str:
        """Return a multi-line summary of the user-facing settings."""
        parts = [
            f"{type(self).__name__}(",
            f" country='{self.country}'",
            f" year={self.year}",
            f" resolution={self.resolution}",
            f" un_adjusted={self.un_adjusted}",
            f" constrained={self.constrained}",
        ]

        # school_age / gender are only shown for school-age requests
        if self.school_age:
            parts.append(f" school_age='{self.school_age}'")
            parts.append(f" gender='{self.gender}'")

        parts.append(")")

        return "\n".join(parts)
195
+
196
+
197
class WorldPopDownloader:
    """A class to handle downloads of WorldPop datasets."""

    # (connect, read) timeouts in seconds passed to ``requests``. The read
    # timeout bounds the wait between received chunks, not the whole
    # download, so large rasters are unaffected.
    REQUEST_TIMEOUT = (10, 60)

    # Keyword arguments of ``__init__`` other than ``config``; used by
    # ``from_country_year`` to separate downloader options from config options.
    _INIT_KWARGS = ("data_store", "logger")

    def __init__(
        self,
        config: Union[WorldPopConfig, dict[str, Union[str, int]]],
        data_store: Optional[DataStore] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """
        Initialize the downloader.

        Args:
            config: Configuration for the WorldPop dataset, either as a WorldPopConfig object or a dictionary of parameters
            data_store: Optional data storage interface. If not provided, uses LocalDataStore.
            logger: Optional custom logger. If not provided, uses default logger.
        """
        self.logger = logger or global_config.get_logger(self.__class__.__name__)
        self.data_store = data_store or LocalDataStore()
        self.config = (
            config if isinstance(config, WorldPopConfig) else WorldPopConfig(**config)
        )

    @classmethod
    def from_country_year(cls, country: str, year: int, **kwargs):
        """
        Create a downloader instance from country and year.

        Args:
            country: Country code or name
            year: Year of the dataset
            **kwargs: Additional parameters for WorldPopConfig or the downloader.
                ``data_store`` and ``logger`` are passed to the downloader;
                every other keyword is forwarded to ``WorldPopConfig``.
        """
        # Previously all kwargs went to ``__init__``, so any config option
        # (e.g. ``resolution``) raised TypeError despite being documented.
        init_kwargs = {
            name: kwargs.pop(name) for name in cls._INIT_KWARGS if name in kwargs
        }
        config = {"country": country, "year": year, **kwargs}
        return cls(config, **init_kwargs)

    def download_dataset(self) -> Optional[str]:
        """
        Download the configured dataset to the provided output path.

        Returns:
            The path the dataset was written to, or ``None`` if the download
            failed (the error is logged rather than raised).
        """
        try:
            # The context manager releases the streamed connection even when
            # writing fails part-way through.
            with requests.get(
                self.config.dataset_url, stream=True, timeout=self.REQUEST_TIMEOUT
            ) as response:
                response.raise_for_status()

                output_path = str(self.config.dataset_path)

                # Missing content-length falls back to 0
                total_size = int(response.headers.get("content-length", 0))

                with self.data_store.open(output_path, "wb") as file, tqdm(
                    total=total_size,
                    unit="B",
                    unit_scale=True,
                    desc=f"Downloading {os.path.basename(output_path)}",
                ) as pbar:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            file.write(chunk)
                            pbar.update(len(chunk))

            self.logger.debug(f"Successfully downloaded dataset: {self.config}")

            return output_path

        except requests.exceptions.RequestException as e:
            self.logger.error(f"Failed to download dataset {self.config}: {str(e)}")
            return None
        except Exception as e:
            # Deliberate best-effort boundary: any other failure is logged and
            # reported to the caller as ``None``.
            self.logger.error(f"Unexpected error downloading dataset: {str(e)}")
            return None
@@ -0,0 +1,4 @@
1
+ from gigaspatial.processing.geo import *
2
+ from gigaspatial.processing.tif_processor import *
3
+ from gigaspatial.processing.sat_images import *
4
+ from gigaspatial.processing.utils import *