giga-spatial 0.6.0 (giga_spatial-0.6.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- giga_spatial-0.6.0.dist-info/METADATA +141 -0
- giga_spatial-0.6.0.dist-info/RECORD +47 -0
- giga_spatial-0.6.0.dist-info/WHEEL +5 -0
- giga_spatial-0.6.0.dist-info/licenses/LICENSE +661 -0
- giga_spatial-0.6.0.dist-info/top_level.txt +1 -0
- gigaspatial/__init__.py +1 -0
- gigaspatial/config.py +226 -0
- gigaspatial/core/__init__.py +0 -0
- gigaspatial/core/io/__init__.py +5 -0
- gigaspatial/core/io/adls_data_store.py +325 -0
- gigaspatial/core/io/data_api.py +113 -0
- gigaspatial/core/io/data_store.py +147 -0
- gigaspatial/core/io/local_data_store.py +92 -0
- gigaspatial/core/io/readers.py +265 -0
- gigaspatial/core/io/writers.py +128 -0
- gigaspatial/core/schemas/__init__.py +0 -0
- gigaspatial/core/schemas/entity.py +244 -0
- gigaspatial/generators/__init__.py +2 -0
- gigaspatial/generators/poi.py +636 -0
- gigaspatial/generators/zonal/__init__.py +3 -0
- gigaspatial/generators/zonal/base.py +370 -0
- gigaspatial/generators/zonal/geometry.py +439 -0
- gigaspatial/generators/zonal/mercator.py +78 -0
- gigaspatial/grid/__init__.py +1 -0
- gigaspatial/grid/mercator_tiles.py +286 -0
- gigaspatial/handlers/__init__.py +40 -0
- gigaspatial/handlers/base.py +761 -0
- gigaspatial/handlers/boundaries.py +305 -0
- gigaspatial/handlers/ghsl.py +772 -0
- gigaspatial/handlers/giga.py +145 -0
- gigaspatial/handlers/google_open_buildings.py +472 -0
- gigaspatial/handlers/hdx.py +241 -0
- gigaspatial/handlers/mapbox_image.py +208 -0
- gigaspatial/handlers/maxar_image.py +291 -0
- gigaspatial/handlers/microsoft_global_buildings.py +548 -0
- gigaspatial/handlers/ookla_speedtest.py +199 -0
- gigaspatial/handlers/opencellid.py +290 -0
- gigaspatial/handlers/osm.py +356 -0
- gigaspatial/handlers/overture.py +126 -0
- gigaspatial/handlers/rwi.py +157 -0
- gigaspatial/handlers/unicef_georepo.py +806 -0
- gigaspatial/handlers/worldpop.py +266 -0
- gigaspatial/processing/__init__.py +4 -0
- gigaspatial/processing/geo.py +1054 -0
- gigaspatial/processing/sat_images.py +39 -0
- gigaspatial/processing/tif_processor.py +477 -0
- gigaspatial/processing/utils.py +49 -0
gigaspatial/handlers/worldpop.py
@@ -0,0 +1,266 @@
+from pydantic import BaseModel, Field, HttpUrl, field_validator, model_validator
+from pathlib import Path
+import os
+from typing import Optional, Union, Literal, ClassVar
+import pandas as pd
+import pycountry
+import requests
+from tqdm import tqdm
+from urllib.error import URLError
+import logging
+
+from gigaspatial.core.io.readers import *
+from gigaspatial.core.io.writers import *
+from gigaspatial.core.io.data_store import DataStore
+from gigaspatial.core.io.local_data_store import LocalDataStore
+from gigaspatial.config import config as global_config
+
+
+class WorldPopConfig(BaseModel):
+    # class variables
+    _metadata_cache: ClassVar[Optional[pd.DataFrame]] = None
+
+    # constants
+    CURRENT_MAX_YEAR: int = 2022
+    EARLIEST_YEAR: int = 2000
+    SCHOOL_AGE_YEAR: int = 2020
+
+    # base config
+    WORLDPOP_DB_BASE_URL: HttpUrl = Field(default="https://data.worldpop.org/")
+    SCHOOL_AGE_POPULATION_PATH: str = Field(
+        default="GIS/AgeSex_structures/school_age_population/v1/2020/"
+    )
+    PPP_2021_2022_PATH: str = Field(
+        default="GIS/Population/Global_2021_2022_1km_UNadj/"
+    )
+    DATASETS_METADATA_PATH: str = Field(default="assets/wpgpDatasets.csv")
+
+    # user config
+    base_path: Path = Field(default=global_config.get_path("worldpop", "bronze"))
+    country: str = Field(...)
+    year: int = Field(..., ge=EARLIEST_YEAR, le=CURRENT_MAX_YEAR)
+    resolution: Literal["HIGH", "LOW"] = Field(
+        default="LOW",
+        description="Spatial resolution of the population grid: HIGH (100m) or LOW (1km)",
+    )
+    un_adjusted: bool = True
+    constrained: bool = False
+    school_age: Optional[Literal["PRIMARY", "SECONDARY"]] = None
+    gender: Literal["F", "M", "F_M"] = "F_M"
+
+    @field_validator("country")
+    def validate_country(cls, value: str) -> str:
+        try:
+            return pycountry.countries.lookup(value).alpha_3
+        except LookupError:
+            raise ValueError(f"Invalid country code provided: {value}")
+
+    @model_validator(mode="after")
+    def validate_configuration(self):
+        """
+        Validate that the configuration is valid based on dataset availability constraints.
+
+        Specific rules:
+        - Post-2020 data is only available at 1km resolution with UN adjustment
+        - School age population data is only available for 2020 at 1km resolution
+        """
+        if self.year > self.SCHOOL_AGE_YEAR:
+            if self.resolution != "LOW":
+                raise ValueError(
+                    f"Data for year {self.year} is only available at LOW (1km) resolution"
+                )
+
+            if not self.un_adjusted:
+                raise ValueError(
+                    f"Data for year {self.year} is only available with UN adjustment"
+                )
+
+        if self.school_age:
+            if self.resolution != "LOW":
+                raise ValueError(
+                    f"School age data is only available at LOW (1km) resolution"
+                )
+
+            if self.year != self.SCHOOL_AGE_YEAR:
+                self.year = self.SCHOOL_AGE_YEAR
+                raise ValueError(f"School age data is only available for 2020")
+
+        return self
+
+    @property
+    def dataset_url(self) -> str:
+        """Get the URL for the configured dataset. The URL is computed on first access and then cached for subsequent calls."""
+        if not hasattr(self, "_dataset_url"):
+            self._dataset_url = self._compute_dataset_url()
+        return self._dataset_url
+
+    @property
+    def dataset_path(self) -> Path:
+        """Construct and return the path for the configured dataset."""
+        url_parts = self.dataset_url.split("/")
+        file_path = (
+            "/".join(
+                [url_parts[4], url_parts[5], url_parts[7], self.country, url_parts[-1]]
+            )
+            if self.school_age
+            else "/".join([url_parts[4], url_parts[6], self.country, url_parts[-1]])
+        )
+        return self.base_path / file_path
+
+    def _load_datasets_metadata(self) -> pd.DataFrame:
+        """Load and return the WorldPop datasets metadata, using cache if available."""
+        if WorldPopConfig._metadata_cache is not None:
+            return WorldPopConfig._metadata_cache
+
+        try:
+            WorldPopConfig._metadata_cache = pd.read_csv(
+                str(self.WORLDPOP_DB_BASE_URL) + self.DATASETS_METADATA_PATH
+            )
+            return WorldPopConfig._metadata_cache
+        except (URLError, pd.errors.EmptyDataError) as e:
+            raise RuntimeError(f"Failed to load WorldPop datasets metadata: {e}")
+
+    def _compute_dataset_url(self) -> str:
+        """Construct and return the URL for the configured dataset."""
+        # handle post-2020 datasets
+        if self.year > self.SCHOOL_AGE_YEAR:
+            return (
+                str(self.WORLDPOP_DB_BASE_URL)
+                + self.PPP_2021_2022_PATH
+                + f"{'' if self.constrained else 'un'}constrained/{self.year}/{self.country}/{self.country.lower()}_ppp_{self.year}_1km_UNadj{'_constrained' if self.constrained else ''}.tif"
+            )
+
+        # handle school-age population datasets
+        if self.school_age:
+            return (
+                str(self.WORLDPOP_DB_BASE_URL)
+                + self.SCHOOL_AGE_POPULATION_PATH
+                + f"{self.country}/{self.country}_SAP_1km_2020/{self.country}_{self.gender}_{self.school_age}_2020_1km.tif"
+            )
+
+        # handle standard population datasets
+        wp_metadata = self._load_datasets_metadata()
+
+        try:
+            dataset_url = (
+                self.WORLDPOP_DB_BASE_URL
+                + wp_metadata[
+                    (wp_metadata.ISO3 == self.country)
+                    & (
+                        wp_metadata.Covariate
+                        == "ppp_"
+                        + str(self.year)
+                        + ("_UNadj" if self.un_adjusted else "")
+                    )
+                ].PathToRaster.values[0]
+            )
+        except IndexError:
+            raise ValueError(
+                f"No dataset found for country={self.country}, year={self.year}, un_adjusted={self.un_adjusted}"
+            )
+
+        # handle resolution conversion if needed
+        if self.resolution == "HIGH":
+            return dataset_url
+
+        url_parts = dataset_url.split("/")
+        url_parts[5] = (
+            url_parts[5] + "_1km" + ("_UNadj" if self.un_adjusted else "")
+        )  # get 1km folder with UNadj specification
+        url_parts[8] = url_parts[8].replace(
+            str(self.year), str(self.year) + "_1km_Aggregated"
+        )  # get filename with 1km res
+        dataset_url = "/".join(url_parts)
+
+        return dataset_url
+
+    def __repr__(self) -> str:
+
+        parts = [
+            f"WorldpopConfig(",
+            f" country='{self.country}'",
+            f" year={self.year}",
+            f" resolution={self.resolution}",
+            f" un_adjusted={self.un_adjusted}",
+            f" constrained={self.constrained}",
+        ]
+
+        if self.school_age:
+            parts.append(f" school_age='{self.school_age}'")
+            parts.append(f" gender='{self.gender}'")
+
+        parts.append(")")
+
+        return "\n".join(parts)
+
+
+class WorldPopDownloader:
+    """A class to handle downloads of WorldPop datasets."""
+
+    def __init__(
+        self,
+        config: Union[WorldPopConfig, dict[str, Union[str, int]]],
+        data_store: Optional[DataStore] = None,
+        logger: Optional[logging.Logger] = None,
+    ):
+        """
+        Initialize the downloader.
+
+        Args:
+            config: Configuration for the WorldPop dataset, either as a WorldPopConfig object or a dictionary of parameters
+            data_store: Optional data storage interface. If not provided, uses LocalDataStore.
+            logger: Optional custom logger. If not provided, uses default logger.
+        """
+        self.logger = logger or global_config.get_logger(self.__class__.__name__)
+        self.data_store = data_store or LocalDataStore()
+        self.config = (
+            config if isinstance(config, WorldPopConfig) else WorldPopConfig(**config)
+        )
+
+    @classmethod
+    def from_country_year(cls, country: str, year: int, **kwargs):
+        """
+        Create a downloader instance from country and year.
+
+        Args:
+            country: Country code or name
+            year: Year of the dataset
+            **kwargs: Additional parameters for WorldPopConfig or the downloader
+        """
+        return cls({"country": country, "year": year}, **kwargs)
+
+    def download_dataset(self) -> str:
+        """
+        Download the configured dataset to the provided output path.
+        """
+
+        try:
+            response = requests.get(self.config.dataset_url, stream=True)
+            response.raise_for_status()
+
+            output_path = str(self.config.dataset_path)
+
+            total_size = int(response.headers.get("content-length", 0))
+
+            with self.data_store.open(output_path, "wb") as file:
+                with tqdm(
+                    total=total_size,
+                    unit="B",
+                    unit_scale=True,
+                    desc=f"Downloading {os.path.basename(output_path)}",
+                ) as pbar:
+                    for chunk in response.iter_content(chunk_size=8192):
+                        if chunk:
+                            file.write(chunk)
+                            pbar.update(len(chunk))
+
+            self.logger.debug(f"Successfully downloaded dataset: {self.config}")
+
+            return output_path
+
+        except requests.exceptions.RequestException as e:
+            self.logger.error(f"Failed to download dataset {self.config}: {str(e)}")
+            return None
+        except Exception as e:
+            self.logger.error(f"Unexpected error downloading dataset: {str(e)}")
+            return None
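
For orientation, a minimal usage sketch of the handler added in this diff. The import path follows the file listing above (gigaspatial/handlers/worldpop.py); the country code and year are illustrative values, not defaults shipped with the package, and both the URL lookup and the download require network access.

from gigaspatial.handlers.worldpop import WorldPopConfig, WorldPopDownloader

# Build a config explicitly; "TUR" and 2020 are example values, validated by pydantic.
config = WorldPopConfig(country="TUR", year=2020, resolution="LOW", un_adjusted=True)
print(config.dataset_url)   # computed on first access, then cached on the instance
print(config.dataset_path)  # local path under base_path, derived from the URL

# Or let the downloader assemble the config from a country/year pair.
downloader = WorldPopDownloader.from_country_year(country="TUR", year=2020)
output_path = downloader.download_dataset()  # returns the written path, or None on failure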