giga-spatial 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- giga_spatial-0.6.0.dist-info/METADATA +141 -0
- giga_spatial-0.6.0.dist-info/RECORD +47 -0
- giga_spatial-0.6.0.dist-info/WHEEL +5 -0
- giga_spatial-0.6.0.dist-info/licenses/LICENSE +661 -0
- giga_spatial-0.6.0.dist-info/top_level.txt +1 -0
- gigaspatial/__init__.py +1 -0
- gigaspatial/config.py +226 -0
- gigaspatial/core/__init__.py +0 -0
- gigaspatial/core/io/__init__.py +5 -0
- gigaspatial/core/io/adls_data_store.py +325 -0
- gigaspatial/core/io/data_api.py +113 -0
- gigaspatial/core/io/data_store.py +147 -0
- gigaspatial/core/io/local_data_store.py +92 -0
- gigaspatial/core/io/readers.py +265 -0
- gigaspatial/core/io/writers.py +128 -0
- gigaspatial/core/schemas/__init__.py +0 -0
- gigaspatial/core/schemas/entity.py +244 -0
- gigaspatial/generators/__init__.py +2 -0
- gigaspatial/generators/poi.py +636 -0
- gigaspatial/generators/zonal/__init__.py +3 -0
- gigaspatial/generators/zonal/base.py +370 -0
- gigaspatial/generators/zonal/geometry.py +439 -0
- gigaspatial/generators/zonal/mercator.py +78 -0
- gigaspatial/grid/__init__.py +1 -0
- gigaspatial/grid/mercator_tiles.py +286 -0
- gigaspatial/handlers/__init__.py +40 -0
- gigaspatial/handlers/base.py +761 -0
- gigaspatial/handlers/boundaries.py +305 -0
- gigaspatial/handlers/ghsl.py +772 -0
- gigaspatial/handlers/giga.py +145 -0
- gigaspatial/handlers/google_open_buildings.py +472 -0
- gigaspatial/handlers/hdx.py +241 -0
- gigaspatial/handlers/mapbox_image.py +208 -0
- gigaspatial/handlers/maxar_image.py +291 -0
- gigaspatial/handlers/microsoft_global_buildings.py +548 -0
- gigaspatial/handlers/ookla_speedtest.py +199 -0
- gigaspatial/handlers/opencellid.py +290 -0
- gigaspatial/handlers/osm.py +356 -0
- gigaspatial/handlers/overture.py +126 -0
- gigaspatial/handlers/rwi.py +157 -0
- gigaspatial/handlers/unicef_georepo.py +806 -0
- gigaspatial/handlers/worldpop.py +266 -0
- gigaspatial/processing/__init__.py +4 -0
- gigaspatial/processing/geo.py +1054 -0
- gigaspatial/processing/sat_images.py +39 -0
- gigaspatial/processing/tif_processor.py +477 -0
- gigaspatial/processing/utils.py +49 -0
@@ -0,0 +1,305 @@
|
|
1
|
+
from pydantic import BaseModel, Field
|
2
|
+
from typing import Optional, ClassVar, Union, Dict, List
|
3
|
+
import geopandas as gpd
|
4
|
+
from pathlib import Path
|
5
|
+
from urllib.error import HTTPError
|
6
|
+
from shapely.geometry import Polygon, MultiPolygon, shape
|
7
|
+
import pycountry
|
8
|
+
|
9
|
+
from gigaspatial.core.io.data_store import DataStore
|
10
|
+
from gigaspatial.core.io.readers import read_dataset
|
11
|
+
from gigaspatial.config import config
|
12
|
+
|
13
|
+
|
14
|
+
class AdminBoundary(BaseModel):
    """A single administrative unit: identifiers, names and its geometry.

    ``id``, ``name`` and ``geometry`` are required; the English name, parent
    identifier and country code are optional and default to ``None``.
    """

    id: str = Field(..., description="Unique identifier for the administrative unit")
    name: str = Field(..., description="Primary local name")
    geometry: Union[Polygon, MultiPolygon] = Field(
        ..., description="Geometry of the administrative boundary"
    )

    name_en: Optional[str] = Field(
        None, description="English name if different from local name"
    )
    parent_id: Optional[str] = Field(
        None, description="ID of parent administrative unit"
    )
    country_code: Optional[str] = Field(
        None, min_length=3, max_length=3, description="ISO 3166-1 alpha-3 country code"
    )

    # Pydantic v2 style configuration; the nested ``class Config`` form is
    # deprecated in v2 and warns at class creation time.
    # The shapely geometry types are not pydantic-native, so arbitrary types
    # have to be allowed explicitly.
    # NOTE: ``"extra": "allow"`` was considered by the original author but
    # left disabled; it stays disabled here.
    model_config = {"arbitrary_types_allowed": True}
|
36
|
+
|
37
|
+
|
38
|
+
class AdminBoundaries(BaseModel):
    """Collection of ``AdminBoundary`` objects sharing one administrative level.

    Instances are built with the ``from_gadm`` / ``from_data_store`` /
    ``from_georepo`` constructors or the ``create`` factory, and can be
    flattened into a GeoDataFrame with ``to_geodataframe``.
    """

    boundaries: List[AdminBoundary] = Field(default_factory=list)
    level: int = Field(
        ...,
        ge=0,
        le=4,
        description="Administrative level (e.g., 0=country, 1=state, etc.)",
    )

    logger: ClassVar = config.get_logger("AdminBoundaries")

    # Per data source: schema field -> source column name template.
    # ``{level}`` / ``{parent_level}`` are filled in by ``_map_fields``.
    _schema_config: ClassVar[Dict[str, Dict[str, str]]] = {
        "gadm": {
            "country_code": "GID_0",
            "id": "GID_{level}",
            "name": "NAME_{level}",
            "parent_id": "GID_{parent_level}",
        },
        "internal": {
            "id": "admin{level}_id_giga",
            "name": "name",
            "name_en": "name_en",
            "country_code": "iso_3166_1_alpha_3",
        },
    }

    @classmethod
    def get_schema_config(cls) -> Dict[str, Dict[str, str]]:
        """Return field mappings for different data sources"""
        return cls._schema_config

    @classmethod
    def from_gadm(
        cls, country_code: str, admin_level: int = 0, **kwargs
    ) -> "AdminBoundaries":
        """Load and create an instance from the GADM 4.1 GeoJSON download.

        Args:
            country_code: Country code inserted into the GADM file name.
            admin_level: Administrative level to download.
            **kwargs: Accepted for signature compatibility; not used here.

        Returns:
            A populated instance, or an empty instance built by
            ``_create_empty_instance`` when a ``ValueError``, ``HTTPError`` or
            ``FileNotFoundError`` is raised while loading.
        """
        url = f"https://geodata.ucdavis.edu/gadm/gadm4.1/json/gadm41_{country_code}_{admin_level}.json"
        cls.logger.info(
            f"Loading GADM data for country: {country_code}, admin level: {admin_level} from URL: {url}"
        )
        try:
            gdf = gpd.read_file(url)

            gdf = cls._map_fields(gdf, "gadm", admin_level)

            if admin_level == 0:
                # At level 0 both "country_code" and "id" map to GID_0 and the
                # rename keeps only "id", so the country code is copied back.
                gdf["country_code"] = gdf["id"]
                gdf["name"] = gdf["COUNTRY"]
            elif admin_level == 1:
                # At level 1 the parent column is GID_0, which the rename turns
                # into "parent_id" instead of "country_code".
                gdf["country_code"] = gdf["parent_id"]

            boundaries = [
                AdminBoundary(**row_dict) for row_dict in gdf.to_dict("records")
            ]
            cls.logger.info(f"Created {len(boundaries)} AdminBoundary objects.")
            # NOTE(review): this class declares no ``country_code`` field; the
            # keyword is presumably dropped by pydantic - confirm.
            return cls(
                boundaries=boundaries, level=admin_level, country_code=country_code
            )

        except (ValueError, HTTPError, FileNotFoundError) as e:
            cls.logger.warning(
                f"Error loading GADM data for {country_code} at admin level {admin_level}: {str(e)}"
            )
            return cls._create_empty_instance(country_code, admin_level, "gadm")

    @classmethod
    def from_data_store(
        cls,
        data_store: DataStore,
        path: Union[str, "Path"],
        admin_level: int = 0,
        **kwargs,
    ) -> "AdminBoundaries":
        """Load and create an instance from the internal data store.

        Args:
            data_store: Store the dataset is read from.
            path: Location of the dataset inside the store.
            admin_level: Administrative level the dataset represents.
            **kwargs: Forwarded to ``read_dataset``.

        Returns:
            A populated instance, or an empty instance when the dataset is
            empty or a ``FileNotFoundError`` / ``KeyError`` is raised.
        """
        cls.logger.info(
            f"Loading data from data store at path: {path}, admin level: {admin_level}"
        )
        try:
            gdf = read_dataset(data_store, str(path), **kwargs)

            if gdf.empty:
                cls.logger.warning(f"No data found at {path}.")
                return cls._create_empty_instance(None, admin_level, "internal")

            gdf = cls._map_fields(gdf, "internal", admin_level)

            if admin_level == 0:
                gdf["id"] = gdf["country_code"]
            else:
                # Parent id is the unit id without its last three characters.
                gdf["parent_id"] = gdf["id"].apply(lambda x: x[:-3])

            boundaries = [
                AdminBoundary(**row_dict) for row_dict in gdf.to_dict("records")
            ]
            cls.logger.info(f"Created {len(boundaries)} AdminBoundary objects.")
            return cls(boundaries=boundaries, level=admin_level)

        except (FileNotFoundError, KeyError) as e:
            cls.logger.warning(
                f"No data found at {path} for admin level {admin_level}: {str(e)}"
            )
            return cls._create_empty_instance(None, admin_level, "internal")

    @classmethod
    def from_georepo(
        cls,
        country_code: Optional[str] = None,
        admin_level: int = 0,
        **kwargs,
    ) -> "AdminBoundaries":
        """
        Load and create instance from GeoRepo (UNICEF) API.

        Args:
            country_code: Code passed to ``get_country_boundaries_by_iso3``
                and stored on every created boundary.
            admin_level: Administrative level (0=country, 1=state, etc.)
            kwargs: Extra arguments (ignored)

        Returns:
            AdminBoundaries instance
        """
        cls.logger.info(
            f"Loading data from UNICEF GeoRepo for country: {country_code}, admin level: {admin_level}"
        )
        from gigaspatial.handlers.unicef_georepo import get_country_boundaries_by_iso3

        # Fetch boundaries from GeoRepo
        geojson = get_country_boundaries_by_iso3(country_code, admin_level=admin_level)

        features = geojson.get("features", [])
        boundaries = []
        parent_level = admin_level - 1

        for feat in features:
            props = feat.get("properties", {})
            geometry = feat.get("geometry")
            shapely_geom = shape(geometry) if geometry else None
            # For admin_level 0, no parent_id
            parent_id = None
            if admin_level > 0:
                parent_id = props.get(f"adm{parent_level}_ucode")

            boundary = AdminBoundary(
                id=props.get("ucode"),
                name=props.get("name"),
                name_en=props.get("name_en"),
                geometry=shapely_geom,
                parent_id=parent_id,
                country_code=country_code,
            )
            boundaries.append(boundary)

        cls.logger.info(
            f"Created {len(boundaries)} AdminBoundary objects from GeoRepo data."
        )

        # When no country code was supplied, derive it from the first three
        # characters of each boundary id (assumes ucodes start with the ISO3
        # code - TODO confirm). Applied to every boundary so the collection
        # stays consistent.
        for boundary in boundaries:
            if not boundary.country_code:
                boundary.country_code = boundary.id[:3]

        return cls(boundaries=boundaries, level=admin_level)

    @classmethod
    def create(
        cls,
        country_code: Optional[str] = None,
        admin_level: int = 0,
        data_store: Optional[DataStore] = None,
        path: Optional[Union[str, "Path"]] = None,
        **kwargs,
    ) -> "AdminBoundaries":
        """Factory method choosing the data source from the given arguments.

        Resolution order:
            1. ``data_store`` given: read from the store, using ``path`` or,
               when ``path`` is missing, ``config.get_admin_path`` for the
               country.
            2. Only ``country_code`` given: use GeoRepo if its connection
               check succeeds, otherwise fall back to GADM.

        Args:
            country_code: Country identifier resolved to ISO3 via ``pycountry``.
            admin_level: Administrative level to load.
            data_store: Optional store to read from.
            path: Optional dataset path inside ``data_store``.
            **kwargs: Forwarded to ``from_data_store`` / ``from_gadm``.

        Returns:
            AdminBoundaries instance.

        Raises:
            ValueError: If neither a country code nor a usable
                ``data_store``/``path`` combination is supplied.
            LookupError: Propagated from ``pycountry`` when ``country_code``
                cannot be resolved.
        """
        cls.logger.info(
            f"Creating AdminBoundaries instance. Country: {country_code}, admin level: {admin_level}, data_store provided: {data_store is not None}, path provided: {path is not None}"
        )
        # Resolve the ISO3 code only when a country was given; a lookup with
        # ``None`` would fail before the argument checks below could run.
        iso3_code = None
        if country_code is not None:
            iso3_code = pycountry.countries.lookup(country_code).alpha_3

        if data_store is not None:
            if path is None:
                if iso3_code is None:
                    raise ValueError(
                        "If data_store is provided, path or country_code must also be specified."
                    )
                path = config.get_admin_path(
                    country_code=iso3_code,
                    admin_level=admin_level,
                )
            return cls.from_data_store(data_store, path, admin_level, **kwargs)

        if iso3_code is None:
            raise ValueError(
                "Either country_code or (data_store, path) must be provided."
            )

        from gigaspatial.handlers.unicef_georepo import GeoRepoClient

        client = GeoRepoClient()

        if client.check_connection():
            cls.logger.info("GeoRepo connection successful.")
            return cls.from_georepo(
                iso3_code,
                admin_level=admin_level,
            )

        cls.logger.warning("GeoRepo connection check failed. Falling back to GADM.")
        return cls.from_gadm(iso3_code, admin_level, **kwargs)

    @classmethod
    def _create_empty_instance(
        cls, country_code: Optional[str], admin_level: int, source_type: str
    ) -> "AdminBoundaries":
        """Create an empty instance that remembers the expected column schema.

        Args:
            country_code: Country the (missing) data was requested for.
            admin_level: Administrative level of the instance.
            source_type: Key into the schema configuration ("gadm"/"internal").

        Returns:
            An instance without boundaries whose ``_empty_schema`` attribute
            lists the columns ``to_geodataframe`` should expose.
        """
        # NOTE(review): this class declares no ``country_code`` field; the
        # keyword is presumably dropped by pydantic - confirm.
        instance = cls(boundaries=[], level=admin_level, country_code=country_code)

        schema_fields = set(cls.get_schema_config()[source_type].keys())
        schema_fields.update(["geometry", "country_code", "id", "name", "name_en"])
        if admin_level > 0:
            schema_fields.add("parent_id")

        # Sorted so the column order of the empty frame does not depend on
        # set iteration order.
        instance._empty_schema = sorted(schema_fields)
        return instance

    @classmethod
    def _map_fields(
        cls,
        gdf: gpd.GeoDataFrame,
        source: str,
        current_level: int,
    ) -> gpd.GeoDataFrame:
        """Rename source-specific columns of ``gdf`` to schema field names.

        Args:
            gdf: Frame as read from the data source.
            source: Key into the schema configuration; unknown keys rename
                nothing.
            current_level: Administrative level used for ``{level}``;
                ``{parent_level}`` is ``current_level - 1``.

        Returns:
            A renamed copy of ``gdf``.
        """
        templates = cls.get_schema_config().get(source, {})
        parent_level = current_level - 1

        # When two schema fields resolve to the same source column, the field
        # listed later in the configuration wins (plain dict overwrite).
        field_mapping = {
            template.format(level=current_level, parent_level=parent_level): field
            for field, template in templates.items()
        }

        return gdf.rename(columns=field_mapping)

    def to_geodataframe(self) -> gpd.GeoDataFrame:
        """Convert the AdminBoundaries to a GeoDataFrame.

        Returns:
            A frame with one row per boundary, or an empty frame with the
            expected columns when there are no boundaries. Both use
            ``crs=4326``.
        """
        if not self.boundaries:
            if hasattr(self, "_empty_schema"):
                # Copy so the stored schema is never shared with the frame.
                columns = list(self._empty_schema)
            else:
                columns = ["id", "name", "country_code", "geometry"]
                if self.level > 0:
                    columns.append("parent_id")

            return gpd.GeoDataFrame(columns=columns, geometry="geometry", crs=4326)

        return gpd.GeoDataFrame(
            [boundary.model_dump() for boundary in self.boundaries],
            geometry="geometry",
            crs=4326,
        )
|