giga-spatial 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- giga_spatial-0.6.0.dist-info/METADATA +141 -0
- giga_spatial-0.6.0.dist-info/RECORD +47 -0
- giga_spatial-0.6.0.dist-info/WHEEL +5 -0
- giga_spatial-0.6.0.dist-info/licenses/LICENSE +661 -0
- giga_spatial-0.6.0.dist-info/top_level.txt +1 -0
- gigaspatial/__init__.py +1 -0
- gigaspatial/config.py +226 -0
- gigaspatial/core/__init__.py +0 -0
- gigaspatial/core/io/__init__.py +5 -0
- gigaspatial/core/io/adls_data_store.py +325 -0
- gigaspatial/core/io/data_api.py +113 -0
- gigaspatial/core/io/data_store.py +147 -0
- gigaspatial/core/io/local_data_store.py +92 -0
- gigaspatial/core/io/readers.py +265 -0
- gigaspatial/core/io/writers.py +128 -0
- gigaspatial/core/schemas/__init__.py +0 -0
- gigaspatial/core/schemas/entity.py +244 -0
- gigaspatial/generators/__init__.py +2 -0
- gigaspatial/generators/poi.py +636 -0
- gigaspatial/generators/zonal/__init__.py +3 -0
- gigaspatial/generators/zonal/base.py +370 -0
- gigaspatial/generators/zonal/geometry.py +439 -0
- gigaspatial/generators/zonal/mercator.py +78 -0
- gigaspatial/grid/__init__.py +1 -0
- gigaspatial/grid/mercator_tiles.py +286 -0
- gigaspatial/handlers/__init__.py +40 -0
- gigaspatial/handlers/base.py +761 -0
- gigaspatial/handlers/boundaries.py +305 -0
- gigaspatial/handlers/ghsl.py +772 -0
- gigaspatial/handlers/giga.py +145 -0
- gigaspatial/handlers/google_open_buildings.py +472 -0
- gigaspatial/handlers/hdx.py +241 -0
- gigaspatial/handlers/mapbox_image.py +208 -0
- gigaspatial/handlers/maxar_image.py +291 -0
- gigaspatial/handlers/microsoft_global_buildings.py +548 -0
- gigaspatial/handlers/ookla_speedtest.py +199 -0
- gigaspatial/handlers/opencellid.py +290 -0
- gigaspatial/handlers/osm.py +356 -0
- gigaspatial/handlers/overture.py +126 -0
- gigaspatial/handlers/rwi.py +157 -0
- gigaspatial/handlers/unicef_georepo.py +806 -0
- gigaspatial/handlers/worldpop.py +266 -0
- gigaspatial/processing/__init__.py +4 -0
- gigaspatial/processing/geo.py +1054 -0
- gigaspatial/processing/sat_images.py +39 -0
- gigaspatial/processing/tif_processor.py +477 -0
- gigaspatial/processing/utils.py +49 -0
@@ -0,0 +1,548 @@
|
|
1
|
+
from dataclasses import field
|
2
|
+
from pydantic.dataclasses import dataclass
|
3
|
+
from pydantic import ConfigDict
|
4
|
+
from pathlib import Path
|
5
|
+
import functools
|
6
|
+
import multiprocessing
|
7
|
+
from typing import List, Optional, Tuple, Union, Dict, Iterable
|
8
|
+
import numpy as np
|
9
|
+
import pandas as pd
|
10
|
+
from shapely.geometry import Point
|
11
|
+
from shapely.geometry.base import BaseGeometry
|
12
|
+
from difflib import SequenceMatcher
|
13
|
+
import pycountry
|
14
|
+
import requests
|
15
|
+
from tqdm import tqdm
|
16
|
+
import logging
|
17
|
+
import geopandas as gpd
|
18
|
+
|
19
|
+
from gigaspatial.core.io.data_store import DataStore
|
20
|
+
from gigaspatial.grid.mercator_tiles import (
|
21
|
+
MercatorTiles,
|
22
|
+
CountryMercatorTiles,
|
23
|
+
)
|
24
|
+
from gigaspatial.handlers.base import (
|
25
|
+
BaseHandlerReader,
|
26
|
+
BaseHandlerConfig,
|
27
|
+
BaseHandlerDownloader,
|
28
|
+
BaseHandler,
|
29
|
+
)
|
30
|
+
from gigaspatial.config import config as global_config
|
31
|
+
|
32
|
+
|
33
|
+
@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
class MSBuildingsConfig(BaseHandlerConfig):
    """Configuration for Microsoft Global Buildings dataset files.

    On initialization this config loads the dataset's tile catalogue (a CSV
    with one quadkey tile per row), builds or loads a mapping from the
    dataset's free-form location names to ISO 3166-1 alpha-3 country codes,
    and exposes helpers that resolve which tiles intersect a country,
    geometry, or collection of points.
    """

    # URL of the CSV catalogue listing every available tile.
    TILE_URLS: str = (
        "https://minedbuildings.z5.web.core.windows.net/global-buildings/dataset-links.csv"
    )
    # Zoom level at which the dataset publishes its quadkey tiles.
    MERCATOR_ZOOM_LEVEL: int = 9
    base_path: Path = global_config.get_path("microsoft_global_buildings", "bronze")

    # Persisted cache of the location -> ISO3 mapping.
    LOCATION_MAPPING_FILE: Path = base_path / "location_mapping.json"
    # Minimum similarity ratio for fuzzy location/country matching.
    SIMILARITY_SCORE: float = 0.8
    # Hand-curated mappings for location names that fuzzy matching misses.
    DEFAULT_MAPPING: Dict[str, str] = field(
        default_factory=lambda: {
            "Bonaire": "BES",
            "Brunei": "BRN",
            "IvoryCoast": "CIV",
            "CongoDRC": "COD",
            "DemocraticRepublicoftheCongo": "COD",
            "RepublicoftheCongo": "COG",
            "TheGambia": "GMB",
            "FYROMakedonija": "MKD",
            "SultanateofOman": "OMN",
            "StateofQatar": "QAT",
            "Russia": "RUS",
            "KingdomofSaudiArabia": "SAU",
            "Svalbard": "SJM",
            "Swaziland": "SWZ",
            "StMartin": "SXM",
            "leSaint-Martin": "MAF",
            "Turkey": "TUR",
            "VaticanCity": "VAT",
            "BritishVirginIslands": "VGB",
            "USVirginIslands": "VIR",
            "RepublicofYemen": "YEM",
            "CzechRepublic": "CZE",
            "French-Martinique": "MTQ",
            "French-Guadeloupe": "GLP",
            "UnitedStates": "USA",
        }
    )
    # Optional user-supplied overrides applied on top of the mapping.
    CUSTOM_MAPPING: Optional[Dict[str, str]] = None

    def __post_init__(self):
        """Initialize the configuration, load tile URLs, and set up location mapping."""
        super().__post_init__()
        self._load_tile_urls()
        # All catalogue rows carry the same upload date; take the first row's.
        self.upload_date = self.df_tiles.upload_date[0]
        self._setup_location_mapping()

    def _load_tile_urls(self):
        """Load dataset links from the remote CSV catalogue into `self.df_tiles`."""
        self.df_tiles = pd.read_csv(
            self.TILE_URLS,
            names=["location", "quadkey", "url", "size", "upload_date"],
            dtype={"quadkey": str},
            header=0,
        )

    def _setup_location_mapping(self):
        """Load or create the mapping between dataset locations and ISO country codes."""
        from gigaspatial.core.io.readers import read_json
        from gigaspatial.core.io.writers import write_json

        if self.data_store.file_exists(str(self.LOCATION_MAPPING_FILE)):
            self.location_mapping = read_json(
                self.data_store, str(self.LOCATION_MAPPING_FILE)
            )
        else:
            self.location_mapping = self.create_location_mapping(
                similarity_score_threshold=self.SIMILARITY_SCORE
            )
            self.location_mapping.update(self.DEFAULT_MAPPING)
            write_json(
                self.location_mapping, self.data_store, str(self.LOCATION_MAPPING_FILE)
            )

        self.location_mapping.update(self.CUSTOM_MAPPING or {})
        self._map_locations()
        # Normalize unmapped entries from NaN to None so truthiness checks
        # (e.g. in get_data_unit_path) behave predictably.
        self.df_tiles.loc[self.df_tiles.country.isnull(), "country"] = None

    def _map_locations(self):
        """Map the 'location' column in the tiles DataFrame to ISO country codes."""
        self.df_tiles["country"] = self.df_tiles.location.map(self.location_mapping)

    def create_location_mapping(self, similarity_score_threshold: float = 0.8):
        """
        Create a mapping between the dataset's location names and ISO 3166-1 alpha-3 country codes.

        This function iterates through known countries and attempts to find matching
        locations in the dataset based on string similarity.

        Args:
            similarity_score_threshold: The minimum similarity score (between 0 and 1)
                                        for a dataset location to be considered a match
                                        for a country. Defaults to 0.8.

        Returns:
            A dictionary where keys are dataset location names and values are
            the corresponding ISO 3166-1 alpha-3 country codes.
        """

        def similar(a, b):
            return SequenceMatcher(None, a, b).ratio()

        location_mapping = dict()

        for country in pycountry.countries:
            if country.name not in self.df_tiles.location.unique():
                try:
                    country_quadkey = CountryMercatorTiles.create(
                        country.alpha_3, self.MERCATOR_ZOOM_LEVEL
                    )
                except Exception:  # narrowed from bare `except:`
                    self.logger.warning(f"{country.name} is not mapped.")
                    continue
                country_datasets = country_quadkey.filter_quadkeys(
                    self.df_tiles.quadkey
                )
                matching_locations = self.df_tiles[
                    self.df_tiles.quadkey.isin(country_datasets.quadkeys)
                ].location.unique()
                # Prefer the colloquial name when pycountry provides one
                # (e.g. "Bolivia" over "Bolivia, Plurinational State of").
                scores = np.array(
                    [
                        (
                            similar(c, country.common_name)
                            if hasattr(country, "common_name")
                            else similar(c, country.name)
                        )
                        for c in matching_locations
                    ]
                )
                if any(scores > similarity_score_threshold):
                    matched = matching_locations[scores > similarity_score_threshold]
                    # BUGFIX: warn for *any* ambiguous match. The previous
                    # `len(matched) > 2` silently accepted the two-candidate case.
                    if len(matched) > 1:
                        self.logger.warning(
                            f"Multiple matches exist for {country.name}. {country.name} is not mapped."
                        )
                    location_mapping[matched[0]] = country.alpha_3
                    self.logger.debug(f"{country.name} matched with {matched[0]}!")
                else:
                    self.logger.warning(
                        f"No direct matches for {country.name}. {country.name} is not mapped."
                    )
                    self.logger.debug("Possible matches are: ")
                    for c, score in zip(matching_locations, scores):
                        # BUGFIX: `debug(c, score)` treated `score` as a %-format
                        # argument with no placeholder in `c`, producing a logging
                        # formatting error instead of a message.
                        self.logger.debug("%s: %s", c, score)
            else:
                location_mapping[country.name] = country.alpha_3

        return location_mapping

    def get_relevant_data_units_by_geometry(
        self, geometry: Union[BaseGeometry, gpd.GeoDataFrame], **kwargs
    ) -> List[dict]:
        """
        Return intersecting tiles for a given geometry or GeoDataFrame.
        """
        return self._get_relevant_tiles(geometry)

    def get_relevant_data_units_by_points(
        self, points: Iterable[Union[Point, tuple]], **kwargs
    ) -> List[dict]:
        """
        Return intersecting tiles for a list of points.
        """
        return self._get_relevant_tiles(points)

    def get_relevant_data_units_by_country(
        self, country: str, **kwargs
    ) -> List[dict]:
        """
        Return intersecting tiles for a given country.
        """
        return self._get_relevant_tiles(country)

    def get_data_unit_path(self, unit: Union[pd.Series, dict], **kwargs) -> Path:
        """Construct the storage path for a single tile's compressed CSV file."""
        # Prefer the mapped ISO3 code; fall back to the raw dataset location
        # for tiles whose location could not be mapped (country is None).
        tile_location = unit["country"] if unit["country"] else unit["location"]

        return (
            self.base_path
            / tile_location
            / self.upload_date
            / f'{unit["quadkey"]}.csv.gz'
        )

    def get_data_unit_paths(
        self, units: Union[pd.DataFrame, Iterable[dict]], **kwargs
    ) -> List:
        """Resolve storage paths for a collection of tile records."""
        if isinstance(units, pd.DataFrame):
            return [self.get_data_unit_path(row) for _, row in units.iterrows()]
        return super().get_data_unit_paths(units)

    def _get_relevant_tiles(
        self,
        source: Union[
            str,  # country
            BaseGeometry,  # shapely geoms
            gpd.GeoDataFrame,
            Iterable[Union[Point, Tuple[float, float]]],  # points
        ],
    ) -> List[dict]:
        """
        Get the Microsoft Buildings tiles that intersect a given source geometry.

        In case a country is given, this method first tries to find tiles directly
        mapped to the given country. If no directly mapped tiles are found and the
        country is not in the location mapping, it attempts to find overlapping
        tiles by creating Mercator tiles for the country and filtering the
        dataset's tiles.

        Args:
            source: A country code/name, a Shapely geometry, a GeoDataFrame, or a
                list of Point objects or (lat, lon) tuples representing the area
                of interest. The coordinates are assumed to be in EPSG:4326.

        Returns:
            A list of record dicts (keys: quadkey, url, country, location) for
            the tiles that spatially intersect the `source`. Returns an empty
            list if no intersecting tiles are found.
        """
        if isinstance(source, str):
            try:
                country_code = pycountry.countries.lookup(source).alpha_3
            except LookupError:  # pycountry raises LookupError on failed lookup
                raise ValueError("Invalid `country` value!")

            mask = self.df_tiles["country"] == country_code

            if any(mask):
                return self.df_tiles.loc[
                    mask, ["quadkey", "url", "country", "location"]
                ].to_dict("records")

            self.logger.warning(
                f"The country code '{country_code}' is not directly in the location mapping. "
                "Manually checking for overlapping locations with the country boundary."
            )

            source_tiles = CountryMercatorTiles.create(
                country_code, self.MERCATOR_ZOOM_LEVEL
            )
        else:
            source_tiles = MercatorTiles.from_spatial(
                source=source, zoom_level=self.MERCATOR_ZOOM_LEVEL
            )

        filtered_tiles = source_tiles.filter_quadkeys(self.df_tiles.quadkey)

        mask = self.df_tiles.quadkey.isin(filtered_tiles.quadkeys)

        return self.df_tiles.loc[
            mask, ["quadkey", "url", "country", "location"]
        ].to_dict("records")
|
+
|
289
|
+
class MSBuildingsDownloader(BaseHandlerDownloader):
    """A class to handle downloads of Microsoft's Global ML Building Footprints dataset."""

    def __init__(
        self,
        config: Optional[MSBuildingsConfig] = None,
        data_store: Optional[DataStore] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """
        Initialize the downloader.

        Args:
            config: Optional configuration for customizing download behavior and file paths.
                    If None, a default `MSBuildingsConfig` is used.
            data_store: Optional instance of a `DataStore` for managing data storage.
                        If provided, it overrides the `data_store` in the `config`.
                        If None, the `data_store` from the `config` is used.
            logger: Optional custom logger instance. If None, a default logger
                    named after the module is created and used.
        """
        config = config or MSBuildingsConfig()
        super().__init__(config=config, data_store=data_store, logger=logger)

    def download_data_unit(
        self,
        tile_info: Union[pd.Series, dict],
        **kwargs,
    ) -> Optional[str]:
        """Download the data file for a single tile.

        Args:
            tile_info: A tile record with at least 'url' and 'quadkey' keys.

        Returns:
            The local path of the downloaded file, or None on failure.
        """
        tile_url = tile_info["url"]

        try:
            # Stream the response so large tiles are not buffered in memory.
            response = requests.get(tile_url, stream=True)
            response.raise_for_status()

            file_path = str(self.config.get_data_unit_path(tile_info))

            with self.data_store.open(file_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)

            self.logger.debug(
                f"Successfully downloaded tile: {tile_info['quadkey']}"
            )
            return file_path

        except requests.exceptions.RequestException as e:
            self.logger.error(
                f"Failed to download tile {tile_info['quadkey']}: {str(e)}"
            )
            return None
        except Exception as e:
            self.logger.error(f"Unexpected error downloading dataset: {str(e)}")
            return None

    def download_data_units(
        self,
        tiles: Union[pd.DataFrame, List[dict]],
        **kwargs,
    ) -> List[str]:
        """Download data files for multiple tiles in parallel.

        Args:
            tiles: Tile records as a DataFrame or a list of dicts.

        Returns:
            Paths of successfully downloaded files (failures are dropped).
        """
        if len(tiles) == 0:
            self.logger.warning("There is no matching data")
            return []

        # Normalize the input to a plain iterable of per-tile records.
        tile_records = (
            [row for _, row in tiles.iterrows()]
            if isinstance(tiles, pd.DataFrame)
            else tiles
        )

        # Pass the bound method directly; the previous
        # `functools.partial(self.download_data_unit)` bound no arguments
        # and was a no-op wrapper.
        with multiprocessing.Pool(self.config.n_workers) as pool:
            file_paths = list(
                tqdm(
                    pool.imap(self.download_data_unit, tile_records),
                    total=len(tiles),
                    desc="Downloading polygons data",
                )
            )

        return [path for path in file_paths if path is not None]

    def download(
        self,
        source: Union[
            str,  # country
            List[Union[Tuple[float, float], Point]],  # points
            BaseGeometry,  # shapely geoms
            gpd.GeoDataFrame,
        ],
        **kwargs,
    ) -> List[str]:
        """
        Download Microsoft Global ML Building Footprints data for a specified geographic region.

        The region can be defined by a country, a list of points,
        a Shapely geometry, or a GeoDataFrame. This method identifies the
        relevant data tiles intersecting the region and downloads them in parallel.

        Args:
            source: Defines the geographic area for which to download data.
                    Can be:
                      - A string representing a country code or name.
                      - A list of (latitude, longitude) tuples or Shapely Point objects.
                      - A Shapely BaseGeometry object (e.g., Polygon, MultiPolygon).
                      - A GeoDataFrame with a geometry column in EPSG:4326.
            **kwargs: Additional parameters passed to data unit resolution methods

        Returns:
            A list of local file paths for the successfully downloaded tiles.
            Returns an empty list if no data is found for the region or if
            all downloads fail.
        """
        tiles = self.config.get_relevant_data_units(source, **kwargs)
        return self.download_data_units(tiles, **kwargs)

    def download_by_country(
        self,
        country: str,
        data_store: Optional[DataStore] = None,
        country_geom_path: Optional[Union[str, Path]] = None,
    ) -> List[str]:
        """
        Download Microsoft Global ML Building Footprints data for a specific country.

        This is a convenience method to download data for an entire country
        using its code or name.

        Args:
            country: The country code (e.g., 'USA', 'GBR') or name.
            data_store: Optional instance of a `DataStore` to be used by
                        `AdminBoundaries` for loading country boundaries. If None,
                        `AdminBoundaries` will use its default data loading.
            country_geom_path: Optional path to a GeoJSON file containing the
                               country boundary. If provided, this boundary is used
                               instead of the default from `AdminBoundaries`.

        Returns:
            A list of local file paths for the successfully downloaded tiles.
            Returns an empty list if no data is found for the country or if
            all downloads fail.
        """
        return self.download(
            source=country, data_store=data_store, path=country_geom_path
        )
441
|
+
|
442
|
+
class MSBuildingsReader(BaseHandlerReader):
    """
    Reader for Microsoft Global Buildings data, supporting country, points, and geometry-based resolution.
    """

    def __init__(
        self,
        config: Optional[MSBuildingsConfig] = None,
        data_store: Optional[DataStore] = None,
        logger: Optional[logging.Logger] = None,
    ):
        # Fall back to a default configuration when none is supplied.
        super().__init__(
            config=config or MSBuildingsConfig(),
            data_store=data_store,
            logger=logger,
        )

    def load_from_paths(
        self, source_data_path: List[Union[str, Path]], **kwargs
    ) -> gpd.GeoDataFrame:
        """
        Load building data from Microsoft Buildings dataset.
        Args:
            source_data_path: List of file paths to load
        Returns:
            GeoDataFrame containing building data
        """
        from gigaspatial.core.io.readers import read_gzipped_json_or_csv
        from shapely.geometry import shape

        def _read_tile(data_store: DataStore, file_path: str):
            # Parse one compressed tile file and convert its GeoJSON-style
            # geometry column into shapely objects.
            frame = read_gzipped_json_or_csv(
                file_path=file_path, data_store=data_store
            )
            frame["geometry"] = frame["geometry"].apply(shape)
            return gpd.GeoDataFrame(frame, crs=4326)

        return self._load_tabular_data(
            file_paths=source_data_path, read_function=_read_tile
        )
|
+
|
480
|
+
class MSBuildingsHandler(BaseHandler):
    """
    Handler for Microsoft Global Buildings dataset.

    Offers a single entry point for downloading and loading Microsoft Global
    Buildings data by wiring together the configuration, downloader, and
    reader components and managing their lifecycles.
    """

    def create_config(
        self, data_store: DataStore, logger: logging.Logger, **kwargs
    ) -> MSBuildingsConfig:
        """
        Build the configuration object for this handler.

        Args:
            data_store: Data store the configuration should operate on.
            logger: Logger the configuration should emit through.
            **kwargs: Extra options forwarded to `MSBuildingsConfig`.

        Returns:
            A configured `MSBuildingsConfig` instance.
        """
        return MSBuildingsConfig(data_store=data_store, logger=logger, **kwargs)

    def create_downloader(
        self,
        config: MSBuildingsConfig,
        data_store: DataStore,
        logger: logging.Logger,
        **kwargs,
    ) -> MSBuildingsDownloader:
        """
        Build the downloader component for this handler.

        Args:
            config: Configuration object shared across components.
            data_store: Data store the downloader writes into.
            logger: Logger the downloader should emit through.
            **kwargs: Extra options forwarded to `MSBuildingsDownloader`.

        Returns:
            A configured `MSBuildingsDownloader` instance.
        """
        downloader = MSBuildingsDownloader(
            config=config, data_store=data_store, logger=logger, **kwargs
        )
        return downloader

    def create_reader(
        self,
        config: MSBuildingsConfig,
        data_store: DataStore,
        logger: logging.Logger,
        **kwargs,
    ) -> MSBuildingsReader:
        """
        Build the reader component for this handler.

        Args:
            config: Configuration object shared across components.
            data_store: Data store the reader loads from.
            logger: Logger the reader should emit through.
            **kwargs: Extra options forwarded to `MSBuildingsReader`.

        Returns:
            A configured `MSBuildingsReader` instance.
        """
        reader = MSBuildingsReader(
            config=config, data_store=data_store, logger=logger, **kwargs
        )
        return reader
|