giga-spatial 0.6.0 (giga_spatial-0.6.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- giga_spatial-0.6.0.dist-info/METADATA +141 -0
- giga_spatial-0.6.0.dist-info/RECORD +47 -0
- giga_spatial-0.6.0.dist-info/WHEEL +5 -0
- giga_spatial-0.6.0.dist-info/licenses/LICENSE +661 -0
- giga_spatial-0.6.0.dist-info/top_level.txt +1 -0
- gigaspatial/__init__.py +1 -0
- gigaspatial/config.py +226 -0
- gigaspatial/core/__init__.py +0 -0
- gigaspatial/core/io/__init__.py +5 -0
- gigaspatial/core/io/adls_data_store.py +325 -0
- gigaspatial/core/io/data_api.py +113 -0
- gigaspatial/core/io/data_store.py +147 -0
- gigaspatial/core/io/local_data_store.py +92 -0
- gigaspatial/core/io/readers.py +265 -0
- gigaspatial/core/io/writers.py +128 -0
- gigaspatial/core/schemas/__init__.py +0 -0
- gigaspatial/core/schemas/entity.py +244 -0
- gigaspatial/generators/__init__.py +2 -0
- gigaspatial/generators/poi.py +636 -0
- gigaspatial/generators/zonal/__init__.py +3 -0
- gigaspatial/generators/zonal/base.py +370 -0
- gigaspatial/generators/zonal/geometry.py +439 -0
- gigaspatial/generators/zonal/mercator.py +78 -0
- gigaspatial/grid/__init__.py +1 -0
- gigaspatial/grid/mercator_tiles.py +286 -0
- gigaspatial/handlers/__init__.py +40 -0
- gigaspatial/handlers/base.py +761 -0
- gigaspatial/handlers/boundaries.py +305 -0
- gigaspatial/handlers/ghsl.py +772 -0
- gigaspatial/handlers/giga.py +145 -0
- gigaspatial/handlers/google_open_buildings.py +472 -0
- gigaspatial/handlers/hdx.py +241 -0
- gigaspatial/handlers/mapbox_image.py +208 -0
- gigaspatial/handlers/maxar_image.py +291 -0
- gigaspatial/handlers/microsoft_global_buildings.py +548 -0
- gigaspatial/handlers/ookla_speedtest.py +199 -0
- gigaspatial/handlers/opencellid.py +290 -0
- gigaspatial/handlers/osm.py +356 -0
- gigaspatial/handlers/overture.py +126 -0
- gigaspatial/handlers/rwi.py +157 -0
- gigaspatial/handlers/unicef_georepo.py +806 -0
- gigaspatial/handlers/worldpop.py +266 -0
- gigaspatial/processing/__init__.py +4 -0
- gigaspatial/processing/geo.py +1054 -0
- gigaspatial/processing/sat_images.py +39 -0
- gigaspatial/processing/tif_processor.py +477 -0
- gigaspatial/processing/utils.py +49 -0
gigaspatial/handlers/giga.py
@@ -0,0 +1,145 @@
+import requests
+import pandas as pd
+import time
+from pydantic.dataclasses import dataclass, Field
+from pydantic import ConfigDict
+from shapely.geometry import Point
+import pycountry
+import logging
+
+from gigaspatial.config import config as global_config
+
+
+@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
+class GigaSchoolLocationFetcher:
+    """
+    Fetch and process school location data from the Giga School Geolocation Data API.
+    """
+
+    country: str = Field(...)
+    api_url: str = Field(
+        default="https://uni-ooi-giga-maps-service.azurewebsites.net/api/v1/schools_location/country/{isocode3}",
+        description="Base URL for the Giga School API",
+    )
+    api_key: str = global_config.GIGA_SCHOOL_LOCATION_API_KEY
+    page_size: int = Field(default=1000, description="Number of records per API page")
+    sleep_time: float = Field(
+        default=0.2, description="Sleep time between API requests"
+    )
+
+    logger: logging.Logger = Field(default=None, repr=False)
+
+    def __post_init__(self):
+        try:
+            self.country = pycountry.countries.lookup(self.country).alpha_3
+        except LookupError:
+            raise ValueError(f"Invalid country code provided: {self.country}")
+        self.api_url = self.api_url.format(isocode3=self.country)
+        if self.logger is None:
+            self.logger = global_config.get_logger(self.__class__.__name__)
+
+    def fetch_locations(self, **kwargs) -> pd.DataFrame:
+        """
+        Fetch and process school locations.
+
+        Args:
+            **kwargs: Additional parameters for customization
+                - page_size: Override default page size
+                - sleep_time: Override default sleep time between requests
+                - max_pages: Limit the number of pages to fetch
+
+        Returns:
+            pd.DataFrame: School locations with geospatial info.
+        """
+        # Override defaults with kwargs if provided
+        page_size = kwargs.get("page_size", self.page_size)
+        sleep_time = kwargs.get("sleep_time", self.sleep_time)
+        max_pages = kwargs.get("max_pages", None)
+
+        # Prepare headers
+        headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Accept": "application/json",
+        }
+
+        all_data = []
+        page = 1
+
+        self.logger.info(
+            f"Starting to fetch school locations for country: {self.country}"
+        )
+
+        while True:
+            # Check if we've reached max_pages limit
+            if max_pages and page > max_pages:
+                self.logger.info(f"Reached maximum pages limit: {max_pages}")
+                break
+
+            params = {"page": page, "size": page_size}
+
+            try:
+                self.logger.debug(f"Fetching page {page} with params: {params}")
+                response = requests.get(self.api_url, headers=headers, params=params)
+                response.raise_for_status()
+
+                parsed = response.json()
+                data = parsed.get("data", [])
+
+            except requests.exceptions.RequestException as e:
+                self.logger.error(f"Request failed on page {page}: {e}")
+                break
+            except ValueError as e:
+                self.logger.error(f"Failed to parse JSON response on page {page}: {e}")
+                break
+
+            # Check if we got any data
+            if not data:
+                self.logger.info(f"No data on page {page}. Stopping.")
+                break
+
+            all_data.extend(data)
+            self.logger.info(f"Fetched page {page} with {len(data)} records")
+
+            # If we got fewer records than page_size, we've reached the end
+            if len(data) < page_size:
+                self.logger.info("Reached end of data (partial page received)")
+                break
+
+            page += 1
+
+            # Sleep to be respectful to the API
+            if sleep_time > 0:
+                time.sleep(sleep_time)
+
+        self.logger.info(f"Finished fetching. Total records: {len(all_data)}")
+
+        # Convert to DataFrame and process
+        if not all_data:
+            self.logger.warning("No data fetched, returning empty DataFrame")
+            return pd.DataFrame()
+
+        df = pd.DataFrame(all_data)
+
+        df = self._process_geospatial_data(df)
+
+        return df
+
+    def _process_geospatial_data(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Process and enhance the DataFrame with geospatial information.
+
+        Args:
+            df: Raw DataFrame from API
+
+        Returns:
+            pd.DataFrame: Enhanced DataFrame with geospatial data
+        """
+        if df.empty:
+            return df
+
+        df["geometry"] = df.apply(
+            lambda row: Point(row["longitude"], row["latitude"]), axis=1
+        )
+        self.logger.info(f"Created geometry for all {len(df)} records")
+
+        return df
gigaspatial/handlers/google_open_buildings.py
@@ -0,0 +1,472 @@
+from dataclasses import dataclass
+from pathlib import Path
+import functools
+import multiprocessing
+from typing import List, Optional, Union, Literal, Tuple, Iterable
+import geopandas as gpd
+import pandas as pd
+from shapely.geometry import MultiPoint, Point
+from shapely.geometry.base import BaseGeometry
+import requests
+from tqdm import tqdm
+import logging
+
+from gigaspatial.core.io.data_store import DataStore
+from gigaspatial.handlers.base import (
+    BaseHandlerReader,
+    BaseHandlerConfig,
+    BaseHandlerDownloader,
+    BaseHandler,
+)
+from gigaspatial.config import config as global_config
+
+
+@dataclass
+class GoogleOpenBuildingsConfig(BaseHandlerConfig):
+    """
+    Configuration for Google Open Buildings dataset files.
+    Implements the BaseHandlerConfig interface for data unit resolution.
+    """
+
+    TILES_URL: str = (
+        "https://openbuildings-public-dot-gweb-research.uw.r.appspot.com/public/tiles.geojson"
+    )
+    base_path: Path = global_config.get_path("google_open_buildings", "bronze")
+    data_types: tuple = ("polygons", "points")
+
+    def __post_init__(self):
+        super().__post_init__()
+        self._load_s2_tiles()
+
+    def _load_s2_tiles(self):
+        """Load S2 tiles from GeoJSON file."""
+        response = requests.get(self.TILES_URL)
+        response.raise_for_status()
+        self.tiles_gdf = gpd.GeoDataFrame.from_features(
+            response.json()["features"], crs="EPSG:4326"
+        )
+
+    def get_relevant_data_units_by_geometry(
+        self, geometry: Union[BaseGeometry, gpd.GeoDataFrame], **kwargs
+    ) -> List[dict]:
+        """
+        Return intersecting tiles for a given geometry or GeoDataFrame.
+        """
+        return self._get_relevant_tiles(geometry)
+
+    def get_relevant_data_units_by_points(
+        self, points: Iterable[Union[Point, tuple]], **kwargs
+    ) -> List[dict]:
+        """
+        Return intersecting tiles for a list of points.
+        """
+        return self._get_relevant_tiles(points)
+
+    def get_data_unit_path(
+        self,
+        unit: Union[pd.Series, dict, str],
+        data_type: str = "polygons",
+        **kwargs,
+    ) -> Path:
+        """
+        Given a tile row or tile_id, return the corresponding file path.
+        """
+        tile_id = (
+            unit["tile_id"]
+            if isinstance(unit, pd.Series) or isinstance(unit, dict)
+            else unit
+        )
+        return self.base_path / f"{data_type}_s2_level_4_{tile_id}_buildings.csv.gz"
+
+    def get_data_unit_paths(
+        self,
+        units: Union[pd.DataFrame, Iterable[Union[dict, str]]],
+        data_type: str = "polygons",
+        **kwargs,
+    ) -> list:
+        """
+        Given data unit identifiers, return the corresponding file paths.
+        """
+        if isinstance(units, pd.DataFrame):
+            return [
+                self.get_data_unit_path(row, data_type=data_type, **kwargs)
+                for _, row in units.iterrows()
+            ]
+        return super().get_data_unit_paths(units, data_type=data_type)
+
+    def _get_relevant_tiles(
+        self,
+        source: Union[
+            BaseGeometry,
+            gpd.GeoDataFrame,
+            Iterable[Union[Point, tuple]],
+        ],
+    ) -> List[dict]:
+        """
+        Identify and return the S2 tiles that spatially intersect with the given geometry.
+        """
+        if isinstance(source, gpd.GeoDataFrame):
+            if source.crs != "EPSG:4326":
+                source = source.to_crs("EPSG:4326")
+            search_geom = source.geometry.unary_union
+        elif isinstance(source, BaseGeometry):
+            search_geom = source
+        elif isinstance(source, Iterable) and all(
+            isinstance(pt, Point) or len(pt) == 2 for pt in source
+        ):
+            points = [
+                pt if isinstance(pt, Point) else Point(pt[1], pt[0]) for pt in source
+            ]
+            search_geom = MultiPoint(points)
+        else:
+            raise ValueError(
+                f"Expected Geometry, GeoDataFrame or iterable of Points, got {source.__class__}"
+            )
+        mask = (
+            tile_geom.intersects(search_geom) for tile_geom in self.tiles_gdf.geometry
+        )
+        return self.tiles_gdf.loc[mask, ["tile_id", "tile_url", "size_mb"]].to_dict(
+            "records"
+        )
+
+
+class GoogleOpenBuildingsDownloader(BaseHandlerDownloader):
+    """A class to handle downloads of Google's Open Buildings dataset."""
+
+    def __init__(
+        self,
+        config: Optional[GoogleOpenBuildingsConfig] = None,
+        data_store: Optional[DataStore] = None,
+        logger: Optional[logging.Logger] = None,
+    ):
+        """
+        Initialize the downloader.
+
+        Args:
+            config: Optional configuration for file paths and download settings.
+                If None, a default `GoogleOpenBuildingsConfig` is used.
+            data_store: Optional instance of a `DataStore` for managing data
+                storage. If None, a `LocalDataStore` is used.
+            logger: Optional custom logger instance. If None, a default logger
+                named after the module is created and used.
+        """
+        config = config or GoogleOpenBuildingsConfig()
+        super().__init__(config=config, data_store=data_store, logger=logger)
+
+    def download_data_unit(
+        self,
+        tile_info: Union[pd.Series, dict],
+        data_type: Literal["polygons", "points"] = "polygons",
+    ) -> Optional[str]:
+        """Download data file for a single tile."""
+
+        tile_url = tile_info["tile_url"]
+        if data_type == "points":
+            tile_url = tile_url.replace("polygons", "points")
+
+        try:
+            response = requests.get(tile_url, stream=True)
+            response.raise_for_status()
+
+            file_path = str(
+                self.config.get_data_unit_path(
+                    tile_info["tile_id"], data_type=data_type
+                )
+            )
+
+            with self.data_store.open(file_path, "wb") as file:
+                for chunk in response.iter_content(chunk_size=8192):
+                    file.write(chunk)
+
+            self.logger.debug(
+                f"Successfully downloaded tile: {tile_info['tile_id']}"
+            )
+            return file_path
+
+        except requests.exceptions.RequestException as e:
+            self.logger.error(
+                f"Failed to download tile {tile_info['tile_id']}: {str(e)}"
+            )
+            return None
+        except Exception as e:
+            self.logger.error(f"Unexpected error downloading dataset: {str(e)}")
+            return None
+
+    def download_data_units(
+        self,
+        tiles: Union[pd.DataFrame, List[dict]],
+        data_type: Literal["polygons", "points"] = "polygons",
+    ) -> List[str]:
+        """Download data files for multiple tiles."""
+
+        if len(tiles) == 0:
+            self.logger.warning("There is no matching data")
+            return []
+
+        with multiprocessing.Pool(self.config.n_workers) as pool:
+            download_func = functools.partial(
+                self.download_data_unit, data_type=data_type
+            )
+            file_paths = list(
+                tqdm(
+                    pool.imap(
+                        download_func,
+                        (
+                            [row for _, row in tiles.iterrows()]
+                            if isinstance(tiles, pd.DataFrame)
+                            else tiles
+                        ),
+                    ),
+                    total=len(tiles),
+                    desc=f"Downloading {data_type} data",
+                )
+            )
+
+        return [path for path in file_paths if path is not None]
+
+    def download(
+        self,
+        source: Union[
+            str,  # country
+            List[Union[Tuple[float, float], Point]],  # points
+            BaseGeometry,  # shapely geoms
+            gpd.GeoDataFrame,
+        ],
+        data_type: Literal["polygons", "points"] = "polygons",
+        **kwargs,
+    ) -> List[str]:
+        """Download Google Open Buildings data for a specified geographic region.
+
+        The region can be defined by a country code/name, a list of points,
+        a Shapely geometry, or a GeoDataFrame. This method identifies the
+        relevant S2 tiles intersecting the region and downloads the
+        specified type of data (polygons or points) for those tiles in parallel.
+
+        Args:
+            source: Defines the geographic area for which to download data.
+                Can be:
+                - A string representing a country code or name.
+                - A list of (latitude, longitude) tuples or Shapely Point objects.
+                - A Shapely BaseGeometry object (e.g., Polygon, MultiPolygon).
+                - A GeoDataFrame with geometry column in EPSG:4326.
+            data_type: The type of building data to download ('polygons' or 'points').
+                Defaults to 'polygons'.
+            **kwargs: Additional keyword arguments that are passed to
+                `AdminBoundaries.create()` if `source` is a country code.
+                For example, `path` to a custom boundaries file.
+
+        Returns:
+            A list of local file paths for the successfully downloaded tiles.
+            Returns an empty list if no data is found for the region or if
+            all downloads fail.
+        """
+
+        tiles = self.config.get_relevant_data_units(source, **kwargs)
+        return self.download_data_units(tiles, data_type)
+
+    def download_by_country(
+        self,
+        country: str,
+        data_type: Literal["polygons", "points"] = "polygons",
+        data_store: Optional[DataStore] = None,
+        country_geom_path: Optional[Union[str, Path]] = None,
+    ) -> List[str]:
+        """
+        Download Google Open Buildings data for a specific country.
+
+        This is a convenience method to download data for an entire country
+        using its code or name.
+
+        Args:
+            country: The country code (e.g., 'USA', 'GBR') or name.
+            data_type: The type of building data to download ('polygons' or 'points').
+                Defaults to 'polygons'.
+            data_store: Optional instance of a `DataStore` to be used by
+                `AdminBoundaries` for loading country boundaries. If None,
+                `AdminBoundaries` will use its default data loading.
+            country_geom_path: Optional path to a GeoJSON file containing the
+                country boundary. If provided, this boundary is used
+                instead of the default from `AdminBoundaries`.
+
+        Returns:
+            A list of local file paths for the successfully downloaded tiles
+            for the specified country.
+        """
+        return self.download(
+            source=country,
+            data_type=data_type,
+            data_store=data_store,
+            path=country_geom_path,
+        )
+
+
+class GoogleOpenBuildingsReader(BaseHandlerReader):
+    """
+    Reader for Google Open Buildings data, supporting country, points, and geometry-based resolution.
+    """
+
+    def __init__(
+        self,
+        config: Optional[GoogleOpenBuildingsConfig] = None,
+        data_store: Optional[DataStore] = None,
+        logger: Optional[logging.Logger] = None,
+    ):
+        config = config or GoogleOpenBuildingsConfig()
+        super().__init__(config=config, data_store=data_store, logger=logger)
+
+    def load_from_paths(
+        self, source_data_path: List[Union[str, Path]], **kwargs
+    ) -> gpd.GeoDataFrame:
+        """
+        Load building data from Google Open Buildings dataset.
+        Args:
+            source_data_path: List of file paths to load
+        Returns:
+            GeoDataFrame containing building data
+        """
+        result = self._load_tabular_data(file_paths=source_data_path)
+        return result
+
+    def load(self, source, data_type="polygons", **kwargs):
+        return super().load(source=source, data_type=data_type, **kwargs)
+
+    def load_points(self, source, **kwargs):
+        """This is a convenience method to load points data"""
+        return self.load(source=source, data_type="points", **kwargs)
+
+    def load_polygons(self, source, **kwargs):
+        """This is a convenience method to load polygons data"""
+        return self.load(source=source, data_type="polygons", **kwargs)
+
+
+class GoogleOpenBuildingsHandler(BaseHandler):
+    """
+    Handler for Google Open Buildings dataset.
+
+    This class provides a unified interface for downloading and loading Google Open Buildings data.
+    It manages the lifecycle of configuration, downloading, and reading components.
+    """
+
+    def create_config(
+        self, data_store: DataStore, logger: logging.Logger, **kwargs
+    ) -> GoogleOpenBuildingsConfig:
+        """
+        Create and return a GoogleOpenBuildingsConfig instance.
+
+        Args:
+            data_store: The data store instance to use
+            logger: The logger instance to use
+            **kwargs: Additional configuration parameters
+
+        Returns:
+            Configured GoogleOpenBuildingsConfig instance
+        """
+        return GoogleOpenBuildingsConfig(data_store=data_store, logger=logger, **kwargs)
+
+    def create_downloader(
+        self,
+        config: GoogleOpenBuildingsConfig,
+        data_store: DataStore,
+        logger: logging.Logger,
+        **kwargs,
+    ) -> GoogleOpenBuildingsDownloader:
+        """
+        Create and return a GoogleOpenBuildingsDownloader instance.
+
+        Args:
+            config: The configuration object
+            data_store: The data store instance to use
+            logger: The logger instance to use
+            **kwargs: Additional downloader parameters
+
+        Returns:
+            Configured GoogleOpenBuildingsDownloader instance
+        """
+        return GoogleOpenBuildingsDownloader(
+            config=config, data_store=data_store, logger=logger, **kwargs
+        )
+
+    def create_reader(
+        self,
+        config: GoogleOpenBuildingsConfig,
+        data_store: DataStore,
+        logger: logging.Logger,
+        **kwargs,
+    ) -> GoogleOpenBuildingsReader:
+        """
+        Create and return a GoogleOpenBuildingsReader instance.
+
+        Args:
+            config: The configuration object
+            data_store: The data store instance to use
+            logger: The logger instance to use
+            **kwargs: Additional reader parameters
+
+        Returns:
+            Configured GoogleOpenBuildingsReader instance
+        """
+        return GoogleOpenBuildingsReader(
+            config=config, data_store=data_store, logger=logger, **kwargs
+        )
+
+    def load_points(
+        self,
+        source: Union[
+            str,  # country
+            List[Union[tuple, Point]],  # points
+            BaseGeometry,  # geometry
+            gpd.GeoDataFrame,  # geodataframe
+            Path,  # path
+            List[Union[str, Path]],  # list of paths
+        ],
+        ensure_available: bool = True,
+        **kwargs,
+    ) -> gpd.GeoDataFrame:
+        """
+        Load point data from Google Open Buildings dataset.
+
+        Args:
+            source: The data source specification
+            ensure_available: If True, ensure data is downloaded before loading
+            **kwargs: Additional parameters passed to load methods
+
+        Returns:
+            GeoDataFrame containing building point data
+        """
+        return self.load_data(
+            source=source,
+            ensure_available=ensure_available,
+            data_type="points",
+            **kwargs,
+        )
+
+    def load_polygons(
+        self,
+        source: Union[
+            str,  # country
+            List[Union[tuple, Point]],  # points
+            BaseGeometry,  # geometry
+            gpd.GeoDataFrame,  # geodataframe
+            Path,  # path
+            List[Union[str, Path]],  # list of paths
+        ],
+        ensure_available: bool = True,
+        **kwargs,
+    ) -> gpd.GeoDataFrame:
+        """
+        Load polygon data from Google Open Buildings dataset.
+
+        Args:
+            source: The data source specification
+            ensure_available: If True, ensure data is downloaded before loading
+            **kwargs: Additional parameters passed to load methods
+
+        Returns:
+            GeoDataFrame containing building polygon data
+        """
+        return self.load_data(
+            source=source,
+            ensure_available=ensure_available,
+            data_type="polygons",
+            **kwargs,
+        )