giga-spatial 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. giga_spatial-0.6.0.dist-info/METADATA +141 -0
  2. giga_spatial-0.6.0.dist-info/RECORD +47 -0
  3. giga_spatial-0.6.0.dist-info/WHEEL +5 -0
  4. giga_spatial-0.6.0.dist-info/licenses/LICENSE +661 -0
  5. giga_spatial-0.6.0.dist-info/top_level.txt +1 -0
  6. gigaspatial/__init__.py +1 -0
  7. gigaspatial/config.py +226 -0
  8. gigaspatial/core/__init__.py +0 -0
  9. gigaspatial/core/io/__init__.py +5 -0
  10. gigaspatial/core/io/adls_data_store.py +325 -0
  11. gigaspatial/core/io/data_api.py +113 -0
  12. gigaspatial/core/io/data_store.py +147 -0
  13. gigaspatial/core/io/local_data_store.py +92 -0
  14. gigaspatial/core/io/readers.py +265 -0
  15. gigaspatial/core/io/writers.py +128 -0
  16. gigaspatial/core/schemas/__init__.py +0 -0
  17. gigaspatial/core/schemas/entity.py +244 -0
  18. gigaspatial/generators/__init__.py +2 -0
  19. gigaspatial/generators/poi.py +636 -0
  20. gigaspatial/generators/zonal/__init__.py +3 -0
  21. gigaspatial/generators/zonal/base.py +370 -0
  22. gigaspatial/generators/zonal/geometry.py +439 -0
  23. gigaspatial/generators/zonal/mercator.py +78 -0
  24. gigaspatial/grid/__init__.py +1 -0
  25. gigaspatial/grid/mercator_tiles.py +286 -0
  26. gigaspatial/handlers/__init__.py +40 -0
  27. gigaspatial/handlers/base.py +761 -0
  28. gigaspatial/handlers/boundaries.py +305 -0
  29. gigaspatial/handlers/ghsl.py +772 -0
  30. gigaspatial/handlers/giga.py +145 -0
  31. gigaspatial/handlers/google_open_buildings.py +472 -0
  32. gigaspatial/handlers/hdx.py +241 -0
  33. gigaspatial/handlers/mapbox_image.py +208 -0
  34. gigaspatial/handlers/maxar_image.py +291 -0
  35. gigaspatial/handlers/microsoft_global_buildings.py +548 -0
  36. gigaspatial/handlers/ookla_speedtest.py +199 -0
  37. gigaspatial/handlers/opencellid.py +290 -0
  38. gigaspatial/handlers/osm.py +356 -0
  39. gigaspatial/handlers/overture.py +126 -0
  40. gigaspatial/handlers/rwi.py +157 -0
  41. gigaspatial/handlers/unicef_georepo.py +806 -0
  42. gigaspatial/handlers/worldpop.py +266 -0
  43. gigaspatial/processing/__init__.py +4 -0
  44. gigaspatial/processing/geo.py +1054 -0
  45. gigaspatial/processing/sat_images.py +39 -0
  46. gigaspatial/processing/tif_processor.py +477 -0
  47. gigaspatial/processing/utils.py +49 -0
@@ -0,0 +1,636 @@
1
+ from pathlib import Path
2
+ from typing import List, Optional, Union, Tuple
3
+ from pydantic.dataclasses import dataclass, Field
4
+
5
+ import geopandas as gpd
6
+ import pandas as pd
7
+ import logging
8
+
9
+ from gigaspatial.core.io.data_store import DataStore
10
+ from gigaspatial.core.io.local_data_store import LocalDataStore
11
+ from gigaspatial.core.io.writers import write_dataset
12
+ from gigaspatial.config import config as global_config
13
+ from gigaspatial.handlers.google_open_buildings import GoogleOpenBuildingsHandler
14
+ from gigaspatial.handlers.microsoft_global_buildings import MSBuildingsHandler
15
+ from gigaspatial.handlers.ghsl import GHSLDataHandler
16
+ from gigaspatial.processing.geo import (
17
+ convert_to_geodataframe,
18
+ buffer_geodataframe,
19
+ detect_coordinate_columns,
20
+ aggregate_polygons_to_zones,
21
+ )
22
+ from gigaspatial.processing.tif_processor import (
23
+ sample_multiple_tifs_by_polygons,
24
+ sample_multiple_tifs_by_coordinates,
25
+ TifProcessor,
26
+ )
27
+ from scipy.spatial import cKDTree
28
+
29
+
30
@dataclass
class PoiViewGeneratorConfig:
    """
    Configuration for POI (Point of Interest) view generation.

    Attributes:
        base_path (Path): The base directory where generated POI views will be saved.
            Defaults to `global_config.get_path("poi", "views")`.
        output_format (str): The default format for saving output files (e.g., "csv", "geojson").
            Defaults to "csv".
        ensure_available (bool): Value that `PoiViewGenerator` forwards as the
            `ensure_available` argument when it loads data through a handler.
            Defaults to True. (Presumably lets the handler fetch missing data
            first -- confirm against the handler implementations.)
    """

    # The default path is computed once, when this class body is executed,
    # not each time a config instance is created.
    base_path: Path = Field(default=global_config.get_path("poi", "views"))
    output_format: str = "csv"
    ensure_available: bool = True
45
+
46
+
47
class PoiViewGenerator:
    """
    POI View Generator for integrating various geospatial datasets
    such as Google Open Buildings, Microsoft Global Buildings, GHSL Built Surface,
    and GHSL Settlement Model (SMOD) data with Points of Interest (POIs).

    This class provides methods to load, process, and map external geospatial
    data to a given set of POIs, enriching them with relevant attributes.
    It leverages handler/reader classes for efficient data access and processing.

    The POIs can be initialized from a list of (latitude, longitude) tuples,
    a list of dictionaries, a pandas DataFrame, or a geopandas GeoDataFrame.

    On success, each `map_*` method returns the enriched frame and also stores
    it as the new internal `points_gdf`, so successive calls accumulate columns.
    """

    def __init__(
        self,
        points: Union[
            List[Tuple[float, float]], List[dict], pd.DataFrame, gpd.GeoDataFrame
        ],
        config: Optional[PoiViewGeneratorConfig] = None,
        data_store: Optional[DataStore] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """
        Initializes the PoiViewGenerator with the input points and configurations.

        The input `points` are converted into an internal GeoDataFrame
        (`_points_gdf`) for consistent geospatial operations.

        Args:
            points (Union[List[Tuple[float, float]], List[dict], pd.DataFrame, gpd.GeoDataFrame]):
                The input points of interest. Can be:
                - A list of (latitude, longitude) tuples.
                - A list of dictionaries with detectable coordinate keys.
                - A pandas DataFrame with detectable coordinate columns.
                - A geopandas GeoDataFrame with point geometries.
            config (Optional[PoiViewGeneratorConfig]):
                Configuration for the POI view generation process. If None, a
                default `PoiViewGeneratorConfig` will be used.
            data_store (Optional[DataStore]):
                An instance of a data store for managing data access (e.g., LocalDataStore).
                If None, a default `LocalDataStore` will be used.
            logger (Optional[logging.Logger]):
                Logger to use. If None, one named after this class is obtained
                from the global config.
        """
        self.config = config or PoiViewGeneratorConfig()
        self.data_store = data_store or LocalDataStore()
        self.logger = logger or global_config.get_logger(self.__class__.__name__)
        self._points_gdf = self._init_points_gdf(points)

    @staticmethod
    def _init_points_gdf(
        points: Union[
            List[Tuple[float, float]], List[dict], pd.DataFrame, gpd.GeoDataFrame
        ],
    ) -> gpd.GeoDataFrame:
        """
        Internal static method to convert various point input formats into a GeoDataFrame.

        This method standardizes coordinate column names to 'latitude' and 'longitude'
        for consistent internal representation. It also ensures each point has an
        identifier in the 'poi_id' column (generated as 'poi_<row number>' when absent).
        The caller's object is never modified; DataFrame-like inputs are copied first.

        Args:
            points: Input points in various formats:
                - List of (latitude, longitude) tuples
                - List of dictionaries with coordinate keys
                - DataFrame with coordinate columns
                - GeoDataFrame with point geometries

        Returns:
            gpd.GeoDataFrame: Standardized GeoDataFrame with 'latitude', 'longitude',
                and 'poi_id' columns

        Raises:
            ValueError: If points format is not supported or coordinate columns cannot be detected
        """
        # GeoDataFrame must be tested before DataFrame because it is a subclass.
        if isinstance(points, gpd.GeoDataFrame):
            # Always copy so the caller's frame is not mutated, and read the
            # coordinates from the active geometry whatever its column is named;
            # later mapping steps require 'latitude'/'longitude' to exist.
            points = points.copy()
            points["latitude"] = points.geometry.y
            points["longitude"] = points.geometry.x
            if "poi_id" not in points.columns:
                points["poi_id"] = [f"poi_{i}" for i in range(len(points))]
            return points

        elif isinstance(points, pd.DataFrame):
            # Detect and standardize coordinate columns
            try:
                lat_col, lon_col = detect_coordinate_columns(points)
                points = points.copy()
                points["latitude"] = points[lat_col]
                points["longitude"] = points[lon_col]
                if "poi_id" not in points.columns:
                    points["poi_id"] = [f"poi_{i}" for i in range(len(points))]
                return convert_to_geodataframe(points)
            except ValueError as e:
                raise ValueError(
                    f"Could not detect coordinate columns in DataFrame: {str(e)}"
                ) from e

        elif isinstance(points, list):
            if len(points) == 0:
                # Nothing to inspect: return an empty frame with the standard schema.
                return gpd.GeoDataFrame(
                    columns=["latitude", "longitude", "poi_id", "geometry"],
                    geometry="geometry",
                    crs="EPSG:4326",
                )

            # Only the first element is inspected to decide the list's format.
            if isinstance(points[0], tuple) and len(points[0]) == 2:
                # List of (lat, lon) tuples
                df = pd.DataFrame(points, columns=["latitude", "longitude"])
                df["poi_id"] = [f"poi_{i}" for i in range(len(points))]
                return convert_to_geodataframe(df)

            elif isinstance(points[0], dict):
                # List of dictionaries
                df = pd.DataFrame(points)
                try:
                    lat_col, lon_col = detect_coordinate_columns(df)
                    df["latitude"] = df[lat_col]
                    df["longitude"] = df[lon_col]
                    if "poi_id" not in df.columns:
                        df["poi_id"] = [f"poi_{i}" for i in range(len(points))]
                    return convert_to_geodataframe(df)
                except ValueError as e:
                    raise ValueError(
                        f"Could not detect coordinate columns in dictionary list: {str(e)}"
                    ) from e

        raise ValueError("Unsupported points input type for PoiViewGenerator.")

    @property
    def points_gdf(self) -> gpd.GeoDataFrame:
        """Gets the internal GeoDataFrame of points of interest."""
        return self._points_gdf

    def map_nearest_points(
        self,
        points_df: Union[pd.DataFrame, gpd.GeoDataFrame],
        id_column: str,
        lat_column: Optional[str] = None,
        lon_column: Optional[str] = None,
        output_prefix: str = "nearest",
        **kwargs,
    ) -> pd.DataFrame:
        """
        Maps nearest points from a given DataFrame to the POIs.

        Enriches the `points_gdf` with the ID and distance to the nearest point
        from the input DataFrame for each POI. The nearest neighbour is selected
        with a KD-tree built directly on the (latitude, longitude) values; the
        reported distance is then computed with `calculate_distance`.

        Args:
            points_df (Union[pd.DataFrame, gpd.GeoDataFrame]):
                DataFrame containing points to find nearest neighbors from.
                Must have latitude and longitude columns or point geometries.
            id_column (str):
                Name of the column containing unique identifiers for each point.
            lat_column (str, optional):
                Name of the latitude column in points_df. If None, will attempt to detect it.
                Ignored when points_df is a GeoDataFrame whose geometry column is
                named "geometry"; coordinates are then taken from the geometry.
            lon_column (str, optional):
                Name of the longitude column in points_df. Same rules as `lat_column`.
            output_prefix (str, optional):
                Prefix for the output column names. Defaults to "nearest".
            **kwargs:
                Accepted for call compatibility; not used by this method.

        Returns:
            pd.DataFrame: The updated GeoDataFrame with new columns:
                '{output_prefix}_id' and '{output_prefix}_distance'.
                Returns a copy of the current `points_gdf` if no points are found.

        Raises:
            ValueError: If required columns are missing from points_df or if coordinate
                columns cannot be detected or extracted from geometry.
        """
        self.logger.info(
            f"Mapping nearest points from {points_df.__class__.__name__} to POIs"
        )

        # Validate input DataFrame
        if points_df.empty:
            self.logger.info("No points found in the input DataFrame")
            return self.points_gdf.copy()

        # Handle GeoDataFrame
        if isinstance(points_df, gpd.GeoDataFrame):
            points_df = points_df.copy()
            if points_df.geometry.name == "geometry":
                points_df["latitude"] = points_df.geometry.y
                points_df["longitude"] = points_df.geometry.x
                lat_column = "latitude"
                lon_column = "longitude"
                self.logger.info("Extracted coordinates from geometry")

        # Detect coordinate columns if not provided
        if lat_column is None or lon_column is None:
            try:
                detected_lat, detected_lon = detect_coordinate_columns(points_df)
                lat_column = lat_column or detected_lat
                lon_column = lon_column or detected_lon
                self.logger.info(
                    f"Detected coordinate columns: {lat_column}, {lon_column}"
                )
            except ValueError as e:
                raise ValueError(
                    f"Could not detect coordinate columns: {str(e)}"
                ) from e

        # Validate required columns
        required_columns = [lat_column, lon_column, id_column]
        missing_columns = [
            col for col in required_columns if col not in points_df.columns
        ]
        if missing_columns:
            raise ValueError(
                f"Missing required columns in points_df: {missing_columns}"
            )

        from gigaspatial.processing.geo import calculate_distance

        self.logger.info("Calculating nearest points for each POI")
        tree = cKDTree(points_df[[lat_column, lon_column]])
        result = self.points_gdf.copy()
        _, idx = tree.query(result[["latitude", "longitude"]], k=1)
        df_nearest = points_df.iloc[idx]

        # POI i is paired with neighbour row idx[i] purely by position. The two
        # frames carry unrelated pandas indexes, so plain arrays are passed to
        # keep the distance computation from aligning on index labels.
        dist = calculate_distance(
            lat1=result["latitude"].to_numpy(),
            lon1=result["longitude"].to_numpy(),
            lat2=df_nearest[lat_column].to_numpy(),
            lon2=df_nearest[lon_column].to_numpy(),
        )
        result[f"{output_prefix}_id"] = df_nearest[id_column].to_numpy()
        result[f"{output_prefix}_distance"] = dist
        self.logger.info(
            f"Nearest points mapping complete with prefix '{output_prefix}'"
        )
        self._points_gdf = result
        return result

    def map_google_buildings(
        self,
        handler: Optional[GoogleOpenBuildingsHandler] = None,
        **kwargs,
    ) -> pd.DataFrame:
        """
        Maps Google Open Buildings data to the POIs by finding the nearest building.

        Enriches the `points_gdf` with the ID and distance to the nearest
        Google Open Building for each POI.

        Args:
            handler (Optional[GoogleOpenBuildingsHandler]):
                Handler used to load the building points. If None, a
                `GoogleOpenBuildingsHandler` bound to this generator's data
                store is created.
            **kwargs:
                Additional keyword arguments forwarded to `map_nearest_points`.

        Returns:
            pd.DataFrame: The updated GeoDataFrame with new columns:
                'nearest_google_building_id' and 'nearest_google_building_distance'.
                Returns a copy of the current `points_gdf` if no buildings are found.
        """
        self.logger.info("Mapping Google Open Buildings data to POIs")
        handler = handler or GoogleOpenBuildingsHandler(data_store=self.data_store)

        self.logger.info("Loading Google Buildings point data")
        buildings_df = handler.load_points(
            self.points_gdf, ensure_available=self.config.ensure_available
        )
        if buildings_df is None or len(buildings_df) == 0:
            self.logger.info("No Google buildings data found for the provided POIs")
            return self.points_gdf.copy()

        return self.map_nearest_points(
            points_df=buildings_df,
            id_column="full_plus_code",
            output_prefix="nearest_google_building",
            **kwargs,
        )

    def map_ms_buildings(
        self,
        handler: Optional[MSBuildingsHandler] = None,
        **kwargs,
    ) -> pd.DataFrame:
        """
        Maps Microsoft Global Buildings data to the POIs by finding the nearest building.

        Enriches the `points_gdf` with the ID and distance to the nearest
        Microsoft Global Building for each POI. If buildings don't have a
        'building_id' column, creates an ID from the building's coordinates
        formatted as '<y>_<x>' with six decimals.

        Args:
            handler (Optional[MSBuildingsHandler]):
                Handler used to load the building data. If None, an
                `MSBuildingsHandler` bound to this generator's data store is created.
            **kwargs:
                Additional keyword arguments forwarded to `map_nearest_points`.

        Returns:
            pd.DataFrame: The updated GeoDataFrame with new columns:
                'nearest_ms_building_id' and 'nearest_ms_building_distance'.
                Returns a copy of the current `points_gdf` if no buildings are found.
        """
        self.logger.info("Mapping Microsoft Global Buildings data to POIs")
        handler = handler or MSBuildingsHandler(data_store=self.data_store)
        self.logger.info("Loading Microsoft Buildings polygon data")
        buildings_gdf = handler.load_data(
            self.points_gdf, ensure_available=self.config.ensure_available
        )
        if buildings_gdf is None or len(buildings_gdf) == 0:
            self.logger.info("No Microsoft buildings data found for the provided POIs")
            return self.points_gdf.copy()

        if "building_id" not in buildings_gdf:
            self.logger.info("Creating building IDs from coordinates")
            buildings_gdf = buildings_gdf.copy()
            # NOTE(review): `.y` / `.x` exist only on point geometries, while the
            # log message above speaks of polygon data -- confirm what the
            # handler returns; polygons would need e.g. a centroid here.
            buildings_gdf["building_id"] = buildings_gdf.apply(
                lambda row: f"{row.geometry.y:.6f}_{row.geometry.x:.6f}",
                axis=1,
            )

        return self.map_nearest_points(
            points_df=buildings_gdf,
            id_column="building_id",
            output_prefix="nearest_ms_building",
            **kwargs,
        )

    def map_zonal_stats(
        self,
        data: Union[List[TifProcessor], gpd.GeoDataFrame],
        stat: str = "mean",
        map_radius_meters: Optional[float] = None,
        output_column: str = "zonal_stat",
        value_column: Optional[str] = None,
        area_weighted: bool = False,
        **kwargs,
    ) -> pd.DataFrame:
        """
        Maps zonal statistics from raster or polygon data to POIs.

        Can operate in three modes:
            1. Raster point sampling: Directly samples raster values at POI locations
            2. Raster zonal statistics: Creates buffers around POIs and calculates statistics within them
            3. Polygon aggregation: Aggregates polygon data to POI buffers with optional area weighting

        Args:
            data (Union[List[TifProcessor], gpd.GeoDataFrame]):
                Either a list of TifProcessor objects containing raster data to sample,
                or a GeoDataFrame containing polygon data to aggregate.
            stat (str, optional):
                For raster data: Statistic to calculate ("sum", "mean", "median", "min", "max").
                For polygon data: Aggregation method to use.
                Defaults to "mean". Not passed to the sampler in raster point
                sampling mode (it only appears in the log message there).
            map_radius_meters (float, optional):
                If provided, creates circular buffers of this radius around each POI
                and calculates statistics within the buffers. If None, samples directly
                at POI locations (only for raster data).
            output_column (str, optional):
                Name of the output column to store the results. Defaults to "zonal_stat".
            value_column (str, optional):
                For polygon data: Name of the column to aggregate. Required for polygon data.
                Not used for raster data.
            area_weighted (bool, optional):
                For polygon data: Whether to weight values by fractional area of
                intersection. Defaults to False.
            **kwargs:
                Additional keyword arguments passed to the sampling/aggregation functions.

        Returns:
            pd.DataFrame: The updated GeoDataFrame with a new column containing the
                calculated statistics. Returns a copy of the current `points_gdf`
                if `data` is empty.

        Raises:
            ValueError: If `data` is neither a list of TifProcessor objects nor a
                GeoDataFrame, or if `map_radius_meters` / `value_column` are
                missing for polygon data.
        """
        if isinstance(data, list) and all(isinstance(x, TifProcessor) for x in data):
            # Handle raster data (an empty list also lands here: all([]) is True)
            if not data:
                self.logger.info("No valid raster data found for the provided POIs")
                return self.points_gdf.copy()

            if map_radius_meters is not None:
                self.logger.info(
                    f"Calculating {stat} within {map_radius_meters}m buffers around POIs"
                )
                # Create buffers around POIs
                polygon_list = buffer_geodataframe(
                    self.points_gdf,
                    buffer_distance_meters=map_radius_meters,
                    cap_style="round",
                ).geometry

                # Calculate zonal statistics
                sampled_values = sample_multiple_tifs_by_polygons(
                    tif_processors=data, polygon_list=polygon_list, stat=stat, **kwargs
                )
            else:
                self.logger.info(f"Sampling {stat} at POI locations")
                # Sample directly at POI locations
                coord_list = self.points_gdf[["latitude", "longitude"]].to_numpy()
                sampled_values = sample_multiple_tifs_by_coordinates(
                    tif_processors=data, coordinate_list=coord_list, **kwargs
                )

        elif isinstance(data, gpd.GeoDataFrame):
            # Handle polygon data
            if data.empty:
                self.logger.info("No valid polygon data found for the provided POIs")
                return self.points_gdf.copy()

            if map_radius_meters is None:
                raise ValueError("map_radius_meters must be provided for polygon data")

            if value_column is None:
                raise ValueError("value_column must be provided for polygon data")

            self.logger.info(
                f"Aggregating {value_column} within {map_radius_meters}m buffers around POIs"
            )

            # Create buffers around POIs
            buffer_gdf = buffer_geodataframe(
                self.points_gdf,
                buffer_distance_meters=map_radius_meters,
                cap_style="round",
            )

            # Aggregate polygons to buffers
            aggregated = aggregate_polygons_to_zones(
                polygons=data,
                zones=buffer_gdf,
                value_columns=value_column,
                aggregation=stat,
                area_weighted=area_weighted,
                zone_id_column="poi_id",
                **kwargs,
            )

            # Extract values for each POI.
            # NOTE(review): assigned positionally below -- assumes the aggregation
            # returns one row per POI in the same order as `points_gdf`; confirm.
            sampled_values = aggregated[value_column].values

        else:
            raise ValueError(
                "data must be either a list of TifProcessor objects or a GeoDataFrame"
            )

        result = self.points_gdf.copy()
        result[output_column] = sampled_values
        self.logger.info(f"Zonal statistics mapping complete: {output_column}")
        self._points_gdf = result
        return result

    def map_built_s(
        self,
        map_radius_meters: float = 150,
        stat: str = "sum",
        dataset_year=2020,
        dataset_resolution=100,
        output_column="built_surface_m2",
        **kwargs,
    ) -> pd.DataFrame:
        """
        Maps GHSL Built Surface (GHS_BUILT_S) data to the POIs.

        Calculates a statistic (sum by default) of built surface within a
        specified buffer radius around each POI and stores it in `output_column`.

        Args:
            map_radius_meters (float):
                The buffer distance in meters around each POI to calculate
                zonal statistics for built surface. Defaults to 150 meters.
            stat (str):
                Statistic computed inside each buffer. Defaults to "sum".
            dataset_year:
                Passed to `GHSLDataHandler` as `year`. Defaults to 2020.
            dataset_resolution:
                Passed to `GHSLDataHandler` as `resolution`. Defaults to 100.
            output_column:
                Name of the column that receives the result.
                Defaults to "built_surface_m2".
            **kwargs:
                Forwarded to BOTH the `GHSLDataHandler` constructor and
                `map_zonal_stats`; every key must be accepted by both.

        Returns:
            pd.DataFrame: The updated GeoDataFrame with the new `output_column`.
                Returns a copy of the current `points_gdf` if no raster tiles
                are loaded.
        """
        self.logger.info("Mapping GHSL Built Surface data to POIs")
        handler = GHSLDataHandler(
            product="GHS_BUILT_S",
            year=dataset_year,
            resolution=dataset_resolution,
            data_store=self.data_store,
            **kwargs,
        )
        # Tiles are looked up with the POIs reprojected to the handler's CRS.
        gdf_points = self.points_gdf.to_crs(handler.config.crs)
        self.logger.info("Loading GHSL Built Surface raster tiles")
        tif_processors = handler.load_data(
            gdf_points, ensure_available=self.config.ensure_available
        )

        return self.map_zonal_stats(
            data=tif_processors,
            stat=stat,
            map_radius_meters=map_radius_meters,
            output_column=output_column,
            **kwargs,
        )

    def map_smod(
        self,
        stat="median",
        dataset_year=2020,
        dataset_resolution=100,
        output_column="smod_class",
        **kwargs,
    ) -> pd.DataFrame:
        """
        Maps GHSL Settlement Model (SMOD) data to the POIs.

        Samples the SMOD class value at each POI's location and stores it in
        `output_column`.

        Args:
            stat:
                Forwarded to `map_zonal_stats`. Because no buffer radius is
                passed, values are sampled directly at the POI locations and
                this statistic is not applied. Defaults to "median".
            dataset_year:
                Passed to `GHSLDataHandler` as `year`. Defaults to 2020.
            dataset_resolution:
                Passed to `GHSLDataHandler` as `resolution`. Defaults to 100.
            output_column:
                Name of the column that receives the result. Defaults to "smod_class".
            **kwargs:
                Forwarded to BOTH the `GHSLDataHandler` constructor and
                `map_zonal_stats`; every key must be accepted by both.

        Returns:
            pd.DataFrame: The updated GeoDataFrame with the new `output_column`.
                Returns a copy of the current `points_gdf` if no raster tiles
                are loaded.
        """
        self.logger.info("Mapping GHSL Settlement Model (SMOD) data to POIs")
        handler = GHSLDataHandler(
            product="GHS_SMOD",
            year=dataset_year,
            resolution=dataset_resolution,
            data_store=self.data_store,
            coord_system=54009,
            **kwargs,
        )

        # Tiles are looked up with the POIs reprojected to the handler's CRS.
        gdf_points = self.points_gdf.to_crs(handler.config.crs)
        self.logger.info("Loading GHSL SMOD raster tiles")
        tif_processors = handler.load_data(
            gdf_points, ensure_available=self.config.ensure_available
        )

        return self.map_zonal_stats(
            data=tif_processors,
            stat=stat,  # Use median for categorical data
            output_column=output_column,
            **kwargs,
        )

    def save_view(
        self,
        name: str,
        output_format: Optional[str] = None,
    ) -> Path:
        """
        Saves the current POI view (the enriched GeoDataFrame) to a file.

        The output path and format are determined by this generator's `config`
        or overridden by the `output_format` parameter.

        Args:
            name (str): The base name for the output file (without extension).
            output_format (Optional[str]):
                The desired output format (e.g., "csv", "geojson"). If None,
                the `output_format` from `config` will be used.

        Returns:
            Path: The full path to the saved output file.
        """
        # The configuration is stored as `self.config` by __init__; the previous
        # `self.generator_config` attribute never existed and raised AttributeError.
        format_to_use = output_format or self.config.output_format
        output_path = self.config.base_path / f"{name}.{format_to_use}"

        self.logger.info(f"Saving POI view to {output_path}")
        write_dataset(
            df=self.points_gdf,
            path=str(output_path),
            data_store=self.data_store,
            format=format_to_use,
        )

        return output_path
@@ -0,0 +1,3 @@
1
+ from gigaspatial.generators.zonal.base import ZonalViewGeneratorConfig
2
+ from gigaspatial.generators.zonal.geometry import GeometryBasedZonalViewGenerator
3
+ from gigaspatial.generators.poi import PoiViewGenerator, PoiViewGeneratorConfig