giga-spatial 0.6.4-py3-none-any.whl → 0.6.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {giga_spatial-0.6.4.dist-info → giga_spatial-0.6.6.dist-info}/METADATA +3 -1
- giga_spatial-0.6.6.dist-info/RECORD +50 -0
- gigaspatial/__init__.py +1 -1
- gigaspatial/config.py +29 -4
- gigaspatial/core/io/__init__.py +1 -0
- gigaspatial/core/io/data_api.py +3 -1
- gigaspatial/core/io/database.py +319 -0
- gigaspatial/generators/__init__.py +5 -1
- gigaspatial/generators/poi.py +300 -52
- gigaspatial/generators/zonal/__init__.py +2 -1
- gigaspatial/generators/zonal/admin.py +84 -0
- gigaspatial/generators/zonal/base.py +237 -81
- gigaspatial/generators/zonal/geometry.py +151 -53
- gigaspatial/generators/zonal/mercator.py +50 -19
- gigaspatial/grid/__init__.py +1 -1
- gigaspatial/grid/mercator_tiles.py +33 -10
- gigaspatial/handlers/__init__.py +8 -1
- gigaspatial/handlers/base.py +26 -6
- gigaspatial/handlers/boundaries.py +93 -18
- gigaspatial/handlers/ghsl.py +92 -15
- gigaspatial/handlers/rwi.py +5 -2
- gigaspatial/handlers/worldpop.py +771 -186
- gigaspatial/processing/algorithms.py +188 -0
- gigaspatial/processing/geo.py +204 -102
- gigaspatial/processing/tif_processor.py +220 -45
- giga_spatial-0.6.4.dist-info/RECORD +0 -47
- {giga_spatial-0.6.4.dist-info → giga_spatial-0.6.6.dist-info}/WHEEL +0 -0
- {giga_spatial-0.6.4.dist-info → giga_spatial-0.6.6.dist-info}/licenses/LICENSE +0 -0
- {giga_spatial-0.6.4.dist-info → giga_spatial-0.6.6.dist-info}/top_level.txt +0 -0
gigaspatial/generators/poi.py
CHANGED
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import List, Optional, Union, Tuple
+from typing import List, Optional, Union, Tuple, Literal
 from pydantic.dataclasses import dataclass, Field
 
 import geopandas as gpd
@@ -13,11 +13,13 @@ from gigaspatial.config import config as global_config
 from gigaspatial.handlers.google_open_buildings import GoogleOpenBuildingsHandler
 from gigaspatial.handlers.microsoft_global_buildings import MSBuildingsHandler
 from gigaspatial.handlers.ghsl import GHSLDataHandler
+from gigaspatial.handlers.worldpop import WPPopulationHandler
 from gigaspatial.processing.geo import (
     convert_to_geodataframe,
     buffer_geodataframe,
     detect_coordinate_columns,
     aggregate_polygons_to_zones,
+    get_centroids,
 )
 from gigaspatial.processing.tif_processor import (
     sample_multiple_tifs_by_polygons,
@@ -63,6 +65,7 @@ class PoiViewGenerator:
         points: Union[
             List[Tuple[float, float]], List[dict], pd.DataFrame, gpd.GeoDataFrame
         ],
+        poi_id_column: str = "poi_id",
         config: Optional[PoiViewGeneratorConfig] = None,
         data_store: Optional[DataStore] = None,
         logger: logging.Logger = None,
@@ -87,16 +90,21 @@ class PoiViewGenerator:
                 An instance of a data store for managing data access (e.g., LocalDataStore).
                 If None, a default `LocalDataStore` will be used.
         """
+        if hasattr(points, "__len__") and len(points) == 0:
+            raise ValueError("Points input cannot be empty")
+
         self.config = config or PoiViewGeneratorConfig()
         self.data_store = data_store or LocalDataStore()
         self.logger = logger or global_config.get_logger(self.__class__.__name__)
-        self._points_gdf = self._init_points_gdf(points)
+        self._points_gdf = self._init_points_gdf(points, poi_id_column)
+        self._view: pd.DataFrame = self._points_gdf.drop(columns=["geometry"])
 
     @staticmethod
     def _init_points_gdf(
         points: Union[
             List[Tuple[float, float]], List[dict], pd.DataFrame, gpd.GeoDataFrame
         ],
+        poi_id_column: str,
     ) -> gpd.GeoDataFrame:
         """
         Internal static method to convert various point input formats into a GeoDataFrame.
@@ -125,8 +133,19 @@ class PoiViewGenerator:
             points = points.copy()
             points["latitude"] = points.geometry.y
             points["longitude"] = points.geometry.x
-            if …
+            if poi_id_column not in points.columns:
                 points["poi_id"] = [f"poi_{i}" for i in range(len(points))]
+            else:
+                points = points.rename(
+                    columns={poi_id_column: "poi_id"},
+                )
+                if points["poi_id"].duplicated().any():
+                    raise ValueError(
+                        f"Column '{poi_id_column}' provided as 'poi_id_column' contains duplicate values."
+                    )
+
+            if points.crs != "EPSG:4326":
+                points = points.to_crs("EPSG:4326")
             return points
 
         elif isinstance(points, pd.DataFrame):
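This branch now also normalizes the CRS: any projected GeoDataFrame is reprojected to EPSG:4326 before use. A minimal sketch (coordinates are illustrative, and it assumes `PoiViewGenerator` is re-exported from `gigaspatial.generators`):

```python
import geopandas as gpd
from shapely.geometry import Point
from gigaspatial.generators import PoiViewGenerator  # assumed re-export

# Hypothetical input in Web Mercator; the generator reprojects to EPSG:4326
# and, because a unique "poi_id" column is already present, keeps it as the ID.
gdf = gpd.GeoDataFrame(
    {"poi_id": ["a", "b"]},
    geometry=[Point(3625743.0, 36166.0), Point(3626850.0, 37280.0)],
    crs="EPSG:3857",
)
generator = PoiViewGenerator(points=gdf)
```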
@@ -136,8 +155,16 @@ class PoiViewGenerator:
                 points = points.copy()
                 points["latitude"] = points[lat_col]
                 points["longitude"] = points[lon_col]
-                if …
+                if poi_id_column not in points.columns:
                     points["poi_id"] = [f"poi_{i}" for i in range(len(points))]
+                else:
+                    points = points.rename(
+                        columns={poi_id_column: "poi_id"},
+                    )
+                    if points["poi_id"].duplicated().any():
+                        raise ValueError(
+                            f"Column '{poi_id_column}' provided as 'poi_id_column' contains duplicate values."
+                        )
                 return convert_to_geodataframe(points)
             except ValueError as e:
                 raise ValueError(
@@ -165,8 +192,16 @@ class PoiViewGenerator:
                 lat_col, lon_col = detect_coordinate_columns(df)
                 df["latitude"] = df[lat_col]
                 df["longitude"] = df[lon_col]
-                if …
+                if poi_id_column not in df.columns:
                     df["poi_id"] = [f"poi_{i}" for i in range(len(points))]
+                else:
+                    df = df.rename(
+                        columns={poi_id_column: "poi_id"},
+                    )
+                    if df["poi_id"].duplicated().any():
+                        raise ValueError(
+                            f"Column '{poi_id_column}' provided as 'poi_id_column' contains duplicate values."
+                        )
                 return convert_to_geodataframe(df)
             except ValueError as e:
                 raise ValueError(
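The same `poi_id_column` semantics apply across all three input branches: a missing column falls back to generated `poi_0`, `poi_1`, … IDs, an existing column is renamed to `poi_id`, and duplicates raise `ValueError`. For example (column names and values are illustrative):

```python
import pandas as pd
from gigaspatial.generators import PoiViewGenerator  # assumed re-export

schools = pd.DataFrame(
    {
        "school_id": ["S-001", "S-002", "S-003"],  # unique, becomes "poi_id"
        "latitude": [0.32, 0.33, 0.34],
        "longitude": [32.58, 32.59, 32.60],
    }
)

generator = PoiViewGenerator(points=schools, poi_id_column="school_id")
```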
@@ -180,6 +215,53 @@ class PoiViewGenerator:
         """Gets the internal GeoDataFrame of points of interest."""
         return self._points_gdf
 
+    @property
+    def view(self) -> pd.DataFrame:
+        """The DataFrame representing the current point of interest view."""
+        return self._view
+
+    def _update_view(self, new_data: pd.DataFrame) -> None:
+        """
+        Internal helper to update the main view DataFrame with new columns.
+        This method is designed to be called by map_* methods.
+
+        Args:
+            new_data (pd.DataFrame): A DataFrame containing 'poi_id' and new columns
+                to be merged into the main view.
+        """
+        if "poi_id" not in new_data.columns:
+            available_cols = list(new_data.columns)
+            raise ValueError(
+                f"new_data DataFrame must contain 'poi_id' column. "
+                f"Available columns: {available_cols}"
+            )
+
+        # Check for poi_id mismatches
+        original_poi_ids = set(self._view["poi_id"])
+        new_poi_ids = set(new_data["poi_id"])
+        missing_pois = original_poi_ids - new_poi_ids
+
+        if missing_pois:
+            self.logger.warning(
+                f"{len(missing_pois)} POIs will have NaN values for new columns"
+            )
+
+        # Ensure poi_id is the index for efficient merging
+        # Create a copy to avoid SettingWithCopyWarning if new_data is a slice
+        new_data_indexed = new_data.set_index("poi_id").copy()
+
+        # Merge on 'poi_id' (which is now the index of self._view and new_data_indexed)
+        # Using left join to keep all POIs from the original view
+        self._view = (
+            self._view.set_index("poi_id")
+            .join(new_data_indexed, how="left")
+            .reset_index()
+        )
+
+        self.logger.debug(
+            f"View updated with columns: {list(new_data_indexed.columns)}"
+        )
+
     def map_nearest_points(
         self,
         points_df: Union[pd.DataFrame, gpd.GeoDataFrame],
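`_update_view` centralizes how every `map_*` method lands its results: new columns are left-joined on `poi_id`, all original POIs are kept, and missing ones get NaN plus a warning. The same pandas pattern as a standalone sketch:

```python
import pandas as pd

view = pd.DataFrame({"poi_id": ["poi_0", "poi_1", "poi_2"]})
new_data = pd.DataFrame({"poi_id": ["poi_0", "poi_2"], "population": [120, 45]})

# Left join on the poi_id index: poi_1 survives with NaN in the new column.
view = (
    view.set_index("poi_id")
    .join(new_data.set_index("poi_id"), how="left")
    .reset_index()
)
```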
@@ -228,7 +310,7 @@ class PoiViewGenerator:
         # Validate input DataFrame
         if points_df.empty:
             self.logger.info("No points found in the input DataFrame")
-            return self.…
+            return self.view
 
         # Handle GeoDataFrame
         if isinstance(points_df, gpd.GeoDataFrame):
@@ -275,14 +357,19 @@ class PoiViewGenerator:
             lat2=df_nearest[lat_column],
             lon2=df_nearest[lon_column],
         )
-
-
-
+        # Create a temporary DataFrame to hold the results for merging
+        temp_result_df = pd.DataFrame(
+            {
+                "poi_id": points_df_poi["poi_id"],
+                f"{output_prefix}_id": points_df.iloc[idx][id_column].values,
+                f"{output_prefix}_distance": dist,
+            }
+        )
+        self._update_view(temp_result_df)
         self.logger.info(
             f"Nearest points mapping complete with prefix '{output_prefix}'"
         )
-        self.…
-        return result
+        return self.view
 
     def map_google_buildings(
         self,
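`map_nearest_points` therefore no longer builds and returns its own result frame; it funnels `{output_prefix}_id` and `{output_prefix}_distance` through `_update_view` and returns the shared view. A usage sketch, continuing the `generator` from the constructor examples (the facility data is illustrative):

```python
import pandas as pd

facilities = pd.DataFrame(
    {
        "facility_id": ["F-1", "F-2"],
        "latitude": [0.31, 0.35],
        "longitude": [32.57, 32.60],
    }
)

view = generator.map_nearest_points(
    points_df=facilities,
    id_column="facility_id",
    output_prefix="nearest_facility",
)
# view now holds "nearest_facility_id" and "nearest_facility_distance"
# alongside poi_id and any columns added by earlier map_* calls.
```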
@@ -316,7 +403,7 @@ class PoiViewGenerator:
         )
         if buildings_df is None or len(buildings_df) == 0:
             self.logger.info("No Google buildings data found for the provided POIs")
-            return self.…
+            return self.view
 
         return self.map_nearest_points(
             points_df=buildings_df,
@@ -359,16 +446,17 @@ class PoiViewGenerator:
             self.logger.info("No Microsoft buildings data found for the provided POIs")
             return self.points_gdf.copy()
 
+        building_centroids = get_centroids(buildings_gdf)
+
         if "building_id" not in buildings_gdf:
             self.logger.info("Creating building IDs from coordinates")
-
-            buildings_gdf["building_id"] = buildings_gdf.apply(
+            building_centroids["building_id"] = building_centroids.apply(
                 lambda row: f"{row.geometry.y:.6f}_{row.geometry.x:.6f}",
                 axis=1,
            )
 
         return self.map_nearest_points(
-            points_df=…
+            points_df=building_centroids,
             id_column="building_id",
             output_prefix="nearest_ms_building",
             **kwargs,
@@ -381,7 +469,7 @@ class PoiViewGenerator:
         map_radius_meters: Optional[float] = None,
         output_column: str = "zonal_stat",
         value_column: Optional[str] = None,
-
+        predicate: Literal["intersects", "within", "fractional"] = "intersects",
         **kwargs,
     ) -> pd.DataFrame:
         """
@@ -409,9 +497,8 @@ class PoiViewGenerator:
             value_column (str, optional):
                 For polygon data: Name of the column to aggregate. Required for polygon data.
                 Not used for raster data.
-
-
-                intersection. Defaults to False.
+            predicate (Literal["intersects", "within", "fractional"], optional):
+                The spatial relationship to use for aggregation. Defaults to "intersects".
             **kwargs:
                 Additional keyword arguments passed to the sampling/aggregation functions.
 
@@ -424,35 +511,52 @@ class PoiViewGenerator:
             ValueError: If no valid data is provided, if parameters are incompatible,
                 or if required parameters (value_column) are missing for polygon data.
         """
+
         if isinstance(data, list) and all(isinstance(x, TifProcessor) for x in data):
+            results_df = pd.DataFrame({"poi_id": self.points_gdf["poi_id"]})
+
             # Handle raster data
             if not data:
                 self.logger.info("No valid raster data found for the provided POIs")
-                return self.…
+                return self.view
+
+            raster_crs = data[0].crs
+
+            if not all(tp.crs == raster_crs for tp in data):
+                raise ValueError(
+                    "All TifProcessors must have the same CRS for zonal statistics."
+                )
 
             if map_radius_meters is not None:
                 self.logger.info(
                     f"Calculating {stat} within {map_radius_meters}m buffers around POIs"
                 )
                 # Create buffers around POIs
-
+                buffers_gdf = buffer_geodataframe(
                     self.points_gdf,
                     buffer_distance_meters=map_radius_meters,
                     cap_style="round",
-                )
+                )
 
                 # Calculate zonal statistics
                 sampled_values = sample_multiple_tifs_by_polygons(
-                    tif_processors=data,
+                    tif_processors=data,
+                    polygon_list=buffers_gdf.to_crs(raster_crs).geometry,
+                    stat=stat,
+                    **kwargs,
                 )
             else:
                 self.logger.info(f"Sampling {stat} at POI locations")
                 # Sample directly at POI locations
-                coord_list = …
+                coord_list = (
+                    self.points_gdf.to_crs(raster_crs).get_coordinates().to_numpy()
+                )
                 sampled_values = sample_multiple_tifs_by_coordinates(
                     tif_processors=data, coordinate_list=coord_list, **kwargs
                 )
 
+            results_df[output_column] = sampled_values
+
         elif isinstance(data, gpd.GeoDataFrame):
             # Handle polygon data
             if data.empty:
@@ -465,8 +569,13 @@ class PoiViewGenerator:
             if value_column is None:
                 raise ValueError("value_column must be provided for polygon data")
 
+            if value_column not in data.columns:
+                raise ValueError(
+                    f"Value column '{value_column}' not found in input polygon GeoDataFrame."
+                )
+
             self.logger.info(
-                f"Aggregating {value_column} within {map_radius_meters}m buffers around POIs"
+                f"Aggregating {value_column} within {map_radius_meters}m buffers around POIs using predicate '{predicate}'"
             )
 
             # Create buffers around POIs
@@ -477,29 +586,33 @@ class PoiViewGenerator:
             )
 
             # Aggregate polygons to buffers
-
+            aggregation_result_gdf = aggregate_polygons_to_zones(
                 polygons=data,
                 zones=buffer_gdf,
                 value_columns=value_column,
                 aggregation=stat,
-
+                predicate=predicate,
                 zone_id_column="poi_id",
+                output_suffix="",
+                drop_geometry=True,
                 **kwargs,
             )
 
-
-
+            results_df = aggregation_result_gdf[["poi_id", value_column]]
+
+            if output_column != "zonal_stat":
+                results_df = results_df.rename(columns={value_column: output_column})
 
         else:
             raise ValueError(
                 "data must be either a list of TifProcessor objects or a GeoDataFrame"
             )
 
-
-
-
-
-        return
+        self._update_view(results_df)
+        self.logger.info(
+            f"Zonal statistics mapping complete for column(s) derived from '{output_column}' or '{value_column}'"
+        )
+        return self.view
 
     def map_built_s(
         self,
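The reworked `map_zonal_stats` funnels both raster and polygon results into the shared view, with `predicate` steering the polygon aggregation (`"fractional"` presumably weights each polygon's value by its overlapping share). A sketch of both branches; `tif_processors` and the `buildings` frame with its `area_m2` column are illustrative stand-ins:

```python
# Raster branch: mean pixel value within 500 m buffers around each POI.
view = generator.map_zonal_stats(
    data=tif_processors,          # list of TifProcessor objects sharing one CRS
    stat="mean",
    map_radius_meters=500,
    output_column="built_mean",
)

# Polygon branch: area-weighted sum via the new fractional predicate.
view = generator.map_zonal_stats(
    data=buildings,               # illustrative polygon GeoDataFrame
    stat="sum",
    map_radius_meters=500,
    value_column="area_m2",       # illustrative value column
    predicate="fractional",
    output_column="building_area",
)
```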
@@ -539,10 +652,9 @@ class PoiViewGenerator:
             data_store=self.data_store,
             **kwargs,
         )
-        gdf_points = self.points_gdf.to_crs(handler.config.crs)
         self.logger.info("Loading GHSL Built Surface raster tiles")
         tif_processors = handler.load_data(
-
+            self.points_gdf.copy(), ensure_available=self.config.ensure_available
         )
 
         return self.map_zonal_stats(
|
|
555
667
|
|
556
668
|
def map_smod(
|
557
669
|
self,
|
558
|
-
stat="median",
|
559
670
|
dataset_year=2020,
|
560
|
-
dataset_resolution=
|
671
|
+
dataset_resolution=1000,
|
561
672
|
output_column="smod_class",
|
562
673
|
**kwargs,
|
563
674
|
) -> pd.DataFrame:
|
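With `stat` dropped from `map_smod` and `dataset_resolution` now defaulting to 1000, typical calls need only the defaults; `map_built_s` keeps its buffer-based interface (the radius below is illustrative, mirroring the `chain_operations` docstring further down):

```python
view = generator.map_built_s(map_radius_meters=300)  # built-surface stats in 300 m buffers
view = generator.map_smod(dataset_year=2020)         # settlement class at POI locations
```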
@@ -589,16 +700,71 @@ class PoiViewGenerator:
             **kwargs,
         )
 
-        gdf_points = self.points_gdf.to_crs(handler.config.crs)
         self.logger.info("Loading GHSL SMOD raster tiles")
         tif_processors = handler.load_data(
-
+            self.points_gdf.copy(), ensure_available=self.config.ensure_available
         )
 
         return self.map_zonal_stats(
             data=tif_processors,
-            stat=stat,  # Use median for categorical data
             output_column=output_column,
             **kwargs,
         )
 
+    def map_wp_pop(
+        self,
+        country: Union[str, List[str]],
+        map_radius_meters: float,
+        resolution=1000,
+        predicate: Literal[
+            "centroid_within", "intersects", "fractional", "within"
+        ] = "fractional",
+        output_column: str = "population",
+        **kwargs,
+    ):
+        if isinstance(country, str):
+            country = [country]
+
+        handler = WPPopulationHandler(
+            project="pop", resolution=resolution, data_store=self.data_store, **kwargs
+        )
+
+        self.logger.info(
+            f"Mapping WorldPop Population data (year: {handler.config.year}, resolution: {handler.config.resolution}m)"
+        )
+
+        if predicate == "fractional" and resolution == 100:
+            self.logger.warning(
+                "Fractional aggregations only supported for datasets with 1000m resolution. Using `intersects` as predicate"
+            )
+            predicate = "intersects"
+
+        if predicate == "centroid_within":
+            data = []
+            for c in country:
+                data.extend(
+                    handler.load_data(c, ensure_available=self.config.ensure_available)
+                )
+        else:
+            data = pd.concat(
+                [
+                    handler.load_into_geodataframe(
+                        c, ensure_available=self.config.ensure_available
+                    )
+                    for c in country
+                ],
+                ignore_index=True,
+            )
+
+        self.logger.info(f"Mapping WorldPop Population data into {map_radius_meters}m zones around POIs using 'sum' statistic")
+
+        return self.map_zonal_stats(
+            data,
+            stat="sum",
+            map_radius_meters=map_radius_meters,
+            value_column="pixel_value",
+            predicate=predicate,
+            output_column=output_column,
+            **kwargs
+        )
+
@@ -605,32 +771,49 @@ class PoiViewGenerator:
     def save_view(
         self,
         name: str,
         output_format: Optional[str] = None,
     ) -> Path:
         """
-        Saves the current POI view (the enriched …
+        Saves the current POI view (the enriched DataFrame) to a file.
 
-        The output path and format are determined by the `…
+        The output path and format are determined by the `config`
         or overridden by the `output_format` parameter.
 
         Args:
             name (str): The base name for the output file (without extension).
             output_format (Optional[str]):
                 The desired output format (e.g., "csv", "geojson"). If None,
-                the `output_format` from `…
+                the `output_format` from `config` will be used.
 
         Returns:
             Path: The full path to the saved output file.
         """
-        format_to_use = output_format or self.…
-        output_path = self.…
+        format_to_use = output_format or self.config.output_format
+        output_path = self.config.base_path / f"{name}.{format_to_use}"
 
         self.logger.info(f"Saving POI view to {output_path}")
-
-
-
-
-
-
+        # Save the current view, which is a pandas DataFrame, not a GeoDataFrame
+        # GeoJSON/Shapefile formats would require converting back to GeoDataFrame first.
+        # For CSV, Parquet, Feather, this is fine.
+        if format_to_use in ["geojson", "shp", "gpkg"]:
+            self.logger.warning(
+                f"Saving to {format_to_use} requires converting back to GeoDataFrame. Geometry column will be re-added."
+            )
+            # Re-add geometry for saving to geospatial formats
+            view_to_save_gdf = self.view.merge(
+                self.points_gdf[["poi_id", "geometry"]], on="poi_id", how="left"
+            )
+            write_dataset(
+                data=view_to_save_gdf,
+                path=str(output_path),
+                data_store=self.data_store,
+            )
+        else:
+            write_dataset(
+                data=self.view,  # Use the internal _view DataFrame
+                path=str(output_path),
+                data_store=self.data_store,
+            )
 
         return output_path
+
@@ -636,0 +820,65 @@ class PoiViewGenerator:
+    def to_dataframe(self) -> pd.DataFrame:
+        """
+        Returns the current POI view as a DataFrame.
+
+        This method combines all accumulated variables in the view
+
+        Returns:
+            pd.DataFrame: The current view.
+        """
+        return self.view
+
+    def to_geodataframe(self) -> gpd.GeoDataFrame:
+        """
+        Returns the current POI view merged with the original point geometries as a GeoDataFrame.
+
+        This method combines all accumulated variables in the view with the corresponding
+        point geometries, providing a spatially-enabled DataFrame for further analysis or export.
+
+        Returns:
+            gpd.GeoDataFrame: The current view merged with point geometries.
+        """
+        return gpd.GeoDataFrame(
+            self.view.merge(
+                self.points_gdf[["poi_id", "geometry"]], on="poi_id", how="left"
+            ),
+            crs="EPSG:4326",
+        )
+
+    def chain_operations(self, operations: List[dict]) -> "PoiViewGenerator":
+        """
+        Chain multiple mapping operations for fluent interface.
+
+        Args:
+            operations: List of dicts with 'method' and 'kwargs' keys
+
+        Example:
+            generator.chain_operations([
+                {'method': 'map_google_buildings', 'kwargs': {}},
+                {'method': 'map_built_s', 'kwargs': {'map_radius_meters': 200}},
+            ])
+        """
+        for op in operations:
+            method_name = op["method"]
+            kwargs = op.get("kwargs", {})
+            if hasattr(self, method_name):
+                getattr(self, method_name)(**kwargs)
+            else:
+                raise AttributeError(f"Method {method_name} not found")
+        return self
+
+    def validate_data_coverage(self, data_bounds: gpd.GeoDataFrame) -> dict:
+        """
+        Validate how many POIs fall within the data coverage area.
+
+        Returns:
+            dict: Coverage statistics
+        """
+        poi_within = self.points_gdf.within(data_bounds.union_all())
+        coverage_stats = {
+            "total_pois": len(self.points_gdf),
+            "covered_pois": poi_within.sum(),
+            "coverage_percentage": (poi_within.sum() / len(self.points_gdf)) * 100,
+            "uncovered_pois": (~poi_within).sum(),
+        }
+        return coverage_stats
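Taken together, the new methods support a fluent end-to-end flow. A hedged sketch continuing the same `generator` (country code, radius, and file names are illustrative):

```python
# Enrich with WorldPop population; "fractional" falls back to "intersects"
# with a warning when 100 m rasters are requested.
generator.map_wp_pop(country="UGA", map_radius_meters=1000, predicate="fractional")

# Chain further mappings, then export.
generator.chain_operations(
    [
        {"method": "map_google_buildings", "kwargs": {}},
        {"method": "map_built_s", "kwargs": {"map_radius_meters": 200}},
    ]
)

csv_path = generator.save_view("poi_enriched")                         # tabular, written as-is
gpkg_path = generator.save_view("poi_enriched", output_format="gpkg")  # geometry re-added
gdf = generator.to_geodataframe()  # EPSG:4326 points plus all accumulated columns
```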
gigaspatial/generators/zonal/__init__.py
CHANGED
@@ -1,3 +1,4 @@
 from gigaspatial.generators.zonal.base import ZonalViewGeneratorConfig
 from gigaspatial.generators.zonal.geometry import GeometryBasedZonalViewGenerator
-from gigaspatial.generators.…
+from gigaspatial.generators.zonal.mercator import MercatorViewGenerator
+from gigaspatial.generators.zonal.admin import AdminBoundariesViewGenerator
gigaspatial/generators/zonal/admin.py
ADDED
@@ -0,0 +1,84 @@
+from typing import Optional, Union
+from pathlib import Path
+
+import logging
+
+from gigaspatial.core.io.data_store import DataStore
+from gigaspatial.handlers.boundaries import AdminBoundaries
+from gigaspatial.generators.zonal.base import (
+    ZonalViewGeneratorConfig,
+    T,
+)
+from gigaspatial.generators.zonal.geometry import GeometryBasedZonalViewGenerator
+
+
+class AdminBoundariesViewGenerator(GeometryBasedZonalViewGenerator[T]):
+    """
+    Generates zonal views using administrative boundaries as the zones.
+
+    This class specializes in creating zonal views where the zones are defined by
+    administrative boundaries (e.g., countries, states, districts) at a specified
+    administrative level. It extends the `GeometryBasedZonalViewGenerator` and
+    leverages the `AdminBoundaries` handler to load the necessary geographical data.
+
+    The administrative boundaries serve as the base geometries to which other
+    geospatial data (points, polygons, rasters) can be mapped and aggregated.
+
+    Attributes:
+        country (str): The name or code of the country for which to load administrative boundaries.
+        admin_level (int): The administrative level to load (e.g., 0 for country, 1 for states/provinces).
+        admin_path (Union[str, Path], optional): Optional path to a local GeoJSON/Shapefile
+            containing the administrative boundaries. If provided, this local file will be
+            used instead of downloading.
+        config (Optional[ZonalViewGeneratorConfig]): Configuration for the zonal view generation process.
+        data_store (Optional[DataStore]): A DataStore instance for accessing data.
+        logger (Optional[logging.Logger]): A logger instance for logging messages.
+    """
+
+    def __init__(
+        self,
+        country: str,
+        admin_level: int,
+        data_store: Optional[DataStore] = None,
+        admin_path: Optional[Union[str, Path]] = None,
+        config: Optional[ZonalViewGeneratorConfig] = None,
+        logger: logging.Logger = None,
+    ):
+        """
+        Initializes the AdminBoundariesViewGenerator.
+
+        Args:
+            country (str): The name or code of the country (e.g., "USA", "Germany").
+            admin_level (int): The administrative level to load (e.g., 0 for country, 1 for states, 2 for districts).
+            admin_path (Union[str, Path], optional): Path to a local administrative boundaries file (GeoJSON, Shapefile).
+                If provided, overrides default data loading.
+            config (Optional[ZonalViewGeneratorConfig]): Configuration for the zonal view generator.
+                If None, a default config will be used.
+            data_store (Optional[DataStore]): Data storage interface. If None, LocalDataStore is used.
+            logger (Optional[logging.Logger]): Custom logger instance. If None, a default logger is used.
+        """
+
+        super().__init__(
+            zone_data=self._init_zone_data(
+                country, admin_level, data_store, admin_path
+            ),
+            zone_id_column="id",
+            config=config,
+            data_store=data_store,
+            logger=logger,
+        )
+        self.logger.info(
+            f"Initialized AdminBoundariesViewGenerator for {country} (level {admin_level})"
+        )
+
+    def _init_zone_data(
+        self,
+        country,
+        admin_level,
+        data_store: Optional[DataStore] = None,
+        admin_path: Optional[Union[str, Path]] = None,
+    ):
+        gdf_boundaries = AdminBoundaries.create(
+            country, admin_level, data_store, admin_path
+        ).to_geodataframe()
+        return gdf_boundaries
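A short usage sketch for the new generator; the country code and level are illustrative, and the mapping and export machinery comes from the `GeometryBasedZonalViewGenerator` base (the import assumes the class is re-exported from `gigaspatial.generators`; otherwise import it from `gigaspatial.generators.zonal.admin`):

```python
from gigaspatial.generators import AdminBoundariesViewGenerator  # assumed re-export

# District-level zones (admin level 2) for an illustrative country code;
# boundaries are loaded via the AdminBoundaries handler and keyed by "id".
admin_gen = AdminBoundariesViewGenerator(country="UGA", admin_level=2)
```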