giga-spatial 0.6.3__py3-none-any.whl → 0.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {giga_spatial-0.6.3.dist-info → giga_spatial-0.6.5.dist-info}/METADATA +2 -1
- giga_spatial-0.6.5.dist-info/RECORD +50 -0
- gigaspatial/__init__.py +1 -1
- gigaspatial/config.py +35 -4
- gigaspatial/core/io/__init__.py +1 -0
- gigaspatial/core/io/database.py +316 -0
- gigaspatial/generators/__init__.py +5 -1
- gigaspatial/generators/poi.py +228 -43
- gigaspatial/generators/zonal/__init__.py +2 -1
- gigaspatial/generators/zonal/admin.py +84 -0
- gigaspatial/generators/zonal/base.py +221 -64
- gigaspatial/generators/zonal/geometry.py +74 -31
- gigaspatial/generators/zonal/mercator.py +50 -19
- gigaspatial/grid/__init__.py +1 -1
- gigaspatial/grid/mercator_tiles.py +33 -10
- gigaspatial/handlers/__init__.py +5 -1
- gigaspatial/handlers/boundaries.py +226 -48
- gigaspatial/handlers/ghsl.py +79 -14
- gigaspatial/handlers/giga.py +641 -0
- gigaspatial/handlers/hdx.py +50 -51
- gigaspatial/handlers/maxar_image.py +1 -2
- gigaspatial/handlers/rwi.py +5 -2
- gigaspatial/processing/algorithms.py +188 -0
- gigaspatial/processing/geo.py +87 -25
- gigaspatial/processing/tif_processor.py +220 -45
- giga_spatial-0.6.3.dist-info/RECORD +0 -47
- {giga_spatial-0.6.3.dist-info → giga_spatial-0.6.5.dist-info}/WHEEL +0 -0
- {giga_spatial-0.6.3.dist-info → giga_spatial-0.6.5.dist-info}/licenses/LICENSE +0 -0
- {giga_spatial-0.6.3.dist-info → giga_spatial-0.6.5.dist-info}/top_level.txt +0 -0
gigaspatial/generators/poi.py
CHANGED
@@ -18,6 +18,7 @@ from gigaspatial.processing.geo import (
|
|
18
18
|
buffer_geodataframe,
|
19
19
|
detect_coordinate_columns,
|
20
20
|
aggregate_polygons_to_zones,
|
21
|
+
get_centroids,
|
21
22
|
)
|
22
23
|
from gigaspatial.processing.tif_processor import (
|
23
24
|
sample_multiple_tifs_by_polygons,
|
@@ -63,6 +64,7 @@ class PoiViewGenerator:
|
|
63
64
|
points: Union[
|
64
65
|
List[Tuple[float, float]], List[dict], pd.DataFrame, gpd.GeoDataFrame
|
65
66
|
],
|
67
|
+
poi_id_column: str = "poi_id",
|
66
68
|
config: Optional[PoiViewGeneratorConfig] = None,
|
67
69
|
data_store: Optional[DataStore] = None,
|
68
70
|
logger: logging.Logger = None,
|
@@ -87,16 +89,21 @@ class PoiViewGenerator:
|
|
87
89
|
An instance of a data store for managing data access (e.g., LocalDataStore).
|
88
90
|
If None, a default `LocalDataStore` will be used.
|
89
91
|
"""
|
92
|
+
if hasattr(points, "__len__") and len(points) == 0:
|
93
|
+
raise ValueError("Points input cannot be empty")
|
94
|
+
|
90
95
|
self.config = config or PoiViewGeneratorConfig()
|
91
96
|
self.data_store = data_store or LocalDataStore()
|
92
97
|
self.logger = logger or global_config.get_logger(self.__class__.__name__)
|
93
|
-
self._points_gdf = self._init_points_gdf(points)
|
98
|
+
self._points_gdf = self._init_points_gdf(points, poi_id_column)
|
99
|
+
self._view: pd.DataFrame = self._points_gdf.drop(columns=["geometry"])
|
94
100
|
|
95
101
|
@staticmethod
|
96
102
|
def _init_points_gdf(
|
97
103
|
points: Union[
|
98
104
|
List[Tuple[float, float]], List[dict], pd.DataFrame, gpd.GeoDataFrame
|
99
105
|
],
|
106
|
+
poi_id_column: str,
|
100
107
|
) -> gpd.GeoDataFrame:
|
101
108
|
"""
|
102
109
|
Internal static method to convert various point input formats into a GeoDataFrame.
|
@@ -125,8 +132,19 @@ class PoiViewGenerator:
|
|
125
132
|
points = points.copy()
|
126
133
|
points["latitude"] = points.geometry.y
|
127
134
|
points["longitude"] = points.geometry.x
|
128
|
-
if
|
135
|
+
if poi_id_column not in points.columns:
|
129
136
|
points["poi_id"] = [f"poi_{i}" for i in range(len(points))]
|
137
|
+
else:
|
138
|
+
points = points.rename(
|
139
|
+
columns={poi_id_column: "poi_id"},
|
140
|
+
)
|
141
|
+
if points["poi_id"].duplicated().any():
|
142
|
+
raise ValueError(
|
143
|
+
f"Column '{poi_id_column}' provided as 'poi_id_column' contains duplicate values."
|
144
|
+
)
|
145
|
+
|
146
|
+
if points.crs != "EPSG:4326":
|
147
|
+
points = points.to_crs("EPSG:4326")
|
130
148
|
return points
|
131
149
|
|
132
150
|
elif isinstance(points, pd.DataFrame):
|
@@ -136,8 +154,16 @@ class PoiViewGenerator:
|
|
136
154
|
points = points.copy()
|
137
155
|
points["latitude"] = points[lat_col]
|
138
156
|
points["longitude"] = points[lon_col]
|
139
|
-
if
|
157
|
+
if poi_id_column not in points.columns:
|
140
158
|
points["poi_id"] = [f"poi_{i}" for i in range(len(points))]
|
159
|
+
else:
|
160
|
+
points = points.rename(
|
161
|
+
columns={poi_id_column: "poi_id"},
|
162
|
+
)
|
163
|
+
if points["poi_id"].duplicated().any():
|
164
|
+
raise ValueError(
|
165
|
+
f"Column '{poi_id_column}' provided as 'poi_id_column' contains duplicate values."
|
166
|
+
)
|
141
167
|
return convert_to_geodataframe(points)
|
142
168
|
except ValueError as e:
|
143
169
|
raise ValueError(
|
@@ -165,8 +191,16 @@ class PoiViewGenerator:
|
|
165
191
|
lat_col, lon_col = detect_coordinate_columns(df)
|
166
192
|
df["latitude"] = df[lat_col]
|
167
193
|
df["longitude"] = df[lon_col]
|
168
|
-
if
|
194
|
+
if poi_id_column not in df.columns:
|
169
195
|
df["poi_id"] = [f"poi_{i}" for i in range(len(points))]
|
196
|
+
else:
|
197
|
+
df = df.rename(
|
198
|
+
columns={poi_id_column: "poi_id"},
|
199
|
+
)
|
200
|
+
if df["poi_id"].duplicated().any():
|
201
|
+
raise ValueError(
|
202
|
+
f"Column '{poi_id_column}' provided as 'poi_id_column' contains duplicate values."
|
203
|
+
)
|
170
204
|
return convert_to_geodataframe(df)
|
171
205
|
except ValueError as e:
|
172
206
|
raise ValueError(
|
@@ -180,6 +214,53 @@ class PoiViewGenerator:
|
|
180
214
|
"""Gets the internal GeoDataFrame of points of interest."""
|
181
215
|
return self._points_gdf
|
182
216
|
|
217
|
+
@property
|
218
|
+
def view(self) -> pd.DataFrame:
|
219
|
+
"""The DataFrame representing the current point of interest view."""
|
220
|
+
return self._view
|
221
|
+
|
222
|
+
def _update_view(self, new_data: pd.DataFrame) -> None:
|
223
|
+
"""
|
224
|
+
Internal helper to update the main view DataFrame with new columns.
|
225
|
+
This method is designed to be called by map_* methods.
|
226
|
+
|
227
|
+
Args:
|
228
|
+
new_data (pd.DataFrame): A DataFrame containing 'poi_id' and new columns
|
229
|
+
to be merged into the main view.
|
230
|
+
"""
|
231
|
+
if "poi_id" not in new_data.columns:
|
232
|
+
available_cols = list(new_data.columns)
|
233
|
+
raise ValueError(
|
234
|
+
f"new_data DataFrame must contain 'poi_id' column. "
|
235
|
+
f"Available columns: {available_cols}"
|
236
|
+
)
|
237
|
+
|
238
|
+
# Check for poi_id mismatches
|
239
|
+
original_poi_ids = set(self._view["poi_id"])
|
240
|
+
new_poi_ids = set(new_data["poi_id"])
|
241
|
+
missing_pois = original_poi_ids - new_poi_ids
|
242
|
+
|
243
|
+
if missing_pois:
|
244
|
+
self.logger.warning(
|
245
|
+
f"{len(missing_pois)} POIs will have NaN values for new columns"
|
246
|
+
)
|
247
|
+
|
248
|
+
# Ensure poi_id is the index for efficient merging
|
249
|
+
# Create a copy to avoid SettingWithCopyWarning if new_data is a slice
|
250
|
+
new_data_indexed = new_data.set_index("poi_id").copy()
|
251
|
+
|
252
|
+
# Merge on 'poi_id' (which is now the index of self._view and new_data_indexed)
|
253
|
+
# Using left join to keep all POIs from the original view
|
254
|
+
self._view = (
|
255
|
+
self._view.set_index("poi_id")
|
256
|
+
.join(new_data_indexed, how="left")
|
257
|
+
.reset_index()
|
258
|
+
)
|
259
|
+
|
260
|
+
self.logger.debug(
|
261
|
+
f"View updated with columns: {list(new_data_indexed.columns)}"
|
262
|
+
)
|
263
|
+
|
183
264
|
def map_nearest_points(
|
184
265
|
self,
|
185
266
|
points_df: Union[pd.DataFrame, gpd.GeoDataFrame],
|
@@ -228,7 +309,7 @@ class PoiViewGenerator:
|
|
228
309
|
# Validate input DataFrame
|
229
310
|
if points_df.empty:
|
230
311
|
self.logger.info("No points found in the input DataFrame")
|
231
|
-
return self.
|
312
|
+
return self.view
|
232
313
|
|
233
314
|
# Handle GeoDataFrame
|
234
315
|
if isinstance(points_df, gpd.GeoDataFrame):
|
@@ -275,14 +356,19 @@ class PoiViewGenerator:
|
|
275
356
|
lat2=df_nearest[lat_column],
|
276
357
|
lon2=df_nearest[lon_column],
|
277
358
|
)
|
278
|
-
|
279
|
-
|
280
|
-
|
359
|
+
# Create a temporary DataFrame to hold the results for merging
|
360
|
+
temp_result_df = pd.DataFrame(
|
361
|
+
{
|
362
|
+
"poi_id": points_df_poi["poi_id"],
|
363
|
+
f"{output_prefix}_id": points_df.iloc[idx][id_column].values,
|
364
|
+
f"{output_prefix}_distance": dist,
|
365
|
+
}
|
366
|
+
)
|
367
|
+
self._update_view(temp_result_df)
|
281
368
|
self.logger.info(
|
282
369
|
f"Nearest points mapping complete with prefix '{output_prefix}'"
|
283
370
|
)
|
284
|
-
self.
|
285
|
-
return result
|
371
|
+
return self.view
|
286
372
|
|
287
373
|
def map_google_buildings(
|
288
374
|
self,
|
@@ -316,7 +402,7 @@ class PoiViewGenerator:
|
|
316
402
|
)
|
317
403
|
if buildings_df is None or len(buildings_df) == 0:
|
318
404
|
self.logger.info("No Google buildings data found for the provided POIs")
|
319
|
-
return self.
|
405
|
+
return self.view
|
320
406
|
|
321
407
|
return self.map_nearest_points(
|
322
408
|
points_df=buildings_df,
|
@@ -359,16 +445,17 @@ class PoiViewGenerator:
|
|
359
445
|
self.logger.info("No Microsoft buildings data found for the provided POIs")
|
360
446
|
return self.points_gdf.copy()
|
361
447
|
|
448
|
+
building_centroids = get_centroids(buildings_gdf)
|
449
|
+
|
362
450
|
if "building_id" not in buildings_gdf:
|
363
451
|
self.logger.info("Creating building IDs from coordinates")
|
364
|
-
|
365
|
-
buildings_gdf["building_id"] = buildings_gdf.apply(
|
452
|
+
building_centroids["building_id"] = building_centroids.apply(
|
366
453
|
lambda row: f"{row.geometry.y:.6f}_{row.geometry.x:.6f}",
|
367
454
|
axis=1,
|
368
455
|
)
|
369
456
|
|
370
457
|
return self.map_nearest_points(
|
371
|
-
points_df=
|
458
|
+
points_df=building_centroids,
|
372
459
|
id_column="building_id",
|
373
460
|
output_prefix="nearest_ms_building",
|
374
461
|
**kwargs,
|
@@ -424,35 +511,52 @@ class PoiViewGenerator:
|
|
424
511
|
ValueError: If no valid data is provided, if parameters are incompatible,
|
425
512
|
or if required parameters (value_column) are missing for polygon data.
|
426
513
|
"""
|
514
|
+
|
427
515
|
if isinstance(data, list) and all(isinstance(x, TifProcessor) for x in data):
|
516
|
+
results_df = pd.DataFrame({"poi_id": self.points_gdf["poi_id"]})
|
517
|
+
|
428
518
|
# Handle raster data
|
429
519
|
if not data:
|
430
520
|
self.logger.info("No valid raster data found for the provided POIs")
|
431
|
-
return self.
|
521
|
+
return self.view
|
522
|
+
|
523
|
+
raster_crs = data[0].crs
|
524
|
+
|
525
|
+
if not all(tp.crs == raster_crs for tp in data):
|
526
|
+
raise ValueError(
|
527
|
+
"All TifProcessors must have the same CRS for zonal statistics."
|
528
|
+
)
|
432
529
|
|
433
530
|
if map_radius_meters is not None:
|
434
531
|
self.logger.info(
|
435
532
|
f"Calculating {stat} within {map_radius_meters}m buffers around POIs"
|
436
533
|
)
|
437
534
|
# Create buffers around POIs
|
438
|
-
|
535
|
+
buffers_gdf = buffer_geodataframe(
|
439
536
|
self.points_gdf,
|
440
537
|
buffer_distance_meters=map_radius_meters,
|
441
538
|
cap_style="round",
|
442
|
-
)
|
539
|
+
)
|
443
540
|
|
444
541
|
# Calculate zonal statistics
|
445
542
|
sampled_values = sample_multiple_tifs_by_polygons(
|
446
|
-
tif_processors=data,
|
543
|
+
tif_processors=data,
|
544
|
+
polygon_list=buffers_gdf.to_crs(raster_crs).geometry,
|
545
|
+
stat=stat,
|
546
|
+
**kwargs,
|
447
547
|
)
|
448
548
|
else:
|
449
549
|
self.logger.info(f"Sampling {stat} at POI locations")
|
450
550
|
# Sample directly at POI locations
|
451
|
-
coord_list =
|
551
|
+
coord_list = (
|
552
|
+
self.points_gdf.to_crs(raster_crs).get_coordinates().to_numpy()
|
553
|
+
)
|
452
554
|
sampled_values = sample_multiple_tifs_by_coordinates(
|
453
555
|
tif_processors=data, coordinate_list=coord_list, **kwargs
|
454
556
|
)
|
455
557
|
|
558
|
+
results_df[output_column] = sampled_values
|
559
|
+
|
456
560
|
elif isinstance(data, gpd.GeoDataFrame):
|
457
561
|
# Handle polygon data
|
458
562
|
if data.empty:
|
@@ -465,6 +569,11 @@ class PoiViewGenerator:
|
|
465
569
|
if value_column is None:
|
466
570
|
raise ValueError("value_column must be provided for polygon data")
|
467
571
|
|
572
|
+
if value_column not in data.columns:
|
573
|
+
raise ValueError(
|
574
|
+
f"Value column '{value_column}' not found in input polygon GeoDataFrame."
|
575
|
+
)
|
576
|
+
|
468
577
|
self.logger.info(
|
469
578
|
f"Aggregating {value_column} within {map_radius_meters}m buffers around POIs"
|
470
579
|
)
|
@@ -477,7 +586,7 @@ class PoiViewGenerator:
|
|
477
586
|
)
|
478
587
|
|
479
588
|
# Aggregate polygons to buffers
|
480
|
-
|
589
|
+
aggregation_result_gdf = aggregate_polygons_to_zones(
|
481
590
|
polygons=data,
|
482
591
|
zones=buffer_gdf,
|
483
592
|
value_columns=value_column,
|
@@ -487,19 +596,18 @@ class PoiViewGenerator:
|
|
487
596
|
**kwargs,
|
488
597
|
)
|
489
598
|
|
490
|
-
|
491
|
-
sampled_values = result[value_column].values
|
599
|
+
results_df = aggregation_result_gdf[["poi_id", value_column]].copy()
|
492
600
|
|
493
601
|
else:
|
494
602
|
raise ValueError(
|
495
603
|
"data must be either a list of TifProcessor objects or a GeoDataFrame"
|
496
604
|
)
|
497
605
|
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
return
|
606
|
+
self._update_view(results_df)
|
607
|
+
self.logger.info(
|
608
|
+
f"Zonal statistics mapping complete for column(s) derived from '{output_column}' or '{value_column}'"
|
609
|
+
)
|
610
|
+
return self.view
|
503
611
|
|
504
612
|
def map_built_s(
|
505
613
|
self,
|
@@ -539,10 +647,9 @@ class PoiViewGenerator:
|
|
539
647
|
data_store=self.data_store,
|
540
648
|
**kwargs,
|
541
649
|
)
|
542
|
-
gdf_points = self.points_gdf.to_crs(handler.config.crs)
|
543
650
|
self.logger.info("Loading GHSL Built Surface raster tiles")
|
544
651
|
tif_processors = handler.load_data(
|
545
|
-
|
652
|
+
self.points_gdf.copy(), ensure_available=self.config.ensure_available
|
546
653
|
)
|
547
654
|
|
548
655
|
return self.map_zonal_stats(
|
@@ -557,7 +664,7 @@ class PoiViewGenerator:
|
|
557
664
|
self,
|
558
665
|
stat="median",
|
559
666
|
dataset_year=2020,
|
560
|
-
dataset_resolution=
|
667
|
+
dataset_resolution=1000,
|
561
668
|
output_column="smod_class",
|
562
669
|
**kwargs,
|
563
670
|
) -> pd.DataFrame:
|
@@ -589,10 +696,9 @@ class PoiViewGenerator:
|
|
589
696
|
**kwargs,
|
590
697
|
)
|
591
698
|
|
592
|
-
gdf_points = self.points_gdf.to_crs(handler.config.crs)
|
593
699
|
self.logger.info("Loading GHSL SMOD raster tiles")
|
594
700
|
tif_processors = handler.load_data(
|
595
|
-
|
701
|
+
self.points_gdf.copy(), ensure_available=self.config.ensure_available
|
596
702
|
)
|
597
703
|
|
598
704
|
return self.map_zonal_stats(
|
@@ -608,29 +714,108 @@ class PoiViewGenerator:
|
|
608
714
|
output_format: Optional[str] = None,
|
609
715
|
) -> Path:
|
610
716
|
"""
|
611
|
-
Saves the current POI view (the enriched
|
717
|
+
Saves the current POI view (the enriched DataFrame) to a file.
|
612
718
|
|
613
|
-
The output path and format are determined by the `
|
719
|
+
The output path and format are determined by the `config`
|
614
720
|
or overridden by the `output_format` parameter.
|
615
721
|
|
616
722
|
Args:
|
617
723
|
name (str): The base name for the output file (without extension).
|
618
724
|
output_format (Optional[str]):
|
619
725
|
The desired output format (e.g., "csv", "geojson"). If None,
|
620
|
-
the `output_format` from `
|
726
|
+
the `output_format` from `config` will be used.
|
621
727
|
|
622
728
|
Returns:
|
623
729
|
Path: The full path to the saved output file.
|
624
730
|
"""
|
625
|
-
format_to_use = output_format or self.
|
626
|
-
output_path = self.
|
731
|
+
format_to_use = output_format or self.config.output_format
|
732
|
+
output_path = self.config.base_path / f"{name}.{format_to_use}"
|
627
733
|
|
628
734
|
self.logger.info(f"Saving POI view to {output_path}")
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
735
|
+
# Save the current view, which is a pandas DataFrame, not a GeoDataFrame
|
736
|
+
# GeoJSON/Shapefile formats would require converting back to GeoDataFrame first.
|
737
|
+
# For CSV, Parquet, Feather, this is fine.
|
738
|
+
if format_to_use in ["geojson", "shp", "gpkg"]:
|
739
|
+
self.logger.warning(
|
740
|
+
f"Saving to {format_to_use} requires converting back to GeoDataFrame. Geometry column will be re-added."
|
741
|
+
)
|
742
|
+
# Re-add geometry for saving to geospatial formats
|
743
|
+
view_to_save_gdf = self.view.merge(
|
744
|
+
self.points_gdf[["poi_id", "geometry"]], on="poi_id", how="left"
|
745
|
+
)
|
746
|
+
write_dataset(
|
747
|
+
data=view_to_save_gdf,
|
748
|
+
path=str(output_path),
|
749
|
+
data_store=self.data_store,
|
750
|
+
)
|
751
|
+
else:
|
752
|
+
write_dataset(
|
753
|
+
data=self.view, # Use the internal _view DataFrame
|
754
|
+
path=str(output_path),
|
755
|
+
data_store=self.data_store,
|
756
|
+
)
|
635
757
|
|
636
758
|
return output_path
|
759
|
+
|
760
|
+
def to_dataframe(self) -> pd.DataFrame:
|
761
|
+
"""
|
762
|
+
Returns the current POI view as a DataFrame.
|
763
|
+
|
764
|
+
This method combines all accumulated variables in the view
|
765
|
+
|
766
|
+
Returns:
|
767
|
+
pd.DataFrame: The current view.
|
768
|
+
"""
|
769
|
+
return self.view
|
770
|
+
|
771
|
+
def to_geodataframe(self) -> gpd.GeoDataFrame:
|
772
|
+
"""
|
773
|
+
Returns the current POI view merged with the original point geometries as a GeoDataFrame.
|
774
|
+
|
775
|
+
This method combines all accumulated variables in the view with the corresponding
|
776
|
+
point geometries, providing a spatially-enabled DataFrame for further analysis or export.
|
777
|
+
|
778
|
+
Returns:
|
779
|
+
gpd.GeoDataFrame: The current view merged with point geometries.
|
780
|
+
"""
|
781
|
+
return self.view.merge(
|
782
|
+
self.points_gdf[["poi_id", "geometry"]], on="poi_id", how="left"
|
783
|
+
)
|
784
|
+
|
785
|
+
def chain_operations(self, operations: List[dict]) -> "PoiViewGenerator":
|
786
|
+
"""
|
787
|
+
Chain multiple mapping operations for fluent interface.
|
788
|
+
|
789
|
+
Args:
|
790
|
+
operations: List of dicts with 'method' and 'kwargs' keys
|
791
|
+
|
792
|
+
Example:
|
793
|
+
generator.chain_operations([
|
794
|
+
{'method': 'map_google_buildings', 'kwargs': {}},
|
795
|
+
{'method': 'map_built_s', 'kwargs': {'map_radius_meters': 200}},
|
796
|
+
])
|
797
|
+
"""
|
798
|
+
for op in operations:
|
799
|
+
method_name = op["method"]
|
800
|
+
kwargs = op.get("kwargs", {})
|
801
|
+
if hasattr(self, method_name):
|
802
|
+
getattr(self, method_name)(**kwargs)
|
803
|
+
else:
|
804
|
+
raise AttributeError(f"Method {method_name} not found")
|
805
|
+
return self
|
806
|
+
|
807
|
+
def validate_data_coverage(self, data_bounds: gpd.GeoDataFrame) -> dict:
|
808
|
+
"""
|
809
|
+
Validate how many POIs fall within the data coverage area.
|
810
|
+
|
811
|
+
Returns:
|
812
|
+
dict: Coverage statistics
|
813
|
+
"""
|
814
|
+
poi_within = self.points_gdf.within(data_bounds.union_all())
|
815
|
+
coverage_stats = {
|
816
|
+
"total_pois": len(self.points_gdf),
|
817
|
+
"covered_pois": poi_within.sum(),
|
818
|
+
"coverage_percentage": (poi_within.sum() / len(self.points_gdf)) * 100,
|
819
|
+
"uncovered_pois": (~poi_within).sum(),
|
820
|
+
}
|
821
|
+
return coverage_stats
|
@@ -1,3 +1,4 @@
|
|
1
1
|
from gigaspatial.generators.zonal.base import ZonalViewGeneratorConfig
|
2
2
|
from gigaspatial.generators.zonal.geometry import GeometryBasedZonalViewGenerator
|
3
|
-
from gigaspatial.generators.
|
3
|
+
from gigaspatial.generators.zonal.mercator import MercatorViewGenerator
|
4
|
+
from gigaspatial.generators.zonal.admin import AdminBoundariesViewGenerator
|
@@ -0,0 +1,84 @@
|
|
1
|
+
from typing import Optional, Union
|
2
|
+
from pathlib import Path
|
3
|
+
|
4
|
+
import logging
|
5
|
+
|
6
|
+
from gigaspatial.core.io.data_store import DataStore
|
7
|
+
from gigaspatial.handlers.boundaries import AdminBoundaries
|
8
|
+
from gigaspatial.generators.zonal.base import (
|
9
|
+
ZonalViewGeneratorConfig,
|
10
|
+
T,
|
11
|
+
)
|
12
|
+
from gigaspatial.generators.zonal.geometry import GeometryBasedZonalViewGenerator
|
13
|
+
|
14
|
+
|
15
|
+
class AdminBoundariesViewGenerator(GeometryBasedZonalViewGenerator[T]):
|
16
|
+
"""
|
17
|
+
Generates zonal views using administrative boundaries as the zones.
|
18
|
+
|
19
|
+
This class specializes in creating zonal views where the zones are defined by
|
20
|
+
administrative boundaries (e.g., countries, states, districts) at a specified
|
21
|
+
administrative level. It extends the `GeometryBasedZonalViewGenerator` and
|
22
|
+
leverages the `AdminBoundaries` handler to load the necessary geographical data.
|
23
|
+
|
24
|
+
The administrative boundaries serve as the base geometries to which other
|
25
|
+
geospatial data (points, polygons, rasters) can be mapped and aggregated.
|
26
|
+
|
27
|
+
Attributes:
|
28
|
+
country (str): The name or code of the country for which to load administrative boundaries.
|
29
|
+
admin_level (int): The administrative level to load (e.g., 0 for country, 1 for states/provinces).
|
30
|
+
admin_path (Union[str, Path], optional): Optional path to a local GeoJSON/Shapefile
|
31
|
+
containing the administrative boundaries. If provided, this local file will be
|
32
|
+
used instead of downloading.
|
33
|
+
config (Optional[ZonalViewGeneratorConfig]): Configuration for the zonal view generation process.
|
34
|
+
data_store (Optional[DataStore]): A DataStore instance for accessing data.
|
35
|
+
logger (Optional[logging.Logger]): A logger instance for logging messages.
|
36
|
+
"""
|
37
|
+
|
38
|
+
def __init__(
|
39
|
+
self,
|
40
|
+
country: str,
|
41
|
+
admin_level: int,
|
42
|
+
data_store: Optional[DataStore] = None,
|
43
|
+
admin_path: Optional[Union[str, Path]] = None,
|
44
|
+
config: Optional[ZonalViewGeneratorConfig] = None,
|
45
|
+
logger: logging.Logger = None,
|
46
|
+
):
|
47
|
+
"""
|
48
|
+
Initializes the AdminBoundariesViewGenerator.
|
49
|
+
|
50
|
+
Args:
|
51
|
+
country (str): The name or code of the country (e.g., "USA", "Germany").
|
52
|
+
admin_level (int): The administrative level to load (e.g., 0 for country, 1 for states, 2 for districts).
|
53
|
+
admin_path (Union[str, Path], optional): Path to a local administrative boundaries file (GeoJSON, Shapefile).
|
54
|
+
If provided, overrides default data loading.
|
55
|
+
config (Optional[ZonalViewGeneratorConfig]): Configuration for the zonal view generator.
|
56
|
+
If None, a default config will be used.
|
57
|
+
data_store (Optional[DataStore]): Data storage interface. If None, LocalDataStore is used.
|
58
|
+
logger (Optional[logging.Logger]): Custom logger instance. If None, a default logger is used.
|
59
|
+
"""
|
60
|
+
|
61
|
+
super().__init__(
|
62
|
+
zone_data=self._init_zone_data(
|
63
|
+
country, admin_level, data_store, admin_path
|
64
|
+
),
|
65
|
+
zone_id_column="id",
|
66
|
+
config=config,
|
67
|
+
data_store=data_store,
|
68
|
+
logger=logger,
|
69
|
+
)
|
70
|
+
self.logger.info(
|
71
|
+
f"Initialized AdminBoundariesViewGenerator for {country} (level {admin_level})"
|
72
|
+
)
|
73
|
+
|
74
|
+
def _init_zone_data(
|
75
|
+
self,
|
76
|
+
country,
|
77
|
+
admin_level,
|
78
|
+
data_store: Optional[DataStore] = None,
|
79
|
+
admin_path: Optional[Union[str, Path]] = None,
|
80
|
+
):
|
81
|
+
gdf_boundaries = AdminBoundaries.create(
|
82
|
+
country, admin_level, data_store, admin_path
|
83
|
+
).to_geodataframe()
|
84
|
+
return gdf_boundaries
|