giga-spatial 0.6.4__py3-none-any.whl → 0.6.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  from pathlib import Path
2
- from typing import List, Optional, Union, Tuple
2
+ from typing import List, Optional, Union, Tuple, Literal
3
3
  from pydantic.dataclasses import dataclass, Field
4
4
 
5
5
  import geopandas as gpd
@@ -13,11 +13,13 @@ from gigaspatial.config import config as global_config
13
13
  from gigaspatial.handlers.google_open_buildings import GoogleOpenBuildingsHandler
14
14
  from gigaspatial.handlers.microsoft_global_buildings import MSBuildingsHandler
15
15
  from gigaspatial.handlers.ghsl import GHSLDataHandler
16
+ from gigaspatial.handlers.worldpop import WPPopulationHandler
16
17
  from gigaspatial.processing.geo import (
17
18
  convert_to_geodataframe,
18
19
  buffer_geodataframe,
19
20
  detect_coordinate_columns,
20
21
  aggregate_polygons_to_zones,
22
+ get_centroids,
21
23
  )
22
24
  from gigaspatial.processing.tif_processor import (
23
25
  sample_multiple_tifs_by_polygons,
@@ -63,6 +65,7 @@ class PoiViewGenerator:
63
65
  points: Union[
64
66
  List[Tuple[float, float]], List[dict], pd.DataFrame, gpd.GeoDataFrame
65
67
  ],
68
+ poi_id_column: str = "poi_id",
66
69
  config: Optional[PoiViewGeneratorConfig] = None,
67
70
  data_store: Optional[DataStore] = None,
68
71
  logger: logging.Logger = None,
@@ -87,16 +90,21 @@ class PoiViewGenerator:
87
90
  An instance of a data store for managing data access (e.g., LocalDataStore).
88
91
  If None, a default `LocalDataStore` will be used.
89
92
  """
93
+ if hasattr(points, "__len__") and len(points) == 0:
94
+ raise ValueError("Points input cannot be empty")
95
+
90
96
  self.config = config or PoiViewGeneratorConfig()
91
97
  self.data_store = data_store or LocalDataStore()
92
98
  self.logger = logger or global_config.get_logger(self.__class__.__name__)
93
- self._points_gdf = self._init_points_gdf(points)
99
+ self._points_gdf = self._init_points_gdf(points, poi_id_column)
100
+ self._view: pd.DataFrame = self._points_gdf.drop(columns=["geometry"])
94
101
 
95
102
  @staticmethod
96
103
  def _init_points_gdf(
97
104
  points: Union[
98
105
  List[Tuple[float, float]], List[dict], pd.DataFrame, gpd.GeoDataFrame
99
106
  ],
107
+ poi_id_column: str,
100
108
  ) -> gpd.GeoDataFrame:
101
109
  """
102
110
  Internal static method to convert various point input formats into a GeoDataFrame.
@@ -125,8 +133,19 @@ class PoiViewGenerator:
125
133
  points = points.copy()
126
134
  points["latitude"] = points.geometry.y
127
135
  points["longitude"] = points.geometry.x
128
- if "poi_id" not in points.columns:
136
+ if poi_id_column not in points.columns:
129
137
  points["poi_id"] = [f"poi_{i}" for i in range(len(points))]
138
+ else:
139
+ points = points.rename(
140
+ columns={poi_id_column: "poi_id"},
141
+ )
142
+ if points["poi_id"].duplicated().any():
143
+ raise ValueError(
144
+ f"Column '{poi_id_column}' provided as 'poi_id_column' contains duplicate values."
145
+ )
146
+
147
+ if points.crs != "EPSG:4326":
148
+ points = points.to_crs("EPSG:4326")
130
149
  return points
131
150
 
132
151
  elif isinstance(points, pd.DataFrame):
@@ -136,8 +155,16 @@ class PoiViewGenerator:
136
155
  points = points.copy()
137
156
  points["latitude"] = points[lat_col]
138
157
  points["longitude"] = points[lon_col]
139
- if "poi_id" not in points.columns:
158
+ if poi_id_column not in points.columns:
140
159
  points["poi_id"] = [f"poi_{i}" for i in range(len(points))]
160
+ else:
161
+ points = points.rename(
162
+ columns={poi_id_column: "poi_id"},
163
+ )
164
+ if points["poi_id"].duplicated().any():
165
+ raise ValueError(
166
+ f"Column '{poi_id_column}' provided as 'poi_id_column' contains duplicate values."
167
+ )
141
168
  return convert_to_geodataframe(points)
142
169
  except ValueError as e:
143
170
  raise ValueError(
@@ -165,8 +192,16 @@ class PoiViewGenerator:
165
192
  lat_col, lon_col = detect_coordinate_columns(df)
166
193
  df["latitude"] = df[lat_col]
167
194
  df["longitude"] = df[lon_col]
168
- if "poi_id" not in df.columns:
195
+ if poi_id_column not in df.columns:
169
196
  df["poi_id"] = [f"poi_{i}" for i in range(len(points))]
197
+ else:
198
+ df = df.rename(
199
+ columns={poi_id_column: "poi_id"},
200
+ )
201
+ if df["poi_id"].duplicated().any():
202
+ raise ValueError(
203
+ f"Column '{poi_id_column}' provided as 'poi_id_column' contains duplicate values."
204
+ )
170
205
  return convert_to_geodataframe(df)
171
206
  except ValueError as e:
172
207
  raise ValueError(
@@ -180,6 +215,53 @@ class PoiViewGenerator:
180
215
  """Gets the internal GeoDataFrame of points of interest."""
181
216
  return self._points_gdf
182
217
 
218
+ @property
219
+ def view(self) -> pd.DataFrame:
220
+ """The DataFrame representing the current point of interest view."""
221
+ return self._view
222
+
223
+ def _update_view(self, new_data: pd.DataFrame) -> None:
224
+ """
225
+ Internal helper to update the main view DataFrame with new columns.
226
+ This method is designed to be called by map_* methods.
227
+
228
+ Args:
229
+ new_data (pd.DataFrame): A DataFrame containing 'poi_id' and new columns
230
+ to be merged into the main view.
231
+ """
232
+ if "poi_id" not in new_data.columns:
233
+ available_cols = list(new_data.columns)
234
+ raise ValueError(
235
+ f"new_data DataFrame must contain 'poi_id' column. "
236
+ f"Available columns: {available_cols}"
237
+ )
238
+
239
+ # Check for poi_id mismatches
240
+ original_poi_ids = set(self._view["poi_id"])
241
+ new_poi_ids = set(new_data["poi_id"])
242
+ missing_pois = original_poi_ids - new_poi_ids
243
+
244
+ if missing_pois:
245
+ self.logger.warning(
246
+ f"{len(missing_pois)} POIs will have NaN values for new columns"
247
+ )
248
+
249
+ # Ensure poi_id is the index for efficient merging
250
+ # Create a copy to avoid SettingWithCopyWarning if new_data is a slice
251
+ new_data_indexed = new_data.set_index("poi_id").copy()
252
+
253
+ # Merge on 'poi_id' (which is now the index of self._view and new_data_indexed)
254
+ # Using left join to keep all POIs from the original view
255
+ self._view = (
256
+ self._view.set_index("poi_id")
257
+ .join(new_data_indexed, how="left")
258
+ .reset_index()
259
+ )
260
+
261
+ self.logger.debug(
262
+ f"View updated with columns: {list(new_data_indexed.columns)}"
263
+ )
264
+
183
265
  def map_nearest_points(
184
266
  self,
185
267
  points_df: Union[pd.DataFrame, gpd.GeoDataFrame],
@@ -228,7 +310,7 @@ class PoiViewGenerator:
228
310
  # Validate input DataFrame
229
311
  if points_df.empty:
230
312
  self.logger.info("No points found in the input DataFrame")
231
- return self.points_gdf.copy()
313
+ return self.view
232
314
 
233
315
  # Handle GeoDataFrame
234
316
  if isinstance(points_df, gpd.GeoDataFrame):
@@ -275,14 +357,19 @@ class PoiViewGenerator:
275
357
  lat2=df_nearest[lat_column],
276
358
  lon2=df_nearest[lon_column],
277
359
  )
278
- result = points_df_poi.copy()
279
- result[f"{output_prefix}_id"] = df_nearest[id_column].to_numpy()
280
- result[f"{output_prefix}_distance"] = dist
360
+ # Create a temporary DataFrame to hold the results for merging
361
+ temp_result_df = pd.DataFrame(
362
+ {
363
+ "poi_id": points_df_poi["poi_id"],
364
+ f"{output_prefix}_id": points_df.iloc[idx][id_column].values,
365
+ f"{output_prefix}_distance": dist,
366
+ }
367
+ )
368
+ self._update_view(temp_result_df)
281
369
  self.logger.info(
282
370
  f"Nearest points mapping complete with prefix '{output_prefix}'"
283
371
  )
284
- self._points_gdf = result
285
- return result
372
+ return self.view
286
373
 
287
374
  def map_google_buildings(
288
375
  self,
@@ -316,7 +403,7 @@ class PoiViewGenerator:
316
403
  )
317
404
  if buildings_df is None or len(buildings_df) == 0:
318
405
  self.logger.info("No Google buildings data found for the provided POIs")
319
- return self.points_gdf.copy()
406
+ return self.view
320
407
 
321
408
  return self.map_nearest_points(
322
409
  points_df=buildings_df,
@@ -359,16 +446,17 @@ class PoiViewGenerator:
359
446
  self.logger.info("No Microsoft buildings data found for the provided POIs")
360
447
  return self.points_gdf.copy()
361
448
 
449
+ building_centroids = get_centroids(buildings_gdf)
450
+
362
451
  if "building_id" not in buildings_gdf:
363
452
  self.logger.info("Creating building IDs from coordinates")
364
- buildings_gdf = buildings_gdf.copy()
365
- buildings_gdf["building_id"] = buildings_gdf.apply(
453
+ building_centroids["building_id"] = building_centroids.apply(
366
454
  lambda row: f"{row.geometry.y:.6f}_{row.geometry.x:.6f}",
367
455
  axis=1,
368
456
  )
369
457
 
370
458
  return self.map_nearest_points(
371
- points_df=buildings_gdf,
459
+ points_df=building_centroids,
372
460
  id_column="building_id",
373
461
  output_prefix="nearest_ms_building",
374
462
  **kwargs,
@@ -381,7 +469,7 @@ class PoiViewGenerator:
381
469
  map_radius_meters: Optional[float] = None,
382
470
  output_column: str = "zonal_stat",
383
471
  value_column: Optional[str] = None,
384
- area_weighted: bool = False,
472
+ predicate: Literal["intersects", "within", "fractional"] = "intersects",
385
473
  **kwargs,
386
474
  ) -> pd.DataFrame:
387
475
  """
@@ -409,9 +497,8 @@ class PoiViewGenerator:
409
497
  value_column (str, optional):
410
498
  For polygon data: Name of the column to aggregate. Required for polygon data.
411
499
  Not used for raster data.
412
- area_weighted (bool, optional):
413
- For polygon data: Whether to weight values by fractional area of
414
- intersection. Defaults to False.
500
+ predicate (Literal["intersects", "within", "fractional"], optional):
501
+ The spatial relationship to use for aggregation. Defaults to "intersects".
415
502
  **kwargs:
416
503
  Additional keyword arguments passed to the sampling/aggregation functions.
417
504
 
@@ -424,35 +511,52 @@ class PoiViewGenerator:
424
511
  ValueError: If no valid data is provided, if parameters are incompatible,
425
512
  or if required parameters (value_column) are missing for polygon data.
426
513
  """
514
+
427
515
  if isinstance(data, list) and all(isinstance(x, TifProcessor) for x in data):
516
+ results_df = pd.DataFrame({"poi_id": self.points_gdf["poi_id"]})
517
+
428
518
  # Handle raster data
429
519
  if not data:
430
520
  self.logger.info("No valid raster data found for the provided POIs")
431
- return self.points_gdf.copy()
521
+ return self.view
522
+
523
+ raster_crs = data[0].crs
524
+
525
+ if not all(tp.crs == raster_crs for tp in data):
526
+ raise ValueError(
527
+ "All TifProcessors must have the same CRS for zonal statistics."
528
+ )
432
529
 
433
530
  if map_radius_meters is not None:
434
531
  self.logger.info(
435
532
  f"Calculating {stat} within {map_radius_meters}m buffers around POIs"
436
533
  )
437
534
  # Create buffers around POIs
438
- polygon_list = buffer_geodataframe(
535
+ buffers_gdf = buffer_geodataframe(
439
536
  self.points_gdf,
440
537
  buffer_distance_meters=map_radius_meters,
441
538
  cap_style="round",
442
- ).geometry
539
+ )
443
540
 
444
541
  # Calculate zonal statistics
445
542
  sampled_values = sample_multiple_tifs_by_polygons(
446
- tif_processors=data, polygon_list=polygon_list, stat=stat, **kwargs
543
+ tif_processors=data,
544
+ polygon_list=buffers_gdf.to_crs(raster_crs).geometry,
545
+ stat=stat,
546
+ **kwargs,
447
547
  )
448
548
  else:
449
549
  self.logger.info(f"Sampling {stat} at POI locations")
450
550
  # Sample directly at POI locations
451
- coord_list = self.points_gdf[["latitude", "longitude"]].to_numpy()
551
+ coord_list = (
552
+ self.points_gdf.to_crs(raster_crs).get_coordinates().to_numpy()
553
+ )
452
554
  sampled_values = sample_multiple_tifs_by_coordinates(
453
555
  tif_processors=data, coordinate_list=coord_list, **kwargs
454
556
  )
455
557
 
558
+ results_df[output_column] = sampled_values
559
+
456
560
  elif isinstance(data, gpd.GeoDataFrame):
457
561
  # Handle polygon data
458
562
  if data.empty:
@@ -465,8 +569,13 @@ class PoiViewGenerator:
465
569
  if value_column is None:
466
570
  raise ValueError("value_column must be provided for polygon data")
467
571
 
572
+ if value_column not in data.columns:
573
+ raise ValueError(
574
+ f"Value column '{value_column}' not found in input polygon GeoDataFrame."
575
+ )
576
+
468
577
  self.logger.info(
469
- f"Aggregating {value_column} within {map_radius_meters}m buffers around POIs"
578
+ f"Aggregating {value_column} within {map_radius_meters}m buffers around POIs using predicate '{predicate}'"
470
579
  )
471
580
 
472
581
  # Create buffers around POIs
@@ -477,29 +586,33 @@ class PoiViewGenerator:
477
586
  )
478
587
 
479
588
  # Aggregate polygons to buffers
480
- result = aggregate_polygons_to_zones(
589
+ aggregation_result_gdf = aggregate_polygons_to_zones(
481
590
  polygons=data,
482
591
  zones=buffer_gdf,
483
592
  value_columns=value_column,
484
593
  aggregation=stat,
485
- area_weighted=area_weighted,
594
+ predicate=predicate,
486
595
  zone_id_column="poi_id",
596
+ output_suffix="",
597
+ drop_geometry=True,
487
598
  **kwargs,
488
599
  )
489
600
 
490
- # Extract values for each POI
491
- sampled_values = result[value_column].values
601
+ results_df = aggregation_result_gdf[["poi_id", value_column]]
602
+
603
+ if output_column != "zonal_stat":
604
+ results_df = results_df.rename(columns={value_column: output_column})
492
605
 
493
606
  else:
494
607
  raise ValueError(
495
608
  "data must be either a list of TifProcessor objects or a GeoDataFrame"
496
609
  )
497
610
 
498
- result = self.points_gdf.copy()
499
- result[output_column] = sampled_values
500
- self.logger.info(f"Zonal statistics mapping complete: {output_column}")
501
- self._points_gdf = result
502
- return result
611
+ self._update_view(results_df)
612
+ self.logger.info(
613
+ f"Zonal statistics mapping complete for column(s) derived from '{output_column}' or '{value_column}'"
614
+ )
615
+ return self.view
503
616
 
504
617
  def map_built_s(
505
618
  self,
@@ -539,10 +652,9 @@ class PoiViewGenerator:
539
652
  data_store=self.data_store,
540
653
  **kwargs,
541
654
  )
542
- gdf_points = self.points_gdf.to_crs(handler.config.crs)
543
655
  self.logger.info("Loading GHSL Built Surface raster tiles")
544
656
  tif_processors = handler.load_data(
545
- gdf_points, ensure_available=self.config.ensure_available
657
+ self.points_gdf.copy(), ensure_available=self.config.ensure_available
546
658
  )
547
659
 
548
660
  return self.map_zonal_stats(
@@ -555,9 +667,8 @@ class PoiViewGenerator:
555
667
 
556
668
  def map_smod(
557
669
  self,
558
- stat="median",
559
670
  dataset_year=2020,
560
- dataset_resolution=100,
671
+ dataset_resolution=1000,
561
672
  output_column="smod_class",
562
673
  **kwargs,
563
674
  ) -> pd.DataFrame:
@@ -589,48 +700,185 @@ class PoiViewGenerator:
589
700
  **kwargs,
590
701
  )
591
702
 
592
- gdf_points = self.points_gdf.to_crs(handler.config.crs)
593
703
  self.logger.info("Loading GHSL SMOD raster tiles")
594
704
  tif_processors = handler.load_data(
595
- gdf_points, ensure_available=self.config.ensure_available
705
+ self.points_gdf.copy(), ensure_available=self.config.ensure_available
596
706
  )
597
707
 
598
708
  return self.map_zonal_stats(
599
709
  data=tif_processors,
600
- stat=stat, # Use median for categorical data
601
710
  output_column=output_column,
602
711
  **kwargs,
603
712
  )
604
713
 
714
    def map_wp_pop(
        self,
        country: Union[str, List[str]],
        map_radius_meters: float,
        resolution: int = 1000,
        predicate: Literal[
            "centroid_within", "intersects", "fractional", "within"
        ] = "fractional",
        output_column: str = "population",
        **kwargs,
    ) -> pd.DataFrame:
        """
        Map WorldPop population counts into buffers around each POI.

        Loads WorldPop population data for the given country (or countries)
        and aggregates it with a 'sum' statistic into `map_radius_meters`
        buffers around the POIs via `map_zonal_stats`.

        Args:
            country (Union[str, List[str]]): Country code(s)/name(s) to load
                WorldPop data for. A single string is wrapped into a list.
            map_radius_meters (float): Buffer radius around each POI, in meters.
            resolution (int): WorldPop dataset resolution in meters.
                Defaults to 1000.
            predicate (Literal["centroid_within", "intersects", "fractional", "within"]):
                Spatial relationship used for aggregation. Defaults to
                "fractional". Automatically downgraded to "intersects" when
                resolution == 100, since fractional aggregation is only
                supported at 1000m resolution.
            output_column (str): Name of the resulting population column in
                the view. Defaults to "population".
            **kwargs: Forwarded to BOTH `WPPopulationHandler` and
                `map_zonal_stats`. NOTE(review): a key accepted by only one
                of the two APIs will raise in the other — confirm intended.

        Returns:
            pd.DataFrame: The updated POI view including `output_column`.
        """
        if isinstance(country, str):
            country = [country]

        handler = WPPopulationHandler(
            project="pop", resolution=resolution, data_store=self.data_store, **kwargs
        )

        self.logger.info(
            f"Mapping WorldPop Population data (year: {handler.config.year}, resolution: {handler.config.resolution}m)"
        )

        if predicate == "fractional" and resolution == 100:
            self.logger.warning(
                "Fractional aggregations only supported for datasets with 1000m resolution. Using `intersects` as predicate"
            )
            predicate = "intersects"

        if predicate == "centroid_within":
            # Raster path: presumably load_data returns TifProcessor tiles per
            # country (map_zonal_stats then ignores value_column) — TODO confirm.
            data = []
            for c in country:
                data.extend(
                    handler.load_data(c, ensure_available=self.config.ensure_available)
                )
        else:
            # Vector path: concatenate per-country GeoDataFrames; assumes each
            # carries a 'pixel_value' column consumed below — verify in handler.
            data = pd.concat(
                [
                    handler.load_into_geodataframe(
                        c, ensure_available=self.config.ensure_available
                    )
                    for c in country
                ],
                ignore_index=True,
            )

        self.logger.info(f"Mapping WorldPop Population data into {map_radius_meters}m zones around POIs using 'sum' statistic")

        # Population is additive, so the statistic is fixed to 'sum'.
        return self.map_zonal_stats(
            data,
            stat="sum",
            map_radius_meters=map_radius_meters,
            value_column="pixel_value",
            predicate=predicate,
            output_column=output_column,
            **kwargs
        )
770
+
605
771
  def save_view(
606
772
  self,
607
773
  name: str,
608
774
  output_format: Optional[str] = None,
609
775
  ) -> Path:
610
776
  """
611
- Saves the current POI view (the enriched GeoDataFrame) to a file.
777
+ Saves the current POI view (the enriched DataFrame) to a file.
612
778
 
613
- The output path and format are determined by the `generator_config`
779
+ The output path and format are determined by the `config`
614
780
  or overridden by the `output_format` parameter.
615
781
 
616
782
  Args:
617
783
  name (str): The base name for the output file (without extension).
618
784
  output_format (Optional[str]):
619
785
  The desired output format (e.g., "csv", "geojson"). If None,
620
- the `output_format` from `generator_config` will be used.
786
+ the `output_format` from `config` will be used.
621
787
 
622
788
  Returns:
623
789
  Path: The full path to the saved output file.
624
790
  """
625
- format_to_use = output_format or self.generator_config.output_format
626
- output_path = self.generator_config.base_path / f"{name}.{format_to_use}"
791
+ format_to_use = output_format or self.config.output_format
792
+ output_path = self.config.base_path / f"{name}.{format_to_use}"
627
793
 
628
794
  self.logger.info(f"Saving POI view to {output_path}")
629
- write_dataset(
630
- df=self.points_gdf,
631
- path=str(output_path),
632
- data_store=self.data_store,
633
- format=format_to_use,
634
- )
795
+ # Save the current view, which is a pandas DataFrame, not a GeoDataFrame
796
+ # GeoJSON/Shapefile formats would require converting back to GeoDataFrame first.
797
+ # For CSV, Parquet, Feather, this is fine.
798
+ if format_to_use in ["geojson", "shp", "gpkg"]:
799
+ self.logger.warning(
800
+ f"Saving to {format_to_use} requires converting back to GeoDataFrame. Geometry column will be re-added."
801
+ )
802
+ # Re-add geometry for saving to geospatial formats
803
+ view_to_save_gdf = self.view.merge(
804
+ self.points_gdf[["poi_id", "geometry"]], on="poi_id", how="left"
805
+ )
806
+ write_dataset(
807
+ data=view_to_save_gdf,
808
+ path=str(output_path),
809
+ data_store=self.data_store,
810
+ )
811
+ else:
812
+ write_dataset(
813
+ data=self.view, # Use the internal _view DataFrame
814
+ path=str(output_path),
815
+ data_store=self.data_store,
816
+ )
635
817
 
636
818
  return output_path
819
+
820
+ def to_dataframe(self) -> pd.DataFrame:
821
+ """
822
+ Returns the current POI view as a DataFrame.
823
+
824
+ This method combines all accumulated variables in the view
825
+
826
+ Returns:
827
+ pd.DataFrame: The current view.
828
+ """
829
+ return self.view
830
+
831
+ def to_geodataframe(self) -> gpd.GeoDataFrame:
832
+ """
833
+ Returns the current POI view merged with the original point geometries as a GeoDataFrame.
834
+
835
+ This method combines all accumulated variables in the view with the corresponding
836
+ point geometries, providing a spatially-enabled DataFrame for further analysis or export.
837
+
838
+ Returns:
839
+ gpd.GeoDataFrame: The current view merged with point geometries.
840
+ """
841
+ return gpd.GeoDataFrame(
842
+ self.view.merge(
843
+ self.points_gdf[["poi_id", "geometry"]], on="poi_id", how="left"
844
+ ),
845
+ crs="EPSG:4326",
846
+ )
847
+
848
+ def chain_operations(self, operations: List[dict]) -> "PoiViewGenerator":
849
+ """
850
+ Chain multiple mapping operations for fluent interface.
851
+
852
+ Args:
853
+ operations: List of dicts with 'method' and 'kwargs' keys
854
+
855
+ Example:
856
+ generator.chain_operations([
857
+ {'method': 'map_google_buildings', 'kwargs': {}},
858
+ {'method': 'map_built_s', 'kwargs': {'map_radius_meters': 200}},
859
+ ])
860
+ """
861
+ for op in operations:
862
+ method_name = op["method"]
863
+ kwargs = op.get("kwargs", {})
864
+ if hasattr(self, method_name):
865
+ getattr(self, method_name)(**kwargs)
866
+ else:
867
+ raise AttributeError(f"Method {method_name} not found")
868
+ return self
869
+
870
+ def validate_data_coverage(self, data_bounds: gpd.GeoDataFrame) -> dict:
871
+ """
872
+ Validate how many POIs fall within the data coverage area.
873
+
874
+ Returns:
875
+ dict: Coverage statistics
876
+ """
877
+ poi_within = self.points_gdf.within(data_bounds.union_all())
878
+ coverage_stats = {
879
+ "total_pois": len(self.points_gdf),
880
+ "covered_pois": poi_within.sum(),
881
+ "coverage_percentage": (poi_within.sum() / len(self.points_gdf)) * 100,
882
+ "uncovered_pois": (~poi_within).sum(),
883
+ }
884
+ return coverage_stats
@@ -1,3 +1,4 @@
1
1
  from gigaspatial.generators.zonal.base import ZonalViewGeneratorConfig
2
2
  from gigaspatial.generators.zonal.geometry import GeometryBasedZonalViewGenerator
3
- from gigaspatial.generators.poi import PoiViewGenerator, PoiViewGeneratorConfig
3
+ from gigaspatial.generators.zonal.mercator import MercatorViewGenerator
4
+ from gigaspatial.generators.zonal.admin import AdminBoundariesViewGenerator
@@ -0,0 +1,84 @@
1
+ from typing import Optional, Union
2
+ from pathlib import Path
3
+
4
+ import logging
5
+
6
+ from gigaspatial.core.io.data_store import DataStore
7
+ from gigaspatial.handlers.boundaries import AdminBoundaries
8
+ from gigaspatial.generators.zonal.base import (
9
+ ZonalViewGeneratorConfig,
10
+ T,
11
+ )
12
+ from gigaspatial.generators.zonal.geometry import GeometryBasedZonalViewGenerator
13
+
14
+
15
class AdminBoundariesViewGenerator(GeometryBasedZonalViewGenerator[T]):
    """
    Generates zonal views using administrative boundaries as the zones.

    This class specializes in creating zonal views where the zones are defined by
    administrative boundaries (e.g., countries, states, districts) at a specified
    administrative level. It extends the `GeometryBasedZonalViewGenerator` and
    leverages the `AdminBoundaries` handler to load the necessary geographical data.

    The administrative boundaries serve as the base geometries to which other
    geospatial data (points, polygons, rasters) can be mapped and aggregated.

    Attributes:
        country (str): The name or code of the country for which to load administrative boundaries.
        admin_level (int): The administrative level to load (e.g., 0 for country, 1 for states/provinces).
        admin_path (Union[str, Path], optional): Optional path to a local GeoJSON/Shapefile
            containing the administrative boundaries. If provided, this local file will be
            used instead of downloading.
        config (Optional[ZonalViewGeneratorConfig]): Configuration for the zonal view generation process.
        data_store (Optional[DataStore]): A DataStore instance for accessing data.
        logger (Optional[logging.Logger]): A logger instance for logging messages.
    """

    def __init__(
        self,
        country: str,
        admin_level: int,
        data_store: Optional[DataStore] = None,
        admin_path: Optional[Union[str, Path]] = None,
        config: Optional[ZonalViewGeneratorConfig] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """
        Initializes the AdminBoundariesViewGenerator.

        Args:
            country (str): The name or code of the country (e.g., "USA", "Germany").
            admin_level (int): The administrative level to load (e.g., 0 for country, 1 for states, 2 for districts).
            admin_path (Union[str, Path], optional): Path to a local administrative boundaries file (GeoJSON, Shapefile).
                If provided, overrides default data loading.
            config (Optional[ZonalViewGeneratorConfig]): Configuration for the zonal view generator.
                If None, a default config will be used.
            data_store (Optional[DataStore]): Data storage interface. If None, LocalDataStore is used.
            logger (Optional[logging.Logger]): Custom logger instance. If None, a default logger is used.
        """

        # Boundaries are loaded before the base class is initialized; the
        # base class identifies each zone by the "id" column of that frame.
        super().__init__(
            zone_data=self._init_zone_data(
                country, admin_level, data_store, admin_path
            ),
            zone_id_column="id",
            config=config,
            data_store=data_store,
            logger=logger,
        )
        self.logger.info(
            f"Initialized AdminBoundariesViewGenerator for {country} (level {admin_level})"
        )

    def _init_zone_data(
        self,
        country: str,
        admin_level: int,
        data_store: Optional[DataStore] = None,
        admin_path: Optional[Union[str, Path]] = None,
    ):
        """
        Load the administrative boundaries for `country` at `admin_level`.

        Returns:
            The boundaries as a GeoDataFrame, as produced by
            `AdminBoundaries.create(...).to_geodataframe()`.
        """
        # When admin_path is given, AdminBoundaries presumably reads the local
        # file instead of downloading — confirm against the handler.
        gdf_boundaries = AdminBoundaries.create(
            country, admin_level, data_store, admin_path
        ).to_geodataframe()
        return gdf_boundaries