giga-spatial 0.6.9__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gigaspatial/grid/h3.py ADDED
@@ -0,0 +1,417 @@
1
+ import pandas as pd
2
+ import geopandas as gpd
3
+ import h3
4
+ from shapely.geometry import Polygon, Point, shape
5
+ from shapely.geometry.base import BaseGeometry
6
+ from shapely.strtree import STRtree
7
+ import json
8
+ from pathlib import Path
9
+ from pydantic import BaseModel, Field
10
+ from typing import List, Union, Iterable, Optional, Tuple, ClassVar, Literal
11
+ import pycountry
12
+
13
+ from gigaspatial.core.io.data_store import DataStore
14
+ from gigaspatial.core.io.local_data_store import LocalDataStore
15
+ from gigaspatial.config import config
16
+
17
+
18
class H3Hexagons(BaseModel):
    """A collection of H3 cell IDs together with their nominal resolution.

    Instances are normally built through one of the ``from_*`` classmethods.
    ``data_store`` is used by :meth:`save` and :meth:`from_json` for file
    access and is excluded from serialization.
    """

    resolution: int = Field(..., ge=0, le=15)
    hexagons: List[str] = Field(default_factory=list)
    data_store: DataStore = Field(default_factory=LocalDataStore, exclude=True)
    logger: ClassVar = config.get_logger("H3Hexagons")

    class Config:
        arbitrary_types_allowed = True

    @classmethod
    def from_hexagons(cls, hexagons: List[str]):
        """Create H3Hexagons from list of H3 cell IDs.

        The resolution is read from the first ID only; the remaining IDs are
        not checked against it. Duplicates are removed, so the input order is
        not preserved. An empty input yields an empty instance at resolution 0.
        """
        if not hexagons:
            cls.logger.warning("No hexagons provided to from_hexagons.")
            return cls(resolution=0, hexagons=[])

        cls.logger.info(
            f"Initializing H3Hexagons from {len(hexagons)} provided hexagons."
        )
        # Get resolution from first hexagon
        resolution = h3.get_resolution(hexagons[0])
        return cls(resolution=resolution, hexagons=list(set(hexagons)))

    @classmethod
    def from_bounds(
        cls, xmin: float, ymin: float, xmax: float, ymax: float, resolution: int
    ):
        """Create H3Hexagons from boundary coordinates.

        Args:
            xmin: Western longitude of the bounding box.
            ymin: Southern latitude of the bounding box.
            xmax: Eastern longitude of the bounding box.
            ymax: Northern latitude of the bounding box.
            resolution: H3 resolution of the generated cells.
        """
        cls.logger.info(
            f"Creating H3Hexagons from bounds: ({xmin}, {ymin}, {xmax}, {ymax}) at resolution: {resolution}"
        )

        # h3 expects (lat, lng) pairs, hence the (y, x) ordering of the ring.
        latlong_bbox_coords = [
            [ymin, xmin],
            [ymax, xmin],
            [ymax, xmax],
            [ymin, xmax],
            [ymin, xmin],
        ]

        # NOTE(review): h3shape_to_cells appears to select cells whose centre
        # lies inside the polygon (not every intersecting cell) - confirm
        # against the h3-py documentation.
        poly = h3.LatLngPoly(latlong_bbox_coords)
        hexagons = h3.h3shape_to_cells(poly, res=resolution)

        return cls(resolution=resolution, hexagons=list(hexagons))

    @classmethod
    def from_spatial(
        cls,
        source: Union[
            BaseGeometry,
            gpd.GeoDataFrame,
            List[Union[Point, Tuple[float, float]]],  # points
        ],
        resolution: int,
        contain: Literal["center", "full", "overlap", "bbox_overlap"] = "overlap",
        **kwargs,
    ):
        """Create H3Hexagons from a geometry, a GeoDataFrame or a list of points.

        A GeoDataFrame is reprojected to EPSG:4326 when needed; if every row is
        a Point it is handled as a point list, otherwise its geometries are
        unioned into a single geometry. ``contain`` is only forwarded on the
        geometry path; point inputs ignore it.

        Raises:
            ValueError: If ``source`` is none of the supported types.
        """
        cls.logger.info(
            f"Creating H3Hexagons from spatial source (type: {type(source)}) at resolution: {resolution} with predicate: {contain}"
        )
        if isinstance(source, gpd.GeoDataFrame):
            if source.crs != "EPSG:4326":
                source = source.to_crs("EPSG:4326")

            is_point_series = source.geometry.geom_type == "Point"
            all_are_points = is_point_series.all()

            if all_are_points:
                source = source.geometry.to_list()
            else:
                source = source.geometry.unary_union

        if isinstance(source, BaseGeometry):
            return cls.from_geometry(
                geometry=source, resolution=resolution, contain=contain, **kwargs
            )
        elif isinstance(source, Iterable) and all(
            # isinstance is tested first so len() is only applied to non-Points.
            isinstance(pt, Point) or len(pt) == 2
            for pt in source
        ):
            return cls.from_points(points=source, resolution=resolution, **kwargs)
        else:
            raise ValueError("Unsupported source type for H3Hexagons.from_spatial")

    @classmethod
    def from_geometry(
        cls,
        geometry: BaseGeometry,
        resolution: int,
        contain: Literal["center", "full", "overlap", "bbox_overlap"] = "overlap",
        **kwargs,
    ):
        """Create H3Hexagons from a geometry.

        Args:
            geometry: Shapely geometry in lon/lat coordinates.
            resolution: H3 resolution of the generated cells.
            contain: Containment mode forwarded to
                ``h3.h3shape_to_cells_experimental``.
            **kwargs: Extra fields forwarded to the model constructor.

        Returns:
            A new instance; empty when ``geometry`` is empty.
        """
        cls.logger.info(
            f"Creating H3Hexagons from geometry (bounds: {geometry.bounds}) at resolution: {resolution} with predicate: {contain}"
        )

        # An empty geometry covers no cells; skip the h3 conversion entirely.
        if geometry.is_empty:
            return cls(resolution=resolution, hexagons=[], **kwargs)

        if isinstance(geometry, Point):
            # from_points requires the resolution; omitting it raised TypeError.
            return cls.from_points([geometry], resolution, **kwargs)

        # Convert shapely geometry to GeoJSON-like format
        if hasattr(geometry, "__geo_interface__"):
            geojson_geom = geometry.__geo_interface__
        else:
            # Fallback for objects that do not expose __geo_interface__
            from shapely.geometry import mapping

            geojson_geom = mapping(geometry)

        h3_geom = h3.geo_to_h3shape(geojson_geom)

        hexagons = h3.h3shape_to_cells_experimental(
            h3_geom, resolution, contain=contain
        )

        cls.logger.info(
            f"Generated {len(hexagons)} hexagons using `{contain}` spatial predicate."
        )
        return cls(resolution=resolution, hexagons=list(hexagons), **kwargs)

    @classmethod
    def from_points(
        cls, points: List[Union[Point, Tuple[float, float]]], resolution: int, **kwargs
    ) -> "H3Hexagons":
        """Create H3Hexagons from a list of shapely Points or (lon, lat) pairs.

        Each point is mapped to its containing cell; duplicate cells are
        collapsed, so the result may be shorter than ``points``.
        """
        cls.logger.info(
            f"Creating H3Hexagons from {len(points)} points at resolution: {resolution}"
        )
        hexagons = set(cls.get_hexagons_from_points(points, resolution))
        cls.logger.info(f"Generated {len(hexagons)} unique hexagons from points.")
        return cls(resolution=resolution, hexagons=list(hexagons), **kwargs)

    @classmethod
    def from_json(
        cls, data_store: DataStore, file: Union[str, Path], **kwargs
    ) -> "H3Hexagons":
        """Load H3Hexagons from a JSON file.

        The file may hold either a bare list of cell IDs (resolution is then
        taken from the first ID, or 0 for an empty list) or a mapping of model
        fields. ``kwargs`` override or extend the loaded fields.
        """
        cls.logger.info(
            f"Loading H3Hexagons from JSON file: {file} using data store: {type(data_store).__name__}"
        )
        with data_store.open(str(file), "r") as f:
            data = json.load(f)
            if isinstance(data, list):  # If file contains only hexagon IDs
                # Get resolution from first hexagon if available
                resolution = h3.get_resolution(data[0]) if data else 0
                data = {
                    "resolution": resolution,
                    "hexagons": data,
                    **kwargs,
                }
            else:
                data.update(kwargs)
            instance = cls(**data)
            instance.data_store = data_store
            cls.logger.info(
                f"Successfully loaded {len(instance.hexagons)} hexagons from JSON file."
            )
            return instance

    @property
    def average_hexagon_area(self):
        """Average cell area at this resolution, as reported by ``h3``."""
        return h3.average_hexagon_area(self.resolution)

    @property
    def average_hexagon_edge_length(self):
        """Average cell edge length at this resolution, as reported by ``h3``."""
        return h3.average_hexagon_edge_length(self.resolution)

    def filter_hexagons(self, hexagons: Iterable[str]) -> "H3Hexagons":
        """Return a new instance holding only the cells also present in ``hexagons``.

        ``hexagons`` may be any iterable, including a one-shot iterator.
        """
        original_count = len(self.hexagons)
        # Materialize once: counting an iterator and then re-reading it would
        # leave nothing to intersect with.
        incoming = list(hexagons)
        incoming_count = len(incoming)

        self.logger.info(
            f"Filtering {original_count} hexagons with an incoming set of {incoming_count} hexagons."
        )
        filtered_hexagons = list(set(self.hexagons) & set(incoming))
        self.logger.info(f"Resulting in {len(filtered_hexagons)} filtered hexagons.")
        return H3Hexagons(
            resolution=self.resolution,
            hexagons=filtered_hexagons,
        )

    def to_dataframe(self) -> pd.DataFrame:
        """Convert to pandas DataFrame with hexagon ID and centroid coordinates.

        Columns are ``hexagon``, ``latitude`` and ``longitude``; an empty
        collection yields an empty frame with those columns.
        """
        self.logger.info(
            f"Converting {len(self.hexagons)} hexagons to pandas DataFrame."
        )
        if not self.hexagons:
            self.logger.warning(
                "No hexagons to convert to DataFrame. Returning empty DataFrame."
            )
            return pd.DataFrame(columns=["hexagon", "latitude", "longitude"])

        centroids = [h3.cell_to_latlng(hex_id) for hex_id in self.hexagons]

        self.logger.info("Successfully converted to DataFrame.")

        return pd.DataFrame(
            {
                "hexagon": self.hexagons,
                "latitude": [c[0] for c in centroids],
                "longitude": [c[1] for c in centroids],
            }
        )

    def to_geoms(self) -> List[Polygon]:
        """Convert hexagons to shapely geometries, one per cell, in order."""
        self.logger.info(
            f"Converting {len(self.hexagons)} hexagons to shapely Polygon geometries."
        )
        return [shape(h3.cells_to_geo([hex_id])) for hex_id in self.hexagons]

    def to_geodataframe(self) -> gpd.GeoDataFrame:
        """Convert to a GeoDataFrame with columns ``h3`` and ``geometry`` (EPSG:4326)."""
        return gpd.GeoDataFrame(
            {"h3": self.hexagons, "geometry": self.to_geoms()}, crs="EPSG:4326"
        )

    @staticmethod
    def get_hexagons_from_points(
        points: List[Union[Point, Tuple[float, float]]], resolution: int
    ) -> List[str]:
        """Get list of H3 hexagon IDs for the provided points at specified resolution.

        Args:
            points: List of points as either shapely Points or (lon, lat) tuples
            resolution: H3 resolution level

        Returns:
            List of H3 hexagon ID strings, one per input point (duplicates kept)
        """
        hexagons = []
        for p in points:
            if isinstance(p, Point):
                # Shapely Point has x=lon, y=lat
                hex_id = h3.latlng_to_cell(p.y, p.x, resolution)
            else:
                # Assume tuple is (lon, lat) - convert to (lat, lon) for h3
                hex_id = h3.latlng_to_cell(p[1], p[0], resolution)
            hexagons.append(hex_id)
        return hexagons

    def get_neighbors(self, k: int = 1) -> "H3Hexagons":
        """Get k-ring neighbors of all hexagons.

        Args:
            k: Distance of neighbors (1 for immediate neighbors, 2 for neighbors of neighbors, etc.)

        Returns:
            New H3Hexagons instance with the original cells and every cell
            within grid distance ``k`` of them
        """
        self.logger.info(
            f"Getting k-ring neighbors (k={k}) for {len(self.hexagons)} hexagons."
        )

        all_neighbors = set()
        for hex_id in self.hexagons:
            # grid_disk is the filled k-ring (origin included); grid_ring would
            # only return the hollow ring at exactly distance k.
            all_neighbors.update(h3.grid_disk(hex_id, k))

        self.logger.info(
            f"Found {len(all_neighbors)} total hexagons including neighbors."
        )
        return H3Hexagons(resolution=self.resolution, hexagons=list(all_neighbors))

    def get_compact_representation(self) -> "H3Hexagons":
        """Get compact representation by merging adjacent hexagons into parent cells where possible."""
        self.logger.info(f"Compacting {len(self.hexagons)} hexagons.")

        # De-duplicate before handing the cells to h3.compact_cells
        hex_set = set(self.hexagons)
        compacted = h3.compact_cells(hex_set)

        self.logger.info(f"Compacted to {len(compacted)} hexagons.")

        # Note: compacted representation may have mixed resolutions
        # We'll keep the original resolution as the "target" resolution
        return H3Hexagons(resolution=self.resolution, hexagons=list(compacted))

    def get_children(self, target_resolution: int) -> "H3Hexagons":
        """Get children hexagons at higher resolution.

        Args:
            target_resolution: Target resolution (must be higher than current)

        Returns:
            New H3Hexagons instance with children at target resolution

        Raises:
            ValueError: If ``target_resolution`` is not higher than the current one.
        """
        if target_resolution <= self.resolution:
            raise ValueError("Target resolution must be higher than current resolution")

        self.logger.info(
            f"Getting children at resolution {target_resolution} for {len(self.hexagons)} hexagons."
        )

        all_children = []
        for hex_id in self.hexagons:
            children = h3.cell_to_children(hex_id, target_resolution)
            all_children.extend(children)

        self.logger.info(f"Generated {len(all_children)} children hexagons.")
        return H3Hexagons(resolution=target_resolution, hexagons=all_children)

    def get_parents(self, target_resolution: int) -> "H3Hexagons":
        """Get parent hexagons at lower resolution.

        Args:
            target_resolution: Target resolution (must be lower than current)

        Returns:
            New H3Hexagons instance with the de-duplicated parents at target resolution

        Raises:
            ValueError: If ``target_resolution`` is not lower than the current one.
        """
        if target_resolution >= self.resolution:
            raise ValueError("Target resolution must be lower than current resolution")

        self.logger.info(
            f"Getting parents at resolution {target_resolution} for {len(self.hexagons)} hexagons."
        )

        parents = set()
        for hex_id in self.hexagons:
            parent = h3.cell_to_parent(hex_id, target_resolution)
            parents.add(parent)

        self.logger.info(f"Generated {len(parents)} parent hexagons.")
        return H3Hexagons(resolution=target_resolution, hexagons=list(parents))

    def save(self, file: Union[str, Path], format: str = "json") -> None:
        """Save H3Hexagons to file in specified format.

        Args:
            file: Destination path, resolved by ``self.data_store``.
            format: One of ``"json"`` (list of cell IDs), ``"geojson"`` or
                ``"parquet"`` (both written from :meth:`to_geodataframe`).

        Raises:
            ValueError: If ``format`` is not supported. Raised before the
                destination is opened, so no file is created or truncated.
        """
        if format not in ("parquet", "geojson", "json"):
            raise ValueError(f"Unsupported format: {format}")

        # Parquet is binary; the other formats are written as text.
        mode = "wb" if format == "parquet" else "w"
        with self.data_store.open(str(file), mode) as f:
            if format == "parquet":
                self.to_geodataframe().to_parquet(f, index=False)
            elif format == "geojson":
                f.write(self.to_geodataframe().to_json(drop_id=True))
            else:
                json.dump(self.hexagons, f)

    def __len__(self) -> int:
        """Return the number of stored cell IDs."""
        return len(self.hexagons)
+
364
+
365
class CountryH3Hexagons(H3Hexagons):
    """H3 cell collection bound to a single country.

    Direct construction is disabled; instances are produced exclusively by
    the :meth:`create` classmethod, which fills the cells from the country's
    boundary geometry.
    """

    country: str = Field(..., exclude=True)

    def __init__(self, *args, **kwargs):
        # Always refuse: create() bypasses this and calls the parent initializer.
        message = (
            "CountryH3Hexagons cannot be instantiated directly. "
            "Use CountryH3Hexagons.create() instead."
        )
        raise TypeError(message)

    @classmethod
    def create(
        cls,
        country: str,
        resolution: int,
        contain: Literal["center", "full", "overlap", "bbox_overlap"] = "overlap",
        data_store: Optional[DataStore] = None,
        country_geom_path: Optional[Union[str, Path]] = None,
    ):
        """Build the H3 cells covering one country.

        Args:
            country: Country identifier; resolved to an ISO alpha-3 code with
                ``pycountry.countries.lookup`` for the ``country`` field.
            resolution: H3 resolution of the generated cells.
            contain: Containment mode forwarded to ``H3Hexagons.from_geometry``.
            data_store: Store handed to ``AdminBoundaries.create``; the
                instance itself falls back to a ``LocalDataStore`` when omitted.
            country_geom_path: Optional boundary file path forwarded to
                ``AdminBoundaries.create``.

        Returns:
            A populated ``CountryH3Hexagons`` instance.
        """
        # Imported here rather than at module level, as in the original code.
        from gigaspatial.handlers.boundaries import AdminBoundaries

        # Allocate without running this class's __init__ (which always raises),
        # then initialise the model through the parent initializer.
        instance = super().__new__(cls)
        init_fields = {
            "resolution": resolution,
            "hexagons": [],
            "data_store": data_store or LocalDataStore(),
            "country": pycountry.countries.lookup(country).alpha_3,
        }
        super(CountryH3Hexagons, instance).__init__(**init_fields)

        cls.logger.info(
            f"Initializing H3 hexagons for country: {country} at resolution {resolution}"
        )

        admin = AdminBoundaries.create(
            country_code=country,
            data_store=data_store,
            path=country_geom_path,
        )
        # Only the first boundary entry is used as the country outline.
        boundary_geometry = admin.boundaries[0].geometry

        grid = H3Hexagons.from_geometry(boundary_geometry, resolution, contain=contain)
        instance.hexagons = grid.hexagons
        return instance
@@ -77,7 +77,7 @@ class MercatorTiles(BaseModel):
77
77
  geometry=source, zoom_level=zoom_level, predicate=predicate, **kwargs
78
78
  )
79
79
  elif isinstance(source, Iterable) and all(
80
- len(pt) == 2 or isinstance(pt, Point) for pt in source
80
+ isinstance(pt, Point) or len(pt) == 2 for pt in source
81
81
  ):
82
82
  return cls.from_points(geometry=source, zoom_level=zoom_level, **kwargs)
83
83
  else:
@@ -232,7 +232,7 @@ def convert_to_geodataframe(
232
232
 
233
233
  def buffer_geodataframe(
234
234
  gdf: gpd.GeoDataFrame,
235
- buffer_distance_meters: float,
235
+ buffer_distance_meters: Union[float, np.array, pd.Series],
236
236
  cap_style: Literal["round", "square", "flat"] = "round",
237
237
  copy=True,
238
238
  ) -> gpd.GeoDataFrame:
@@ -256,9 +256,6 @@ def buffer_geodataframe(
256
256
  if not isinstance(gdf, gpd.GeoDataFrame):
257
257
  raise TypeError("Input must be a GeoDataFrame")
258
258
 
259
- if not isinstance(buffer_distance_meters, (float, int)):
260
- raise TypeError("Buffer distance must be a number")
261
-
262
259
  if cap_style not in ["round", "square", "flat"]:
263
260
  raise ValueError("cap_style must be round, flat or square.")
264
261
 
@@ -283,7 +280,7 @@ def buffer_geodataframe(
283
280
  # Transform to UTM, create buffer, and transform back
284
281
  gdf_work = gdf_work.to_crs(utm_crs)
285
282
  gdf_work["geometry"] = gdf_work["geometry"].buffer(
286
- buffer_distance_meters, cap_style=cap_style
283
+ distance=buffer_distance_meters, cap_style=cap_style
287
284
  )
288
285
  gdf_work = gdf_work.to_crs(input_crs)
289
286
 
@@ -995,6 +992,14 @@ def aggregate_polygons_to_zones(
995
992
  if missing_cols:
996
993
  raise ValueError(f"Value columns not found in polygons data: {missing_cols}")
997
994
 
995
+ # Check for column name conflicts with zone_id_column
996
+ if zone_id_column in polygons_gdf.columns:
997
+ raise ValueError(
998
+ f"Column name conflict: polygons DataFrame contains column '{zone_id_column}' "
999
+ f"which conflicts with the zone identifier column. Please rename this column "
1000
+ f"in the polygons data to avoid confusion."
1001
+ )
1002
+
998
1003
  # Ensure CRS match
999
1004
  if polygons_gdf.crs != zones.crs:
1000
1005
  polygons_gdf = polygons_gdf.to_crs(zones.crs)