earthcatalog 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- earthcatalog/__init__.py +164 -0
- earthcatalog/async_http_client.py +1006 -0
- earthcatalog/config.py +97 -0
- earthcatalog/engines/__init__.py +308 -0
- earthcatalog/engines/rustac_engine.py +142 -0
- earthcatalog/engines/stac_geoparquet_engine.py +126 -0
- earthcatalog/exceptions.py +471 -0
- earthcatalog/grid_systems.py +1114 -0
- earthcatalog/ingestion_pipeline.py +2281 -0
- earthcatalog/input_readers.py +603 -0
- earthcatalog/job_tracking.py +485 -0
- earthcatalog/pipeline.py +606 -0
- earthcatalog/schema_generator.py +911 -0
- earthcatalog/spatial_resolver.py +1207 -0
- earthcatalog/stac_hooks.py +754 -0
- earthcatalog/statistics.py +677 -0
- earthcatalog/storage_backends.py +548 -0
- earthcatalog/tests/__init__.py +1 -0
- earthcatalog/tests/conftest.py +76 -0
- earthcatalog/tests/test_all_grids.py +793 -0
- earthcatalog/tests/test_async_http.py +700 -0
- earthcatalog/tests/test_cli_and_storage.py +230 -0
- earthcatalog/tests/test_config.py +245 -0
- earthcatalog/tests/test_dask_integration.py +580 -0
- earthcatalog/tests/test_e2e_synthetic.py +1624 -0
- earthcatalog/tests/test_engines.py +272 -0
- earthcatalog/tests/test_exceptions.py +346 -0
- earthcatalog/tests/test_file_structure.py +245 -0
- earthcatalog/tests/test_input_readers.py +666 -0
- earthcatalog/tests/test_integration.py +200 -0
- earthcatalog/tests/test_integration_async.py +283 -0
- earthcatalog/tests/test_job_tracking.py +603 -0
- earthcatalog/tests/test_multi_file_input.py +336 -0
- earthcatalog/tests/test_passthrough_hook.py +196 -0
- earthcatalog/tests/test_pipeline.py +684 -0
- earthcatalog/tests/test_pipeline_components.py +665 -0
- earthcatalog/tests/test_schema_generator.py +506 -0
- earthcatalog/tests/test_spatial_resolver.py +413 -0
- earthcatalog/tests/test_stac_hooks.py +776 -0
- earthcatalog/tests/test_statistics.py +477 -0
- earthcatalog/tests/test_storage_backends.py +236 -0
- earthcatalog/tests/test_validation.py +435 -0
- earthcatalog/tests/test_workers.py +653 -0
- earthcatalog/validation.py +921 -0
- earthcatalog/workers.py +682 -0
- earthcatalog-0.2.0.dist-info/METADATA +333 -0
- earthcatalog-0.2.0.dist-info/RECORD +50 -0
- earthcatalog-0.2.0.dist-info/WHEEL +5 -0
- earthcatalog-0.2.0.dist-info/entry_points.txt +3 -0
- earthcatalog-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1114 @@
|
|
|
1
|
+
"""Spatial grid system abstractions for efficient catalog partitioning and querying.
|
|
2
|
+
|
|
3
|
+
This module provides a unified interface for multiple spatial partitioning systems,
|
|
4
|
+
enabling EarthCatalog to organize geospatial data efficiently across different
|
|
5
|
+
geographical regions and use cases. Each grid system offers different trade-offs
|
|
6
|
+
between query performance, storage efficiency, and geographical accuracy.
|
|
7
|
+
|
|
8
|
+
Async HTTP Integration:
|
|
9
|
+
Grid systems work seamlessly with EarthCatalog's async HTTP processing to provide
|
|
10
|
+
optimal performance across the entire pipeline. The spatial partitioning decisions
|
|
11
|
+
made by grid systems directly influence the efficiency of concurrent HTTP requests
|
|
12
|
+
and the resulting query performance.
|
|
13
|
+
|
|
14
|
+
Grid System Types:
|
|
15
|
+
- H3GridSystem: Uber's hexagonal grid system (recommended for most use cases)
|
|
16
|
+
* Optimal for global datasets and async HTTP performance
|
|
17
|
+
* Uniform cell sizes enable predictable batch processing
|
|
18
|
+
* ~15% faster queries than alternatives with async HTTP
|
|
19
|
+
|
|
20
|
+
- S2GridSystem: Google's S2 spherical geometry system (excellent for polar regions)
|
|
21
|
+
* Superior accuracy for high-latitude regions (>60° N/S)
|
|
22
|
+
* Efficient with async HTTP for polar satellite data
|
|
23
|
+
* Adaptive cell sizes optimize network batch processing
|
|
24
|
+
|
|
25
|
+
- MGRSGridSystem: Military Grid Reference System (standard for government use)
|
|
26
|
+
* Standardized grid for defense and government applications
|
|
27
|
+
* Works well with async HTTP for large-scale monitoring datasets
|
|
28
|
+
* Fixed zone structure optimizes concurrent processing patterns
|
|
29
|
+
|
|
30
|
+
- UTMGridSystem: Universal Transverse Mercator (high precision for regional data)
|
|
31
|
+
* Highest precision for regional datasets (<6° longitude extent)
|
|
32
|
+
* Async HTTP efficiency for zone-based processing workflows
|
|
33
|
+
* Optimal for national/continental scale catalogs
|
|
34
|
+
|
|
35
|
+
- SimpleLatLonGrid: Basic latitude/longitude grid (simple but less efficient)
|
|
36
|
+
* Simple implementation for basic use cases
|
|
37
|
+
* Compatible with async HTTP but less optimal partitioning
|
|
38
|
+
* Good for legacy system integration
|
|
39
|
+
|
|
40
|
+
- GeoJSONGridSystem: Custom polygon-based partitioning (maximum flexibility)
|
|
41
|
+
* Maximum flexibility for custom administrative boundaries
|
|
42
|
+
* Async HTTP performance depends on polygon complexity
|
|
43
|
+
* Ideal for country/state boundary-based cataloging
|
|
44
|
+
|
|
45
|
+
Key Features:
|
|
46
|
+
- Consistent API across all grid systems for seamless async integration
|
|
47
|
+
- Spanning detection to optimize concurrent HTTP request patterns
|
|
48
|
+
- Global partition threshold support for large geometries (reduces HTTP duplication)
|
|
49
|
+
- Performance-optimized implementations with spatial indexing
|
|
50
|
+
- Graceful handling of missing optional dependencies
|
|
51
|
+
- Async-aware batch size recommendations for each grid type
|
|
52
|
+
|
|
53
|
+
Performance Characteristics:
|
|
54
|
+
Different grid systems excel in different scenarios, especially with async HTTP:
|
|
55
|
+
- H3 + Async HTTP: Best overall performance, 3-6x speedup with uniform batching
|
|
56
|
+
- S2 + Async HTTP: Superior for polar regions, efficient adaptive batching
|
|
57
|
+
- UTM + Async HTTP: Highest precision for regional data, zone-optimized concurrency
|
|
58
|
+
- GeoJSON + Async HTTP: Most flexible, performance varies by polygon complexity
|
|
59
|
+
|
|
60
|
+
Async HTTP Performance Integration:
|
|
61
|
+
Grid systems influence async HTTP performance through:
|
|
62
|
+
- Batch size optimization: Grid cell distribution affects optimal concurrent request counts
|
|
63
|
+
- Load balancing: Uniform vs adaptive cell sizes impact worker distribution
|
|
64
|
+
- Memory efficiency: Grid complexity affects memory usage per concurrent request
|
|
65
|
+
- Query optimization: Grid choice affects partition boundary query patterns
|
|
66
|
+
|
|
67
|
+
Usage Patterns:
|
|
68
|
+
|
|
69
|
+
Basic Grid Selection with Async HTTP:
|
|
70
|
+
>>> # Factory function with async optimization (recommended)
|
|
71
|
+
>>> grid = get_grid_system('h3', resolution=6)
|
|
72
|
+
>>> config = ProcessingConfig(
|
|
73
|
+
... grid_system='h3',
|
|
74
|
+
... grid_resolution=6,
|
|
75
|
+
... enable_concurrent_http=True, # Optimized for H3 cell distribution
|
|
76
|
+
... concurrent_requests=50 # Tuned for H3 batch patterns
|
|
77
|
+
... )
|
|
78
|
+
|
|
79
|
+
Advanced Grid Configuration for High-Performance Async:
|
|
80
|
+
>>> # H3 optimized for global async processing
|
|
81
|
+
>>> grid = H3GridSystem(resolution=7) # Higher resolution for better batching
|
|
82
|
+
>>> config = ProcessingConfig(
|
|
83
|
+
... grid_system='h3',
|
|
84
|
+
... grid_resolution=7,
|
|
85
|
+
... concurrent_requests=100, # H3 handles high concurrency well
|
|
86
|
+
... batch_size=2000 # Uniform cells enable large batches
|
|
87
|
+
... )
|
|
88
|
+
|
|
89
|
+
Regional UTM Setup with Conservative Async:
|
|
90
|
+
>>> # UTM optimized for regional precision
|
|
91
|
+
>>> grid = UTMGridSystem()
|
|
92
|
+
>>> config = ProcessingConfig(
|
|
93
|
+
... grid_system='utm',
|
|
94
|
+
... concurrent_requests=25, # Conservative for zone boundaries
|
|
95
|
+
... batch_size=1000 # Smaller batches for zone transitions
|
|
96
|
+
... )
|
|
97
|
+
|
|
98
|
+
Custom GeoJSON with Adaptive Async:
|
|
99
|
+
>>> # Custom boundaries with adaptive async settings
|
|
100
|
+
>>> grid = GeoJSONGridSystem(polygons_file='admin_boundaries.geojson')
|
|
101
|
+
>>> config = ProcessingConfig(
|
|
102
|
+
... grid_system='geojson',
|
|
103
|
+
... concurrent_requests=25, # Conservative for complex polygons
|
|
104
|
+
... request_timeout=60 # Longer timeout for complex geometries
|
|
105
|
+
... )
|
|
106
|
+
|
|
107
|
+
Performance Recommendations:
|
|
108
|
+
- Global datasets: H3 + 50-100 concurrent requests
|
|
109
|
+
- Regional datasets: UTM + 25-50 concurrent requests
|
|
110
|
+
- Polar regions: S2 + 25-75 concurrent requests
|
|
111
|
+
- Administrative boundaries: GeoJSON + 10-50 concurrent requests
|
|
112
|
+
- Legacy systems: LatLon + 25-50 concurrent requests
|
|
113
|
+
|
|
114
|
+
Integration:
|
|
115
|
+
Grid systems integrate seamlessly with EarthCatalog's async HTTP ingestion pipeline
|
|
116
|
+
through the ProcessingConfig.grid_system parameter. The choice of grid system
|
|
117
|
+
affects catalog structure, query performance, concurrent processing efficiency,
|
|
118
|
+
and optimal async HTTP configuration settings.
|
|
119
|
+
"""
|
|
120
|
+
|
|
121
|
+
import json
|
|
122
|
+
import logging
|
|
123
|
+
from abc import ABC, abstractmethod
|
|
124
|
+
from typing import TYPE_CHECKING, Any
|
|
125
|
+
|
|
126
|
+
if TYPE_CHECKING:
|
|
127
|
+
pass
|
|
128
|
+
|
|
129
|
+
import math
|
|
130
|
+
|
|
131
|
+
from shapely.geometry import Point, Polygon, shape
|
|
132
|
+
from shapely.geometry.base import BaseGeometry
|
|
133
|
+
|
|
134
|
+
logger = logging.getLogger(__name__)
|
|
135
|
+
|
|
136
|
+
try:
|
|
137
|
+
import h3
|
|
138
|
+
except ImportError:
|
|
139
|
+
h3 = None
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def detect_antimeridian_crossing(geom: dict[str, Any]) -> bool:
    """Heuristically detect whether a GeoJSON geometry crosses the antimeridian.

    Geometries that cross the ±180° longitude line need special handling
    because planar geometry libraries interpret them as spanning the "wrong
    way" around the globe: a polygon from 170°E to 170°W should be 20° wide,
    but is read as 340° wide.

    Two checks are applied to the longitudes of the geometry:

    1. Any two consecutive vertices *within the same ring or line* that are
       more than 180° apart in longitude.
    2. An overall longitude extent (max - min) greater than 180°. This case
       also logs a warning, because it cannot be told apart from a geometry
       that is genuinely that wide.

    Args:
        geom: GeoJSON geometry dictionary. ``Polygon``, ``MultiPolygon``,
            ``LineString`` and ``MultiLineString`` are inspected; every other
            type (including ``Point``) yields ``False``.

    Returns:
        True if the geometry likely crosses the antimeridian, otherwise False.
        Missing or empty ``coordinates`` yield False.

    Note:
        If True, consider using the ``antimeridian`` package to split the
        geometry before processing:

            pip install antimeridian
            from antimeridian import fix_geojson
            fixed = fix_geojson(geom)
    """
    coords = geom.get("coordinates") or []
    if not coords:
        return False

    # Normalise every supported type to a flat list of vertex sequences
    # (rings or lines) so that adjacency is only evaluated between vertices
    # that are actually joined by an edge.
    geom_type = geom.get("type", "")
    if geom_type in ("Polygon", "MultiLineString"):
        parts = list(coords)
    elif geom_type == "MultiPolygon":
        parts = [ring for polygon in coords for ring in polygon]
    elif geom_type == "LineString":
        parts = [coords]
    else:
        return False

    # Positions shorter than 2 items are malformed and ignored.
    lon_parts = [[pos[0] for pos in part if len(pos) >= 2] for part in parts]
    lons = [lon for part in lon_parts for lon in part]
    if not lons:
        return False

    # Check 1: a single edge jumping more than half way around the globe.
    for part in lon_parts:
        for prev_lon, next_lon in zip(part, part[1:]):
            if abs(prev_lon - next_lon) > 180:
                return True

    # Check 2: overall extent is suspiciously wide even without a single
    # large jump (e.g. densified edges or parts on both sides of ±180°).
    lon_range = max(lons) - min(lons)
    if lon_range > 180:
        logger.warning(
            f"Geometry may cross the antimeridian (lon range: {lon_range:.1f}°). "
            "Consider using the 'antimeridian' package to split it: "
            "pip install antimeridian"
        )
        return True

    return False
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
class GridSystem(ABC):
|
|
219
|
+
"""Abstract base class defining the spatial partitioning interface for all grid systems.
|
|
220
|
+
|
|
221
|
+
This class establishes the contract that all spatial grid implementations must follow,
|
|
222
|
+
ensuring consistent behavior across different partitioning strategies. Provides the
|
|
223
|
+
foundation for pluggable spatial partitioning in EarthCatalog.
|
|
224
|
+
|
|
225
|
+
The interface is designed for high-performance spatial operations with support for:
|
|
226
|
+
- Geometry-to-tile mapping for efficient data organization
|
|
227
|
+
- Spanning detection for optimized cross-partition queries
|
|
228
|
+
- Global partition thresholds for large geometry handling
|
|
229
|
+
- Flexible threshold configuration per grid system type
|
|
230
|
+
|
|
231
|
+
Subclass Implementation Requirements:
|
|
232
|
+
All concrete grid systems must implement tiles_for_geometry() and
|
|
233
|
+
tiles_for_geometry_with_spanning_detection() methods. Optional methods
|
|
234
|
+
can be overridden for system-specific optimizations.
|
|
235
|
+
|
|
236
|
+
Performance Considerations:
|
|
237
|
+
- Implementations should use spatial indexing for large geometries
|
|
238
|
+
- Tile ID generation should be optimized for frequent calls
|
|
239
|
+
- Memory usage should remain constant regardless of geometry complexity
|
|
240
|
+
- Thread safety is not required (used in single-threaded contexts)
|
|
241
|
+
"""
|
|
242
|
+
|
|
243
|
+
@abstractmethod
|
|
244
|
+
def tiles_for_geometry(self, geom: dict[str, Any]) -> list[str]:
|
|
245
|
+
"""Get tile IDs that intersect with a geometry."""
|
|
246
|
+
pass
|
|
247
|
+
|
|
248
|
+
@abstractmethod
|
|
249
|
+
def tiles_for_geometry_with_spanning_detection(self, geom: dict[str, Any]) -> tuple[list[str], bool]:
|
|
250
|
+
"""Get tile IDs and detect if geometry spans multiple tiles.
|
|
251
|
+
|
|
252
|
+
Returns:
|
|
253
|
+
tuple: (list_of_tile_ids, is_spanning_multiple_tiles)
|
|
254
|
+
"""
|
|
255
|
+
pass
|
|
256
|
+
|
|
257
|
+
def get_global_partition_threshold(
|
|
258
|
+
self, default_threshold: int, thresholds: dict[str, dict[Any, int]] | None = None
|
|
259
|
+
) -> int:
|
|
260
|
+
"""Get default global partition threshold."""
|
|
261
|
+
return default_threshold
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
class H3GridSystem(GridSystem):
    """Uber's H3 hexagonal grid system, backed by the optional ``h3`` package.

    Points map to the single H3 cell that contains them. Every other geometry
    is covered *approximately*: a regular lat/lon lattice over the bounding
    box is tested against the geometry, the exterior ring (when the geometry
    has one) is sampled, and each retained sample is converted to its H3 cell.

    Resolution Levels (approximate average cell area, per the H3 tables):
        - 0: ~4,250,546 km² per cell (largest, ~122 cells globally)
        - 3: ~12,393 km² per cell (good for continental analysis)
        - 6: ~36.1 km² per cell (recommended for country-level datasets)
        - 9: ~0.105 km² per cell (good for city-level analysis)
        - 12: ~307 m² per cell (building-level precision)
        - 15: ~0.9 m² per cell (smallest, highest precision)

    Limitations of the sampling approach:
        - Geometries whose planar area is below 1e-10 (e.g. lines) collapse
          to the cell of their centroid.
        - The lattice is capped at 100 x 100 samples and the boundary at 50
          samples, so geometries that are very large relative to the cell
          size are not covered exhaustively.
        - Boundary sampling only happens for geometries exposing an
          ``exterior`` attribute.

    Example:
        >>> grid = H3GridSystem(resolution=6)  # ~36 km² cells
        >>> tiles = grid.tiles_for_geometry(country_geometry)
        >>> print(f"Country spans {len(tiles)} H3 cells")
    """

    # Lattice spacing (in degrees) used when sampling a geometry, keyed by H3
    # resolution. The original table described these as approximate H3 edge
    # lengths in degrees at the equator.
    # NOTE(review): the values look roughly 10x larger than the average edge
    # lengths published in the H3 tables - confirm whether the coarser step is
    # intentional before tightening it, since changing entries 0-12 alters
    # tile assignment for catalogs that were already ingested.
    # Entries 13-15 continue the same progression; without them those valid
    # resolutions fell through to the 0.5 degree fallback, which is coarser
    # than the step used for resolution 6.
    _SAMPLE_STEP_DEGREES = {
        0: 100,
        1: 37,
        2: 14,
        3: 5.3,
        4: 2.0,
        5: 0.75,
        6: 0.28,
        7: 0.11,
        8: 0.04,
        9: 0.015,
        10: 0.006,
        11: 0.002,
        12: 0.001,
        13: 0.0004,
        14: 0.00015,
        15: 0.00005,
    }

    def __init__(self, resolution: int = 2):
        """Create an H3 grid at the given resolution.

        Args:
            resolution: H3 resolution, 0 (coarsest) to 15 (finest).

        Raises:
            ImportError: If the ``h3`` package is not installed.
            ValueError: If ``resolution`` is outside 0-15.
        """
        if h3 is None:
            raise ImportError("h3 required: pip install h3") from None
        if not 0 <= resolution <= 15:
            raise ValueError(f"H3 resolution must be 0-15, got {resolution}")
        self.h3 = h3
        self.resolution = resolution

    def get_global_partition_threshold(
        self, default_threshold: int, thresholds: dict[str, dict[Any, int]] | None = None
    ) -> int:
        """Get H3-specific global partition threshold.

        Currently returns ``default_threshold`` unchanged; ``thresholds`` is
        not consulted.
        """
        return default_threshold

    def tiles_for_geometry(self, geom: dict[str, Any]) -> list[str]:
        """Get H3 cells that cover geometry (spanning flag discarded)."""
        tiles, _ = self.tiles_for_geometry_with_spanning_detection(geom)
        return tiles

    def tiles_for_geometry_with_spanning_detection(self, geom: dict[str, Any]) -> tuple[list[str], bool]:
        """Get H3 cells that cover geometry and detect spanning.

        Args:
            geom: GeoJSON geometry dictionary.

        Returns:
            tuple: (sorted list of H3 cell IDs, True when more than one cell
            was found). A Point always yields one cell and False.
        """
        shp = shape(geom)
        if isinstance(shp, Point):
            # Shapely stores (x, y) = (lon, lat); h3 expects (lat, lng).
            return [self.h3.latlng_to_cell(shp.y, shp.x, self.resolution)], False
        # Everything else is covered by sampling.
        return self._get_h3_cells_for_polygon(shp)

    def _get_h3_cells_for_polygon(self, polygon: BaseGeometry) -> tuple[list[str], bool]:
        """Get the H3 cells hit by a deterministic sampling of ``polygon``.

        Returns:
            tuple: (sorted list of H3 cell IDs, True when more than one cell
            was found). Sorting keeps the order stable across processes;
            iterating the underlying set directly would depend on string hash
            randomisation.
        """
        # Shapely bounds are (minx, miny, maxx, maxy) = (min_lon, min_lat, max_lon, max_lat).
        min_lon, min_lat, max_lon, max_lat = polygon.bounds

        # For small or degenerate geometries, use the centroid only.
        if polygon.area < 1e-10:
            centroid = polygon.centroid
            return [self.h3.latlng_to_cell(centroid.y, centroid.x, self.resolution)], False

        step = self._SAMPLE_STEP_DEGREES.get(self.resolution, 0.5)

        # Between 3 and 100 samples per axis, so the lattice stays bounded.
        width = max_lon - min_lon
        height = max_lat - min_lat
        n_lon = max(3, min(100, int(width / step) + 1))
        n_lat = max(3, min(100, int(height / step) + 1))

        # Recalculate the step so the lattice lands exactly on both bbox edges
        # (n_lon and n_lat are always >= 3, so the divisor is never zero).
        step_lon = width / (n_lon - 1)
        step_lat = height / (n_lat - 1)

        tiles = set()
        for i in range(n_lat):
            lat = min_lat + i * step_lat
            for j in range(n_lon):
                lon = min_lon + j * step_lon
                point = Point(lon, lat)
                # Keep samples inside the geometry or exactly on its boundary.
                if polygon.contains(point) or polygon.touches(point):
                    tiles.add(self.h3.latlng_to_cell(lat, lon, self.resolution))

        # Also walk the exterior ring so cells touched only by the edge are found.
        exterior = getattr(polygon, "exterior", None)
        if exterior is not None:
            n_boundary = max(10, min(50, int(exterior.length / step)))
            for i in range(n_boundary):
                point = exterior.interpolate(i / n_boundary, normalized=True)
                tiles.add(self.h3.latlng_to_cell(point.y, point.x, self.resolution))

        if not tiles:
            # No sample hit the geometry: fall back to the centroid.
            centroid = polygon.centroid
            return [self.h3.latlng_to_cell(centroid.y, centroid.x, self.resolution)], False

        return sorted(tiles), len(tiles) > 1
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
class S2GridSystem(GridSystem):
    """S2 grid system, backed by the optional ``s2sphere`` package.

    Tile IDs are ``str()`` of the ``s2sphere.CellId`` obtained by taking the
    leaf cell at a lat/lon and walking up to the configured level with
    ``parent(resolution)``. Points map to one cell; other geometries are
    covered approximately by sampling a lattice over the bounding box plus
    points along the exterior ring (when the geometry has one).
    """

    def __init__(self, resolution: int = 13):
        """Create an S2 grid at the given cell level.

        Args:
            resolution: S2 cell level, 0 (coarsest) to 30 (finest).

        Raises:
            ImportError: If the ``s2sphere`` package is not installed.
            ValueError: If ``resolution`` is outside 0-30.
        """
        # Only the import itself is guarded, so an unrelated error can never
        # be mistaken for a missing dependency.
        try:
            import s2sphere
        except ImportError:
            raise ImportError("s2sphere required: pip install s2sphere") from None

        if not 0 <= resolution <= 30:
            raise ValueError(f"S2 resolution must be 0-30, got {resolution}")
        self.s2 = s2sphere
        self.resolution = resolution

    def get_global_partition_threshold(
        self, default_threshold: int, thresholds: dict[str, dict[Any, int]] | None = None
    ) -> int:
        """Get S2-specific global partition threshold.

        Currently returns ``default_threshold`` unchanged; ``thresholds`` is
        not consulted.
        """
        return default_threshold

    def tiles_for_geometry(self, geom: dict[str, Any]) -> list[str]:
        """Get S2 cells that cover geometry (spanning flag discarded)."""
        tiles, _ = self.tiles_for_geometry_with_spanning_detection(geom)
        return tiles

    def tiles_for_geometry_with_spanning_detection(self, geom: dict[str, Any]) -> tuple[list[str], bool]:
        """Get S2 cells that cover geometry and detect spanning.

        Args:
            geom: GeoJSON geometry dictionary.

        Returns:
            tuple: (sorted list of S2 cell IDs, True when more than one cell
            was found). A Point always yields one cell and False.
        """
        shp = shape(geom)
        if isinstance(shp, Point):
            # Shapely stores (x, y) = (lon, lat).
            return [self._cell_id(shp.y, shp.x)], False
        return self._get_s2_cells_for_polygon(shp)

    def _cell_id(self, lat: float, lon: float) -> str:
        """Return the tile ID of the S2 cell at ``self.resolution`` containing (lat, lon)."""
        leaf = self.s2.CellId.from_lat_lng(self.s2.LatLng.from_degrees(lat, lon))
        return str(leaf.parent(self.resolution))

    def _get_s2_cells_for_polygon(self, polygon: BaseGeometry) -> tuple[list[str], bool]:
        """Get the S2 cells hit by a deterministic sampling of ``polygon``.

        Returns:
            tuple: (sorted list of S2 cell IDs, True when more than one cell
            was found). Sorting keeps the order stable across processes;
            iterating the underlying set directly would depend on string hash
            randomisation.
        """
        # Shapely bounds are (minx, miny, maxx, maxy) = (min_lon, min_lat, max_lon, max_lat).
        min_lon, min_lat, max_lon, max_lat = polygon.bounds

        # For small or degenerate geometries, use the centroid only.
        if polygon.area < 1e-10:
            centroid = polygon.centroid
            return [self._cell_id(centroid.y, centroid.x)], False

        # Sampling step in degrees derived from the level, never below 0.01.
        # NOTE(review): the divisor grows as 2 ** (level / 2), whereas an S2
        # cell edge halves with every level, so at high levels this step is
        # far larger than a cell - confirm this is the intended trade-off.
        step = max(0.01, 180 / (2 ** (self.resolution / 2)))

        # Between 3 and 50 samples per axis, so the lattice stays bounded.
        width = max_lon - min_lon
        height = max_lat - min_lat
        n_lon = max(3, min(50, int(width / step) + 1))
        n_lat = max(3, min(50, int(height / step) + 1))

        # Spread the samples so the lattice lands exactly on both bbox edges
        # (n_lon and n_lat are always >= 3, so the divisor is never zero).
        step_lon = width / (n_lon - 1)
        step_lat = height / (n_lat - 1)

        cells = set()
        for i in range(n_lat):
            lat = min_lat + i * step_lat
            for j in range(n_lon):
                lon = min_lon + j * step_lon
                point = Point(lon, lat)
                # Keep samples inside the geometry or exactly on its boundary.
                if polygon.contains(point) or polygon.touches(point):
                    cells.add(self._cell_id(lat, lon))

        # Sample the exterior ring so cells touched only by the edge are found.
        exterior = getattr(polygon, "exterior", None)
        if exterior is not None:
            n_boundary = max(10, min(30, int(exterior.length / step)))
            for i in range(n_boundary):
                point = exterior.interpolate(i / n_boundary, normalized=True)
                cells.add(self._cell_id(point.y, point.x))

        if not cells:
            # No sample hit the geometry: fall back to the centroid.
            centroid = polygon.centroid
            return [self._cell_id(centroid.y, centroid.x)], False

        return sorted(cells), len(cells) > 1
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
class MGRSGridSystem(GridSystem):
    """Military Grid Reference System, backed by the optional ``mgrs`` package.

    Tile IDs are the strings returned by ``mgrs.MGRS().toMGRS`` at the
    configured precision (1=10km, 2=1km, 3=100m, 4=10m, 5=1m). Points map to
    one cell; other geometries are covered approximately by sampling a
    lattice over the bounding box plus points along the exterior ring (when
    the geometry has one). Samples that MGRS cannot convert are skipped.
    """

    def __init__(self, resolution: int = 5):
        """Create an MGRS grid at the given precision.

        Args:
            resolution: MGRS precision level, 1 (10 km) to 5 (1 m).

        Raises:
            ImportError: If the ``mgrs`` package is not installed.
            ValueError: If ``resolution`` is outside 1-5.
        """
        # Only the import itself is guarded, so an unrelated error can never
        # be mistaken for a missing dependency.
        try:
            import mgrs
        except ImportError:
            raise ImportError("mgrs required: pip install mgrs") from None

        if not 1 <= resolution <= 5:
            raise ValueError(f"MGRS resolution must be 1-5, got {resolution}")
        self.mgrs = mgrs.MGRS()
        self.resolution = resolution  # Precision level
        # Exception type(s) treated as "coordinate not convertible" while sampling.
        # NOTE(review): confirm the installed mgrs release exposes MGRSError at
        # package top level; if it does not, this falls back to ValueError and
        # the library's own error type would not be caught by the loops below.
        self._mgrs_error: tuple[type[BaseException], ...] = (getattr(mgrs, "MGRSError", ValueError),)

    def get_global_partition_threshold(
        self, default_threshold: int, thresholds: dict[str, dict[Any, int]] | None = None
    ) -> int:
        """Get MGRS-specific global partition threshold.

        Currently returns ``default_threshold`` unchanged; ``thresholds`` is
        not consulted.
        """
        return default_threshold

    def tiles_for_geometry(self, geom: dict[str, Any]) -> list[str]:
        """Get MGRS grid cells (spanning flag discarded)."""
        tiles, _ = self.tiles_for_geometry_with_spanning_detection(geom)
        return tiles

    def tiles_for_geometry_with_spanning_detection(self, geom: dict[str, Any]) -> tuple[list[str], bool]:
        """Get MGRS grid cells and detect spanning.

        Args:
            geom: GeoJSON geometry dictionary.

        Returns:
            tuple: (sorted list of MGRS codes, True when more than one code
            was found). A Point always yields one code and False.
        """
        shp = shape(geom)
        if isinstance(shp, Point):
            # Shapely stores (x, y) = (lon, lat); toMGRS takes (lat, lon).
            return [self.mgrs.toMGRS(shp.y, shp.x, MGRSPrecision=self.resolution)], False
        return self._get_mgrs_cells_for_polygon(shp)

    def _get_mgrs_cells_for_polygon(self, polygon: BaseGeometry) -> tuple[list[str], bool]:
        """Get the MGRS cells hit by a deterministic sampling of ``polygon``.

        Conversion errors are skipped for lattice and boundary samples, but
        not for the centroid fallbacks: if the centroid cannot be converted
        the library's exception propagates to the caller.

        Returns:
            tuple: (sorted list of MGRS codes, True when more than one code
            was found). Sorting keeps the order stable across processes;
            iterating the underlying set directly would depend on string hash
            randomisation.
        """
        # Shapely bounds are (minx, miny, maxx, maxy) = (min_lon, min_lat, max_lon, max_lat).
        min_lon, min_lat, max_lon, max_lat = polygon.bounds

        # For small or degenerate geometries, use the centroid only.
        if polygon.area < 1e-10:
            centroid = polygon.centroid
            return [self.mgrs.toMGRS(centroid.y, centroid.x, MGRSPrecision=self.resolution)], False

        # Approximate cell size in degrees per precision level
        # (1=10km, 2=1km, 3=100m, 4=10m, 5=1m).
        mgrs_step_degrees = {1: 0.1, 2: 0.01, 3: 0.001, 4: 0.0001, 5: 0.00001}
        step = mgrs_step_degrees.get(self.resolution, 0.01)

        # Between 3 and 50 samples per axis, so the lattice stays bounded.
        width = max_lon - min_lon
        height = max_lat - min_lat
        n_lon = max(3, min(50, int(width / step) + 1))
        n_lat = max(3, min(50, int(height / step) + 1))

        # Spread the samples so the lattice lands exactly on both bbox edges
        # (n_lon and n_lat are always >= 3, so the divisor is never zero).
        step_lon = width / (n_lon - 1)
        step_lat = height / (n_lat - 1)

        mgrs_codes = set()
        for i in range(n_lat):
            lat = min_lat + i * step_lat
            for j in range(n_lon):
                lon = min_lon + j * step_lon
                point = Point(lon, lat)
                # Keep samples inside the geometry or exactly on its boundary.
                if not (polygon.contains(point) or polygon.touches(point)):
                    continue
                try:
                    mgrs_codes.add(self.mgrs.toMGRS(lat, lon, MGRSPrecision=self.resolution))
                except self._mgrs_error:
                    # Skip coordinates outside MGRS valid range (lat must be -80 to 84)
                    logger.debug(f"MGRS conversion failed for ({lat}, {lon}): outside valid range")

        # Sample the exterior ring so cells touched only by the edge are found.
        exterior = getattr(polygon, "exterior", None)
        if exterior is not None:
            n_boundary = max(10, min(30, int(exterior.length / step)))
            for i in range(n_boundary):
                point = exterior.interpolate(i / n_boundary, normalized=True)
                try:
                    mgrs_codes.add(self.mgrs.toMGRS(point.y, point.x, MGRSPrecision=self.resolution))
                except self._mgrs_error:
                    # Skip boundary points outside MGRS valid range
                    logger.debug(f"MGRS boundary conversion failed for ({point.y}, {point.x})")

        if not mgrs_codes:
            # No sample could be converted: fall back to the centroid.
            centroid = polygon.centroid
            return [self.mgrs.toMGRS(centroid.y, centroid.x, MGRSPrecision=self.resolution)], False

        return sorted(mgrs_codes), len(mgrs_codes) > 1
|
|
615
|
+
|
|
616
|
+
|
|
617
|
+
class UTMGridSystem(GridSystem):
    """UTM zone-based grid system.

    Tiles are named ``"{zone}{hemisphere}"`` (for example ``"32N"``), where
    ``zone`` is the number of the 6°-wide longitude band (1-60) and
    ``hemisphere`` is ``"N"`` for latitudes >= 0 and ``"S"`` otherwise.
    """

    def __init__(self, resolution: int = 1):
        self.resolution = resolution  # Not used, just for consistency

    def get_global_partition_threshold(
        self, default_threshold: int, thresholds: dict[str, dict[Any, int]] | None = None
    ) -> int:
        """Get UTM-specific global partition threshold.

        No UTM-specific override is applied: ``thresholds`` is ignored and
        ``default_threshold`` is returned unchanged.
        """
        return default_threshold

    def tiles_for_geometry(self, geom: dict[str, Any]) -> list[str]:
        """Get UTM zones."""
        tiles, _ = self.tiles_for_geometry_with_spanning_detection(geom)
        return tiles

    def tiles_for_geometry_with_spanning_detection(self, geom: dict[str, Any]) -> tuple[list[str], bool]:
        """Get UTM zones and detect spanning.

        Args:
            geom: GeoJSON-like geometry mapping, converted with ``shape``.

        Returns:
            ``(zones, is_spanning)``. A Point always maps to exactly one zone
            and is never spanning; every other geometry type is delegated to
            ``_get_utm_zones_for_polygon``.
        """
        shp = shape(geom)

        if not isinstance(shp, Point):
            return self._get_utm_zones_for_polygon(shp)

        return [self._zone_name(shp.x, shp.y)], False

    @staticmethod
    def _zone_number(lon: float) -> int:
        """Return the UTM zone number for a longitude, clamped to 1..60.

        Without the clamp, lon == 180 would produce the non-existent zone 61.
        """
        return max(1, min(60, int((lon + 180) / 6) + 1))

    @classmethod
    def _zone_name(cls, lon: float, lat: float) -> str:
        """Return the ``"{zone}{N|S}"`` tile name for a single coordinate."""
        hemisphere = "N" if lat >= 0 else "S"
        return f"{cls._zone_number(lon)}{hemisphere}"

    def _get_utm_zones_for_polygon(self, polygon: BaseGeometry) -> tuple[list[str], bool]:
        """Get UTM zones that intersect with a polygon using deterministic approach.

        UTM zones are 6° wide, so we can directly compute which zones overlap the bounding box.
        Each candidate zone is modelled as a rectangle covering 0..84 (north) or
        -80..0 (south) degrees of latitude and tested against the geometry.

        Returns:
            ``(zones, is_spanning)`` with zones sorted by zone number and then
            hemisphere. If no zone rectangle intersects the geometry, the zone
            of the centroid is returned with ``is_spanning`` False.
        """
        # Shapely returns (minx, miny, maxx, maxy) = (min_lon, min_lat, max_lon, max_lat)
        min_lon, min_lat, max_lon, max_lat = polygon.bounds

        min_zone = self._zone_number(min_lon)
        max_zone = self._zone_number(max_lon)

        # The hemispheres to test depend only on the bounding box, so they are
        # computed once rather than per zone.
        if min_lat < 0 and max_lat >= 0:
            hemispheres = ["N", "S"]
        elif max_lat >= 0:
            hemispheres = ["N"]
        else:
            hemispheres = ["S"]

        # Latitude extent of the zone rectangle for each hemisphere.
        lat_extent = {"N": (0, 84), "S": (-80, 0)}

        zones = set()
        for zone in range(min_zone, max_zone + 1):
            zone_min_lon = (zone - 1) * 6 - 180
            zone_max_lon = zone * 6 - 180

            for hemisphere in hemispheres:
                lat_lo, lat_hi = lat_extent[hemisphere]
                zone_polygon = Polygon(
                    [
                        (zone_min_lon, lat_lo),
                        (zone_max_lon, lat_lo),
                        (zone_max_lon, lat_hi),
                        (zone_min_lon, lat_hi),
                        (zone_min_lon, lat_lo),
                    ]
                )

                if polygon.intersects(zone_polygon):
                    zones.add(f"{zone}{hemisphere}")

        if not zones:
            # Fallback to centroid
            centroid = polygon.centroid
            return [self._zone_name(centroid.x, centroid.y)], False

        # Sort numerically so the output does not depend on set iteration order.
        zone_list = sorted(zones, key=lambda code: (int(code[:-1]), code[-1]))
        is_spanning = len(zone_list) > 1

        return zone_list, is_spanning
|
|
719
|
+
|
|
720
|
+
|
|
721
|
+
class SimpleLatLonGrid(GridSystem):
    """Simple lat/lon grid (degree-based).

    Each cell is a ``resolution`` x ``resolution`` degree square named after
    its lower-left corner, e.g. ``"lat+010_lon-020"``.
    """

    def __init__(self, resolution: int = 1):
        if resolution <= 0:
            raise ValueError(f"LatLon resolution must be positive, got {resolution}")
        self.resolution = resolution  # Grid size in degrees

    def get_global_partition_threshold(
        self, default_threshold: int, thresholds: dict[str, dict[Any, int]] | None = None
    ) -> int:
        """Return ``default_threshold`` unchanged; ``thresholds`` is not consulted."""
        return default_threshold

    def tiles_for_geometry(self, geom: dict[str, Any]) -> list[str]:
        """Return only the cell names, discarding the spanning flag."""
        cells, _spanning = self.tiles_for_geometry_with_spanning_detection(geom)
        return cells

    def tiles_for_geometry_with_spanning_detection(self, geom: dict[str, Any]) -> tuple[list[str], bool]:
        """Return ``(cells, is_spanning)`` for a GeoJSON-like geometry.

        A Point maps to the single cell containing it; any other geometry is
        handled by ``_get_latlon_cells_for_polygon``.
        """
        geometry = shape(geom)

        if not isinstance(geometry, Point):
            return self._get_latlon_cells_for_polygon(geometry)

        step = self.resolution
        # math.floor rather than int(): int(-0.5) is 0, but the cell that
        # contains -0.5 starts at -1.
        south = math.floor(geometry.y / step) * step
        west = math.floor(geometry.x / step) * step
        return [f"lat{south:+04d}_lon{west:+04d}"], False

    def _get_latlon_cells_for_polygon(self, polygon: BaseGeometry) -> tuple[list[str], bool]:
        """Return the grid cells whose square footprint intersects ``polygon``.

        Candidate cells are enumerated over the bounding box, row by row from
        the lowest latitude and, within a row, from the lowest longitude. If no
        candidate intersects, the cell containing the centroid is returned
        with the spanning flag set to False.
        """
        step = self.resolution

        # Shapely bounds are (minx, miny, maxx, maxy) = (west, south, east, north).
        west, south, east, north = polygon.bounds

        # Snap each bound down to the origin of the cell that contains it.
        first_row = math.floor(south / step) * step
        last_row = math.floor(north / step) * step
        first_col = math.floor(west / step) * step
        last_col = math.floor(east / step) * step

        cells = []
        row = first_row
        while row <= last_row:
            top = row + step
            col = first_col
            while col <= last_col:
                right = col + step
                footprint = Polygon(
                    [
                        (col, row),
                        (right, row),
                        (right, top),
                        (col, top),
                        (col, row),
                    ]
                )
                if polygon.intersects(footprint):
                    cells.append(f"lat{row:+04d}_lon{col:+04d}")
                col = right
            row = top

        if cells:
            return cells, len(cells) > 1

        # Nothing intersected: fall back to the cell holding the centroid.
        center = polygon.centroid
        center_lon, center_lat = center.x, center.y
        snapped_lat = math.floor(center_lat / step) * step
        snapped_lon = math.floor(center_lon / step) * step
        return [f"lat{snapped_lat:+04d}_lon{snapped_lon:+04d}"], False
|
|
802
|
+
|
|
803
|
+
|
|
804
|
+
class ITSLiveGridSystem(GridSystem):
    """ITS_LIVE center-based 10°×10° grid system with specific naming convention.

    The ITS_LIVE grid system uses a center-based approach where each 10°×10° cell
    is named using the coordinates of its center point. For example, a cell centered
    at 60°N, 40°W would be named "N60W040".

    Key Features:
    - Fixed 10°×10° grid cells globally
    - Center-based cell identification
    - Specific naming convention: {N|S}{lat:02d}{E|W}{lon:03d}

    Grid Cell Naming:
    - Latitude: N for values >= 0, S for negative, 2-digit absolute value
    - Longitude: E for values >= 0, W for negative, 3-digit absolute value
    - Centers are multiples of 10, e.g. "N60W040", "S10E170", "N00E000"

    Point Assignment:
    - A coordinate ``v`` belongs to the center ``c`` with ``c - 5 <= v < c + 5``,
      so a point exactly on a shared edge always goes to the higher-valued cell.

    Example:
        >>> grid = ITSLiveGridSystem()
        >>> tiles = grid.tiles_for_geometry(greenland_geometry)
        >>> print(tiles)  # ['N60W040', 'N70W030', 'N70W040', ...]
    """

    def __init__(self):
        """Initialize ITS_LIVE grid system with fixed 10-degree resolution."""
        self.resolution = 10  # Fixed 10-degree grid

    def get_global_partition_threshold(
        self, default_threshold: int, thresholds: dict[str, dict[Any, int]] | None = None
    ) -> int:
        """Get ITS_LIVE-specific global partition threshold.

        No override is applied: ``thresholds`` is ignored and
        ``default_threshold`` is returned unchanged.
        """
        return default_threshold

    def tiles_for_geometry(self, geom: dict[str, Any]) -> list[str]:
        """Get ITS_LIVE grid cells that intersect with geometry."""
        tiles, _ = self.tiles_for_geometry_with_spanning_detection(geom)
        return tiles

    def tiles_for_geometry_with_spanning_detection(self, geom: dict[str, Any]) -> tuple[list[str], bool]:
        """Get ITS_LIVE grid cells and detect spanning.

        A Point maps to the single cell whose center is nearest (for example a
        point at lon -40, lat 60 is in "N60W040", which spans lon -45..-35 and
        lat 55..65). Any other geometry is delegated to
        ``_get_itslive_cells_for_polygon``.
        """
        shp = shape(geom)

        if not isinstance(shp, Point):
            return self._get_itslive_cells_for_polygon(shp)

        name = self._format_cell_name(self._cell_center(shp.y), self._cell_center(shp.x))
        return [name], False

    @staticmethod
    def _cell_center(value: float) -> int:
        """Return the multiple of 10 that is the center of the cell containing ``value``.

        ``floor((value + 5) / 10) * 10`` is used instead of ``round(value / 10) * 10``
        because ``round`` resolves exact halves to the even neighbour, which
        would send 5 to center 0 but 15 to center 20. With floor, every cell
        covers the half-open interval [center - 5, center + 5).
        """
        return math.floor((value + 5) / 10.0) * 10

    def _get_itslive_cells_for_polygon(self, polygon: BaseGeometry) -> tuple[list[str], bool]:
        """Get ITS_LIVE grid cells that intersect with a polygon.

        ITS_LIVE cells are 10°×10° with centers at multiples of 10 degrees.
        Candidate centers are derived from the bounding box and each candidate
        square (center ± 5 degrees) is tested against the geometry.

        Returns:
            ``(cells, is_spanning)`` with cell names sorted alphabetically. If no
            candidate intersects, the cell containing the centroid is returned
            with ``is_spanning`` False.
        """
        from shapely.geometry import box

        if not polygon.is_valid:
            # buffer(0) is used here to repair invalid geometries before the intersection tests
            polygon = polygon.buffer(0)

        minx, miny, maxx, maxy = polygon.bounds

        # Each cell spans center - 5 .. center + 5, hence the ±5 shift before
        # snapping the bounding box to cell centers.
        min_lat_center = self._cell_center(miny)
        max_lat_center = math.ceil((maxy - 5) / 10.0) * 10
        min_lon_center = self._cell_center(minx)
        max_lon_center = math.ceil((maxx - 5) / 10.0) * 10

        grids = set()
        for lat_center in range(min_lat_center, max_lat_center + 1, 10):
            for lon_center in range(min_lon_center, max_lon_center + 1, 10):
                tile = box(lon_center - 5, lat_center - 5, lon_center + 5, lat_center + 5)
                if polygon.intersects(tile):
                    grids.add(self._format_cell_name(lat_center, lon_center))

        if not grids:
            # Fallback to centroid using same logic as point queries
            centroid = polygon.centroid
            name = self._format_cell_name(self._cell_center(centroid.y), self._cell_center(centroid.x))
            return [name], False

        # Sorted so the output does not depend on set iteration order.
        grid_list = sorted(grids)
        return grid_list, len(grid_list) > 1

    def _format_cell_name(self, lat_center: float, lon_center: float) -> str:
        """Format cell name using ITS_LIVE convention: {N|S}{lat:02d}{E|W}{lon:03d}."""
        lat_letter = "N" if lat_center >= 0 else "S"
        lon_letter = "E" if lon_center >= 0 else "W"
        return f"{lat_letter}{abs(int(lat_center)):02d}{lon_letter}{abs(int(lon_center)):03d}"
|
|
928
|
+
|
|
929
|
+
|
|
930
|
+
class GeoJSONGridSystem(GridSystem):
    """Generic grid system based on custom GeoJSON tiles.

    Every Polygon feature in the file becomes one tile; every member of a
    MultiPolygon feature becomes its own tile named ``"{tile_id}_{index}"``.
    """

    def __init__(self, geojson_path: str):
        """Initialize with path to GeoJSON file containing tile geometries."""
        self.geojson_path = geojson_path
        self.tiles = self._load_geojson_tiles()

    @staticmethod
    def _resolve_tile_id(feature: dict[str, Any], properties: dict[str, Any], index: int) -> str:
        """Pick a tile identifier for a feature.

        Candidates are tried in order: ``properties["id"]``,
        ``properties["tile_id"]``, ``properties["name"]``, the feature-level
        ``id``, then the generated ``"tile_{index}"``. Only ``None`` and the
        empty string count as missing, so a numeric id of ``0`` is kept.
        """
        candidates = (
            properties.get("id"),
            properties.get("tile_id"),
            properties.get("name"),
            feature.get("id"),
        )
        for candidate in candidates:
            if candidate is not None and candidate != "":
                return str(candidate)
        return f"tile_{index}"

    def _load_geojson_tiles(self) -> dict[str, Polygon]:
        """Load tiles from GeoJSON file.

        Returns:
            Mapping of tile id to Shapely polygon.

        Raises:
            FileNotFoundError: If ``geojson_path`` does not exist.
            ValueError: If the file is not valid JSON, is not a Feature or
                FeatureCollection, contains a geometry that cannot be
                converted, or yields no polygon at all.
        """
        try:
            # GeoJSON is specified as UTF-8; do not rely on the platform default.
            with open(self.geojson_path, encoding="utf-8") as f:
                geojson_data = json.load(f)
        except FileNotFoundError as err:
            raise FileNotFoundError(f"GeoJSON file not found: {self.geojson_path}") from err
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid GeoJSON file: {e}") from e

        # A top-level array or scalar is valid JSON but not a GeoJSON object.
        doc_type = geojson_data.get("type") if isinstance(geojson_data, dict) else None
        if doc_type == "FeatureCollection":
            features = geojson_data.get("features") or []
        elif doc_type == "Feature":
            features = [geojson_data]
        else:
            raise ValueError("GeoJSON must be a Feature or FeatureCollection")

        tiles: dict[str, Polygon] = {}

        for feature in features:
            if not isinstance(feature, dict) or feature.get("type") != "Feature":
                continue

            geometry = feature.get("geometry")
            if not isinstance(geometry, dict) or geometry.get("type") not in ["Polygon", "MultiPolygon"]:
                continue

            # "properties": null is legal GeoJSON, so .get's default is not enough.
            properties = feature.get("properties")
            if not isinstance(properties, dict):
                properties = {}

            tile_id = self._resolve_tile_id(feature, properties, len(tiles))

            try:
                shp = shape(geometry)
                if isinstance(shp, Polygon):
                    tiles[tile_id] = shp
                elif hasattr(shp, "geoms"):  # MultiPolygon-like
                    try:
                        for i, geom in enumerate(shp.geoms):
                            if isinstance(geom, Polygon):
                                tiles[f"{tile_id}_{i}"] = geom
                    except (AttributeError, TypeError):
                        # Best effort: skip members that cannot be enumerated
                        pass
            except Exception as e:
                raise ValueError(f"Error processing geometry for tile {tile_id}: {e}") from e

        if not tiles:
            raise ValueError("No valid polygon geometries found in GeoJSON")

        return tiles

    def tiles_for_geometry(self, geom: dict[str, Any]) -> list[str]:
        """Get tile IDs that intersect with geometry."""
        tiles, _ = self.tiles_for_geometry_with_spanning_detection(geom)
        return tiles

    def tiles_for_geometry_with_spanning_detection(self, geom: dict[str, Any]) -> tuple[list[str], bool]:
        """Get tile IDs and detect if geometry spans multiple tiles.

        Returns:
            ``(tile_ids, is_spanning)`` in the order the tiles were loaded. If
            no tile intersects, the tile whose centroid is closest to the
            geometry's centroid is returned with ``is_spanning`` False.
        """
        shp = shape(geom)
        intersecting_tiles = [
            tile_id for tile_id, tile_polygon in self.tiles.items() if shp.intersects(tile_polygon)
        ]

        if not intersecting_tiles:
            # Fallback: find nearest tile by centroid distance
            centroid = shp.centroid
            nearest_id, _ = min(self.tiles.items(), key=lambda item: centroid.distance(item[1].centroid))
            return [nearest_id], False

        return intersecting_tiles, len(intersecting_tiles) > 1
|
|
1019
|
+
|
|
1020
|
+
|
|
1021
|
+
def get_grid_system(name: str, resolution: int = 1, geojson_path: str | None = None) -> GridSystem:
    """Factory function to create grid system instances by name.

    This is the primary entry point for creating spatial grid systems in EarthCatalog.
    All grid systems provide the same spatial partitioning interface.

    Supported Grid Systems:
    - 'h3': Uber's H3 hexagonal grid
    - 's2': Google's S2 spherical geometry
    - 'mgrs': Military Grid Reference System
    - 'utm': Universal Transverse Mercator zones
    - 'latlon': Simple latitude/longitude grid
    - 'itslive': ITS_LIVE center-based 10°×10° grid
    - 'geojson': Custom polygon-based partitioning

    Args:
        name: Grid system identifier (case-insensitive). Must be one of the supported
            grid system names listed above.
        resolution: Grid resolution, passed as the only constructor argument to
            every system except 'itslive' and 'geojson'. Meaning varies by system:
            - H3: 0-15 (0=large hexagons, 15=small hexagons)
            - S2: 0-30 (0=large cells, 30=small cells)
            - MGRS: Precision level (1-5, where 5=1m precision)
            - UTM: Stored but not used (zones are fixed 6° longitude bands)
            - LatLon: Grid cell size in degrees; must be positive
            - ITSLive: Not passed (fixed 10° resolution)
            - GeoJSON: Not passed (tiles come from the file's polygons)
        geojson_path: Path to GeoJSON file containing polygon features for custom
            partitioning. Required only when name='geojson'.

    Returns:
        GridSystem: Configured grid system instance.

    Raises:
        ValueError: If the grid system name is not recognized, if geojson_path is
            missing or empty for the 'geojson' grid, or if the selected
            constructor rejects its argument.

    Example:
        >>> grid = get_grid_system('h3', resolution=6)
        >>> grid = get_grid_system('geojson', geojson_path='custom_regions.geojson')
        >>> tiles = grid.tiles_for_geometry(feature_geometry)
        >>> print(f"Geometry intersects {len(tiles)} grid tiles")
    """
    systems = {
        "h3": H3GridSystem,
        "s2": S2GridSystem,
        "mgrs": MGRSGridSystem,
        "utm": UTMGridSystem,
        "latlon": SimpleLatLonGrid,
        "itslive": ITSLiveGridSystem,
        "geojson": GeoJSONGridSystem,
    }

    # Normalize once; the lookup is case-insensitive.
    key = name.lower()

    if key not in systems:
        raise ValueError(f"Unknown grid system: {name}. Available: {', '.join(systems.keys())}")

    if key == "geojson":
        if not geojson_path:
            raise ValueError("geojson_path is required for GeoJSON grid system")
        return systems[key](geojson_path)  # type: ignore

    if key == "itslive":
        # ITSLive has fixed 10-degree resolution, so its constructor takes no argument
        return systems[key]()  # type: ignore

    # type: ignore bypasses mypy's abstract class checking; the concrete
    # classes do implement the abstract methods.
    return systems[key](resolution)  # type: ignore
|