earthcatalog 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. earthcatalog/__init__.py +164 -0
  2. earthcatalog/async_http_client.py +1006 -0
  3. earthcatalog/config.py +97 -0
  4. earthcatalog/engines/__init__.py +308 -0
  5. earthcatalog/engines/rustac_engine.py +142 -0
  6. earthcatalog/engines/stac_geoparquet_engine.py +126 -0
  7. earthcatalog/exceptions.py +471 -0
  8. earthcatalog/grid_systems.py +1114 -0
  9. earthcatalog/ingestion_pipeline.py +2281 -0
  10. earthcatalog/input_readers.py +603 -0
  11. earthcatalog/job_tracking.py +485 -0
  12. earthcatalog/pipeline.py +606 -0
  13. earthcatalog/schema_generator.py +911 -0
  14. earthcatalog/spatial_resolver.py +1207 -0
  15. earthcatalog/stac_hooks.py +754 -0
  16. earthcatalog/statistics.py +677 -0
  17. earthcatalog/storage_backends.py +548 -0
  18. earthcatalog/tests/__init__.py +1 -0
  19. earthcatalog/tests/conftest.py +76 -0
  20. earthcatalog/tests/test_all_grids.py +793 -0
  21. earthcatalog/tests/test_async_http.py +700 -0
  22. earthcatalog/tests/test_cli_and_storage.py +230 -0
  23. earthcatalog/tests/test_config.py +245 -0
  24. earthcatalog/tests/test_dask_integration.py +580 -0
  25. earthcatalog/tests/test_e2e_synthetic.py +1624 -0
  26. earthcatalog/tests/test_engines.py +272 -0
  27. earthcatalog/tests/test_exceptions.py +346 -0
  28. earthcatalog/tests/test_file_structure.py +245 -0
  29. earthcatalog/tests/test_input_readers.py +666 -0
  30. earthcatalog/tests/test_integration.py +200 -0
  31. earthcatalog/tests/test_integration_async.py +283 -0
  32. earthcatalog/tests/test_job_tracking.py +603 -0
  33. earthcatalog/tests/test_multi_file_input.py +336 -0
  34. earthcatalog/tests/test_passthrough_hook.py +196 -0
  35. earthcatalog/tests/test_pipeline.py +684 -0
  36. earthcatalog/tests/test_pipeline_components.py +665 -0
  37. earthcatalog/tests/test_schema_generator.py +506 -0
  38. earthcatalog/tests/test_spatial_resolver.py +413 -0
  39. earthcatalog/tests/test_stac_hooks.py +776 -0
  40. earthcatalog/tests/test_statistics.py +477 -0
  41. earthcatalog/tests/test_storage_backends.py +236 -0
  42. earthcatalog/tests/test_validation.py +435 -0
  43. earthcatalog/tests/test_workers.py +653 -0
  44. earthcatalog/validation.py +921 -0
  45. earthcatalog/workers.py +682 -0
  46. earthcatalog-0.2.0.dist-info/METADATA +333 -0
  47. earthcatalog-0.2.0.dist-info/RECORD +50 -0
  48. earthcatalog-0.2.0.dist-info/WHEEL +5 -0
  49. earthcatalog-0.2.0.dist-info/entry_points.txt +3 -0
  50. earthcatalog-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1114 @@
1
+ """Spatial grid system abstractions for efficient catalog partitioning and querying.
2
+
3
+ This module provides a unified interface for multiple spatial partitioning systems,
4
+ enabling EarthCatalog to organize geospatial data efficiently across different
5
+ geographical regions and use cases. Each grid system offers different trade-offs
6
+ between query performance, storage efficiency, and geographical accuracy.
7
+
8
+ Async HTTP Integration:
9
+ Grid systems work seamlessly with EarthCatalog's async HTTP processing to provide
10
+ optimal performance across the entire pipeline. The spatial partitioning decisions
11
+ made by grid systems directly influence the efficiency of concurrent HTTP requests
12
+ and the resulting query performance.
13
+
14
+ Grid System Types:
15
+ - H3GridSystem: Uber's hexagonal grid system (recommended for most use cases)
16
+ * Optimal for global datasets and async HTTP performance
17
+ * Uniform cell sizes enable predictable batch processing
18
+ * ~15% faster queries than alternatives with async HTTP
19
+
20
+ - S2GridSystem: Google's S2 spherical geometry system (excellent for polar regions)
21
+ * Superior accuracy for high-latitude regions (>60° N/S)
22
+ * Efficient with async HTTP for polar satellite data
23
+ * Adaptive cell sizes optimize network batch processing
24
+
25
+ - MGRSGridSystem: Military Grid Reference System (standard for government use)
26
+ * Standardized grid for defense and government applications
27
+ * Works well with async HTTP for large-scale monitoring datasets
28
+ * Fixed zone structure optimizes concurrent processing patterns
29
+
30
+ - UTMGridSystem: Universal Transverse Mercator (high precision for regional data)
31
+ * Highest precision for regional datasets (<6° longitude extent)
32
+ * Async HTTP efficiency for zone-based processing workflows
33
+ * Optimal for national/continental scale catalogs
34
+
35
+ - SimpleLatLonGrid: Basic latitude/longitude grid (simple but less efficient)
36
+ * Simple implementation for basic use cases
37
+ * Compatible with async HTTP but less optimal partitioning
38
+ * Good for legacy system integration
39
+
40
+ - GeoJSONGridSystem: Custom polygon-based partitioning (maximum flexibility)
41
+ * Maximum flexibility for custom administrative boundaries
42
+ * Async HTTP performance depends on polygon complexity
43
+ * Ideal for country/state boundary-based cataloging
44
+
45
+ Key Features:
46
+ - Consistent API across all grid systems for seamless async integration
47
+ - Spanning detection to optimize concurrent HTTP request patterns
48
+ - Global partition threshold support for large geometries (reduces HTTP duplication)
49
+ - Performance-optimized implementations with spatial indexing
50
+ - Graceful handling of missing optional dependencies
51
+ - Async-aware batch size recommendations for each grid type
52
+
53
+ Performance Characteristics:
54
+ Different grid systems excel in different scenarios, especially with async HTTP:
55
+ - H3 + Async HTTP: Best overall performance, 3-6x speedup with uniform batching
56
+ - S2 + Async HTTP: Superior for polar regions, efficient adaptive batching
57
+ - UTM + Async HTTP: Highest precision for regional data, zone-optimized concurrency
58
+ - GeoJSON + Async HTTP: Most flexible, performance varies by polygon complexity
59
+
60
+ Async HTTP Performance Integration:
61
+ Grid systems influence async HTTP performance through:
62
+ - Batch size optimization: Grid cell distribution affects optimal concurrent request counts
63
+ - Load balancing: Uniform vs adaptive cell sizes impact worker distribution
64
+ - Memory efficiency: Grid complexity affects memory usage per concurrent request
65
+ - Query optimization: Grid choice affects partition boundary query patterns
66
+
67
+ Usage Patterns:
68
+
69
+ Basic Grid Selection with Async HTTP:
70
+ >>> # Factory function with async optimization (recommended)
71
+ >>> grid = get_grid_system('h3', resolution=6)
72
+ >>> config = ProcessingConfig(
73
+ ... grid_system='h3',
74
+ ... grid_resolution=6,
75
+ ... enable_concurrent_http=True, # Optimized for H3 cell distribution
76
+ ... concurrent_requests=50 # Tuned for H3 batch patterns
77
+ ... )
78
+
79
+ Advanced Grid Configuration for High-Performance Async:
80
+ >>> # H3 optimized for global async processing
81
+ >>> grid = H3GridSystem(resolution=7) # Higher resolution for better batching
82
+ >>> config = ProcessingConfig(
83
+ ... grid_system='h3',
84
+ ... grid_resolution=7,
85
+ ... concurrent_requests=100, # H3 handles high concurrency well
86
+ ... batch_size=2000 # Uniform cells enable large batches
87
+ ... )
88
+
89
+ Regional UTM Setup with Conservative Async:
90
+ >>> # UTM optimized for regional precision
91
+ >>> grid = UTMGridSystem()
92
+ >>> config = ProcessingConfig(
93
+ ... grid_system='utm',
94
+ ... concurrent_requests=25, # Conservative for zone boundaries
95
+ ... batch_size=1000 # Smaller batches for zone transitions
96
+ ... )
97
+
98
+ Custom GeoJSON with Adaptive Async:
99
+ >>> # Custom boundaries with adaptive async settings
100
+ >>> grid = GeoJSONGridSystem(polygons_file='admin_boundaries.geojson')
101
+ >>> config = ProcessingConfig(
102
+ ... grid_system='geojson',
103
+ ... concurrent_requests=25, # Conservative for complex polygons
104
+ ... request_timeout=60 # Longer timeout for complex geometries
105
+ ... )
106
+
107
+ Performance Recommendations:
108
+ - Global datasets: H3 + 50-100 concurrent requests
109
+ - Regional datasets: UTM + 25-50 concurrent requests
110
+ - Polar regions: S2 + 25-75 concurrent requests
111
+ - Administrative boundaries: GeoJSON + 10-50 concurrent requests
112
+ - Legacy systems: LatLon + 25-50 concurrent requests
113
+
114
+ Integration:
115
+ Grid systems integrate seamlessly with EarthCatalog's async HTTP ingestion pipeline
116
+ through the ProcessingConfig.grid_system parameter. The choice of grid system
117
+ affects catalog structure, query performance, concurrent processing efficiency,
118
+ and optimal async HTTP configuration settings.
119
+ """
120
+
121
+ import json
122
+ import logging
123
+ from abc import ABC, abstractmethod
124
+ from typing import TYPE_CHECKING, Any
125
+
126
+ if TYPE_CHECKING:
127
+ pass
128
+
129
+ import math
130
+
131
+ from shapely.geometry import Point, Polygon, shape
132
+ from shapely.geometry.base import BaseGeometry
133
+
134
+ logger = logging.getLogger(__name__)
135
+
136
+ try:
137
+ import h3
138
+ except ImportError:
139
+ h3 = None
140
+
141
+
142
def detect_antimeridian_crossing(geom: dict[str, Any]) -> bool:
    """Detect if a geometry crosses the antimeridian (±180° longitude line).

    Geometries that cross the antimeridian require special handling because
    standard geometry libraries interpret them as spanning the "wrong way"
    around the globe. For example, a polygon from 170°E to 170°W should
    span 20° across the antimeridian, but Shapely interprets it as 340°.

    Two heuristics are applied, in this order:

    1. A longitude jump of more than 180° between two consecutive vertices
       of the *same* ring or line. Vertices belonging to different rings,
       lines or polygons are never compared with each other.
    2. An overall longitude extent of more than 180° across all vertices.
       This case is additionally reported through a warning log message.

    Args:
        geom: GeoJSON geometry dictionary. ``Polygon``, ``MultiPolygon``,
            ``LineString`` and ``MultiLineString`` are inspected; every other
            type (including ``Point``) is reported as not crossing.

    Returns:
        True if the geometry likely crosses the antimeridian

    Note:
        If True, consider using the `antimeridian` package to split the
        geometry before processing:

            pip install antimeridian
            from antimeridian import fix_polygon
            fixed = fix_polygon(geom)
    """
    geom_type = geom.get("type", "")
    if geom_type == "Point":
        return False

    coords = geom.get("coordinates", [])
    if not coords:
        return False

    # Normalise each supported geometry type into a flat list of vertex
    # sequences (rings or lines) so the checks below are type-agnostic.
    if geom_type == "LineString":
        sequences = [coords]
    elif geom_type in ("Polygon", "MultiLineString"):
        sequences = list(coords)
    elif geom_type == "MultiPolygon":
        sequences = [ring for polygon in coords for ring in polygon]
    else:
        return False

    # Longitudes per sequence; malformed positions (< 2 values) are ignored.
    lons_per_sequence = [[coord[0] for coord in sequence if len(coord) >= 2] for sequence in sequences]
    lons = [lon for sequence_lons in lons_per_sequence for lon in sequence_lons]
    if not lons:
        return False

    # Heuristic 1: a large jump between neighbouring vertices of one ring/line.
    for sequence_lons in lons_per_sequence:
        if any(abs(a - b) > 180 for a, b in zip(sequence_lons, sequence_lons[1:])):
            return True

    # Heuristic 2: no single large jump was seen, but the overall extent is
    # still wider than half the globe, so the geometry may wrap around.
    lon_range = max(lons) - min(lons)
    if lon_range > 180:
        logger.warning(
            f"Geometry may cross the antimeridian (lon range: {lon_range:.1f}°). "
            "Consider using the 'antimeridian' package to split it: "
            "pip install antimeridian"
        )
        return True

    return False
216
+
217
+
218
+ class GridSystem(ABC):
219
+ """Abstract base class defining the spatial partitioning interface for all grid systems.
220
+
221
+ This class establishes the contract that all spatial grid implementations must follow,
222
+ ensuring consistent behavior across different partitioning strategies. Provides the
223
+ foundation for pluggable spatial partitioning in EarthCatalog.
224
+
225
+ The interface is designed for high-performance spatial operations with support for:
226
+ - Geometry-to-tile mapping for efficient data organization
227
+ - Spanning detection for optimized cross-partition queries
228
+ - Global partition thresholds for large geometry handling
229
+ - Flexible threshold configuration per grid system type
230
+
231
+ Subclass Implementation Requirements:
232
+ All concrete grid systems must implement tiles_for_geometry() and
233
+ tiles_for_geometry_with_spanning_detection() methods. Optional methods
234
+ can be overridden for system-specific optimizations.
235
+
236
+ Performance Considerations:
237
+ - Implementations should use spatial indexing for large geometries
238
+ - Tile ID generation should be optimized for frequent calls
239
+ - Memory usage should remain constant regardless of geometry complexity
240
+ - Thread safety is not required (used in single-threaded contexts)
241
+ """
242
+
243
+ @abstractmethod
244
+ def tiles_for_geometry(self, geom: dict[str, Any]) -> list[str]:
245
+ """Get tile IDs that intersect with a geometry."""
246
+ pass
247
+
248
+ @abstractmethod
249
+ def tiles_for_geometry_with_spanning_detection(self, geom: dict[str, Any]) -> tuple[list[str], bool]:
250
+ """Get tile IDs and detect if geometry spans multiple tiles.
251
+
252
+ Returns:
253
+ tuple: (list_of_tile_ids, is_spanning_multiple_tiles)
254
+ """
255
+ pass
256
+
257
+ def get_global_partition_threshold(
258
+ self, default_threshold: int, thresholds: dict[str, dict[Any, int]] | None = None
259
+ ) -> int:
260
+ """Get default global partition threshold."""
261
+ return default_threshold
262
+
263
+
264
class H3GridSystem(GridSystem):
    """Uber's H3 hexagonal grid system implementation.

    H3 is a hierarchical hexagonal grid. Each geometry is mapped to the H3
    cell IDs (at the configured resolution) that the sampling strategy below
    finds for it.

    Key Advantages:
        - Consistent cell shapes across the globe (minimal distortion)
        - Efficient nearest-neighbor operations with 6-sided adjacency
        - Hierarchical structure enables multi-resolution analysis
        - Wide industry adoption and community support

    Resolution Levels (average cell area):
        - 0: ~4,250,546 km² per cell (largest, 122 cells globally)
        - 3: ~12,393 km² per cell (good for continental analysis)
        - 6: ~36.1 km² per cell (recommended for country-level datasets)
        - 9: ~0.105 km² (~105,000 m²) per cell (good for city-level analysis)
        - 12: ~307 m² per cell (building-level precision)
        - 15: ~0.9 m² per cell (smallest, highest precision)

    Coverage Strategy:
        Points map to exactly one cell. Every other geometry is sampled on a
        regular grid over its bounding box (at most 100 x 100 samples) plus
        10-50 points along its exterior ring when it has one. The result is
        therefore an approximation of the true cover, not an exact polyfill.
        Geometries with an area below 1e-10 (which includes zero-area
        geometries such as lines) are reduced to the cell of their centroid.

    Example:
        >>> grid = H3GridSystem(resolution=6)  # ~36 km² cells
        >>> tiles = grid.tiles_for_geometry(country_geometry)
        >>> print(f"Country spans {len(tiles)} H3 cells")
    """

    # Sampling step in degrees, keyed by H3 resolution. Resolutions that are
    # not listed (13-15) fall back to _DEFAULT_STEP_DEGREES.
    # NOTE(review): the original comment describes these as approximate H3
    # edge lengths in degrees at the equator, but H3's published average edge
    # lengths (e.g. ~1107 km ≈ 10° at resolution 0) are roughly 10x smaller.
    # The values are kept unchanged here; confirm before tuning.
    # NOTE(review): the 0.5° fallback is coarser than the step used for
    # resolution 6 - TODO confirm this is intended for resolutions 13-15.
    _SAMPLE_STEP_DEGREES = {
        0: 100,
        1: 37,
        2: 14,
        3: 5.3,
        4: 2.0,
        5: 0.75,
        6: 0.28,
        7: 0.11,
        8: 0.04,
        9: 0.015,
        10: 0.006,
        11: 0.002,
        12: 0.001,
    }
    _DEFAULT_STEP_DEGREES = 0.5

    def __init__(self, resolution: int = 2):
        """Create an H3 grid at ``resolution`` (0-15, default 2).

        Raises:
            ImportError: If the optional ``h3`` package is not installed.
            ValueError: If ``resolution`` is outside 0-15.
        """
        if h3 is None:
            raise ImportError("h3 required: pip install h3") from None
        if not 0 <= resolution <= 15:
            raise ValueError(f"H3 resolution must be 0-15, got {resolution}")
        self.h3 = h3
        self.resolution = resolution

    def get_global_partition_threshold(
        self, default_threshold: int, thresholds: dict[str, dict[Any, int]] | None = None
    ) -> int:
        """Get H3-specific global partition threshold.

        Currently returns ``default_threshold`` unchanged; ``thresholds`` is
        not consulted.
        """
        return default_threshold

    def tiles_for_geometry(self, geom: dict[str, Any]) -> list[str]:
        """Get H3 cells that cover geometry."""
        tiles, _ = self.tiles_for_geometry_with_spanning_detection(geom)
        return tiles

    def tiles_for_geometry_with_spanning_detection(self, geom: dict[str, Any]) -> tuple[list[str], bool]:
        """Get H3 cells that cover geometry and detect spanning.

        Args:
            geom: GeoJSON geometry dictionary (longitude is x, latitude is y).

        Returns:
            tuple: (sorted list of H3 cell IDs, True if more than one cell)
        """
        shp = shape(geom)

        if isinstance(shp, Point):
            # h3 expects (lat, lng) while GeoJSON/Shapely store (x=lon, y=lat).
            return [self.h3.latlng_to_cell(shp.y, shp.x, self.resolution)], False

        return self._get_h3_cells_for_polygon(shp)

    def _get_h3_cells_for_polygon(self, polygon: BaseGeometry) -> tuple[list[str], bool]:
        """Get H3 cells for a non-point geometry using deterministic grid sampling.

        Returns:
            tuple: (sorted list of H3 cell IDs, True if more than one cell).
            The list is sorted so the result does not depend on set iteration
            order, which varies between interpreter runs for strings.
        """
        # Shapely returns (minx, miny, maxx, maxy) = (min_lon, min_lat, max_lon, max_lat)
        min_lon, min_lat, max_lon, max_lat = polygon.bounds

        # Tiny or zero-area geometries are represented by their centroid cell.
        if polygon.area < 1e-10:
            centroid = polygon.centroid
            return [self.h3.latlng_to_cell(centroid.y, centroid.x, self.resolution)], False

        step = self._SAMPLE_STEP_DEGREES.get(self.resolution, self._DEFAULT_STEP_DEGREES)

        # Between 3 and 100 samples per axis, whatever the geometry size.
        width = max_lon - min_lon
        height = max_lat - min_lat
        n_lon = max(3, min(100, int(width / step) + 1))
        n_lat = max(3, min(100, int(height / step) + 1))

        # Spread the samples so the first and last land on the bbox edges
        # (n_lon and n_lat are always >= 3, so the divisors are never zero).
        step_lon = width / (n_lon - 1)
        step_lat = height / (n_lat - 1)

        tiles = set()
        for i in range(n_lat):
            lat = min_lat + i * step_lat
            for j in range(n_lon):
                lon = min_lon + j * step_lon
                point = Point(lon, lat)
                # Keep only samples inside the geometry or on its boundary.
                if polygon.contains(point) or polygon.touches(point):
                    tiles.add(self.h3.latlng_to_cell(lat, lon, self.resolution))

        # Sample along the exterior ring as well, to pick up edge cells that
        # the interior grid can miss. Geometries without an ``exterior``
        # attribute skip this step.
        exterior = getattr(polygon, "exterior", None)
        if exterior is not None:
            n_boundary = max(10, min(50, int(exterior.length / step)))
            for i in range(n_boundary):
                point = exterior.interpolate(i / n_boundary, normalized=True)
                tiles.add(self.h3.latlng_to_cell(point.y, point.x, self.resolution))

        if not tiles:
            # No sample hit the geometry: fall back to the centroid cell.
            centroid = polygon.centroid
            return [self.h3.latlng_to_cell(centroid.y, centroid.x, self.resolution)], False

        tile_list = sorted(tiles)
        return tile_list, len(tile_list) > 1
409
+
410
+
411
class S2GridSystem(GridSystem):
    """S2 grid system backed by the optional ``s2sphere`` package.

    Tile IDs are the string form of the ``s2sphere`` cell ID at the configured
    level (``resolution``). Points map to exactly one cell; every other
    geometry is sampled on a regular grid over its bounding box (at most
    50 x 50 samples) plus 10-30 points along its exterior ring when it has
    one, so the result approximates the true cover.
    """

    def __init__(self, resolution: int = 13):
        """Create an S2 grid at level ``resolution`` (0-30, default 13).

        Raises:
            ImportError: If the optional ``s2sphere`` package is not installed.
            ValueError: If ``resolution`` is outside 0-30.
        """
        try:
            import s2sphere
        except ImportError:
            raise ImportError("s2sphere required: pip install s2sphere") from None

        if not 0 <= resolution <= 30:
            raise ValueError(f"S2 resolution must be 0-30, got {resolution}")
        self.s2 = s2sphere
        self.resolution = resolution

    def get_global_partition_threshold(
        self, default_threshold: int, thresholds: dict[str, dict[Any, int]] | None = None
    ) -> int:
        """Get S2-specific global partition threshold.

        Currently returns ``default_threshold`` unchanged; ``thresholds`` is
        not consulted.
        """
        return default_threshold

    def tiles_for_geometry(self, geom: dict[str, Any]) -> list[str]:
        """Get S2 cells that cover geometry."""
        tiles, _ = self.tiles_for_geometry_with_spanning_detection(geom)
        return tiles

    def tiles_for_geometry_with_spanning_detection(self, geom: dict[str, Any]) -> tuple[list[str], bool]:
        """Get S2 cells that cover geometry and detect spanning.

        Args:
            geom: GeoJSON geometry dictionary (longitude is x, latitude is y).

        Returns:
            tuple: (sorted list of S2 cell ID strings, True if more than one cell)
        """
        shp = shape(geom)

        if isinstance(shp, Point):
            return [self._cell_for(shp.y, shp.x)], False

        return self._get_s2_cells_for_polygon(shp)

    def _cell_for(self, lat: float, lon: float) -> str:
        """Return the string ID of the cell at ``self.resolution`` containing (lat, lon)."""
        leaf = self.s2.CellId.from_lat_lng(self.s2.LatLng.from_degrees(lat, lon))
        return str(leaf.parent(self.resolution))

    def _get_s2_cells_for_polygon(self, polygon: BaseGeometry) -> tuple[list[str], bool]:
        """Get S2 cells for a non-point geometry using deterministic grid sampling.

        Returns:
            tuple: (sorted list of S2 cell ID strings, True if more than one
            cell). The list is sorted so the result does not depend on set
            iteration order, which varies between interpreter runs for strings.
        """
        # Shapely returns (minx, miny, maxx, maxy) = (min_lon, min_lat, max_lon, max_lat)
        min_lon, min_lat, max_lon, max_lat = polygon.bounds

        # Tiny or zero-area geometries are represented by their centroid cell.
        if polygon.area < 1e-10:
            centroid = polygon.centroid
            return [self._cell_for(centroid.y, centroid.x)], False

        # Heuristic sampling step in degrees, never finer than 0.01°.
        # NOTE(review): an S2 cell at level L is roughly 90 / 2**L degrees
        # wide, which is far smaller than this value for most levels (about
        # 2° here vs ~0.01° at level 13) - confirm the formula is intended.
        step = max(0.01, 180 / (2 ** (self.resolution / 2)))

        # Between 3 and 50 samples per axis, whatever the geometry size.
        width = max_lon - min_lon
        height = max_lat - min_lat
        n_lon = max(3, min(50, int(width / step) + 1))
        n_lat = max(3, min(50, int(height / step) + 1))

        # n_lon and n_lat are always >= 3, so the divisors are never zero.
        step_lon = width / (n_lon - 1)
        step_lat = height / (n_lat - 1)

        cells = set()
        for i in range(n_lat):
            lat = min_lat + i * step_lat
            for j in range(n_lon):
                lon = min_lon + j * step_lon
                point = Point(lon, lat)
                # Keep only samples inside the geometry or on its boundary.
                if polygon.contains(point) or polygon.touches(point):
                    cells.add(self._cell_for(lat, lon))

        # Sample along the exterior ring to pick up edge cells; geometries
        # without an ``exterior`` attribute skip this step.
        exterior = getattr(polygon, "exterior", None)
        if exterior is not None:
            n_boundary = max(10, min(30, int(exterior.length / step)))
            for i in range(n_boundary):
                point = exterior.interpolate(i / n_boundary, normalized=True)
                cells.add(self._cell_for(point.y, point.x))

        if not cells:
            # No sample hit the geometry: fall back to the centroid cell.
            centroid = polygon.centroid
            return [self._cell_for(centroid.y, centroid.x)], False

        cell_list = sorted(cells)
        return cell_list, len(cell_list) > 1
509
+
510
+
511
class MGRSGridSystem(GridSystem):
    """Military Grid Reference System backed by the optional ``mgrs`` package.

    Tile IDs are the MGRS codes returned by ``mgrs.MGRS().toMGRS`` at the
    configured precision. Points map to exactly one code; every other geometry
    is sampled on a regular grid over its bounding box (at most 50 x 50
    samples) plus 10-30 points along its exterior ring when it has one, so the
    result approximates the true cover.
    """

    # Sampling step in degrees per MGRS precision level.
    # Precision 1=10km, 2=1km, 3=100m, 4=10m, 5=1m
    _STEP_DEGREES = {1: 0.1, 2: 0.01, 3: 0.001, 4: 0.0001, 5: 0.00001}
    _DEFAULT_STEP_DEGREES = 0.01

    def __init__(self, resolution: int = 5):
        """Create an MGRS grid at precision ``resolution`` (1-5, default 5).

        Raises:
            ImportError: If the optional ``mgrs`` package is not installed.
            ValueError: If ``resolution`` is outside 1-5.
        """
        try:
            import mgrs

            self.mgrs = mgrs.MGRS()
        except ImportError:
            raise ImportError("mgrs required: pip install mgrs") from None

        if not 1 <= resolution <= 5:
            raise ValueError(f"MGRS resolution must be 1-5, got {resolution}")
        self.resolution = resolution  # Precision level
        # Exception type(s) treated as "coordinate cannot be converted";
        # falls back to ValueError when the package exposes no MGRSError.
        self._mgrs_error: tuple[type[BaseException], ...] = (getattr(mgrs, "MGRSError", ValueError),)

    def get_global_partition_threshold(
        self, default_threshold: int, thresholds: dict[str, dict[Any, int]] | None = None
    ) -> int:
        """Get MGRS-specific global partition threshold.

        Currently returns ``default_threshold`` unchanged; ``thresholds`` is
        not consulted.
        """
        return default_threshold

    def tiles_for_geometry(self, geom: dict[str, Any]) -> list[str]:
        """Get MGRS grid cells."""
        tiles, _ = self.tiles_for_geometry_with_spanning_detection(geom)
        return tiles

    def tiles_for_geometry_with_spanning_detection(self, geom: dict[str, Any]) -> tuple[list[str], bool]:
        """Get MGRS grid cells and detect spanning.

        Args:
            geom: GeoJSON geometry dictionary (longitude is x, latitude is y).

        Returns:
            tuple: (sorted list of MGRS codes, True if more than one code)

        Raises:
            Whatever ``toMGRS`` raises when a point geometry, or the centroid
            used as a fallback, cannot be converted; only conversions of
            individual sample points are skipped silently.
        """
        shp = shape(geom)

        if isinstance(shp, Point):
            return [self._to_mgrs(shp.y, shp.x)], False

        return self._get_mgrs_cells_for_polygon(shp)

    def _to_mgrs(self, lat: float, lon: float) -> str:
        """Convert (lat, lon) to an MGRS code at the configured precision."""
        return self.mgrs.toMGRS(lat, lon, MGRSPrecision=self.resolution)

    def _get_mgrs_cells_for_polygon(self, polygon: BaseGeometry) -> tuple[list[str], bool]:
        """Get MGRS cells for a non-point geometry using deterministic grid sampling.

        Returns:
            tuple: (sorted list of MGRS codes, True if more than one code).
            The list is sorted so the result does not depend on set iteration
            order, which varies between interpreter runs for strings.
        """
        # Shapely returns (minx, miny, maxx, maxy) = (min_lon, min_lat, max_lon, max_lat)
        min_lon, min_lat, max_lon, max_lat = polygon.bounds

        # Tiny or zero-area geometries are represented by their centroid code.
        if polygon.area < 1e-10:
            centroid = polygon.centroid
            return [self._to_mgrs(centroid.y, centroid.x)], False

        step = self._STEP_DEGREES.get(self.resolution, self._DEFAULT_STEP_DEGREES)

        # Between 3 and 50 samples per axis, whatever the geometry size.
        width = max_lon - min_lon
        height = max_lat - min_lat
        n_lon = max(3, min(50, int(width / step) + 1))
        n_lat = max(3, min(50, int(height / step) + 1))

        # n_lon and n_lat are always >= 3, so the divisors are never zero.
        step_lon = width / (n_lon - 1)
        step_lat = height / (n_lat - 1)

        mgrs_codes = set()
        for i in range(n_lat):
            lat = min_lat + i * step_lat
            for j in range(n_lon):
                lon = min_lon + j * step_lon
                point = Point(lon, lat)
                # Keep only samples inside the geometry or on its boundary.
                if polygon.contains(point) or polygon.touches(point):
                    try:
                        mgrs_codes.add(self._to_mgrs(lat, lon))
                    except self._mgrs_error:
                        # Skip coordinates outside MGRS valid range (lat must be -80 to 84)
                        logger.debug(f"MGRS conversion failed for ({lat}, {lon}): outside valid range")

        # Sample along the exterior ring to pick up edge cells; geometries
        # without an ``exterior`` attribute skip this step.
        exterior = getattr(polygon, "exterior", None)
        if exterior is not None:
            n_boundary = max(10, min(30, int(exterior.length / step)))
            for i in range(n_boundary):
                point = exterior.interpolate(i / n_boundary, normalized=True)
                try:
                    mgrs_codes.add(self._to_mgrs(point.y, point.x))
                except self._mgrs_error:
                    # Skip boundary points outside MGRS valid range
                    logger.debug(f"MGRS boundary conversion failed for ({point.y}, {point.x})")

        if not mgrs_codes:
            # No sample could be converted: fall back to the centroid. This
            # conversion is deliberately not guarded, so a failure surfaces.
            centroid = polygon.centroid
            return [self._to_mgrs(centroid.y, centroid.x)], False

        code_list = sorted(mgrs_codes)
        return code_list, len(code_list) > 1
615
+
616
+
617
class UTMGridSystem(GridSystem):
    """UTM zone-based grid system.

    Tile IDs have the form ``"<zone><hemisphere>"`` (for example ``"33N"``),
    where the zone is the 6°-wide longitude band numbered 1-60 and the
    hemisphere is ``"N"`` for latitudes >= 0 and ``"S"`` otherwise.
    """

    # Latitude extent of the zone boxes used for the intersection test.
    _NORTH_LAT_RANGE = (0, 84)
    _SOUTH_LAT_RANGE = (-80, 0)

    def __init__(self, resolution: int = 1):
        """Create a UTM grid; ``resolution`` is stored but otherwise unused."""
        self.resolution = resolution  # Not used, just for consistency

    def get_global_partition_threshold(
        self, default_threshold: int, thresholds: dict[str, dict[Any, int]] | None = None
    ) -> int:
        """Get UTM-specific global partition threshold.

        Currently returns ``default_threshold`` unchanged; ``thresholds`` is
        not consulted.
        """
        return default_threshold

    def tiles_for_geometry(self, geom: dict[str, Any]) -> list[str]:
        """Get UTM zones."""
        tiles, _ = self.tiles_for_geometry_with_spanning_detection(geom)
        return tiles

    def tiles_for_geometry_with_spanning_detection(self, geom: dict[str, Any]) -> tuple[list[str], bool]:
        """Get UTM zones and detect spanning.

        Args:
            geom: GeoJSON geometry dictionary (longitude is x, latitude is y).

        Returns:
            tuple: (sorted list of zone IDs, True if more than one zone)
        """
        shp = shape(geom)

        if isinstance(shp, Point):
            return [self._zone_id(shp.x, shp.y)], False

        return self._get_utm_zones_for_polygon(shp)

    @staticmethod
    def _zone_number(lon: float) -> int:
        """Return the UTM zone number for ``lon``, clamped to the valid 1-60 range.

        Without the clamp a longitude of exactly 180° would yield zone 61.
        """
        return max(1, min(60, int((lon + 180) / 6) + 1))

    @classmethod
    def _zone_id(cls, lon: float, lat: float) -> str:
        """Return the ``"<zone><hemisphere>"`` tile ID for a single coordinate."""
        hemisphere = "N" if lat >= 0 else "S"
        return f"{cls._zone_number(lon)}{hemisphere}"

    def _get_utm_zones_for_polygon(self, polygon: BaseGeometry) -> tuple[list[str], bool]:
        """Get UTM zones that intersect with a geometry using a deterministic approach.

        UTM zones are 6° wide, so the candidate zones are computed directly
        from the bounding box and each candidate is confirmed with an
        intersection test against the zone's box.

        Returns:
            tuple: (sorted list of zone IDs, True if more than one zone).
            The list is sorted so the result does not depend on set iteration
            order, which varies between interpreter runs for strings.
        """
        # Shapely returns (minx, miny, maxx, maxy) = (min_lon, min_lat, max_lon, max_lat)
        min_lon, min_lat, max_lon, max_lat = polygon.bounds

        # Candidate zones overlapped by the bounding box.
        min_zone = self._zone_number(min_lon)
        max_zone = self._zone_number(max_lon)

        # Hemispheres depend only on the latitude extent, so decide them once.
        if min_lat < 0 and max_lat >= 0:
            hemispheres = ["N", "S"]
        elif max_lat >= 0:
            hemispheres = ["N"]
        else:
            hemispheres = ["S"]

        zones = set()
        for zone in range(min_zone, max_zone + 1):
            # Longitude bounds of this zone
            zone_min_lon = (zone - 1) * 6 - 180
            zone_max_lon = zone * 6 - 180

            for hemisphere in hemispheres:
                lat_lo, lat_hi = self._NORTH_LAT_RANGE if hemisphere == "N" else self._SOUTH_LAT_RANGE
                zone_polygon = Polygon(
                    [
                        (zone_min_lon, lat_lo),
                        (zone_max_lon, lat_lo),
                        (zone_max_lon, lat_hi),
                        (zone_min_lon, lat_hi),
                        (zone_min_lon, lat_lo),
                    ]
                )

                if polygon.intersects(zone_polygon):
                    zones.add(f"{zone}{hemisphere}")

        if not zones:
            # Nothing intersected (e.g. the geometry lies outside the
            # -80..84 latitude band): fall back to the centroid's zone.
            centroid = polygon.centroid
            return [self._zone_id(centroid.x, centroid.y)], False

        zone_list = sorted(zones)
        return zone_list, len(zone_list) > 1
719
+
720
+
721
class SimpleLatLonGrid(GridSystem):
    """Regular latitude/longitude grid with square cells of ``resolution`` degrees.

    Each cell is named after its lower (south-west) corner using the pattern
    ``lat{lat:+04d}_lon{lon:+04d}``, for example ``lat+010_lon-045``.
    """

    def __init__(self, resolution: int = 1):
        """Create the grid.

        Args:
            resolution: Edge length of a cell in degrees; must be positive.

        Raises:
            ValueError: If ``resolution`` is zero or negative.
        """
        if resolution <= 0:
            raise ValueError(f"LatLon resolution must be positive, got {resolution}")
        # Edge length of one grid cell, in degrees.
        self.resolution = resolution

    def get_global_partition_threshold(
        self, default_threshold: int, thresholds: dict[str, dict[Any, int]] | None = None
    ) -> int:
        """Return ``default_threshold`` unchanged; ``thresholds`` is not consulted."""
        return default_threshold

    def tiles_for_geometry(self, geom: dict[str, Any]) -> list[str]:
        """Return the names of the grid cells covering ``geom``."""
        return self.tiles_for_geometry_with_spanning_detection(geom)[0]

    def tiles_for_geometry_with_spanning_detection(self, geom: dict[str, Any]) -> tuple[list[str], bool]:
        """Return ``(cell_names, is_spanning)`` for a GeoJSON-like geometry mapping.

        A point always maps to exactly one cell and is never spanning; every
        other geometry type is handled by the bounding-box scan.
        """
        shp = shape(geom)
        if not isinstance(shp, Point):
            return self._get_latlon_cells_for_polygon(shp)
        return [self._cell_name(self._snap(shp.y), self._snap(shp.x))], False

    def _snap(self, value: float) -> int:
        """Snap a coordinate down to the lower edge of the cell containing it."""
        # math.floor rather than int(): int(-0.5) is 0, but the cell that
        # contains -0.5 starts at -1.
        return math.floor(value / self.resolution) * self.resolution

    @staticmethod
    def _cell_name(grid_lat: int, grid_lon: int) -> str:
        """Format a cell name from the cell's lower-edge latitude and longitude."""
        return f"lat{grid_lat:+04d}_lon{grid_lon:+04d}"

    def _steps(self, start, stop):
        """Yield cell edges from ``start`` to ``stop`` inclusive, one resolution apart."""
        edge = start
        while edge <= stop:
            yield edge
            edge += self.resolution

    def _get_latlon_cells_for_polygon(self, polygon: BaseGeometry) -> tuple[list[str], bool]:
        """Return the cells intersecting ``polygon`` and whether there is more than one.

        Every cell overlapping the geometry's bounding box is tested with an
        exact ``intersects`` check. Cells are reported south-to-north and,
        within a row, west-to-east.
        """
        # Shapely bounds are (minx, miny, maxx, maxy) == (west, south, east, north).
        west, south, east, north = polygon.bounds

        first_lat = self._snap(south)
        last_lat = self._snap(north)
        first_lon = self._snap(west)
        last_lon = self._snap(east)

        size = self.resolution
        lon_edges = list(self._steps(first_lon, last_lon))
        hits = [
            self._cell_name(row, col)
            for row in self._steps(first_lat, last_lat)
            for col in lon_edges
            if polygon.intersects(
                Polygon(
                    [
                        (col, row),
                        (col + size, row),
                        (col + size, row + size),
                        (col, row + size),
                        (col, row),
                    ]
                )
            )
        ]

        if hits:
            return hits, len(hits) > 1

        # Nothing intersected: fall back to the cell holding the centroid.
        center = polygon.centroid
        return [self._cell_name(self._snap(center.y), self._snap(center.x))], False
802
+
803
+
804
class ITSLiveGridSystem(GridSystem):
    """ITS_LIVE center-based 10°×10° grid system with specific naming convention.

    Each 10°×10° cell is named after the coordinates of its center point, so
    the cell centered at 60°N, 40°W is ``"N60W040"`` and covers latitudes
    55..65 and longitudes -45..-35.

    Grid cell naming, ``{N|S}{lat:02d}{E|W}{lon:03d}``:
        - Latitude: ``N`` for zero/positive, ``S`` for negative, 2-digit absolute value
        - Longitude: ``E`` for zero/positive, ``W`` for negative, 3-digit absolute value
        - Examples: ``"N60W040"``, ``"S10E170"``, ``"N00E000"``

    Boundary handling:
        A point lying exactly on the edge between two cells belongs to the
        cell on its north/east side, i.e. point lookup treats a cell as the
        half-open range ``[center - 5, center + 5)`` on both axes.

    Example:
        >>> grid = ITSLiveGridSystem()
        >>> tiles = grid.tiles_for_geometry(greenland_geometry)
        >>> print(tiles)  # ['N60W040', 'N60W030', 'N70W040', ...]
    """

    def __init__(self):
        """Initialize ITS_LIVE grid system with fixed 10-degree resolution."""
        self.resolution = 10  # Fixed 10-degree grid

    def get_global_partition_threshold(
        self, default_threshold: int, thresholds: dict[str, dict[Any, int]] | None = None
    ) -> int:
        """Return ``default_threshold`` unchanged; ``thresholds`` is not consulted."""
        return default_threshold

    def tiles_for_geometry(self, geom: dict[str, Any]) -> list[str]:
        """Get ITS_LIVE grid cells that intersect with geometry."""
        tiles, _ = self.tiles_for_geometry_with_spanning_detection(geom)
        return tiles

    def tiles_for_geometry_with_spanning_detection(self, geom: dict[str, Any]) -> tuple[list[str], bool]:
        """Return ``(cell_names, is_spanning)`` for a GeoJSON-like geometry mapping.

        A point maps to the single cell whose center is nearest to it and is
        never spanning; every other geometry type is resolved by intersection.
        """
        shp = shape(geom)

        if isinstance(shp, Point):
            name = self._format_cell_name(self._nearest_center(shp.y), self._nearest_center(shp.x))
            return [name], False
        return self._get_itslive_cells_for_polygon(shp)

    @staticmethod
    def _nearest_center(value: float) -> int:
        """Return the cell center (a multiple of 10) whose cell contains ``value``.

        Ties are always resolved upwards. The built-in ``round()`` rounds
        halves to the nearest even integer, which would send both 15 and 25 to
        center 20 and leave center 10 without either of its edges.
        """
        return math.floor((value + 5) / 10.0) * 10

    def _get_itslive_cells_for_polygon(self, polygon: BaseGeometry) -> tuple[list[str], bool]:
        """Get ITS_LIVE grid cells that intersect with a polygon.

        Candidate centers are derived from the bounding box, then each
        candidate cell (center ± 5 degrees) is tested with an exact
        ``intersects`` check. Names are returned in a deterministic order:
        south-to-north and, within a row, west-to-east.
        """
        from shapely.geometry import box

        if not polygon.is_valid:
            # buffer(0) is the usual Shapely idiom for repairing invalid rings.
            polygon = polygon.buffer(0)

        minx, miny, maxx, maxy = polygon.bounds

        # Range of cell centers (multiples of 10) whose ±5° extent can reach
        # into the bounding box.
        min_lat_center = math.floor((miny + 5) / 10.0) * 10
        max_lat_center = math.ceil((maxy - 5) / 10.0) * 10
        min_lon_center = math.floor((minx + 5) / 10.0) * 10
        max_lon_center = math.ceil((maxx - 5) / 10.0) * 10

        # Distinct centers always format to distinct names, so a list keeps
        # the result unique while preserving iteration order.
        grid_list = []
        for lat_center in range(min_lat_center, max_lat_center + 1, 10):
            for lon_center in range(min_lon_center, max_lon_center + 1, 10):
                tile = box(lon_center - 5, lat_center - 5, lon_center + 5, lat_center + 5)
                if polygon.intersects(tile):
                    grid_list.append(self._format_cell_name(lat_center, lon_center))

        if not grid_list:
            # Fallback to centroid using the same logic as point queries.
            centroid = polygon.centroid
            name = self._format_cell_name(self._nearest_center(centroid.y), self._nearest_center(centroid.x))
            return [name], False

        return grid_list, len(grid_list) > 1

    def _format_cell_name(self, lat_center: float, lon_center: float) -> str:
        """Format cell name using ITS_LIVE convention: {N|S}{lat:02d}{E|W}{lon:03d}."""
        lat_letter = "N" if lat_center >= 0 else "S"
        lon_letter = "E" if lon_center >= 0 else "W"
        return f"{lat_letter}{abs(int(lat_center)):02d}{lon_letter}{abs(int(lon_center)):03d}"
928
+
929
+
930
class GeoJSONGridSystem(GridSystem):
    """Generic grid system whose tiles are polygons read from a GeoJSON file.

    Every ``Polygon`` feature becomes one tile; every member of a
    ``MultiPolygon`` feature becomes its own tile named ``{tile_id}_{index}``.
    """

    def __init__(self, geojson_path: str):
        """Initialize with path to GeoJSON file containing tile geometries.

        Raises:
            FileNotFoundError: If ``geojson_path`` does not exist.
            ValueError: If the file is not valid JSON, is not a Feature or
                FeatureCollection, or contains no polygon geometries.
        """
        self.geojson_path = geojson_path
        self.tiles = self._load_geojson_tiles()

    def _load_geojson_tiles(self) -> dict[str, Polygon]:
        """Load tiles from the GeoJSON file into a ``{tile_id: polygon}`` mapping."""
        try:
            # GeoJSON is UTF-8 by specification; do not depend on the platform default.
            with open(self.geojson_path, encoding="utf-8") as f:
                geojson_data = json.load(f)
        except FileNotFoundError as err:
            raise FileNotFoundError(f"GeoJSON file not found: {self.geojson_path}") from err
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid GeoJSON file: {e}") from e

        # Valid JSON may still be a list/number/string at the top level.
        root_type = geojson_data.get("type") if isinstance(geojson_data, dict) else None
        if root_type == "FeatureCollection":
            features = geojson_data.get("features") or []
        elif root_type == "Feature":
            features = [geojson_data]
        else:
            raise ValueError("GeoJSON must be a Feature or FeatureCollection")

        tiles: dict[str, Polygon] = {}

        for feature in features:
            if not isinstance(feature, dict) or feature.get("type") != "Feature":
                continue

            geometry = feature.get("geometry")
            if not isinstance(geometry, dict) or geometry.get("type") not in ("Polygon", "MultiPolygon"):
                continue

            # "properties" may legally be JSON null.
            properties = feature.get("properties")
            if not isinstance(properties, dict):
                properties = {}

            # First usable identifier wins. Only None and "" count as missing,
            # so a numeric id of 0 is kept instead of being replaced.
            candidates = (
                properties.get("id"),
                properties.get("tile_id"),
                properties.get("name"),
                feature.get("id"),
            )
            tile_id = next(
                (value for value in candidates if value is not None and value != ""),
                f"tile_{len(tiles)}",
            )

            try:
                shp = shape(geometry)
                if isinstance(shp, Polygon):
                    tiles[str(tile_id)] = shp
                else:
                    # MultiPolygon-like: register each polygonal member separately.
                    for i, part in enumerate(getattr(shp, "geoms", ())):
                        if isinstance(part, Polygon):
                            tiles[f"{tile_id}_{i}"] = part
            except Exception as e:
                raise ValueError(f"Error processing geometry for tile {tile_id}: {e}") from e

        if not tiles:
            raise ValueError("No valid polygon geometries found in GeoJSON")

        return tiles

    def tiles_for_geometry(self, geom: dict[str, Any]) -> list[str]:
        """Get tile IDs that intersect with geometry."""
        tiles, _ = self.tiles_for_geometry_with_spanning_detection(geom)
        return tiles

    def tiles_for_geometry_with_spanning_detection(self, geom: dict[str, Any]) -> tuple[list[str], bool]:
        """Return ``(tile_ids, is_spanning)`` for a GeoJSON-like geometry mapping.

        Tile IDs are listed in the order the tiles were loaded. When nothing
        intersects, the tile whose centroid is closest to the geometry's
        centroid is returned and the result is reported as non-spanning.
        """
        shp = shape(geom)
        intersecting_tiles = [
            tile_id for tile_id, tile_polygon in self.tiles.items() if shp.intersects(tile_polygon)
        ]

        if not intersecting_tiles:
            # Fallback: find nearest tile by centroid distance
            centroid = shp.centroid
            nearest_id, _ = min(self.tiles.items(), key=lambda item: centroid.distance(item[1].centroid))
            return [nearest_id], False

        return intersecting_tiles, len(intersecting_tiles) > 1
1019
+
1020
+
1021
def get_grid_system(name: str, resolution: int = 1, geojson_path: str | None = None) -> GridSystem:
    """Build a grid system instance from its (case-insensitive) name.

    This factory is the main entry point for obtaining a spatial grid in
    EarthCatalog. Every returned object implements the ``GridSystem``
    interface, so callers can switch partitioning schemes without changing
    how they use the result.

    Supported names:
        - ``'h3'``: Uber's H3 hexagonal grid
        - ``'s2'``: Google's S2 spherical geometry cells
        - ``'mgrs'``: Military Grid Reference System
        - ``'utm'``: Universal Transverse Mercator zones
        - ``'latlon'``: plain latitude/longitude degree grid
        - ``'itslive'``: ITS_LIVE center-based 10°×10° grid
        - ``'geojson'``: custom tiles read from a GeoJSON file

    Args:
        name: Grid system identifier; matched after lower-casing.
        resolution: Passed as the single constructor argument to every grid
            except ``'itslive'`` and ``'geojson'``, which ignore it. Its
            meaning depends on the grid:
            - H3: resolution level 0-15 (larger value, smaller hexagons)
            - S2: cell level 0-30 (larger value, smaller cells)
            - UTM: grid spacing in meters
            - MGRS: precision level 1-5
            - LatLon: cell size in degrees
        geojson_path: Path to the GeoJSON file defining the tiles. Required
            when ``name`` is ``'geojson'`` and unused otherwise.

    Returns:
        GridSystem: The configured grid system instance.

    Raises:
        ValueError: If ``name`` is not a supported grid system, or if
            ``name`` is ``'geojson'`` and ``geojson_path`` is missing or empty.
        ImportError: Possibly raised by a grid class whose optional
            dependency (for example the ``h3`` package) is not installed.

    Example:
        >>> grid = get_grid_system('h3', resolution=6)
        >>> grid = get_grid_system('s2', resolution=12)
        >>> grid = get_grid_system('geojson', geojson_path='custom_regions.geojson')
        >>> tiles = grid.tiles_for_geometry(feature_geometry)
        >>> print(f"Geometry intersects {len(tiles)} grid tiles")
    """
    registry = {
        "h3": H3GridSystem,
        "s2": S2GridSystem,
        "mgrs": MGRSGridSystem,
        "utm": UTMGridSystem,
        "latlon": SimpleLatLonGrid,
        "itslive": ITSLiveGridSystem,
        "geojson": GeoJSONGridSystem,
    }

    key = name.lower()
    if key not in registry:
        available = ", ".join(registry.keys())
        raise ValueError(f"Unknown grid system: {name}. Available: {available}")

    grid_class = registry[key]

    if key == "geojson":
        # The GeoJSON grid is the only one configured by a file instead of a resolution.
        if not geojson_path:
            raise ValueError("geojson_path is required for GeoJSON grid system")
        return grid_class(geojson_path)  # type: ignore

    if key == "itslive":
        # ITS_LIVE has a fixed 10-degree resolution and takes no arguments.
        return grid_class()  # type: ignore

    # The type: ignore bypasses mypy's abstract-class check; the concrete
    # classes implement the abstract methods.
    return grid_class(resolution)  # type: ignore