earthcatalog 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. earthcatalog/__init__.py +164 -0
  2. earthcatalog/async_http_client.py +1006 -0
  3. earthcatalog/config.py +97 -0
  4. earthcatalog/engines/__init__.py +308 -0
  5. earthcatalog/engines/rustac_engine.py +142 -0
  6. earthcatalog/engines/stac_geoparquet_engine.py +126 -0
  7. earthcatalog/exceptions.py +471 -0
  8. earthcatalog/grid_systems.py +1114 -0
  9. earthcatalog/ingestion_pipeline.py +2281 -0
  10. earthcatalog/input_readers.py +603 -0
  11. earthcatalog/job_tracking.py +485 -0
  12. earthcatalog/pipeline.py +606 -0
  13. earthcatalog/schema_generator.py +911 -0
  14. earthcatalog/spatial_resolver.py +1207 -0
  15. earthcatalog/stac_hooks.py +754 -0
  16. earthcatalog/statistics.py +677 -0
  17. earthcatalog/storage_backends.py +548 -0
  18. earthcatalog/tests/__init__.py +1 -0
  19. earthcatalog/tests/conftest.py +76 -0
  20. earthcatalog/tests/test_all_grids.py +793 -0
  21. earthcatalog/tests/test_async_http.py +700 -0
  22. earthcatalog/tests/test_cli_and_storage.py +230 -0
  23. earthcatalog/tests/test_config.py +245 -0
  24. earthcatalog/tests/test_dask_integration.py +580 -0
  25. earthcatalog/tests/test_e2e_synthetic.py +1624 -0
  26. earthcatalog/tests/test_engines.py +272 -0
  27. earthcatalog/tests/test_exceptions.py +346 -0
  28. earthcatalog/tests/test_file_structure.py +245 -0
  29. earthcatalog/tests/test_input_readers.py +666 -0
  30. earthcatalog/tests/test_integration.py +200 -0
  31. earthcatalog/tests/test_integration_async.py +283 -0
  32. earthcatalog/tests/test_job_tracking.py +603 -0
  33. earthcatalog/tests/test_multi_file_input.py +336 -0
  34. earthcatalog/tests/test_passthrough_hook.py +196 -0
  35. earthcatalog/tests/test_pipeline.py +684 -0
  36. earthcatalog/tests/test_pipeline_components.py +665 -0
  37. earthcatalog/tests/test_schema_generator.py +506 -0
  38. earthcatalog/tests/test_spatial_resolver.py +413 -0
  39. earthcatalog/tests/test_stac_hooks.py +776 -0
  40. earthcatalog/tests/test_statistics.py +477 -0
  41. earthcatalog/tests/test_storage_backends.py +236 -0
  42. earthcatalog/tests/test_validation.py +435 -0
  43. earthcatalog/tests/test_workers.py +653 -0
  44. earthcatalog/validation.py +921 -0
  45. earthcatalog/workers.py +682 -0
  46. earthcatalog-0.2.0.dist-info/METADATA +333 -0
  47. earthcatalog-0.2.0.dist-info/RECORD +50 -0
  48. earthcatalog-0.2.0.dist-info/WHEEL +5 -0
  49. earthcatalog-0.2.0.dist-info/entry_points.txt +3 -0
  50. earthcatalog-0.2.0.dist-info/top_level.txt +1 -0
earthcatalog/config.py ADDED
@@ -0,0 +1,97 @@
1
+ """Configuration file loader for EarthCatalog.
2
+
3
+ Supports YAML configuration files with CLI override capability.
4
+ The config file allows users to set defaults for all ProcessingConfig options.
5
+ """
6
+
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import yaml
11
+
12
+
13
def load_config(config_path: Path | str | None = None) -> dict[str, Any]:
    """Load configuration from a YAML file.

    Resolution order:
      1. The explicit ``config_path`` argument, when given.
      2. ``./earthcatalog.yaml`` in the current working directory.
      3. An empty dict, meaning "use built-in defaults".

    Args:
        config_path: Optional explicit path to a config file.

    Returns:
        Dictionary of configuration values (possibly empty).

    Raises:
        FileNotFoundError: If an explicit ``config_path`` is given but the
            file does not exist.
        yaml.YAMLError: If the config file contains invalid YAML.
    """
    if config_path is not None:
        explicit = Path(config_path)
        if not explicit.exists():
            raise FileNotFoundError(f"Config file not found: {config_path}")
        return _load_yaml_file(explicit)

    # No explicit path: fall back to the conventional config file in the
    # working directory, or defaults when that is absent too.
    fallback = Path("earthcatalog.yaml")
    return _load_yaml_file(fallback) if fallback.exists() else {}
44
+
45
+
46
def _load_yaml_file(path: Path) -> dict[str, Any]:
    """Load and parse a YAML file.

    Args:
        path: Path to YAML file.

    Returns:
        Parsed YAML content as dictionary. An empty file yields ``{}``.

    Raises:
        ValueError: If the file parses to something other than a mapping.
        yaml.YAMLError: If the file is not valid YAML.
    """
    # Explicit encoding: YAML is UTF-8 by convention, and relying on the
    # platform default (e.g. cp1252 on Windows) corrupts non-ASCII values.
    with open(path, encoding="utf-8") as f:
        content = yaml.safe_load(f)
    # safe_load returns None for an empty document; normalize to {} so
    # callers can merge without a None check.
    if content is None:
        return {}
    if not isinstance(content, dict):
        raise ValueError(f"Config file must contain a YAML mapping, got: {type(content).__name__}")
    return content
63
+
64
+
65
def merge_cli_overrides(config: dict[str, Any], cli_args: dict[str, Any]) -> dict[str, Any]:
    """Overlay CLI arguments on top of config-file values.

    CLI entries whose value is None are treated as "not specified" and do
    not override the config file, so unspecified defaults survive. Neither
    input dictionary is mutated.

    Args:
        config: Base configuration from file.
        cli_args: CLI arguments (may contain None values).

    Returns:
        New merged configuration dictionary.
    """
    overrides = {key: val for key, val in cli_args.items() if val is not None}
    return {**config, **overrides}
86
+
87
+
88
def save_config(config: dict[str, Any], path: Path | str) -> None:
    """Save configuration to YAML file.

    Args:
        config: Configuration dictionary to save.
        path: Path to write YAML file.
    """
    path = Path(path)
    # Write UTF-8 explicitly so the file round-trips on every platform
    # instead of depending on the locale's default encoding.
    with open(path, "w", encoding="utf-8") as f:
        # Block style (not flow) and insertion order keep the output
        # human-editable and stable across saves.
        yaml.safe_dump(config, f, default_flow_style=False, sort_keys=False)
@@ -0,0 +1,308 @@
1
+ """STAC I/O engine abstraction layer.
2
+
3
+ This module provides pluggable backends for STAC item conversion and GeoParquet I/O,
4
+ enabling EarthCatalog to use different underlying implementations without code changes.
5
+
6
+ Supported Engines:
7
+ rustac: High-performance Rust-based engine using the rustac library.
8
+ - Zero Python dependencies for core operations
9
+ - Native async support for I/O operations
10
+ - Built-in object store integration (S3, GCS, Azure)
11
+ - Recommended for production use
12
+
13
+ stac-geoparquet: Legacy Python-based engine using stac-geoparquet library.
14
+ - Pure Python implementation
15
+ - Well-tested and stable
16
+ - Fallback option for compatibility
17
+
18
+ Engine Selection:
19
+ The engine can be selected via ProcessingConfig.stac_engine parameter:
20
+ - "rustac" (default): Use rustac engine
21
+ - "stac-geoparquet": Use legacy stac-geoparquet engine
22
+ - "auto": Auto-detect based on available libraries (prefers rustac)
23
+
24
+ Example:
25
+ >>> from earthcatalog.engines import get_engine
26
+ >>> engine = get_engine("rustac")
27
+ >>> gdf = engine.items_to_geodataframe(items)
28
+ >>> items = engine.geodataframe_to_items(gdf)
29
+
30
+ Performance:
31
+ The rustac engine provides significant performance improvements:
32
+ - Faster Arrow conversion through Rust implementation
33
+ - Native async I/O for cloud storage operations
34
+ - Reduced memory overhead for large datasets
35
+ """
36
+
37
+ import io
38
+ import logging
39
+ from abc import ABC, abstractmethod
40
+ from typing import Any, Literal, cast
41
+
42
+ import geopandas as gpd
43
+ import pyarrow.parquet as pq
44
+
45
# Module-level logger for engine diagnostics.
logger = logging.getLogger(__name__)

# Closed sets of valid engine names and Parquet compression codecs, used
# to type-check the factory and I/O mixin parameters.
EngineType = Literal["rustac", "stac-geoparquet", "auto"]
CompressionType = Literal["snappy", "gzip", "brotli"]
49
+
50
+
51
class GeoParquetIOMixin:
    """Mixin with the GeoParquet read/write logic shared by all STAC engines.

    Centralizing the I/O here means a fix or improvement lands in every
    engine implementation at once instead of drifting between them.
    """

    def write_geoparquet_sync(
        self,
        gdf: gpd.GeoDataFrame,
        path: str,
        storage: Any,
        compression: str = "snappy",
    ) -> None:
        """Synchronously write a GeoDataFrame to a GeoParquet file.

        Delegates to GeoPandas' ``to_parquet``; s3:// destinations go
        through the supplied storage backend, local paths are written
        directly. An empty frame is skipped with a warning.

        Args:
            gdf: GeoDataFrame to write.
            path: Output path (local or s3://).
            storage: Storage backend for cloud operations.
            compression: Parquet compression (default: snappy).

        Raises:
            OSError: If write operation fails.
        """
        if gdf.empty:
            logger.warning(f"Skipping write of empty GeoDataFrame to {path}")
            return

        # Unrecognized compression names silently fall back to snappy
        # rather than failing the write; cast satisfies the literal type.
        if compression in ("snappy", "gzip", "brotli"):
            comp = cast(CompressionType, compression)
        else:
            comp = cast(CompressionType, "snappy")

        try:
            if path.startswith("s3://"):
                # Route cloud writes through the storage backend.
                with storage.open(path, "wb") as sink:
                    gdf.to_parquet(sink, index=False, compression=comp)
            else:
                gdf.to_parquet(path, index=False, compression=comp)
        except Exception as e:
            logger.error(f"Error writing GeoParquet to {path}: {e}")
            raise OSError(f"Failed to write GeoParquet to {path}: {e}") from e

    def read_geoparquet_sync(self, path: str, storage: Any) -> gpd.GeoDataFrame:
        """Synchronously read a GeoParquet file into a GeoDataFrame.

        Uses PyArrow for the read; s3:// sources are pulled through the
        storage backend into an in-memory buffer first.

        Args:
            path: Input path (local or s3://).
            storage: Storage backend for cloud operations.

        Returns:
            GeoDataFrame containing the data.

        Raises:
            FileNotFoundError: If file does not exist.
            OSError: If read fails.
        """
        try:
            if path.startswith("s3://"):
                with storage.open(path, "rb") as src:
                    table = pq.read_table(io.BytesIO(src.read()))
            else:
                table = pq.read_table(path)
            # from_arrow preserves the geometry column's type.
            return gpd.GeoDataFrame.from_arrow(table)
        except FileNotFoundError:
            # Let "missing file" propagate untouched so callers can
            # distinguish it from generic read failures.
            raise
        except Exception as e:
            logger.error(f"Error reading GeoParquet from {path}: {e}")
            raise OSError(f"Failed to read GeoParquet from {path}: {e}") from e
137
+
138
+
139
class STACEngine(ABC):
    """Interface for STAC item conversion and GeoParquet I/O backends.

    Concrete engines translate between STAC item dictionaries and
    GeoDataFrames and persist/load GeoParquet files. The conversion
    methods are synchronous because they operate on the full item set in
    memory (sorting, deduplication); the I/O methods take a storage
    backend so the same code path serves local and cloud destinations.

    Thread Safety:
        Instances should be safe for concurrent read operations; write
        operations may require external synchronization.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Identifier string for this engine implementation."""
        ...

    @abstractmethod
    def items_to_geodataframe(self, items: list[dict[str, Any]]) -> gpd.GeoDataFrame:
        """Build a GeoDataFrame from STAC item dictionaries.

        Synchronous by design: sorting and deduplication need the whole
        item set in memory anyway.

        Args:
            items: List of STAC item dictionaries.

        Returns:
            GeoDataFrame with a geometry column plus all item properties.

        Raises:
            ValueError: If the items cannot be converted.
        """
        ...

    @abstractmethod
    def geodataframe_to_items(self, gdf: gpd.GeoDataFrame) -> list[dict[str, Any]]:
        """Recover STAC item dictionaries from a GeoDataFrame.

        Args:
            gdf: GeoDataFrame containing STAC item data.

        Returns:
            List of STAC item dictionaries.

        Raises:
            ValueError: If the GeoDataFrame cannot be converted.
        """
        ...

    @abstractmethod
    def write_geoparquet_sync(
        self,
        gdf: gpd.GeoDataFrame,
        path: str,
        storage: Any,
        compression: str = "snappy",
    ) -> None:
        """Persist a GeoDataFrame as a GeoParquet file.

        Must handle both local paths and cloud URLs, using the provided
        storage backend for the latter.

        Args:
            gdf: GeoDataFrame to write.
            path: Output path (local or cloud URL like s3://).
            storage: Storage backend instance for cloud operations.
            compression: Parquet compression algorithm (default: snappy).

        Raises:
            IOError: If write operation fails.
        """
        ...

    @abstractmethod
    def read_geoparquet_sync(self, path: str, storage: Any) -> gpd.GeoDataFrame:
        """Load a GeoParquet file into a GeoDataFrame.

        Args:
            path: Input path (local or cloud URL).
            storage: Storage backend instance for cloud operations.

        Returns:
            GeoDataFrame containing the data.

        Raises:
            FileNotFoundError: If the file does not exist.
            IOError: If read operation fails.
        """
        ...
234
+
235
+
236
class EngineNotAvailableError(Exception):
    """Signals that a requested STAC engine cannot be constructed.

    Raised by the engine factory when the backing library for the chosen
    engine is not installed.
    """
240
+
241
+
242
def get_engine(engine_type: EngineType = "rustac") -> STACEngine:
    """Construct the STAC engine for the requested backend.

    Args:
        engine_type: Engine to use. Options:
            - "rustac": Rust-based engine (default, recommended).
            - "stac-geoparquet": legacy pure-Python engine.
            - "auto": prefer rustac, fall back to stac-geoparquet.

    Returns:
        STACEngine instance configured for the requested backend.

    Raises:
        EngineNotAvailableError: If the requested engine is not available.
        ValueError: If an invalid engine type is specified.

    Example:
        >>> engine = get_engine("rustac")
        >>> gdf = engine.items_to_geodataframe(items)
    """
    # Reject unknown names up front so typos fail loudly before any import.
    if engine_type not in ("rustac", "stac-geoparquet", "auto"):
        raise ValueError(f"Unknown engine type: {engine_type}. Valid options: rustac, stac-geoparquet, auto")

    if engine_type == "auto":
        try:
            from .rustac_engine import RustacEngine

            return RustacEngine()
        except ImportError:
            pass  # rustac missing: fall through to the legacy engine
        try:
            from .stac_geoparquet_engine import StacGeoparquetEngine

            return StacGeoparquetEngine()
        except ImportError:
            # Suppress the chained traceback: the install hint IS the message.
            raise EngineNotAvailableError(
                "No STAC engine available. Install rustac[arrow] or stac-geoparquet."
            ) from None

    if engine_type == "rustac":
        try:
            from .rustac_engine import RustacEngine

            return RustacEngine()
        except ImportError as e:
            raise EngineNotAvailableError(
                f"rustac engine not available: {e}. Install with: pip install 'rustac[arrow]'"
            ) from e

    # Only "stac-geoparquet" remains after the validation above.
    try:
        from .stac_geoparquet_engine import StacGeoparquetEngine

        return StacGeoparquetEngine()
    except ImportError as e:
        raise EngineNotAvailableError(
            f"stac-geoparquet engine not available: {e}. Install with: pip install stac-geoparquet"
        ) from e
300
+
301
+
302
# Explicit public API of the engines package (governs star-imports).
__all__ = [
    "STACEngine",
    "GeoParquetIOMixin",
    "EngineType",
    "EngineNotAvailableError",
    "get_engine",
]
@@ -0,0 +1,142 @@
1
+ """Rustac-based STAC I/O engine for high-performance operations.
2
+
3
+ This module provides the rustac engine implementation using the Rust-based
4
+ rustac library for STAC item conversion and GeoParquet I/O. It offers significant
5
+ performance improvements over pure Python implementations.
6
+
7
+ Key Features:
8
+ - High-performance Arrow conversion through Rust
9
+ - Native object store support for S3, GCS, Azure
10
+ - Memory-efficient processing for large datasets
11
+ - Zero Python dependencies for core operations
12
+
13
+ Requirements:
14
+ - rustac[arrow]>=0.9.0
15
+
16
+ Example:
17
+ >>> from earthcatalog.engines.rustac_engine import RustacEngine
18
+ >>> engine = RustacEngine()
19
+ >>> gdf = engine.items_to_geodataframe(items)
20
+ >>> engine.write_geoparquet_sync(gdf, "output.parquet", storage)
21
+ """
22
+
23
+ import logging
24
+ from typing import Any
25
+
26
+ import geopandas as gpd
27
+
28
+ from . import GeoParquetIOMixin, STACEngine
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+ # Import rustac - will raise ImportError if not available
33
+ try:
34
+ import rustac
35
+
36
+ HAS_RUSTAC = True
37
+ except ImportError:
38
+ HAS_RUSTAC = False
39
+ rustac = None # type: ignore
40
+
41
+
42
class RustacEngine(GeoParquetIOMixin, STACEngine):
    """STAC engine backed by the Rust-based rustac library.

    Delegates item <-> Arrow conversion to rustac's native implementation
    and wraps it synchronously for compatibility with the existing
    pipeline architecture; use the rustac library directly if you need
    its async API. GeoParquet read/write is inherited from
    GeoParquetIOMixin so every engine shares one I/O implementation.

    Attributes:
        name: Engine identifier string ("rustac").

    Example:
        >>> engine = RustacEngine()
        >>> items = [{"type": "Feature", "id": "item1", ...}]
        >>> gdf = engine.items_to_geodataframe(items)
        >>> items_out = engine.geodataframe_to_items(gdf)
    """

    def __init__(self) -> None:
        """Fail fast when the backing library is missing.

        Raises:
            ImportError: If rustac library is not installed.
        """
        if not HAS_RUSTAC:
            raise ImportError("rustac library not available. Install with: pip install 'rustac[arrow]'")

    @property
    def name(self) -> str:
        """Engine identifier."""
        return "rustac"

    def items_to_geodataframe(self, items: list[dict[str, Any]]) -> gpd.GeoDataFrame:
        """Convert STAC item dicts to a GeoDataFrame via rustac.to_arrow().

        Args:
            items: List of STAC item dictionaries.

        Returns:
            GeoDataFrame with geometry and all STAC properties; an empty
            GeoDataFrame for empty input.

        Raises:
            ValueError: If items cannot be converted.
        """
        if not items:
            return gpd.GeoDataFrame()

        try:
            # rustac performs the items -> Arrow conversion in Rust;
            # from_arrow() then restores the geometry column type.
            arrow_table = rustac.to_arrow(items)  # type: ignore[union-attr]
            return gpd.GeoDataFrame.from_arrow(arrow_table)
        except Exception as e:
            logger.error(f"Error converting items to GeoDataFrame with rustac: {e}")
            raise ValueError(f"Failed to convert STAC items to GeoDataFrame: {e}") from e

    def geodataframe_to_items(self, gdf: gpd.GeoDataFrame) -> list[dict[str, Any]]:
        """Convert a GeoDataFrame back into STAC item dictionaries.

        Args:
            gdf: GeoDataFrame containing STAC item data.

        Returns:
            List of STAC item dictionaries (empty for an empty frame).

        Raises:
            ValueError: If GeoDataFrame cannot be converted.
        """
        if gdf.empty:
            return []

        try:
            collection = rustac.from_arrow(gdf.to_arrow())  # type: ignore[union-attr, arg-type]
            # A FeatureCollection-style dict is expected back; anything
            # else yields no items.
            if isinstance(collection, dict):
                return collection.get("features", [])
            return []
        except Exception as e:
            logger.error(f"Error converting GeoDataFrame to items with rustac: {e}")
            raise ValueError(f"Failed to convert GeoDataFrame to STAC items: {e}") from e
@@ -0,0 +1,126 @@
1
+ """Legacy STAC engine using stac-geoparquet library.
2
+
3
+ This module provides the stac-geoparquet engine implementation using the
4
+ pure Python stac-geoparquet library for STAC item conversion and GeoParquet I/O.
5
+
6
+ This engine is provided for backward compatibility and as a fallback when
7
+ rustac is not available.
8
+
9
+ Requirements:
10
+ - stac-geoparquet>=0.2.0
11
+
12
+ Example:
13
+ >>> from earthcatalog.engines.stac_geoparquet_engine import StacGeoparquetEngine
14
+ >>> engine = StacGeoparquetEngine()
15
+ >>> gdf = engine.items_to_geodataframe(items)
16
+ >>> engine.write_geoparquet_sync(gdf, "output.parquet", storage)
17
+ """
18
+
19
+ import logging
20
+ from typing import Any
21
+
22
+ import geopandas as gpd
23
+
24
+ from . import GeoParquetIOMixin, STACEngine
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
# Import stac_geoparquet eagerly but tolerate its absence so this module
# stays importable; StacGeoparquetEngine.__init__ re-checks the flag and
# raises ImportError with install instructions.
try:
    from stac_geoparquet import to_geodataframe, to_item_collection

    HAS_STAC_GEOPARQUET = True
except ImportError:
    HAS_STAC_GEOPARQUET = False
    to_geodataframe = None  # type: ignore
    to_item_collection = None  # type: ignore
37
+
38
+
39
class StacGeoparquetEngine(GeoParquetIOMixin, STACEngine):
    """Pure-Python STAC engine built on the stac-geoparquet library.

    Serves as the stable, backward-compatible fallback when rustac is
    not installed. GeoParquet read/write is inherited from
    GeoParquetIOMixin so every engine shares one I/O implementation.

    Attributes:
        name: Engine identifier string ("stac-geoparquet").

    Example:
        >>> engine = StacGeoparquetEngine()
        >>> items = [{"type": "Feature", "id": "item1", ...}]
        >>> gdf = engine.items_to_geodataframe(items)
        >>> items_out = engine.geodataframe_to_items(gdf)
    """

    def __init__(self) -> None:
        """Fail fast when the backing library is missing.

        Raises:
            ImportError: If stac-geoparquet library is not installed.
        """
        if not HAS_STAC_GEOPARQUET:
            raise ImportError("stac-geoparquet library not available. Install with: pip install stac-geoparquet")

    @property
    def name(self) -> str:
        """Engine identifier."""
        return "stac-geoparquet"

    def items_to_geodataframe(self, items: list[dict[str, Any]]) -> gpd.GeoDataFrame:
        """Convert STAC item dicts to a GeoDataFrame via stac_geoparquet.

        Args:
            items: List of STAC item dictionaries.

        Returns:
            GeoDataFrame with geometry and all STAC properties; an empty
            GeoDataFrame for empty input.

        Raises:
            ValueError: If items cannot be converted.
        """
        if not items:
            return gpd.GeoDataFrame()

        try:
            # numpy_nullable keeps the historical dtype behavior and
            # silences the library's FutureWarning about the default.
            return to_geodataframe(items, dtype_backend="numpy_nullable")  # type: ignore[misc]
        except Exception as e:
            logger.error(f"Error converting items to GeoDataFrame with stac-geoparquet: {e}")
            raise ValueError(f"Failed to convert STAC items to GeoDataFrame: {e}") from e

    def geodataframe_to_items(self, gdf: gpd.GeoDataFrame) -> list[dict[str, Any]]:
        """Convert a GeoDataFrame back into STAC item dictionaries.

        Args:
            gdf: GeoDataFrame containing STAC item data.

        Returns:
            List of STAC item dictionaries (empty for an empty frame).

        Raises:
            ValueError: If GeoDataFrame cannot be converted.
        """
        if gdf.empty:
            return []

        try:
            collection = to_item_collection(gdf)  # type: ignore[misc]
            # The library may hand back a FeatureCollection-like object
            # or a plain GeoJSON dict; normalize both to a feature list.
            if hasattr(collection, "to_dict"):
                return collection.to_dict().get("features", [])
            if isinstance(collection, dict):
                return collection.get("features", [])
            return []
        except Exception as e:
            logger.error(f"Error converting GeoDataFrame to items with stac-geoparquet: {e}")
            raise ValueError(f"Failed to convert GeoDataFrame to STAC items: {e}") from e