earthcatalog 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- earthcatalog/__init__.py +164 -0
- earthcatalog/async_http_client.py +1006 -0
- earthcatalog/config.py +97 -0
- earthcatalog/engines/__init__.py +308 -0
- earthcatalog/engines/rustac_engine.py +142 -0
- earthcatalog/engines/stac_geoparquet_engine.py +126 -0
- earthcatalog/exceptions.py +471 -0
- earthcatalog/grid_systems.py +1114 -0
- earthcatalog/ingestion_pipeline.py +2281 -0
- earthcatalog/input_readers.py +603 -0
- earthcatalog/job_tracking.py +485 -0
- earthcatalog/pipeline.py +606 -0
- earthcatalog/schema_generator.py +911 -0
- earthcatalog/spatial_resolver.py +1207 -0
- earthcatalog/stac_hooks.py +754 -0
- earthcatalog/statistics.py +677 -0
- earthcatalog/storage_backends.py +548 -0
- earthcatalog/tests/__init__.py +1 -0
- earthcatalog/tests/conftest.py +76 -0
- earthcatalog/tests/test_all_grids.py +793 -0
- earthcatalog/tests/test_async_http.py +700 -0
- earthcatalog/tests/test_cli_and_storage.py +230 -0
- earthcatalog/tests/test_config.py +245 -0
- earthcatalog/tests/test_dask_integration.py +580 -0
- earthcatalog/tests/test_e2e_synthetic.py +1624 -0
- earthcatalog/tests/test_engines.py +272 -0
- earthcatalog/tests/test_exceptions.py +346 -0
- earthcatalog/tests/test_file_structure.py +245 -0
- earthcatalog/tests/test_input_readers.py +666 -0
- earthcatalog/tests/test_integration.py +200 -0
- earthcatalog/tests/test_integration_async.py +283 -0
- earthcatalog/tests/test_job_tracking.py +603 -0
- earthcatalog/tests/test_multi_file_input.py +336 -0
- earthcatalog/tests/test_passthrough_hook.py +196 -0
- earthcatalog/tests/test_pipeline.py +684 -0
- earthcatalog/tests/test_pipeline_components.py +665 -0
- earthcatalog/tests/test_schema_generator.py +506 -0
- earthcatalog/tests/test_spatial_resolver.py +413 -0
- earthcatalog/tests/test_stac_hooks.py +776 -0
- earthcatalog/tests/test_statistics.py +477 -0
- earthcatalog/tests/test_storage_backends.py +236 -0
- earthcatalog/tests/test_validation.py +435 -0
- earthcatalog/tests/test_workers.py +653 -0
- earthcatalog/validation.py +921 -0
- earthcatalog/workers.py +682 -0
- earthcatalog-0.2.0.dist-info/METADATA +333 -0
- earthcatalog-0.2.0.dist-info/RECORD +50 -0
- earthcatalog-0.2.0.dist-info/WHEEL +5 -0
- earthcatalog-0.2.0.dist-info/entry_points.txt +3 -0
- earthcatalog-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,471 @@
|
|
|
1
|
+
"""Custom exceptions for EarthCatalog.
|
|
2
|
+
|
|
3
|
+
This module defines a hierarchy of exceptions for better error handling
|
|
4
|
+
and debugging throughout the EarthCatalog pipeline.
|
|
5
|
+
|
|
6
|
+
Exception Hierarchy:
|
|
7
|
+
EarthCatalogError (base)
|
|
8
|
+
├── ConfigurationError
|
|
9
|
+
│ ├── InvalidGridConfigError
|
|
10
|
+
│ └── InvalidStorageConfigError
|
|
11
|
+
├── IngestionError
|
|
12
|
+
│ ├── DownloadError
|
|
13
|
+
│ ├── ItemValidationError
|
|
14
|
+
│ └── ConsolidationError
|
|
15
|
+
├── StorageError
|
|
16
|
+
│ ├── StorageConnectionError
|
|
17
|
+
│ ├── StorageWriteError
│ └── StorageReadError
|
|
18
|
+
└── QueryError
|
|
19
|
+
└── SpatialResolverError
|
|
20
|
+
|
|
21
|
+
Usage:
|
|
22
|
+
All custom exceptions inherit from EarthCatalogError, allowing users
|
|
23
|
+
to catch all library-specific errors with a single except clause:
|
|
24
|
+
|
|
25
|
+
>>> try:
|
|
26
|
+
... pipeline.run()
|
|
27
|
+
... except EarthCatalogError as e:
|
|
28
|
+
... logger.error(f"Pipeline failed: {e}")
|
|
29
|
+
... print(e.details)
|
|
30
|
+
|
|
31
|
+
Or catch specific exceptions for fine-grained handling:
|
|
32
|
+
|
|
33
|
+
>>> try:
|
|
34
|
+
... download_items(urls)
|
|
35
|
+
... except DownloadError as e:
|
|
36
|
+
... logger.warning(f"Failed to download {e.url}: {e.message}")
|
|
37
|
+
... except ConsolidationError as e:
|
|
38
|
+
... logger.error(f"Consolidation failed: {e.message}")
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
from typing import Any
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class EarthCatalogError(Exception):
|
|
45
|
+
"""Base exception for all EarthCatalog errors.
|
|
46
|
+
|
|
47
|
+
All custom exceptions in EarthCatalog inherit from this class,
|
|
48
|
+
allowing users to catch all library-specific errors with a single
|
|
49
|
+
except clause.
|
|
50
|
+
|
|
51
|
+
Attributes:
|
|
52
|
+
message: Human-readable error description.
|
|
53
|
+
details: Optional dict with additional context for debugging.
|
|
54
|
+
|
|
55
|
+
Example:
|
|
56
|
+
>>> try:
|
|
57
|
+
... pipeline.run()
|
|
58
|
+
... except EarthCatalogError as e:
|
|
59
|
+
... logger.error(f"Pipeline failed: {e}")
|
|
60
|
+
... if e.details:
|
|
61
|
+
... logger.debug(f"Details: {e.details}")
|
|
62
|
+
"""
|
|
63
|
+
|
|
64
|
+
def __init__(self, message: str, details: dict[str, Any] | None = None):
|
|
65
|
+
"""Initialize the exception.
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
message: Human-readable error description.
|
|
69
|
+
details: Optional dict with additional context.
|
|
70
|
+
"""
|
|
71
|
+
super().__init__(message)
|
|
72
|
+
self.message = message
|
|
73
|
+
self.details = details or {}
|
|
74
|
+
|
|
75
|
+
def __repr__(self) -> str:
|
|
76
|
+
"""Return a detailed representation of the exception."""
|
|
77
|
+
return f"{self.__class__.__name__}({self.message!r})"
|
|
78
|
+
|
|
79
|
+
def __bool__(self) -> bool:
|
|
80
|
+
"""Exceptions are always truthy."""
|
|
81
|
+
return True
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# =============================================================================
|
|
85
|
+
# Configuration Errors
|
|
86
|
+
# =============================================================================
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class ConfigurationError(EarthCatalogError):
    """Pipeline configuration problem.

    Signals that configuration parameters are invalid, missing, or
    incompatible. Adds nothing to the base class; it exists so callers can
    catch configuration failures as a group.

    Example:
        >>> raise ConfigurationError(
        ...     "Invalid configuration",
        ...     details={"missing_field": "output_catalog"}
        ... )
    """
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class InvalidGridConfigError(ConfigurationError):
    """Grid system configuration is invalid.

    Signals that grid system parameters are invalid or incompatible.

    Attributes:
        grid_system: The grid system that was configured.
        resolution: The resolution/level that was specified.

    Example:
        >>> raise InvalidGridConfigError(
        ...     "H3 resolution must be 0-15",
        ...     grid_system="h3",
        ...     resolution=20
        ... )
    """

    def __init__(
        self,
        message: str,
        grid_system: str | None = None,
        resolution: int | None = None,
        **kwargs: Any,
    ):
        """Build the error and record the grid context.

        Args:
            message: Human-readable error description.
            grid_system: The grid system that was configured.
            resolution: The resolution/level that was specified.
            **kwargs: Extra context merged into ``details`` after the
                named fields.
        """
        # Named fields first, then any extra keyword context.
        context: dict[str, Any] = {
            "grid_system": grid_system,
            "resolution": resolution,
        }
        context.update(kwargs)
        super().__init__(message, context)
        self.grid_system, self.resolution = grid_system, resolution
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class InvalidStorageConfigError(ConfigurationError):
    """Storage backend configuration is invalid.

    Signals that storage backend parameters are invalid or that the backend
    cannot be initialized.

    Attributes:
        backend: The storage backend type.
        path: The storage path that was specified.

    Example:
        >>> raise InvalidStorageConfigError(
        ...     "S3 bucket does not exist",
        ...     backend="s3",
        ...     path="s3://nonexistent-bucket/catalog"
        ... )
    """

    def __init__(
        self,
        message: str,
        backend: str | None = None,
        path: str | None = None,
        **kwargs: Any,
    ):
        """Build the error and record the storage context.

        Args:
            message: Human-readable error description.
            backend: The storage backend type.
            path: The storage path that was specified.
            **kwargs: Extra context merged into ``details`` after the
                named fields.
        """
        # Named fields first, then any extra keyword context.
        context: dict[str, Any] = {"backend": backend, "path": path}
        context.update(kwargs)
        super().__init__(message, context)
        self.backend, self.path = backend, path
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# =============================================================================
|
|
182
|
+
# Ingestion Errors
|
|
183
|
+
# =============================================================================
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class IngestionError(EarthCatalogError):
    """Failure while running ingestion.

    Common parent for errors raised during the ingestion pipeline, covering
    the download, validation, and consolidation phases. Adds nothing to the
    base class.
    """
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
class DownloadError(IngestionError):
    """Failure downloading STAC items.

    Signals that one or more STAC items could not be downloaded, and carries
    failure details such as the HTTP status code and retry count.

    Attributes:
        url: The URL that failed to download.
        status_code: HTTP status code if available.
        retry_count: Number of retries attempted.
        error_type: Categorized error type (e.g., 'timeout', 'not_found').

    Example:
        >>> raise DownloadError(
        ...     "Request timed out after 30s",
        ...     url="https://api.example.com/item.json",
        ...     status_code=None,
        ...     retry_count=3,
        ...     error_type="timeout"
        ... )
    """

    def __init__(
        self,
        message: str,
        url: str | None = None,
        status_code: int | None = None,
        retry_count: int = 0,
        error_type: str | None = None,
        **kwargs: Any,
    ):
        """Build the error and record the download context.

        Args:
            message: Human-readable error description.
            url: The URL that failed.
            status_code: HTTP status code if available.
            retry_count: Number of retries attempted.
            error_type: Categorized error type.
            **kwargs: Extra context merged into ``details`` after the
                named fields.
        """
        # Named fields first, then any extra keyword context.
        context: dict[str, Any] = {
            "url": url,
            "status_code": status_code,
            "retry_count": retry_count,
            "error_type": error_type,
        }
        context.update(kwargs)
        super().__init__(message, context)

        self.url, self.status_code = url, status_code
        self.retry_count, self.error_type = retry_count, error_type
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
class ItemValidationError(IngestionError):
    """Failure validating a STAC item or its geometry.

    Signals that a STAC item did not pass validation: geometry issues,
    missing required fields, or schema violations.

    Attributes:
        item_id: The ID of the item that failed validation.
        issue_code: Validation issue code (e.g., 'INVALID_GEOMETRY').

    Example:
        >>> raise ItemValidationError(
        ...     "Geometry is self-intersecting",
        ...     item_id="ITEM_123",
        ...     issue_code="INVALID_GEOMETRY"
        ... )
    """

    def __init__(
        self,
        message: str,
        item_id: str | None = None,
        issue_code: str | None = None,
        **kwargs: Any,
    ):
        """Build the error and record the validation context.

        Args:
            message: Human-readable error description.
            item_id: The ID of the item that failed validation.
            issue_code: Validation issue code.
            **kwargs: Extra context merged into ``details`` after the
                named fields.
        """
        # Named fields first, then any extra keyword context.
        context: dict[str, Any] = {"item_id": item_id, "issue_code": issue_code}
        context.update(kwargs)
        super().__init__(message, context)
        self.item_id, self.issue_code = item_id, issue_code
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
class ConsolidationError(IngestionError):
    """Failure during shard consolidation.

    Signals that the consolidation phase failed, including problems reading
    shards, merging data, or writing final partitions.

    Attributes:
        partition_key: The partition being consolidated.
        shard_count: Number of shards being consolidated.

    Example:
        >>> raise ConsolidationError(
        ...     "Failed to merge shards: memory limit exceeded",
        ...     partition_key="h3=abc123/year=2024/month=01",
        ...     shard_count=50
        ... )
    """

    def __init__(
        self,
        message: str,
        partition_key: str | None = None,
        shard_count: int | None = None,
        **kwargs: Any,
    ):
        """Build the error and record the consolidation context.

        Args:
            message: Human-readable error description.
            partition_key: The partition being consolidated.
            shard_count: Number of shards being consolidated.
            **kwargs: Extra context merged into ``details`` after the
                named fields.
        """
        # Named fields first, then any extra keyword context.
        context: dict[str, Any] = {
            "partition_key": partition_key,
            "shard_count": shard_count,
        }
        context.update(kwargs)
        super().__init__(message, context)
        self.partition_key, self.shard_count = partition_key, shard_count
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
# =============================================================================
|
|
330
|
+
# Storage Errors
|
|
331
|
+
# =============================================================================
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
class StorageError(EarthCatalogError):
    """Failure in a storage backend operation.

    Common parent for storage-related errors, including connection failures,
    read/write errors, and permission issues.

    Attributes:
        path: The storage path involved in the error.
    """

    def __init__(self, message: str, path: str | None = None, **kwargs: Any):
        """Build the error and record the storage path.

        Args:
            message: Human-readable error description.
            path: The storage path involved in the error.
            **kwargs: Extra context merged into ``details`` after ``path``.
        """
        # ``path`` goes first, then any extra keyword context.
        context: dict[str, Any] = {"path": path}
        context.update(kwargs)
        super().__init__(message, context)
        self.path = path
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
class StorageConnectionError(StorageError):
    """The storage backend could not be reached.

    Signals that the storage backend is unreachable or that authentication
    failed. Adds nothing to ``StorageError``.

    Example:
        >>> raise StorageConnectionError(
        ...     "Cannot connect to S3: access denied",
        ...     path="s3://bucket/catalog"
        ... )
    """
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
class StorageWriteError(StorageError):
    """Failure writing to storage.

    Signals that a write operation failed, including permission issues,
    disk space, or network errors during upload.

    Attributes:
        bytes_written: Number of bytes successfully written before failure.

    Example:
        >>> raise StorageWriteError(
        ...     "Write failed: disk full",
        ...     path="/catalog/partition.parquet",
        ...     bytes_written=1024000
        ... )
    """

    def __init__(
        self,
        message: str,
        path: str | None = None,
        bytes_written: int | None = None,
        **kwargs: Any,
    ):
        """Build the error and record how much was written.

        Args:
            message: Human-readable error description.
            path: The storage path involved in the error.
            bytes_written: Number of bytes successfully written.
            **kwargs: Extra context merged into ``details``.
        """
        # The parent records ``path`` and the extra context in ``details``.
        super().__init__(message, path, **kwargs)
        # ``bytes_written`` is appended once the parent has built ``details``.
        self.details.update({"bytes_written": bytes_written})
        self.bytes_written = bytes_written
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
class StorageReadError(StorageError):
    """Failure reading from storage.

    Signals that a read operation failed, including missing files,
    corrupted data, or network errors during download. Adds nothing to
    ``StorageError``.

    Example:
        >>> raise StorageReadError(
        ...     "File not found",
        ...     path="s3://bucket/missing.parquet"
        ... )
    """
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
# =============================================================================
|
|
426
|
+
# Query Errors
|
|
427
|
+
# =============================================================================
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
class QueryError(EarthCatalogError):
    """Failure while querying a catalog.

    Common parent for query-related errors, including spatial resolution
    and partition lookup failures. Adds nothing to the base class.
    """
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
class SpatialResolverError(QueryError):
    """Failure in spatial partition resolution.

    Signals that the spatial resolver could not resolve partitions,
    including invalid geometries or missing schema information.

    Attributes:
        geometry_type: Type of geometry that caused the error.

    Example:
        >>> raise SpatialResolverError(
        ...     "Cannot resolve partitions: invalid geometry",
        ...     geometry_type="Polygon"
        ... )
    """

    def __init__(
        self,
        message: str,
        geometry_type: str | None = None,
        **kwargs: Any,
    ):
        """Build the error and record the geometry type.

        Args:
            message: Human-readable error description.
            geometry_type: Type of geometry that caused the error.
            **kwargs: Extra context merged into ``details`` after
                ``geometry_type``.
        """
        # ``geometry_type`` goes first, then any extra keyword context.
        context: dict[str, Any] = {"geometry_type": geometry_type}
        context.update(kwargs)
        super().__init__(message, context)
        self.geometry_type = geometry_type
|