earthcatalog 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. earthcatalog/__init__.py +164 -0
  2. earthcatalog/async_http_client.py +1006 -0
  3. earthcatalog/config.py +97 -0
  4. earthcatalog/engines/__init__.py +308 -0
  5. earthcatalog/engines/rustac_engine.py +142 -0
  6. earthcatalog/engines/stac_geoparquet_engine.py +126 -0
  7. earthcatalog/exceptions.py +471 -0
  8. earthcatalog/grid_systems.py +1114 -0
  9. earthcatalog/ingestion_pipeline.py +2281 -0
  10. earthcatalog/input_readers.py +603 -0
  11. earthcatalog/job_tracking.py +485 -0
  12. earthcatalog/pipeline.py +606 -0
  13. earthcatalog/schema_generator.py +911 -0
  14. earthcatalog/spatial_resolver.py +1207 -0
  15. earthcatalog/stac_hooks.py +754 -0
  16. earthcatalog/statistics.py +677 -0
  17. earthcatalog/storage_backends.py +548 -0
  18. earthcatalog/tests/__init__.py +1 -0
  19. earthcatalog/tests/conftest.py +76 -0
  20. earthcatalog/tests/test_all_grids.py +793 -0
  21. earthcatalog/tests/test_async_http.py +700 -0
  22. earthcatalog/tests/test_cli_and_storage.py +230 -0
  23. earthcatalog/tests/test_config.py +245 -0
  24. earthcatalog/tests/test_dask_integration.py +580 -0
  25. earthcatalog/tests/test_e2e_synthetic.py +1624 -0
  26. earthcatalog/tests/test_engines.py +272 -0
  27. earthcatalog/tests/test_exceptions.py +346 -0
  28. earthcatalog/tests/test_file_structure.py +245 -0
  29. earthcatalog/tests/test_input_readers.py +666 -0
  30. earthcatalog/tests/test_integration.py +200 -0
  31. earthcatalog/tests/test_integration_async.py +283 -0
  32. earthcatalog/tests/test_job_tracking.py +603 -0
  33. earthcatalog/tests/test_multi_file_input.py +336 -0
  34. earthcatalog/tests/test_passthrough_hook.py +196 -0
  35. earthcatalog/tests/test_pipeline.py +684 -0
  36. earthcatalog/tests/test_pipeline_components.py +665 -0
  37. earthcatalog/tests/test_schema_generator.py +506 -0
  38. earthcatalog/tests/test_spatial_resolver.py +413 -0
  39. earthcatalog/tests/test_stac_hooks.py +776 -0
  40. earthcatalog/tests/test_statistics.py +477 -0
  41. earthcatalog/tests/test_storage_backends.py +236 -0
  42. earthcatalog/tests/test_validation.py +435 -0
  43. earthcatalog/tests/test_workers.py +653 -0
  44. earthcatalog/validation.py +921 -0
  45. earthcatalog/workers.py +682 -0
  46. earthcatalog-0.2.0.dist-info/METADATA +333 -0
  47. earthcatalog-0.2.0.dist-info/RECORD +50 -0
  48. earthcatalog-0.2.0.dist-info/WHEEL +5 -0
  49. earthcatalog-0.2.0.dist-info/entry_points.txt +3 -0
  50. earthcatalog-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,471 @@
1
+ """Custom exceptions for EarthCatalog.
2
+
3
+ This module defines a hierarchy of exceptions for better error handling
4
+ and debugging throughout the EarthCatalog pipeline.
5
+
6
+ Exception Hierarchy:
7
+ EarthCatalogError (base)
8
+ ├── ConfigurationError
9
+ │ ├── InvalidGridConfigError
10
+ │ └── InvalidStorageConfigError
11
+ ├── IngestionError
12
+ │ ├── DownloadError
13
+ │ ├── ItemValidationError
14
+ │ └── ConsolidationError
15
+ ├── StorageError
16
+ │ ├── StorageConnectionError
17
+ │ ├── StorageWriteError
+ │ └── StorageReadError
18
+ └── QueryError
19
+ └── SpatialResolverError
20
+
21
+ Usage:
22
+ All custom exceptions inherit from EarthCatalogError, allowing users
23
+ to catch all library-specific errors with a single except clause:
24
+
25
+ >>> try:
26
+ ... pipeline.run()
27
+ ... except EarthCatalogError as e:
28
+ ... logger.error(f"Pipeline failed: {e}")
29
+ ... print(e.details)
30
+
31
+ Or catch specific exceptions for fine-grained handling:
32
+
33
+ >>> try:
34
+ ... download_items(urls)
35
+ ... except DownloadError as e:
36
+ ... logger.warning(f"Failed to download {e.url}: {e.message}")
37
+ ... except ConsolidationError as e:
38
+ ... logger.error(f"Consolidation failed: {e.message}")
39
+ """
40
+
41
+ from typing import Any
42
+
43
+
44
+ class EarthCatalogError(Exception):
45
+ """Base exception for all EarthCatalog errors.
46
+
47
+ All custom exceptions in EarthCatalog inherit from this class,
48
+ allowing users to catch all library-specific errors with a single
49
+ except clause.
50
+
51
+ Attributes:
52
+ message: Human-readable error description.
53
+ details: Optional dict with additional context for debugging.
54
+
55
+ Example:
56
+ >>> try:
57
+ ... pipeline.run()
58
+ ... except EarthCatalogError as e:
59
+ ... logger.error(f"Pipeline failed: {e}")
60
+ ... if e.details:
61
+ ... logger.debug(f"Details: {e.details}")
62
+ """
63
+
64
+ def __init__(self, message: str, details: dict[str, Any] | None = None):
65
+ """Initialize the exception.
66
+
67
+ Args:
68
+ message: Human-readable error description.
69
+ details: Optional dict with additional context.
70
+ """
71
+ super().__init__(message)
72
+ self.message = message
73
+ self.details = details or {}
74
+
75
+ def __repr__(self) -> str:
76
+ """Return a detailed representation of the exception."""
77
+ return f"{self.__class__.__name__}({self.message!r})"
78
+
79
+ def __bool__(self) -> bool:
80
+ """Exceptions are always truthy."""
81
+ return True
82
+
83
+
84
+ # =============================================================================
85
+ # Configuration Errors
86
+ # =============================================================================
87
+
88
+
89
class ConfigurationError(EarthCatalogError):
    """Signal a problem with the pipeline configuration.

    Intended for configuration parameters that are invalid, missing, or
    incompatible. The class adds no behaviour of its own; it exists so
    callers can catch configuration problems as a group.

    Example:
        >>> raise ConfigurationError(
        ...     "Invalid configuration",
        ...     details={"missing_field": "output_catalog"}
        ... )
    """
102
+
103
+
104
class InvalidGridConfigError(ConfigurationError):
    """Signal an invalid or incompatible grid system configuration.

    Attributes:
        grid_system: The grid system that was configured.
        resolution: The resolution/level that was specified.

    Example:
        >>> raise InvalidGridConfigError(
        ...     "H3 resolution must be 0-15",
        ...     grid_system="h3",
        ...     resolution=20
        ... )
    """

    def __init__(
        self,
        message: str,
        grid_system: str | None = None,
        resolution: int | None = None,
        **kwargs: Any,
    ):
        """Build the exception and record the grid settings.

        Args:
            message: Human-readable error description.
            grid_system: The grid system that was configured.
            resolution: The resolution/level that was specified.
            **kwargs: Extra context merged into ``details`` after the
                grid fields.
        """
        context: dict[str, Any] = {
            "grid_system": grid_system,
            "resolution": resolution,
        }
        context.update(kwargs)
        super().__init__(message, context)
        self.grid_system, self.resolution = grid_system, resolution
140
+
141
+
142
class InvalidStorageConfigError(ConfigurationError):
    """Signal an invalid storage backend configuration.

    Intended for storage backend parameters that are invalid, or a backend
    that cannot be initialized.

    Attributes:
        backend: The storage backend type.
        path: The storage path that was specified.

    Example:
        >>> raise InvalidStorageConfigError(
        ...     "S3 bucket does not exist",
        ...     backend="s3",
        ...     path="s3://nonexistent-bucket/catalog"
        ... )
    """

    def __init__(
        self,
        message: str,
        backend: str | None = None,
        path: str | None = None,
        **kwargs: Any,
    ):
        """Build the exception and record the storage settings.

        Args:
            message: Human-readable error description.
            backend: The storage backend type.
            path: The storage path that was specified.
            **kwargs: Extra context merged into ``details`` after the
                storage fields.
        """
        context: dict[str, Any] = {"backend": backend, "path": path}
        context.update(kwargs)
        super().__init__(message, context)
        self.backend, self.path = backend, path
179
+
180
+
181
+ # =============================================================================
182
+ # Ingestion Errors
183
+ # =============================================================================
184
+
185
+
186
class IngestionError(EarthCatalogError):
    """Common parent for failures raised while ingesting data.

    Groups the errors of the ingestion pipeline's download, validation,
    and consolidation phases so they can be caught together. The class
    itself adds no behaviour.
    """
194
+
195
+
196
class DownloadError(IngestionError):
    """Signal a failure to download STAC items.

    Carries the failing URL together with the HTTP status code, the retry
    count, and a categorized error type, both as attributes and inside
    ``details``.

    Attributes:
        url: The URL that failed to download.
        status_code: HTTP status code if available.
        retry_count: Number of retries attempted.
        error_type: Categorized error type (e.g., 'timeout', 'not_found').

    Example:
        >>> raise DownloadError(
        ...     "Request timed out after 30s",
        ...     url="https://api.example.com/item.json",
        ...     status_code=None,
        ...     retry_count=3,
        ...     error_type="timeout"
        ... )
    """

    def __init__(
        self,
        message: str,
        url: str | None = None,
        status_code: int | None = None,
        retry_count: int = 0,
        error_type: str | None = None,
        **kwargs: Any,
    ):
        """Build the exception and record the download context.

        Args:
            message: Human-readable error description.
            url: The URL that failed.
            status_code: HTTP status code if available.
            retry_count: Number of retries attempted.
            error_type: Categorized error type.
            **kwargs: Extra context merged into ``details`` after the
                download fields.
        """
        context: dict[str, Any] = {
            "url": url,
            "status_code": status_code,
            "retry_count": retry_count,
            "error_type": error_type,
        }
        context.update(kwargs)
        super().__init__(message, context)
        self.url, self.status_code = url, status_code
        self.retry_count, self.error_type = retry_count, error_type
249
+
250
+
251
class ItemValidationError(IngestionError):
    """Signal that a STAC item or its geometry failed validation.

    Intended for geometry issues, missing required fields, or schema
    violations found in a STAC item.

    Attributes:
        item_id: The ID of the item that failed validation.
        issue_code: Validation issue code (e.g., 'INVALID_GEOMETRY').

    Example:
        >>> raise ItemValidationError(
        ...     "Geometry is self-intersecting",
        ...     item_id="ITEM_123",
        ...     issue_code="INVALID_GEOMETRY"
        ... )
    """

    def __init__(
        self,
        message: str,
        item_id: str | None = None,
        issue_code: str | None = None,
        **kwargs: Any,
    ):
        """Build the exception and record which item failed and why.

        Args:
            message: Human-readable error description.
            item_id: The ID of the item that failed validation.
            issue_code: Validation issue code.
            **kwargs: Extra context merged into ``details`` after the
                item fields.
        """
        context: dict[str, Any] = {"item_id": item_id, "issue_code": issue_code}
        context.update(kwargs)
        super().__init__(message, context)
        self.item_id, self.issue_code = item_id, issue_code
288
+
289
+
290
class ConsolidationError(IngestionError):
    """Signal a failure during shard consolidation.

    Intended for problems reading shards, merging data, or writing final
    partitions during the consolidation phase.

    Attributes:
        partition_key: The partition being consolidated.
        shard_count: Number of shards being consolidated.

    Example:
        >>> raise ConsolidationError(
        ...     "Failed to merge shards: memory limit exceeded",
        ...     partition_key="h3=abc123/year=2024/month=01",
        ...     shard_count=50
        ... )
    """

    def __init__(
        self,
        message: str,
        partition_key: str | None = None,
        shard_count: int | None = None,
        **kwargs: Any,
    ):
        """Build the exception and record the consolidation context.

        Args:
            message: Human-readable error description.
            partition_key: The partition being consolidated.
            shard_count: Number of shards being consolidated.
            **kwargs: Extra context merged into ``details`` after the
                partition fields.
        """
        context: dict[str, Any] = {
            "partition_key": partition_key,
            "shard_count": shard_count,
        }
        context.update(kwargs)
        super().__init__(message, context)
        self.partition_key, self.shard_count = partition_key, shard_count
327
+
328
+
329
+ # =============================================================================
330
+ # Storage Errors
331
+ # =============================================================================
332
+
333
+
334
class StorageError(EarthCatalogError):
    """Common parent for storage backend failures.

    Groups errors from storage backend operations, including connection
    failures, read/write errors, and permission issues.

    Attributes:
        path: The storage path involved in the error.
    """

    def __init__(self, message: str, path: str | None = None, **kwargs: Any):
        """Build the exception and record the affected storage path.

        Args:
            message: Human-readable error description.
            path: The storage path involved in the error.
            **kwargs: Extra context merged into ``details`` after ``path``.
        """
        context: dict[str, Any] = {"path": path}
        context.update(kwargs)
        super().__init__(message, context)
        self.path = path
355
+
356
+
357
class StorageConnectionError(StorageError):
    """Signal that the storage backend could not be connected to.

    Intended for an unreachable storage backend or failed authentication.
    The class adds no behaviour beyond its parent.

    Example:
        >>> raise StorageConnectionError(
        ...     "Cannot connect to S3: access denied",
        ...     path="s3://bucket/catalog"
        ... )
    """
370
+
371
+
372
class StorageWriteError(StorageError):
    """Signal a failed write to storage.

    Intended for write operations that fail because of permission issues,
    disk space, or network errors during upload.

    Attributes:
        bytes_written: Number of bytes successfully written before failure.

    Example:
        >>> raise StorageWriteError(
        ...     "Write failed: disk full",
        ...     path="/catalog/partition.parquet",
        ...     bytes_written=1024000
        ... )
    """

    def __init__(
        self,
        message: str,
        path: str | None = None,
        bytes_written: int | None = None,
        **kwargs: Any,
    ):
        """Build the exception and record how much was written.

        Args:
            message: Human-readable error description.
            path: The storage path involved in the error.
            bytes_written: Number of bytes successfully written.
            **kwargs: Extra context forwarded to the parent and merged
                into ``details``.
        """
        super().__init__(message, path, **kwargs)
        # The parent has already populated ``details``; add the write
        # progress to that same dict.
        self.details.update({"bytes_written": bytes_written})
        self.bytes_written = bytes_written
407
+
408
+
409
class StorageReadError(StorageError):
    """Signal a failed read from storage.

    Intended for read operations that fail because of missing files,
    corrupted data, or network errors during download. The class adds no
    behaviour beyond its parent.

    Example:
        >>> raise StorageReadError(
        ...     "File not found",
        ...     path="s3://bucket/missing.parquet"
        ... )
    """
423
+
424
+
425
+ # =============================================================================
426
+ # Query Errors
427
+ # =============================================================================
428
+
429
+
430
class QueryError(EarthCatalogError):
    """Common parent for failures raised while querying a catalog.

    Groups query-related errors, including spatial resolution and
    partition lookup failures. The class itself adds no behaviour.
    """
438
+
439
+
440
class SpatialResolverError(QueryError):
    """Signal a failure in spatial partition resolution.

    Intended for cases where the spatial resolver cannot resolve
    partitions, including invalid geometries or missing schema information.

    Attributes:
        geometry_type: Type of geometry that caused the error.

    Example:
        >>> raise SpatialResolverError(
        ...     "Cannot resolve partitions: invalid geometry",
        ...     geometry_type="Polygon"
        ... )
    """

    def __init__(
        self,
        message: str,
        geometry_type: str | None = None,
        **kwargs: Any,
    ):
        """Build the exception and record the offending geometry type.

        Args:
            message: Human-readable error description.
            geometry_type: Type of geometry that caused the error.
            **kwargs: Extra context merged into ``details`` after
                ``geometry_type``.
        """
        context: dict[str, Any] = {"geometry_type": geometry_type}
        context.update(kwargs)
        super().__init__(message, context)
        self.geometry_type = geometry_type