earthcatalog 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. earthcatalog/__init__.py +164 -0
  2. earthcatalog/async_http_client.py +1006 -0
  3. earthcatalog/config.py +97 -0
  4. earthcatalog/engines/__init__.py +308 -0
  5. earthcatalog/engines/rustac_engine.py +142 -0
  6. earthcatalog/engines/stac_geoparquet_engine.py +126 -0
  7. earthcatalog/exceptions.py +471 -0
  8. earthcatalog/grid_systems.py +1114 -0
  9. earthcatalog/ingestion_pipeline.py +2281 -0
  10. earthcatalog/input_readers.py +603 -0
  11. earthcatalog/job_tracking.py +485 -0
  12. earthcatalog/pipeline.py +606 -0
  13. earthcatalog/schema_generator.py +911 -0
  14. earthcatalog/spatial_resolver.py +1207 -0
  15. earthcatalog/stac_hooks.py +754 -0
  16. earthcatalog/statistics.py +677 -0
  17. earthcatalog/storage_backends.py +548 -0
  18. earthcatalog/tests/__init__.py +1 -0
  19. earthcatalog/tests/conftest.py +76 -0
  20. earthcatalog/tests/test_all_grids.py +793 -0
  21. earthcatalog/tests/test_async_http.py +700 -0
  22. earthcatalog/tests/test_cli_and_storage.py +230 -0
  23. earthcatalog/tests/test_config.py +245 -0
  24. earthcatalog/tests/test_dask_integration.py +580 -0
  25. earthcatalog/tests/test_e2e_synthetic.py +1624 -0
  26. earthcatalog/tests/test_engines.py +272 -0
  27. earthcatalog/tests/test_exceptions.py +346 -0
  28. earthcatalog/tests/test_file_structure.py +245 -0
  29. earthcatalog/tests/test_input_readers.py +666 -0
  30. earthcatalog/tests/test_integration.py +200 -0
  31. earthcatalog/tests/test_integration_async.py +283 -0
  32. earthcatalog/tests/test_job_tracking.py +603 -0
  33. earthcatalog/tests/test_multi_file_input.py +336 -0
  34. earthcatalog/tests/test_passthrough_hook.py +196 -0
  35. earthcatalog/tests/test_pipeline.py +684 -0
  36. earthcatalog/tests/test_pipeline_components.py +665 -0
  37. earthcatalog/tests/test_schema_generator.py +506 -0
  38. earthcatalog/tests/test_spatial_resolver.py +413 -0
  39. earthcatalog/tests/test_stac_hooks.py +776 -0
  40. earthcatalog/tests/test_statistics.py +477 -0
  41. earthcatalog/tests/test_storage_backends.py +236 -0
  42. earthcatalog/tests/test_validation.py +435 -0
  43. earthcatalog/tests/test_workers.py +653 -0
  44. earthcatalog/validation.py +921 -0
  45. earthcatalog/workers.py +682 -0
  46. earthcatalog-0.2.0.dist-info/METADATA +333 -0
  47. earthcatalog-0.2.0.dist-info/RECORD +50 -0
  48. earthcatalog-0.2.0.dist-info/WHEEL +5 -0
  49. earthcatalog-0.2.0.dist-info/entry_points.txt +3 -0
  50. earthcatalog-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,700 @@
1
+ """
2
+ Tests for async HTTP client functionality in EarthCatalog.
3
+
4
+ These tests validate the concurrent HTTP processing capabilities including
5
+ performance improvements, error handling, and integration with the existing
6
+ pipeline architecture.
7
+ """
8
+
9
+ # mypy: ignore-errors
10
+
11
+ import asyncio
12
+ import time
13
+ from unittest.mock import patch
14
+
15
+ import pytest
16
+
17
+ # Import async HTTP testing utilities
18
+ try:
19
+ import aiohttp
20
+ from aioresponses import aioresponses
21
+
22
+ HAS_ASYNC_TEST_SUPPORT = True
23
+ except ImportError:
24
+ HAS_ASYNC_TEST_SUPPORT = False
25
+ aioresponses = None # type: ignore
26
+ aiohttp = None # type: ignore
27
+
28
+ # Import grid systems for validation tests
29
+ from earthcatalog.grid_systems import (
30
+ H3GridSystem,
31
+ MGRSGridSystem,
32
+ S2GridSystem,
33
+ SimpleLatLonGrid,
34
+ )
35
+
36
+ # Import EarthCatalog modules
37
+ from earthcatalog.ingestion_pipeline import ProcessingConfig, STACIngestionPipeline
38
+
39
+ # Grid systems are now managed by the pipeline internally
40
+
41
+ try:
42
+ from earthcatalog.async_http_client import (
43
+ HAS_ASYNC_HTTP,
44
+ AsyncHTTPClient,
45
+ BatchDownloader,
46
+ ErrorType,
47
+ RequestResult,
48
+ download_stac_items_async,
49
+ )
50
+ except ImportError:
51
+ HAS_ASYNC_HTTP = False
52
+ # Type stubs for mypy
53
+ AsyncHTTPClient = None # type: ignore
54
+ BatchDownloader = None # type: ignore
55
+ RequestResult = None # type: ignore
56
+ ErrorType = None # type: ignore
57
+ download_stac_items_async = None # type: ignore
58
+
59
+
60
@pytest.mark.skipif(not HAS_ASYNC_HTTP, reason="Async HTTP client not available")
class TestAsyncHTTPClient:
    """Test suite for AsyncHTTPClient functionality.

    Exercises the low-level client directly: construction via the async
    context manager, single-URL fetches, concurrent batch downloads, and
    error handling/retries, using ``aioresponses`` to mock HTTP traffic.
    """

    def setup_method(self):
        """Set up test fixtures."""
        # Keyword arguments forwarded verbatim to AsyncHTTPClient(...).
        self.test_config = {
            "concurrent_requests": 10,
            "connection_pool_size": 20,
            "request_timeout": 5,
            "retry_attempts": 2,
            "retry_delay": 0.1,
        }

        # Sample STAC item used as the mocked response payload.
        self.sample_stac_item = {
            "id": "test_item",
            "type": "Feature",
            "geometry": {"type": "Point", "coordinates": [-122.4, 37.8]},
            "properties": {"datetime": "2024-01-01T00:00:00Z", "collection": "test_collection"},
        }

    @pytest.mark.asyncio
    async def test_async_http_client_initialization(self):
        """Test AsyncHTTPClient can be initialized with proper configuration."""
        async with AsyncHTTPClient(**self.test_config) as client:
            # Each constructor kwarg is expected to be mirrored as an
            # attribute, and entering the context must create the session.
            assert client.concurrent_requests == 10
            assert client.connection_pool_size == 20
            assert client.request_timeout == 5
            assert client.retry_attempts == 2
            assert client.retry_delay == 0.1
            assert client.session is not None

    @pytest.mark.skipif(not HAS_ASYNC_TEST_SUPPORT, reason="aioresponses not available")
    @pytest.mark.asyncio
    async def test_single_successful_request(self):
        """Test successful download of a single STAC item."""
        url = "https://example.com/item.json"

        with aioresponses() as mock_responses:
            mock_responses.get(url, payload=self.sample_stac_item)

            async with AsyncHTTPClient(**self.test_config) as client:
                result = await client._fetch_single_url(url)

            # A clean 200 response: payload round-trips, no error recorded,
            # exactly one attempt, and a positive response time measured.
            assert result.success is True
            assert result.data == self.sample_stac_item
            assert result.error is None
            assert result.attempts == 1
            assert result.response_time > 0

    @pytest.mark.skipif(not HAS_ASYNC_TEST_SUPPORT, reason="aioresponses not available")
    @pytest.mark.asyncio
    async def test_batch_download(self):
        """Test concurrent download of multiple STAC items."""
        urls = [f"https://example.com/item_{i}.json" for i in range(100)]

        with aioresponses() as mock_responses:
            # Mock all URLs with successful responses
            for i, url in enumerate(urls):
                item = self.sample_stac_item.copy()
                item["id"] = f"item_{i}"
                mock_responses.get(url, payload=item)

            async with AsyncHTTPClient(**self.test_config) as client:
                start_time = time.time()
                results = await client.download_batch(urls)
                duration = time.time() - start_time

            # Validate results
            assert len(results) == 100
            successful = [r for r in results if r.success]
            assert len(successful) == 100

            # Should complete much faster than sequential (under 2 seconds)
            assert duration < 2.0

            # Validate data integrity. NOTE(review): this assumes
            # download_batch returns results in request order — confirm
            # against the client implementation.
            for i, result in enumerate(successful):
                assert result.data is not None
                assert result.data["id"] == f"item_{i}"

    @pytest.mark.skipif(not HAS_ASYNC_TEST_SUPPORT, reason="aioresponses not available")
    @pytest.mark.asyncio
    async def test_error_handling_and_retries(self):
        """Test error handling with different HTTP error conditions using mocked responses."""
        # Use mocked responses for fast, reliable testing
        urls = [
            "https://example.com/server_error.json",  # Will mock 500 error
            "https://example.com/not_found.json",  # Will mock 404 error
            "https://example.com/rate_limit.json",  # Will mock 429 error
            "https://example.com/success.json",  # Will mock success
            "https://example.com/timeout.json",  # Will mock timeout
        ]

        # Fast test configuration
        test_config = {
            "concurrent_requests": 5,
            "connection_pool_size": 10,
            "request_timeout": 1,  # Short timeout for fast tests
            "retry_attempts": 2,  # Limited retries for speed
            "retry_delay": 0.01,  # Minimal delay for speed
        }

        with aioresponses() as mock_responses:
            # Mock different error conditions
            mock_responses.get(urls[0], status=500)  # Server error
            mock_responses.get(urls[1], status=404)  # Not found
            mock_responses.get(urls[2], status=429)  # Rate limit
            mock_responses.get(urls[3], payload={"id": "success", "type": "Feature"})  # Success
            # urls[4] not mocked - will timeout quickly

            async with AsyncHTTPClient(**test_config) as client:
                results = await client.download_batch(urls)

            # Validate we got results for all URLs
            assert len(results) == len(urls)

            # Analyze results
            success_count = sum(1 for r in results if r.success)
            failure_count = sum(1 for r in results if not r.success)

            # At least one success (the mocked payload) and one failure.
            assert success_count >= 1, "Should have at least one successful request"
            assert failure_count >= 1, "Should have failed requests"

            # Check that different error types are detected
            error_types = {r.error_type for r in results if not r.success and r.error_type}
            assert len(error_types) >= 1, "Should detect different error types"

            # Validate retry behavior - failed requests should have retry attempts
            retry_counts = [r.attempts for r in results if not r.success]
            if retry_counts:  # Only check if there are failed requests
                max_retries = max(retry_counts)
                assert max_retries >= 1, "Should have retry attempts for failed requests"
196
+
197
@pytest.mark.skipif(not HAS_ASYNC_HTTP, reason="Async HTTP client not available")
class TestBatchDownloader:
    """Exercise BatchDownloader's high-level batched download API."""

    def setup_method(self):
        """Create a fresh BatchDownloader instance before each test."""
        self.downloader = BatchDownloader(
            batch_size=50,
            concurrent_requests=10,
            request_timeout=5,
            retry_attempts=2,
        )

    @pytest.mark.skipif(not HAS_ASYNC_TEST_SUPPORT, reason="aioresponses not available")
    @pytest.mark.asyncio
    async def test_batch_processing_with_memory_management(self):
        """Downloading 1000 items through the batching layer returns them all."""
        total = 1000
        urls = [f"https://example.com/item_{i}.json" for i in range(total)]

        with aioresponses() as mocked:
            # Register one successful JSON payload per URL.
            for idx, endpoint in enumerate(urls):
                mocked.get(
                    endpoint,
                    payload={
                        "id": f"item_{idx}",
                        "type": "Feature",
                        "geometry": {"type": "Point", "coordinates": [0, 0]},
                        "properties": {"datetime": "2024-01-01T00:00:00Z"},
                    },
                )

            # Fetch everything; batching should keep memory bounded.
            items = await self.downloader.download_all(urls)

            # Every item comes back, in request order.
            assert len(items) == total
            for idx, stac_item in enumerate(items):
                assert stac_item["id"] == f"item_{idx}"
231
+
232
class TestAsyncHTTPIntegration:
    """Integration tests for async HTTP with EarthCatalog pipeline.

    Builds a real ProcessingConfig + STACIngestionPipeline around a tiny
    parquet input and checks configuration validation, sync fallback, and
    error-logging behavior of the async download path.
    """

    def setup_method(self):
        """Set up test fixtures."""
        import tempfile

        import pandas as pd

        # Create temporary input file. delete=False keeps it on disk; close
        # the handle immediately so the descriptor is not leaked and the
        # write-by-name below also works on platforms with exclusive locks.
        self.temp_input_file = tempfile.NamedTemporaryFile(suffix=".parquet", delete=False)
        self.temp_input_file.close()
        df = pd.DataFrame({"url": ["https://example.com/test.json"]})
        df.to_parquet(self.temp_input_file.name)

        self.config = ProcessingConfig(
            input_file=self.temp_input_file.name,
            output_catalog="/tmp/test_catalog",
            scratch_location="/tmp/test_scratch",
            grid_system="h3",
            grid_resolution=2,
            temporal_bin="month",
            # Enable async HTTP
            enable_concurrent_http=True,
            concurrent_requests=5,
            batch_size=10,
        )

        # Create a local processor for testing
        from earthcatalog.ingestion_pipeline import LocalProcessor

        self.processor = LocalProcessor()
        self.pipeline = STACIngestionPipeline(self.config, self.processor)

    def teardown_method(self):
        """Clean up test fixtures."""
        import os

        # hasattr guards: setup_method may have failed part-way through.
        if hasattr(self, "temp_input_file") and os.path.exists(self.temp_input_file.name):
            os.unlink(self.temp_input_file.name)
        if hasattr(self, "processor"):
            self.processor.close()

    def test_configuration_validation(self):
        """Test async HTTP configuration validation."""
        # Valid configuration should pass
        self.config.validate()

        # Invalid concurrent_requests should fail
        invalid_config = ProcessingConfig(
            input_file=self.temp_input_file.name,  # Use existing temp file
            output_catalog="/tmp/test_catalog",
            scratch_location="/tmp/test_scratch",
            concurrent_requests=0,
        )

        with pytest.raises(ValueError, match="concurrent_requests must be positive"):
            invalid_config.validate()

    def test_async_fallback_behavior(self):
        """Test graceful fallback when async HTTP is not available."""
        # Create config with async enabled
        config = self.config
        config.enable_concurrent_http = True

        # Mock HAS_ASYNC_HTTP to False to test fallback
        with patch("earthcatalog.ingestion_pipeline.HAS_ASYNC_HTTP", False):
            # Should still create pipeline without errors
            pipeline = STACIngestionPipeline(config, self.processor)

            # Should be able to process small batch (will use sync processing)
            urls = ["https://example.com/item1.json", "https://example.com/item2.json"]

            with patch.object(pipeline, "_download_stac_item") as mock_download:
                mock_download.return_value = {
                    "id": "test_item",
                    "type": "Feature",
                    "geometry": {"type": "Point", "coordinates": [0, 0]},
                    "properties": {"datetime": "2024-01-01T00:00:00Z"},
                }

                # This should use sync processing: one _download_stac_item
                # call per URL instead of the async batch path.
                items = pipeline._download_stac_items_batch_async(urls, 1)
                assert len(items) == 2
                assert mock_download.call_count == 2

    @pytest.mark.skipif(not HAS_ASYNC_HTTP, reason="Async HTTP client not available")
    def test_async_processing_configuration(self):
        """Test that pipeline correctly configures async vs sync processing."""
        # Test that async is enabled in the configuration
        assert self.pipeline.config.enable_concurrent_http is True
        assert self.pipeline.config.concurrent_requests == 5  # As configured in setup_method
        assert self.pipeline.config.batch_size == 10  # As configured in setup_method

        # Test that HAS_ASYNC_HTTP is properly detected
        from earthcatalog.async_http_client import HAS_ASYNC_HTTP

        if HAS_ASYNC_HTTP:
            # Test async configuration is valid
            assert self.pipeline.config.concurrent_requests > 0
            assert self.pipeline.config.request_timeout > 0
            assert self.pipeline.config.retry_attempts > 0
        else:
            # If async not available, should still work
            assert self.pipeline.config.enable_concurrent_http is True  # Config can be True even if not available

    @pytest.mark.skipif(not HAS_ASYNC_TEST_SUPPORT, reason="aioresponses not available")
    @pytest.mark.asyncio
    async def test_performance_improvement_validation(self):
        """Test that async processing provides performance improvement."""
        # Create test URLs
        test_urls = [f"https://example.com/item_{i}.json" for i in range(100)]

        sample_item = {
            "id": "test_item",
            "type": "Feature",
            "geometry": {"type": "Point", "coordinates": [0, 0]},
            "properties": {"datetime": "2024-01-01T00:00:00Z"},
        }

        with aioresponses() as mock_responses:
            # Mock all URLs with small delay to simulate network
            for url in test_urls:
                mock_responses.get(url, payload=sample_item)

            # Test async performance
            start_time = time.time()
            async_items = await download_stac_items_async(urls=test_urls, concurrent_requests=20, batch_size=50)
            async_duration = time.time() - start_time

            # Validate async results
            assert len(async_items) == 100
            assert async_duration < 5.0  # Should complete quickly

    def test_error_logging_compatibility(self):
        """Test that async errors are logged in the same format as sync errors."""
        with patch("earthcatalog.ingestion_pipeline.logger"):
            import os
            import tempfile

            import pandas as pd

            # Create temp input; close the handle first, then write by name.
            with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp_file:
                pass
            df = pd.DataFrame({"url": ["https://invalid-url-fast-fail.example.com/item.json"]})
            df.to_parquet(tmp_file.name)

            from earthcatalog.ingestion_pipeline import LocalProcessor, STACIngestionPipeline

            processor = LocalProcessor()
            try:
                # Create config with very short timeouts to fail fast
                fast_config = ProcessingConfig(
                    input_file=tmp_file.name,
                    output_catalog="/tmp/test_catalog",
                    scratch_location="/tmp/test_scratch",
                    enable_concurrent_http=True,
                    concurrent_requests=1,
                    request_timeout=1,  # Very short timeout for fast failure
                    retry_attempts=1,  # Minimal retries for speed
                    retry_delay=0.01,  # Minimal delay
                )

                pipeline = STACIngestionPipeline(fast_config, processor)

                # Test with URLs that will fail quickly
                failed_urls = ["https://invalid-url-fast-fail.example.com/item.json"]

                # This should fail quickly and generate logs
                items = pipeline._download_stac_items_batch_async(failed_urls, 1)

                # Verify error handling occurred (items should be empty or contain errors)
                assert isinstance(items, list)  # Should return a list even on failure
            finally:
                # Clean up even when the assertions above fail.
                os.unlink(tmp_file.name)
                processor.close()
409
+
410
+
411
@pytest.mark.skipif(not HAS_ASYNC_HTTP, reason="Async HTTP client not available")
class TestAsyncHTTPClientEdgeCases:
    """Edge cases and error conditions for the async HTTP client."""

    @pytest.mark.asyncio
    async def test_client_without_context_manager(self):
        """Fetching before entering the context manager must raise."""
        bare_client = AsyncHTTPClient()

        # No session exists yet, so the fetch is rejected outright.
        with pytest.raises(RuntimeError, match="not initialized"):
            await bare_client._fetch_single_url("https://example.com/test.json")

    @pytest.mark.asyncio
    async def test_empty_url_list(self):
        """An empty URL list yields an empty result list."""
        async with AsyncHTTPClient() as client:
            outcome = await client.download_batch([])
        assert outcome == []

    @pytest.mark.asyncio
    async def test_malformed_urls(self):
        """Malformed URLs fail cleanly with a classified error type."""
        bad_inputs = ["not-a-url", "ftp://invalid-protocol.com", ""]

        async with AsyncHTTPClient(retry_attempts=1) as client:
            outcomes = await client.download_batch(bad_inputs)

        # One result per input; each marked failed with a known error class.
        assert len(outcomes) == 3
        for outcome in outcomes:
            assert outcome.success is False
            assert outcome.error_type in (ErrorType.CONNECTION, ErrorType.PARSE_ERROR)
443
+
444
+
445
def test_import_compatibility():
    """Importing and configuring works even without async dependencies.

    Guards the import structure: ProcessingConfig must be constructible
    with its async-related defaults intact regardless of whether the
    optional async HTTP stack is installed.
    """
    from earthcatalog.ingestion_pipeline import ProcessingConfig

    config = ProcessingConfig(
        input_file="/tmp/test.parquet",
        output_catalog="/tmp/test_catalog",
        scratch_location="/tmp/test_scratch",
    )

    # Defaults: async enabled, 50 concurrent requests, batches of 1000.
    assert config.enable_concurrent_http is True
    assert config.concurrent_requests == 50
    assert config.batch_size == 1000
458
+
459
+
460
def test_backward_compatibility():
    """Test that all existing functionality works unchanged.

    Builds a minimal parquet input with concurrent HTTP explicitly
    disabled and verifies the classic synchronous pipeline surface
    (private download and partition-key helpers) plus config validation.
    """
    import os
    import tempfile

    import pandas as pd

    # Create a temporary parquet path. Close the handle before writing so
    # the write-by-name below also works on platforms with exclusive locks;
    # delete=False keeps the file on disk until we unlink it ourselves.
    with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp_file:
        pass

    processor = None
    try:
        pd.DataFrame({"url": ["https://example.com/test.json"]}).to_parquet(tmp_file.name)

        config = ProcessingConfig(
            input_file=tmp_file.name,
            output_catalog="/tmp/test_catalog",
            scratch_location="/tmp/test_scratch",
            enable_concurrent_http=False,  # Explicitly disable async
        )

        from earthcatalog.ingestion_pipeline import LocalProcessor

        processor = LocalProcessor()
        pipeline = STACIngestionPipeline(config, processor)

        # The legacy synchronous entry points must still exist.
        assert hasattr(pipeline, "_download_stac_item")
        assert hasattr(pipeline, "_compute_partition_key")

        # Configuration validation should work
        config.validate()
    finally:
        # Release resources even when an assertion above fails.
        if processor is not None:
            processor.close()
        os.unlink(tmp_file.name)
493
+
494
+
495
@pytest.mark.skipif(not HAS_ASYNC_HTTP, reason="Async HTTP client not available")
class TestSessionLockAndCleanup:
    """Test session lock and cleanup improvements (c1, c2)."""

    @pytest.mark.asyncio
    async def test_session_lock_prevents_race_condition(self):
        """Concurrent context-manager entries must not corrupt the session."""
        shared_client = AsyncHTTPClient(concurrent_requests=10)

        async def enter_context():
            # Each task opens the shared client; the internal session lock
            # should serialize initialization.
            async with shared_client:
                assert shared_client.session is not None
                return True

        # Five simultaneous entries — racy without the lock.
        outcomes = await asyncio.gather(
            *(enter_context() for _ in range(5)), return_exceptions=True
        )

        # Every task either completed normally or surfaced a clean exception.
        # NOTE(review): this assertion is intentionally permissive.
        for outcome in outcomes:
            assert outcome is True or isinstance(outcome, Exception)

    @pytest.mark.asyncio
    async def test_session_cleanup_on_normal_exit(self):
        """A normal context exit must drop the session reference."""
        client = AsyncHTTPClient()

        async with client:
            assert client.session is not None

        # __aexit__ should have cleared the session.
        assert client.session is None

    @pytest.mark.asyncio
    async def test_session_cleanup_on_exception(self):
        """An exception inside the context must still tear the session down."""
        client = AsyncHTTPClient()

        class _Boom(Exception):
            pass

        try:
            async with client:
                assert client.session is not None
                raise _Boom("Test exception")
        except _Boom:
            pass

        # Cleanup ran despite the exception escaping the body.
        assert client.session is None

    @pytest.mark.asyncio
    async def test_concurrent_session_access_with_lock(self):
        """Many coroutines can safely read the shared session."""
        async with AsyncHTTPClient(concurrent_requests=5) as client:
            # Verify session is created
            assert client.session is not None

            async def session_is_live():
                return client.session is not None

            checks = await asyncio.gather(*(session_is_live() for _ in range(10)))
            assert all(checks)
563
+
564
+
565
@pytest.mark.skipif(not HAS_ASYNC_HTTP, reason="Async HTTP client not available")
class TestDownloadResultAndFailureTracking:
    """Test DownloadResult dataclass and failure tracking (c3)."""

    @pytest.mark.skipif(not HAS_ASYNC_TEST_SUPPORT, reason="aioresponses not available")
    @pytest.mark.asyncio
    async def test_download_result_structure(self):
        """Test DownloadResult dataclass has correct structure."""
        from earthcatalog.async_http_client import DownloadResult

        # Construct directly with hand-built items/failures/metrics.
        result = DownloadResult(
            items=[{"id": "item1", "type": "Feature"}],
            failed_urls=[{"url": "http://fail.com", "error": "404", "error_type": "http_error"}],
            metrics={"total_requests": 2, "success_rate_percent": 50.0},
        )

        assert len(result.items) == 1
        assert len(result.failed_urls) == 1
        assert result.metrics["success_rate_percent"] == 50.0

    @pytest.mark.skipif(not HAS_ASYNC_TEST_SUPPORT, reason="aioresponses not available")
    @pytest.mark.asyncio
    async def test_download_with_failures_tracking(self):
        """Test that download_all_with_failures properly tracks failures."""
        from earthcatalog.async_http_client import BatchDownloader

        urls = [
            "https://example.com/success.json",
            "https://example.com/fail_404.json",
            "https://example.com/success2.json",
        ]

        with aioresponses() as mock_responses:
            # Mock responses: two successes bracketing one 404.
            mock_responses.get(urls[0], payload={"id": "item1", "type": "Feature"})
            mock_responses.get(urls[1], status=404)
            mock_responses.get(urls[2], payload={"id": "item2", "type": "Feature"})

            downloader = BatchDownloader(batch_size=10, concurrent_requests=5, request_timeout=5, retry_attempts=1)

            result = await downloader.download_all_with_failures(urls)

            # Should have 2 successful items. NOTE(review): assumes items
            # preserve request order — confirm in BatchDownloader.
            assert len(result.items) == 2
            assert result.items[0]["id"] == "item1"
            assert result.items[1]["id"] == "item2"

            # Should have 1 failed URL with details
            assert len(result.failed_urls) == 1
            assert result.failed_urls[0]["url"] == urls[1]
            assert "error" in result.failed_urls[0]
            assert "error_type" in result.failed_urls[0]

            # Metrics should be populated
            assert "total_requests" in result.metrics
            assert "success_rate_percent" in result.metrics

    @pytest.mark.skipif(not HAS_ASYNC_TEST_SUPPORT, reason="aioresponses not available")
    @pytest.mark.asyncio
    async def test_download_stac_items_async_with_failures_function(self):
        """Test the convenience function download_stac_items_async_with_failures."""
        from earthcatalog.async_http_client import download_stac_items_async_with_failures

        urls = ["https://example.com/item1.json", "https://example.com/item2.json"]

        with aioresponses() as mock_responses:
            mock_responses.get(urls[0], payload={"id": "item1", "type": "Feature"})
            mock_responses.get(urls[1], status=500)  # Server error

            result = await download_stac_items_async_with_failures(
                urls=urls, concurrent_requests=5, batch_size=10, retry_attempts=1
            )

            # Shape check only: the three result fields have the right types.
            assert isinstance(result.items, list)
            assert isinstance(result.failed_urls, list)
            assert isinstance(result.metrics, dict)
642
+
643
+
644
class TestGridParameterValidation:
    """Test grid parameter validation (h1)."""

    def test_h3_resolution_validation(self):
        """H3 accepts resolutions 0-15 and rejects everything else."""
        for ok in (0, 15, 6):
            H3GridSystem(resolution=ok)

        for bad in (-1, 16):
            with pytest.raises(ValueError, match="H3 resolution must be 0-15"):
                H3GridSystem(resolution=bad)

    def test_s2_resolution_validation(self):
        """S2 accepts resolutions 0-30 and rejects everything else."""
        for ok in (0, 30, 13):
            S2GridSystem(resolution=ok)

        for bad in (-1, 31):
            with pytest.raises(ValueError, match="S2 resolution must be 0-30"):
                S2GridSystem(resolution=bad)

    def test_mgrs_resolution_validation(self):
        """MGRS accepts resolutions 1-5 and rejects everything else."""
        for ok in (1, 5, 3):
            MGRSGridSystem(resolution=ok)

        for bad in (0, 6):
            with pytest.raises(ValueError, match="MGRS resolution must be 1-5"):
                MGRSGridSystem(resolution=bad)

    def test_latlon_resolution_validation(self):
        """SimpleLatLonGrid requires a strictly positive resolution."""
        for ok in (1, 10):
            SimpleLatLonGrid(resolution=ok)

        for bad in (0, -1):
            with pytest.raises(ValueError, match="LatLon resolution must be positive"):
                SimpleLatLonGrid(resolution=bad)
697
+
698
+
699
+ if __name__ == "__main__":
700
+ pytest.main([__file__, "-v"])