earthcatalog 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- earthcatalog/__init__.py +164 -0
- earthcatalog/async_http_client.py +1006 -0
- earthcatalog/config.py +97 -0
- earthcatalog/engines/__init__.py +308 -0
- earthcatalog/engines/rustac_engine.py +142 -0
- earthcatalog/engines/stac_geoparquet_engine.py +126 -0
- earthcatalog/exceptions.py +471 -0
- earthcatalog/grid_systems.py +1114 -0
- earthcatalog/ingestion_pipeline.py +2281 -0
- earthcatalog/input_readers.py +603 -0
- earthcatalog/job_tracking.py +485 -0
- earthcatalog/pipeline.py +606 -0
- earthcatalog/schema_generator.py +911 -0
- earthcatalog/spatial_resolver.py +1207 -0
- earthcatalog/stac_hooks.py +754 -0
- earthcatalog/statistics.py +677 -0
- earthcatalog/storage_backends.py +548 -0
- earthcatalog/tests/__init__.py +1 -0
- earthcatalog/tests/conftest.py +76 -0
- earthcatalog/tests/test_all_grids.py +793 -0
- earthcatalog/tests/test_async_http.py +700 -0
- earthcatalog/tests/test_cli_and_storage.py +230 -0
- earthcatalog/tests/test_config.py +245 -0
- earthcatalog/tests/test_dask_integration.py +580 -0
- earthcatalog/tests/test_e2e_synthetic.py +1624 -0
- earthcatalog/tests/test_engines.py +272 -0
- earthcatalog/tests/test_exceptions.py +346 -0
- earthcatalog/tests/test_file_structure.py +245 -0
- earthcatalog/tests/test_input_readers.py +666 -0
- earthcatalog/tests/test_integration.py +200 -0
- earthcatalog/tests/test_integration_async.py +283 -0
- earthcatalog/tests/test_job_tracking.py +603 -0
- earthcatalog/tests/test_multi_file_input.py +336 -0
- earthcatalog/tests/test_passthrough_hook.py +196 -0
- earthcatalog/tests/test_pipeline.py +684 -0
- earthcatalog/tests/test_pipeline_components.py +665 -0
- earthcatalog/tests/test_schema_generator.py +506 -0
- earthcatalog/tests/test_spatial_resolver.py +413 -0
- earthcatalog/tests/test_stac_hooks.py +776 -0
- earthcatalog/tests/test_statistics.py +477 -0
- earthcatalog/tests/test_storage_backends.py +236 -0
- earthcatalog/tests/test_validation.py +435 -0
- earthcatalog/tests/test_workers.py +653 -0
- earthcatalog/validation.py +921 -0
- earthcatalog/workers.py +682 -0
- earthcatalog-0.2.0.dist-info/METADATA +333 -0
- earthcatalog-0.2.0.dist-info/RECORD +50 -0
- earthcatalog-0.2.0.dist-info/WHEEL +5 -0
- earthcatalog-0.2.0.dist-info/entry_points.txt +3 -0
- earthcatalog-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,776 @@
|
|
|
1
|
+
# test_stac_hooks.py
|
|
2
|
+
"""Tests for STAC fetch hooks module."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
import tempfile
|
|
9
|
+
from typing import Any
|
|
10
|
+
from unittest.mock import MagicMock, patch
|
|
11
|
+
|
|
12
|
+
import pytest
|
|
13
|
+
|
|
14
|
+
from earthcatalog.stac_hooks import (
|
|
15
|
+
CallableSTACHook,
|
|
16
|
+
DefaultSTACHook,
|
|
17
|
+
ModuleSTACHook,
|
|
18
|
+
ScriptSTACHook,
|
|
19
|
+
_normalize_stac_result,
|
|
20
|
+
_normalize_stac_results,
|
|
21
|
+
get_hook,
|
|
22
|
+
parse_hook_config,
|
|
23
|
+
serialize_hook,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
# =============================================================================
|
|
27
|
+
# Sample STAC Item for Testing
|
|
28
|
+
# =============================================================================
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def sample_stac_item(item_id: str = "test-item") -> dict[str, Any]:
    """Build a minimal STAC item dictionary for use in tests.

    A new dictionary (including new nested containers) is built on every
    call, so a caller may mutate the result without affecting other callers.

    Args:
        item_id: Value stored under the item's ``"id"`` key.

    Returns:
        A GeoJSON ``Feature`` dictionary with a whole-globe polygon geometry
        and bounding box, ``datetime`` and ``mission`` properties, and empty
        ``links`` and ``assets``.
    """
    return {
        "type": "Feature",
        "stac_version": "1.0.0",
        "id": item_id,
        "geometry": {
            "type": "Polygon",
            "coordinates": [[[-180, -90], [180, -90], [180, 90], [-180, 90], [-180, -90]]],
        },
        "bbox": [-180, -90, 180, 90],
        "properties": {
            "datetime": "2024-01-15T12:00:00Z",
            "mission": "test-mission",
        },
        "links": [],
        "assets": {},
    }
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# =============================================================================
|
|
52
|
+
# DefaultSTACHook Tests
|
|
53
|
+
# =============================================================================
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class TestDefaultSTACHook:
    """Tests for DefaultSTACHook."""

    def test_to_config(self):
        """The default hook serializes to the config string ``"default"``."""
        hook = DefaultSTACHook()
        assert hook.to_config() == "default"

    @patch("requests.get")
    def test_fetch_success(self, mock_get):
        """A successful response yields the item returned by ``response.json()``."""
        mock_response = MagicMock()
        mock_response.json.return_value = sample_stac_item()
        mock_response.raise_for_status = MagicMock()
        mock_get.return_value = mock_response

        hook = DefaultSTACHook()
        result = hook.fetch("https://example.com/item.json")

        assert result is not None
        assert result["id"] == "test-item"
        mock_get.assert_called_once()

    @patch("requests.get")
    def test_fetch_failure_with_retry(self, mock_get):
        """When every request raises, fetch returns None after ``retry_attempts`` calls."""
        from requests.exceptions import RequestException

        mock_get.side_effect = RequestException("Connection error")

        hook = DefaultSTACHook()
        result = hook.fetch("https://example.com/item.json", retry_attempts=2, timeout=1)

        assert result is None
        # One call per attempt: retry_attempts=2 means exactly two requests.
        assert mock_get.call_count == 2

    @patch("requests.get")
    def test_fetch_batch_sequential(self, mock_get):
        """Batch fetch returns one item per URL, in input order."""
        mock_response = MagicMock()
        # The same mocked response object is returned for every request; its
        # json() yields a different item on each successive call.
        mock_response.json.side_effect = [
            sample_stac_item("item-1"),
            sample_stac_item("item-2"),
        ]
        mock_response.raise_for_status = MagicMock()
        mock_get.return_value = mock_response

        hook = DefaultSTACHook()
        results = hook.fetch_batch(["url1", "url2"])

        assert len(results) == 2
        assert results[0]["id"] == "item-1"
        assert results[1]["id"] == "item-2"
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# =============================================================================
|
|
112
|
+
# CallableSTACHook Tests
|
|
113
|
+
# =============================================================================
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class TestCallableSTACHook:
    """Tests for CallableSTACHook."""

    def test_fetch_with_callable(self):
        """fetch() returns the item produced by the wrapped callable."""

        def my_hook(url: str, **kwargs) -> dict[str, Any]:
            return sample_stac_item(f"item-from-{url}")

        hook = CallableSTACHook(my_hook)
        result = hook.fetch("test-url")

        assert result is not None
        assert result["id"] == "item-from-test-url"

    def test_fetch_returns_none_on_exception(self):
        """An exception raised by the callable results in fetch() returning None."""

        def failing_hook(url: str, **kwargs):
            raise ValueError("Hook failed")

        hook = CallableSTACHook(failing_hook)
        result = hook.fetch("test-url")

        assert result is None

    def test_fetch_batch_with_batch_function(self):
        """fetch_batch() uses ``batch_func`` when one is supplied."""

        def single_hook(url: str, **kwargs) -> dict[str, Any]:
            return sample_stac_item(f"single-{url}")

        def batch_hook(urls: list[str], **kwargs) -> list[dict[str, Any]]:
            return [sample_stac_item(f"batch-{url}") for url in urls]

        hook = CallableSTACHook(single_hook, batch_func=batch_hook)
        results = hook.fetch_batch(["url1", "url2"])

        assert len(results) == 2
        # The "batch-" prefix shows the batch function produced the items,
        # not the single-URL callable.
        assert results[0]["id"] == "batch-url1"
        assert results[1]["id"] == "batch-url2"

    def test_fetch_batch_fallback_to_sequential(self):
        """Without ``batch_func``, fetch_batch() produces items via the single callable."""

        def single_hook(url: str, **kwargs) -> dict[str, Any]:
            return sample_stac_item(f"single-{url}")

        hook = CallableSTACHook(single_hook)
        results = hook.fetch_batch(["url1", "url2"])

        assert len(results) == 2
        assert results[0]["id"] == "single-url1"
        assert results[1]["id"] == "single-url2"

    def test_to_config(self):
        """The config string contains ``"callable:"`` and the function's name."""

        def named_func(url: str, **kwargs):
            return sample_stac_item()

        hook = CallableSTACHook(named_func)
        config = hook.to_config()

        assert "callable:" in config
        assert "named_func" in config
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# =============================================================================
|
|
185
|
+
# ModuleSTACHook Tests
|
|
186
|
+
# =============================================================================
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class TestModuleSTACHook:
    """Tests for ModuleSTACHook."""

    def test_to_config(self):
        """The config string is the module path prefixed with ``"module:"``."""
        hook = ModuleSTACHook("mypackage.hooks:generate_item")
        assert hook.to_config() == "module:mypackage.hooks:generate_item"

    def test_import_error_on_invalid_path(self):
        """fetch() raises ImportError when the module cannot be imported."""
        hook = ModuleSTACHook("nonexistent.module:function")

        # Constructing the hook above did not raise; the error surfaces on fetch().
        with pytest.raises(ImportError):
            hook.fetch("test-url")

    def test_import_error_on_missing_function(self):
        """fetch() raises ImportError when the module lacks the named function."""
        # Use a real module but non-existent function
        hook = ModuleSTACHook("json:nonexistent_function")

        with pytest.raises(ImportError):
            hook.fetch("test-url")

    def test_fetch_with_valid_module(self):
        """fetch() returns the item produced by the loaded function."""
        # Function loading is patched out so that no real module is needed:
        # _load_function hands back a mock that returns a sample item.
        with patch.object(ModuleSTACHook, "_load_function") as mock_load:
            mock_func = MagicMock(return_value=sample_stac_item())
            mock_load.return_value = mock_func

            hook = ModuleSTACHook("some.module:func")
            result = hook.fetch("test-url")

            assert result is not None
            assert result["id"] == "test-item"
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
# =============================================================================
|
|
228
|
+
# ScriptSTACHook Tests
|
|
229
|
+
# =============================================================================
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
class TestScriptSTACHook:
    """Tests for ScriptSTACHook."""

    @staticmethod
    def _fetch_via_script(source: str, timeout: int) -> Any:
        """Run ``ScriptSTACHook.fetch("test-url")`` against a temporary script.

        Writes *source* to a temporary ``.py`` file, marks it executable,
        fetches through a hook that uses the ``python3`` interpreter, and
        always removes the file afterwards.

        Args:
            source: Python source code of the script to execute.
            timeout: Timeout value forwarded to ``hook.fetch``.

        Returns:
            Whatever ``hook.fetch`` returns.
        """
        # delete=False: the file must outlive the ``with`` block so the hook
        # can execute it; it is removed explicitly in the ``finally`` below.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
            f.write(source)
            script_path = f.name

        try:
            os.chmod(script_path, 0o755)
            hook = ScriptSTACHook(script_path, interpreter="python3")
            return hook.fetch("test-url", timeout=timeout)
        finally:
            os.unlink(script_path)

    def test_to_config_without_interpreter(self):
        """Without an interpreter the config is ``script:<path>``."""
        hook = ScriptSTACHook("/path/to/script.py")
        assert hook.to_config() == "script:/path/to/script.py"

    def test_to_config_with_interpreter(self):
        """With an interpreter the config is ``script:<interpreter>:<path>``."""
        hook = ScriptSTACHook("/path/to/script.py", interpreter="python3")
        assert hook.to_config() == "script:python3:/path/to/script.py"

    def test_fetch_success(self):
        """A script printing valid item JSON yields that item from fetch()."""
        # The script echoes its first command-line argument back in the item
        # id; the expected id below shows that argument is the fetched URL.
        result = self._fetch_via_script(
            """#!/usr/bin/env python3
import json
import sys
item = {
    "type": "Feature",
    "id": f"item-{sys.argv[1]}",
    "geometry": None,
    "bbox": None,
    "properties": {"datetime": "2024-01-01T00:00:00Z"},
    "links": [],
    "assets": {}
}
print(json.dumps(item))
""",
            timeout=10,
        )

        assert result is not None
        assert result["id"] == "item-test-url"

    def test_fetch_script_timeout(self):
        """A script that outlives the timeout makes fetch() return None."""
        # The script sleeps far longer than the 1-second timeout.
        result = self._fetch_via_script(
            """#!/usr/bin/env python3
import time
time.sleep(100)
""",
            timeout=1,
        )

        assert result is None

    def test_fetch_script_invalid_json(self):
        """A script whose output is not valid JSON makes fetch() return None."""
        result = self._fetch_via_script(
            """#!/usr/bin/env python3
print("not valid json")
""",
            timeout=10,
        )

        assert result is None

    def test_fetch_script_failure(self):
        """A script exiting with a non-zero status makes fetch() return None."""
        result = self._fetch_via_script(
            """#!/usr/bin/env python3
import sys
sys.exit(1)
""",
            timeout=10,
        )

        assert result is None
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
# =============================================================================
|
|
331
|
+
# parse_hook_config Tests
|
|
332
|
+
# =============================================================================
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
class TestParseHookConfig:
    """Tests for parse_hook_config function."""

    def test_none_returns_default(self):
        """A ``None`` config produces a DefaultSTACHook."""
        hook = parse_hook_config(None)
        assert isinstance(hook, DefaultSTACHook)

    def test_default_string_returns_default(self):
        """The string ``"default"`` produces a DefaultSTACHook."""
        hook = parse_hook_config("default")
        assert isinstance(hook, DefaultSTACHook)

    def test_module_string_returns_module_hook(self):
        """A ``module:`` prefix produces a ModuleSTACHook holding the remainder."""
        hook = parse_hook_config("module:mypackage.hooks:generate_item")
        assert isinstance(hook, ModuleSTACHook)
        # The "module:" prefix is stripped; the rest is kept verbatim.
        assert hook.module_path == "mypackage.hooks:generate_item"

    def test_script_string_returns_script_hook(self):
        """A ``script:`` prefix produces a ScriptSTACHook with the given path."""
        hook = parse_hook_config("script:/path/to/script.py")
        assert isinstance(hook, ScriptSTACHook)
        assert hook.script_path == "/path/to/script.py"

    def test_script_with_interpreter(self):
        """``script:<interpreter>:<path>`` sets both interpreter and script path."""
        hook = parse_hook_config("script:python3:/path/to/script.py")
        assert isinstance(hook, ScriptSTACHook)
        assert hook.script_path == "/path/to/script.py"
        assert hook.interpreter == "python3"

    def test_callable_returns_callable_hook(self):
        """A plain callable is wrapped in a CallableSTACHook."""

        def my_func(url: str, **kwargs):
            return {"id": url}

        hook = parse_hook_config(my_func)
        assert isinstance(hook, CallableSTACHook)

    def test_existing_hook_returned_as_is(self):
        """An existing hook instance is returned as the very same object."""
        original = DefaultSTACHook()
        hook = parse_hook_config(original)
        assert hook is original

    def test_invalid_string_raises_error(self):
        """A string with an unrecognized prefix raises ValueError."""
        with pytest.raises(ValueError):
            parse_hook_config("invalid:config")

    def test_invalid_type_raises_error(self):
        """A config of an unsupported type (here an int) raises ValueError."""
        with pytest.raises(ValueError):
            parse_hook_config(12345)  # type: ignore
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
# =============================================================================
|
|
394
|
+
# get_hook Tests
|
|
395
|
+
# =============================================================================
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
class TestGetHook:
    """Tests for get_hook function (alias for parse_hook_config)."""

    def test_get_hook_default(self):
        """Calling get_hook() with no arguments produces a DefaultSTACHook."""
        hook = get_hook()
        assert isinstance(hook, DefaultSTACHook)

    def test_get_hook_with_string(self):
        """The string ``"default"`` produces a DefaultSTACHook."""
        hook = get_hook("default")
        assert isinstance(hook, DefaultSTACHook)
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
# =============================================================================
|
|
413
|
+
# serialize_hook Tests
|
|
414
|
+
# =============================================================================
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
class TestSerializeHook:
    """Tests for serialize_hook function."""

    def test_serialize_none_returns_default(self):
        """Serializing ``None`` gives ``"default"``."""
        assert serialize_hook(None) == "default"

    def test_serialize_default_hook(self):
        """Serializing a DefaultSTACHook gives ``"default"``."""
        hook = DefaultSTACHook()
        assert serialize_hook(hook) == "default"

    def test_serialize_module_hook(self):
        """Serializing a ModuleSTACHook gives ``module:<module path>``."""
        hook = ModuleSTACHook("mypackage.hooks:func")
        assert serialize_hook(hook) == "module:mypackage.hooks:func"

    def test_serialize_script_hook(self):
        """Serializing a ScriptSTACHook without interpreter gives ``script:<path>``."""
        hook = ScriptSTACHook("/path/to/script")
        assert serialize_hook(hook) == "script:/path/to/script"

    def test_serialize_importable_callable(self):
        """An importable function serializes to ``module:<module>:<name>``."""
        # json.dumps is an importable function
        config = serialize_hook(json.dumps)
        assert config == "module:json:dumps"

    def test_serialize_anonymous_callable_raises_error(self):
        """A callable whose ``__module__`` is ``"__main__"`` cannot be serialized."""
        # Despite the test name, this uses a *named* function rather than a
        # lambda. Per the original author's note, functions defined in
        # __main__ cannot be serialized because they can't be imported on
        # remote workers.

        def main_func(url):
            return {"id": url}

        # Manually set __module__ to __main__ to simulate a function in main
        main_func.__module__ = "__main__"

        with pytest.raises(ValueError, match="Cannot serialize callable"):
            serialize_hook(main_func)
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
# =============================================================================
|
|
462
|
+
# Integration Tests with workers.py
|
|
463
|
+
# =============================================================================
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
class TestWorkersIntegration:
    """Integration tests for STAC hooks with workers module."""

    def test_download_stac_item_with_default_hook(self):
        """_download_stac_item with the default hook returns the fetched item."""
        from earthcatalog.workers import _download_stac_item

        # Patch the HTTP call as seen from stac_hooks so no network is used.
        with patch("earthcatalog.stac_hooks.requests.get") as mock_get:
            mock_response = MagicMock()
            mock_response.json.return_value = sample_stac_item()
            mock_response.raise_for_status = MagicMock()
            mock_get.return_value = mock_response

            result = _download_stac_item(
                "https://example.com/item.json",
                hook_config="default",
            )

            assert result is not None
            assert result["id"] == "test-item"

    def test_download_stac_items_batch_with_default_hook(self):
        """_download_stac_items_batch returns one item per URL, in input order."""
        from earthcatalog.workers import _download_stac_items_batch

        with patch("earthcatalog.stac_hooks.requests.get") as mock_get:
            mock_response = MagicMock()
            # Successive json() calls on the shared response yield item-1, item-2.
            mock_response.json.side_effect = [
                sample_stac_item("item-1"),
                sample_stac_item("item-2"),
            ]
            mock_response.raise_for_status = MagicMock()
            mock_get.return_value = mock_response

            results = _download_stac_items_batch(
                ["url1", "url2"],
                hook_config="default",
            )

            assert len(results) == 2
            assert results[0]["id"] == "item-1"
            assert results[1]["id"] == "item-2"

    def test_get_stac_hook_caching(self):
        """Repeated ``_get_stac_hook("default")`` calls each yield a DefaultSTACHook.

        NOTE(review): despite the test name, object identity between the two
        results (i.e. actual caching) is not asserted here.
        """
        from earthcatalog.workers import _get_stac_hook

        hook1 = _get_stac_hook("default")
        hook2 = _get_stac_hook("default")

        # Both should be DefaultSTACHook instances
        assert isinstance(hook1, DefaultSTACHook)
        assert isinstance(hook2, DefaultSTACHook)
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
# =============================================================================
|
|
522
|
+
# ProcessingConfig Integration Tests
|
|
523
|
+
# =============================================================================
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
class TestProcessingConfigHook:
    """Test STAC hook configuration in ProcessingConfig."""

    @staticmethod
    def _make_input_file() -> str:
        """Create an empty temporary ``.parquet`` file and return its path.

        The caller is responsible for deleting the file.
        """
        # delete=False: the file must still exist after the ``with`` block.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".parquet", delete=False) as f:
            f.write("")  # Just need a file to exist
        return f.name

    @staticmethod
    def _make_config(input_file: str, **overrides: Any) -> Any:
        """Build a ProcessingConfig with the fixed test catalog/scratch paths.

        Args:
            input_file: Path passed as ``input_file``.
            **overrides: Extra keyword arguments forwarded to ProcessingConfig
                (e.g. ``stac_hook``).
        """
        from earthcatalog.ingestion_pipeline import ProcessingConfig

        return ProcessingConfig(
            input_file=input_file,
            output_catalog="/tmp/catalog",
            scratch_location="/tmp/scratch",
            **overrides,
        )

    def test_default_stac_hook(self):
        """When ``stac_hook`` is not given, it defaults to ``"default"``."""
        temp_file = self._make_input_file()

        try:
            config = self._make_config(temp_file)
            assert config.stac_hook == "default"
        finally:
            os.unlink(temp_file)

    def test_stac_hook_serialization(self):
        """``stac_hook`` survives a to_dict()/from_dict() round trip."""
        from earthcatalog.ingestion_pipeline import ProcessingConfig

        temp_file = self._make_input_file()

        try:
            config = self._make_config(temp_file, stac_hook="module:mypackage:myfunc")

            config_dict = config.to_dict()
            assert "stac_hook" in config_dict
            assert config_dict["stac_hook"] == "module:mypackage:myfunc"

            restored = ProcessingConfig.from_dict(config_dict)
            assert restored.stac_hook == "module:mypackage:myfunc"
        finally:
            os.unlink(temp_file)

    def test_stac_hook_validation(self):
        """validate() accepts known hook prefixes and rejects an unknown one."""
        temp_file = self._make_input_file()

        try:
            # Valid hook configs should pass
            for hook in ["default", "module:pkg:func", "script:/path/to/script"]:
                config = self._make_config(temp_file, stac_hook=hook)
                config.validate()  # Should not raise

            # Invalid hook config should fail
            config = self._make_config(temp_file, stac_hook="invalid:hook:config")
            # Constructing the config above did not raise; validate() does.
            with pytest.raises(ValueError, match="stac_hook"):
                config.validate()
        finally:
            os.unlink(temp_file)
|
|
602
|
+
|
|
603
|
+
|
|
604
|
+
# =============================================================================
|
|
605
|
+
# pystac.Item Support Tests
|
|
606
|
+
# =============================================================================
|
|
607
|
+
|
|
608
|
+
|
|
609
|
+
class MockPystacItem:
    """Mock pystac.Item class for testing without pystac dependency.

    Only the ``to_dict()`` method is provided; it returns the dictionary the
    mock was constructed with.
    """

    def __init__(self, item_dict: dict[str, Any]):
        """Store *item_dict* (by reference, not copied) for later retrieval."""
        self._dict = item_dict

    def to_dict(self) -> dict[str, Any]:
        """Return the same dictionary object that was passed to the constructor."""
        return self._dict
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
class TestNormalizeStacResult:
    """Tests for _normalize_stac_result helper function."""

    def test_normalize_dict_returns_as_is(self):
        """A dict input comes back equal to the input."""
        item = sample_stac_item()
        result = _normalize_stac_result(item)
        assert result == item

    def test_normalize_none_returns_none(self):
        """``None`` input yields ``None``."""
        result = _normalize_stac_result(None)
        assert result is None

    def test_normalize_pystac_item_converts_to_dict(self):
        """An object exposing ``to_dict()`` is converted to its dictionary."""
        original_dict = sample_stac_item("pystac-item")
        mock_item = MockPystacItem(original_dict)

        result = _normalize_stac_result(mock_item)

        assert result == original_dict
        assert result["id"] == "pystac-item"

    def test_normalize_invalid_type_returns_none(self):
        """A str, an int and a list each normalize to ``None``."""
        result = _normalize_stac_result("not a valid item")
        assert result is None

        result = _normalize_stac_result(12345)
        assert result is None

        result = _normalize_stac_result(["list", "of", "items"])
        assert result is None

    def test_normalize_to_dict_exception_returns_none(self):
        """If ``to_dict()`` raises, the result is ``None`` rather than an error."""

        class BrokenItem:
            def to_dict(self):
                raise RuntimeError("Failed to convert")

        result = _normalize_stac_result(BrokenItem())
        assert result is None
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
class TestNormalizeStacResults:
    """Tests for _normalize_stac_results batch helper function."""

    def test_normalize_empty_list(self):
        """An empty list normalizes to an empty list."""
        result = _normalize_stac_results([])
        assert result == []

    def test_normalize_list_of_dicts(self):
        """A list of dicts keeps its length and order."""
        items = [sample_stac_item("item-1"), sample_stac_item("item-2")]
        result = _normalize_stac_results(items)

        assert len(result) == 2
        assert result[0]["id"] == "item-1"
        assert result[1]["id"] == "item-2"

    def test_normalize_list_with_pystac_items(self):
        """Objects with ``to_dict()`` and plain dicts can be mixed; order is kept."""
        items = [
            MockPystacItem(sample_stac_item("pystac-1")),
            sample_stac_item("dict-2"),
            MockPystacItem(sample_stac_item("pystac-3")),
        ]

        result = _normalize_stac_results(items)

        assert len(result) == 3
        assert result[0]["id"] == "pystac-1"
        assert result[1]["id"] == "dict-2"
        assert result[2]["id"] == "pystac-3"

    def test_normalize_list_with_none_values(self):
        """``None`` entries are kept in place rather than dropped."""
        items = [sample_stac_item("item-1"), None, sample_stac_item("item-3")]

        result = _normalize_stac_results(items)

        # Length is unchanged, so positions still line up with the input.
        assert len(result) == 3
        assert result[0]["id"] == "item-1"
        assert result[1] is None
        assert result[2]["id"] == "item-3"
|
|
707
|
+
|
|
708
|
+
|
|
709
|
+
class TestCallableSTACHookPystacSupport:
    """Tests for pystac.Item support in CallableSTACHook."""

    def test_fetch_returns_pystac_item(self):
        """fetch() converts an object with ``to_dict()`` into a plain dict."""

        def hook_returning_pystac(url: str, **kwargs) -> MockPystacItem:
            return MockPystacItem(sample_stac_item(f"pystac-{url}"))

        hook = CallableSTACHook(hook_returning_pystac)
        result = hook.fetch("test-url")

        assert result is not None
        assert isinstance(result, dict)
        assert result["id"] == "pystac-test-url"

    def test_fetch_batch_returns_pystac_items(self):
        """fetch_batch() converts every item returned by ``batch_func`` to a dict."""

        def single_hook(url: str, **kwargs) -> MockPystacItem:
            return MockPystacItem(sample_stac_item(f"single-{url}"))

        def batch_hook(urls: list[str], **kwargs) -> list[MockPystacItem]:
            return [MockPystacItem(sample_stac_item(f"batch-{url}")) for url in urls]

        hook = CallableSTACHook(single_hook, batch_func=batch_hook)
        results = hook.fetch_batch(["url1", "url2"])

        assert len(results) == 2
        assert all(isinstance(r, dict) for r in results)
        assert results[0]["id"] == "batch-url1"
        assert results[1]["id"] == "batch-url2"

    def test_fetch_batch_mixed_returns(self):
        """A batch mixing a to_dict() object, a dict and ``None`` keeps all three."""

        def batch_hook(urls: list[str], **kwargs) -> list[Any]:
            # Ignores the URLs and always returns the same three entries.
            return [
                MockPystacItem(sample_stac_item("pystac-item")),
                sample_stac_item("dict-item"),
                None,
            ]

        # The single-URL callable is a stub; results come from batch_hook.
        hook = CallableSTACHook(lambda url, **kwargs: None, batch_func=batch_hook)
        results = hook.fetch_batch(["url1", "url2", "url3"])

        assert len(results) == 3
        assert results[0]["id"] == "pystac-item"
        assert results[1]["id"] == "dict-item"
        assert results[2] is None
|
|
759
|
+
|
|
760
|
+
|
|
761
|
+
class TestModuleSTACHookPystacSupport:
    """Tests for pystac.Item support in ModuleSTACHook."""

    def test_fetch_with_pystac_return(self):
        """fetch() converts a to_dict() object from the loaded function into a dict."""
        # Function loading is patched out so that no real module is needed.
        with patch.object(ModuleSTACHook, "_load_function") as mock_load:
            # Simulate a function that returns a pystac.Item
            mock_func = MagicMock(return_value=MockPystacItem(sample_stac_item("pystac-module")))
            mock_load.return_value = mock_func

            hook = ModuleSTACHook("some.module:func")
            result = hook.fetch("test-url")

            assert result is not None
            assert isinstance(result, dict)
            assert result["id"] == "pystac-module"
|