earthcatalog 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. earthcatalog/__init__.py +164 -0
  2. earthcatalog/async_http_client.py +1006 -0
  3. earthcatalog/config.py +97 -0
  4. earthcatalog/engines/__init__.py +308 -0
  5. earthcatalog/engines/rustac_engine.py +142 -0
  6. earthcatalog/engines/stac_geoparquet_engine.py +126 -0
  7. earthcatalog/exceptions.py +471 -0
  8. earthcatalog/grid_systems.py +1114 -0
  9. earthcatalog/ingestion_pipeline.py +2281 -0
  10. earthcatalog/input_readers.py +603 -0
  11. earthcatalog/job_tracking.py +485 -0
  12. earthcatalog/pipeline.py +606 -0
  13. earthcatalog/schema_generator.py +911 -0
  14. earthcatalog/spatial_resolver.py +1207 -0
  15. earthcatalog/stac_hooks.py +754 -0
  16. earthcatalog/statistics.py +677 -0
  17. earthcatalog/storage_backends.py +548 -0
  18. earthcatalog/tests/__init__.py +1 -0
  19. earthcatalog/tests/conftest.py +76 -0
  20. earthcatalog/tests/test_all_grids.py +793 -0
  21. earthcatalog/tests/test_async_http.py +700 -0
  22. earthcatalog/tests/test_cli_and_storage.py +230 -0
  23. earthcatalog/tests/test_config.py +245 -0
  24. earthcatalog/tests/test_dask_integration.py +580 -0
  25. earthcatalog/tests/test_e2e_synthetic.py +1624 -0
  26. earthcatalog/tests/test_engines.py +272 -0
  27. earthcatalog/tests/test_exceptions.py +346 -0
  28. earthcatalog/tests/test_file_structure.py +245 -0
  29. earthcatalog/tests/test_input_readers.py +666 -0
  30. earthcatalog/tests/test_integration.py +200 -0
  31. earthcatalog/tests/test_integration_async.py +283 -0
  32. earthcatalog/tests/test_job_tracking.py +603 -0
  33. earthcatalog/tests/test_multi_file_input.py +336 -0
  34. earthcatalog/tests/test_passthrough_hook.py +196 -0
  35. earthcatalog/tests/test_pipeline.py +684 -0
  36. earthcatalog/tests/test_pipeline_components.py +665 -0
  37. earthcatalog/tests/test_schema_generator.py +506 -0
  38. earthcatalog/tests/test_spatial_resolver.py +413 -0
  39. earthcatalog/tests/test_stac_hooks.py +776 -0
  40. earthcatalog/tests/test_statistics.py +477 -0
  41. earthcatalog/tests/test_storage_backends.py +236 -0
  42. earthcatalog/tests/test_validation.py +435 -0
  43. earthcatalog/tests/test_workers.py +653 -0
  44. earthcatalog/validation.py +921 -0
  45. earthcatalog/workers.py +682 -0
  46. earthcatalog-0.2.0.dist-info/METADATA +333 -0
  47. earthcatalog-0.2.0.dist-info/RECORD +50 -0
  48. earthcatalog-0.2.0.dist-info/WHEEL +5 -0
  49. earthcatalog-0.2.0.dist-info/entry_points.txt +3 -0
  50. earthcatalog-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,506 @@
1
+ """Tests for the schema_generator module."""
2
+
3
+ import json
4
+ from datetime import datetime
5
+ from typing import Any
6
+ from unittest.mock import MagicMock, Mock, create_autospec
7
+
8
+ import pytest
9
+
10
+ from earthcatalog import grid_systems
11
+ from earthcatalog.schema_generator import SchemaGenerator
12
+ from earthcatalog.statistics import IngestionStatistics
13
+
14
+
15
+ class MockProcessingConfig:
16
+ """Mock ProcessingConfig for testing."""
17
+
18
+ def __init__(
19
+ self,
20
+ grid_system: str = "h3",
21
+ grid_resolution: int = 6,
22
+ temporal_bin: str = "month",
23
+ enable_global_partitioning: bool = True,
24
+ global_partition_threshold: int = 100,
25
+ output_catalog: str = "./test_catalog",
26
+ input_file: str = "./test_input.parquet",
27
+ sort_key: str = "datetime",
28
+ sort_ascending: bool = True,
29
+ items_per_shard: int = 10000,
30
+ max_workers: int = 4,
31
+ output_format: str = "geoparquet",
32
+ mission_field: str = "dataset_id",
33
+ geojson_path: str | None = None,
34
+ ):
35
+ self.grid_system = grid_system
36
+ self.grid_resolution = grid_resolution
37
+ self.temporal_bin = temporal_bin
38
+ self.enable_global_partitioning = enable_global_partitioning
39
+ self.global_partition_threshold = global_partition_threshold
40
+ self.output_catalog = output_catalog
41
+ self.input_file = input_file
42
+ self.sort_key = sort_key
43
+ self.sort_ascending = sort_ascending
44
+ self.items_per_shard = items_per_shard
45
+ self.max_workers = max_workers
46
+ self.output_format = output_format
47
+ self.mission_field = mission_field
48
+ self.geojson_path = geojson_path
49
+
50
+
51
def create_mock_grid(grid_type: str = "h3", resolution: int = 6) -> Any:
    """Build an autospec'd ``GridSystem`` instance to use as a test double.

    Args:
        grid_type: Value stored on the mock's ``grid_type`` attribute.
        resolution: Value stored on the mock's ``resolution`` attribute.

    Returns:
        The configured mock, whose ``tiles_for_geometry`` always returns
        ``["tile_001", "tile_002"]``.
    """
    grid_double = create_autospec(grid_systems.GridSystem, instance=True)
    grid_double.grid_type, grid_double.resolution = grid_type, resolution
    grid_double.tiles_for_geometry.return_value = ["tile_001", "tile_002"]
    return grid_double
58
+
59
+
60
class MockStorage:
    """In-memory stand-in for a StorageBackend that records written payloads."""

    def __init__(self):
        # Maps each written path to the full payload written to it.
        self.written_files = {}

    def makedirs(self, path):
        """Accept and ignore a directory-creation request."""
        pass

    def open(self, path, mode):
        """Return a context-manager file mock whose writes are recorded.

        Writers such as ``json.dump`` emit their output as many small
        ``write`` calls, so chunks written through one handle are
        concatenated rather than each call replacing the previous one.

        Args:
            path: Key under which the written data is stored.
            mode: File mode; when it contains ``"a"`` the handle appends to
                any existing payload, otherwise its first write replaces it.

        Returns:
            A ``MagicMock`` usable in a ``with`` statement; entering it
            yields the mock itself.
        """
        handle = MagicMock()
        handle.__enter__ = Mock(return_value=handle)
        handle.__exit__ = Mock(return_value=False)

        # Non-append handles truncate, but only once: on their first write.
        # The path is still registered lazily (on write, not on open) so
        # ``exists`` keeps reporting False for files that were never written.
        appending = isinstance(mode, str) and "a" in mode
        truncate_pending = not appending

        def record(data):
            nonlocal truncate_pending
            if truncate_pending or path not in self.written_files:
                self.written_files[path] = data
            else:
                self.written_files[path] += data
            truncate_pending = False

        handle.write = Mock(side_effect=record)
        return handle

    def exists(self, path):
        """Report whether anything has been written to ``path``."""
        return path in self.written_files
78
+
79
+
80
class TestSchemaGenerator:
    """Construction tests: SchemaGenerator keeps the collaborators it is given."""

    @pytest.fixture
    def mock_config(self):
        """Provide a MockProcessingConfig built entirely from defaults."""
        return MockProcessingConfig()

    @pytest.fixture
    def mock_grid(self):
        """Provide the default grid double from create_mock_grid()."""
        return create_mock_grid()

    @pytest.fixture
    def mock_storage(self):
        """Provide a fresh MockStorage."""
        return MockStorage()

    @pytest.fixture
    def mock_stats(self):
        """Provide IngestionStatistics pre-filled with counters and 100 ids."""
        ingestion_stats = IngestionStatistics()
        ingestion_stats.stored_references = 1000
        ingestion_stats.spanning_items_count = 50
        ingestion_stats.items_routed_to_global = 10
        for index in range(100):
            ingestion_stats.unique_ids.add(f"item_{index}")
        return ingestion_stats

    @pytest.fixture
    def generator(self, mock_config, mock_grid, mock_storage):
        """Provide a SchemaGenerator constructed without statistics."""
        return SchemaGenerator(mock_config, mock_grid, mock_storage)

    @pytest.fixture
    def generator_with_stats(self, mock_config, mock_grid, mock_storage, mock_stats):
        """Provide a SchemaGenerator constructed with statistics."""
        return SchemaGenerator(mock_config, mock_grid, mock_storage, mock_stats)

    def test_initialization(self, mock_config, mock_grid, mock_storage):
        """Without stats, the three collaborators are stored and stats is None."""
        instance = SchemaGenerator(mock_config, mock_grid, mock_storage)

        assert instance.config == mock_config
        assert instance.grid == mock_grid
        assert instance.storage == mock_storage
        assert instance.stats is None

    def test_initialization_with_stats(self, mock_config, mock_grid, mock_storage, mock_stats):
        """A stats object passed as the fourth argument is stored on the generator."""
        instance = SchemaGenerator(mock_config, mock_grid, mock_storage, mock_stats)

        assert instance.stats == mock_stats
131
+
132
+
133
class TestSchemaGeneratorBasicSchema:
    """Checks on the overall shape of the generated catalog schema."""

    @pytest.fixture
    def generator(self):
        """Provide a SchemaGenerator wired to default test doubles."""
        return SchemaGenerator(MockProcessingConfig(), create_mock_grid(), MockStorage())

    def test_generate_catalog_schema_returns_dict(self, generator):
        """generate_catalog_schema returns a dict."""
        result = generator.generate_catalog_schema(
            {"partition_1": {"total_items": 100, "new_items": 100, "existing_items": 0}}
        )
        assert isinstance(result, dict)

    def test_schema_has_required_top_level_keys(self, generator):
        """Every expected top-level section is present in the schema."""
        schema = generator.generate_catalog_schema({"partition_1": {"total_items": 100}})

        required_keys = (
            "earthcatalog_version",
            "schema_version",
            "generated_at",
            "catalog_info",
            "spatial_partitioning",
            "temporal_partitioning",
            "partition_structure",
            "global_partitioning",
            "statistics",
            "usage",
        )
        for key in required_keys:
            assert key in schema, f"Missing required key: {key}"

    def test_schema_version_present(self, generator):
        """Both version fields exist and equal "1.0.0"."""
        schema = generator.generate_catalog_schema({})

        assert "earthcatalog_version" in schema
        assert schema["earthcatalog_version"] == "1.0.0"
        assert "schema_version" in schema
        assert schema["schema_version"] == "1.0.0"

    def test_generated_at_timestamp_format(self, generator):
        """generated_at ends with "Z" and parses as an ISO-8601 timestamp."""
        schema = generator.generate_catalog_schema({})

        assert "generated_at" in schema
        stamp = schema["generated_at"]
        assert stamp.endswith("Z")
        # fromisoformat raises ValueError if the value is not ISO formatted;
        # the "Z" suffix is swapped for an explicit UTC offset before parsing.
        datetime.fromisoformat(stamp.replace("Z", "+00:00"))

    def test_schema_json_serializable(self, generator):
        """The schema survives a JSON dump/load round trip unchanged."""
        schema = generator.generate_catalog_schema({"partition_1": {"total_items": 100}})

        # Dumping must not raise.
        encoded = json.dumps(schema)
        assert isinstance(encoded, str)

        # Loading the dump must give back an equal structure.
        round_tripped = json.loads(encoded)
        assert round_tripped == schema
199
+
200
+
201
class TestSchemaGeneratorSpatialPartitioning:
    """Checks on the ``spatial_partitioning`` section of the schema."""

    @pytest.mark.parametrize(
        ("grid_system", "resolution", "expected_keys", "expected_values"),
        [
            pytest.param(
                "h3",
                6,
                ["grid_system", "resolution", "cell_area_km2", "cell_edge_length_km", "coordinate_system"],
                {"grid_system": "h3", "resolution": 6, "coordinate_system": "EPSG:4326"},
                id="h3",
            ),
            pytest.param(
                "s2",
                10,
                ["grid_system", "level", "average_cell_area_km2"],
                {"grid_system": "s2", "level": 10},
                id="s2",
            ),
            pytest.param(
                "mgrs",
                2,
                ["grid_system", "precision", "precision_description"],
                {"grid_system": "mgrs", "precision": 2},
                id="mgrs",
            ),
            pytest.param(
                "utm",
                3,
                ["grid_system", "precision"],
                {"grid_system": "utm", "precision": 3},
                id="utm",
            ),
            pytest.param(
                "latlon",
                1,
                ["grid_system", "cell_size_degrees"],
                {"grid_system": "latlon", "cell_size_degrees": 1},
                id="latlon",
            ),
            pytest.param(
                "itslive",
                10,
                ["grid_system", "cell_size_degrees", "naming_convention"],
                {"grid_system": "itslive", "cell_size_degrees": 10},
                id="itslive",
            ),
        ],
    )
    def test_spatial_partitioning_metadata(self, grid_system, resolution, expected_keys, expected_values):
        """Each grid system reports its expected metadata keys and values."""
        schema = SchemaGenerator(
            MockProcessingConfig(grid_system=grid_system, grid_resolution=resolution),
            create_mock_grid(grid_system, resolution),
            MockStorage(),
        ).generate_catalog_schema({})
        spatial = schema["spatial_partitioning"]

        for key in expected_keys:
            assert key in spatial, f"Missing key '{key}' for {grid_system}"
        for key, value in expected_values.items():
            assert spatial[key] == value, f"Wrong value for '{key}' in {grid_system}"

    def test_geojson_spatial_partitioning(self):
        """A geojson grid is reported as a custom grid."""
        geojson_config = MockProcessingConfig(
            grid_system="geojson", grid_resolution=0, geojson_path="custom.geojson"
        )
        schema = SchemaGenerator(
            geojson_config, create_mock_grid("geojson", 0), MockStorage()
        ).generate_catalog_schema({})
        spatial = schema["spatial_partitioning"]

        assert spatial["grid_system"] == "geojson"
        assert spatial["custom_grid"] is True
247
+
248
+
249
class TestSchemaGeneratorTemporalPartitioning:
    """Checks on the ``temporal_partitioning`` section of the schema."""

    @pytest.mark.parametrize(
        ("temporal_bin", "expected_pattern"),
        [
            ("year", "year=2024/items.parquet"),
            ("month", "year=2024/month=01/items.parquet"),
            ("day", "year=2024/month=01/day=15/items.parquet"),
        ],
    )
    def test_temporal_bin_examples(self, temporal_bin, expected_pattern):
        """Each temporal bin echoes its name and the matching Hive path example."""
        schema = SchemaGenerator(
            MockProcessingConfig(temporal_bin=temporal_bin),
            create_mock_grid(),
            MockStorage(),
        ).generate_catalog_schema({})
        temporal = schema["temporal_partitioning"]

        assert temporal["temporal_bin"] == temporal_bin
        assert temporal["hive_path_examples"] == expected_pattern

    def test_temporal_partitioning_fields(self):
        """The temporal section carries all four expected fields."""
        schema = SchemaGenerator(
            MockProcessingConfig(temporal_bin="month"),
            create_mock_grid(),
            MockStorage(),
        ).generate_catalog_schema({})
        temporal = schema["temporal_partitioning"]

        for field in (
            "temporal_bin",
            "temporal_bin_description",
            "datetime_field",
            "pruning_benefit",
        ):
            assert field in temporal
281
+
282
+
283
class TestSchemaGeneratorGlobalPartitioning:
    """Checks on the ``global_partitioning`` section of the schema."""

    def test_global_partitioning_enabled(self):
        """When enabled, the section reports the flag, threshold and a description."""
        enabled_config = MockProcessingConfig(
            enable_global_partitioning=True, global_partition_threshold=100
        )
        schema = SchemaGenerator(
            enabled_config, create_mock_grid(), MockStorage()
        ).generate_catalog_schema({})
        section = schema["global_partitioning"]

        assert section["enabled"] is True
        assert section["threshold"] == 100
        assert "description" in section

    def test_global_partitioning_disabled(self):
        """When disabled, the section reports ``enabled`` as False."""
        disabled_config = MockProcessingConfig(enable_global_partitioning=False)
        schema = SchemaGenerator(
            disabled_config, create_mock_grid(), MockStorage()
        ).generate_catalog_schema({})
        section = schema["global_partitioning"]

        assert section["enabled"] is False
305
+
306
+
307
class TestSchemaGeneratorStatistics:
    """Checks on how the ``statistics`` section is populated."""

    def test_statistics_from_ingestion_stats(self):
        """With IngestionStatistics supplied, its stored_references is reported."""
        ingestion_stats = IngestionStatistics()
        ingestion_stats.stored_references = 5000
        ingestion_stats.spanning_items_count = 250
        for index in range(1000):
            ingestion_stats.unique_ids.add(f"item_{index}")

        schema = SchemaGenerator(
            MockProcessingConfig(), create_mock_grid(), MockStorage(), ingestion_stats
        ).generate_catalog_schema({})

        # The value is expected to come from the supplied statistics object,
        # since the partition stats passed above are empty.
        reported = schema["statistics"]
        assert "stored_references" in reported
        assert reported["stored_references"] == 5000

    def test_statistics_fallback_without_ingestion_stats(self):
        """Without IngestionStatistics, totals are derived from partition stats."""
        per_partition = {
            "partition_1": {"total_items": 100, "new_items": 80, "existing_items": 20},
            "partition_2": {"total_items": 200, "new_items": 150, "existing_items": 50},
        }

        schema = SchemaGenerator(
            MockProcessingConfig(), create_mock_grid(), MockStorage()
        ).generate_catalog_schema(per_partition)

        reported = schema["statistics"]
        assert reported["stored_references"] == 300  # 100 + 200
        assert reported["unique_granules"] == 300
341
+
342
+
343
class TestSchemaGeneratorPartitionStructure:
    """Checks on the ``partition_structure`` section of the schema."""

    def test_partition_structure_counts(self):
        """Three partitions yield total_partitions == 3 plus the count fields."""
        per_partition = {
            "sentinel2/partition=h3/level=6/abc123/year=2024/month=01": {"total_items": 100},
            "sentinel2/partition=h3/level=6/def456/year=2024/month=02": {"total_items": 200},
            "landsat8/partition=h3/level=6/abc123/year=2024/month=01": {"total_items": 150},
        }

        schema = SchemaGenerator(
            MockProcessingConfig(), create_mock_grid(), MockStorage()
        ).generate_catalog_schema(per_partition)
        structure = schema["partition_structure"]

        assert structure["total_partitions"] == 3
        for count_field in (
            "spatial_partitions_count",
            "temporal_partitions_count",
            "missions_count",
        ):
            assert count_field in structure
363
+
364
+
365
class TestSchemaGeneratorUsage:
    """Checks on the ``usage`` section of the schema."""

    def test_usage_section_exists(self):
        """The usage section contains each expected subsection."""
        schema = SchemaGenerator(
            MockProcessingConfig(), create_mock_grid(), MockStorage()
        ).generate_catalog_schema({})
        usage = schema["usage"]

        for subsection in (
            "file_structure",
            "spatial_partition_resolution",
            "partition_pruning",
            "recommended_tools",
        ):
            assert subsection in usage

    def test_usage_includes_python_example(self):
        """The Python example is present and mentions ``spatial_resolver``."""
        schema = SchemaGenerator(
            MockProcessingConfig(), create_mock_grid(), MockStorage()
        ).generate_catalog_schema({})
        resolution_help = schema["usage"]["spatial_partition_resolution"]

        assert "python_example" in resolution_help
        assert "spatial_resolver" in resolution_help["python_example"]
389
+
390
+
391
class TestSchemaGeneratorHelperMethods:
    """Tests for SchemaGenerator's private helper methods."""

    @pytest.mark.parametrize(
        "hive_parts,expected_result",
        [
            (["year=2024"], "2024"),
            (["year=2024", "month=06"], "2024-06"),
            (["year=2024", "month=06", "day=15"], "2024-06-15"),
            ([], "unknown"),
        ],
        ids=["year", "month", "day", "empty"],
    )
    def test_hive_parts_to_temporal_bin(self, hive_parts, expected_result):
        """Test converting Hive parts to temporal bin format."""
        generator = SchemaGenerator(MockProcessingConfig(), create_mock_grid(), MockStorage())
        result = generator._hive_parts_to_temporal_bin(hive_parts)
        assert result == expected_result

    def test_get_h3_average_area(self):
        """Test H3 area lookup at resolution 6 (about 36.129, within 1%)."""
        generator = SchemaGenerator(MockProcessingConfig(), create_mock_grid(), MockStorage())
        area = generator._get_h3_average_area(6)
        assert area is not None
        assert area == pytest.approx(36.129, rel=0.01)

    def test_get_h3_average_edge_length(self):
        """Test H3 edge length lookup at resolution 6 (about 3.23, within 1%)."""
        generator = SchemaGenerator(MockProcessingConfig(), create_mock_grid(), MockStorage())
        edge = generator._get_h3_average_edge_length(6)
        assert edge is not None
        assert edge == pytest.approx(3.23, rel=0.01)

    def test_get_s2_average_area(self):
        """Test S2 area calculation returns a positive value at level 10."""
        generator = SchemaGenerator(MockProcessingConfig(), create_mock_grid(), MockStorage())
        area = generator._get_s2_average_area(10)
        assert area is not None
        assert area > 0

    # Parametrized rather than looped so that each grid system is reported as
    # its own test case and one failure does not hide the remaining systems.
    @pytest.mark.parametrize(
        "grid_system",
        ["h3", "s2", "mgrs", "utm", "latlon", "itslive", "geojson"],
    )
    def test_get_grid_description(self, grid_system):
        """Test the grid description is a non-empty string for each system."""
        config = MockProcessingConfig(grid_system=grid_system)
        generator = SchemaGenerator(config, create_mock_grid(grid_system), MockStorage())
        description = generator._get_grid_description()
        assert isinstance(description, str)
        assert len(description) > 0
439
+
440
+
441
class TestSchemaGeneratorWriting:
    """Checks that generating a schema writes it through the storage backend."""

    def test_schema_written_to_storage(self):
        """Generating a schema records at least one written file."""
        backend = MockStorage()
        schema_generator = SchemaGenerator(
            MockProcessingConfig(output_catalog="./test_catalog"),
            create_mock_grid(),
            backend,
        )

        schema_generator.generate_catalog_schema({})

        # Any recorded write is enough here; the file name is not checked.
        assert backend.written_files

    def test_schema_custom_filename(self):
        """A custom output_filename appears in a written path."""
        backend = MockStorage()
        schema_generator = SchemaGenerator(
            MockProcessingConfig(output_catalog="./test_catalog"),
            create_mock_grid(),
            backend,
        )

        schema_generator.generate_catalog_schema({}, output_filename="custom_schema.json")

        # The custom name must occur somewhere in at least one recorded path.
        assert any("custom_schema.json" in written for written in backend.written_files)
466
+
467
+
468
class TestSchemaGeneratorEdgeCases:
    """Boundary-condition tests for schema generation."""

    def test_empty_partition_stats(self):
        """Empty partition stats yield a total_partitions of 0."""
        schema = SchemaGenerator(
            MockProcessingConfig(), create_mock_grid(), MockStorage()
        ).generate_catalog_schema({})

        assert schema["partition_structure"]["total_partitions"] == 0

    def test_none_temporal_binning_fallback(self):
        """The configured temporal bin is echoed back in the schema.

        NOTE(review): despite the test name, only the ordinary "month" bin is
        used here; no None or unrecognised bin value is exercised.
        """
        schema = SchemaGenerator(
            MockProcessingConfig(temporal_bin="month"),
            create_mock_grid(),
            MockStorage(),
        ).generate_catalog_schema({})

        assert schema["temporal_partitioning"]["temporal_bin"] == "month"

    def test_unknown_grid_system_description(self):
        """An unrecognised grid system gets an "Unknown grid system" description."""
        schema_generator = SchemaGenerator(
            MockProcessingConfig(grid_system="unknown_grid"),
            create_mock_grid("unknown_grid"),
            MockStorage(),
        )

        assert "Unknown grid system" in schema_generator._get_grid_description()

    def test_large_partition_stats(self):
        """One hundred distinct partitions yield a total_partitions of 100."""
        per_partition = {}
        for i in range(100):
            partition_key = f"mission/partition=h3/level=6/cell_{i}/year=2024/month=01"
            per_partition[partition_key] = {"total_items": i * 10}

        schema = SchemaGenerator(
            MockProcessingConfig(), create_mock_grid(), MockStorage()
        ).generate_catalog_schema(per_partition)

        assert schema["partition_structure"]["total_partitions"] == 100