climate-ref-core: climate_ref_core-0.8.0-py3-none-any.whl → climate_ref_core-0.9.0-py3-none-any.whl
- climate_ref_core/cmip6_to_cmip7.py +598 -0
- climate_ref_core/dataset_registry.py +43 -0
- climate_ref_core/diagnostics.py +10 -0
- climate_ref_core/env.py +37 -0
- climate_ref_core/esgf/__init__.py +21 -0
- climate_ref_core/esgf/base.py +122 -0
- climate_ref_core/esgf/cmip6.py +119 -0
- climate_ref_core/esgf/fetcher.py +138 -0
- climate_ref_core/esgf/obs4mips.py +94 -0
- climate_ref_core/esgf/registry.py +307 -0
- climate_ref_core/exceptions.py +24 -0
- climate_ref_core/providers.py +143 -17
- climate_ref_core/testing.py +621 -0
- {climate_ref_core-0.8.0.dist-info → climate_ref_core-0.9.0.dist-info}/METADATA +6 -3
- climate_ref_core-0.9.0.dist-info/RECORD +32 -0
- climate_ref_core-0.8.0.dist-info/RECORD +0 -24
- {climate_ref_core-0.8.0.dist-info → climate_ref_core-0.9.0.dist-info}/WHEEL +0 -0
- {climate_ref_core-0.8.0.dist-info → climate_ref_core-0.9.0.dist-info}/licenses/LICENCE +0 -0
- {climate_ref_core-0.8.0.dist-info → climate_ref_core-0.9.0.dist-info}/licenses/NOTICE +0 -0
climate_ref_core/testing.py
@@ -0,0 +1,621 @@
+"""
+Test infrastructure for diagnostic testing.
+
+This module provides:
+- TestCase and TestDataSpecification for defining test scenarios
+- YAML serialization for dataset catalogs (with paths stored separately)
+- RegressionValidator for validating pre-stored outputs
+- Utilities for CMEC bundle validation
+"""
+
+from __future__ import annotations
+
+import shutil
+import sys
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+import pandas as pd
+import yaml
+from attrs import field, frozen
+from loguru import logger
+
+from climate_ref_core.datasets import (
+    DatasetCollection,
+    ExecutionDatasetCollection,
+    Selector,
+    SourceDatasetType,
+)
+from climate_ref_core.diagnostics import ExecutionDefinition, ExecutionResult
+from climate_ref_core.esgf.base import ESGFRequest
+from climate_ref_core.pycmec.metric import CMECMetric
+from climate_ref_core.pycmec.output import CMECOutput
+
+if TYPE_CHECKING:
+    from _pytest.mark.structures import ParameterSet
+
+    from climate_ref_core.diagnostics import Diagnostic
+    from climate_ref_core.providers import DiagnosticProvider
+
+
+@frozen
+class TestCase:
+    """
+    A single test case for a diagnostic.
+
+    Test cases define scenarios for testing, with data resolved via:
+    - `requests`: ESGF requests to fetch data (use `ref test-cases fetch`)
+    - `datasets_file`: Path to a pre-built catalog YAML file
+    """
+
+    name: str
+    """Name of the test case (e.g., 'default', 'short-timeseries')."""
+
+    description: str
+    """Human-readable description of what this test case covers."""
+
+    requests: tuple[ESGFRequest, ...] | None = None
+    """Optional ESGF requests to fetch data for this test case."""
+
+    datasets_file: str | None = None
+    """Path to YAML file with dataset specification (relative to package)."""
+
+
+@frozen
+class TestDataSpecification:
+    """
+    Test data specification for a diagnostic.
+
+    Contains multiple named test cases for testing different input datasets.
+    """
+
+    test_cases: tuple[TestCase, ...] = field(factory=tuple)
+    """Collection of test cases for this diagnostic."""
+
+    def get_case(self, name: str) -> TestCase:
+        """
+        Get a test case by name.
+
+        Parameters
+        ----------
+        name
+            Name of the test case to retrieve
+
+        Returns
+        -------
+        TestCase
+            The matching test case
+
+        Raises
+        ------
+        StopIteration
+            If no test case with that name exists
+        """
+        return next(tc for tc in self.test_cases if tc.name == name)
+
+    def has_case(self, name: str) -> bool:
+        """
+        Check if a test case with the given name exists.
+
+        Parameters
+        ----------
+        name
+            Name of the test case to check
+
+        Returns
+        -------
+        bool
+            True if the test case exists
+        """
+        return any(tc.name == name for tc in self.test_cases)
+
+    @property
+    def case_names(self) -> list[str]:
+        """Get names of all test cases."""
+        return [tc.name for tc in self.test_cases]
+
+
+@frozen
+class TestCasePaths:
+    """
+    Path resolver for test case data.
+
+    Provides access to all paths within a test case directory:
+    - catalog.yaml: Dataset metadata (tracked in git)
+    - catalog.paths.yaml: Local file paths (gitignored)
+    - regression/: Regression outputs (tracked in git)
+
+    Can be constructed from:
+    - A diagnostic + test case name (auto-resolves provider's test-data dir)
+    - An explicit test_data_dir + diagnostic slug + test case name
+    """
+
+    root: Path
+    """The test case directory (test_data_dir / diagnostic_slug / test_case_name)."""
+
+    @classmethod
+    def from_diagnostic(cls, diagnostic: Diagnostic, test_case: str) -> TestCasePaths | None:
+        """
+        Create from a diagnostic, auto-resolving the provider's test-data directory.
+
+        Returns None if the provider's test-data directory cannot be determined
+        (e.g., not a development checkout).
+
+        Parameters
+        ----------
+        diagnostic
+            The diagnostic to get paths for
+        test_case
+            Test case name (e.g., 'default')
+        """
+        test_data_dir = _get_provider_test_data_dir(diagnostic)
+        if test_data_dir is None:
+            return None
+        return cls(root=test_data_dir / diagnostic.slug / test_case)
+
+    @classmethod
+    def from_test_data_dir(
+        cls,
+        test_data_dir: Path,
+        diagnostic_slug: str,
+        test_case: str,
+    ) -> TestCasePaths:
+        """
+        Create from an explicit test data directory.
+
+        Use this when you have a test_data_dir fixture (in tests) or
+        know the base path explicitly.
+
+        Parameters
+        ----------
+        test_data_dir
+            Base test data directory (e.g., from test fixture)
+        diagnostic_slug
+            The diagnostic slug
+        test_case
+            Test case name (e.g., 'default')
+        """
+        return cls(root=test_data_dir / diagnostic_slug / test_case)
+
+    @property
+    def catalog(self) -> Path:
+        """Path to catalog.yaml."""
+        return self.root / "catalog.yaml"
+
+    @property
+    def catalog_paths(self) -> Path:
+        """Path to catalog.paths.yaml (gitignored, contains local file paths)."""
+        return self.root / "catalog.paths.yaml"
+
+    @property
+    def regression(self) -> Path:
+        """Path to regression/ directory."""
+        return self.root / "regression"
+
+    @property
+    def regression_catalog_hash(self) -> Path:
+        """Path to catalog hash file in regression directory."""
+        return self.regression / ".catalog_hash"
+
+    @property
+    def test_data_dir(self) -> Path:
+        """Path to the test-data directory (parent of diagnostic slug dir)."""
+        return self.root.parent.parent
+
+    def exists(self) -> bool:
+        """Check if the test case directory exists."""
+        return self.root.exists()
+
+    def create(self) -> None:
+        """Create the test case directory if it doesn't exist."""
+        self.root.mkdir(parents=True, exist_ok=True)
+
+
+def _get_provider_test_data_dir(diag: Diagnostic) -> Path | None:
+    """
+    Get the test-data directory for a provider's package.
+
+    Returns packages/climate-ref-{provider}/tests/test-data/ or None if unavailable.
+    This will only work if working in a development checkout of the package.
+
+    Parameters
+    ----------
+    diag
+        The diagnostic to get the test data dir for
+    """
+    # TODO: Simplify once providers are in their own packages
+
+    # Use the diagnostic's module to determine the provider package
+    diagnostic_module_name = diag.__class__.__module__.split(".")[0]
+    logger.debug(f"Looking up test data dir for diagnostic module: {diagnostic_module_name}")
+
+    if diagnostic_module_name not in sys.modules:
+        logger.debug(f"Module {diagnostic_module_name} not in sys.modules")
+        return None
+
+    diagnostic_module = sys.modules[diagnostic_module_name]
+    if not hasattr(diagnostic_module, "__file__") or diagnostic_module.__file__ is None:
+        logger.debug(f"Module {diagnostic_module_name} has no __file__ attribute")
+        return None
+
+    # Module: packages/climate-ref-{slug}/src/climate_ref_{slug}/__init__.py
+    # Target: packages/climate-ref-{slug}/tests/test-data/
+    module_path = Path(diagnostic_module.__file__)
+    package_root = module_path.parent.parent.parent  # src -> climate-ref-{slug}
+    tests_dir = package_root / "tests"
+
+    # Only return path if tests/ exists (dev checkout)
+    if not tests_dir.exists():
+        logger.debug(f"Tests dir does not exist (not a dev checkout): {tests_dir}")
+        return None
+
+    test_data_dir = tests_dir / "test-data"
+    logger.debug(f"Diagnostic module path: {module_path}")
+    logger.debug(f"Derived test data dir: {test_data_dir} (exists: {test_data_dir.exists()})")
+
+    return test_data_dir
+
+
+def _get_paths_file(catalog_path: Path) -> Path:
+    """Get the paths file path for a catalog file."""
+    return catalog_path.with_suffix(".paths.yaml")
+
+
+def load_datasets_from_yaml(path: Path) -> ExecutionDatasetCollection:
+    """
+    Load ExecutionDatasetCollection from a YAML file.
+
+    The YAML file structure:
+
+    ```yaml
+    cmip6:
+      slug_column: instance_id
+      selector:
+        source_id: ACCESS-ESM1-5
+      datasets:
+        - instance_id: CMIP6.CMIP...
+          variable_id: tas
+          filename: tas_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc
+          # ... other metadata
+    ```
+
+    Paths are loaded from a separate `.paths.yaml` file if it exists,
+    allowing the main catalog to be version-controlled while paths
+    remain user-specific. Multi-file datasets have multiple rows with
+    paths keyed by `{instance_id}::{filename}`.
+    """
+    with open(path) as f:
+        data = yaml.safe_load(f)
+
+    # Load paths from separate file if it exists
+    paths_file = _get_paths_file(path)
+    paths_map: dict[str, str] = {}
+    if paths_file.exists():
+        with open(paths_file) as f:
+            paths_map = yaml.safe_load(f) or {}
+
+    collections: dict[SourceDatasetType | str, DatasetCollection] = {}
+
+    for source_type_str, source_data in data.items():
+        if source_type_str == "_metadata":
+            continue  # Skip metadata section
+        source_type = SourceDatasetType(source_type_str)
+        selector_dict = source_data.get("selector", {})
+        selector: Selector = tuple(sorted(selector_dict.items()))
+        datasets_list = source_data.get("datasets", [])
+        slug_column = source_data.get("slug_column", "instance_id")
+
+        # Merge paths from paths file using composite key
+        for dataset in datasets_list:
+            instance_id = dataset.get(slug_column)
+            filename = dataset.get("filename")
+            if instance_id and filename:
+                # Try composite key first (new format for multi-file datasets)
+                composite_key = f"{instance_id}::{filename}"
+                if composite_key in paths_map:
+                    dataset["path"] = paths_map[composite_key]
+                elif instance_id in paths_map:
+                    # Fall back to simple key for backward compatibility
+                    dataset["path"] = paths_map[instance_id]
+            elif instance_id and instance_id in paths_map:
+                # Legacy format without filename
+                dataset["path"] = paths_map[instance_id]
+
+        collections[source_type] = DatasetCollection(
+            datasets=pd.DataFrame(datasets_list),
+            slug_column=slug_column,
+            selector=selector,
+        )
+
+    return ExecutionDatasetCollection(collections)
+
+
+def get_catalog_hash(path: Path) -> str | None:
+    """
+    Get the hash stored in an existing catalog file.
+
+    Parameters
+    ----------
+    path
+        Path to the catalog YAML file
+
+    Returns
+    -------
+    :
+        The hash string if found, None if file doesn't exist or has no hash
+    """
+    if not path.exists():
+        return None
+    with open(path) as f:
+        data = yaml.safe_load(f)
+    if data is None:
+        return None
+    hash_value = data.get("_metadata", {}).get("hash")
+    return str(hash_value) if hash_value is not None else None
+
+
+def catalog_changed_since_regression(paths: TestCasePaths) -> bool:
+    """
+    Check if the catalog has changed since regression data was generated.
+
+    Returns True if:
+    - No regression data exists (new test case)
+    - No stored catalog hash exists (legacy regression data)
+    - The catalog hash differs from the stored one
+
+    Parameters
+    ----------
+    paths
+        TestCasePaths for the test case
+
+    Returns
+    -------
+    :
+        True if regression should be regenerated, False otherwise
+    """
+    if not paths.regression.exists():
+        return True  # No regression data, needs to run
+    if not paths.regression_catalog_hash.exists():
+        return True  # No stored hash, needs to run
+    if not paths.catalog.exists():
+        return True  # No catalog file, needs to run
+
+    stored_hash = paths.regression_catalog_hash.read_text().strip()
+    current_hash = get_catalog_hash(paths.catalog)
+
+    return stored_hash != current_hash
+
+
+def save_datasets_to_yaml(
+    datasets: ExecutionDatasetCollection,
+    path: Path,
+    *,
+    force: bool = False,
+) -> bool:
+    """
+    Save ExecutionDatasetCollection to a YAML file.
+
+    Paths are saved to a separate `.paths.yaml` file to allow the main
+    catalog to be version-controlled while paths remain user-specific.
+
+    Multi-file datasets (e.g., time-chunked data) are stored as multiple rows,
+    one per file. Paths are keyed by `{instance_id}::{filename}` to support
+    multiple files per dataset.
+
+    By default, the catalog is only written if the content has changed
+    (detected via hash comparison). Use `force=True` to always write.
+
+    Parameters
+    ----------
+    datasets
+        The datasets to save
+    path
+        Path to write the YAML file
+    force
+        If True, always write the catalog even if unchanged
+
+    Returns
+    -------
+    :
+        True if the catalog was written, False if skipped (unchanged)
+    """
+    # Compute the hash first to check if we need to write
+    new_hash = datasets.hash
+
+    if not force:
+        existing_hash = get_catalog_hash(path)
+        if existing_hash == new_hash:
+            logger.info(f"Catalog unchanged, skipping write: {path}")
+            return False
+
+    data: dict[str, Any] = {
+        "_metadata": {"hash": new_hash},
+    }
+    paths_map: dict[str, str] = {}
+
+    for source_type, collection in datasets.items():
+        slug_column = collection.slug_column
+        datasets_records = collection.datasets.to_dict(orient="records")
+
+        # Extract paths to separate map, keeping all rows (including multi-file datasets)
+        filtered_records = []
+        for record in datasets_records:
+            instance_id = record.get(slug_column)
+            if instance_id and "path" in record:  # pragma: no branch
+                file_path = record.pop("path")
+                filename = Path(file_path).name
+                # Store filename in record for matching when loading
+                record["filename"] = filename
+                # Use composite key to support multiple files per instance_id
+                paths_map[f"{instance_id}::{filename}"] = file_path
+            # Sort fields within each record alphabetically
+            sorted_record = dict(sorted(record.items()))
+            filtered_records.append(sorted_record)
+
+        # Sort records by instance_id, then by filename for stability
+        filtered_records.sort(key=lambda r: (r.get(slug_column, ""), r.get("filename", "")))
+
+        data[source_type.value] = {
+            "slug_column": slug_column,
+            "selector": dict(collection.selector),
+            "datasets": filtered_records,
+        }
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+
+    with open(path, "w") as f:
+        yaml.dump(data, f, default_flow_style=False, sort_keys=False)
+
+    paths_file = _get_paths_file(path)
+    with open(paths_file, "w") as f:
+        yaml.dump(paths_map, f, default_flow_style=False, sort_keys=False)
+    logger.info(f"Saved catalog to {path} (paths: {paths_file})")
+    return True
+
+
+def validate_cmec_bundles(diagnostic: Diagnostic, result: ExecutionResult) -> None:
+    """
+    Validate CMEC bundles in an execution result.
+
+    Performs structural validation of the metric and output bundles.
+
+    Raises
+    ------
+    AssertionError
+        If the result is not successful or bundles are invalid
+    """
+    assert result.successful, f"Execution failed: {result}"
+
+    # Validate metric bundle
+    metric_bundle = CMECMetric.load_from_json(result.to_output_path(result.metric_bundle_filename))
+    CMECMetric.model_validate(metric_bundle)
+
+    # Check dimensions match diagnostic facets
+    bundle_dimensions = tuple(metric_bundle.DIMENSIONS.root["json_structure"])
+    assert diagnostic.facets == bundle_dimensions, (
+        f"Bundle dimensions {bundle_dimensions} don't match diagnostic facets {diagnostic.facets}"
+    )
+
+    # Validate output bundle
+    CMECOutput.load_from_json(result.to_output_path(result.output_bundle_filename))
+
+
+@frozen
+class RegressionValidator:
+    """
+    Validate diagnostic outputs from pre-stored regression data.
+
+    Loads regression outputs and validates CMEC bundles without
+    running the diagnostic. Suitable for fast CI validation.
+
+    The regression data is expected at:
+    test_data_dir/{diagnostic}/{test_case}/regression/
+    """
+
+    diagnostic: Diagnostic
+    test_case_name: str
+    test_data_dir: Path
+
+    @property
+    def paths(self) -> TestCasePaths:
+        """Get paths for this test case."""
+        return TestCasePaths.from_test_data_dir(self.test_data_dir, self.diagnostic.slug, self.test_case_name)
+
+    def has_regression_data(self) -> bool:
+        """Check if regression data exists for this test case."""
+        regression_path = self.paths.regression
+        return regression_path.exists() and (regression_path / "diagnostic.json").exists()
+
+    def load_regression_definition(self, tmp_dir: Path) -> ExecutionDefinition:
+        """
+        Load regression data and create an ExecutionDefinition.
+
+        Copies regression data to tmp_dir and replaces path placeholders.
+        """
+        regression_path = self.paths.regression
+        catalog_path = self.paths.catalog
+
+        if not catalog_path.exists():
+            raise FileNotFoundError(
+                f"No catalog file at {catalog_path} for test case datasets. Run `ref test-cases fetch` first."
+            )
+        if not regression_path.exists():
+            raise FileNotFoundError(
+                f"No regression data at {regression_path}. Run 'ref test-cases run --force-regen' first."
+            )
+
+        output_dir = tmp_dir / "output"
+        output_dir.mkdir(parents=True, exist_ok=True)
+        shutil.copytree(regression_path, output_dir, dirs_exist_ok=True)
+
+        # Replace placeholders with actual paths
+        for pattern in ("*.json", "*.txt", "*.yaml", "*.yml"):
+            for file in output_dir.rglob(pattern):
+                content = file.read_text()
+                content = content.replace("<OUTPUT_DIR>", str(output_dir))
+                content = content.replace("<TEST_DATA_DIR>", str(self.test_data_dir))
+                file.write_text(content)
+
+        # Load datasets from catalog
+        datasets: ExecutionDatasetCollection = load_datasets_from_yaml(catalog_path)
+
+        return ExecutionDefinition(
+            diagnostic=self.diagnostic,
+            key=f"test-{self.test_case_name}",
+            datasets=datasets,
+            output_directory=output_dir,
+            root_directory=tmp_dir,
+        )
+
+    def validate(self, definition: ExecutionDefinition) -> None:
+        """Validate CMEC bundles in the regression output."""
+        result = self.diagnostic.build_execution_result(definition)
+        result.to_output_path("out.log").touch()  # Log file not tracked in regression
+        validate_cmec_bundles(self.diagnostic, result)


+def collect_test_case_params(provider: DiagnosticProvider) -> list[ParameterSet]:
+    """
+    Collect all diagnostic/test_case pairs from a provider for parameterized testing.
+
+    Returns a list of pytest.param objects with (diagnostic, test_case_name) tuples,
+    each with an id of "{diagnostic.slug}/{test_case.name}".
+
+    Parameters
+    ----------
+    provider
+        The diagnostic provider to collect test cases from
+
+    Returns
+    -------
+    :
+        List of pytest.param objects for use with @pytest.mark.parametrize
+
+    Example
+    -------
+    ```python
+    from climate_ref_core.testing import collect_test_case_params
+    from my_provider import provider
+
+    test_case_params = collect_test_case_params(provider)
+
+
+    @pytest.mark.parametrize("diagnostic,test_case_name", test_case_params)
+    def test_my_test(diagnostic, test_case_name): ...
+    ```
+    """
+    import pytest  # noqa: PLC0415
+
+    params: list[ParameterSet] = []
+    for diagnostic in provider.diagnostics():
+        if diagnostic.test_data_spec is None:
+            continue
+        for test_case in diagnostic.test_data_spec.test_cases:
+            params.append(
+                pytest.param(
+                    diagnostic,
+                    test_case.name,
+                    id=f"{diagnostic.slug}/{test_case.name}",
+                )
+            )
+    return params
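The new `climate_ref_core/testing.py` module above centres on declaratively defined test cases. A minimal sketch of how a provider might declare and query a `TestDataSpecification` (the case names, descriptions, and `datasets_file` path below are hypothetical, not taken from the package):

```python
from climate_ref_core.testing import TestCase, TestDataSpecification

# Hypothetical specification a diagnostic provider might declare
spec = TestDataSpecification(
    test_cases=(
        TestCase(
            name="default",
            description="Full historical period for a single model",
        ),
        TestCase(
            name="short-timeseries",
            description="Truncated period to exercise time handling",
            datasets_file="test-cases/short-timeseries.yaml",  # hypothetical path
        ),
    )
)

assert spec.has_case("short-timeseries")
default_case = spec.get_case("default")  # raises StopIteration if the name is missing
print(spec.case_names)  # ['default', 'short-timeseries']
```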
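The catalog helpers split dataset metadata from machine-specific file locations: `save_datasets_to_yaml` writes `catalog.yaml` with a content hash under `_metadata` plus a gitignored `catalog.paths.yaml` keyed by `{instance_id}::{filename}`, and skips the write when the hash is unchanged. A sketch of that round trip with made-up dataset values, assuming a `SourceDatasetType.CMIP6` member matching the `cmip6` key in the docstring example:

```python
from pathlib import Path

import pandas as pd

from climate_ref_core.datasets import (
    DatasetCollection,
    ExecutionDatasetCollection,
    SourceDatasetType,
)
from climate_ref_core.testing import load_datasets_from_yaml, save_datasets_to_yaml

# Made-up single-dataset collection; the 'path' column is what gets
# extracted into the gitignored catalog.paths.yaml
collection = DatasetCollection(
    datasets=pd.DataFrame(
        [
            {
                "instance_id": "CMIP6.CMIP...",  # hypothetical instance id
                "variable_id": "tas",
                "path": "/data/tas_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc",
            }
        ]
    ),
    slug_column="instance_id",
    selector=(("source_id", "ACCESS-ESM1-5"),),
)
datasets = ExecutionDatasetCollection({SourceDatasetType.CMIP6: collection})

catalog = Path("test-data/my-diagnostic/default/catalog.yaml")  # hypothetical location
save_datasets_to_yaml(datasets, catalog)  # True: writes catalog.yaml + catalog.paths.yaml
save_datasets_to_yaml(datasets, catalog)  # False: hash unchanged, write skipped
roundtripped = load_datasets_from_yaml(catalog)  # paths merged back via {instance_id}::{filename}
```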
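`RegressionValidator` together with `collect_test_case_params` supports validating pre-stored regression outputs without executing the diagnostic, which is what makes the fast CI path possible. A sketch of such a pytest test, assuming a hypothetical `my_provider` package and a `tests/test-data/` directory next to the test file:

```python
from pathlib import Path

import pytest

from climate_ref_core.testing import RegressionValidator, collect_test_case_params
from my_provider import provider  # hypothetical provider package

TEST_DATA_DIR = Path(__file__).parent / "test-data"


@pytest.mark.parametrize("diagnostic,test_case_name", collect_test_case_params(provider))
def test_regression(diagnostic, test_case_name, tmp_path):
    validator = RegressionValidator(
        diagnostic=diagnostic,
        test_case_name=test_case_name,
        test_data_dir=TEST_DATA_DIR,
    )
    if not validator.has_regression_data():
        pytest.skip("No regression data; run 'ref test-cases run --force-regen' first")

    # Copies regression/ into tmp_path, rewrites the <OUTPUT_DIR> and
    # <TEST_DATA_DIR> placeholders, and loads datasets from catalog.yaml
    definition = validator.load_regression_definition(tmp_path)
    validator.validate(definition)  # structural CMEC bundle validation
```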
{climate_ref_core-0.8.0.dist-info → climate_ref_core-0.9.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: climate-ref-core
-Version: 0.8.0
+Version: 0.9.0
 Summary: Core library for the CMIP Rapid Evaluation Framework
 Author-email: Jared Lewis <jared.lewis@climate-resource.com>, Mika Pflueger <mika.pflueger@climate-resource.com>, Bouwe Andela <b.andela@esciencecenter.nl>, Jiwoo Lee <lee1043@llnl.gov>, Min Xu <xum1@ornl.gov>, Nathan Collier <collierno@ornl.gov>, Dora Hegedus <dora.hegedus@stfc.ac.uk>
 License-Expression: Apache-2.0
@@ -21,10 +21,13 @@ Requires-Python: >=3.11
 Requires-Dist: attrs>=23.2.0
 Requires-Dist: cattrs>=24.1
 Requires-Dist: environs>=11
+Requires-Dist: fastprogress==1.0.5
+Requires-Dist: intake-esgf>=2025.7.16
 Requires-Dist: loguru>=0.7.0
-Requires-Dist: numpy>=
-Requires-Dist: pandas
+Requires-Dist: numpy>=2.0.0
+Requires-Dist: pandas<3,>=2.1.0
 Requires-Dist: pooch<2,>=1.8.0
+Requires-Dist: pyarrow>=17.0.0
 Requires-Dist: pydantic>=2.10.6
 Requires-Dist: pyyaml>=6.0.2
 Requires-Dist: requests
climate_ref_core-0.9.0.dist-info/RECORD
@@ -0,0 +1,32 @@
+climate_ref_core/__init__.py,sha256=MtmPThF2F9_2UODEN6rt1x30LDxrHIZ0wyRN_wsHx5I,127
+climate_ref_core/cmip6_to_cmip7.py,sha256=XQYhGN7Q0EVFADrdh-zu8pOwh9q8cmi_ef_FB2IBg2s,19522
+climate_ref_core/constraints.py,sha256=OlSpXwLRENS4-2LKOhDq-uZ7QznymMwKhtL4Lf6uhso,17761
+climate_ref_core/dataset_registry.py,sha256=9RB7VhYBTlwlJYHAah5jHXw11HxqKzb6SPkPgztBLM8,8358
+climate_ref_core/datasets.py,sha256=cx1L-CDf8uv7_MYtnhx3xu2oYKVeZTK72nLKw0ZImL0,6472
+climate_ref_core/diagnostics.py,sha256=OttDlwn1oU0xQXTgw3kqPIoicXaoudIb7UV5C5cjJ_g,20076
+climate_ref_core/env.py,sha256=4aarSN3NzolFejZAkAk5Kzh8eeNVtKayuDs7aoQpTE4,1953
+climate_ref_core/exceptions.py,sha256=sV_jK66BbmowqXbslVFOVklk8YQtGp0cmci1Ba5eHaw,2354
+climate_ref_core/executor.py,sha256=9mKVkm0S7ikub3_FP7CrgdC4Qj9ynOi0r_DIfzCDS-0,5459
+climate_ref_core/logging.py,sha256=xO0j7OKkuO9JoMtMTnMc62yLO2mJZmhQKAvj-CojblI,7396
+climate_ref_core/providers.py,sha256=g7U399XINhXSC7fOKFwLfcpC1qjEGNrSC6y1TucZhGU,19689
+climate_ref_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+climate_ref_core/testing.py,sha256=TXZSktHFCCTntQ66YzDNiL8IrHHmNlTnxxgfQcX_b0E,20487
+climate_ref_core/esgf/__init__.py,sha256=IK20fJ2hTvDIvfvmL9BqtJmk5dkHOcm6XB51_PI9_F8,600
+climate_ref_core/esgf/base.py,sha256=TrPeUZ54TEfwBkvyQekoNw1yFGANY7LsKHsLy1WRGQ8,3577
+climate_ref_core/esgf/cmip6.py,sha256=EeW6FK8jTjiYdq57TK2AcjdwnE59X-ZhohnJD4-FKTc,2989
+climate_ref_core/esgf/fetcher.py,sha256=dm00nWkc9FXoc6QCgewOtzPAUvQzCLLwSbFr9xcHpi0,4123
+climate_ref_core/esgf/obs4mips.py,sha256=nvnKkSkVRj55WgLcQ5GubqSdyHgh007VsrfcyYNYEIs,2417
+climate_ref_core/esgf/registry.py,sha256=l87DfVjBrrBoAcsb4fvV3ZATzP0WifcuZQr_2m9GhPQ,10032
+climate_ref_core/metric_values/__init__.py,sha256=aHfwRrqzLOmmaBKf1-4q97DnHb8KwmW0Dhwd79ZQiNQ,634
+climate_ref_core/metric_values/typing.py,sha256=n4I3kcEBYPhoWL8aPRCBAHjwNLUrDz40c289iLsOMRM,4677
+climate_ref_core/pycmec/README.md,sha256=PzkovlPpsXqFopsYzz5GRvCAipNRGO1Wo-0gc17qr2Y,36
+climate_ref_core/pycmec/__init__.py,sha256=hXvKGEJQWyAp1i-ndr3D4zuYxkRhcR2LfXgFXlhYOk4,28
+climate_ref_core/pycmec/controlled_vocabulary.py,sha256=kgMEvQ1P6EwXC7sFgdC77IQDo8I0DnnQ2CPXXQaavjE,5944
+climate_ref_core/pycmec/cv_cmip7_aft.yaml,sha256=gx5QyW88pZQVUfiYXmsJtJO6AJg6NbIZgdU4vDIa3fE,4390
+climate_ref_core/pycmec/metric.py,sha256=k6kB_tvXbaixmTPGiUVZyCn5kvyoKwzHEMk7GCNNY4M,18742
+climate_ref_core/pycmec/output.py,sha256=Il4j6sjGrChBioiQS7lQ_CJmofT1BEesmSZbuZDnXN8,6102
+climate_ref_core-0.9.0.dist-info/METADATA,sha256=Fqxsrl0gjA9LsnFMZ9BhtpxYQTIRTpv_cX6D5VhN_yg,3031
+climate_ref_core-0.9.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+climate_ref_core-0.9.0.dist-info/licenses/LICENCE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+climate_ref_core-0.9.0.dist-info/licenses/NOTICE,sha256=4qTlax9aX2-mswYJuVrLqJ9jK1IkN5kSBqfVvYLF3Ws,128
+climate_ref_core-0.9.0.dist-info/RECORD,,
|