climate-ref-core 0.8.1__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- climate_ref_core/cmip6_to_cmip7.py +598 -0
- climate_ref_core/dataset_registry.py +43 -0
- climate_ref_core/diagnostics.py +10 -0
- climate_ref_core/env.py +37 -0
- climate_ref_core/esgf/__init__.py +21 -0
- climate_ref_core/esgf/base.py +122 -0
- climate_ref_core/esgf/cmip6.py +119 -0
- climate_ref_core/esgf/fetcher.py +138 -0
- climate_ref_core/esgf/obs4mips.py +94 -0
- climate_ref_core/esgf/registry.py +307 -0
- climate_ref_core/exceptions.py +24 -0
- climate_ref_core/providers.py +143 -17
- climate_ref_core/testing.py +621 -0
- {climate_ref_core-0.8.1.dist-info → climate_ref_core-0.9.0.dist-info}/METADATA +4 -2
- climate_ref_core-0.9.0.dist-info/RECORD +32 -0
- climate_ref_core-0.8.1.dist-info/RECORD +0 -24
- {climate_ref_core-0.8.1.dist-info → climate_ref_core-0.9.0.dist-info}/WHEEL +0 -0
- {climate_ref_core-0.8.1.dist-info → climate_ref_core-0.9.0.dist-info}/licenses/LICENCE +0 -0
- {climate_ref_core-0.8.1.dist-info → climate_ref_core-0.9.0.dist-info}/licenses/NOTICE +0 -0
|
@@ -0,0 +1,598 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CMIP6 to CMIP7 format converter.
|
|
3
|
+
|
|
4
|
+
This module provides utilities to convert CMIP6 xarray datasets to CMIP7 format,
|
|
5
|
+
following the CMIP7 Global Attributes V1.0 specification (DOI: 10.5281/zenodo.17250297).
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
Key differences between CMIP6 and CMIP7
|
|
9
|
+
---------------------------------------
|
|
10
|
+
- Variable naming: CMIP7 uses branded names like `tas_tavg-h2m-hxy-u` instead of `tas`
|
|
11
|
+
- Branding suffix: `<temporal>-<vertical>-<horizontal>-<area>` labels (e.g., `tavg-h2m-hxy-u`)
|
|
12
|
+
- Variant indices: Changed from integers to prefixed strings (1 -> "r1", "i1", "p1", "f1")
|
|
13
|
+
- New mandatory attributes: license_id
|
|
14
|
+
- table_id: Dropped; the CMOR table name is mapped to a `realm` attribute instead (e.g. Amon -> atmos)
|
|
15
|
+
- Directory structure: MIP-DRS7 specification
|
|
16
|
+
- Filename format: Includes branding suffix, region, and grid_label
|
|
17
|
+
- Removed CMIP6 attributes: further_info_url, grid, member_id, sub_experiment, sub_experiment_id, table_id
|
|
18
|
+
|
|
19
|
+
References
|
|
20
|
+
----------
|
|
21
|
+
- CMIP7 Global Attributes V1.0: https://doi.org/10.5281/zenodo.17250297
|
|
22
|
+
- CMIP7 CVs: https://github.com/WCRP-CMIP/CMIP7_CVs
|
|
23
|
+
- CMIP7 Guidance: https://wcrp-cmip.github.io/cmip7-guidance/
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import re
|
|
29
|
+
import uuid
|
|
30
|
+
from dataclasses import dataclass, field
|
|
31
|
+
from datetime import datetime, timezone
|
|
32
|
+
from typing import TYPE_CHECKING, Any
|
|
33
|
+
|
|
34
|
+
if TYPE_CHECKING:
|
|
35
|
+
import xarray as xr
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# CMIP6 table_id to CMIP7 realm mapping
|
|
39
|
+
TABLE_TO_REALM = dict(
    (
        # Monthly tables
        ("Amon", "atmos"),
        ("Omon", "ocean"),
        ("Lmon", "land"),
        ("LImon", "landIce"),
        ("SImon", "seaIce"),
        ("AERmon", "aerosol"),
        # Daily tables
        ("Oday", "ocean"),
        ("day", "atmos"),
        ("Aday", "atmos"),
        ("Eday", "atmos"),
        ("CFday", "atmos"),
        # Sub-daily tables
        ("3hr", "atmos"),
        ("6hrLev", "atmos"),
        ("6hrPlev", "atmos"),
        ("6hrPlevPt", "atmos"),
        # Fixed-field tables ("fx" carries no realm prefix, so it defaults to atmos)
        ("fx", "atmos"),
        ("Ofx", "ocean"),
        ("Efx", "atmos"),
        ("Lfx", "land"),
    )
)
|
|
60
|
+
|
|
61
|
+
# CMIP6 frequency values (table_id prefix patterns)
|
|
62
|
+
# Identity mapping: every supported frequency label maps to itself.
FREQUENCY_MAP = {label: label for label in ("mon", "day", "3hr", "6hr", "1hr", "yr", "fx")}
|
|
71
|
+
|
|
72
|
+
# CMIP6-only attributes that should be removed when converting to CMIP7
|
|
73
|
+
# These are not part of the CMIP7 Global Attributes specification (V1.0)
|
|
74
|
+
# These may be included in output, but they won't be checked
|
|
75
|
+
CMIP6_ONLY_ATTRIBUTES = set(
    (
        "further_info_url",  # CMIP6-specific URL format, replaced by different mechanism in CMIP7
        "grid",  # Replaced by grid_label in CMIP7
        "member_id",  # Redundant with variant_label, not in CMIP7 spec
        "sub_experiment",  # Not in CMIP7 spec
        "sub_experiment_id",  # Not in CMIP7 spec
        "table_id",  # Not in CMIP7 spec
    )
)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass
class BrandingSuffix:
    """
    The four labels that make up a CMIP7 branding suffix.

    Rendered with ``str()`` as
    ``<temporal_label>-<vertical_label>-<horizontal_label>-<area_label>``,
    for example ``tavg-h2m-hxy-u``.
    """

    # Temporal sampling/aggregation: tavg, tpt, tmax, tmin, tsum, tclm, ti
    temporal_label: str = "tavg"
    # Vertical placement: h2m, h10m, u (unspecified), p19, etc.
    vertical_label: str = "u"
    # Horizontal layout: hxy (gridded), hm (mean), hy (zonal), etc.
    horizontal_label: str = "hxy"
    # Area mask: u (unmasked), lnd, sea, si, etc.
    area_label: str = "u"

    def __str__(self) -> str:
        labels = (
            self.temporal_label,
            self.vertical_label,
            self.horizontal_label,
            self.area_label,
        )
        return "{}-{}-{}-{}".format(*labels)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# Common variable to branding suffix mappings
|
|
104
|
+
# These are based on typical CMIP6 variable definitions
|
|
105
|
+
VARIABLE_BRANDING: dict[str, BrandingSuffix] = {
    name: BrandingSuffix(temporal, vertical, horizontal, area)
    for name, temporal, vertical, horizontal, area in (
        # Atmosphere 2D variables
        ("tas", "tavg", "h2m", "hxy", "u"),
        ("tasmax", "tmax", "h2m", "hxy", "u"),
        ("tasmin", "tmin", "h2m", "hxy", "u"),
        ("pr", "tavg", "u", "hxy", "u"),
        ("psl", "tavg", "u", "hxy", "u"),
        ("ps", "tavg", "u", "hxy", "u"),
        ("uas", "tavg", "h10m", "hxy", "u"),
        ("vas", "tavg", "h10m", "hxy", "u"),
        ("sfcWind", "tavg", "h10m", "hxy", "u"),
        ("hurs", "tavg", "h2m", "hxy", "u"),
        ("huss", "tavg", "h2m", "hxy", "u"),
        ("clt", "tavg", "u", "hxy", "u"),
        ("rsds", "tavg", "u", "hxy", "u"),
        ("rsus", "tavg", "u", "hxy", "u"),
        ("rlds", "tavg", "u", "hxy", "u"),
        ("rlus", "tavg", "u", "hxy", "u"),
        ("rsdt", "tavg", "u", "hxy", "u"),
        ("rsut", "tavg", "u", "hxy", "u"),
        ("rlut", "tavg", "u", "hxy", "u"),
        ("evspsbl", "tavg", "u", "hxy", "u"),
        ("tauu", "tavg", "u", "hxy", "u"),
        ("tauv", "tavg", "u", "hxy", "u"),
        # Ocean 2D variables
        ("tos", "tavg", "d0m", "hxy", "sea"),
        ("sos", "tavg", "d0m", "hxy", "sea"),
        ("zos", "tavg", "u", "hxy", "sea"),
        ("mlotst", "tavg", "u", "hxy", "sea"),
        # Sea ice variables
        ("siconc", "tavg", "u", "hxy", "u"),
        ("sithick", "tavg", "u", "hxy", "si"),
        ("sisnthick", "tavg", "u", "hxy", "si"),
        # Land variables
        ("mrso", "tavg", "u", "hxy", "lnd"),
        ("mrsos", "tavg", "d10cm", "hxy", "lnd"),
        ("mrro", "tavg", "u", "hxy", "lnd"),
        ("snw", "tavg", "u", "hxy", "lnd"),
        ("lai", "tavg", "u", "hxy", "lnd"),
        ("gpp", "tavg", "u", "hxy", "lnd"),
        ("npp", "tavg", "u", "hxy", "lnd"),
        ("nbp", "tavg", "u", "hxy", "lnd"),
        ("cVeg", "tavg", "u", "hxy", "lnd"),
        ("cSoil", "tavg", "u", "hxy", "lnd"),
        ("treeFrac", "tavg", "u", "hxy", "lnd"),
        ("vegFrac", "tavg", "u", "hxy", "lnd"),
        # Fixed fields
        ("areacella", "ti", "u", "hxy", "u"),
        ("areacello", "ti", "u", "hxy", "u"),
        ("sftlf", "ti", "u", "hxy", "u"),
        ("sftof", "ti", "u", "hxy", "u"),
        ("orog", "ti", "u", "hxy", "u"),
    )
}
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def get_branding_suffix(variable_id: str, cell_methods: str | None = None) -> BrandingSuffix:
    """
    Look up, or infer, the CMIP7 branding suffix of a variable.

    Parameters
    ----------
    variable_id
        The CMIP6 variable ID (e.g., "tas", "pr")
    cell_methods
        Optional cell_methods string; only consulted for variables that have
        no entry in ``VARIABLE_BRANDING``

    Returns
    -------
    BrandingSuffix
        The entry stored in ``VARIABLE_BRANDING`` when the variable is known
        (the stored instance itself, not a copy). Otherwise a default suffix
        whose temporal label is switched to ``tmax``/``tmin`` when the name
        ends in "max"/"min" or ``cell_methods`` mentions "maximum"/"minimum".
    """
    try:
        # Known variables take their branding straight from the table
        return VARIABLE_BRANDING[variable_id]
    except KeyError:
        pass

    # Unknown variable: fall back to name / cell_methods heuristics.
    # A missing or empty cell_methods never matches either keyword.
    methods = cell_methods or ""

    if variable_id.endswith("max") or "maximum" in methods:
        return BrandingSuffix(temporal_label="tmax")
    if variable_id.endswith("min") or "minimum" in methods:
        return BrandingSuffix(temporal_label="tmin")
    return BrandingSuffix()
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def get_cmip7_variable_name(variable_id: str, branding: BrandingSuffix | None = None) -> str:
    """
    Build the CMIP7 branded name for a CMIP6 variable.

    Parameters
    ----------
    variable_id
        The CMIP6 variable ID (e.g., "tas")
    branding
        Branding suffix to append; when None it is resolved with
        ``get_branding_suffix(variable_id)``

    Returns
    -------
    str
        ``<variable_id>_<branding suffix>`` (e.g., "tas_tavg-h2m-hxy-u")
    """
    suffix = get_branding_suffix(variable_id) if branding is None else branding
    return "{}_{}".format(variable_id, suffix)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def get_frequency_from_table(table_id: str) -> str:  # noqa: PLR0911
    """
    Extract frequency from CMIP6 table_id.

    The match is a case-insensitive substring heuristic, checked in this
    order: monthly, daily, yearly, decadal, hourly, fixed.

    Parameters
    ----------
    table_id
        CMIP6 table identifier (e.g., "Amon", "Oday", "3hr", "Odec", "IfxGre")

    Returns
    -------
    str
        Frequency string (e.g., "mon", "day", "3hr", "dec", "fx").
        Hourly tables without a numeric prefix (e.g., "AERhr") give "1hr".
        Unrecognised tables fall back to "mon".
    """
    lowered = table_id.lower()

    if "mon" in lowered:
        return "mon"
    if "day" in lowered:
        return "day"
    if "yr" in lowered:
        return "yr"
    if "dec" in lowered:
        # Decadal tables (e.g. "Odec") previously fell through to the "mon" default
        return "dec"
    if "hr" in lowered:
        # Pull out the hour count when one is present ("6hrPlev" -> "6hr")
        hours = re.search(r"(\d+)hr", lowered)
        return f"{hours.group(1)}hr" if hours else "1hr"
    if "fx" in lowered:
        # Substring match so that "IfxAnt"/"IfxGre", where "fx" sits in the
        # middle of the name, are recognised alongside "fx", "Ofx", "Efx"
        return "fx"

    return "mon"  # Default
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def get_realm_from_table(table_id: str) -> str:
    """
    Map a CMIP6 table_id onto a CMIP7 realm.

    Parameters
    ----------
    table_id
        CMIP6 table identifier (e.g., "Amon", "Omon")

    Returns
    -------
    str
        The realm registered for the table in ``TABLE_TO_REALM``
        (e.g., "atmos", "ocean"); "atmos" for any table not listed there
    """
    try:
        return TABLE_TO_REALM[table_id]
    except KeyError:
        # Tables without an entry are treated as atmosphere output
        return "atmos"
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def convert_variant_index(value: int | str, prefix: str) -> str:
    """
    Convert CMIP6 numeric variant index to CMIP7 string format.

    In CMIP6, indices like realization_index were integers (e.g., 1).
    In CMIP7, they are strings with a prefix (e.g., "r1").

    Parameters
    ----------
    value
        The index value. Accepts a string, a Python int, or any integer-like
        object implementing ``__index__`` (such as a numpy integer scalar).
    prefix
        The prefix to use ("r", "i", "p", or "f")

    Returns
    -------
    str
        The CMIP7 format index (e.g., "r1", "i1", "p1", "f1").
        Strings that already start with ``prefix`` are returned unchanged.
        Values that are neither strings nor integer-like give ``"<prefix>1"``.
    """
    if isinstance(value, str):
        # Already has prefix
        if value.startswith(prefix):
            return value
        # Normalise numeric strings ("01" -> "1"); keep anything else verbatim
        try:
            return f"{prefix}{int(value)}"
        except ValueError:
            return f"{prefix}{value}"

    # Accept any integer-like object, not just builtin int. An
    # ``isinstance(value, int)`` check rejects integer scalars that do not
    # subclass int, which then fell through to the "<prefix>1" default below
    # and silently lost the real index.
    if hasattr(value, "__index__"):
        return f"{prefix}{int(value)}"

    return f"{prefix}1"
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
@dataclass
class CMIP7Metadata:
    """
    Default CMIP7 attribute values used during conversion.

    Holds the attributes that are new or changed relative to CMIP6, plus the
    four branding labels and the ``branding_suffix`` string derived from them.
    Based on CMIP7 Global Attributes V1.0 (DOI: 10.5281/zenodo.17250297).
    """

    # Required new attributes
    mip_era: str = "CMIP7"
    region: str = "glb"
    drs_specs: str = "MIP-DRS7"
    data_specs_version: str = "MIP-DS7.1.0.0"
    product: str = "model-output"
    license_id: str = "CC-BY-4.0"

    # Label attributes (the components of branding_suffix)
    temporal_label: str = "tavg"
    vertical_label: str = "u"
    horizontal_label: str = "hxy"
    area_label: str = "u"

    # Derived in __post_init__; not accepted by the constructor
    branding_suffix: str = field(init=False)

    def __post_init__(self) -> None:
        labels = (
            self.temporal_label,
            self.vertical_label,
            self.horizontal_label,
            self.area_label,
        )
        self.branding_suffix = "-".join(format(label) for label in labels)

    @classmethod
    def from_branding(cls, branding: BrandingSuffix, **kwargs: Any) -> CMIP7Metadata:
        """Create metadata whose four labels are copied from a BrandingSuffix."""
        label_names = ("temporal_label", "vertical_label", "horizontal_label", "area_label")
        labels = {label: getattr(branding, label) for label in label_names}
        # Passing a label again through kwargs raises TypeError (duplicate keyword)
        return cls(**labels, **kwargs)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def convert_cmip6_to_cmip7_attrs(
    cmip6_attrs: dict[str, Any],
    variable_id: str | None = None,
    branding: BrandingSuffix | None = None,
) -> dict[str, Any]:
    """
    Convert CMIP6 global attributes to CMIP7 format.

    Based on CMIP7 Global Attributes V1.0 (DOI: 10.5281/zenodo.17250297).

    The input dictionary is not modified; a converted copy is returned.
    ``tracking_id`` and ``creation_date`` are generated on every call, so two
    calls with the same input do not return identical dictionaries.

    Parameters
    ----------
    cmip6_attrs
        Dictionary of CMIP6 global attributes
    variable_id
        Variable ID for determining branding suffix. When None, the
        ``variable_id`` attribute is used, or "unknown" if that is missing too.
    branding
        Optional explicit branding suffix. When None, it is determined from
        ``variable_id`` and the ``cell_methods`` attribute.

    Returns
    -------
    dict
        Dictionary of CMIP7 global attributes
    """
    # Start with a copy of existing attributes
    attrs = dict(cmip6_attrs)

    # Determine variable_id if not provided
    if variable_id is None:
        variable_id = attrs.get("variable_id", "unknown")

    # Get branding suffix
    if branding is None:
        branding = get_branding_suffix(variable_id, attrs.get("cell_methods"))

    # Create CMIP7 metadata
    cmip7_meta = CMIP7Metadata.from_branding(branding)

    # Update mip_era; an existing parent_mip_era is kept as-is
    attrs["mip_era"] = cmip7_meta.mip_era
    attrs["parent_mip_era"] = attrs.get("parent_mip_era", "CMIP6")

    # New/updated CMIP7 attributes
    attrs["region"] = cmip7_meta.region
    attrs["drs_specs"] = cmip7_meta.drs_specs
    attrs["data_specs_version"] = cmip7_meta.data_specs_version
    attrs["product"] = cmip7_meta.product
    attrs["license_id"] = cmip7_meta.license_id

    # Add tracking_id with CMIP7 handle prefix
    attrs["tracking_id"] = f"hdl:21.14107/{uuid.uuid4()}"

    # Add creation_date in ISO format
    attrs["creation_date"] = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Add label attributes
    attrs["temporal_label"] = cmip7_meta.temporal_label
    attrs["vertical_label"] = cmip7_meta.vertical_label
    attrs["horizontal_label"] = cmip7_meta.horizontal_label
    attrs["area_label"] = cmip7_meta.area_label
    attrs["branding_suffix"] = cmip7_meta.branding_suffix

    # Add branded_variable (required in CMIP7)
    attrs["branded_variable"] = f"{variable_id}_{cmip7_meta.branding_suffix}"

    # Convert variant indices from CMIP6 integer to CMIP7 string format
    index_prefixes = (
        ("realization_index", "r"),
        ("initialization_index", "i"),
        ("physics_index", "p"),
        ("forcing_index", "f"),
    )
    for index_name, prefix in index_prefixes:
        if index_name in attrs:
            attrs[index_name] = convert_variant_index(attrs[index_name], prefix)

    # Rebuild variant_label from converted indices.
    # When an index attribute is missing, take that component from the
    # existing variant_label (e.g. "r2i1p1f3") rather than defaulting to 1,
    # so a valid label is not overwritten with "r1i1p1f1".
    existing_label = re.fullmatch(
        r"(r\d+)(i\d+)(p\d+)(f\d+)", str(attrs.get("variant_label", ""))
    )
    fallbacks = existing_label.groups() if existing_label else ("r1", "i1", "p1", "f1")
    attrs["variant_label"] = "".join(
        str(attrs.get(index_name, fallback))
        for (index_name, _), fallback in zip(index_prefixes, fallbacks)
    )

    # Derive the realm attribute from table_id (table_id itself is removed below)
    if "table_id" in attrs:
        old_table_id = attrs["table_id"]
        realm = get_realm_from_table(old_table_id)
        attrs["realm"] = realm
        # Also update frequency if not present
        if "frequency" not in attrs:
            attrs["frequency"] = get_frequency_from_table(old_table_id)
        # Store legacy CMIP6 compound name for reference (optional but recommended)
        attrs["cmip6_compound_name"] = f"{old_table_id}.{variable_id}"

    # Update Conventions (CF version only, per CMIP7 spec)
    attrs["Conventions"] = "CF-1.12"

    # Remove CMIP6-only attributes that are not in CMIP7 spec
    for attr in CMIP6_ONLY_ATTRIBUTES:
        attrs.pop(attr, None)

    return attrs
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
def convert_cmip6_dataset(
    ds: xr.Dataset,
    inplace: bool = False,
) -> xr.Dataset:
    """
    Convert the global attributes of a CMIP6 xarray Dataset to CMIP7 format.

    Only ``ds.attrs`` is rewritten (via ``convert_cmip6_to_cmip7_attrs``);
    data variables are not renamed.

    Parameters
    ----------
    ds
        The CMIP6 xarray Dataset to convert
    inplace
        If True, the attributes of ``ds`` itself are replaced; otherwise the
        conversion is applied to ``ds.copy(deep=False)``

    Returns
    -------
    xr.Dataset
        The dataset carrying the CMIP7-style attributes
    """
    target = ds if inplace else ds.copy(deep=False)

    variable_id = target.attrs.get("variable_id")
    if variable_id is None:
        # No variable_id attribute: use the first data variable that is
        # neither a bounds variable nor a coordinate, if there is one
        variable_id = next(
            (
                str(name)
                for name in target.data_vars
                if not str(name).endswith("_bnds") and name not in target.coords
            ),
            None,
        )

    # With no usable variable_id, branding is left for the attribute converter to resolve
    branding = None if not variable_id else get_branding_suffix(variable_id)
    target.attrs = convert_cmip6_to_cmip7_attrs(
        target.attrs, variable_id=variable_id, branding=branding
    )

    return target
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
def create_cmip7_filename(
    attrs: dict[str, Any],
    time_range: str | None = None,
) -> str:
    """
    Assemble a CMIP7 filename from a dictionary of attributes.

    Layout, following the MIP-DRS7 specification (V1.0):
    <variable_id>_<branding_suffix>_<frequency>_<region>_<grid_label>_<source_id>_<experiment_id>_<variant_label>[_<timeRangeDD>].nc

    Missing attributes are replaced by defaults: "mon" for frequency, "glb"
    for region, "gn" for grid_label and an empty string for everything else.

    Parameters
    ----------
    attrs
        Dictionary containing CMIP7 attributes
    time_range
        Optional time range string (e.g., "190001-190912").
        Format depends on frequency: "YYYY" for yearly, "YYYYMM" for monthly, "YYYYMMDD" for daily.
        Omit for fixed/time-independent variables.

    Returns
    -------
    str
        The CMIP7 filename

    Examples
    --------
    >>> attrs = {
    ...     "variable_id": "tas",
    ...     "branding_suffix": "tavg-h2m-hxy-u",
    ...     "frequency": "mon",
    ...     "region": "glb",
    ...     "grid_label": "g13s",
    ...     "source_id": "CanESM6-MR",
    ...     "experiment_id": "historical",
    ...     "variant_label": "r2i1p1f1",
    ... }
    >>> create_cmip7_filename(attrs, "190001-190912")
    'tas_tavg-h2m-hxy-u_mon_glb_g13s_CanESM6-MR_historical_r2i1p1f1_190001-190912.nc'
    """
    # (attribute name, value used when the attribute is absent), in filename order
    fields_with_defaults = (
        ("variable_id", ""),
        ("branding_suffix", ""),
        ("frequency", "mon"),
        ("region", "glb"),
        ("grid_label", "gn"),
        ("source_id", ""),
        ("experiment_id", ""),
        ("variant_label", ""),
    )
    stem = "_".join(str(attrs.get(key, default)) for key, default in fields_with_defaults)

    # A falsy time_range (None or "") is omitted, as for fixed fields
    if time_range:
        stem = stem + "_" + format(time_range)

    return stem + ".nc"
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
def create_cmip7_path(attrs: dict[str, Any], version: str | None = None) -> str:
    """
    Assemble a CMIP7 directory path from a dictionary of attributes.

    Layout, following the MIP-DRS7 specification (V1.0):
    <drs_specs>/<mip_era>/<activity_id>/<institution_id>/<source_id>/<experiment_id>/
    <variant_label>/<region>/<frequency>/<variable_id>/<branding_suffix>/<grid_label>/<version>

    Missing attributes are replaced by defaults: "MIP-DRS7", "CMIP7", "CMIP",
    "glb", "mon" and "gn" for drs_specs, mip_era, activity_id, region,
    frequency and grid_label; an empty string for the rest.

    Parameters
    ----------
    attrs
        Dictionary containing CMIP7 attributes
    version
        Optional version string (e.g., "v20250622"). If not provided (or empty),
        uses attrs["version"] or defaults to "v1".

    Returns
    -------
    str
        The CMIP7 directory path, joined with "/"

    Examples
    --------
    >>> attrs = {
    ...     "drs_specs": "MIP-DRS7",
    ...     "mip_era": "CMIP7",
    ...     "activity_id": "CMIP",
    ...     "institution_id": "CCCma",
    ...     "source_id": "CanESM6-MR",
    ...     "experiment_id": "historical",
    ...     "variant_label": "r2i1p1f1",
    ...     "region": "glb",
    ...     "frequency": "mon",
    ...     "variable_id": "tas",
    ...     "branding_suffix": "tavg-h2m-hxy-u",
    ...     "grid_label": "g13s",
    ... }
    >>> create_cmip7_path(attrs, "v20250622")
    'MIP-DRS7/CMIP7/CMIP/CCCma/CanESM6-MR/historical/r2i1p1f1/glb/mon/tas/tavg-h2m-hxy-u/g13s/v20250622'
    """
    # (attribute name, value used when the attribute is absent), in path order
    layout = (
        ("drs_specs", "MIP-DRS7"),
        ("mip_era", "CMIP7"),
        ("activity_id", "CMIP"),
        ("institution_id", ""),
        ("source_id", ""),
        ("experiment_id", ""),
        ("variant_label", ""),
        ("region", "glb"),
        ("frequency", "mon"),
        ("variable_id", ""),
        ("branding_suffix", ""),
        ("grid_label", "gn"),
    )
    segments = [str(attrs.get(key, default)) for key, default in layout]

    # An explicit, non-empty version argument wins over attrs["version"]
    resolved_version = version if version else attrs.get("version", "v1")
    segments.append(str(resolved_version))

    return "/".join(segments)
|
|
@@ -68,6 +68,49 @@ def _verify_hash_matches(fname: str | pathlib.Path, known_hash: str) -> bool:
|
|
|
68
68
|
return matches
|
|
69
69
|
|
|
70
70
|
|
|
71
|
+
def validate_registry_cache(
    registry: pooch.Pooch,
    name: str,
) -> list[str]:
    """
    Check every registry entry for a cached file with a matching checksum.

    Nothing is downloaded; files missing from the cache are reported as errors.

    Parameters
    ----------
    registry
        Pooch registry to validate.
    name
        Name of the registry, used as the prefix of each error message.

    Returns
    -------
    list[str]
        One message per entry that has no hash defined, is not present in
        the cache, or fails checksum verification.
        Empty list if all files are valid.
    """
    problems: list[str] = []

    for key, expected_hash in registry.registry.items():
        if not (isinstance(expected_hash, str) and expected_hash):  # pragma: no cover
            problems.append(f"{name}: No hash defined for {key}")
            continue

        cached_path = registry.abspath / key  # type: ignore[attr-defined]
        if cached_path.exists():
            # NOTE(review): only a ValueError is treated as a failure; the
            # value returned by _verify_hash_matches is ignored. Presumably the
            # helper raises on a mismatch - confirm against its implementation.
            try:
                _verify_hash_matches(cached_path, expected_hash)
            except ValueError as exc:
                problems.append(f"{name}: {exc}")
        else:
            problems.append(f"{name}: File not cached: {key}")

    return problems
|
|
112
|
+
|
|
113
|
+
|
|
71
114
|
def fetch_all_files(
|
|
72
115
|
registry: pooch.Pooch,
|
|
73
116
|
name: str,
|
climate_ref_core/diagnostics.py
CHANGED
|
@@ -20,6 +20,7 @@ from climate_ref_core.pycmec.output import CMECOutput
|
|
|
20
20
|
|
|
21
21
|
if TYPE_CHECKING:
|
|
22
22
|
from climate_ref_core.providers import CommandLineDiagnosticProvider, DiagnosticProvider
|
|
23
|
+
from climate_ref_core.testing import TestDataSpecification
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
def ensure_relative_path(path: pathlib.Path | str, root_directory: pathlib.Path) -> pathlib.Path:
|
|
@@ -459,6 +460,14 @@ class AbstractDiagnostic(Protocol):
|
|
|
459
460
|
The provider that provides the diagnostic.
|
|
460
461
|
"""
|
|
461
462
|
|
|
463
|
+
test_data_spec: TestDataSpecification | None
|
|
464
|
+
"""
|
|
465
|
+
Optional specification of test data and test cases for this diagnostic.
|
|
466
|
+
|
|
467
|
+
If provided, defines how to fetch test data from ESGF
|
|
468
|
+
and what test cases are available for testing this diagnostic.
|
|
469
|
+
"""
|
|
470
|
+
|
|
462
471
|
def execute(self, definition: ExecutionDefinition) -> None:
|
|
463
472
|
"""
|
|
464
473
|
Execute the diagnostic on the given configuration.
|
|
@@ -516,6 +525,7 @@ class Diagnostic(AbstractDiagnostic):
|
|
|
516
525
|
"""
|
|
517
526
|
|
|
518
527
|
series: Sequence[SeriesDefinition] = tuple()
|
|
528
|
+
test_data_spec: TestDataSpecification | None = None
|
|
519
529
|
|
|
520
530
|
def __init__(self) -> None:
|
|
521
531
|
super().__init__()
|