climate-ref-core 0.8.1__tar.gz → 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/.gitignore +10 -1
  2. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/PKG-INFO +4 -2
  3. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/pyproject.toml +6 -2
  4. climate_ref_core-0.9.0/src/climate_ref_core/cmip6_to_cmip7.py +598 -0
  5. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/src/climate_ref_core/dataset_registry.py +43 -0
  6. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/src/climate_ref_core/diagnostics.py +10 -0
  7. climate_ref_core-0.9.0/src/climate_ref_core/env.py +72 -0
  8. climate_ref_core-0.9.0/src/climate_ref_core/esgf/__init__.py +21 -0
  9. climate_ref_core-0.9.0/src/climate_ref_core/esgf/base.py +122 -0
  10. climate_ref_core-0.9.0/src/climate_ref_core/esgf/cmip6.py +119 -0
  11. climate_ref_core-0.9.0/src/climate_ref_core/esgf/fetcher.py +138 -0
  12. climate_ref_core-0.9.0/src/climate_ref_core/esgf/obs4mips.py +94 -0
  13. climate_ref_core-0.9.0/src/climate_ref_core/esgf/registry.py +307 -0
  14. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/src/climate_ref_core/exceptions.py +24 -0
  15. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/src/climate_ref_core/providers.py +143 -17
  16. climate_ref_core-0.9.0/src/climate_ref_core/testing.py +621 -0
  17. climate_ref_core-0.9.0/tests/unit/test_cmip6_to_cmip7.py +420 -0
  18. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/test_dataset_registry/test_dataset_registry.py +102 -0
  19. climate_ref_core-0.9.0/tests/unit/test_esgf_base.py +255 -0
  20. climate_ref_core-0.9.0/tests/unit/test_esgf_cmip6.py +113 -0
  21. climate_ref_core-0.9.0/tests/unit/test_esgf_fetcher.py +359 -0
  22. climate_ref_core-0.9.0/tests/unit/test_esgf_obs4mips.py +77 -0
  23. climate_ref_core-0.9.0/tests/unit/test_esgf_registry.py +369 -0
  24. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/test_providers.py +255 -46
  25. climate_ref_core-0.9.0/tests/unit/test_testing.py +913 -0
  26. climate_ref_core-0.8.1/src/climate_ref_core/env.py +0 -35
  27. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/LICENCE +0 -0
  28. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/NOTICE +0 -0
  29. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/README.md +0 -0
  30. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/src/climate_ref_core/__init__.py +0 -0
  31. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/src/climate_ref_core/constraints.py +0 -0
  32. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/src/climate_ref_core/datasets.py +0 -0
  33. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/src/climate_ref_core/executor.py +0 -0
  34. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/src/climate_ref_core/logging.py +0 -0
  35. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/src/climate_ref_core/metric_values/__init__.py +0 -0
  36. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/src/climate_ref_core/metric_values/typing.py +0 -0
  37. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/src/climate_ref_core/py.typed +0 -0
  38. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/src/climate_ref_core/pycmec/README.md +0 -0
  39. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/src/climate_ref_core/pycmec/__init__.py +0 -0
  40. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/src/climate_ref_core/pycmec/controlled_vocabulary.py +0 -0
  41. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/src/climate_ref_core/pycmec/cv_cmip7_aft.yaml +0 -0
  42. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/src/climate_ref_core/pycmec/metric.py +0 -0
  43. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/src/climate_ref_core/pycmec/output.py +0 -0
  44. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/metric_values/test_typing.py +0 -0
  45. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/pycmec/cmec_testdata/cmec_metric_sample.json +0 -0
  46. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/pycmec/cmec_testdata/cmec_output_sample.json +0 -0
  47. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/pycmec/cmec_testdata/cv_sample.yaml +0 -0
  48. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/pycmec/cmec_testdata/test_metric_json_schema.yml +0 -0
  49. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/pycmec/cmec_testdata/test_output_json_schema.yml +0 -0
  50. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/pycmec/conftest.py +0 -0
  51. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/pycmec/test_cmec_metric.py +0 -0
  52. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/pycmec/test_cmec_output.py +0 -0
  53. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/pycmec/test_controlled_vocabulary.py +0 -0
  54. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/test_constraints.py +0 -0
  55. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/test_datasets/dataset_collection_hash.yml +0 -0
  56. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/test_datasets/dataset_collection_obs4mips_hash.yml +0 -0
  57. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/test_datasets/execution_dataset_hash.yml +0 -0
  58. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/test_datasets.py +0 -0
  59. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/test_diagnostics.py +0 -0
  60. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/test_exceptions.py +0 -0
  61. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/test_executor.py +0 -0
  62. {climate_ref_core-0.8.1 → climate_ref_core-0.9.0}/tests/unit/test_logging.py +0 -0
@@ -150,7 +150,7 @@ dmypy.json
150
150
 
151
151
  # Generated output
152
152
  out
153
- .ref
153
+ .ref*
154
154
 
155
155
  # Ignore copied LICENCE/NOTICE files
156
156
  packages/*/LICENCE
@@ -158,3 +158,12 @@ packages/*/NOTICE
158
158
 
159
159
  # Local directory for data
160
160
  /data
161
+
162
+ # Generated SDK
163
+ /climate_ref_client
164
+
165
+ # User-specific catalog paths (test data)
166
+ *.paths.yaml
167
+
168
+ # Helm dependencies
169
+ helm/charts/*
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: climate-ref-core
3
- Version: 0.8.1
3
+ Version: 0.9.0
4
4
  Summary: Core library for the CMIP Rapid Evaluation Framework
5
5
  Author-email: Jared Lewis <jared.lewis@climate-resource.com>, Mika Pflueger <mika.pflueger@climate-resource.com>, Bouwe Andela <b.andela@esciencecenter.nl>, Jiwoo Lee <lee1043@llnl.gov>, Min Xu <xum1@ornl.gov>, Nathan Collier <collierno@ornl.gov>, Dora Hegedus <dora.hegedus@stfc.ac.uk>
6
6
  License-Expression: Apache-2.0
@@ -22,10 +22,12 @@ Requires-Dist: attrs>=23.2.0
22
22
  Requires-Dist: cattrs>=24.1
23
23
  Requires-Dist: environs>=11
24
24
  Requires-Dist: fastprogress==1.0.5
25
+ Requires-Dist: intake-esgf>=2025.7.16
25
26
  Requires-Dist: loguru>=0.7.0
26
27
  Requires-Dist: numpy>=2.0.0
27
- Requires-Dist: pandas>=2.1.0
28
+ Requires-Dist: pandas<3,>=2.1.0
28
29
  Requires-Dist: pooch<2,>=1.8.0
30
+ Requires-Dist: pyarrow>=17.0.0
29
31
  Requires-Dist: pydantic>=2.10.6
30
32
  Requires-Dist: pyyaml>=6.0.2
31
33
  Requires-Dist: requests
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "climate-ref-core"
3
- version = "0.8.1"
3
+ version = "0.9.0"
4
4
  description = "Core library for the CMIP Rapid Evaluation Framework"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -41,10 +41,14 @@ dependencies = [
41
41
  # Not used directly, but required to support some installations
42
42
  "setuptools<81",
43
43
 
44
+ # ESGF data fetching
45
+ "intake-esgf>=2025.7.16",
46
+ "pyarrow>=17.0.0",
47
+
44
48
  # SPEC 0000 constraints
45
49
  # We follow [SPEC-0000](https://scientific-python.org/specs/spec-0000/)
46
50
  # which defines a 2-year support window for key libraries and 3-year window for Python versions
47
- "pandas>=2.1.0",
51
+ "pandas>=2.1.0,<3",
48
52
  "numpy>=2.0.0",
49
53
 
50
54
  # Temporarily pin fastprogress dependency (from intake-esm) due to bug in recent version
@@ -0,0 +1,598 @@
1
+ """
2
+ CMIP6 to CMIP7 format converter.
3
+
4
+ This module provides utilities to convert CMIP6 xarray datasets to CMIP7 format,
5
+ following the CMIP7 Global Attributes V1.0 specification (DOI: 10.5281/zenodo.17250297).
6
+
7
+
8
+ Key differences between CMIP6 and CMIP7
9
+ ---------------------------------------
10
+ - Variable naming: CMIP7 uses branded names like `tas_tavg-h2m-hxy-u` instead of `tas`
11
+ - Branding suffix: `<temporal>-<vertical>-<horizontal>-<area>` labels (e.g., `tavg-h2m-hxy-u`)
12
+ - Variant indices: Changed from integers to prefixed strings (1 -> "r1", "i1", "p1", "f1")
13
+ - New mandatory attributes: license_id
14
+ - table_id: Dropped; a realm attribute (e.g. atmos) derived from the CMOR table name (e.g. Amon) is set instead
15
+ - Directory structure: MIP-DRS7 specification
16
+ - Filename format: Includes branding suffix, region, and grid_label
17
+ - Removed CMIP6 attributes: further_info_url, grid, member_id, sub_experiment, sub_experiment_id, table_id
18
+
19
+ References
20
+ ----------
21
+ - CMIP7 Global Attributes V1.0: https://doi.org/10.5281/zenodo.17250297
22
+ - CMIP7 CVs: https://github.com/WCRP-CMIP/CMIP7_CVs
23
+ - CMIP7 Guidance: https://wcrp-cmip.github.io/cmip7-guidance/
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import re
29
+ import uuid
30
+ from dataclasses import dataclass, field
31
+ from datetime import datetime, timezone
32
+ from typing import TYPE_CHECKING, Any
33
+
34
+ if TYPE_CHECKING:
35
+ import xarray as xr
36
+
37
+
38
# Lookup from a CMIP6 CMOR table name (table_id) to the CMIP7 realm used for it.
# Table names missing from this mapping are handled by get_realm_from_table.
TABLE_TO_REALM: dict[str, str] = {
    # Monthly tables
    "Amon": "atmos",
    "Omon": "ocean",
    "Lmon": "land",
    "LImon": "landIce",
    "SImon": "seaIce",
    "AERmon": "aerosol",
    # Daily tables
    "Oday": "ocean",
    "day": "atmos",
    "Aday": "atmos",
    "Eday": "atmos",
    "CFday": "atmos",
    # Sub-daily tables
    "3hr": "atmos",
    "6hrLev": "atmos",
    "6hrPlev": "atmos",
    "6hrPlevPt": "atmos",
    # Fixed (time-independent) tables; the plain "fx" table defaults to atmos
    "fx": "atmos",
    "Ofx": "ocean",
    "Efx": "atmos",
    "Lfx": "land",
}
60
+
61
# CMIP6 frequency values (table_id prefix patterns).
# Every frequency maps onto itself; the mapping is not referenced by the other
# definitions in this module.
FREQUENCY_MAP = {
    frequency: frequency
    for frequency in ("mon", "day", "3hr", "6hr", "1hr", "yr", "fx")
}
71
+
72
# Global attributes that exist only in CMIP6.
# They are not part of the CMIP7 Global Attributes specification (V1.0), and
# convert_cmip6_to_cmip7_attrs pops each of them from the converted attributes.
# NOTE(review): an earlier note here said these "may be included in output, but
# they won't be checked" - presumably about downstream validation; confirm.
CMIP6_ONLY_ATTRIBUTES = {
    # CMIP6-specific URL format, replaced by different mechanism in CMIP7
    "further_info_url",
    # Replaced by grid_label in CMIP7
    "grid",
    # Redundant with variant_label, not in CMIP7 spec
    "member_id",
    # Not in CMIP7 spec
    "sub_experiment",
    "sub_experiment_id",
    "table_id",
}
83
+
84
+
85
@dataclass
class BrandingSuffix:
    """
    The four labels that make up a CMIP7 branding suffix.

    Rendered with ``str()`` as
    ``<temporal_label>-<vertical_label>-<horizontal_label>-<area_label>``,
    for example ``tavg-h2m-hxy-u``.
    """

    # Temporal sampling: tavg, tpt, tmax, tmin, tsum, tclm, ti
    temporal_label: str = "tavg"
    # Vertical placement: h2m, h10m, u (unspecified), p19, etc.
    vertical_label: str = "u"
    # Horizontal sampling: hxy (gridded), hm (mean), hy (zonal), etc.
    horizontal_label: str = "hxy"
    # Area masking: u (unmasked), lnd, sea, si, etc.
    area_label: str = "u"

    def __str__(self) -> str:
        # Labels are joined in the fixed temporal/vertical/horizontal/area order
        return "{}-{}-{}-{}".format(
            self.temporal_label,
            self.vertical_label,
            self.horizontal_label,
            self.area_label,
        )
101
+
102
+
103
# Branding suffixes for commonly used variables, based on typical CMIP6
# variable definitions. Each entry is written as the rendered suffix
# "<temporal>-<vertical>-<horizontal>-<area>" and expanded into a
# BrandingSuffix instance of its own.
VARIABLE_BRANDING: dict[str, BrandingSuffix] = {
    variable: BrandingSuffix(*suffix.split("-"))
    for variable, suffix in (
        # Atmosphere 2D variables
        ("tas", "tavg-h2m-hxy-u"),
        ("tasmax", "tmax-h2m-hxy-u"),
        ("tasmin", "tmin-h2m-hxy-u"),
        ("pr", "tavg-u-hxy-u"),
        ("psl", "tavg-u-hxy-u"),
        ("ps", "tavg-u-hxy-u"),
        ("uas", "tavg-h10m-hxy-u"),
        ("vas", "tavg-h10m-hxy-u"),
        ("sfcWind", "tavg-h10m-hxy-u"),
        ("hurs", "tavg-h2m-hxy-u"),
        ("huss", "tavg-h2m-hxy-u"),
        ("clt", "tavg-u-hxy-u"),
        ("rsds", "tavg-u-hxy-u"),
        ("rsus", "tavg-u-hxy-u"),
        ("rlds", "tavg-u-hxy-u"),
        ("rlus", "tavg-u-hxy-u"),
        ("rsdt", "tavg-u-hxy-u"),
        ("rsut", "tavg-u-hxy-u"),
        ("rlut", "tavg-u-hxy-u"),
        ("evspsbl", "tavg-u-hxy-u"),
        ("tauu", "tavg-u-hxy-u"),
        ("tauv", "tavg-u-hxy-u"),
        # Ocean 2D variables
        ("tos", "tavg-d0m-hxy-sea"),
        ("sos", "tavg-d0m-hxy-sea"),
        ("zos", "tavg-u-hxy-sea"),
        ("mlotst", "tavg-u-hxy-sea"),
        # Sea ice variables
        ("siconc", "tavg-u-hxy-u"),
        ("sithick", "tavg-u-hxy-si"),
        ("sisnthick", "tavg-u-hxy-si"),
        # Land variables
        ("mrso", "tavg-u-hxy-lnd"),
        ("mrsos", "tavg-d10cm-hxy-lnd"),
        ("mrro", "tavg-u-hxy-lnd"),
        ("snw", "tavg-u-hxy-lnd"),
        ("lai", "tavg-u-hxy-lnd"),
        ("gpp", "tavg-u-hxy-lnd"),
        ("npp", "tavg-u-hxy-lnd"),
        ("nbp", "tavg-u-hxy-lnd"),
        ("cVeg", "tavg-u-hxy-lnd"),
        ("cSoil", "tavg-u-hxy-lnd"),
        ("treeFrac", "tavg-u-hxy-lnd"),
        ("vegFrac", "tavg-u-hxy-lnd"),
        # Fixed fields
        ("areacella", "ti-u-hxy-u"),
        ("areacello", "ti-u-hxy-u"),
        ("sftlf", "ti-u-hxy-u"),
        ("sftof", "ti-u-hxy-u"),
        ("orog", "ti-u-hxy-u"),
    )
}
158
+
159
+
160
def get_branding_suffix(variable_id: str, cell_methods: str | None = None) -> BrandingSuffix:
    """
    Look up, or infer, the CMIP7 branding suffix of a variable.

    Variables listed in ``VARIABLE_BRANDING`` get their predefined suffix
    (the stored instance itself is returned, not a copy). Any other variable
    starts from the default suffix, and only its temporal label is adjusted
    when the name or ``cell_methods`` indicates a maximum or minimum.

    Parameters
    ----------
    variable_id
        The CMIP6 variable ID (e.g., "tas", "pr")
    cell_methods
        Optional cell_methods attribute; searched for "maximum"/"minimum"

    Returns
    -------
    BrandingSuffix
        The branding suffix components
    """
    predefined = VARIABLE_BRANDING.get(variable_id)
    if predefined is not None:
        return predefined

    # A missing or empty cell_methods can never match either keyword
    methods = cell_methods or ""

    # Maximum is tested before minimum, both on the name and on cell_methods
    if variable_id.endswith("max") or "maximum" in methods:
        return BrandingSuffix(temporal_label="tmax")
    if variable_id.endswith("min") or "minimum" in methods:
        return BrandingSuffix(temporal_label="tmin")

    return BrandingSuffix()
190
+
191
+
192
def get_cmip7_variable_name(variable_id: str, branding: BrandingSuffix | None = None) -> str:
    """
    Build the CMIP7 branded name of a CMIP6 variable.

    Parameters
    ----------
    variable_id
        The CMIP6 variable ID (e.g., "tas")
    branding
        Branding suffix to append; looked up via ``get_branding_suffix``
        when omitted

    Returns
    -------
    str
        ``<variable_id>_<branding suffix>`` (e.g., "tas_tavg-h2m-hxy-u")
    """
    suffix = get_branding_suffix(variable_id) if branding is None else branding
    return f"{variable_id}_{suffix}"
211
+
212
+
213
def get_frequency_from_table(table_id: str) -> str:  # noqa: PLR0911
    """
    Extract frequency from CMIP6 table_id.

    The table name is matched case-insensitively against known frequency
    markers. Decadal ("dec"), sub-hourly ("subhr") and fixed-field tables whose
    "fx" marker sits in the middle of the name (e.g. "IfxGre") are recognised
    explicitly rather than falling through to another frequency.

    Parameters
    ----------
    table_id
        CMIP6 table identifier (e.g., "Amon", "Oday", "3hr")

    Returns
    -------
    str
        Frequency string (e.g., "mon", "day", "3hr"); "mon" when no marker
        is recognised
    """
    table = table_id.lower()

    if "mon" in table:
        return "mon"
    if "day" in table:
        return "day"
    if "yr" in table:
        return "yr"
    if "dec" in table:
        return "dec"
    # Must be tested before the generic "hr" branch, which would otherwise
    # report sub-hourly tables as "1hr".
    # NOTE(review): "subhr" (without a "Pt" suffix) mirrors how "6hrPlevPt" is
    # reduced to "6hr" below - confirm against the CMIP7 frequency vocabulary.
    if "subhr" in table:
        return "subhr"
    if "hr" in table:
        # Use the numeric hour count when the name carries one (e.g. "6hrLev")
        hours = re.search(r"(\d+)hr", table)
        return f"{hours.group(1)}hr" if hours else "1hr"
    # Fixed-field tables: "fx", "Ofx", "Efx", "IfxAnt", "IfxGre", ...
    if "fx" in table:
        return "fx"

    return "mon"  # Default
244
+
245
+
246
def get_realm_from_table(table_id: str) -> str:
    """
    Map a CMIP6 table_id onto its CMIP7 realm.

    Parameters
    ----------
    table_id
        CMIP6 table identifier (e.g., "Amon", "Omon")

    Returns
    -------
    str
        CMIP7 realm (e.g., "atmos", "ocean"); "atmos" for any table that is
        not listed in ``TABLE_TO_REALM``
    """
    try:
        return TABLE_TO_REALM[table_id]
    except KeyError:
        # Unlisted tables are treated as atmosphere tables
        return "atmos"
261
+
262
+
263
def convert_variant_index(value: int | str, prefix: str) -> str:
    """
    Convert CMIP6 numeric variant index to CMIP7 string format.

    In CMIP6, indices like realization_index were integers (e.g., 1).
    In CMIP7, they are strings with a prefix (e.g., "r1").

    Any integer-like object is accepted, not only built-in ``int``: objects
    implementing ``__index__`` (numpy integer scalars do) are converted using
    their integer value instead of silently collapsing to ``<prefix>1``.

    Parameters
    ----------
    value
        The index value (integer-like or str)
    prefix
        The prefix to use ("r", "i", "p", or "f")

    Returns
    -------
    str
        The CMIP7 format index (e.g., "r1", "i1", "p1", "f1").
        Values that are neither strings nor integer-like yield ``<prefix>1``.
    """
    if isinstance(value, str):
        # Already has prefix
        if value.startswith(prefix):
            return value
        # Numeric strings are normalised ("02" -> "2"); anything else is
        # kept verbatim behind the prefix
        try:
            return f"{prefix}{int(value)}"
        except ValueError:
            return f"{prefix}{value}"

    # __index__ marks a lossless integer; floats and None do not define it
    if hasattr(value, "__index__"):
        return f"{prefix}{int(value)}"

    return f"{prefix}1"
295
+
296
+
297
@dataclass
class CMIP7Metadata:
    """
    Attribute values that are added or changed when producing CMIP7 metadata.

    Holds the constant CMIP7 attributes together with the four branding
    labels; ``branding_suffix`` is computed from those labels after
    initialisation. Based on CMIP7 Global Attributes V1.0
    (DOI: 10.5281/zenodo.17250297).
    """

    # Required new attributes
    mip_era: str = "CMIP7"
    region: str = "glb"
    drs_specs: str = "MIP-DRS7"
    data_specs_version: str = "MIP-DS7.1.0.0"
    product: str = "model-output"
    license_id: str = "CC-BY-4.0"

    # Label attributes (the components of branding_suffix)
    temporal_label: str = "tavg"
    vertical_label: str = "u"
    horizontal_label: str = "hxy"
    area_label: str = "u"

    # Derived in __post_init__; cannot be passed to the constructor
    branding_suffix: str = field(init=False)

    def __post_init__(self) -> None:
        self.branding_suffix = "{}-{}-{}-{}".format(
            self.temporal_label,
            self.vertical_label,
            self.horizontal_label,
            self.area_label,
        )

    @classmethod
    def from_branding(cls, branding: BrandingSuffix, **kwargs: Any) -> CMIP7Metadata:
        """Create metadata whose four labels are copied from a BrandingSuffix."""
        label_names = ("temporal_label", "vertical_label", "horizontal_label", "area_label")
        labels = {name: getattr(branding, name) for name in label_names}
        return cls(**labels, **kwargs)
338
+
339
+
340
def convert_cmip6_to_cmip7_attrs(
    cmip6_attrs: dict[str, Any],
    variable_id: str | None = None,
    branding: BrandingSuffix | None = None,
) -> dict[str, Any]:
    """
    Convert CMIP6 global attributes to CMIP7 format.

    Based on CMIP7 Global Attributes V1.0 (DOI: 10.5281/zenodo.17250297).

    The input mapping is not modified; a converted copy is returned. A fresh
    ``tracking_id`` and the current UTC ``creation_date`` are written on every
    call, so two calls never return identical dictionaries.

    Parameters
    ----------
    cmip6_attrs
        Dictionary of CMIP6 global attributes
    variable_id
        Variable ID for determining branding suffix; falls back to the
        ``variable_id`` attribute and then to "unknown"
    branding
        Optional explicit branding suffix

    Returns
    -------
    dict
        Dictionary of CMIP7 global attributes
    """
    # Start with a copy of existing attributes
    attrs = dict(cmip6_attrs)

    # Determine variable_id if not provided
    if variable_id is None:
        variable_id = attrs.get("variable_id", "unknown")

    # Get branding suffix
    if branding is None:
        branding = get_branding_suffix(variable_id, attrs.get("cell_methods"))

    # Create CMIP7 metadata
    cmip7_meta = CMIP7Metadata.from_branding(branding)

    # Update mip_era; an existing parent_mip_era is kept as-is
    attrs["mip_era"] = cmip7_meta.mip_era
    attrs.setdefault("parent_mip_era", "CMIP6")

    # New/updated CMIP7 attributes
    attrs["region"] = cmip7_meta.region
    attrs["drs_specs"] = cmip7_meta.drs_specs
    attrs["data_specs_version"] = cmip7_meta.data_specs_version
    attrs["product"] = cmip7_meta.product
    attrs["license_id"] = cmip7_meta.license_id

    # Add tracking_id with CMIP7 handle prefix
    attrs["tracking_id"] = f"hdl:21.14107/{uuid.uuid4()}"

    # Add creation_date in ISO format
    attrs["creation_date"] = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Add label attributes
    attrs["temporal_label"] = cmip7_meta.temporal_label
    attrs["vertical_label"] = cmip7_meta.vertical_label
    attrs["horizontal_label"] = cmip7_meta.horizontal_label
    attrs["area_label"] = cmip7_meta.area_label
    attrs["branding_suffix"] = cmip7_meta.branding_suffix

    # Add branded_variable (required in CMIP7)
    attrs["branded_variable"] = f"{variable_id}_{cmip7_meta.branding_suffix}"

    # When an index attribute is missing, fall back to the matching component
    # of an existing variant_label (e.g. "r2i1p1f1") rather than to "<prefix>1",
    # so that a known variant is not silently replaced by r1i1p1f1.
    existing_label = re.fullmatch(r"(r\d+)(i\d+)(p\d+)(f\d+)", str(attrs.get("variant_label", "")))
    fallbacks = existing_label.groups() if existing_label else ("r1", "i1", "p1", "f1")
    index_prefixes = (
        ("realization_index", "r"),
        ("initialization_index", "i"),
        ("physics_index", "p"),
        ("forcing_index", "f"),
    )

    # Convert variant indices from CMIP6 integer to CMIP7 string format and
    # rebuild variant_label from the converted indices
    label_parts = []
    for (index_name, prefix), fallback in zip(index_prefixes, fallbacks):
        if index_name in attrs:
            attrs[index_name] = convert_variant_index(attrs[index_name], prefix)
        label_parts.append(str(attrs.get(index_name, fallback)))
    attrs["variant_label"] = "".join(label_parts)

    # Convert table_id to realm-based and set realm attribute
    if "table_id" in attrs:
        old_table_id = attrs["table_id"]
        attrs["realm"] = get_realm_from_table(old_table_id)
        # Also update frequency if not present
        if "frequency" not in attrs:
            attrs["frequency"] = get_frequency_from_table(old_table_id)
        # Store legacy CMIP6 compound name for reference (optional but recommended)
        attrs["cmip6_compound_name"] = f"{old_table_id}.{variable_id}"

    # Update Conventions (CF version only, per CMIP7 spec)
    attrs["Conventions"] = "CF-1.12"

    # Remove CMIP6-only attributes that are not in CMIP7 spec
    for attr in CMIP6_ONLY_ATTRIBUTES:
        attrs.pop(attr, None)

    return attrs
441
+
442
+
443
def convert_cmip6_dataset(
    ds: xr.Dataset,
    inplace: bool = False,
) -> xr.Dataset:
    """
    Convert the global attributes of a CMIP6 xarray Dataset to CMIP7 format.

    Only ``ds.attrs`` is replaced with its CMIP7 counterpart; data variables
    are neither renamed nor otherwise modified by this function.

    Parameters
    ----------
    ds
        The CMIP6 xarray Dataset to convert
    inplace
        If True, replace the attributes on ``ds`` itself; otherwise work on a
        shallow copy (``ds.copy(deep=False)``)

    Returns
    -------
    xr.Dataset
        The dataset carrying CMIP7-style global attributes
    """
    target = ds if inplace else ds.copy(deep=False)

    # Prefer the declared variable_id; otherwise take the first data variable
    # that is neither a bounds variable nor a coordinate
    variable_id = target.attrs.get("variable_id")
    if variable_id is None:
        candidates = [
            str(name)
            for name in target.data_vars
            if not str(name).endswith("_bnds") and name not in target.coords
        ]
        if candidates:
            variable_id = candidates[0]

    # Without a usable variable_id the attribute converter picks the branding
    branding = get_branding_suffix(variable_id) if variable_id else None
    target.attrs = convert_cmip6_to_cmip7_attrs(target.attrs, variable_id=variable_id, branding=branding)

    return target
480
+
481
+
482
def create_cmip7_filename(
    attrs: dict[str, Any],
    time_range: str | None = None,
) -> str:
    """
    Assemble a CMIP7 filename from a dictionary of attributes.

    The CMIP7 filename follows the MIP-DRS7 specification (V1.0):
    <variable_id>_<branding_suffix>_<frequency>_<region>_<grid_label>_<source_id>_<experiment_id>_<variant_label>[_<timeRangeDD>].nc

    Missing attributes fall back to "mon" (frequency), "glb" (region) and
    "gn" (grid_label); every other missing attribute becomes an empty
    component.

    Parameters
    ----------
    attrs
        Dictionary containing CMIP7 attributes
    time_range
        Optional time range string (e.g., "190001-190912").
        Format depends on frequency: "YYYY" for yearly, "YYYYMM" for monthly, "YYYYMMDD" for daily.
        Omit for fixed/time-independent variables.

    Returns
    -------
    str
        The CMIP7 filename

    Examples
    --------
    >>> attrs = {
    ...     "variable_id": "tas",
    ...     "branding_suffix": "tavg-h2m-hxy-u",
    ...     "frequency": "mon",
    ...     "region": "glb",
    ...     "grid_label": "g13s",
    ...     "source_id": "CanESM6-MR",
    ...     "experiment_id": "historical",
    ...     "variant_label": "r2i1p1f1",
    ... }
    >>> create_cmip7_filename(attrs, "190001-190912")
    'tas_tavg-h2m-hxy-u_mon_glb_g13s_CanESM6-MR_historical_r2i1p1f1_190001-190912.nc'
    """
    # (attribute name, fallback) pairs in the order they appear in the filename
    layout = (
        ("variable_id", ""),
        ("branding_suffix", ""),
        ("frequency", "mon"),
        ("region", "glb"),
        ("grid_label", "gn"),
        ("source_id", ""),
        ("experiment_id", ""),
        ("variant_label", ""),
    )
    parts = [str(attrs.get(name, fallback)) for name, fallback in layout]

    # The time range is the optional last component (left out for fixed fields)
    if time_range:
        parts.append(str(time_range))

    return "_".join(parts) + ".nc"
539
+
540
+
541
def create_cmip7_path(attrs: dict[str, Any], version: str | None = None) -> str:
    """
    Assemble a CMIP7 directory path from a dictionary of attributes.

    The CMIP7 path follows the MIP-DRS7 specification (V1.0):
    <drs_specs>/<mip_era>/<activity_id>/<institution_id>/<source_id>/<experiment_id>/
    <variant_label>/<region>/<frequency>/<variable_id>/<branding_suffix>/<grid_label>/<version>

    Parameters
    ----------
    attrs
        Dictionary containing CMIP7 attributes
    version
        Optional version string (e.g., "v20250622"). If not provided (or empty),
        uses attrs["version"] or defaults to "v1".

    Returns
    -------
    str
        The CMIP7 directory path, with components separated by "/"

    Examples
    --------
    >>> attrs = {
    ...     "drs_specs": "MIP-DRS7",
    ...     "mip_era": "CMIP7",
    ...     "activity_id": "CMIP",
    ...     "institution_id": "CCCma",
    ...     "source_id": "CanESM6-MR",
    ...     "experiment_id": "historical",
    ...     "variant_label": "r2i1p1f1",
    ...     "region": "glb",
    ...     "frequency": "mon",
    ...     "variable_id": "tas",
    ...     "branding_suffix": "tavg-h2m-hxy-u",
    ...     "grid_label": "g13s",
    ... }
    >>> create_cmip7_path(attrs, "v20250622")
    'MIP-DRS7/CMIP7/CMIP/CCCma/CanESM6-MR/historical/r2i1p1f1/glb/mon/tas/tavg-h2m-hxy-u/g13s/v20250622'
    """
    # (attribute name, fallback) pairs in directory order, version excluded
    layout = (
        ("drs_specs", "MIP-DRS7"),
        ("mip_era", "CMIP7"),
        ("activity_id", "CMIP"),
        ("institution_id", ""),
        ("source_id", ""),
        ("experiment_id", ""),
        ("variant_label", ""),
        ("region", "glb"),
        ("frequency", "mon"),
        ("variable_id", ""),
        ("branding_suffix", ""),
        ("grid_label", "gn"),
    )
    segments = [str(attrs.get(name, fallback)) for name, fallback in layout]

    # An explicit version wins over attrs["version"], which wins over "v1"
    segments.append(str(version or attrs.get("version", "v1")))

    return "/".join(segments)