climate-ref-ilamb 0.5.4__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. {climate_ref_ilamb-0.5.4 → climate_ref_ilamb-0.6.0}/PKG-INFO +6 -6
  2. climate_ref_ilamb-0.6.0/pyproject.toml +32 -0
  3. {climate_ref_ilamb-0.5.4 → climate_ref_ilamb-0.6.0}/src/climate_ref_ilamb/__init__.py +4 -11
  4. climate_ref_ilamb-0.6.0/src/climate_ref_ilamb/configure/ilamb.yaml +59 -0
  5. climate_ref_ilamb-0.6.0/src/climate_ref_ilamb/configure/iomb.yaml +49 -0
  6. climate_ref_ilamb-0.6.0/src/climate_ref_ilamb/dataset_registry/ilamb.txt +13 -0
  7. climate_ref_ilamb-0.6.0/src/climate_ref_ilamb/dataset_registry/iomb.txt +4 -0
  8. climate_ref_ilamb-0.6.0/src/climate_ref_ilamb/dataset_registry/test.txt +3 -0
  9. {climate_ref_ilamb-0.5.4 → climate_ref_ilamb-0.6.0}/src/climate_ref_ilamb/datasets.py +0 -2
  10. climate_ref_ilamb-0.6.0/src/climate_ref_ilamb/standard.py +294 -0
  11. {climate_ref_ilamb-0.5.4 → climate_ref_ilamb-0.6.0}/tests/integration/test_diagnostics.py +2 -2
  12. {climate_ref_ilamb-0.5.4 → climate_ref_ilamb-0.6.0}/tests/unit/test_standard_metrics.py +62 -7
  13. climate_ref_ilamb-0.5.4/pyproject.toml +0 -34
  14. climate_ref_ilamb-0.5.4/src/climate_ref_ilamb/configure/ilamb.yaml +0 -45
  15. climate_ref_ilamb-0.5.4/src/climate_ref_ilamb/configure/iomb.yaml +0 -27
  16. climate_ref_ilamb-0.5.4/src/climate_ref_ilamb/dataset_registry/ilamb.txt +0 -11
  17. climate_ref_ilamb-0.5.4/src/climate_ref_ilamb/dataset_registry/iomb.txt +0 -3
  18. climate_ref_ilamb-0.5.4/src/climate_ref_ilamb/dataset_registry/test.txt +0 -3
  19. climate_ref_ilamb-0.5.4/src/climate_ref_ilamb/standard.py +0 -207
  20. {climate_ref_ilamb-0.5.4 → climate_ref_ilamb-0.6.0}/.gitignore +0 -0
  21. {climate_ref_ilamb-0.5.4 → climate_ref_ilamb-0.6.0}/LICENCE +0 -0
  22. {climate_ref_ilamb-0.5.4 → climate_ref_ilamb-0.6.0}/NOTICE +0 -0
  23. {climate_ref_ilamb-0.5.4 → climate_ref_ilamb-0.6.0}/README.md +0 -0
  24. {climate_ref_ilamb-0.5.4 → climate_ref_ilamb-0.6.0}/src/climate_ref_ilamb/py.typed +0 -0
  25. {climate_ref_ilamb-0.5.4 → climate_ref_ilamb-0.6.0}/tests/unit/test_provider.py +0 -0
@@ -1,14 +1,15 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: climate-ref-ilamb
3
- Version: 0.5.4
3
+ Version: 0.6.0
4
4
  Summary: ILAMB diagnostic provider for the Rapid Evaluation Framework
5
- Author-email: Nathan Collier <nathaniel.collier@gmail.com>
6
- License: Apache-2.0
5
+ Author-email: Nathan Collier <nathaniel.collier@gmail.com>, Jared Lewis <jared.lewis@climate-resource.com>
6
+ License-Expression: Apache-2.0
7
7
  License-File: LICENCE
8
8
  License-File: NOTICE
9
- Classifier: Development Status :: 4 - Beta
9
+ Classifier: Development Status :: 3 - Alpha
10
10
  Classifier: Intended Audience :: Developers
11
11
  Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: Apache Software License
12
13
  Classifier: Operating System :: OS Independent
13
14
  Classifier: Programming Language :: Python
14
15
  Classifier: Programming Language :: Python :: 3
@@ -18,8 +19,7 @@ Classifier: Programming Language :: Python :: 3.13
18
19
  Classifier: Topic :: Scientific/Engineering
19
20
  Requires-Python: >=3.11
20
21
  Requires-Dist: climate-ref-core
21
- Requires-Dist: ilamb3>=2025.4.28
22
- Requires-Dist: types-pyyaml>=6.0.12.20241230
22
+ Requires-Dist: ilamb3>=2025.5.20
23
23
  Description-Content-Type: text/markdown
24
24
 
25
25
  # climate-ref-ilamb
@@ -0,0 +1,32 @@
1
+ [project]
2
+ name = "climate-ref-ilamb"
3
+ version = "0.6.0"
4
+ description = "ILAMB diagnostic provider for the Rapid Evaluation Framework"
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "Nathan Collier", email = "nathaniel.collier@gmail.com" },
8
+ { name = "Jared Lewis", email = "jared.lewis@climate-resource.com" },
9
+ ]
10
+ requires-python = ">=3.11"
11
+ license = "Apache-2.0"
12
+ classifiers = [
13
+ "Development Status :: 3 - Alpha",
14
+ "Operating System :: OS Independent",
15
+ "Intended Audience :: Developers",
16
+ "Intended Audience :: Science/Research",
17
+ "Programming Language :: Python",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.11",
20
+ "Programming Language :: Python :: 3.12",
21
+ "Programming Language :: Python :: 3.13",
22
+ "Topic :: Scientific/Engineering",
23
+ "License :: OSI Approved :: Apache Software License",
24
+ ]
25
+ dependencies = ["climate-ref-core", "ilamb3>=2025.5.20"]
26
+
27
+ [dependency-groups]
28
+ dev = ["types-pyyaml>=6.0.12"]
29
+
30
+ [build-system]
31
+ requires = ["hatchling"]
32
+ build-backend = "hatchling.build"
@@ -10,9 +10,8 @@ import importlib.resources
10
10
 
11
11
  import yaml
12
12
 
13
- from climate_ref_core.dataset_registry import dataset_registry_manager
13
+ from climate_ref_core.dataset_registry import DATASET_URL, dataset_registry_manager
14
14
  from climate_ref_core.providers import DiagnosticProvider
15
- from climate_ref_ilamb.datasets import ILAMB_DATA_VERSION
16
15
  from climate_ref_ilamb.standard import ILAMBStandard
17
16
 
18
17
  __version__ = importlib.metadata.version("climate-ref-ilamb")
@@ -22,27 +21,21 @@ provider = DiagnosticProvider("ILAMB", __version__)
22
21
  # Register some datasets
23
22
  dataset_registry_manager.register(
24
23
  "ilamb-test",
25
- base_url="https://www.ilamb.org/ILAMB-Data/DATA",
24
+ base_url=DATASET_URL,
26
25
  package="climate_ref_ilamb.dataset_registry",
27
26
  resource="test.txt",
28
- cache_name="ilamb3",
29
- version=ILAMB_DATA_VERSION,
30
27
  )
31
28
  dataset_registry_manager.register(
32
29
  "ilamb",
33
- base_url="https://www.ilamb.org/ILAMB-Data/DATA",
30
+ base_url=DATASET_URL,
34
31
  package="climate_ref_ilamb.dataset_registry",
35
32
  resource="ilamb.txt",
36
- cache_name="ilamb3",
37
- version=ILAMB_DATA_VERSION,
38
33
  )
39
34
  dataset_registry_manager.register(
40
35
  "iomb",
41
- base_url="https://www.ilamb.org/ilamb3-data/",
36
+ base_url=DATASET_URL,
42
37
  package="climate_ref_ilamb.dataset_registry",
43
38
  resource="iomb.txt",
44
- cache_name="ilamb3",
45
- version=ILAMB_DATA_VERSION,
46
39
  )
47
40
 
48
41
  # Dynamically register ILAMB diagnostics
@@ -0,0 +1,59 @@
1
+ registry: ilamb
2
+
3
+ gpp-WECANN:
4
+ sources:
5
+ # TODO: Update to use the obs4REF equiv
6
+ gpp: ilamb/gpp/WECANN/gpp.nc
7
+ relationships:
8
+ pr: ilamb/pr/GPCPv2.3/pr.nc
9
+ tas: ilamb/tas/CRU4.02/tas.nc
10
+ variable_cmap: Greens
11
+
12
+ gpp-FLUXNET2015:
13
+ sources:
14
+ gpp: ilamb/gpp/FLUXNET2015/gpp.nc
15
+ variable_cmap: Greens
16
+
17
+ mrro-LORA:
18
+ sources:
19
+ # TODO: Update to use the obs4REF equiv
20
+ mrro: ilamb/mrro/LORA/LORA.nc
21
+ variable_cmap: Blues
22
+
23
+ mrsos-WangMao:
24
+ sources:
25
+ mrsol: ilamb/mrsol/WangMao/mrsol_olc.nc
26
+ alternate_vars:
27
+ - mrsos
28
+ transform:
29
+ - select_depth:
30
+ value: 0
31
+ - soil_moisture_to_vol_fraction
32
+ variable_cmap: Blues
33
+
34
+ cSoil-HWSD2:
35
+ sources:
36
+ cSoil: ilamb/cSoil/HWSD2/cSoil_fx_HWSD2_19600101-20220101.nc
37
+ variable_cmap: viridis
38
+
39
+ lai-AVH15C1:
40
+ sources:
41
+ lai: ilamb/lai/AVH15C1/lai.nc
42
+ variable_cmap: Greens
43
+
44
+ nbp-Hoffman:
45
+ analyses:
46
+ - nbp
47
+ sources:
48
+ # TODO: Update to use the obs4REF equiv
49
+ nbp: ilamb/nbp/HOFFMAN/nbp_1850-2010.nc
50
+
51
+ snc-ESACCI:
52
+ sources:
53
+ snc: ilamb/snc/Snow-cci/snc_mon_Snow-cci_BE_gn_198201-201906.nc
54
+
55
+ burntFractionAll-GFED:
56
+ sources:
57
+ burntArea: ilamb/burntFractionAll/GFED/burntArea.nc
58
+ alternate_vars:
59
+ - burntFractionAll
@@ -0,0 +1,49 @@
1
+ registry: iomb
2
+
3
+ thetao-WOA2023-surface:
4
+ sources:
5
+ # TODO: Update to use the obs4REF equiv
6
+ thetao: ilamb/WOA/thetao_mon_WOA_A5B4_gn_200501-201412.nc
7
+ variable_cmap: Reds
8
+ transform:
9
+ - select_depth:
10
+ value: 0
11
+ alternate_vars:
12
+ - tos
13
+
14
+ so-WOA2023-surface:
15
+ sources:
16
+ # TODO: Update to use the obs4REF equiv
17
+ so: ilamb/WOA/so_mon_WOA_A5B4_gn_200501-201412.nc
18
+ transform:
19
+ - select_depth:
20
+ value: 0
21
+ variable_cmap: YlGn
22
+ alternate_vars:
23
+ - sos
24
+
25
+ amoc-RAPID:
26
+ analyses:
27
+ - timeseries
28
+ related_vars:
29
+ - msftmz
30
+ transform:
31
+ - msftmz_to_rapid
32
+ sources:
33
+ # TODO: Update to use the obs4REF equiv
34
+ amoc: ilamb/RAPID/amoc_mon_RAPID_BE_NA_200404-202302.nc
35
+
36
+ ohc-NOAA:
37
+ sources:
38
+ ohc: ilamb/NOAA/ohc_yr_OHC_BE_gm_200506-202406.nc
39
+ related_vars:
40
+ - thetao
41
+ - volcello
42
+ transform:
43
+ - select_depth:
44
+ min: 0
45
+ max: 2000
46
+ - ocean_heat_content:
47
+ reference_year: 2005
48
+ analyses:
49
+ - accumulate
@@ -0,0 +1,13 @@
1
+ ilamb/cSoil/HWSD2/cSoil_fx_HWSD2_19600101-20220101.nc sha1:7138b0b53aa600878adf95e6aef65f4322a8e287
2
+ ilamb/gpp/FLUXNET2015/gpp.nc sha1:16fd177e007caef2565687e2cd32884e20ef16e5
3
+ ilamb/gpp/WECANN/gpp.nc sha1:6e864a6ae201195cdf995a3a81720188af441e13
4
+ ilamb/lai/AVH15C1/lai.nc sha1:ccace4f84912d63acbb9ee09ee7b743412207a0d
5
+ ilamb/mrro/LORA/LORA.nc sha1:72bb16787877591d0c54a36d74697d0d208f985a
6
+ ilamb/mrsol/WangMao/mrsol_olc.nc sha1:24cbc9df69569bed3a39c20e499cfe4f911bd30e
7
+ ilamb/regions/GlobalLand.nc sha1:2f987d44fdba6ad0e72d14d6a2fecb7e8df2a9c5
8
+ ilamb/regions/Koppen_coarse.nc sha1:e464030db49f0295a6a22a81ca602b0f3c499b72
9
+ ilamb/pr/GPCPv2.3/pr.nc sha1:e1b942863ec76a75aa972b6d75e2e08646741259
10
+ ilamb/tas/CRU4.02/tas.nc sha1:2674da18a1a93483b50b1626e7a7ab741bf53d09
11
+ ilamb/nbp/HOFFMAN/nbp_1850-2010.nc sha1:8350af00614d6afc6b70ad314aa499a9ece80ec2
12
+ ilamb/snc/Snow-cci/snc_mon_Snow-cci_BE_gn_198201-201906.nc sha1:c0bfecd2f8b886e9301428d28bb6ff0507601be2
13
+ ilamb/burntFractionAll/GFED/burntArea.nc sha1:cf9d73c6a8bfc594737c9ba6ca4df613df4a28ab
@@ -0,0 +1,4 @@
1
+ ilamb/WOA/so_mon_WOA_A5B4_gn_200501-201412.nc sha1:831c42c3b2ba443c255150289a2c725d7f3e5838
2
+ ilamb/WOA/thetao_mon_WOA_A5B4_gn_200501-201412.nc sha1:86d9056208291d76233e65b26c658c1fa54c3ea6
3
+ ilamb/RAPID/amoc_mon_RAPID_BE_NA_200404-202302.nc sha1:3efe773e5c2a3c832977791ff7fd9cb9f473fe65
4
+ ilamb/NOAA/ohc_yr_OHC_BE_gm_200506-202406.nc sha1:a918799d8e24e4f0015b9047a74d470ae9f0445c
@@ -0,0 +1,3 @@
1
+ ilamb/test/Site/tas.nc sha1:3e757c9feadea581a92e6f4afb402b86168e113a
2
+ ilamb/test/Grid/gpp.nc sha1:2688085f95fa13617b20c723c66b0009bd661847
3
+ ilamb/test/Grid/pr.nc sha1:d667eb216939f5797d59b59cb4ade5ec31e67659
@@ -7,8 +7,6 @@ import pooch
7
7
 
8
8
  from climate_ref_core.datasets import DatasetCollection
9
9
 
10
- ILAMB_DATA_VERSION = "0.1" # we don't really have data versions for the collection :/
11
-
12
10
 
13
11
  def registry_to_collection(registry: pooch.Pooch) -> DatasetCollection:
14
12
  """
@@ -0,0 +1,294 @@
1
+ from pathlib import Path
2
+ from typing import Any
3
+
4
+ import ilamb3 # type: ignore
5
+ import ilamb3.regions as ilr # type: ignore
6
+ import matplotlib.pyplot as plt
7
+ import pandas as pd
8
+ import pooch
9
+ from ilamb3 import run
10
+
11
+ from climate_ref_core.constraints import AddSupplementaryDataset
12
+ from climate_ref_core.dataset_registry import dataset_registry_manager
13
+ from climate_ref_core.datasets import FacetFilter, SourceDatasetType
14
+ from climate_ref_core.diagnostics import (
15
+ DataRequirement,
16
+ Diagnostic,
17
+ ExecutionDefinition,
18
+ ExecutionResult,
19
+ )
20
+ from climate_ref_core.pycmec.metric import CMECMetric
21
+ from climate_ref_core.pycmec.output import CMECOutput
22
+ from climate_ref_ilamb.datasets import (
23
+ registry_to_collection,
24
+ )
25
+
26
+
27
+ def format_cmec_output_bundle(
28
+ dataset: pd.DataFrame,
29
+ dimensions: list[str],
30
+ metadata_columns: list[str],
31
+ value_column: str = "value",
32
+ ) -> dict[str, Any]:
33
+ """
34
+ Create a CMEC output bundle for the dataset.
35
+
36
+ Parameters
37
+ ----------
38
+ dataset
39
+ Processed dataset
40
+ dimensions
41
+ The dimensions of the dataset (e.g., ["source_id", "member_id", "region"])
42
+ metadata_columns
43
+ The columns to be used as metadata (e.g., ["Description", "LongName"])
44
+ value_column
45
+ The column containing the values
46
+
47
+ Returns
48
+ -------
49
+ A CMEC output bundle ready to be written to disk
50
+ """
51
+ # Validate that all required columns exist
52
+ required_columns = set(dimensions) | {value_column} | set(metadata_columns)
53
+ missing_columns = required_columns - set(dataset.columns)
54
+ if missing_columns:
55
+ raise ValueError(f"Missing required columns: {missing_columns}")
56
+
57
+ # Build the dimensions section
58
+ dimensions_dict: dict[str, dict[str, dict[str, str]]] = {}
59
+
60
+ # For each dimension, create a dictionary of unique values and their metadata
61
+ for dim in dimensions:
62
+ unique_values = dataset[dim].unique()
63
+ dim_dict: dict[str, dict[str, str]] = {}
64
+
65
+ for val in unique_values:
66
+ # Get the row for this dimension value
67
+
68
+ dim_dict[str(val)] = {}
69
+
70
+ if dim == dimensions[-1]:
71
+ # If this is the last dimension, add the value column to the metadata
72
+
73
+ dim_dict[str(val)] = dataset[dataset[dim] == val].iloc[0][metadata_columns].to_dict()
74
+
75
+ dimensions_dict[dim] = dim_dict
76
+
77
+ # Build the results section - create nested structure based on dimensions
78
+ def nest_results(df: pd.DataFrame, dims: list[str]) -> dict[str, Any] | float:
79
+ if not dims:
80
+ return float(df[value_column].iloc[0].item())
81
+
82
+ current_dim = dims[0]
83
+ remaining_dims = dims[1:]
84
+
85
+ return {
86
+ str(group_name): nest_results(group_df, remaining_dims)
87
+ for group_name, group_df in df.groupby(current_dim)
88
+ }
89
+
90
+ results = nest_results(dataset, list(dimensions))
91
+
92
+ return {"DIMENSIONS": {"json_structure": list(dimensions), **dimensions_dict}, "RESULTS": results}
93
+
94
+
95
def _build_cmec_bundle(df: pd.DataFrame) -> dict[str, Any]:
    """
    Build a CMEC bundle from information in the dataframe.

    Parameters
    ----------
    df
        Merged scalar data. The code reads the columns "source", "name", "analysis",
        "value", "experiment_id", "region", "type" and "units".

    Returns
    -------
        A CMEC metric bundle (as a plain dictionary) whose region entries have been
        annotated with "LongName", "Description" and "Generator"
    """
    # TODO: Handle the reference data
    # Take an explicit copy: the columns added below must not be written onto a
    # filtered slice of the caller's frame (pandas chained-assignment warning).
    model_df = df[df["source"] != "Reference"].copy()

    # Source is formatted as "ACCESS-ESM1-5-r1i1p1f1-gn"
    # This assumes that the member_id and grid_label are always the last two parts of the source string
    # and don't contain '-'
    extracted_source = model_df["source"].str.extract(r"([\w-]+)-([\w\d]+)-([\w\d]+)")
    model_df["source_id"] = extracted_source[0]
    model_df["member_id"] = extracted_source[1]
    model_df["grid_label"] = extracted_source[2]

    # Strip out units from the name
    # These are available in the attributes
    extracted_name = model_df["name"].str.extract(r"(.*)\s\[.*\]")
    model_df["name"] = extracted_name[0]

    model_df = model_df.rename(
        columns={
            "analysis": "metric",
            "name": "statistic",
        }
    )

    # Convert the value column to numeric, coercing errors to NaN
    model_df["value"] = pd.to_numeric(model_df["value"], errors="coerce")

    dimensions = ["experiment_id", "source_id", "member_id", "grid_label", "region", "metric", "statistic"]
    attributes = ["type", "units"]

    bundle = format_cmec_output_bundle(
        model_df,
        dimensions=dimensions,
        metadata_columns=attributes,
        value_column="value",
    )

    # Annotate every region entry; the string "None" marks rows without a named region
    ilamb_regions = ilr.Regions()
    for region, region_info in bundle["DIMENSIONS"]["region"].items():
        if region == "None":
            region_info["LongName"] = "None"
            region_info["Description"] = "Reference data extents"
            region_info["Generator"] = "N/A"
        else:
            region_info["LongName"] = ilamb_regions.get_name(region)
            region_info["Description"] = ilamb_regions.get_name(region)
            region_info["Generator"] = ilamb_regions.get_source(region)

    return bundle
149
+
150
+
151
def _form_bundles(df: pd.DataFrame) -> tuple[CMECMetric, CMECOutput]:
    """
    Build and validate the metric and output bundles for an execution.

    This lives in its own function only to keep the calling code small enough for Ruff.
    """
    raw_metric = _build_cmec_bundle(df)
    raw_output = CMECOutput.create_template()

    validated_metric = CMECMetric.model_validate(raw_metric)
    validated_output = CMECOutput.model_validate(raw_output)
    return validated_metric, validated_output
158
+
159
+
160
def _set_ilamb3_options(registry: pooch.Pooch, registry_file: str) -> None:
    """
    Reset the ilamb3 configuration and apply the options for the given registry.

    For the "ilamb" registry, the GlobalLand and Koppen_coarse region files are fetched
    from ``registry`` and added to the ILAMB regions, and the configured regions are set
    to "global" and "tropical". For any other registry no further options are set.
    """
    ilamb3.conf.reset()
    region_store = ilr.Regions()
    if registry_file != "ilamb":
        return

    for region_file in ("ilamb/regions/GlobalLand.nc", "ilamb/regions/Koppen_coarse.nc"):
        region_store.add_netcdf(registry.fetch(region_file))
    ilamb3.conf.set(regions=["global", "tropical"])
170
+
171
+
172
def _load_csv_and_merge(output_directory: Path) -> pd.DataFrame:
    """
    Load individual csv scalar data and merge into a dataframe.

    Parameters
    ----------
    output_directory
        Directory whose ``*.csv`` files (non-recursive) are read

    Returns
    -------
        The concatenated rows, with duplicates of ("source", "region", "analysis", "name")
        removed (the first occurrence is kept)

    Raises
    ------
    ValueError
        If the directory contains no csv files
    """
    # Sort so that the row kept by drop_duplicates does not depend on the
    # filesystem's directory listing order.
    csv_files = sorted(output_directory.glob("*.csv"))
    if not csv_files:
        raise ValueError(f"No csv scalar files found in {output_directory}")

    # Only the literal "NaN" is treated as missing, so strings such as "None"
    # (used as a region name) are preserved.
    frames = [pd.read_csv(f, keep_default_na=False, na_values=["NaN"]) for f in csv_files]
    return pd.concat(frames).drop_duplicates(subset=["source", "region", "analysis", "name"])
180
+
181
+
182
class ILAMBStandard(Diagnostic):
    """
    Apply the standard ILAMB analysis with respect to a given reference dataset.
    """

    def __init__(
        self,
        registry_file: str,
        metric_name: str,
        sources: dict[str, str],
        **ilamb_kwargs: Any,
    ):
        # ILAMB configuration: exactly one source variable is supported
        if len(sources) != 1:
            raise ValueError("Only single source ILAMB diagnostics have been implemented.")
        self.variable_id = next(iter(sources))
        ilamb_kwargs.setdefault("sources", sources)
        ilamb_kwargs.setdefault("relationships", {})
        self.ilamb_kwargs = ilamb_kwargs

        # REF identification
        self.name = metric_name
        self.slug = self.name.lower().replace(" ", "-")

        # Every variable the analysis may need: the source variable plus any
        # relationship, alternate and related variables passed through to ILAMB
        requested_variables = (
            self.variable_id,
            *ilamb_kwargs.get("relationships", {}).keys(),
            *ilamb_kwargs.get("alternate_vars", []),
            *ilamb_kwargs.get("related_vars", []),
        )
        filters = (
            FacetFilter(facets={"variable_id": requested_variables}),
            FacetFilter(facets={"frequency": ("mon",)}),
            FacetFilter(facets={"experiment_id": ("historical", "land-hist")}),
            # Exclude unneeded snc tables
            FacetFilter(facets={"table_id": ("ImonAnt", "ImonGre")}, keep=False),
        )

        # The "ilamb" registry uses the areacella/sftlf supplementary variables,
        # every other registry uses areacello/sftof
        if registry_file == "ilamb":
            supplementary_variables = ("areacella", "sftlf")
        else:
            supplementary_variables = ("areacello", "sftof")
        constraints = tuple(
            AddSupplementaryDataset.from_defaults(variable, SourceDatasetType.CMIP6)
            for variable in supplementary_variables
        )

        self.data_requirements = (
            DataRequirement(
                source_type=SourceDatasetType.CMIP6,
                filters=filters,
                constraints=constraints,
                group_by=("experiment_id",),
            ),
        )
        self.facets = (
            "experiment_id",
            "source_id",
            "member_id",
            "grid_label",
            "region",
            "metric",
            "statistic",
        )

        # ILAMB reference data
        self.registry_file = registry_file
        self.registry = dataset_registry_manager[self.registry_file]
        self.ilamb_data = registry_to_collection(
            dataset_registry_manager[self.registry_file],
        )

    def execute(self, definition: ExecutionDefinition) -> None:
        """
        Run the ILAMB standard analysis.

        Parameters
        ----------
        definition
            The definition of the diagnostic execution
        """
        # A limit of 0 turns off matplotlib's warning about the number of open figures
        plt.rcParams.update({"figure.max_open_warning": 0})
        _set_ilamb3_options(self.registry, self.registry_file)

        reference_catalog = self.ilamb_data.datasets.set_index(self.ilamb_data.slug_column)
        model_catalog = definition.datasets[SourceDatasetType.CMIP6].datasets
        run.run_simple(
            reference_catalog,
            self.slug,
            model_catalog,
            definition.output_directory,
            **self.ilamb_kwargs,
        )

    def build_execution_result(self, definition: ExecutionDefinition) -> ExecutionResult:
        """
        Build the diagnostic result after running ILAMB.

        Parameters
        ----------
        definition
            The definition of the diagnostic execution

        Returns
        -------
            An execution result object
        """
        selectors = definition.datasets[SourceDatasetType.CMIP6].selector_dict()
        _set_ilamb3_options(self.registry, self.registry_file)

        scalars = _load_csv_and_merge(definition.output_directory)
        # Add the selectors to the dataframe, one constant column per selector
        for selector_name, selector_value in selectors.items():
            scalars[selector_name] = selector_value
        metric_bundle, output_bundle = _form_bundles(scalars)

        return ExecutionResult.build_from_output_bundle(
            definition, cmec_output_bundle=output_bundle, cmec_metric_bundle=metric_bundle
        )
@@ -4,8 +4,7 @@ from climate_ref_ilamb import provider as ilamb_provider
4
4
  from climate_ref_core.diagnostics import Diagnostic
5
5
 
6
6
  skipped_diagnostics = [
7
- "csoil-hwsd2", # Incorrect time spans
8
- "nbp-hoffman", # Incorrect time spans
7
+ "ohc-noaa", # Missing sample data
9
8
  ]
10
9
 
11
10
  diagnostics = [
@@ -35,3 +34,4 @@ def test_build_results(diagnostic: Diagnostic, diagnostic_validation):
35
34
 
36
35
  definition = validator.get_regression_definition()
37
36
  validator.validate(definition)
37
+ validator.execution_regression.check(definition.key, definition.output_directory)
@@ -1,14 +1,17 @@
1
1
  import ilamb3
2
+ import pandas as pd
2
3
  import pytest
3
4
  from climate_ref_ilamb.standard import ILAMBStandard, _set_ilamb3_options
5
+ from climate_ref_pmp import provider as ilamb_provider
4
6
 
7
+ from climate_ref.solver import solve_executions
5
8
  from climate_ref_core.dataset_registry import dataset_registry_manager
6
- from climate_ref_core.datasets import DatasetCollection
9
+ from climate_ref_core.datasets import DatasetCollection, SourceDatasetType
7
10
 
8
11
 
9
12
  def test_standard_site(cmip6_data_catalog, definition_factory):
10
13
  diagnostic = ILAMBStandard(
11
- registry_file="ilamb-test", metric_name="test-site-tas", sources={"tas": "test/Site/tas.nc"}
14
+ registry_file="ilamb-test", metric_name="test-site-tas", sources={"tas": "ilamb/test/Site/tas.nc"}
12
15
  )
13
16
  ds = (
14
17
  cmip6_data_catalog[
@@ -19,7 +22,10 @@ def test_standard_site(cmip6_data_catalog, definition_factory):
19
22
  .first()
20
23
  )
21
24
 
22
- definition = definition_factory(diagnostic=diagnostic, cmip6=DatasetCollection(ds, "instance_id"))
25
+ definition = definition_factory(
26
+ diagnostic=diagnostic,
27
+ cmip6=DatasetCollection(ds, "instance_id", selector=(("experiment_id", "historical"),)),
28
+ )
23
29
  definition.output_directory.mkdir(parents=True, exist_ok=True)
24
30
 
25
31
  result = diagnostic.run(definition)
@@ -45,8 +51,8 @@ def test_standard_grid(cmip6_data_catalog, definition_factory):
45
51
  diagnostic = ILAMBStandard(
46
52
  registry_file="ilamb-test",
47
53
  metric_name="test-grid-gpp",
48
- sources={"gpp": "test/Grid/gpp.nc"},
49
- relationships={"pr": "test/Grid/pr.nc"},
54
+ sources={"gpp": "ilamb/test/Grid/gpp.nc"},
55
+ relationships={"pr": "ilamb/test/Grid/pr.nc"},
50
56
  )
51
57
  grp = cmip6_data_catalog[
52
58
  (cmip6_data_catalog["experiment_id"] == "historical")
@@ -54,7 +60,10 @@ def test_standard_grid(cmip6_data_catalog, definition_factory):
54
60
  ].groupby(["source_id", "member_id", "grid_label"])
55
61
  _, ds = next(iter(grp))
56
62
 
57
- definition = definition_factory(diagnostic=diagnostic, cmip6=DatasetCollection(ds, "instance_id"))
63
+ definition = definition_factory(
64
+ diagnostic=diagnostic,
65
+ cmip6=DatasetCollection(ds, "instance_id", selector=(("experiment_id", "historical"),)),
66
+ )
58
67
  definition.output_directory.mkdir(parents=True, exist_ok=True)
59
68
 
60
69
  result = diagnostic.run(definition)
@@ -81,10 +90,56 @@ def test_standard_fail():
81
90
  ILAMBStandard(
82
91
  registry_file="ilamb-test",
83
92
  metric_name="test-fail",
84
- sources={"gpp": "test/Grid/gpp.nc", "pr": "test/Grid/pr.nc"},
93
+ sources={"gpp": "ilamb/test/Grid/gpp.nc", "pr": "ilamb/test/Grid/pr.nc"},
85
94
  )
86
95
 
87
96
 
88
97
  def test_options():
89
98
  _set_ilamb3_options(dataset_registry_manager["ilamb"], "ilamb")
90
99
  assert set(["global", "tropical"]).issubset(ilamb3.conf["regions"])
100
+
101
+
102
+ def test_expected_executions():
103
+ diagnostic = ILAMBStandard(
104
+ registry_file="ilamb",
105
+ metric_name="cSoil-HWSD2",
106
+ sources={"cSoil": "ilamb/cSoil/HWSD2/cSoil_fx_HWSD2_19600101-20220101.nc"},
107
+ )
108
+
109
+ # No Obs4MIPs datasets are used yet
110
+ data_catalog = {
111
+ SourceDatasetType.CMIP6: pd.DataFrame(
112
+ [
113
+ ["cSoil", "ACCESS-ESM1-5", "historical", "r1i1p1f1", "mon", "gn", "Amon", "v20191115"],
114
+ ["cSoil", "ACCESS-ESM1-5", "ssp119", "r1i1p1f1", "mon", "gn", "Amon", "v20191115"],
115
+ ["cSoil", "ACCESS-ESM1-5", "historical", "r2i1p1f1", "mon", "gn", "Amon", "v20191115"],
116
+ ["ts", "ACCESS-ESM1-5", "historical", "r1i1p1f1", "mon", "gn", "Amon", "v20191115"],
117
+ ["areacella", "ACCESS-ESM1-5", "fx", "r1i1p1f1", "mon", "gn", "Amon", "v20191115"],
118
+ ],
119
+ columns=(
120
+ "variable_id",
121
+ "source_id",
122
+ "experiment_id",
123
+ "member_id",
124
+ "frequency",
125
+ "grid_label",
126
+ "table_id",
127
+ "version",
128
+ ),
129
+ ),
130
+ }
131
+ executions = list(solve_executions(data_catalog, diagnostic, provider=ilamb_provider))
132
+ assert len(executions) == 1
133
+
134
+ # ts
135
+ assert executions[0].datasets[SourceDatasetType.CMIP6].selector == (("experiment_id", "historical"),)
136
+ assert executions[0].datasets[SourceDatasetType.CMIP6].datasets["variable_id"].tolist() == [
137
+ "cSoil",
138
+ "cSoil",
139
+ "areacella",
140
+ ]
141
+ assert executions[0].datasets[SourceDatasetType.CMIP6].datasets["member_id"].tolist() == [
142
+ "r1i1p1f1",
143
+ "r2i1p1f1",
144
+ "r1i1p1f1",
145
+ ]
@@ -1,34 +0,0 @@
1
- [project]
2
- name = "climate-ref-ilamb"
3
- version = "0.5.4"
4
- description = "ILAMB diagnostic provider for the Rapid Evaluation Framework"
5
- readme = "README.md"
6
- authors = [{ name = "Nathan Collier", email = "nathaniel.collier@gmail.com" }]
7
- requires-python = ">=3.11"
8
- classifiers = [
9
- "Development Status :: 4 - Beta",
10
- "Intended Audience :: Developers",
11
- "Operating System :: OS Independent",
12
- "Intended Audience :: Science/Research",
13
- "Programming Language :: Python",
14
- "Programming Language :: Python :: 3",
15
- "Programming Language :: Python :: 3.11",
16
- "Programming Language :: Python :: 3.12",
17
- "Programming Language :: Python :: 3.13",
18
- "Topic :: Scientific/Engineering",
19
- ]
20
- dependencies = [
21
- "climate-ref-core",
22
- "ilamb3>=2025.4.28",
23
- "types-pyyaml>=6.0.12.20241230",
24
- ]
25
-
26
- [project.license]
27
- text = "Apache-2.0"
28
-
29
- [tool.uv]
30
- dev-dependencies = []
31
-
32
- [build-system]
33
- requires = ["hatchling"]
34
- build-backend = "hatchling.build"
@@ -1,45 +0,0 @@
1
- registry: ilamb
2
-
3
- gpp-WECANN:
4
- sources:
5
- gpp: gpp/WECANN/gpp.nc
6
- relationships:
7
- pr: pr/GPCPv2.3/pr.nc
8
- tas: tas/CRU4.02/tas.nc
9
- variable_cmap: Greens
10
-
11
- gpp-FLUXNET2015:
12
- sources:
13
- gpp: gpp/FLUXNET2015/gpp.nc
14
- variable_cmap: Greens
15
-
16
- mrro-LORA:
17
- sources:
18
- mrro: mrro/LORA/LORA.nc
19
- variable_cmap: Blues
20
-
21
- mrsos-WangMao:
22
- sources:
23
- mrsol: mrsol/WangMao/mrsol_olc.nc
24
- alternate_vars:
25
- - mrsos
26
- depth: 0.0
27
- transform:
28
- - soil_moisture_to_vol_fraction
29
- variable_cmap: Blues
30
-
31
- cSoil-HWSD2:
32
- sources:
33
- cSoil: cSoil/HWSD2/hwsd2_cSoil.nc
34
- variable_cmap: viridis
35
-
36
- lai-AVH15C1:
37
- sources:
38
- lai: lai/AVH15C1/lai.nc
39
- variable_cmap: Greens
40
-
41
- nbp-Hoffman:
42
- analyses:
43
- - nbp
44
- sources:
45
- nbp: nbp/HOFFMAN/nbp_1850-2010.nc
@@ -1,27 +0,0 @@
1
- registry: iomb
2
-
3
- thetao-WOA2023-surface:
4
- sources:
5
- thetao: WOA/thetao_mon_WOA_A5B4_gn_200501-201412.nc
6
- variable_cmap: Reds
7
- depth: 0.0
8
- alternate_vars:
9
- - tos
10
-
11
- so-WOA2023-surface:
12
- sources:
13
- so: WOA/so_mon_WOA_A5B4_gn_200501-201412.nc
14
- variable_cmap: YlGn
15
- depth: 0.0
16
- alternate_vars:
17
- - sos
18
-
19
- amoc-RAPID:
20
- analyses:
21
- - timeseries
22
- alternate_vars:
23
- - msftmz
24
- transform:
25
- - msftmz_to_rapid
26
- sources:
27
- amoc: RAPID/amoc_mon_RAPID_BE_NA_200404-202302.nc
@@ -1,11 +0,0 @@
1
- cSoil/HWSD2/hwsd2_cSoil.nc sha1:9a6377e4c5ff457c08c194d2c376c46e003a4f84
2
- gpp/FLUXNET2015/gpp.nc sha1:16fd177e007caef2565687e2cd32884e20ef16e5
3
- gpp/WECANN/gpp.nc sha1:6e864a6ae201195cdf995a3a81720188af441e13
4
- lai/AVH15C1/lai.nc sha1:ccace4f84912d63acbb9ee09ee7b743412207a0d
5
- mrro/LORA/LORA.nc sha1:72bb16787877591d0c54a36d74697d0d208f985a
6
- mrsol/WangMao/mrsol_olc.nc sha1:24cbc9df69569bed3a39c20e499cfe4f911bd30e
7
- regions/GlobalLand.nc sha1:2f987d44fdba6ad0e72d14d6a2fecb7e8df2a9c5
8
- regions/Koppen_coarse.nc sha1:e464030db49f0295a6a22a81ca602b0f3c499b72
9
- pr/GPCPv2.3/pr.nc sha1:e1b942863ec76a75aa972b6d75e2e08646741259
10
- tas/CRU4.02/tas.nc sha1:2674da18a1a93483b50b1626e7a7ab741bf53d09
11
- nbp/HOFFMAN/nbp_1850-2010.nc sha1:8350af00614d6afc6b70ad314aa499a9ece80ec2
@@ -1,3 +0,0 @@
1
- WOA/so_mon_WOA_A5B4_gn_200501-201412.nc sha1:831c42c3b2ba443c255150289a2c725d7f3e5838
2
- WOA/thetao_mon_WOA_A5B4_gn_200501-201412.nc sha1:86d9056208291d76233e65b26c658c1fa54c3ea6
3
- RAPID/amoc_mon_RAPID_BE_NA_200404-202302.nc sha1:3efe773e5c2a3c832977791ff7fd9cb9f473fe65
@@ -1,3 +0,0 @@
1
- test/Site/tas.nc sha1:3e757c9feadea581a92e6f4afb402b86168e113a
2
- test/Grid/gpp.nc sha1:2688085f95fa13617b20c723c66b0009bd661847
3
- test/Grid/pr.nc sha1:d667eb216939f5797d59b59cb4ade5ec31e67659
@@ -1,207 +0,0 @@
1
- from pathlib import Path
2
- from typing import Any
3
-
4
- import ilamb3 # type: ignore
5
- import ilamb3.regions as ilr # type: ignore
6
- import matplotlib.pyplot as plt
7
- import pandas as pd
8
- import pooch
9
- from ilamb3 import run
10
-
11
- from climate_ref_core.dataset_registry import dataset_registry_manager
12
- from climate_ref_core.datasets import FacetFilter, SourceDatasetType
13
- from climate_ref_core.diagnostics import (
14
- DataRequirement,
15
- Diagnostic,
16
- ExecutionDefinition,
17
- ExecutionResult,
18
- )
19
- from climate_ref_core.pycmec.metric import CMECMetric
20
- from climate_ref_core.pycmec.output import CMECOutput
21
- from climate_ref_ilamb.datasets import (
22
- registry_to_collection,
23
- )
24
-
25
-
26
- def _build_cmec_bundle(name: str, df: pd.DataFrame) -> dict[str, Any]:
27
- """
28
- Build a CMEC bundle from information in the dataframe.
29
-
30
- TODO: Migrate to use pycmec when ready.
31
- TODO: Add plots and html output.
32
- """
33
- ilamb_regions = ilr.Regions()
34
- bundle = {
35
- "DIMENSIONS": {
36
- "json_structure": ["region", "model", "metric", "statistic"],
37
- "region": {
38
- r: {
39
- "LongName": "None" if r == "None" else ilamb_regions.get_name(r),
40
- "Description": "Reference data extents" if r == "None" else ilamb_regions.get_name(r),
41
- "Generator": "N/A" if r == "None" else ilamb_regions.get_source(r),
42
- }
43
- for r in df["region"].unique()
44
- },
45
- "model": {m: {"Description": m, "Source": m} for m in df["source"].unique() if m != "Reference"},
46
- "metric": {
47
- name: {
48
- "Name": name,
49
- "Abstract": "benchmark score",
50
- "URI": [
51
- "https://www.osti.gov/biblio/1330803",
52
- "https://doi.org/10.1029/2018MS001354",
53
- ],
54
- "Contact": "forrest AT climatemodeling.org",
55
- }
56
- },
57
- "statistic": {s: {} for s in df["name"].unique()},
58
- },
59
- "RESULTS": {
60
- r: {
61
- m: {
62
- name: {
63
- s: float(
64
- df[(df["source"] == m) & (df["region"] == r) & (df["name"] == s)].iloc[0]["value"]
65
- )
66
- for s in df["name"].unique()
67
- }
68
- }
69
- for m in df["source"].unique()
70
- if m != "Reference"
71
- }
72
- for r in df["region"].unique()
73
- },
74
- }
75
- return bundle
76
-
77
-
78
- def _form_bundles(key: str, df: pd.DataFrame) -> tuple[CMECMetric, CMECOutput]:
79
- """
80
- Create the output bundles (really a lift to make Ruff happy with the size of run()).
81
- """
82
- metric_bundle = _build_cmec_bundle(key, df)
83
- output_bundle = CMECOutput.create_template()
84
- return CMECMetric.model_validate(metric_bundle), CMECOutput.model_validate(output_bundle)
85
-
86
-
87
- def _set_ilamb3_options(registry: pooch.Pooch, registry_file: str) -> None:
88
- """
89
- Set options for ILAMB based on which registry file is being used.
90
- """
91
- ilamb3.conf.reset()
92
- ilamb_regions = ilr.Regions()
93
- if registry_file == "ilamb":
94
- ilamb_regions.add_netcdf(registry.fetch("regions/GlobalLand.nc"))
95
- ilamb_regions.add_netcdf(registry.fetch("regions/Koppen_coarse.nc"))
96
- ilamb3.conf.set(regions=["global", "tropical"])
97
-
98
-
99
- def _measure_facets(registry_file: str) -> list[str]:
100
- """
101
- Set options for ILAMB based on which registry file is being used.
102
- """
103
- if registry_file == "ilamb":
104
- return ["areacella", "sftlf"]
105
- return []
106
-
107
-
108
- def _load_csv_and_merge(output_directory: Path) -> pd.DataFrame:
109
- """
110
- Load individual csv scalar data and merge into a dataframe.
111
- """
112
- df = pd.concat(
113
- [pd.read_csv(f, keep_default_na=False, na_values=["NaN"]) for f in output_directory.glob("*.csv")]
114
- ).drop_duplicates(subset=["source", "region", "analysis", "name"])
115
- return df
116
-
117
-
118
- class ILAMBStandard(Diagnostic):
119
- """
120
- Apply the standard ILAMB analysis with respect to a given reference dataset.
121
- """
122
-
123
- def __init__(
124
- self,
125
- registry_file: str,
126
- metric_name: str,
127
- sources: dict[str, str],
128
- **ilamb_kwargs: Any,
129
- ):
130
- # Setup the diagnostic
131
- if len(sources) != 1:
132
- raise ValueError("Only single source ILAMB diagnostics have been implemented.")
133
- self.variable_id = next(iter(sources.keys()))
134
- if "sources" not in ilamb_kwargs: # pragma: no cover
135
- ilamb_kwargs["sources"] = sources
136
- if "relationships" not in ilamb_kwargs:
137
- ilamb_kwargs["relationships"] = {}
138
- self.ilamb_kwargs = ilamb_kwargs
139
-
140
- # REF stuff
141
- self.name = metric_name
142
- self.slug = self.name.lower().replace(" ", "-")
143
- self.data_requirements = (
144
- DataRequirement(
145
- source_type=SourceDatasetType.CMIP6,
146
- filters=(
147
- FacetFilter(
148
- facets={
149
- "variable_id": (
150
- self.variable_id,
151
- *ilamb_kwargs.get("relationships", {}).keys(),
152
- *ilamb_kwargs.get("alternate_vars", []),
153
- *_measure_facets(registry_file),
154
- )
155
- }
156
- ),
157
- FacetFilter(facets={"frequency": ("mon", "fx")}),
158
- FacetFilter(facets={"experiment_id": ("historical", "land-hist")}),
159
- ),
160
- group_by=("experiment_id",),
161
- ),
162
- )
163
- self.facets = ("region", "model", "metric", "statistic")
164
-
165
- # Setup ILAMB data and options
166
- self.registry_file = registry_file
167
- self.registry = dataset_registry_manager[self.registry_file]
168
- self.ilamb_data = registry_to_collection(
169
- dataset_registry_manager[self.registry_file],
170
- )
171
-
172
- def execute(self, definition: ExecutionDefinition) -> None:
173
- """
174
- Run the ILAMB standard analysis.
175
- """
176
- plt.rcParams.update({"figure.max_open_warning": 0})
177
- _set_ilamb3_options(self.registry, self.registry_file)
178
- ref_datasets = self.ilamb_data.datasets.set_index(self.ilamb_data.slug_column)
179
- run.run_simple(
180
- ref_datasets,
181
- self.slug,
182
- definition.datasets[SourceDatasetType.CMIP6].datasets,
183
- definition.output_directory,
184
- **self.ilamb_kwargs,
185
- )
186
-
187
- def build_execution_result(self, definition: ExecutionDefinition) -> ExecutionResult:
188
- """
189
- Build the diagnostic result after running ILAMB.
190
-
191
- Parameters
192
- ----------
193
- definition
194
- The definition of the diagnostic execution
195
-
196
- Returns
197
- -------
198
- An execution result object
199
- """
200
- _set_ilamb3_options(self.registry, self.registry_file)
201
-
202
- df = _load_csv_and_merge(definition.output_directory)
203
- metric_bundle, output_bundle = _form_bundles(definition.key, df)
204
-
205
- return ExecutionResult.build_from_output_bundle(
206
- definition, cmec_output_bundle=output_bundle, cmec_metric_bundle=metric_bundle
207
- )