climate-ref-ilamb 0.6.5__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,7 +25,7 @@ mrsos-WangMao:
25
25
  mrsol: ilamb/mrsol/WangMao/mrsol_olc.nc
26
26
  alternate_vars:
27
27
  - mrsos
28
- transform:
28
+ transforms:
29
29
  - select_depth:
30
30
  value: 0
31
31
  - soil_moisture_to_vol_fraction
@@ -5,7 +5,7 @@ thetao-WOA2023-surface:
5
5
  # TODO: Update to use the obs4REF equiv
6
6
  thetao: ilamb/WOA/thetao_mon_WOA_A5B4_gn_200501-201412.nc
7
7
  variable_cmap: Reds
8
- transform:
8
+ transforms:
9
9
  - select_depth:
10
10
  value: 0
11
11
  alternate_vars:
@@ -15,7 +15,7 @@ so-WOA2023-surface:
15
15
  sources:
16
16
  # TODO: Update to use the obs4REF equiv
17
17
  so: ilamb/WOA/so_mon_WOA_A5B4_gn_200501-201412.nc
18
- transform:
18
+ transforms:
19
19
  - select_depth:
20
20
  value: 0
21
21
  variable_cmap: YlGn
@@ -27,7 +27,7 @@ amoc-RAPID:
27
27
  - timeseries
28
28
  related_vars:
29
29
  - msftmz
30
- transform:
30
+ transforms:
31
31
  - msftmz_to_rapid
32
32
  sources:
33
33
  # TODO: Update to use the obs4REF equiv
@@ -39,10 +39,10 @@ ohc-NOAA:
39
39
  related_vars:
40
40
  - thetao
41
41
  - volcello
42
- transform:
42
+ transforms:
43
43
  - select_depth:
44
- min: 0
45
- max: 2000
44
+ vmin: 0
45
+ vmax: 2000
46
46
  - ocean_heat_content:
47
47
  reference_year: 2005
48
48
  analyses:
@@ -1,14 +1,15 @@
1
1
  from pathlib import Path
2
2
  from typing import Any
3
3
 
4
- import ilamb3 # type: ignore
5
- import ilamb3.regions as ilr # type: ignore
6
- import matplotlib.pyplot as plt
4
+ import dask.config
5
+ import ilamb3
6
+ import ilamb3.regions as ilr
7
7
  import pandas as pd
8
8
  import pooch
9
+ import xarray as xr
9
10
  from ilamb3 import run
10
11
 
11
- from climate_ref_core.constraints import AddSupplementaryDataset
12
+ from climate_ref_core.constraints import AddSupplementaryDataset, RequireFacets
12
13
  from climate_ref_core.dataset_registry import dataset_registry_manager
13
14
  from climate_ref_core.datasets import FacetFilter, SourceDatasetType
14
15
  from climate_ref_core.diagnostics import (
@@ -17,8 +18,9 @@ from climate_ref_core.diagnostics import (
17
18
  ExecutionDefinition,
18
19
  ExecutionResult,
19
20
  )
21
+ from climate_ref_core.metric_values.typing import SeriesMetricValue
20
22
  from climate_ref_core.pycmec.metric import CMECMetric
21
- from climate_ref_core.pycmec.output import CMECOutput
23
+ from climate_ref_core.pycmec.output import CMECOutput, OutputCV
22
24
  from climate_ref_ilamb.datasets import (
23
25
  registry_to_collection,
24
26
  )
@@ -101,16 +103,7 @@ def _build_cmec_bundle(df: pd.DataFrame) -> dict[str, Any]:
101
103
  # reference_df = df[df["source"] == "Reference"]
102
104
  model_df = df[df["source"] != "Reference"]
103
105
 
104
- # Source is formatted as "ACCESS-ESM1-5-r1i1p1f1-gn"
105
- # This assumes that the member_id and grid_label are always the last two parts of the source string
106
- # and don't contain '-'
107
- extracted_source = model_df.source.str.extract(r"([\w-]+)-([\w\d]+)-([\w\d]+)")
108
- model_df.loc[:, "source_id"] = extracted_source[0]
109
- model_df.loc[:, "member_id"] = extracted_source[1]
110
- model_df.loc[:, "grid_label"] = extracted_source[2]
111
-
112
- # Strip out units from the name
113
- # These are available in the attributes
106
+ # Strip out units from the name (available in the attributes)
114
107
  extracted_source = model_df.name.str.extract(r"(.*)\s\[.*\]")
115
108
  model_df.loc[:, "name"] = extracted_source[0]
116
109
 
@@ -149,25 +142,21 @@ def _build_cmec_bundle(df: pd.DataFrame) -> dict[str, Any]:
149
142
  return bundle
150
143
 
151
144
 
152
- def _form_bundles(df: pd.DataFrame) -> tuple[CMECMetric, CMECOutput]:
153
- """
154
- Create the output bundles (really a lift to make Ruff happy with the size of run()).
155
- """
156
- metric_bundle = _build_cmec_bundle(df)
157
- output_bundle = CMECOutput.create_template()
158
- return CMECMetric.model_validate(metric_bundle), CMECOutput.model_validate(output_bundle)
159
-
160
-
161
145
  def _set_ilamb3_options(registry: pooch.Pooch, registry_file: str) -> None:
162
146
  """
163
147
  Set options for ILAMB based on which registry file is being used.
164
148
  """
165
- ilamb3.conf.reset()
149
+ ilamb3.conf.reset() # type: ignore
166
150
  ilamb_regions = ilr.Regions()
167
151
  if registry_file == "ilamb":
168
152
  ilamb_regions.add_netcdf(registry.fetch("ilamb/regions/GlobalLand.nc"))
169
153
  ilamb_regions.add_netcdf(registry.fetch("ilamb/regions/Koppen_coarse.nc"))
170
154
  ilamb3.conf.set(regions=["global", "tropical"])
155
+ # REF's data requirement correctly will add measure data from another
156
+ # ensemble, but internally I also groupby. Since REF is only giving 1
157
+ # source_id/member_id/grid_label at a time, relax the groupby option here so
158
+ # these measures are part of the dataframe in ilamb3.
159
+ ilamb3.conf.set(comparison_groupby=["source_id", "grid_label"])
171
160
 
172
161
 
173
162
  def _load_csv_and_merge(output_directory: Path) -> pd.DataFrame:
@@ -213,29 +202,63 @@ class ILAMBStandard(Diagnostic):
213
202
  facets={
214
203
  "variable_id": (
215
204
  self.variable_id,
216
- *ilamb_kwargs.get("relationships", {}).keys(),
217
205
  *ilamb_kwargs.get("alternate_vars", []),
218
206
  *ilamb_kwargs.get("related_vars", []),
219
- )
207
+ *ilamb_kwargs.get("relationships", {}).keys(),
208
+ ),
209
+ "frequency": "mon",
210
+ "experiment_id": ("historical", "land-hist"),
211
+ "table_id": (
212
+ "AERmonZ",
213
+ "Amon",
214
+ "CFmon",
215
+ "Emon",
216
+ "EmonZ",
217
+ "LImon",
218
+ "Lmon",
219
+ "Omon",
220
+ "SImon",
221
+ ),
220
222
  }
221
223
  ),
222
- FacetFilter(facets={"frequency": ("mon",)}),
223
- FacetFilter(facets={"experiment_id": ("historical", "land-hist")}),
224
- # Exclude unneeded snc tables
225
- FacetFilter(facets={"table_id": ("ImonAnt", "ImonGre")}, keep=False),
226
224
  ),
227
225
  constraints=(
228
- AddSupplementaryDataset.from_defaults("areacella", SourceDatasetType.CMIP6),
229
- AddSupplementaryDataset.from_defaults("sftlf", SourceDatasetType.CMIP6),
230
- )
231
- if registry_file == "ilamb"
232
- else (
233
- AddSupplementaryDataset.from_defaults("areacello", SourceDatasetType.CMIP6),
234
- AddSupplementaryDataset.from_defaults("sftof", SourceDatasetType.CMIP6),
226
+ RequireFacets(
227
+ "variable_id",
228
+ (
229
+ self.variable_id,
230
+ *ilamb_kwargs.get("alternate_vars", []),
231
+ *ilamb_kwargs.get("related_vars", []),
232
+ ),
233
+ operator="any",
234
+ ),
235
+ *(
236
+ [
237
+ RequireFacets(
238
+ "variable_id",
239
+ required_facets=tuple(ilamb_kwargs.get("relationships", {}).keys()),
240
+ )
241
+ ]
242
+ if "relationships" in ilamb_kwargs
243
+ else []
244
+ ),
245
+ *(
246
+ (
247
+ AddSupplementaryDataset.from_defaults("areacella", SourceDatasetType.CMIP6),
248
+ AddSupplementaryDataset.from_defaults("sftlf", SourceDatasetType.CMIP6),
249
+ )
250
+ if registry_file == "ilamb"
251
+ else (
252
+ AddSupplementaryDataset.from_defaults("volcello", SourceDatasetType.CMIP6),
253
+ AddSupplementaryDataset.from_defaults("areacello", SourceDatasetType.CMIP6),
254
+ AddSupplementaryDataset.from_defaults("sftof", SourceDatasetType.CMIP6),
255
+ )
256
+ ),
235
257
  ),
236
- group_by=("experiment_id",),
258
+ group_by=("experiment_id", "source_id", "member_id", "grid_label"),
237
259
  ),
238
260
  )
261
+
239
262
  self.facets = (
240
263
  "experiment_id",
241
264
  "source_id",
@@ -257,16 +280,18 @@ class ILAMBStandard(Diagnostic):
257
280
  """
258
281
  Run the ILAMB standard analysis.
259
282
  """
260
- plt.rcParams.update({"figure.max_open_warning": 0})
261
283
  _set_ilamb3_options(self.registry, self.registry_file)
262
284
  ref_datasets = self.ilamb_data.datasets.set_index(self.ilamb_data.slug_column)
263
- run.run_simple(
264
- ref_datasets,
265
- self.slug,
266
- definition.datasets[SourceDatasetType.CMIP6].datasets,
267
- definition.output_directory,
268
- **self.ilamb_kwargs,
269
- )
285
+
286
+ # Run ILAMB in a single-threaded mode to avoid issues with multithreading (#394)
287
+ with dask.config.set(scheduler="synchronous"):
288
+ run.run_single_block(
289
+ self.slug,
290
+ ref_datasets,
291
+ definition.datasets[SourceDatasetType.CMIP6].datasets,
292
+ definition.output_directory,
293
+ **self.ilamb_kwargs,
294
+ )
270
295
 
271
296
  def build_execution_result(self, definition: ExecutionDefinition) -> ExecutionResult:
272
297
  """
@@ -281,15 +306,162 @@ class ILAMBStandard(Diagnostic):
281
306
  -------
282
307
  An execution result object
283
308
  """
284
- selectors = definition.datasets[SourceDatasetType.CMIP6].selector_dict()
285
309
  _set_ilamb3_options(self.registry, self.registry_file)
286
-
310
+ # In ILAMB, scalars are saved in CSV files in the output directory. To
311
+ # be compatible with the REF system we will need to add the metadata
312
+ # that is associated with the execution group, called the selector.
287
313
  df = _load_csv_and_merge(definition.output_directory)
288
- # Add the selectors to the dataframe
289
- for key, value in selectors.items():
314
+ selectors = definition.datasets[SourceDatasetType.CMIP6].selector_dict()
315
+
316
+ # TODO: Fix reference data once we are using the obs4MIPs dataset
317
+ dataset_source = self.name.split("-")[1] if "-" in self.name else "None"
318
+ common_dimensions = {**selectors, "reference_source_id": dataset_source}
319
+ for key, value in common_dimensions.items():
290
320
  df[key] = value
291
- metric_bundle, output_bundle = _form_bundles(df)
321
+ metric_bundle = CMECMetric.model_validate(_build_cmec_bundle(df))
322
+
323
+ # Add each png file plot to the output
324
+ output_bundle = CMECOutput.create_template()
325
+ for plotfile in definition.output_directory.glob("*.png"):
326
+ relative_path = str(definition.as_relative_path(plotfile))
327
+ caption, figure_dimensions = _caption_from_filename(plotfile, common_dimensions)
328
+
329
+ output_bundle[OutputCV.PLOTS.value][relative_path] = {
330
+ OutputCV.FILENAME.value: relative_path,
331
+ OutputCV.LONG_NAME.value: caption,
332
+ OutputCV.DESCRIPTION.value: "",
333
+ OutputCV.DIMENSIONS.value: figure_dimensions,
334
+ }
335
+
336
+ # Add the html page to the output
337
+ index_html = definition.to_output_path("index.html")
338
+ if index_html.exists():
339
+ relative_path = str(definition.as_relative_path(index_html))
340
+ output_bundle[OutputCV.HTML.value][relative_path] = {
341
+ OutputCV.FILENAME.value: relative_path,
342
+ OutputCV.LONG_NAME.value: "Results page",
343
+ OutputCV.DESCRIPTION.value: "Page displaying scalars and plots from the ILAMB execution.",
344
+ OutputCV.DIMENSIONS.value: common_dimensions,
345
+ }
346
+ output_bundle[OutputCV.INDEX.value] = relative_path
347
+
348
+ # Add series to the output based on the time traces we find in the
349
+ # output files
350
+ series = []
351
+ for ncfile in definition.output_directory.glob("*.nc"):
352
+ ds = xr.open_dataset(ncfile, use_cftime=True)
353
+ for name, da in ds.items():
354
+ # Only create series for 1d DataArray's with these dimensions
355
+ if not (da.ndim == 1 and set(da.dims).intersection(["time", "month"])):
356
+ continue
357
+ # Convert dimension values
358
+ attrs = {
359
+ "units": da.attrs.get("units", ""),
360
+ "long_name": da.attrs.get("long_name", str(name)),
361
+ "standard_name": da.attrs.get("standard_name", ""),
362
+ }
363
+ str_name = str(name)
364
+ index_name = str(da.dims[0])
365
+ index = ds[index_name].values.tolist()
366
+ if hasattr(index[0], "isoformat"):
367
+ index = [v.isoformat() for v in index]
368
+ if hasattr(index[0], "calendar"):
369
+ attrs["calendar"] = index[0].calendar
370
+
371
+ # Parse out some dimensions
372
+ if ncfile.stem == "Reference":
373
+ dimensions = {
374
+ "source_id": "Reference",
375
+ "metric": str_name,
376
+ }
377
+ else:
378
+ dimensions = {"metric": str_name, **common_dimensions}
379
+
380
+ # Split the metric into metric and region if possible
381
+ if "_" in str_name:
382
+ dimensions["metric"] = str_name.split("_")[0]
383
+ dimensions["region"] = str_name.split("_")[1]
384
+ else:
385
+ dimensions["region"] = "None"
386
+
387
+ series.append(
388
+ SeriesMetricValue(
389
+ dimensions=dimensions,
390
+ values=da.values.tolist(),
391
+ index=index,
392
+ index_name=index_name,
393
+ attributes=attrs,
394
+ )
395
+ )
292
396
 
293
397
  return ExecutionResult.build_from_output_bundle(
294
- definition, cmec_output_bundle=output_bundle, cmec_metric_bundle=metric_bundle
398
+ definition, cmec_output_bundle=output_bundle, cmec_metric_bundle=metric_bundle, series=series
295
399
  )
400
+
401
+
402
+ def _caption_from_filename(filename: Path, common_dimensions: dict[str, str]) -> tuple[str, dict[str, str]]:
403
+ source, region, plot = filename.stem.split("_")
404
+ plot_texts = {
405
+ "bias": "bias",
406
+ "biasscore": "bias score",
407
+ "cycle": "annual cycle",
408
+ "cyclescore": "annual cycle score",
409
+ "mean": "period mean",
410
+ "rmse": "RMSE",
411
+ "rmsescore": "RMSE score",
412
+ "shift": "shift in maximum month",
413
+ "tmax": "maxmimum month",
414
+ "trace": "regional mean",
415
+ "taylor": "Taylor diagram",
416
+ "distribution": "distribution",
417
+ "response": "response",
418
+ }
419
+ # Name of statistics dimension in CMEC output
420
+ plot_statistics = {
421
+ "bias": "Bias",
422
+ "biasscore": "Bias score",
423
+ "cycle": "Annual cycle",
424
+ "cyclescore": "Annual cycle score",
425
+ "mean": "Period Mean",
426
+ "rmse": "RMSE",
427
+ "rmsescore": "RMSE score",
428
+ "shift": "Shift in maximum month",
429
+ "tmax": "Maximum month",
430
+ "trace": "Regional mean",
431
+ "taylor": "Taylor diagram",
432
+ "distribution": "Distribution",
433
+ "response": "Response",
434
+ }
435
+ figure_dimensions = {
436
+ "region": region,
437
+ }
438
+ plot_option = None
439
+ # Some plots have options appended with a dash (distribution-pr, response-tas)
440
+ if "-" in plot:
441
+ plot, plot_option = plot.split("-", 1)
442
+
443
+ if plot not in plot_texts:
444
+ return "", figure_dimensions
445
+
446
+ # Build the caption
447
+ caption = f"The {plot_texts.get(plot)}"
448
+ if plot_option is not None:
449
+ caption += f" of {plot_option}"
450
+ if source != "None":
451
+ caption += f" for {'the reference data' if source == 'Reference' else source}"
452
+ if region.lower() != "none":
453
+ caption += f" over the {ilr.Regions().get_name(region)} region."
454
+
455
+ # Use the statistic dimension to determine what is being plotted
456
+ if plot_statistics.get(plot) is not None:
457
+ figure_dimensions["statistic"] = plot_statistics[plot]
458
+ if plot_option is not None:
459
+ figure_dimensions["statistic"] += f"|{plot_option}"
460
+
461
+ # If the source is the reference we don't need some dimensions as they are not applicable
462
+ if source == "Reference":
463
+ figure_dimensions["source_id"] = "Reference"
464
+ else:
465
+ figure_dimensions = {**common_dimensions, **figure_dimensions}
466
+
467
+ return caption, figure_dimensions
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: climate-ref-ilamb
3
- Version: 0.6.5
3
+ Version: 0.7.0
4
4
  Summary: ILAMB diagnostic provider for the Rapid Evaluation Framework
5
5
  Author-email: Nathan Collier <nathaniel.collier@gmail.com>, Jared Lewis <jared.lewis@climate-resource.com>
6
6
  License-Expression: Apache-2.0
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: 3.13
19
19
  Classifier: Topic :: Scientific/Engineering
20
20
  Requires-Python: >=3.11
21
21
  Requires-Dist: climate-ref-core
22
- Requires-Dist: ilamb3>=2025.5.20
22
+ Requires-Dist: ilamb3>=2025.9.9
23
23
  Requires-Dist: scipy<1.16
24
24
  Description-Content-Type: text/markdown
25
25
 
@@ -1,15 +1,15 @@
1
1
  climate_ref_ilamb/__init__.py,sha256=hMEkSjBY3yo-EbdMNOIvMSdGK14G2s5PERmWrBEtzFk,1414
2
2
  climate_ref_ilamb/datasets.py,sha256=MVCt1pxV5dIfYLm6huC0BZWP5stCamYNwXzc7kKW5AI,799
3
3
  climate_ref_ilamb/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- climate_ref_ilamb/standard.py,sha256=rMI-GERTn_rg6qkp7jvzFZVAc7AZHkknEtNYFbo-Bak,10556
5
- climate_ref_ilamb/configure/ilamb.yaml,sha256=4lzZhtn4nq4hV0qjXq9mWlN2cqIkhh_ICtYlL4gtW3k,1194
6
- climate_ref_ilamb/configure/iomb.yaml,sha256=PxJAivXKNiYp-HbaYOJgUO8LnL9fmSztQKxLM_jFyvQ,944
4
+ climate_ref_ilamb/standard.py,sha256=oz1J6lbjg0kqG2vkTgSIrd_t7yDud1HiLIILeStL5Ig,17873
5
+ climate_ref_ilamb/configure/ilamb.yaml,sha256=keUmj7Oih-AepogB7PTwN56DTb0K0k_x1CkSbbhZjJ0,1195
6
+ climate_ref_ilamb/configure/iomb.yaml,sha256=AQ4MZFDeD4Sw-yDnLb4C-ID2JFk9jfhk_2UPUHTyH78,950
7
7
  climate_ref_ilamb/dataset_registry/ilamb.txt,sha256=_zqrq-Sa-0NTjPDFX6nQIeUalEc7tPrKr_CssOBlseg,1030
8
8
  climate_ref_ilamb/dataset_registry/iomb.txt,sha256=b95CUBYEGfeoPyRGx_E267c-2GF-E_lc4XeFkNSOJMo,375
9
9
  climate_ref_ilamb/dataset_registry/test.txt,sha256=gBjUJ6W-crghYqKN0QOFmjyqpMxKK50dU3SYTuIA6jM,206
10
- climate_ref_ilamb-0.6.5.dist-info/METADATA,sha256=OKlmtX3Tp7e5hb_tBdj_qOBl_qbH7PjlTBYQerZyS64,2343
11
- climate_ref_ilamb-0.6.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
- climate_ref_ilamb-0.6.5.dist-info/entry_points.txt,sha256=SnRhJk7KRiGd3jL4OMA2SId5p838T95kGcVrr3wtZAQ,59
13
- climate_ref_ilamb-0.6.5.dist-info/licenses/LICENCE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
14
- climate_ref_ilamb-0.6.5.dist-info/licenses/NOTICE,sha256=4qTlax9aX2-mswYJuVrLqJ9jK1IkN5kSBqfVvYLF3Ws,128
15
- climate_ref_ilamb-0.6.5.dist-info/RECORD,,
10
+ climate_ref_ilamb-0.7.0.dist-info/METADATA,sha256=ECoWJL9GEKTyyvrRunXcuO4e7oReRnu-suHyHxqQeTo,2342
11
+ climate_ref_ilamb-0.7.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
+ climate_ref_ilamb-0.7.0.dist-info/entry_points.txt,sha256=SnRhJk7KRiGd3jL4OMA2SId5p838T95kGcVrr3wtZAQ,59
13
+ climate_ref_ilamb-0.7.0.dist-info/licenses/LICENCE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
14
+ climate_ref_ilamb-0.7.0.dist-info/licenses/NOTICE,sha256=4qTlax9aX2-mswYJuVrLqJ9jK1IkN5kSBqfVvYLF3Ws,128
15
+ climate_ref_ilamb-0.7.0.dist-info/RECORD,,