climate-ref-ilamb 0.6.6__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (18)
  1. {climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/PKG-INFO +1 -1
  2. {climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/pyproject.toml +1 -1
  3. {climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/src/climate_ref_ilamb/configure/iomb.yaml +2 -2
  4. {climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/src/climate_ref_ilamb/standard.py +121 -30
  5. {climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/.gitignore +0 -0
  6. {climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/LICENCE +0 -0
  7. {climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/NOTICE +0 -0
  8. {climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/README.md +0 -0
  9. {climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/src/climate_ref_ilamb/__init__.py +0 -0
  10. {climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/src/climate_ref_ilamb/configure/ilamb.yaml +0 -0
  11. {climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/src/climate_ref_ilamb/dataset_registry/ilamb.txt +0 -0
  12. {climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/src/climate_ref_ilamb/dataset_registry/iomb.txt +0 -0
  13. {climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/src/climate_ref_ilamb/dataset_registry/test.txt +0 -0
  14. {climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/src/climate_ref_ilamb/datasets.py +0 -0
  15. {climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/src/climate_ref_ilamb/py.typed +0 -0
  16. {climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/tests/integration/test_diagnostics.py +0 -0
  17. {climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/tests/unit/test_provider.py +0 -0
  18. {climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/tests/unit/test_standard_metrics.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: climate-ref-ilamb
3
- Version: 0.6.6
3
+ Version: 0.8.0
4
4
  Summary: ILAMB diagnostic provider for the Rapid Evaluation Framework
5
5
  Author-email: Nathan Collier <nathaniel.collier@gmail.com>, Jared Lewis <jared.lewis@climate-resource.com>
6
6
  License-Expression: Apache-2.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "climate-ref-ilamb"
3
- version = "0.6.6"
3
+ version = "0.8.0"
4
4
  description = "ILAMB diagnostic provider for the Rapid Evaluation Framework"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -41,8 +41,8 @@ ohc-NOAA:
41
41
  - volcello
42
42
  transforms:
43
43
  - select_depth:
44
- min: 0
45
- max: 2000
44
+ vmin: 0
45
+ vmax: 2000
46
46
  - ocean_heat_content:
47
47
  reference_year: 2005
48
48
  analyses:
@@ -1,6 +1,7 @@
1
1
  from pathlib import Path
2
2
  from typing import Any
3
3
 
4
+ import dask.config
4
5
  import ilamb3
5
6
  import ilamb3.regions as ilr
6
7
  import pandas as pd
@@ -151,6 +152,11 @@ def _set_ilamb3_options(registry: pooch.Pooch, registry_file: str) -> None:
151
152
  ilamb_regions.add_netcdf(registry.fetch("ilamb/regions/GlobalLand.nc"))
152
153
  ilamb_regions.add_netcdf(registry.fetch("ilamb/regions/Koppen_coarse.nc"))
153
154
  ilamb3.conf.set(regions=["global", "tropical"])
155
+ # REF's data requirement correctly will add measure data from another
156
+ # ensemble, but internally I also groupby. Since REF is only giving 1
157
+ # source_id/member_id/grid_label at a time, relax the groupby option here so
158
+ # these measures are part of the dataframe in ilamb3.
159
+ ilamb3.conf.set(comparison_groupby=["source_id", "grid_label"])
154
160
 
155
161
 
156
162
  def _load_csv_and_merge(output_directory: Path) -> pd.DataFrame:
@@ -199,12 +205,22 @@ class ILAMBStandard(Diagnostic):
199
205
  *ilamb_kwargs.get("alternate_vars", []),
200
206
  *ilamb_kwargs.get("related_vars", []),
201
207
  *ilamb_kwargs.get("relationships", {}).keys(),
202
- )
208
+ ),
209
+ "frequency": "mon",
210
+ "experiment_id": ("historical", "land-hist"),
211
+ "table_id": (
212
+ "AERmonZ",
213
+ "Amon",
214
+ "CFmon",
215
+ "Emon",
216
+ "EmonZ",
217
+ "LImon",
218
+ "Lmon",
219
+ "Omon",
220
+ "SImon",
221
+ ),
203
222
  }
204
223
  ),
205
- FacetFilter(facets={"frequency": ("mon",)}),
206
- FacetFilter(facets={"experiment_id": ("historical", "land-hist")}),
207
- FacetFilter(facets={"table_id": ("ImonAnt", "ImonGre")}, keep=False),
208
224
  ),
209
225
  constraints=(
210
226
  RequireFacets(
@@ -233,6 +249,7 @@ class ILAMBStandard(Diagnostic):
233
249
  )
234
250
  if registry_file == "ilamb"
235
251
  else (
252
+ AddSupplementaryDataset.from_defaults("volcello", SourceDatasetType.CMIP6),
236
253
  AddSupplementaryDataset.from_defaults("areacello", SourceDatasetType.CMIP6),
237
254
  AddSupplementaryDataset.from_defaults("sftof", SourceDatasetType.CMIP6),
238
255
  )
@@ -265,13 +282,16 @@ class ILAMBStandard(Diagnostic):
265
282
  """
266
283
  _set_ilamb3_options(self.registry, self.registry_file)
267
284
  ref_datasets = self.ilamb_data.datasets.set_index(self.ilamb_data.slug_column)
268
- run.run_single_block(
269
- self.slug,
270
- ref_datasets,
271
- definition.datasets[SourceDatasetType.CMIP6].datasets,
272
- definition.output_directory,
273
- **self.ilamb_kwargs,
274
- )
285
+
286
+ # Run ILAMB in a single-threaded mode to avoid issues with multithreading (#394)
287
+ with dask.config.set(scheduler="synchronous"):
288
+ run.run_single_block(
289
+ self.slug,
290
+ ref_datasets,
291
+ definition.datasets[SourceDatasetType.CMIP6].datasets,
292
+ definition.output_directory,
293
+ **self.ilamb_kwargs,
294
+ )
275
295
 
276
296
  def build_execution_result(self, definition: ExecutionDefinition) -> ExecutionResult:
277
297
  """
@@ -292,39 +312,54 @@ class ILAMBStandard(Diagnostic):
292
312
  # that is associated with the execution group, called the selector.
293
313
  df = _load_csv_and_merge(definition.output_directory)
294
314
  selectors = definition.datasets[SourceDatasetType.CMIP6].selector_dict()
295
- for key, value in selectors.items():
315
+
316
+ # TODO: Fix reference data once we are using the obs4MIPs dataset
317
+ dataset_source = self.name.split("-")[1] if "-" in self.name else "None"
318
+ common_dimensions = {**selectors, "reference_source_id": dataset_source}
319
+ for key, value in common_dimensions.items():
296
320
  df[key] = value
297
321
  metric_bundle = CMECMetric.model_validate(_build_cmec_bundle(df))
298
322
 
299
323
  # Add each png file plot to the output
300
324
  output_bundle = CMECOutput.create_template()
301
325
  for plotfile in definition.output_directory.glob("*.png"):
302
- output_bundle[OutputCV.PLOTS.value][f"{plotfile}"] = {
303
- OutputCV.FILENAME.value: f"{plotfile}",
304
- OutputCV.LONG_NAME.value: _caption_from_filename(plotfile),
326
+ relative_path = str(definition.as_relative_path(plotfile))
327
+ caption, figure_dimensions = _caption_from_filename(plotfile, common_dimensions)
328
+
329
+ output_bundle[OutputCV.PLOTS.value][relative_path] = {
330
+ OutputCV.FILENAME.value: relative_path,
331
+ OutputCV.LONG_NAME.value: caption,
305
332
  OutputCV.DESCRIPTION.value: "",
333
+ OutputCV.DIMENSIONS.value: figure_dimensions,
306
334
  }
307
335
 
308
336
  # Add the html page to the output
309
- index_html = str(definition.to_output_path("index.html"))
310
- output_bundle[OutputCV.HTML.value][index_html] = {
311
- OutputCV.FILENAME.value: index_html,
312
- OutputCV.LONG_NAME.value: "Results page",
313
- OutputCV.DESCRIPTION.value: "Page displaying scalars and plots from the ILAMB execution.",
314
- }
315
- output_bundle[OutputCV.INDEX.value] = index_html
337
+ index_html = definition.to_output_path("index.html")
338
+ if index_html.exists():
339
+ relative_path = str(definition.as_relative_path(index_html))
340
+ output_bundle[OutputCV.HTML.value][relative_path] = {
341
+ OutputCV.FILENAME.value: relative_path,
342
+ OutputCV.LONG_NAME.value: "Results page",
343
+ OutputCV.DESCRIPTION.value: "Page displaying scalars and plots from the ILAMB execution.",
344
+ OutputCV.DIMENSIONS.value: common_dimensions,
345
+ }
346
+ output_bundle[OutputCV.INDEX.value] = relative_path
316
347
 
317
348
  # Add series to the output based on the time traces we find in the
318
349
  # output files
319
350
  series = []
320
351
  for ncfile in definition.output_directory.glob("*.nc"):
321
- ds = xr.open_dataset(ncfile)
352
+ ds = xr.open_dataset(ncfile, use_cftime=True)
322
353
  for name, da in ds.items():
323
354
  # Only create series for 1d DataArray's with these dimensions
324
355
  if not (da.ndim == 1 and set(da.dims).intersection(["time", "month"])):
325
356
  continue
326
357
  # Convert dimension values
327
- attrs = {}
358
+ attrs = {
359
+ "units": da.attrs.get("units", ""),
360
+ "long_name": da.attrs.get("long_name", str(name)),
361
+ "standard_name": da.attrs.get("standard_name", ""),
362
+ }
328
363
  str_name = str(name)
329
364
  index_name = str(da.dims[0])
330
365
  index = ds[index_name].values.tolist()
@@ -332,10 +367,23 @@ class ILAMBStandard(Diagnostic):
332
367
  index = [v.isoformat() for v in index]
333
368
  if hasattr(index[0], "calendar"):
334
369
  attrs["calendar"] = index[0].calendar
335
- # Parse out some CVs
336
- dimensions = {"metric": str_name, "source_id": ncfile.stem}
370
+
371
+ # Parse out some dimensions
372
+ if ncfile.stem == "Reference":
373
+ dimensions = {
374
+ "source_id": "Reference",
375
+ "metric": str_name,
376
+ }
377
+ else:
378
+ dimensions = {"metric": str_name, **common_dimensions}
379
+
380
+ # Split the metric into metric and region if possible
337
381
  if "_" in str_name:
382
+ dimensions["metric"] = str_name.split("_")[0]
338
383
  dimensions["region"] = str_name.split("_")[1]
384
+ else:
385
+ dimensions["region"] = "None"
386
+
339
387
  series.append(
340
388
  SeriesMetricValue(
341
389
  dimensions=dimensions,
@@ -351,7 +399,7 @@ class ILAMBStandard(Diagnostic):
351
399
  )
352
400
 
353
401
 
354
- def _caption_from_filename(filename: Path) -> str:
402
+ def _caption_from_filename(filename: Path, common_dimensions: dict[str, str]) -> tuple[str, dict[str, str]]:
355
403
  source, region, plot = filename.stem.split("_")
356
404
  plot_texts = {
357
405
  "bias": "bias",
@@ -365,12 +413,55 @@ def _caption_from_filename(filename: Path) -> str:
365
413
  "tmax": "maxmimum month",
366
414
  "trace": "regional mean",
367
415
  "taylor": "Taylor diagram",
416
+ "distribution": "distribution",
417
+ "response": "response",
368
418
  }
419
+ # Name of statistics dimension in CMEC output
420
+ plot_statistics = {
421
+ "bias": "Bias",
422
+ "biasscore": "Bias score",
423
+ "cycle": "Annual cycle",
424
+ "cyclescore": "Annual cycle score",
425
+ "mean": "Period Mean",
426
+ "rmse": "RMSE",
427
+ "rmsescore": "RMSE score",
428
+ "shift": "Shift in maximum month",
429
+ "tmax": "Maximum month",
430
+ "trace": "Regional mean",
431
+ "taylor": "Taylor diagram",
432
+ "distribution": "Distribution",
433
+ "response": "Response",
434
+ }
435
+ figure_dimensions = {
436
+ "region": region,
437
+ }
438
+ plot_option = None
439
+ # Some plots have options appended with a dash (distribution-pr, response-tas)
440
+ if "-" in plot:
441
+ plot, plot_option = plot.split("-", 1)
442
+
369
443
  if plot not in plot_texts:
370
- return ""
444
+ return "", figure_dimensions
445
+
446
+ # Build the caption
371
447
  caption = f"The {plot_texts.get(plot)}"
448
+ if plot_option is not None:
449
+ caption += f" of {plot_option}"
372
450
  if source != "None":
373
- caption += f" of {'the reference data' if source == 'Reference' else source}"
451
+ caption += f" for {'the reference data' if source == 'Reference' else source}"
374
452
  if region.lower() != "none":
375
453
  caption += f" over the {ilr.Regions().get_name(region)} region."
376
- return caption
454
+
455
+ # Use the statistic dimension to determine what is being plotted
456
+ if plot_statistics.get(plot) is not None:
457
+ figure_dimensions["statistic"] = plot_statistics[plot]
458
+ if plot_option is not None:
459
+ figure_dimensions["statistic"] += f"|{plot_option}"
460
+
461
+ # If the source is the reference we don't need some dimensions as they are not applicable
462
+ if source == "Reference":
463
+ figure_dimensions["source_id"] = "Reference"
464
+ else:
465
+ figure_dimensions = {**common_dimensions, **figure_dimensions}
466
+
467
+ return caption, figure_dimensions