PyPI - climate-ref-ilamb - Versions diffs - 0.6.6__tar.gz → 0.8.0__tar.gz - Mend

climate-ref-ilamb 0.6.6.tar.gz → 0.8.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

{climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: climate-ref-ilamb
-Version: 0.6.6
+Version: 0.8.0
 Summary: ILAMB diagnostic provider for the Rapid Evaluation Framework
 Author-email: Nathan Collier <nathaniel.collier@gmail.com>, Jared Lewis <jared.lewis@climate-resource.com>
 License-Expression: Apache-2.0

{climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "climate-ref-ilamb"
-version = "0.6.6"
+version = "0.8.0"
 description = "ILAMB diagnostic provider for the Rapid Evaluation Framework"
 readme = "README.md"
 authors = [

{climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/src/climate_ref_ilamb/configure/iomb.yaml RENAMED Viewed

@@ -41,8 +41,8 @@ ohc-NOAA:
   - volcello
   transforms:
   - select_depth:
-      min: 0
-      max: 2000
+      vmin: 0
+      vmax: 2000
   - ocean_heat_content:
       reference_year: 2005
   analyses:

{climate_ref_ilamb-0.6.6 → climate_ref_ilamb-0.8.0}/src/climate_ref_ilamb/standard.py RENAMED Viewed

@@ -1,6 +1,7 @@
 from pathlib import Path
 from typing import Any
+import dask.config
 import ilamb3
 import ilamb3.regions as ilr
 import pandas as pd
@@ -151,6 +152,11 @@ def _set_ilamb3_options(registry: pooch.Pooch, registry_file: str) -> None:
         ilamb_regions.add_netcdf(registry.fetch("ilamb/regions/GlobalLand.nc"))
         ilamb_regions.add_netcdf(registry.fetch("ilamb/regions/Koppen_coarse.nc"))
         ilamb3.conf.set(regions=["global", "tropical"])
+    # REF's data requirement will correctly add measure data from another
+    # ensemble, but ilamb3 also applies a groupby internally. Since REF is only giving 1
+    # source_id/member_id/grid_label at a time, relax the groupby option here so
+    # these measures are part of the dataframe in ilamb3.
+    ilamb3.conf.set(comparison_groupby=["source_id", "grid_label"])
 def _load_csv_and_merge(output_directory: Path) -> pd.DataFrame:
@@ -199,12 +205,22 @@ class ILAMBStandard(Diagnostic):
                                 *ilamb_kwargs.get("alternate_vars", []),
                                 *ilamb_kwargs.get("related_vars", []),
                                 *ilamb_kwargs.get("relationships", {}).keys(),
-                            )
+                            ),
+                            "frequency": "mon",
+                            "experiment_id": ("historical", "land-hist"),
+                            "table_id": (
+                                "AERmonZ",
+                                "Amon",
+                                "CFmon",
+                                "Emon",
+                                "EmonZ",
+                                "LImon",
+                                "Lmon",
+                                "Omon",
+                                "SImon",
+                            ),
                         }
                     ),
-                    FacetFilter(facets={"frequency": ("mon",)}),
-                    FacetFilter(facets={"experiment_id": ("historical", "land-hist")}),
-                    FacetFilter(facets={"table_id": ("ImonAnt", "ImonGre")}, keep=False),
                 ),
                 constraints=(
                     RequireFacets(
@@ -233,6 +249,7 @@ class ILAMBStandard(Diagnostic):
                         )
                         if registry_file == "ilamb"
                         else (
+                            AddSupplementaryDataset.from_defaults("volcello", SourceDatasetType.CMIP6),
                             AddSupplementaryDataset.from_defaults("areacello", SourceDatasetType.CMIP6),
                             AddSupplementaryDataset.from_defaults("sftof", SourceDatasetType.CMIP6),
                         )
@@ -265,13 +282,16 @@ class ILAMBStandard(Diagnostic):
         """
         _set_ilamb3_options(self.registry, self.registry_file)
         ref_datasets = self.ilamb_data.datasets.set_index(self.ilamb_data.slug_column)
-        run.run_single_block(
-            self.slug,
-            ref_datasets,
-            definition.datasets[SourceDatasetType.CMIP6].datasets,
-            definition.output_directory,
-            **self.ilamb_kwargs,
-        )
+        # Run ILAMB in a single-threaded mode to avoid issues with multithreading (#394)
+        with dask.config.set(scheduler="synchronous"):
+            run.run_single_block(
+                self.slug,
+                ref_datasets,
+                definition.datasets[SourceDatasetType.CMIP6].datasets,
+                definition.output_directory,
+                **self.ilamb_kwargs,
+            )
     def build_execution_result(self, definition: ExecutionDefinition) -> ExecutionResult:
         """
@@ -292,39 +312,54 @@ class ILAMBStandard(Diagnostic):
         # that is associated with the execution group, called the selector.
         df = _load_csv_and_merge(definition.output_directory)
         selectors = definition.datasets[SourceDatasetType.CMIP6].selector_dict()
-        for key, value in selectors.items():
+        # TODO: Fix reference data once we are using the obs4MIPs dataset
+        dataset_source = self.name.split("-")[1] if "-" in self.name else "None"
+        common_dimensions = {**selectors, "reference_source_id": dataset_source}
+        for key, value in common_dimensions.items():
             df[key] = value
         metric_bundle = CMECMetric.model_validate(_build_cmec_bundle(df))
         # Add each png file plot to the output
         output_bundle = CMECOutput.create_template()
         for plotfile in definition.output_directory.glob("*.png"):
-            output_bundle[OutputCV.PLOTS.value][f"{plotfile}"] = {
-                OutputCV.FILENAME.value: f"{plotfile}",
-                OutputCV.LONG_NAME.value: _caption_from_filename(plotfile),
+            relative_path = str(definition.as_relative_path(plotfile))
+            caption, figure_dimensions = _caption_from_filename(plotfile, common_dimensions)
+            output_bundle[OutputCV.PLOTS.value][relative_path] = {
+                OutputCV.FILENAME.value: relative_path,
+                OutputCV.LONG_NAME.value: caption,
                 OutputCV.DESCRIPTION.value: "",
+                OutputCV.DIMENSIONS.value: figure_dimensions,
             }
         # Add the html page to the output
-        index_html = str(definition.to_output_path("index.html"))
-        output_bundle[OutputCV.HTML.value][index_html] = {
-            OutputCV.FILENAME.value: index_html,
-            OutputCV.LONG_NAME.value: "Results page",
-            OutputCV.DESCRIPTION.value: "Page displaying scalars and plots from the ILAMB execution.",
-        }
-        output_bundle[OutputCV.INDEX.value] = index_html
+        index_html = definition.to_output_path("index.html")
+        if index_html.exists():
+            relative_path = str(definition.as_relative_path(index_html))
+            output_bundle[OutputCV.HTML.value][relative_path] = {
+                OutputCV.FILENAME.value: relative_path,
+                OutputCV.LONG_NAME.value: "Results page",
+                OutputCV.DESCRIPTION.value: "Page displaying scalars and plots from the ILAMB execution.",
+                OutputCV.DIMENSIONS.value: common_dimensions,
+            }
+            output_bundle[OutputCV.INDEX.value] = relative_path
         # Add series to the output based on the time traces we find in the
         # output files
         series = []
         for ncfile in definition.output_directory.glob("*.nc"):
-            ds = xr.open_dataset(ncfile)
+            ds = xr.open_dataset(ncfile, use_cftime=True)
             for name, da in ds.items():
                 # Only create series for 1d DataArray's with these dimensions
                 if not (da.ndim == 1 and set(da.dims).intersection(["time", "month"])):
                     continue
                 # Convert dimension values
-                attrs = {}
+                attrs = {
+                    "units": da.attrs.get("units", ""),
+                    "long_name": da.attrs.get("long_name", str(name)),
+                    "standard_name": da.attrs.get("standard_name", ""),
+                }
                 str_name = str(name)
                 index_name = str(da.dims[0])
                 index = ds[index_name].values.tolist()
@@ -332,10 +367,23 @@ class ILAMBStandard(Diagnostic):
                     index = [v.isoformat() for v in index]
                 if hasattr(index[0], "calendar"):
                     attrs["calendar"] = index[0].calendar
-                # Parse out some CVs
-                dimensions = {"metric": str_name, "source_id": ncfile.stem}
+                # Parse out some dimensions
+                if ncfile.stem == "Reference":
+                    dimensions = {
+                        "source_id": "Reference",
+                        "metric": str_name,
+                    }
+                else:
+                    dimensions = {"metric": str_name, **common_dimensions}
+                # Split the metric into metric and region if possible
                 if "_" in str_name:
+                    dimensions["metric"] = str_name.split("_")[0]
                     dimensions["region"] = str_name.split("_")[1]
+                else:
+                    dimensions["region"] = "None"
                 series.append(
                     SeriesMetricValue(
                         dimensions=dimensions,
@@ -351,7 +399,7 @@ class ILAMBStandard(Diagnostic):
         )
-def _caption_from_filename(filename: Path) -> str:
+def _caption_from_filename(filename: Path, common_dimensions: dict[str, str]) -> tuple[str, dict[str, str]]:
     source, region, plot = filename.stem.split("_")
     plot_texts = {
         "bias": "bias",
@@ -365,12 +413,55 @@ def _caption_from_filename(filename: Path) -> str:
         "tmax": "maxmimum month",
         "trace": "regional mean",
         "taylor": "Taylor diagram",
+        "distribution": "distribution",
+        "response": "response",
     }
+    # Name of statistics dimension in CMEC output
+    plot_statistics = {
+        "bias": "Bias",
+        "biasscore": "Bias score",
+        "cycle": "Annual cycle",
+        "cyclescore": "Annual cycle score",
+        "mean": "Period Mean",
+        "rmse": "RMSE",
+        "rmsescore": "RMSE score",
+        "shift": "Shift in maximum month",
+        "tmax": "Maximum month",
+        "trace": "Regional mean",
+        "taylor": "Taylor diagram",
+        "distribution": "Distribution",
+        "response": "Response",
+    }
+    figure_dimensions = {
+        "region": region,
+    }
+    plot_option = None
+    # Some plots have options appended with a dash (distribution-pr, response-tas)
+    if "-" in plot:
+        plot, plot_option = plot.split("-", 1)
     if plot not in plot_texts:
-        return ""
+        return "", figure_dimensions
+    # Build the caption
     caption = f"The {plot_texts.get(plot)}"
+    if plot_option is not None:
+        caption += f" of {plot_option}"
     if source != "None":
-        caption += f" of {'the reference data' if source == 'Reference' else source}"
+        caption += f" for {'the reference data' if source == 'Reference' else source}"
     if region.lower() != "none":
         caption += f" over the {ilr.Regions().get_name(region)} region."
-    return caption
+    # Use the statistic dimension to determine what is being plotted
+    if plot_statistics.get(plot) is not None:
+        figure_dimensions["statistic"] = plot_statistics[plot]
+        if plot_option is not None:
+            figure_dimensions["statistic"] += f"|{plot_option}"
+    # If the source is the reference we don't need some dimensions as they are not applicable
+    if source == "Reference":
+        figure_dimensions["source_id"] = "Reference"
+    else:
+        figure_dimensions = {**common_dimensions, **figure_dimensions}
+    return caption, figure_dimensions