climate-ref-pmp 0.5.5-py3-none-any.whl → 0.6.1-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- climate_ref_pmp/__init__.py +10 -2
- climate_ref_pmp/diagnostics/__init__.py +2 -0
- climate_ref_pmp/diagnostics/annual_cycle.py +51 -42
- climate_ref_pmp/diagnostics/enso.py +245 -0
- climate_ref_pmp/diagnostics/variability_modes.py +70 -7
- climate_ref_pmp/drivers/enso_driver.py +458 -0
- climate_ref_pmp/requirements/conda-lock.yml +1809 -2362
- climate_ref_pmp/requirements/environment.yml +1 -0
- {climate_ref_pmp-0.5.5.dist-info → climate_ref_pmp-0.6.1.dist-info}/METADATA +3 -3
- climate_ref_pmp-0.6.1.dist-info/RECORD +20 -0
- climate_ref_pmp-0.5.5.dist-info/RECORD +0 -18
- {climate_ref_pmp-0.5.5.dist-info → climate_ref_pmp-0.6.1.dist-info}/WHEEL +0 -0
- {climate_ref_pmp-0.5.5.dist-info → climate_ref_pmp-0.6.1.dist-info}/licenses/LICENCE +0 -0
- {climate_ref_pmp-0.5.5.dist-info → climate_ref_pmp-0.6.1.dist-info}/licenses/NOTICE +0 -0
climate_ref_pmp/__init__.py CHANGED

@@ -6,7 +6,7 @@ import importlib.metadata
 
 from climate_ref_core.dataset_registry import DATASET_URL, dataset_registry_manager
 from climate_ref_core.providers import CondaDiagnosticProvider
-from climate_ref_pmp.diagnostics import AnnualCycle, ExtratropicalModesOfVariability
+from climate_ref_pmp.diagnostics import ENSO, AnnualCycle, ExtratropicalModesOfVariability
 
 __version__ = importlib.metadata.version("climate-ref-pmp")
 
@@ -14,6 +14,15 @@ __version__ = importlib.metadata.version("climate-ref-pmp")
 # PMP uses a conda environment to run the diagnostics
 provider = CondaDiagnosticProvider("PMP", __version__)
 
+# Annual cycle diagnostics and metrics
+provider.register(AnnualCycle())
+
+# ENSO diagnostics and metrics
+# provider.register(ENSO("ENSO_perf"))  # Assigned to ESMValTool
+provider.register(ENSO("ENSO_tel"))
+provider.register(ENSO("ENSO_proc"))
+
+# Extratropical modes of variability diagnostics and metrics
 provider.register(ExtratropicalModesOfVariability("PDO"))
 provider.register(ExtratropicalModesOfVariability("NPGO"))
 provider.register(ExtratropicalModesOfVariability("NAO"))
@@ -21,7 +30,6 @@ provider.register(ExtratropicalModesOfVariability("NAM"))
 provider.register(ExtratropicalModesOfVariability("PNA"))
 provider.register(ExtratropicalModesOfVariability("NPO"))
 provider.register(ExtratropicalModesOfVariability("SAM"))
-provider.register(AnnualCycle())
 
 
 dataset_registry_manager.register(
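With this release the provider registers the annual cycle first, then two of the three ENSO metrics collections (ENSO_perf stays commented out because it is assigned to ESMValTool), then the extratropical modes. A minimal sketch of the new public surface, assuming climate-ref-pmp 0.6.1 and its climate_ref_core dependencies are installed:

    # Sketch only: exercises the names this release adds to the public API.
    from climate_ref_pmp.diagnostics import ENSO

    enso_tel = ENSO("ENSO_tel")
    print(enso_tel.slug)                # "enso_tel" -- the lowercased collection name
    print(enso_tel.metrics_collection)  # "ENSO_tel"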
climate_ref_pmp/diagnostics/__init__.py CHANGED

@@ -1,9 +1,11 @@
 """PMP diagnostics."""
 
 from climate_ref_pmp.diagnostics.annual_cycle import AnnualCycle
+from climate_ref_pmp.diagnostics.enso import ENSO
 from climate_ref_pmp.diagnostics.variability_modes import ExtratropicalModesOfVariability
 
 __all__ = [
+    "ENSO",
     "AnnualCycle",
     "ExtratropicalModesOfVariability",
 ]
climate_ref_pmp/diagnostics/annual_cycle.py CHANGED

@@ -15,6 +15,44 @@ from climate_ref_core.pycmec.metric import remove_dimensions
 from climate_ref_pmp.pmp_driver import build_glob_pattern, build_pmp_command, process_json_result
 
 
+def make_data_requirement(variable_id: str, obs_source: str) -> tuple[DataRequirement, DataRequirement]:
+    """
+    Create a data requirement for the annual cycle diagnostic.
+
+    Parameters
+    ----------
+    variable_id : str
+        The variable ID to filter the data requirement.
+    obs_source : str
+        The observation source ID to filter the data requirement.
+
+    Returns
+    -------
+    DataRequirement
+        A DataRequirement object containing the necessary filters and groupings.
+    """
+    return (
+        DataRequirement(
+            source_type=SourceDatasetType.PMPClimatology,
+            filters=(FacetFilter(facets={"source_id": (obs_source,), "variable_id": (variable_id,)}),),
+            group_by=("variable_id", "source_id"),
+        ),
+        DataRequirement(
+            source_type=SourceDatasetType.CMIP6,
+            filters=(
+                FacetFilter(
+                    facets={
+                        "frequency": "mon",
+                        "experiment_id": ("amip", "historical", "hist-GHG", "piControl"),
+                        "variable_id": (variable_id,),
+                    }
+                ),
+            ),
+            group_by=("variable_id", "source_id", "experiment_id", "member_id", "grid_label"),
+        ),
+    )
+
+
 class AnnualCycle(CommandLineDiagnostic):
     """
     Calculate the annual cycle for a dataset
@@ -32,49 +70,20 @@ class AnnualCycle(CommandLineDiagnostic):
         "statistic",
         "season",
     )
+
     data_requirements = (
-
-        (
-            DataRequirement(
-                source_type=SourceDatasetType.PMPClimatology,
-                filters=(FacetFilter(facets={"source_id": (…), "variable_id": ("ts",)}),),
-                group_by=("variable_id", "source_id"),
-            ),
-            DataRequirement(
-                source_type=SourceDatasetType.CMIP6,
-                filters=(
-                    FacetFilter(
-                        facets={
-                            "frequency": "mon",
-                            "experiment_id": ("amip", "historical", "hist-GHG", "piControl"),
-                            "variable_id": ("ts",),
-                        }
-                    ),
-                ),
-                group_by=("variable_id", "source_id", "experiment_id", "member_id"),
-            ),
-        ),
-        # Precipitation
-        (
-            DataRequirement(
-                source_type=SourceDatasetType.PMPClimatology,
-                filters=(FacetFilter(facets={"source_id": ("GPCP-Monthly-3-2",), "variable_id": ("pr",)}),),
-                group_by=("variable_id", "source_id"),
-            ),
-            DataRequirement(
-                source_type=SourceDatasetType.CMIP6,
-                filters=(
-                    FacetFilter(
-                        facets={
-                            "frequency": "mon",
-                            "experiment_id": ("amip", "historical", "hist-GHG", "piControl"),
-                            "variable_id": ("pr",),
-                        }
-                    ),
-                ),
-                group_by=("variable_id", "source_id", "experiment_id", "member_id"),
-            ),
-        ),
+        make_data_requirement("ts", "ERA-5"),
+        make_data_requirement("uas", "ERA-5"),
+        make_data_requirement("vas", "ERA-5"),
+        make_data_requirement("psl", "ERA-5"),
+        make_data_requirement("pr", "GPCP-Monthly-3-2"),
+        make_data_requirement("rlds", "CERES-EBAF-4-2"),
+        make_data_requirement("rlus", "CERES-EBAF-4-2"),
+        make_data_requirement("rlut", "CERES-EBAF-4-2"),
+        make_data_requirement("rsds", "CERES-EBAF-4-2"),
+        make_data_requirement("rsdt", "CERES-EBAF-4-2"),
+        make_data_requirement("rsus", "CERES-EBAF-4-2"),
+        make_data_requirement("rsut", "CERES-EBAF-4-2"),
     )
 
     def __init__(self) -> None:
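The refactor above replaces the hand-written per-variable requirement tuples with the module-level make_data_requirement helper, widening the annual cycle from two variables to twelve and pairing each with an ERA-5, GPCP, or CERES-EBAF reference. A short sketch of what one call returns, assuming the package is installed; the removed ts block is reconstructed by parallel with the pr block, with its original source_id not preserved in this rendering:

    # Sketch only: each call yields a (reference requirement, model requirement) pair.
    from climate_ref_pmp.diagnostics.annual_cycle import make_data_requirement

    obs_req, model_req = make_data_requirement("pr", "GPCP-Monthly-3-2")
    print(obs_req.source_type)  # SourceDatasetType.PMPClimatology
    print(model_req.group_by)   # grouping now includes "grid_label"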
climate_ref_pmp/diagnostics/enso.py ADDED

@@ -0,0 +1,245 @@
+import json
+import os
+from collections.abc import Collection, Iterable
+from typing import Any
+
+from loguru import logger
+
+from climate_ref_core.constraints import AddSupplementaryDataset
+from climate_ref_core.datasets import DatasetCollection, FacetFilter, SourceDatasetType
+from climate_ref_core.diagnostics import (
+    CommandLineDiagnostic,
+    DataRequirement,
+    ExecutionDefinition,
+    ExecutionResult,
+)
+from climate_ref_pmp.pmp_driver import _get_resource, process_json_result
+
+
+class ENSO(CommandLineDiagnostic):
+    """
+    Calculate the ENSO performance metrics for a dataset
+    """
+
+    facets = ("source_id", "member_id", "grid_label", "experiment_id", "metric", "reference_datasets")
+
+    def __init__(self, metrics_collection: str, experiments: Collection[str] = ("historical",)) -> None:
+        self.name = metrics_collection
+        self.slug = metrics_collection.lower()
+        self.metrics_collection = metrics_collection
+        self.parameter_file = "pmp_param_enso.py"
+        self.obs_sources: tuple[str, ...]
+        self.model_variables: tuple[str, ...]
+
+        if metrics_collection == "ENSO_perf":  # pragma: no cover
+            self.model_variables = ("pr", "ts", "tauu")
+            self.obs_sources = ("GPCP-Monthly-3-2", "TropFlux-1-0", "HadISST-1-1")
+        elif metrics_collection == "ENSO_tel":
+            self.model_variables = ("pr", "ts")
+            self.obs_sources = ("GPCP-Monthly-3-2", "TropFlux-1-0", "HadISST-1-1")
+        elif metrics_collection == "ENSO_proc":
+            self.model_variables = ("ts", "tauu", "hfls", "hfss", "rlds", "rlus", "rsds", "rsus")
+            self.obs_sources = (
+                "GPCP-Monthly-3-2",
+                "TropFlux-1-0",
+                "HadISST-1-1",
+                "CERES-EBAF-4-2",
+            )
+        else:
+            raise ValueError(
+                f"Unknown metrics collection: {metrics_collection}. "
+                "Valid options are: ENSO_perf, ENSO_tel, ENSO_proc"
+            )
+
+        self.data_requirements = self._get_data_requirements(experiments)
+
+    def _get_data_requirements(
+        self,
+        experiments: Collection[str] = ("historical",),
+    ) -> tuple[DataRequirement, DataRequirement]:
+        filters = [
+            FacetFilter(
+                facets={
+                    "frequency": "mon",
+                    "experiment_id": tuple(experiments),
+                    "variable_id": self.model_variables,
+                }
+            )
+        ]
+
+        return (
+            DataRequirement(
+                source_type=SourceDatasetType.obs4MIPs,
+                filters=(
+                    FacetFilter(facets={"source_id": self.obs_sources, "variable_id": self.model_variables}),
+                ),
+                group_by=("activity_id",),
+            ),
+            DataRequirement(
+                source_type=SourceDatasetType.CMIP6,
+                filters=tuple(filters),
+                group_by=("source_id", "experiment_id", "member_id", "grid_label"),
+                constraints=(
+                    AddSupplementaryDataset.from_defaults("areacella", SourceDatasetType.CMIP6),
+                    AddSupplementaryDataset.from_defaults("sftlf", SourceDatasetType.CMIP6),
+                ),
+            ),
+        )
+
+    def build_cmd(self, definition: ExecutionDefinition) -> Iterable[str]:
+        """
+        Run the diagnostic on the given configuration.
+
+        Parameters
+        ----------
+        definition : ExecutionDefinition
+            The configuration to run the diagnostic on.
+
+        Returns
+        -------
+        :
+            The result of running the diagnostic.
+        """
+        mc_name = self.metrics_collection
+
+        # ------------------------------------------------
+        # Get the input datasets information for the model
+        # ------------------------------------------------
+        input_datasets = definition.datasets[SourceDatasetType.CMIP6]
+        input_selectors = input_datasets.selector_dict()
+        source_id = input_selectors["source_id"]
+        member_id = input_selectors["member_id"]
+        experiment_id = input_selectors["experiment_id"]
+        variable_ids = set(input_datasets["variable_id"].unique()) - {"areacella", "sftlf"}
+        mod_run = f"{source_id}_{member_id}"
+
+        # We only need one entry for the model run
+        dict_mod: dict[str, dict[str, Any]] = {mod_run: {}}
+
+        def extract_variable(dc: DatasetCollection, variable: str) -> list[str]:
+            return dc.datasets[input_datasets["variable_id"] == variable]["path"].to_list()  # type: ignore
+
+        # TO DO: Get the path to the files per variable
+        for variable in variable_ids:
+            list_files = extract_variable(input_datasets, variable)
+            list_areacella = extract_variable(input_datasets, "areacella")
+            list_sftlf = extract_variable(input_datasets, "sftlf")
+
+            if len(list_files) > 0:
+                dict_mod[mod_run][variable] = {
+                    "path + filename": list_files,
+                    "varname": variable,
+                    "path + filename_area": list_areacella,
+                    "areaname": "areacella",
+                    "path + filename_landmask": list_sftlf,
+                    "landmaskname": "sftlf",
+                }
+
+        # -------------------------------------------------------
+        # Get the input datasets information for the observations
+        # -------------------------------------------------------
+        reference_dataset = definition.datasets[SourceDatasetType.obs4MIPs]
+        reference_dataset_names = reference_dataset["source_id"].unique()
+
+        dict_obs: dict[str, dict[str, Any]] = {}
+
+        # TO DO: Get the path to the files per variable and per source
+        for obs_name in reference_dataset_names:
+            dict_obs[obs_name] = {}
+            for variable in variable_ids:
+                # Get the list of files for the current variable and observation source
+                list_files = reference_dataset.datasets[
+                    (reference_dataset["variable_id"] == variable)
+                    & (reference_dataset["source_id"] == obs_name)
+                ]["path"].to_list()
+                # If the list is not empty, add it to the dictionary
+                if len(list_files) > 0:
+                    dict_obs[obs_name][variable] = {
+                        "path + filename": list_files,
+                        "varname": variable,
+                    }
+
+        # Create input directory
+        dict_datasets = {
+            "model": dict_mod,
+            "observations": dict_obs,
+            "metricsCollection": mc_name,
+            "experiment_id": experiment_id,
+        }
+
+        # Create JSON file for dictDatasets
+        json_file = os.path.join(
+            definition.output_directory, f"input_{mc_name}_{source_id}_{experiment_id}_{member_id}.json"
+        )
+        with open(json_file, "w") as f:
+            json.dump(dict_datasets, f, indent=4)
+        logger.debug(f"JSON file created: {json_file}")
+
+        driver_file = _get_resource("climate_ref_pmp.drivers", "enso_driver.py", use_resources=True)
+        return [
+            "python",
+            driver_file,
+            "--metrics_collection",
+            mc_name,
+            "--experiment_id",
+            experiment_id,
+            "--input_json_path",
+            json_file,
+            "--output_directory",
+            str(definition.output_directory),
+        ]
+
+    def build_execution_result(self, definition: ExecutionDefinition) -> ExecutionResult:
+        """
+        Build a diagnostic result from the output of the PMP driver
+
+        Parameters
+        ----------
+        definition
+            Definition of the diagnostic execution
+
+        Returns
+        -------
+        Result of the diagnostic execution
+        """
+        input_datasets = definition.datasets[SourceDatasetType.CMIP6]
+        source_id = input_datasets["source_id"].unique()[0]
+        experiment_id = input_datasets["experiment_id"].unique()[0]
+        member_id = input_datasets["member_id"].unique()[0]
+        mc_name = self.metrics_collection
+        pattern = f"{mc_name}_{source_id}_{experiment_id}_{member_id}"
+
+        # Find the results files
+        results_files = list(definition.output_directory.glob(f"{pattern}_cmec.json"))
+        logger.debug(f"Results files: {results_files}")
+
+        if len(results_files) != 1:  # pragma: no cover
+            logger.warning(f"A single cmec output file not found: {results_files}")
+            return ExecutionResult.build_from_failure(definition)
+
+        # Find the other outputs
+        png_files = [definition.as_relative_path(f) for f in definition.output_directory.glob("*.png")]
+        data_files = [definition.as_relative_path(f) for f in definition.output_directory.glob("*.nc")]
+
+        cmec_output, cmec_metric = process_json_result(results_files[0], png_files, data_files)
+
+        input_selectors = definition.datasets[SourceDatasetType.CMIP6].selector_dict()
+        cmec_metric_bundle = cmec_metric.remove_dimensions(
+            [
+                "model",
+                "realization",
+            ],
+        ).prepend_dimensions(
+            {
+                "source_id": input_selectors["source_id"],
+                "member_id": input_selectors["member_id"],
+                "grid_label": input_selectors["grid_label"],
+                "experiment_id": input_selectors["experiment_id"],
+            }
+        )
+
+        return ExecutionResult.build_from_output_bundle(
+            definition,
+            cmec_output_bundle=cmec_output,
+            cmec_metric_bundle=cmec_metric_bundle,
+        )
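The new ENSO diagnostic derives its model variables and reference datasets from the metrics collection name and rejects unknown names up front; the actual computation is delegated to the bundled enso_driver.py via a generated JSON input file. A usage sketch, assuming the package is installed (running the metrics end to end additionally requires the PMP conda environment):

    # Sketch only: construct the diagnostic and inspect its configuration.
    from climate_ref_pmp.diagnostics import ENSO

    proc = ENSO("ENSO_proc")
    print(proc.model_variables)  # ("ts", "tauu", "hfls", "hfss", "rlds", "rlus", "rsds", "rsus")

    obs_req, model_req = proc.data_requirements
    print(model_req.group_by)    # ("source_id", "experiment_id", "member_id", "grid_label")

    try:
        ENSO("ENSO_unknown")
    except ValueError as exc:
        print(exc)  # Unknown metrics collection: ENSO_unknown. Valid options are: ...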
climate_ref_pmp/diagnostics/variability_modes.py CHANGED

@@ -1,4 +1,6 @@
 from collections.abc import Iterable
+from pathlib import Path
+from typing import Any, Union
 
 from loguru import logger
 
@@ -37,10 +39,10 @@ class ExtratropicalModesOfVariability(CommandLineDiagnostic):
         self.name = f"Extratropical modes of variability: {mode_id}"
         self.slug = f"extratropical-modes-of-variability-{mode_id.lower()}"
 
-        def …
+        def _get_data_requirements(
             obs_source: str,
             obs_variable: str,
-
+            model_variable: str,
             extra_experiments: str | tuple[str, ...] | list[str] = (),
         ) -> tuple[DataRequirement, DataRequirement]:
             filters = [
@@ -48,7 +50,7 @@ class ExtratropicalModesOfVariability(CommandLineDiagnostic):
                     facets={
                         "frequency": "mon",
                         "experiment_id": ("historical", "hist-GHG", "piControl", *extra_experiments),
-                        "variable_id": …
+                        "variable_id": model_variable,
                     }
                 )
             ]
@@ -64,17 +66,16 @@ class ExtratropicalModesOfVariability(CommandLineDiagnostic):
             DataRequirement(
                 source_type=SourceDatasetType.CMIP6,
                 filters=tuple(filters),
-
-                group_by=("source_id", "experiment_id", "variant_label", "member_id"),
+                group_by=("source_id", "experiment_id", "member_id", "grid_label"),
             ),
         )
 
         if self.mode_id in self.ts_modes:
             self.parameter_file = "pmp_param_MoV-ts.py"
-            self.data_requirements = …
+            self.data_requirements = _get_data_requirements("HadISST-1-1", "ts", "ts")
        elif self.mode_id in self.psl_modes:
             self.parameter_file = "pmp_param_MoV-psl.py"
-            self.data_requirements = …
+            self.data_requirements = _get_data_requirements("20CR", "psl", "psl", extra_experiments=("amip",))
         else:
             raise ValueError(
                 f"Unknown mode_id '{self.mode_id}'. Must be one of {self.ts_modes + self.psl_modes}"
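(In the hunks above, "…" marks removed text that was not preserved in this rendering.) Because _get_data_requirements is now a nested helper inside __init__, it is exercised through the constructor rather than called directly. A sketch, assuming the package is installed; PDO is one of the SST-based modes, so it should resolve to the HadISST-1-1 requirements:

    # Sketch only: the ts/psl branch above picks the reference dataset per mode.
    from climate_ref_pmp.diagnostics import ExtratropicalModesOfVariability

    pdo = ExtratropicalModesOfVariability("PDO")
    obs_req, model_req = pdo.data_requirements
    print(model_req.group_by)  # ("source_id", "experiment_id", "member_id", "grid_label")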
@@ -172,6 +173,8 @@ class ExtratropicalModesOfVariability(CommandLineDiagnostic):
             logger.warning(f"A single cmec output file not found: {results_files}")
             return ExecutionResult.build_from_failure(definition)
 
+        clean_up_json(results_files[0])
+
         # Find the other outputs
         png_files = [definition.as_relative_path(f) for f in definition.output_directory.glob("*.png")]
         data_files = [definition.as_relative_path(f) for f in definition.output_directory.glob("*.nc")]
@@ -201,3 +204,63 @@ class ExtratropicalModesOfVariability(CommandLineDiagnostic):
             cmec_output_bundle=cmec_output_bundle,
             cmec_metric_bundle=cmec_metric_bundle,
         )
+
+
+def clean_up_json(json_file: Union[str, Path]) -> None:
+    """
+    Clean up the JSON file by removing unnecessary fields.
+
+    Parameters
+    ----------
+    json_file : str or Path
+        Path to the JSON file to clean up.
+    """
+    import json
+
+    with open(str(json_file)) as f:
+        data = json.load(f)
+
+    # Remove null values from the JSON data
+    data = remove_null_values(data)
+
+    with open(str(json_file), "w") as f:
+        json.dump(data, f, indent=4)
+
+    # Log the cleanup action
+    logger.debug(f"Cleaned up JSON file: {json_file}")
+    logger.info("JSON file cleaned up successfully.")
+
+
+def remove_null_values(data: Union[dict[Any, Any], list[Any], Any]) -> Union[dict[Any, Any], list[Any], Any]:
+    """
+    Recursively removes keys with null (None) values from a dictionary or list.
+
+    Parameters
+    ----------
+    data : dict, list, or Any
+        The JSON-like data structure to process. It can be a dictionary, a list,
+        or any other type of data.
+
+    Returns
+    -------
+    dict, list, or Any
+        A new data structure with null values removed. If the input is a dictionary,
+        keys with `None` values are removed. If the input is a list, items are
+        recursively processed to remove `None` values. For other types, the input
+        is returned unchanged.
+
+    Examples
+    --------
+    >>> data = {
+    ...     "key1": None,
+    ...     "key2": {"subkey1": 123, "subkey2": None},
+    ...     "key3": [None, 456, {"subkey3": None}],
+    ... }
+    >>> remove_null_values(data)
+    {'key2': {'subkey1': 123}, 'key3': [456, {}]}
+    """
+    if isinstance(data, dict):
+        return {key: remove_null_values(value) for key, value in data.items() if value is not None}
+    if isinstance(data, list):
+        return [remove_null_values(item) for item in data if item is not None]
+    return data
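The two helpers added at the bottom strip JSON null entries from the CMEC output before it is processed further. A small round-trip sketch, assuming the package is installed; the file name below is hypothetical:

    # Sketch only: remove_null_values works on in-memory data, clean_up_json on files.
    import json
    import tempfile
    from pathlib import Path

    from climate_ref_pmp.diagnostics.variability_modes import clean_up_json, remove_null_values

    print(remove_null_values({"a": None, "b": [None, 1]}))  # {'b': [1]}

    with tempfile.TemporaryDirectory() as tmp:
        json_path = Path(tmp) / "example_cmec.json"  # hypothetical name
        json_path.write_text(json.dumps({"RESULTS": {"rms": 0.42, "skipped": None}}))
        clean_up_json(json_path)
        print(json.loads(json_path.read_text()))  # {'RESULTS': {'rms': 0.42}}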