PyPI - climate-ref-pmp - Versions diffs - 0.7.0__tar.gz → 0.8.1__tar.gz - Mend

climate-ref-pmp 0.7.0 (tar.gz) → 0.8.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

{climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: climate-ref-pmp
-Version: 0.7.0
+Version: 0.8.1
 Summary: PMP diagnostic provider for the Rapid Evaluation Framework
 Author-email: Jiwoo Lee <jwlee@llnl.gov>, Jared Lewis <jared.lewis@climate-resource.com>
 License-Expression: Apache-2.0

{climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "climate-ref-pmp"
-version = "0.7.0"
+version = "0.8.1"
 description = "PMP diagnostic provider for the Rapid Evaluation Framework"
 readme = "README.md"
 authors = [

{climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/src/climate_ref_pmp/diagnostics/annual_cycle.py RENAMED Viewed

@@ -1,5 +1,6 @@
 import datetime
 import json
+from pathlib import Path
 from typing import Any
 from loguru import logger
@@ -14,6 +15,10 @@ from climate_ref_core.diagnostics import (
 from climate_ref_core.pycmec.metric import remove_dimensions
 from climate_ref_pmp.pmp_driver import build_glob_pattern, build_pmp_command, process_json_result
+# =================================================================
+# PMP diagnostics support functions for the annual cycle diagnostic
+# =================================================================
 def make_data_requirement(variable_id: str, obs_source: str) -> tuple[DataRequirement, DataRequirement]:
     """
@@ -43,7 +48,7 @@ def make_data_requirement(variable_id: str, obs_source: str) -> tuple[DataRequir
                 FacetFilter(
                     facets={
                         "frequency": "mon",
-                        "experiment_id": ("amip", "historical", "hist-GHG", "piControl"),
+                        "experiment_id": ("amip", "historical", "hist-GHG"),
                         "variable_id": (variable_id,),
                     }
                 ),
@@ -53,6 +58,159 @@ def make_data_requirement(variable_id: str, obs_source: str) -> tuple[DataRequir
     )
+def _transform_results(data: dict[str, Any]) -> dict[str, Any]:
+    """
+    Transform the executions dictionary to match the expected structure.
+    Parameters
+    ----------
+    data : dict
+        The original execution dictionary.
+    Returns
+    -------
+    dict
+        The transformed executions dictionary.
+    """
+    # Remove the model, reference, rip dimensions
+    # These are later replaced with a REF-specific naming convention
+    data = remove_dimensions(data, ["model", "reference", "rip"])
+    # TODO: replace this with the ability to capture series
+    # Remove the "CalendarMonths" key from the nested structure
+    for region, region_values in data["RESULTS"].items():
+        for stat, stat_values in region_values.items():
+            if "CalendarMonths" in stat_values:
+                stat_values.pop("CalendarMonths")
+    # Remove the "CalendarMonths" key from the nested structure in "DIMENSIONS"
+    data["DIMENSIONS"]["season"].pop("CalendarMonths")
+    return data
+def transform_results_files(results_files: list[Any]) -> list[Any]:
+    """
+    Transform the results files to match the expected structure.
+    Parameters
+    ----------
+    results_files : list
+        List of result files to transform.
+    Returns
+    -------
+    list
+        List of transformed result files.
+    """
+    if len(results_files) == 0:
+        logger.warning("No results files provided for transformation.")
+        return []
+    transformed_results_files = []
+    for results_file in results_files:
+        # Rewrite the CMEC JSON file for compatibility
+        with open(results_file) as f:
+            results = json.load(f)
+            results_transformed = _transform_results(results)
+        # Get the stem (filename without extension)
+        stem = results_file.stem
+        # Create the new filename
+        results_file_transformed = results_file.with_name(f"{stem}_transformed.json")
+        with open(results_file_transformed, "w") as f:
+            # Write the transformed executions back to the file
+            json.dump(results_transformed, f, indent=4)
+            logger.debug(f"Transformed executions written to {results_file_transformed}")
+        transformed_results_files.append(results_file_transformed)
+    return transformed_results_files
+def _update_top_level_keys(combined_results: dict[str, Any], data: dict[str, Any], levels: list[str]) -> None:
+    if "DIMENSIONS" not in data:
+        data["DIMENSIONS"] = {}
+    top_level_keys = list(data.keys())
+    top_level_keys.remove("RESULTS")
+    json_structure = data.get("DIMENSIONS", {}).get("json_structure", {})
+    json_structure = ["level", *json_structure]
+    for key in top_level_keys:
+        combined_results[key] = data[key]
+        if key == "Variable":
+            combined_results[key]["level"] = levels
+        elif key == "DIMENSIONS":
+            combined_results[key]["json_structure"] = json_structure
+            if "level" not in combined_results[key]:
+                combined_results[key]["level"] = {}
+                for level in levels:
+                    combined_results[key]["level"][level] = {}
+def combine_results_files(results_files: list[Any], output_directory: str | Path) -> Path:
+    """
+    Combine multiple results files into a single file.
+    Parameters
+    ----------
+    results_files : list
+        List of result files to combine.
+    output_directory : str or Path
+        Directory where the combined file will be saved.
+    Returns
+    -------
+    Path
+        The path to the combined results file. The levels found in the results files are recorded inside that file rather than returned.
+    """
+    combined_results: dict[str, dict[str, dict[str, dict[str, dict[str, Any]]]]] = {}
+    combined_results["RESULTS"] = {}
+    levels = []
+    # Ensure output_directory is a Path object
+    if isinstance(output_directory, str):
+        output_directory = Path(output_directory)
+    last_data = None
+    for file in results_files:
+        with open(file) as f:
+            data = json.load(f)
+            last_data = data
+            level_key = str(int(data["Variable"]["level"]))
+            levels.append(level_key)
+            logger.debug(f"Processing file: {file}, level_key: {level_key}")
+            # Insert the results into the combined_results dictionary
+            if level_key not in combined_results["RESULTS"]:
+                combined_results["RESULTS"][level_key] = data.get("RESULTS", {})
+    if last_data is not None:
+        _update_top_level_keys(combined_results, last_data, levels)
+    # Ensure the output directory exists
+    output_directory.mkdir(parents=True, exist_ok=True)
+    # Create the combined file path
+    combined_file_path = output_directory / "combined_results.json"
+    with open(combined_file_path, "w") as f:
+        json.dump(combined_results, f, indent=4)
+    # return combined_file_path, levels
+    return combined_file_path
+# ===================================================
+# PMP diagnostics main class: annual cycle diagnostic
+# ===================================================
 class AnnualCycle(CommandLineDiagnostic):
     """
     Calculate the annual cycle for a dataset
@@ -72,10 +230,17 @@ class AnnualCycle(CommandLineDiagnostic):
     )
     data_requirements = (
+        # ERA-5 as reference dataset, spatial 2-D variables
         make_data_requirement("ts", "ERA-5"),
         make_data_requirement("uas", "ERA-5"),
         make_data_requirement("vas", "ERA-5"),
         make_data_requirement("psl", "ERA-5"),
+        # ERA-5 as reference dataset, spatial 3-D variables
+        make_data_requirement("ta", "ERA-5"),
+        make_data_requirement("ua", "ERA-5"),
+        make_data_requirement("va", "ERA-5"),
+        make_data_requirement("zg", "ERA-5"),
+        # Other reference datasets, spatial 2-D variables
         make_data_requirement("pr", "GPCP-Monthly-3-2"),
         make_data_requirement("rlds", "CERES-EBAF-4-2"),
         make_data_requirement("rlus", "CERES-EBAF-4-2"),
@@ -105,10 +270,6 @@ class AnnualCycle(CommandLineDiagnostic):
         """
         input_datasets = definition.datasets[SourceDatasetType.CMIP6]
         reference_datasets = definition.datasets[SourceDatasetType.PMPClimatology]
-        selector = input_datasets.selector_dict()
-        reference_selector = reference_datasets.selector_dict()
-        logger.debug(f"selector: {selector}")
-        logger.debug(f"reference selector: {reference_selector}")
         source_id = input_datasets["source_id"].unique()[0]
         experiment_id = input_datasets["experiment_id"].unique()[0]
@@ -159,10 +320,9 @@ class AnnualCycle(CommandLineDiagnostic):
             )
         )
-        # ----------------------------------------------
+        # --------------------------------------------------
         # PART 2: Build the command to calculate diagnostics
-        # ----------------------------------------------
+        # --------------------------------------------------
         # Reference
         obs_dict = {
             variable_id: {
@@ -179,13 +339,32 @@ class AnnualCycle(CommandLineDiagnostic):
         date = datetime.datetime.now().strftime("%Y%m%d")
+        if variable_id in ["ua", "va", "ta"]:
+            levels = ["200", "850"]
+        elif variable_id in ["zg"]:
+            levels = ["500"]
+        else:
+            levels = None
+        variables = []
+        if levels is not None:
+            for level in levels:
+                variable_id_with_level = f"{variable_id}-{level}"
+                variables.append(variable_id_with_level)
+        else:
+            variables = [variable_id]
+        logger.debug(f"variables: {variables}")
+        logger.debug(f"levels: {levels}")
+        # Build the command for each level
         params = {
-            "vars": variable_id,
+            "vars": variables,
             "custom_observations": f"{output_directory_path}/obs_dict.json",
             "test_data_path": output_directory_path,
             "test_data_set": source_id,
             "realization": member_id,
-            "filename_template": f"{variable_id}_{data_name}_clims.198101-200512.AC.v{date}.nc",
+            "filename_template": f"%(variable)_{data_name}_clims.198101-200512.AC.v{date}.nc",
             "metrics_output_path": output_directory_path,
             "cmec": "",
         }
@@ -198,6 +377,9 @@ class AnnualCycle(CommandLineDiagnostic):
             )
         )
+        logger.debug("build_cmd end")
+        logger.debug(f"cmds: {cmds}")
         return cmds
     def build_execution_result(self, definition: ExecutionDefinition) -> ExecutionResult:
@@ -216,46 +398,39 @@ class AnnualCycle(CommandLineDiagnostic):
         input_datasets = definition.datasets[SourceDatasetType.CMIP6]
         variable_id = input_datasets["variable_id"].unique()[0]
+        if variable_id in ["ua", "va", "ta"]:
+            variable_dir_pattern = f"{variable_id}-???"
+        else:
+            variable_dir_pattern = variable_id
         results_directory = definition.output_directory
-        png_directory = results_directory / variable_id
-        data_directory = results_directory / variable_id
+        png_directory = results_directory / variable_dir_pattern
+        data_directory = results_directory / variable_dir_pattern
         logger.debug(f"results_directory: {results_directory}")
         logger.debug(f"png_directory: {png_directory}")
         logger.debug(f"data_directory: {data_directory}")
-        # Find the executions file
-        results_files = list(results_directory.glob("*_cmec.json"))
-        if len(results_files) != 1:  # pragma: no cover
-            logger.error(f"More than one or no cmec file found: {results_files}")
-            return ExecutionResult.build_from_failure(definition)
-        else:
+        # Find the CMEC JSON file(s)
+        results_files = transform_results_files(list(results_directory.glob("*_cmec.json")))
+        if len(results_files) == 1:
+            # If only one file, use it directly
             results_file = results_files[0]
             logger.debug(f"results_file: {results_file}")
+        elif len(results_files) > 1:
+            logger.info(f"More than one cmec file found: {results_files}")
+            results_file = combine_results_files(results_files, definition.output_directory)
+        else:
+            logger.error("Unexpected case: no cmec file found")
+            return ExecutionResult.build_from_failure(definition)
-        # Rewrite executions file for compatibility
-        with open(results_file) as f:
-            results = json.load(f)
-            results_transformed = _transform_results(results)
-        # Get the stem (filename without extension)
-        stem = results_file.stem
-        # Create the new filename
-        results_file_transformed = results_file.with_name(f"{stem}_transformed.json")
-        with open(results_file_transformed, "w") as f:
-            # Write the transformed executions back to the file
-            json.dump(results_transformed, f, indent=4)
-            logger.debug(f"Transformed executions written to {results_file_transformed}")
-        # Find the other outputs
+        # Find the other outputs: PNG and NetCDF files
         png_files = list(png_directory.glob("*.png"))
         data_files = list(data_directory.glob("*.nc"))
-        cmec_output_bundle, cmec_metric_bundle = process_json_result(
-            results_file_transformed, png_files, data_files
-        )
+        # Prepare the output bundles
+        cmec_output_bundle, cmec_metric_bundle = process_json_result(results_file, png_files, data_files)
         # Add missing dimensions to the output
         input_selectors = input_datasets.selector_dict()
@@ -294,34 +469,3 @@ class AnnualCycle(CommandLineDiagnostic):
         runs = [self.provider.run(cmd) for cmd in cmds]
         logger.debug(f"runs: {runs}")
-def _transform_results(data: dict[str, Any]) -> dict[str, Any]:
-    """
-    Transform the executions dictionary to match the expected structure.
-    Parameters
-    ----------
-    data : dict
-        The original execution dictionary.
-    Returns
-    -------
-    dict
-        The transformed executions dictionary.
-    """
-    # Remove the model, reference, rip dimensions
-    # These are later replaced with a REF-specific naming convention
-    data = remove_dimensions(data, ["model", "reference", "rip"])
-    # TODO: replace this with the ability to capture series
-    # Remove the "CalendarMonths" key from the nested structure
-    for region, region_values in data["RESULTS"].items():
-        for stat, stat_values in region_values.items():
-            if "CalendarMonths" in stat_values:
-                stat_values.pop("CalendarMonths")
-    # Remove the "CalendarMonths" key from the nested structure in "DIMENSIONS"
-    data["DIMENSIONS"]["season"].pop("CalendarMonths")
-    return data

{climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/src/climate_ref_pmp/diagnostics/variability_modes.py RENAMED Viewed

@@ -50,7 +50,7 @@ class ExtratropicalModesOfVariability(CommandLineDiagnostic):
                 FacetFilter(
                     facets={
                         "frequency": "mon",
-                        "experiment_id": ("historical", "hist-GHG", "piControl", *extra_experiments),
+                        "experiment_id": ("historical", "hist-GHG", *extra_experiments),
                         "variable_id": model_variable,
                     }
                 )
@@ -149,6 +149,11 @@ class ExtratropicalModesOfVariability(CommandLineDiagnostic):
             params["osyear"] = 1950
             params["oeyear"] = 2005
+        if self.mode_id in ["NPO", "NPGO"]:
+            params["eofn_obs"] = 2
+            params["eofn_mod"] = 2
+            params["eofn_mod_max"] = 2
         # Pass the parameters using **kwargs
         return build_pmp_command(
             driver_file="variability_modes_driver.py",

{climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/src/climate_ref_pmp/pmp_driver.py RENAMED Viewed

@@ -169,7 +169,9 @@ def build_pmp_command(
     # Loop through additional arguments if they exist
     if kwargs:  # pragma: no cover
         for key, value in kwargs.items():
-            if value:
+            if isinstance(value, list):
+                cmd.extend([f"--{key}"] + [str(v) for v in value])
+            elif value:
                 cmd.extend([f"--{key}", str(value)])
             else:
                 cmd.extend([f"--{key}"])

{climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/tests/unit/test_annual_cycle.py RENAMED Viewed

@@ -173,7 +173,7 @@ def test_annual_cycle_diagnostic(
         "--realization",
         member_id,
         "--filename_template",
-        f"{variable_id}_{source_id}_historical_{member_id}_clims.198101-200512.AC.v{datecode}.nc",
+        f"%(variable)_{source_id}_historical_{member_id}_clims.198101-200512.AC.v{datecode}.nc",
         "--metrics_output_path",
         str(output_dir),
         "--cmec",