PyPI - climate-ref-core - Versions diffs - 0.5.0__py3-none-any.whl - Mend

climate-ref-core 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

climate_ref_core/__init__.py +7 -0
climate_ref_core/constraints.py +363 -0
climate_ref_core/dataset_registry.py +158 -0
climate_ref_core/datasets.py +157 -0
climate_ref_core/diagnostics.py +549 -0
climate_ref_core/env.py +35 -0
climate_ref_core/exceptions.py +48 -0
climate_ref_core/executor.py +96 -0
climate_ref_core/logging.py +146 -0
climate_ref_core/providers.py +418 -0
climate_ref_core/py.typed +0 -0
climate_ref_core/pycmec/README.md +1 -0
climate_ref_core/pycmec/__init__.py +3 -0
climate_ref_core/pycmec/controlled_vocabulary.py +175 -0
climate_ref_core/pycmec/cv_cmip7_aft.yaml +44 -0
climate_ref_core/pycmec/metric.py +437 -0
climate_ref_core/pycmec/output.py +207 -0
climate_ref_core-0.5.0.dist-info/METADATA +63 -0
climate_ref_core-0.5.0.dist-info/RECORD +22 -0
climate_ref_core-0.5.0.dist-info/WHEEL +4 -0
climate_ref_core-0.5.0.dist-info/licenses/LICENCE +201 -0
climate_ref_core-0.5.0.dist-info/licenses/NOTICE +3 -0

climate_ref_core/pycmec/controlled_vocabulary.py ADDED Viewed

@@ -0,0 +1,175 @@
+import pathlib
+from typing import Any
+from attrs import field, frozen, validators
+from cattrs import Converter, transform_error
+from loguru import logger
+from ruamel.yaml import YAML
+from climate_ref_core.exceptions import ResultValidationError
+from climate_ref_core.pycmec.metric import CMECMetric
+# Module-level ruamel.yaml loader; `CV.load_from_file` uses it to parse CV files.
+yaml = YAML()
+# Checked by the `Dimension.name` field validator and again by the `CV.dimensions` validator.
+RESERVED_DIMENSION_NAMES = {"attributes", "json_structure", "created_at", "updated_at", "value", "id"}
+"""
+These names are reserved for internal use and should not be used as dimension names.
+These names have other meanings that would conflict with the controlled vocabulary.
+"""
+@frozen
+class DimensionValue:
+    """
+    An allowed value for a dimension
+    Instances are immutable (attrs ``@frozen``).
+    No field declares a default, so all four must be supplied;
+    only ``description`` is allowed to be ``None``.
+    """
+    # Identifier of the value; `CV.validate_metrics` compares result values against this field.
+    name: str
+    # Longer display name, e.g. "Root Mean Square Error" in cv_cmip7_aft.yaml.
+    long_name: str
+    # Free-text description; may be None but has no default, so it must still be passed.
+    description: str | None
+    # Units as a string, e.g. "dimensionless" in cv_cmip7_aft.yaml.
+    units: str
+@frozen
+class Dimension:
+    """
+    Description of a dimension in a diagnostic bundle
+    This information is also used by the frontend for presentation purposes.
+    """
+    name: str = field(validator=validators.not_(validators.in_(RESERVED_DIMENSION_NAMES)))
+    """
+    A short identifier of the dimension.
+    This is used as a key in the diagnostic bundle and must be unique.
+    """
+    long_name: str
+    """
+    A longer name used for presentation
+    """
+    description: str
+    """
+    A short description of the dimension.
+    This is used for presentation
+    """
+    allow_extra_values: bool
+    """
+    If True, additional non-controlled values are allowed.
+    This is used for dimensions where not all the values are known at run time,'
+    for example, the model dimension.
+    """
+    required: bool
+    """
+    If True, this dimension is required to be specified in the executions.
+    """
+    values: list[DimensionValue] = field(factory=list)
+    """
+    The list of controlled values for a given dimension.
+    If `allow_extra_values` is False,
+    then only these values are valid for the dimension.
+    """
+@frozen
+class CV:
+    """
+    A collection of controlled dimensions and values used to validate executions.
+    A diagnostic bundle does not have to specify all dimensions,
+    but any dimensions not in the CV are not permitted.
+    """
+    # TODO: There might be some additional fields in future if this CV is project-specific
+    dimensions: tuple[Dimension, ...] = field()
+    @dimensions.validator
+    def _validate_dimensions(self, _: Any, value: tuple[Dimension, ...]) -> None:
+        """
+        Validate that all dimension names are unique and do not conflict with reserved names
+        """
+        seen = set()
+        for dim in value:
+            if dim.name in seen:
+                raise ValueError(f"Duplicate dimension name: {dim.name}")
+            if dim.name in RESERVED_DIMENSION_NAMES:
+                raise ValueError(f"Reserved dimension name: {dim.name}")
+            seen.add(dim.name)
+    def get_dimension_by_name(self, name: str) -> Dimension:
+        """
+        Get a dimension by name
+        Parameters
+        ----------
+        name
+            The name of the dimension
+        Returns
+        -------
+        Dimension
+            The dimension with the given name
+        Raises
+        ------
+        KeyError
+            If the dimension is not found
+        """
+        for dim in self.dimensions:
+            if dim.name == name:
+                return dim
+        raise KeyError(f"Dimension {name} not found")
+    def validate_metrics(self, metric_bundle: CMECMetric) -> None:
+        """
+        Validate a diagnostic bundle against a CV
+        The CV describes the accepted dimensions and values within a bundle
+        Parameters
+        ----------
+        metric_bundle
+        Raises
+        ------
+        ResultValidationError
+            If the validation of the dimensions or values fails
+        """
+        for result in metric_bundle.iter_results():
+            for k, v in result.dimensions.items():
+                try:
+                    dimension = self.get_dimension_by_name(k)
+                except KeyError:
+                    raise ResultValidationError(f"Unknown dimension: {k!r}")
+                if not dimension.allow_extra_values:
+                    if v not in [dv.name for dv in dimension.values]:
+                        raise ResultValidationError(f"Unknown value {v!r} for dimension {k!r}")
+            if not isinstance(result.value, float):  # pragma: no cover
+                # This may not be possible with the current CMECMetric implementation
+                raise ResultValidationError(f"Unexpected value: {result.value!r}")
+    @staticmethod
+    def load_from_file(filename: pathlib.Path | str) -> "CV":
+        """
+        Load a CV from disk
+        Returns
+        -------
+            A new CV instance
+        """
+        convertor = Converter(forbid_extra_keys=True)
+        contents = yaml.load(pathlib.Path(filename))
+        try:
+            return convertor.structure(contents, CV)
+        except Exception as exc:
+            logger.error(f"Error loading CV from {filename}")
+            for error in transform_error(exc):
+                logger.error(error)
+            raise

climate_ref_core/pycmec/cv_cmip7_aft.yaml ADDED Viewed

@@ -0,0 +1,44 @@
+# Controlled vocabulary definition (cv_cmip7_aft.yaml).
+# Each entry under `dimensions` carries the fields of the `Dimension` class in
+# climate_ref_core/pycmec/controlled_vocabulary.py; each entry under `values`
+# carries the fields of `DimensionValue`.
+# NOTE(review): every dimension here sets `allow_extra_values: true`, so
+# `CV.validate_metrics` does not restrict results to the `values` listed for
+# `statistic` - confirm that this is intended.
+dimensions:
+- name: model
+  long_name: model_id
+  description: ""
+  allow_extra_values: true
+  required: false
+- name: source_id
+  long_name: source_id
+  description: ""
+  allow_extra_values: true
+  required: false
+- name: variant_label
+  long_name: Variant Label
+  description: ""
+  allow_extra_values: true
+  required: false
+- name: metric
+  long_name: ""
+  description: ""
+  required: true
+  allow_extra_values: true
+- name: region
+  long_name: ""
+  description: ""
+  required: true
+  allow_extra_values: true
+- name: statistic
+  long_name: ""
+  description: ""
+  required: true
+  allow_extra_values: true
+  values:
+    - name: rmse
+      long_name: Root Mean Square Error
+      description: ""
+      units: dimensionless
+    - name: overall score
+      long_name: Overall Score
+      description: ""
+      units: dimensionless
+    - name: bias
+      long_name: Bias
+      description: ""
+      units: dimensionless

climate_ref_core/pycmec/metric.py ADDED Viewed

@@ -0,0 +1,437 @@
+"""
+CMEC diagnostic bundle class
+Following the CMEC diagnostic bundle standards at
+https://github.com/Earth-System-Diagnostics-Standards/EMDS
+To validate that a dictionary is compatible with the CMEC
+diagnostic bundle standards, please use:
+ - class instantiation: cmec = CMECMetric(**result_dict)
+ - class model_validate method: cmec = CMECMetric.model_validate(result_dict)
+Both ways will create the CMECMetric instance (cmec)
+"""
+import json
+import pathlib
+from collections import Counter
+from collections.abc import Generator
+from enum import Enum
+from typing import Any, cast
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    FilePath,
+    RootModel,
+    ValidationInfo,
+    field_validator,
+    model_validator,
+    validate_call,
+)
+from pydantic.json_schema import GenerateJsonSchema, JsonSchemaMode, JsonSchemaValue
+from pydantic_core import CoreSchema
+from typing_extensions import Self
+class MetricCV(Enum):
+    """
+    CMEC diagnostic bundle controlled vocabulary
+    The member values are the literal dictionary keys used by the classes in this module.
+    """
+    # Upper-case values match the field names of `CMECMetric` and the keys
+    # returned by `CMECMetric.create_template`.
+    DIMENSIONS = "DIMENSIONS"
+    # Key inside the DIMENSIONS object holding the ordered list of dimension names.
+    JSON_STRUCTURE = "json_structure"
+    RESULTS = "RESULTS"
+    PROVENANCE = "PROVENANCE"
+    DISCLAIMER = "DISCLAIMER"
+    NOTES = "NOTES"
+    # Key inside RESULTS that is skipped when dimension keys are validated or walked.
+    ATTRIBUTES = "attributes"
+class MetricDimensions(RootModel[Any]):
+    """
+    CMEC diagnostic bundle DIMENSIONS object
+    This describes the order of the dimensions and their possible values.
+    The order of the dimensions matter as that determines how the executions are nested.
+    """
+    root: dict[str, Any] = Field(
+        default={
+            MetricCV.JSON_STRUCTURE.value: ["model", "metric"],
+            "model": {},
+            "metric": {},
+        }
+    )
+    @model_validator(mode="after")
+    def _validate_dimensions(self) -> Self:
+        """Validate a MetricDimensions object"""
+        # assert the items in json_structure are same as the keys of dimensions
+        if MetricCV.JSON_STRUCTURE.value not in self.root.keys():
+            raise ValueError(f"{MetricCV.JSON_STRUCTURE.value} is required keyword")
+        if not (
+            Counter(self.root[MetricCV.JSON_STRUCTURE.value])
+            == Counter([k for k in self.root.keys() if k != MetricCV.JSON_STRUCTURE.value])
+        ):
+            raise ValueError("json_structure items are not in the keys of the DIMENSIONS")
+        return self
+    @validate_call
+    def add_dimension(self, dim_name: str, dim_content: dict[str, Any]) -> None:
+        """
+        Add or update one dimension to MetricDimensions object
+        Parameters
+        ----------
+        dim_name
+            Name of new dimension to be added
+        dim_content
+            Dictionary contains contents associated with dim_name
+        Returns
+        -------
+        :
+            CMEC MetricDimensions object with dim_name added
+        """
+        if dim_name in self.root[MetricCV.JSON_STRUCTURE.value]:
+            self.root[dim_name].update(dim_content)
+        else:
+            self.root[MetricCV.JSON_STRUCTURE.value].append(dim_name)
+            self.root[dim_name] = dim_content
+    @classmethod
+    def merge_dimension(cls, metric_dim1: Any, metric_dim2: Any) -> Self:
+        """
+        Merge two MetricDimensions objects
+        Parameters
+        ----------
+        metric_dim1
+            First CMEC MetricDimensions object to be merged
+        metric_dim2
+            Second CMEC MetricDimensions object to be merged
+        Returns
+        -------
+        :
+            Return a merged CMEC MetricDimensions object
+        """
+        mdim1 = cls.model_validate(metric_dim1)
+        mdim2 = cls.model_validate(metric_dim2)
+        if not (mdim1.root[MetricCV.JSON_STRUCTURE.value] == mdim2.root[MetricCV.JSON_STRUCTURE.value]):
+            raise ValueError("JSON_STRUCTURES are not same")
+        merged_dim = {MetricCV.JSON_STRUCTURE.value: mdim1.root[MetricCV.JSON_STRUCTURE.value]}
+        for dim in mdim1.root[MetricCV.JSON_STRUCTURE.value]:
+            merged_dim[dim] = mdim1.root[dim]
+        for dim in mdim2.root[MetricCV.JSON_STRUCTURE.value]:
+            for key in mdim2.root[dim].keys():
+                if key not in merged_dim[dim].keys():
+                    merged_dim[dim][key] = mdim2.root[dim][key]
+        return cls(merged_dim)
+    def __getitem__(self, item: str) -> Any:
+        return self.root[item]
+class MetricResults(RootModel[Any]):
+    """
+    CMEC diagnostic bundle RESULTS object
+    """
+    model_config = ConfigDict(strict=True)
+    root: dict[str, dict[Any, Any]]
+    @classmethod
+    def _check_nested_dict_keys(cls, nested: dict[Any, Any], metdims: dict[Any, Any], level: int = 0) -> None:
+        dim_name = metdims[MetricCV.JSON_STRUCTURE.value][level]
+        dict_keys = set(nested.keys())
+        if MetricCV.ATTRIBUTES.value in dict_keys:
+            dict_keys.remove(MetricCV.ATTRIBUTES.value)
+        if level < len(metdims[MetricCV.JSON_STRUCTURE.value]) - 1:
+            if not (Counter(list(metdims[dim_name].keys())) == Counter(dict_keys)):
+                raise ValueError(
+                    f"Dimension key mismatch in '{dim_name}' and level {level}\n"
+                    f"Actual keys: {sorted(dict_keys)}\n"
+                    f"Expected keys: {sorted(metdims[dim_name].keys())}\n"
+                    "Full actual structure:\n" + json.dumps(list(dict_keys), indent=2) + "\n\n"
+                    "Full expected structure:\n" + json.dumps(metdims[dim_name], indent=2)
+                )
+            for key, value in nested.items():
+                if key == MetricCV.ATTRIBUTES.value:
+                    continue
+                elif isinstance(value, dict):
+                    cls._check_nested_dict_keys(value, metdims, level + 1)
+                else:
+                    raise ValueError(
+                        f"{dim_name} is not the last/deepest dimension, \n"
+                        f"a dictionary is expected for the key {key}"
+                    )
+        else:
+            expected_keys = set(metdims[dim_name].keys())
+            if not (dict_keys.issubset(expected_keys)):
+                raise ValueError(f"Unknown dimension values: {dict_keys - expected_keys}")
+            tmp = dict(nested)
+            if MetricCV.ATTRIBUTES.value in tmp:
+                tmp.pop(MetricCV.ATTRIBUTES.value)
+            StrNumDict(tmp)
+    @field_validator("root", mode="after")
+    @classmethod
+    def _validate_results(cls, rlt: Any, info: ValidationInfo) -> Any:
+        """Validate a MetricResults object"""
+        if not isinstance(info.context, MetricDimensions):
+            s = "\nTo validate MetricResults object, MetricDimensions is needed,\n"
+            s += "please use model_validate(Results, context=MetricDimensions) to instantiate\n"
+            raise ValueError(s)
+        else:
+            # executions = rlt.root
+            results = rlt
+            metdims = info.context.root
+            cls._check_nested_dict_keys(results, metdims, level=0)
+        return rlt
+class StrNumDict(RootModel[Any]):
+    """
+    A class contains string key and numeric value
+    Besides a single float or int, a value may also be a list of str, float or int items.
+    Validation runs in strict mode.
+    """
+    model_config = ConfigDict(strict=True)
+    root: dict[str, float | int | list[str | float | int]]
+class MetricValue(BaseModel):
+    """
+    A flattened representation of a diagnostic value
+    This includes the dimensions and the value of the diagnostic
+    """
+    # Maps each dimension name to the key selected for it while walking RESULTS.
+    dimensions: dict[str, str]
+    # The value found for that combination of dimension keys.
+    value: float | str
+    # Content of the ``attributes`` entry next to the value, if any; defaults to None.
+    attributes: dict[str, str | float | int] | None = None
+class CMECMetric(BaseModel):
+    """
+    CMEC diagnostic bundle object
+    Contains the diagnostics calculated during a diagnostic execution, in a standardised format.
+    """
+    model_config = ConfigDict(strict=True, extra="allow")
+    DIMENSIONS: MetricDimensions
+    """
+    Describes the dimensionality of the diagnostics produced.
+    This includes the order of dimensions in `RESULTS`
+    """
+    RESULTS: dict[str, Any]
+    """
+    The diagnostic values.
+    Results is a nested dictionary of values.
+    The order of the nested dictionaries corresponds to the order of the dimensions.
+    """
+    PROVENANCE: dict[str, Any] | None = None
+    """
+    Provenance information
+    Not currently used in the REF.
+    The provenance information from the output bundle is used instead
+    """
+    DISCLAIMER: dict[str, Any] | None = None
+    """
+    Disclaimer information
+    Not currently used in the REF.
+    """
+    NOTES: dict[str, Any] | None = None
+    """
+    Additional notes.
+    Not currently used in the REF.
+    """
+    @model_validator(mode="after")
+    def _validate_metrics(self) -> Self:
+        """Validate a CMECMetric object"""
+        # validate executions data
+        results = self.RESULTS
+        MetricResults.model_validate(results, context=self.DIMENSIONS)
+        return self
+    @validate_call
+    def dump_to_json(self, json_file: str | pathlib.Path = "./cmec.json") -> None:
+        """
+        Save the CMECMetric object to a file in JSON format
+        Parameters
+        ----------
+        json_file
+            JSON file path in the CMEC format to be saved
+        Returns
+        -------
+        :
+            None
+        """
+        pathlib.Path(json_file).write_text(self.model_dump_json(indent=2))
+    @classmethod
+    @validate_call
+    def load_from_json(cls, json_file: FilePath) -> Self:
+        """
+        Create CMECMetric object from a compatible json file
+        Parameters
+        ----------
+        json_file
+            JSON file path to be read
+        Returns
+        -------
+        :
+            CMEC Diagnostic object if the file is CMEC-compatible
+        """
+        json_str = pathlib.Path(json_file).read_text()
+        metric_obj = cls.model_validate_json(json_str)
+        return metric_obj
+    @classmethod
+    def _merge(cls, dict_a: dict[Any, Any], dict_b: dict[Any, Any]) -> dict[Any, Any]:
+        """Merge the values from dict_b into dict_a inplace"""
+        for key, value_b in dict_b.items():
+            if key in dict_a:
+                if isinstance(dict_a[key], dict) and isinstance(value_b, dict):
+                    cls._merge(dict_a[key], value_b)
+                else:
+                    dict_a[key] = value_b
+            else:
+                dict_a[key] = value_b
+        return dict_a
+    @classmethod
+    def _fill(cls, mdict: dict[Any, Any], mdims: dict[Any, Any], level: int = 0) -> None:
+        dim_name = mdims[MetricCV.JSON_STRUCTURE.value][level]
+        for key in mdims[dim_name].keys():
+            if key not in mdict:
+                if level < len(mdims[MetricCV.JSON_STRUCTURE.value]) - 1:
+                    mdict[key] = {}
+        for key, value in mdict.items():
+            if (
+                isinstance(value, dict)
+                and level < len(mdims[MetricCV.JSON_STRUCTURE.value]) - 1
+                and key != MetricCV.ATTRIBUTES.value
+            ):
+                cls._fill(value, mdims, level + 1)
+    @classmethod
+    @validate_call
+    def merge(cls, metric_obj1: Any, metric_obj2: Any) -> Self:
+        """
+        Merge two CMECMetric objects with the same json_structure
+        Parameters
+        ----------
+        metric_obj1
+            First CMECMetric object to be merged
+        metric_obj2
+            Second CMECMetric object to be merged
+        Returns
+        -------
+        :
+            Merged CMEC Diagnostic object
+        """
+        mobj1 = cls.model_validate(metric_obj1)
+        mobj2 = cls.model_validate(metric_obj2)
+        merged_obj_dims = MetricDimensions.merge_dimension(mobj1.DIMENSIONS, mobj2.DIMENSIONS)
+        result1 = mobj1.RESULTS
+        result2 = mobj2.RESULTS
+        merged_obj_rlts = cls._merge(dict(result1), result2)
+        cls._fill(merged_obj_rlts, merged_obj_dims.root)
+        MetricResults.model_validate(merged_obj_rlts, context=merged_obj_dims)
+        return cls(DIMENSIONS=merged_obj_dims, RESULTS=merged_obj_rlts)
+    @staticmethod
+    def create_template() -> dict[str, Any]:
+        """
+        Return an empty dictionary in CMEC diagnostic bundle format
+        """
+        default_dimensions = MetricDimensions()
+        return {
+            MetricCV.DIMENSIONS.value: default_dimensions.root,
+            MetricCV.RESULTS.value: {},
+            MetricCV.PROVENANCE.value: None,
+            MetricCV.DISCLAIMER.value: None,
+            MetricCV.NOTES.value: None,
+        }
+    def iter_results(self) -> Generator[MetricValue]:
+        """
+        Iterate over the executions in the diagnostic bundle
+        This will yield a dictionary for each result, with the dimensions and the value
+        Returns
+        -------
+            A generator of diagnostic values
+        """
+        dimensions = cast(list[str], self.DIMENSIONS[MetricCV.JSON_STRUCTURE.value])
+        yield from _walk_results(dimensions, self.RESULTS, {})
+def _walk_results(
+    dimensions: list[str], results: dict[str, Any], metadata: dict[str, str]
+) -> Generator[MetricValue]:
+    assert len(dimensions), "Not enough dimensions"  # noqa: S101
+    dimension = dimensions[0]
+    for key, value in results.items():
+        if key == MetricCV.ATTRIBUTES.value:
+            continue
+        metadata[dimension] = key
+        if isinstance(value, str | float | int):
+            yield MetricValue(
+                dimensions=metadata, value=value, attributes=results.get(MetricCV.ATTRIBUTES.value)
+            )
+        else:
+            yield from _walk_results(dimensions[1:], value, {**metadata})
+class CMECGenerateJsonSchema(GenerateJsonSchema):
+    """
+    Customized CMEC JSON schema generation
+    """
+    def generate(self: Self, schema: CoreSchema, mode: JsonSchemaMode = "validation") -> JsonSchemaValue:
+        """Generate customized json schema"""
+        json_schema = super().generate(schema, mode=mode)
+        json_schema["title"] = "CMEC"
+        json_schema["$schema"] = self.schema_dialect
+        return json_schema