PyPI - esgvoc - Versions diffs - 2.0.2__py3-none-any.whl - Mend

esgvoc 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (147) hide show

esgvoc/__init__.py +3 -0
esgvoc/api/__init__.py +91 -0
esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
esgvoc/api/data_descriptors/__init__.py +159 -0
esgvoc/api/data_descriptors/activity.py +72 -0
esgvoc/api/data_descriptors/archive.py +5 -0
esgvoc/api/data_descriptors/area_label.py +30 -0
esgvoc/api/data_descriptors/branded_suffix.py +30 -0
esgvoc/api/data_descriptors/branded_variable.py +21 -0
esgvoc/api/data_descriptors/citation_url.py +5 -0
esgvoc/api/data_descriptors/contact.py +5 -0
esgvoc/api/data_descriptors/conventions.py +28 -0
esgvoc/api/data_descriptors/creation_date.py +18 -0
esgvoc/api/data_descriptors/data_descriptor.py +127 -0
esgvoc/api/data_descriptors/data_specs_version.py +25 -0
esgvoc/api/data_descriptors/date.py +5 -0
esgvoc/api/data_descriptors/directory_date.py +22 -0
esgvoc/api/data_descriptors/drs_specs.py +38 -0
esgvoc/api/data_descriptors/experiment.py +215 -0
esgvoc/api/data_descriptors/forcing_index.py +21 -0
esgvoc/api/data_descriptors/frequency.py +48 -0
esgvoc/api/data_descriptors/further_info_url.py +5 -0
esgvoc/api/data_descriptors/grid.py +43 -0
esgvoc/api/data_descriptors/horizontal_label.py +20 -0
esgvoc/api/data_descriptors/initialization_index.py +27 -0
esgvoc/api/data_descriptors/institution.py +80 -0
esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
esgvoc/api/data_descriptors/license.py +31 -0
esgvoc/api/data_descriptors/member_id.py +9 -0
esgvoc/api/data_descriptors/mip_era.py +26 -0
esgvoc/api/data_descriptors/model_component.py +32 -0
esgvoc/api/data_descriptors/models_test/models.py +17 -0
esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
esgvoc/api/data_descriptors/obs_type.py +5 -0
esgvoc/api/data_descriptors/organisation.py +22 -0
esgvoc/api/data_descriptors/physics_index.py +21 -0
esgvoc/api/data_descriptors/product.py +16 -0
esgvoc/api/data_descriptors/publication_status.py +5 -0
esgvoc/api/data_descriptors/realization_index.py +24 -0
esgvoc/api/data_descriptors/realm.py +16 -0
esgvoc/api/data_descriptors/regex.py +5 -0
esgvoc/api/data_descriptors/region.py +35 -0
esgvoc/api/data_descriptors/resolution.py +7 -0
esgvoc/api/data_descriptors/source.py +120 -0
esgvoc/api/data_descriptors/source_type.py +5 -0
esgvoc/api/data_descriptors/sub_experiment.py +5 -0
esgvoc/api/data_descriptors/table.py +28 -0
esgvoc/api/data_descriptors/temporal_label.py +20 -0
esgvoc/api/data_descriptors/time_range.py +17 -0
esgvoc/api/data_descriptors/title.py +5 -0
esgvoc/api/data_descriptors/tracking_id.py +67 -0
esgvoc/api/data_descriptors/variable.py +56 -0
esgvoc/api/data_descriptors/variant_label.py +25 -0
esgvoc/api/data_descriptors/vertical_label.py +20 -0
esgvoc/api/project_specs.py +143 -0
esgvoc/api/projects.py +1253 -0
esgvoc/api/py.typed +0 -0
esgvoc/api/pydantic_handler.py +146 -0
esgvoc/api/report.py +127 -0
esgvoc/api/search.py +171 -0
esgvoc/api/universe.py +434 -0
esgvoc/apps/__init__.py +6 -0
esgvoc/apps/cmor_tables/__init__.py +7 -0
esgvoc/apps/cmor_tables/cvs_table.py +948 -0
esgvoc/apps/drs/__init__.py +0 -0
esgvoc/apps/drs/constants.py +2 -0
esgvoc/apps/drs/generator.py +429 -0
esgvoc/apps/drs/report.py +540 -0
esgvoc/apps/drs/validator.py +312 -0
esgvoc/apps/ga/__init__.py +104 -0
esgvoc/apps/ga/example_usage.py +315 -0
esgvoc/apps/ga/models/__init__.py +47 -0
esgvoc/apps/ga/models/netcdf_header.py +306 -0
esgvoc/apps/ga/models/validator.py +491 -0
esgvoc/apps/ga/test_ga.py +161 -0
esgvoc/apps/ga/validator.py +277 -0
esgvoc/apps/jsg/json_schema_generator.py +341 -0
esgvoc/apps/jsg/templates/template.jinja +241 -0
esgvoc/apps/test_cv/README.md +214 -0
esgvoc/apps/test_cv/__init__.py +0 -0
esgvoc/apps/test_cv/cv_tester.py +1611 -0
esgvoc/apps/test_cv/example_usage.py +216 -0
esgvoc/apps/vr/__init__.py +12 -0
esgvoc/apps/vr/build_variable_registry.py +71 -0
esgvoc/apps/vr/example_usage.py +60 -0
esgvoc/apps/vr/vr_app.py +333 -0
esgvoc/cli/clean.py +304 -0
esgvoc/cli/cmor.py +46 -0
esgvoc/cli/config.py +1300 -0
esgvoc/cli/drs.py +267 -0
esgvoc/cli/find.py +138 -0
esgvoc/cli/get.py +155 -0
esgvoc/cli/install.py +41 -0
esgvoc/cli/main.py +60 -0
esgvoc/cli/offline.py +269 -0
esgvoc/cli/status.py +79 -0
esgvoc/cli/test_cv.py +258 -0
esgvoc/cli/valid.py +147 -0
esgvoc/core/constants.py +17 -0
esgvoc/core/convert.py +0 -0
esgvoc/core/data_handler.py +206 -0
esgvoc/core/db/__init__.py +3 -0
esgvoc/core/db/connection.py +40 -0
esgvoc/core/db/models/mixins.py +25 -0
esgvoc/core/db/models/project.py +102 -0
esgvoc/core/db/models/universe.py +98 -0
esgvoc/core/db/project_ingestion.py +231 -0
esgvoc/core/db/universe_ingestion.py +172 -0
esgvoc/core/exceptions.py +33 -0
esgvoc/core/logging_handler.py +26 -0
esgvoc/core/repo_fetcher.py +345 -0
esgvoc/core/service/__init__.py +41 -0
esgvoc/core/service/configuration/config_manager.py +196 -0
esgvoc/core/service/configuration/setting.py +363 -0
esgvoc/core/service/data_merger.py +634 -0
esgvoc/core/service/esg_voc.py +77 -0
esgvoc/core/service/resolver_config.py +56 -0
esgvoc/core/service/state.py +324 -0
esgvoc/core/service/string_heuristics.py +98 -0
esgvoc/core/service/term_cache.py +108 -0
esgvoc/core/service/uri_resolver.py +133 -0
esgvoc-2.0.2.dist-info/METADATA +82 -0
esgvoc-2.0.2.dist-info/RECORD +147 -0
esgvoc-2.0.2.dist-info/WHEEL +4 -0
esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0

esgvoc/api/data_descriptors/activity.py ADDED Viewed

@@ -0,0 +1,72 @@
+"""
+Model (i.e. schema/definition) of the activity data descriptor
+"""
+import re
+from typing import TYPE_CHECKING
+from pydantic import HttpUrl, field_validator
+from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor, PlainTermDataDescriptor
+from esgvoc.api.pydantic_handler import create_union
+if TYPE_CHECKING:
+    from esgvoc.api.data_descriptors.experiment import Experiment
+class ActivityCMIP7(PlainTermDataDescriptor):
+    """
+    Identifier of the CMIP activity to which a dataset belongs
+    Examples: "PMIP", "CMIP", "CFMIP", "ScenarioMIP"
+    An 'activity' refers to a coordinated set of modeling experiments
+    designed to address specific scientific questions or objectives.
+    Activities generally have the suffix "MIP",
+    for "model intercomparison project"
+    (even though they're not referred to as projects within CMIP CVs).
+    Activity DRS names should not include a phase.
+    For example, the activity should always be ScenarioMIP,
+    not ScenarioMIP6, ScenarioMIP7 etc.
+    It is now considered essential for each :py:class:`Experiment`
+    to be associated with a single :py:class:`Activity`.
+    However, this was not followed in CMIP6,
+    which significantly complicates definition and validation
+    of the schemas for these two classes.
+    """
+    experiments: list["Experiment"] | list[str]
+    """
+    Experiments 'sponsored' by this activity
+    """
+    urls: list[HttpUrl]
+    """
+    URLs with more information about this activity
+    """
+    @field_validator("drs_name")
+    @classmethod
+    def name_must_not_end_in_number(cls, v: str) -> str:
+        """
+        Reject a `drs_name` whose last character is a digit.
+        :param v: Candidate `drs_name`.
+        :return: `v`, unchanged.
+        :raises ValueError: If `v` ends in a digit.
+        """
+        # A trailing number would encode a phase (e.g. "ScenarioMIP7"),
+        # which activity DRS names must not carry.
+        if re.match(r".*\d$", v):
+            msg = f"`drs_name` for {cls} must not end in a number. Received: {v}"
+            raise ValueError(msg)
+        return v
+class ActivityLegacy(DataDescriptor):
+    """
+    Legacy activity model for CMIP6 and earlier versions.
+    This version only contains basic fields (id, type, description)
+    without the additional requirements introduced in CMIP7.
+    """
+    # Derives from DataDescriptor directly, so it has no `drs_name` field,
+    # yet it is still dispatched through the plain-term visitor hook below.
+    def accept(self, visitor):
+        """Accept method for visitor pattern."""
+        return visitor.visit_plain_term(self)
+# Public name covering both activity schemas (CMIP7 and legacy).
+# NOTE(review): how the two models are combined and tried is decided by
+# `create_union` (esgvoc.api.pydantic_handler), which is not shown here — confirm.
+Activity = create_union(ActivityCMIP7, ActivityLegacy)

esgvoc/api/data_descriptors/archive.py ADDED Viewed

@@ -0,0 +1,5 @@
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+class Archive(PlainTermDataDescriptor):
+    # Plain-term data descriptor for archives; declares no fields of its own,
+    # everything is inherited from PlainTermDataDescriptor.
+    pass

esgvoc/api/data_descriptors/area_label.py ADDED Viewed

@@ -0,0 +1,30 @@
+"""
+Model (i.e. schema/definition) of the area label data descriptor
+"""
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+class AreaLabel(PlainTermDataDescriptor):
+    """
+    Label that describes a specific area sampling approach
+    Examples: "lnd", "air", "sea", "u"
+    This label is used as the area component of a branded variable's suffix
+    (see :py:class:`BrandedSuffix`).
+    """
+    # Nullable but declared without a default: the key has to be supplied,
+    # with an explicit `None` when there is no CF area type.
+    cf_area_type: str | None
+    """
+    CF-conventions area type
+    (https://cfconventions.org/Data/area-type-table/current/build/area-type-table.html).
+    This is set to "u" ("unmasked") when all areas are sampled
+    i.e. no mask is applied to the data.
+    For underlying details and logic, please see
+    [Taylor et al., 2025](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).
+    If `None`, there is no CF-conventions area type
+    associated with this area label.
+    """  # noqa: E501

esgvoc/api/data_descriptors/branded_suffix.py ADDED Viewed

@@ -0,0 +1,30 @@
+"""
+Model (i.e. schema/definition) of the branded suffix data descriptor
+"""
+from esgvoc.api.data_descriptors.data_descriptor import CompositeTermDataDescriptor
+class BrandedSuffix(CompositeTermDataDescriptor):
+    """
+    The suffix of a branded variable.
+    Examples: "tavg-h2m-hxy-u", "tpt-u-hxy-u", "tavg-p19-hxy-air"
+    A branded variable is composed of two parts.
+    The first part is the root variable (see :py:class:`Variable`).
+    The second is the suffix, i.e. the component described here.
+    The suffix captures all the information
+    about the time sampling, horizontal sampling, vertical sampling
+    and area masking of the variable.
+    The suffix is composed of the following components:
+    #. :py:class:`TemporalLabel`
+    #. :py:class:`VerticalLabel`
+    #. :py:class:`HorizontalLabel`
+    #. :py:class:`AreaLabel`
+    For underlying details and logic, please see
+    [Taylor et al., 2025](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).
+    """  # noqa: E501
+    # No fields are declared here: `separator` and `parts` are inherited
+    # from CompositeTermDataDescriptor.

esgvoc/api/data_descriptors/branded_variable.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""
+Model (i.e. schema/definition) of the branded variable data descriptor
+"""
+from esgvoc.api.data_descriptors.data_descriptor import CompositeTermDataDescriptor
+class BrandedVariable(CompositeTermDataDescriptor):
+    """
+    A climate-related quantity or measurement, including information about sampling.
+    Examples: "tas_tavg-h2m-hxy-u", "pr_tpt-u-hxy-u", "ua_tavg-p19-hxy-air"
+    The concept of a branded variable was introduced in CMIP7.
+    A branded variable is composed of two parts.
+    The first part is the root variable (see :py:class:`Variable`).
+    The second is the suffix (see :py:class:`BrandedSuffix`).
+    For underlying details and logic, please see
+    [Taylor et al., 2025](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).
+    """  # noqa: E501
+    # No fields are declared here: `separator` and `parts` are inherited
+    # from CompositeTermDataDescriptor.

esgvoc/api/data_descriptors/citation_url.py ADDED Viewed

@@ -0,0 +1,5 @@
+from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
+class CitationUrl(PatternTermDataDescriptor):
+    # Pattern-term data descriptor for citation URLs; declares no fields of
+    # its own, the pattern comes from the inherited `regex` field.
+    pass

esgvoc/api/data_descriptors/contact.py ADDED Viewed

@@ -0,0 +1,5 @@
+from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
+class Contact(PatternTermDataDescriptor):
+    # Pattern-term data descriptor for contacts; declares no fields of its
+    # own, the pattern comes from the inherited `regex` field.
+    pass

esgvoc/api/data_descriptors/conventions.py ADDED Viewed

@@ -0,0 +1,28 @@
+"""
+Model (i.e. schema/definition) of the conventions data descriptor
+"""
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+class Convention(PlainTermDataDescriptor):
+    """
+    Conventions governing the data
+    Examples: "CF-1.10", "CF-1.12"
+    This data descriptor is actually defined by the CF-conventions.
+    However, it is often used in a more specific and restrictive form
+    within WCRP activities.
+    To support this possibility, this data descriptor must also be defined within esgvoc.
+    The most commonly specified conventions are the
+    climate and forecast metadata conventions (https://cfconventions.org/).
+    Other conventions can also be specified in the 'Conventions'
+    attribute of netCDF files/other metadata.
+    The different conventions are usually separated by a whitespace.
+    Within esgvoc, the 'components' (i.e. whitespace separated bits)
+    are all that is specified.
+    If users wish to combine them, they can,
+    but esgvoc does not treat this as either a pattern or composite term.
+    """
+    # No fields are declared here; each term describes one convention
+    # component using the fields inherited from PlainTermDataDescriptor.

esgvoc/api/data_descriptors/creation_date.py ADDED Viewed

@@ -0,0 +1,18 @@
+"""
+Model (i.e. schema/definition) of the creation date data descriptor
+"""
+from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
+class CreationDate(PatternTermDataDescriptor):
+    r"""
+    Date (more specifically timestamp) that the file was created
+    Examples: "2025-08-21T04:23:12Z", "2024-04-11T14:03:10Z"
+    Note that the examples above assume a `regex` of
+    `\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z`
+    (this matches ISO 8601 timestamps in UTC).
+    If you use a different regex, different examples would be needed.
+    """
+    # The docstring is a raw string so the backslashes of the example regex
+    # stay literal. No fields are declared here; the pattern itself is
+    # supplied through the inherited `regex` field.

esgvoc/api/data_descriptors/data_descriptor.py ADDED Viewed

@@ -0,0 +1,127 @@
+from abc import ABC, abstractmethod
+from typing import Any, ClassVar, Protocol
+from pydantic import BaseModel, ConfigDict
+class ConfiguredBaseModel(BaseModel):
+    # Shared pydantic configuration inherited by every model defined below.
+    model_config = ConfigDict(
+        validate_assignment=True,  # re-validate when an attribute is assigned after construction
+        validate_default=True,  # run validation on default values as well
+        extra="allow",  # keep keys that are not declared as fields
+        arbitrary_types_allowed=True,  # accept field types pydantic has no schema for
+        use_enum_values=True,  # store enum members as their underlying values
+        strict=False,  # lax mode: compatible inputs are coerced
+    )
+class DataDescriptorVisitor(Protocol):
+    """
+    The specifications for a term visitor.
+    """
+    # One hook per kind of term; each term's `accept` method calls the hook
+    # that matches it and returns whatever the hook returns.
+    def visit_sub_set_term(self, term: "DataDescriptorSubSet") -> Any:
+        """Visit a sub set of the information of a term."""
+        pass
+    def visit_plain_term(self, term: "PlainTermDataDescriptor") -> Any:
+        """Visit a plain term."""
+        pass
+    def visit_pattern_term(self, term: "PatternTermDataDescriptor") -> Any:
+        """Visit a pattern term."""
+        pass
+    def visit_composite_term(self, term: "CompositeTermDataDescriptor") -> Any:
+        """Visit a composite term."""
+        # Docstring-only body: equivalent to the `pass` bodies above.
+class DataDescriptor(ConfiguredBaseModel, ABC):
+    """
+    Generic class for the data descriptor classes.
+    """
+    id: str
+    """The identifier of the terms."""
+    type: str
+    """The data descriptor to which the term belongs."""
+    description: str = ""
+    """The description of the term."""
+    @abstractmethod
+    def accept(self, visitor: DataDescriptorVisitor) -> Any:
+        """
+        Accept a term visitor.
+        :param visitor: The term visitor.
+        :type visitor: DataDescriptorVisitor
+        :return: Depending on the visitor.
+        :rtype: Any
+        """
+        pass
+    @property
+    def describe(self):
+        """
+        Field definitions of this term's model class.
+        :return: The ``model_fields`` mapping of the concrete class.
+        """
+        # Read the mapping from the class: accessing `model_fields` through
+        # an instance is deprecated in Pydantic 2.11+.
+        return type(self).model_fields
+class DataDescriptorSubSet(DataDescriptor):
+    """
+    A sub set of the information contained in a term.
+    """
+    # Class-level constant (ClassVar), not a model field.
+    MANDATORY_TERM_FIELDS: ClassVar[tuple[str, str]] = ("id", "type")
+    """The set of mandatory term fields."""
+    def accept(self, visitor: DataDescriptorVisitor) -> Any:
+        return visitor.visit_sub_set_term(self)
+class PlainTermDataDescriptor(DataDescriptor):
+    """
+    A data descriptor that describes hand written terms.
+    """
+    # Required string with no default.
+    # NOTE(review): presumably the spelling of the term as used in DRS
+    # paths/file names — confirm.
+    drs_name: str
+    def accept(self, visitor: DataDescriptorVisitor) -> Any:
+        # Dispatch to the plain-term hook of the visitor.
+        return visitor.visit_plain_term(self)
+class PatternTermDataDescriptor(DataDescriptor):
+    """
+    A data descriptor that describes terms defined by a regular expression.
+    """
+    # Stored as a plain string; nothing in this class compiles or checks it.
+    regex: str
+    """The regular expression."""
+    def accept(self, visitor: DataDescriptorVisitor) -> Any:
+        # Dispatch to the pattern-term hook of the visitor.
+        return visitor.visit_pattern_term(self)
+class CompositeTermPart(ConfiguredBaseModel):
+    """
+    A reference to a term, part of a composite term.
+    """
+    # May be a single id, a list of ids, or omitted entirely (defaults to None).
+    id: str | list[str] | None = None
+    """The id of the referenced term."""
+    type: str
+    """The type of the referenced term."""
+    # NOTE(review): the name suggests True means the part is mandatory, while
+    # the docstring below is phrased in terms of optionality — confirm.
+    is_required: bool
+    """Denote if the term is optional as part of a composite term."""
+class CompositeTermDataDescriptor(DataDescriptor):
+    """
+    A data descriptor that describes terms composed of other terms.
+    """
+    # Both fields are required (no defaults).
+    separator: str
+    """The components separator character."""
+    parts: list[CompositeTermPart]
+    """The components."""
+    def accept(self, visitor: DataDescriptorVisitor) -> Any:
+        # Dispatch to the composite-term hook of the visitor.
+        return visitor.visit_composite_term(self)

esgvoc/api/data_descriptors/data_specs_version.py ADDED Viewed

@@ -0,0 +1,25 @@
+"""
+Model (i.e. schema/definition) of the data specifications version data descriptor
+"""
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+class DataSpecsVersion(PlainTermDataDescriptor):
+    """
+    Data specifications version number
+    Examples: "MIPDS7-2025p10p1"
+    The data specifications describe the overall set of data specifications
+    used when writing the dataset.
+    This version number captures exactly which set of data specifications
+    are consistent (or intended to be consistent) with this dataset.
+    The DRS values can't contain '.' so we use 'p' instead.
+    To go from a DRS value back to a standard version,
+    get everything after the hyphen (everything before the hyphen is a prefix)
+    then replace "p" with ".".
+    Something like, `drs_name.split('-')[-1].replace('p', '.')`.
+    (At the moment, exactly what this means is still vague, particularly for CMIP7.
+    When it solidifies, more details and examples will be added here.)
+    """
+    # No fields are declared here; the conversion described above is
+    # documentation only and is not implemented by this class.

esgvoc/api/data_descriptors/date.py ADDED Viewed

@@ -0,0 +1,5 @@
+from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
+class Date(PatternTermDataDescriptor):
+    # Pattern-term data descriptor for dates; declares no fields of its own,
+    # the pattern comes from the inherited `regex` field.
+    pass

esgvoc/api/data_descriptors/directory_date.py ADDED Viewed

@@ -0,0 +1,22 @@
+"""
+Model (i.e. schema/definition) of the directory date data descriptor
+"""
+from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
+class DirectoryDate(PatternTermDataDescriptor):
+    """
+    Date included as part of data paths
+    Examples: "20240513", "20230202", "20250109"
+    In practice, this acts as a version ID for the dataset.
+    For most CMIP projects, it is the only version ID.
+    For some (e.g. input4MIPs), it is another (redundant) version ID
+    on top of other versioning conventions used by the project.
+    More detail than you could ever want on why this is only in the directory,
+    and not a file attribute, can be found in
+    https://github.com/WCRP-CMIP/CMIP7-CVs/issues/172.
+    """
+    # No fields are declared here; the pattern is supplied through the
+    # inherited `regex` field.

esgvoc/api/data_descriptors/drs_specs.py ADDED Viewed

@@ -0,0 +1,38 @@
+"""
+Model (i.e. schema/definition) of the data reference syntax (DRS) specifications data descriptor
+"""
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+class DRSSpecs(PlainTermDataDescriptor):
+    """
+    Data reference syntax (DRS) specification
+    Examples: "MIP-DRS7"
+    Identifier of the data reference syntax used to name files,
+    define directory trees, and uniquely identify datasets.
+    This data descriptor is self-referential:
+    for a given set of CVs (e.g. CMIP7 CVs),
+    it can only have a single value.
+    In practice, this term was a nice idea,
+    but the way things are architected at the moment,
+    we can't really exploit it.
+    As background, the idea was that multiple projects could use the same DRS
+    e.g. CMIP8 could use the same DRS as CMIP7 if it wanted.
+    In practice, `project_specs` is currently defined per project by esgvoc
+    so there is no way for one project to point at another project's specs
+    to specify the DRS.
+    The way of using the same DRS would be to simply copy the project specs.
+    I actually don't think this is a bad thing
+    (new projects spin up slowly so copying one file is not a big issue).
+    It just means that this label points basically nowhere,
+    there is no 'DRS registry' so people can say,
+    "I have DRS MIP-DRS7, so I go here and look up exactly what that means,
+    then off I go".
+    However, it does open up the possibility of such centralisation/re-use in future
+    so while it's a bit redundant now, having it adds only minor extra work
+    and may be useful so I guess we just go with it.
+    """
+    # No fields are declared here; everything is inherited from
+    # PlainTermDataDescriptor.

esgvoc/api/data_descriptors/experiment.py ADDED Viewed

@@ -0,0 +1,215 @@
+"""
+Model (i.e. schema/definition) of the experiment data descriptor
+"""
+from __future__ import annotations
+from datetime import datetime
+from typing import Union
+from pydantic import BeforeValidator, Field
+from typing_extensions import Annotated
+from esgvoc.api.data_descriptors.EMD_models.component_type import ComponentType
+from esgvoc.api.data_descriptors.activity import Activity
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+from esgvoc.api.data_descriptors.mip_era import MipEra
+# from esgvoc.api.data_descriptors.model_component import ModelComponent
+from esgvoc.api.pydantic_handler import create_union
+def ensure_iso8601_compliant_or_none(value: str | datetime | None) -> datetime | None:
+    """
+    Ensure that a value is ISO-8601 compliant or `None`
+    Parameters
+    ----------
+    value
+        Value to check: an ISO-8601 string, an already-parsed
+        `datetime.datetime`, or `None`
+    Returns
+    -------
+    :
+        `None` if `value is None`, `value` itself if it is already a
+        `datetime.datetime`, otherwise `value` parsed to `datetime.datetime`
+    Raises
+    ------
+    ValueError
+        If `value` is a string that `datetime.fromisoformat` cannot parse
+    """
+    # This runs as a "before" validator on models that re-validate on
+    # assignment, so it can be handed a datetime that was already parsed;
+    # pass those through instead of calling string methods on them.
+    if value is None or isinstance(value, datetime):
+        return value
+    # Only a trailing "Z" is the UTC designator; rewrite just that one so
+    # `fromisoformat` also accepts it on Python versions before 3.11.
+    if value.endswith("Z"):
+        value = f"{value[:-1]}+00:00"
+    return datetime.fromisoformat(value)
+class ExperimentCMIP7(PlainTermDataDescriptor):
+    """
+    Identifier of the CMIP experiment to which a dataset belongs/a dataset is derived from
+    Examples: "historical", "piControl", "ssp126"
+    An 'experiment' refers to a specific, controlled simulation
+    conducted using climate models to investigate particular aspects of the Earth's climate system.
+    These experiments are designed with set parameters, such as initial conditions,
+    external forcings (like greenhouse gas concentrations or solar radiation),
+    and duration, to explore and understand climate behavior under various conditions.
+    It is now considered essential for each :py:class:`Experiment`
+    to be associated with a single :py:class:`Activity`.
+    However, this was not followed in CMIP6,
+    which significantly complicates definition and validation
+    of the schemas for these two classes.
+    """
+    # Every field below is declared without a default, so each key must be
+    # supplied; nullable fields need an explicit `None`.
+    # # Can't be Activity to avoid circularity (?).
+    # # Or it can be, but you have to be very careful
+    # # (SQLModel makes this easier).
+    # activity: Activity
+    activity: str
+    """
+    Activity to which this experiment belongs
+    Could also be phrased as,
+    "activity with which this experiment is most strongly associated".
+    """
+    # Note: Allowing str or a component model is under discussion
+    # (the annotation currently uses ComponentType; the ModelComponent
+    # import at the top of the file is commented out).
+    # Using this to get things working.
+    # Long-term, we might do something different.
+    # NOTE(review): annotated as `list[str] | list[ComponentType]`, whereas
+    # `required_model_components` uses `list[ComponentType | str]` — confirm
+    # whether the difference is intended.
+    additional_allowed_model_components: list[str] | list[ComponentType]
+    """
+    Non-compulsory model components that are allowed when running this experiment
+    """
+    branch_information: str | None
+    """
+    Information about how this experiment should branch from its parent
+    If `None`, this experiment has no parent
+    and therefore no branching information is required.
+    """
+    # The raw input goes through ensure_iso8601_compliant_or_none before
+    # pydantic's own datetime validation.
+    end_timestamp: Annotated[datetime | None, BeforeValidator(ensure_iso8601_compliant_or_none)]
+    """
+    End timestamp (ISO-8601) of the experiment
+    A value of `None` indicates that simulations may end at any time,
+    no particular value is required.
+    """
+    min_ensemble_size: int
+    """
+    Minimum number of ensemble members to run for this experiment
+    This is the minimum ensemble size requested by the definer of the experiment.
+    For other uses, other ensemble sizes may be required
+    so please double check the application your simulations
+    (as defined in e.g. the data request)
+    are intended for too before deciding on your ensemble size.
+    """
+    min_number_yrs_per_sim: float | None
+    """
+    Minimum number of years required per simulation for this experiment
+    If `None`, then there is no minimum number of years required.
+    You can submit as short a simulation as you like.
+    """
+    # Note: Allowing str or Activity is under discussion.
+    # Using this to get things working.
+    # Long-term, we might do something different.
+    parent_activity: Activity | str | None
+    """
+    Activity to which this experiment's parent experiment belongs
+    If `None`, this experiment has no parent experiment.
+    """
+    # Note: Allowing str or Experiment is under discussion.
+    # Using this to get things working.
+    # Long-term, we might do something different.
+    # "Experiment" is a forward reference: the name is only bound at the
+    # bottom of this module.
+    parent_experiment: Union[str, "Experiment", None]
+    """
+    This experiment's parent experiment
+    If `None`, this experiment has no parent experiment.
+    """
+    # Note: Allowing str or MipEra is under discussion.
+    # Using this to get things working.
+    # Long-term, we might do something different.
+    parent_mip_era: MipEra | str | None
+    """
+    The MIP era to which this experiment's parent experiment belongs
+    If `None`, this experiment has no parent experiment.
+    """
+    # Note: Allowing str or a component model is under discussion
+    # (the annotation currently uses ComponentType).
+    # Using this to get things working.
+    # Long-term, we might do something different.
+    required_model_components: list[ComponentType | str]
+    """
+    Model components required to run this experiment
+    """
+    # Same "before" validator as `end_timestamp`.
+    start_timestamp: Annotated[datetime | None, BeforeValidator(ensure_iso8601_compliant_or_none)]
+    """
+    Start timestamp (ISO-8601) of the experiment
+    A value of `None` indicates that simulations may start at any time,
+    no particular value is required.
+    """
+    tier: int | None
+    """
+    Priority tier for this experiment
+    1 is highest priority.
+    If `None`, no priority is specified for this experiment.
+    """
+class ExperimentLegacy(PlainTermDataDescriptor):
+    """
+    An 'experiment' refers to a specific, controlled simulation conducted using climate models to \
+    investigate particular aspects of the Earth's climate system. These experiments are designed \
+    with set parameters, such as initial conditions, external forcings (like greenhouse gas \
+    concentrations or solar radiation), and duration, to explore and understand climate behavior \
+    under various scenarios and conditions.
+    """
+    # Required fields
+    experiment_id: str  # Discriminator - distinguishes Legacy from CMIP7
+    activity_id: list[str]
+    experiment: str
+    # Required key, but the value itself may be `None`.
+    tier: int | None
+    # Optional fields
+    sub_experiment_id: list[str] | None = None
+    start_year: str | int | None = None
+    end_year: str | int | None = None
+    # `int` here, whereas ExperimentCMIP7 and ExperimentBase also accept floats.
+    min_number_yrs_per_sim: int | None = None
+    parent_activity_id: list[str] | None = None
+    parent_experiment_id: list[str] | None = None
+    required_model_components: list[ComponentType | str] | None = None
+    # default_factory gives every instance its own empty list.
+    additional_allowed_model_components: list[ComponentType | str] = Field(default_factory=list)
+class ExperimentBase(PlainTermDataDescriptor):
+    """
+    Base experiment model for Universe data.
+    This loose model accepts experiment data that doesn't fully conform to either
+    ExperimentLegacy or ExperimentCMIP7. Used as fallback for incomplete experiments.
+    Only contains fields common to both Legacy and CMIP7 models.
+    """
+    # Every field declared here has a default, so none of these keys has to
+    # be present; the inherited fields keep their own requirements.
+    tier: int | None = None
+    min_number_yrs_per_sim: float | int | None = None
+    required_model_components: list[ComponentType | str] | None = None
+    # default_factory gives every instance its own empty list.
+    additional_allowed_model_components: list[ComponentType | str] = Field(default_factory=list)
+# Priority: Try strict models first (Legacy, CMIP7), then fall back to Base
+# NOTE(review): how the argument order is used is decided by `create_union`
+# (esgvoc.api.pydantic_handler), which is not shown here — confirm.
+Experiment = create_union(ExperimentLegacy, ExperimentCMIP7, ExperimentBase)

esgvoc/api/data_descriptors/forcing_index.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""
+Model (i.e. schema/definition) of the forcing index data descriptor
+"""
+from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
+class ForcingIndex(PatternTermDataDescriptor):
+    """
+    Label that identifies the forcing variant used to produce a dataset
+    Examples: "f1", "f2", "f23"
+    This label can be used, for example, to distinguish between two historical simulations,
+    one forced with the recommended forcing data sets
+    and another forced by a different dataset,
+    which might yield information about how forcing uncertainty affects the simulation.
+    The value has no intrinsic meaning within the CVs.
+    However, in other external sources (to be confirmed which)
+    the meaning of this forcing label for a given simulation can be looked up.
+    """
+    # No fields are declared here; the pattern is supplied through the
+    # inherited `regex` field.