PyPI - esgvoc - Versions diffs - 2.0.2__py3-none-any.whl - Mend

esgvoc 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (147) hide show

esgvoc/__init__.py +3 -0
esgvoc/api/__init__.py +91 -0
esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
esgvoc/api/data_descriptors/__init__.py +159 -0
esgvoc/api/data_descriptors/activity.py +72 -0
esgvoc/api/data_descriptors/archive.py +5 -0
esgvoc/api/data_descriptors/area_label.py +30 -0
esgvoc/api/data_descriptors/branded_suffix.py +30 -0
esgvoc/api/data_descriptors/branded_variable.py +21 -0
esgvoc/api/data_descriptors/citation_url.py +5 -0
esgvoc/api/data_descriptors/contact.py +5 -0
esgvoc/api/data_descriptors/conventions.py +28 -0
esgvoc/api/data_descriptors/creation_date.py +18 -0
esgvoc/api/data_descriptors/data_descriptor.py +127 -0
esgvoc/api/data_descriptors/data_specs_version.py +25 -0
esgvoc/api/data_descriptors/date.py +5 -0
esgvoc/api/data_descriptors/directory_date.py +22 -0
esgvoc/api/data_descriptors/drs_specs.py +38 -0
esgvoc/api/data_descriptors/experiment.py +215 -0
esgvoc/api/data_descriptors/forcing_index.py +21 -0
esgvoc/api/data_descriptors/frequency.py +48 -0
esgvoc/api/data_descriptors/further_info_url.py +5 -0
esgvoc/api/data_descriptors/grid.py +43 -0
esgvoc/api/data_descriptors/horizontal_label.py +20 -0
esgvoc/api/data_descriptors/initialization_index.py +27 -0
esgvoc/api/data_descriptors/institution.py +80 -0
esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
esgvoc/api/data_descriptors/license.py +31 -0
esgvoc/api/data_descriptors/member_id.py +9 -0
esgvoc/api/data_descriptors/mip_era.py +26 -0
esgvoc/api/data_descriptors/model_component.py +32 -0
esgvoc/api/data_descriptors/models_test/models.py +17 -0
esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
esgvoc/api/data_descriptors/obs_type.py +5 -0
esgvoc/api/data_descriptors/organisation.py +22 -0
esgvoc/api/data_descriptors/physics_index.py +21 -0
esgvoc/api/data_descriptors/product.py +16 -0
esgvoc/api/data_descriptors/publication_status.py +5 -0
esgvoc/api/data_descriptors/realization_index.py +24 -0
esgvoc/api/data_descriptors/realm.py +16 -0
esgvoc/api/data_descriptors/regex.py +5 -0
esgvoc/api/data_descriptors/region.py +35 -0
esgvoc/api/data_descriptors/resolution.py +7 -0
esgvoc/api/data_descriptors/source.py +120 -0
esgvoc/api/data_descriptors/source_type.py +5 -0
esgvoc/api/data_descriptors/sub_experiment.py +5 -0
esgvoc/api/data_descriptors/table.py +28 -0
esgvoc/api/data_descriptors/temporal_label.py +20 -0
esgvoc/api/data_descriptors/time_range.py +17 -0
esgvoc/api/data_descriptors/title.py +5 -0
esgvoc/api/data_descriptors/tracking_id.py +67 -0
esgvoc/api/data_descriptors/variable.py +56 -0
esgvoc/api/data_descriptors/variant_label.py +25 -0
esgvoc/api/data_descriptors/vertical_label.py +20 -0
esgvoc/api/project_specs.py +143 -0
esgvoc/api/projects.py +1253 -0
esgvoc/api/py.typed +0 -0
esgvoc/api/pydantic_handler.py +146 -0
esgvoc/api/report.py +127 -0
esgvoc/api/search.py +171 -0
esgvoc/api/universe.py +434 -0
esgvoc/apps/__init__.py +6 -0
esgvoc/apps/cmor_tables/__init__.py +7 -0
esgvoc/apps/cmor_tables/cvs_table.py +948 -0
esgvoc/apps/drs/__init__.py +0 -0
esgvoc/apps/drs/constants.py +2 -0
esgvoc/apps/drs/generator.py +429 -0
esgvoc/apps/drs/report.py +540 -0
esgvoc/apps/drs/validator.py +312 -0
esgvoc/apps/ga/__init__.py +104 -0
esgvoc/apps/ga/example_usage.py +315 -0
esgvoc/apps/ga/models/__init__.py +47 -0
esgvoc/apps/ga/models/netcdf_header.py +306 -0
esgvoc/apps/ga/models/validator.py +491 -0
esgvoc/apps/ga/test_ga.py +161 -0
esgvoc/apps/ga/validator.py +277 -0
esgvoc/apps/jsg/json_schema_generator.py +341 -0
esgvoc/apps/jsg/templates/template.jinja +241 -0
esgvoc/apps/test_cv/README.md +214 -0
esgvoc/apps/test_cv/__init__.py +0 -0
esgvoc/apps/test_cv/cv_tester.py +1611 -0
esgvoc/apps/test_cv/example_usage.py +216 -0
esgvoc/apps/vr/__init__.py +12 -0
esgvoc/apps/vr/build_variable_registry.py +71 -0
esgvoc/apps/vr/example_usage.py +60 -0
esgvoc/apps/vr/vr_app.py +333 -0
esgvoc/cli/clean.py +304 -0
esgvoc/cli/cmor.py +46 -0
esgvoc/cli/config.py +1300 -0
esgvoc/cli/drs.py +267 -0
esgvoc/cli/find.py +138 -0
esgvoc/cli/get.py +155 -0
esgvoc/cli/install.py +41 -0
esgvoc/cli/main.py +60 -0
esgvoc/cli/offline.py +269 -0
esgvoc/cli/status.py +79 -0
esgvoc/cli/test_cv.py +258 -0
esgvoc/cli/valid.py +147 -0
esgvoc/core/constants.py +17 -0
esgvoc/core/convert.py +0 -0
esgvoc/core/data_handler.py +206 -0
esgvoc/core/db/__init__.py +3 -0
esgvoc/core/db/connection.py +40 -0
esgvoc/core/db/models/mixins.py +25 -0
esgvoc/core/db/models/project.py +102 -0
esgvoc/core/db/models/universe.py +98 -0
esgvoc/core/db/project_ingestion.py +231 -0
esgvoc/core/db/universe_ingestion.py +172 -0
esgvoc/core/exceptions.py +33 -0
esgvoc/core/logging_handler.py +26 -0
esgvoc/core/repo_fetcher.py +345 -0
esgvoc/core/service/__init__.py +41 -0
esgvoc/core/service/configuration/config_manager.py +196 -0
esgvoc/core/service/configuration/setting.py +363 -0
esgvoc/core/service/data_merger.py +634 -0
esgvoc/core/service/esg_voc.py +77 -0
esgvoc/core/service/resolver_config.py +56 -0
esgvoc/core/service/state.py +324 -0
esgvoc/core/service/string_heuristics.py +98 -0
esgvoc/core/service/term_cache.py +108 -0
esgvoc/core/service/uri_resolver.py +133 -0
esgvoc-2.0.2.dist-info/METADATA +82 -0
esgvoc-2.0.2.dist-info/RECORD +147 -0
esgvoc-2.0.2.dist-info/WHEEL +4 -0
esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0

esgvoc/api/data_descriptors/frequency.py ADDED Viewed

@@ -0,0 +1,48 @@
+"""
+Model (i.e. schema/definition) of the frequency data descriptor
+"""
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+class Frequency(PlainTermDataDescriptor):
+    """
+    Reporting/temporal sampling interval used when creating the dataset.
+
+    Examples: "mon", "day", "3hr", "monC"
+
+    This is a bit of a trickier concept than it first appears.
+    For time average data, it is effectively the size of each time cell
+    (e.g. if each time point is the average of a month's worth of data,
+    then the data is assigned the term "mon").
+    For time point data, it is the time interval between each reported point
+    (e.g. if the data is reported at midday each day,
+    then the data is assigned the term "day",
+    although in practice the size of each time cell works in this case too).
+
+    This can usually be validated against the actual data in the file,
+    but it can be complicated with some calendars
+    (e.g. the mixed Julian-Gregorian calendar, which skips 10 days in October 1582),
+    reporting intervals (e.g. "mon", which changes length at each interval)
+    and when climatologies are involved
+    (as identifying these follows special rules covered by the CF conventions).
+    """
+    interval: float | None
+    """
+    Size of the interval
+    See `self.units` for units.
+    If `None`, then the interval for this frequency label is undefined,
+    either because it does not exist (e.g. the label for data that does not have a time dimension)
+    or because the label does not uniquely define the interval (e.g. sub-hour labels).
+    """
+    units: str | None
+    """
+    Units of the interval
+    If `None`, then the units for this frequency are not defined
+    because it does not exist (e.g. the label for data that does not have a time dimension).
+    """

esgvoc/api/data_descriptors/further_info_url.py ADDED Viewed

@@ -0,0 +1,5 @@
+from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
+class FurtherInfoUrl(PatternTermDataDescriptor):
+    """
+    Model (i.e. schema/definition) of the further info URL data descriptor.
+
+    Declares no fields of its own; everything comes from `PatternTermDataDescriptor`.
+    """
+    pass

esgvoc/api/data_descriptors/grid.py ADDED Viewed

@@ -0,0 +1,43 @@
+"""
+Model (i.e. schema/definition) of the grid data descriptor
+"""
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+from esgvoc.api.data_descriptors.region import Region
+class Grid(PlainTermDataDescriptor):
+    """
+    Grid (horizontal) on which the data is reported.
+
+    Examples: "g1", "g2", "g33"
+
+    The value has no intrinsic meaning within the CVs.
+    However, the other attributes of this model provide information about the grid,
+    and further resources (e.g. cell areas) can be found
+    in other external sources (to be confirmed which).
+
+    Grids with the same id (also referred to as 'grid label') are identical
+    and can be used by more than one model
+    (also referred to as 'source' in CMIP language).
+    How 'identical' is checked is still to be defined; for discussion,
+    see https://github.com/WCRP-CMIP/CMIP7-CVs/issues/202.
+    Grids with different labels are different.
+    """
+    # Note: Allowing str is under discussion.
+    # Using this to get things working.
+    # Long-term, we might do something different.
+    region: Region | str
+    """
+    Region represented by this grid
+    """
+    # Developer note:
+    # There is a tight coupling to region
+    # (see https://github.com/WCRP-CMIP/CMIP7-CVs/issues/202#issue-3084934841).
+    # However, this region can't be the same as the regions used by EMD,
+    # as EMD has the 'limited_area' region, but that's not something
+    # which makes sense in the CMIP context (it's too vague).
+    # As a result, we need to have both Grid (CMIP) and HorizontalGrid (EMD)
+    # and both Region (CMIP) and HorizontalGridRegion (EMD).

esgvoc/api/data_descriptors/horizontal_label.py ADDED Viewed

@@ -0,0 +1,20 @@
+"""
+Model (i.e. schema/definition) of the horizontal label data descriptor
+"""
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+class HorizontalLabel(PlainTermDataDescriptor):
+    """
+    Label that describes a specific horizontal sampling approach.
+
+    Examples: "hxy", "hs", "hm"
+
+    This is set to "hm" ("horizontal mean") when no other horizontal labels apply.
+    For underlying details and logic, please see
+    [Taylor et al., 2025](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).
+
+    This label is used as the horizontal component of a branded variable's suffix
+    (see :py:class:`BrandedSuffix`).
+    """  # noqa: E501

esgvoc/api/data_descriptors/initialization_index.py ADDED Viewed

@@ -0,0 +1,27 @@
+"""
+Model (i.e. schema/definition) of the initialization index data descriptor
+"""
+from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
+class InitializationIndex(PatternTermDataDescriptor):
+    r"""
+    Label that identifies the initialization variant used to produce a dataset.
+
+    Examples: "i1", "i2", "i196001", "i201001", "i201001a", "i201001b"
+
+    This label can be used, for example, to distinguish between two simulations
+    that were initialised in different ways or on different dates
+    (this is most commonly used for decadal prediction simulations).
+
+    When this is of the form `i\d*`, the value has no intrinsic meaning within the CVs.
+    However, in other external sources (to be confirmed which)
+    the meaning of this initialization label for a given simulation can be looked up.
+
+    When this is of the form `i\d{6}[abcde]?`,
+    the digits indicate the year and month used for initialising the simulation,
+    with any suffix letter used to distinguish
+    between simulations that differ in their initialization
+    but nonetheless use the same year and month.
+    """
+    # NOTE(review): as written, `i\d*` also matches a bare "i" and overlaps
+    # with six-digit values such as "i196001"; confirm whether `i\d+`
+    # (excluding the six-digit form) is what is intended.

esgvoc/api/data_descriptors/institution.py ADDED Viewed

@@ -0,0 +1,80 @@
+"""
+Model (i.e. schema/definition) of the institution data descriptor
+"""
+from pydantic import BaseModel, Field, HttpUrl
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+class Location(BaseModel):
+    """
+    Geographical location, given as a city, a country and a latitude/longitude pair.
+
+    Latitude is constrained to [-90, 90] and longitude to [-180, 180].
+    """
+    city: str
+    """
+    City
+    """
+    country: str
+    """
+    Country
+    """
+    lat: float = Field(ge=-90.0, le=90.0)
+    """
+    Latitude in degrees north
+    """
+    lon: float = Field(ge=-180.0, le=180.0)
+    """
+    Longitude in degrees east (range: -180 to 180)
+    """
+class Institution(PlainTermDataDescriptor):
+    """
+    A registered institution.
+
+    Examples: "IPSL", "CR", "NCAR", "CNRM"
+
+    Unlike :py:class:`Organisation`, this can only refer to a single entity
+    (institute, group, person).
+    """
+    acronyms: list[str]
+    """
+    Known acronyms for this member/entity apart from the registered one
+    The registered/official acronym is given in `self.drs_name`.
+    """
+    labels: list[str]
+    """
+    Labels that can be used for this institute
+    These are free-text and can be used when the member/entity needs to be referred to in full,
+    rather than by its acronym.
+    This can also be thought of as 'long names'.
+    """
+    # TODO: discuss whether there is any meaning to the order of these
+    # and what it means to have more than one label.
+    # TODO: discuss whether we should just call this long_names
+    # for consistency with other conventions.
+    location: list[Location]
+    """
+    Location(s) of the institute
+    """
+    # NOTE(review): `ror` is a plain string; no format validation
+    # of the ROR ID is declared in this class.
+    ror: str | None
+    """
+    Research organisation registry (https://ror.org/) ID
+    If `None`, this organisation is not registered with ROR
+    or the ROR was not supplied at the time of registration.
+    """
+    urls: list[HttpUrl]
+    """
+    URL(s) relevant for finding out more information about this member/entity
+    """

esgvoc/api/data_descriptors/known_branded_variable.py ADDED Viewed

@@ -0,0 +1,75 @@
+from typing import Any, Dict, List, Optional
+from pydantic import Field
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+#
+# class KnownBrandedVariable(PlainTermDataDescriptor):
+#     """
+#     A climate-related quantity or measurement, including information about sampling.
+#
+#     The concept of a branded variable was introduced in CMIP7.
+#     A branded variable is composed of two parts.
+#     The first part is the root variable (see :py:class:`Variable`).
+#     The second is the suffix (see :py:class:`BrandedSuffix`).
+#
+#     For further details on the development of branded variables,
+#     see [this paper draft](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).
+#     """
+#
+#     description: str
+#     dimensions: list[str] = Field(default_factory=list)
+#     cell_methods: str
+#     variable: str
+#     label: str
+#
+class KnownBrandedVariable(PlainTermDataDescriptor):
+    """
+    A climate-related quantity or measurement, including information about sampling.
+
+    The concept of a branded variable was introduced in CMIP7.
+    A branded variable is composed of two parts.
+    The first part is the root variable (see :py:class:`Variable`).
+    The second is the suffix (see :py:class:`BrandedSuffix`).
+
+    For further details on the development of branded variables,
+    see [this paper draft](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).
+    """
+    # # ESGVoc required fields
+    # id: str = Field(description="Unique identifier, e.g., 'ta_tavg-p19-hxy-air'")
+    # type: str = Field(default="branded_variable", description="ESGVoc type identifier")
+    # drs_name: str = Field(description="DRS name, same as id")
+    # => already in PlainTermDataDescriptor
+    # CF Standard Name context (flattened from hierarchy)
+    cf_standard_name: str = Field(description="CF standard name, e.g., 'air_temperature'")
+    cf_units: str = Field(description="CF standard units, e.g., 'K'")
+    cf_sn_status: str = Field(description="CF standard name status, e.g., 'approved'")
+    # Variable Root context (flattened from hierarchy)
+    variable_root_name: str = Field(description="Variable root name, e.g., 'ta'")
+    var_def_qualifier: str = Field(default="", description="Variable definition qualifier")
+    branding_suffix_name: str = Field(description="Branding suffix, e.g., 'tavg-p19-hxy-air'")
+    # Variable metadata
+    dimensions: List[str] = Field(description="NetCDF dimensions")
+    cell_methods: str = Field(default="", description="CF cell_methods attribute")
+    cell_measures: str = Field(default="", description="CF cell_measures attribute")
+    history: str = Field(default="", description="Processing history")
+    realm: str = Field(description="Earth system realm, e.g., 'atmos'")
+    # Label components (embedded as plain strings, not references)
+    temporal_label: str = Field(description="Temporal label, e.g., 'tavg'")
+    vertical_label: str = Field(description="Vertical label, e.g., 'p19'")
+    horizontal_label: str = Field(description="Horizontal label, e.g., 'hxy'")
+    area_label: str = Field(description="Area label, e.g., 'air'")
+    # Status
+    bn_status: str = Field(description="Branded variable status, e.g., 'accepted'")
+    # Additional field from the specifications.
+    # It is optional in this model: it defaults to an empty string.
+    positive_direction: str = Field(default="", description="Positive direction for the variable")

esgvoc/api/data_descriptors/license.py ADDED Viewed

@@ -0,0 +1,31 @@
+"""
+Model (i.e. schema/definition) of the license data descriptor
+"""
+from pydantic import HttpUrl
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+class License(PlainTermDataDescriptor):
+    """
+    License that applies to the dataset.
+
+    Examples: "CC-BY-4.0", "CC0-1.0"
+
+    Licenses must be approved by the WIP & CMIP panel
+    before they can be used in CMIP exercises.
+    """
+    spdx_id: str
+    """
+    SPDX license identifier (https://spdx.org/licenses/)
+    """
+    # Developer note: `id` will not match `spdx_id`
+    # exactly because SPDX IDs are not all lowercase,
+    # hence we need this extra attribute.
+    url: HttpUrl
+    """
+    URL with details of the full license and other information
+    """

esgvoc/api/data_descriptors/member_id.py ADDED Viewed

@@ -0,0 +1,9 @@
+from esgvoc.api.data_descriptors.data_descriptor import CompositeTermDataDescriptor
+class MemberId(CompositeTermDataDescriptor):
+    """
+    The member_id uniquely identifies a specific model simulation within an experiment.
+
+    It is created by combining the sub_experiment, which describes the setup
+    or timing of the simulation (like a specific start year),
+    and the variant_label, which details the configuration of the model
+    (including initial conditions, physics, and forcings).
+    Together, they form a code like s1960-r1i1p1f1.
+    This allows users to distinguish between different ensemble members
+    and understand how each run differs from others within the same experiment.
+    """
+    pass

esgvoc/api/data_descriptors/mip_era.py ADDED Viewed

@@ -0,0 +1,26 @@
+"""
+Model (i.e. schema/definition) of the MIP era data descriptor
+"""
+from pydantic import HttpUrl
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+class MipEra(PlainTermDataDescriptor):
+    """
+    Label that identifies the MIP era to which a dataset belongs.
+
+    Examples: "CMIP6", "CMIP7"
+
+    The MIP era is useful for distinguishing experiments that are performed
+    in more than one CMIP phase but whose experimental protocol differs between phases.
+    For example, the "historical" experiments appear in multiple phases of CMIP
+    but have different input forcings in each.
+    This difference can be identified using the MIP era data descriptor.
+    """
+    url: HttpUrl
+    """
+    URL that links to further information about the MIP era
+    """

esgvoc/api/data_descriptors/model_component.py ADDED Viewed

@@ -0,0 +1,32 @@
+"""
+Model (i.e. schema/definition) of the model component data descriptor
+"""
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+class ModelComponent(PlainTermDataDescriptor):
+    """
+    Model component.
+
+    Examples: "AOGCM", "AER", "BGC"
+
+    These terms are intended to help with identifying required components for experiments
+    or filtering models based on having common components.
+    For example, an aerosol scheme or a circulation model or a biogeochemistry component.
+    However, model component is only an approximate term: there is no precise definition
+    of whether any given model has or does not have a given component.
+    """
+    # The commented-out fields below should probably come back in.
+    # However, this level of detail is only relevant for EMD.
+    # For CMOR tables, the convention seems to just be to use the drs_name,
+    # which is awkward because there is more than one possible AOGCM (for example).
+    # Hence, I think we actually need two classes:
+    # `ModelComponent` and `EMDModelComponent`
+    # (or we just get rid of this model component idea completely/
+    # leave it entirely up to EMD).
+    # name: str
+    # realm: dict
+    # nominal_resolution: dict
+    # version: int

esgvoc/api/data_descriptors/models_test/models.py ADDED Viewed

@@ -0,0 +1,17 @@
+from esgvoc.api.data_descriptors.data_descriptor import (
+    CompositeTermDataDescriptor,
+    PatternTermDataDescriptor,
+    PlainTermDataDescriptor,
+)
+class PlainTermDDex(PlainTermDataDescriptor):
+    """
+    Bare subclass of `PlainTermDataDescriptor` that declares no extra fields.
+
+    NOTE(review): presumably an example/test model (the module is `models_test`) - confirm.
+    """
+    pass
+class PatternTermDDex(PatternTermDataDescriptor):
+    """
+    Bare subclass of `PatternTermDataDescriptor` that declares no extra fields.
+
+    NOTE(review): presumably an example/test model (the module is `models_test`) - confirm.
+    """
+    pass
+class CompositeTermDDex(CompositeTermDataDescriptor):
+    """
+    Bare subclass of `CompositeTermDataDescriptor` that declares no extra fields.
+
+    NOTE(review): presumably an example/test model (the module is `models_test`) - confirm.
+    """
+    pass

esgvoc/api/data_descriptors/nominal_resolution.py ADDED Viewed

@@ -0,0 +1,50 @@
+"""
+Model (i.e. schema/definition) of the nominal resolution data descriptor
+"""
+from pydantic import field_validator
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+class NominalResolution(PlainTermDataDescriptor):
+    """
+    Approximate horizontal resolution of a dataset.
+
+    Examples: "1 km", "250 km", "500 km"
+
+    This should be calculated following the algorithm implemented by
+    [PCMDI/nominal_resolution](https://github.com/PCMDI/nominal_resolution/blob/master/lib/api.py)
+    (although, of course, other implementations of the same algorithm could be used).
+    """
+    # Developer note: given this isn't a pattern term data descriptor,
+    # these are split so people don't have to parse the drs_name themselves.
+    magnitude: float
+    """
+    Magnitude of the nominal resolution
+    """
+    # Developer note: this field name shadows the builtin `range` inside the class body;
+    # renaming it would change the model's schema.
+    range: tuple[float, float]
+    """
+    Range of mean resolutions to which this nominal resolution applies
+    """
+    units: str
+    """
+    Units of the nominal resolution and range
+    """
+    @field_validator("range")
+    @classmethod
+    def validate_range(cls, v):
+        """
+        Validate that range has exactly 2 values and min <= max.
+
+        Returns `v` unchanged when both checks pass;
+        raises `ValueError` with the received value in the message otherwise.
+        """
+        # Length check first, so the ordering check below can index safely.
+        if len(v) != 2:
+            msg = f"range must contain exactly 2 values [min, max]. Received: {v}"
+            raise ValueError(msg)
+        # Equal bounds are accepted; only a strictly inverted range is rejected.
+        if v[0] > v[1]:
+            msg = f"range[0] must be <= range[1]. Received: {v}"
+            raise ValueError(msg)
+        return v

esgvoc/api/data_descriptors/obs_type.py ADDED Viewed

@@ -0,0 +1,5 @@
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+class ObsType(PlainTermDataDescriptor):
+    """
+    Model (i.e. schema/definition) of the obs type data descriptor.
+
+    Declares no fields of its own; everything comes from `PlainTermDataDescriptor`.
+    """
+    pass

esgvoc/api/data_descriptors/organisation.py ADDED Viewed

@@ -0,0 +1,22 @@
+"""
+Model (i.e. schema/definition) of the organisation data descriptor
+"""
+from esgvoc.api.data_descriptors.institution import Institution
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+class Organisation(PlainTermDataDescriptor):
+    """
+    A registered organisation.
+
+    Examples: "IPSL", "NCAR", "CNRM-CERFACS", "SOLARIS-HEPPA"
+
+    An organisation carries a list of members (see `members`),
+    each given either as an :py:class:`Institution` or as a plain string.
+    """
+    # Note: Allowing str is under discussion.
+    # Using this to get things working.
+    # Long-term, we might do something different.
+    members: list[Institution | str]
+    """
+    Members associated with this organisation
+    """

esgvoc/api/data_descriptors/physics_index.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""
+Model (i.e. schema/definition) of the physics index data descriptor
+"""
+from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
+class PhysicsIndex(PatternTermDataDescriptor):
+    """
+    Label that identifies the physics variant used to produce a dataset.
+
+    Examples: "p1", "p2", "p20"
+
+    This label can be used, for example, to distinguish between two simulations,
+    one using a model's 'default physics'
+    and another using a model's 'other physics scheme',
+    which might yield information about how differences in physics within the model affect the simulation.
+
+    The value has no intrinsic meaning within the CVs.
+    However, in other external sources (to be confirmed which)
+    the meaning of this physics label for a given simulation can be looked up.
+    """

esgvoc/api/data_descriptors/product.py ADDED Viewed

@@ -0,0 +1,16 @@
+"""
+Model (i.e. schema/definition) of the product data descriptor
+"""
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+class Product(PlainTermDataDescriptor):
+    """
+    Identifier of the data category.
+
+    Examples: "model-output", "observations", "derived"
+
+    This is not a precisely defined data descriptor,
+    rather an approximate labelling.
+    """

esgvoc/api/data_descriptors/publication_status.py ADDED Viewed

@@ -0,0 +1,5 @@
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+class PublicationStatus(PlainTermDataDescriptor):
+    """
+    Model (i.e. schema/definition) of the publication status data descriptor.
+
+    Declares no fields of its own; everything comes from `PlainTermDataDescriptor`.
+    """
+    pass

esgvoc/api/data_descriptors/realization_index.py ADDED Viewed

@@ -0,0 +1,24 @@
+"""
+Model (i.e. schema/definition) of the realisation index data descriptor
+"""
+from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
+class RealizationIndex(PatternTermDataDescriptor):
+    """
+    Label that identifies the realisation variant used to produce a dataset.
+
+    Examples: "r1", "r2", "r23"
+
+    This label can be used to distinguish between two simulations
+    that are equally likely but differ only due to stochastic variations.
+    These differences can be purely stochastic
+    (i.e. arising simply from stochastic variations when re-running the same simulation,
+    even keeping all other conditions the same)
+    or arise from differences in initial conditions,
+    e.g. starting/branching from different points in a control run/parent experiment.
+
+    The value has no intrinsic meaning within the CVs.
+    However, in other external sources (to be confirmed which)
+    the meaning of this realisation label for a given simulation can be looked up.
+    """

esgvoc/api/data_descriptors/realm.py ADDED Viewed

@@ -0,0 +1,16 @@
+"""
+Model (i.e. schema/definition) of the realm data descriptor
+"""
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+class Realm(PlainTermDataDescriptor):
+    """
+    Realm associated with the dataset.
+
+    Examples: "atmos", "land", "ocean", "atmosChem"
+
+    This is intended as a rough categorisation only
+    and is not precisely defined.
+    """

esgvoc/api/data_descriptors/regex.py ADDED Viewed

@@ -0,0 +1,5 @@
+from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
+class Regex(PatternTermDataDescriptor):
+    """
+    Model (i.e. schema/definition) of the regex data descriptor.
+
+    Declares no fields of its own; everything comes from `PatternTermDataDescriptor`.
+    """
+    pass

esgvoc/api/data_descriptors/region.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""
+Model (i.e. schema/definition) of the region data descriptor
+"""
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+class Region(PlainTermDataDescriptor):
+    """
+    Region associated with the dataset.
+
+    Examples: "glb", "30s-90s", "grl"
+
+    In other words, the domain over which the dataset is provided.
+    This is intended as a rough categorisation only
+    and is not precisely defined.
+    """
+    cf_standard_region: str | None
+    """
+    CF standard region
+    See https://cfconventions.org/Data/standardized-region-list/standardized-region-list.current.html
+    If `None`, there is no CF standard region for this region
+    """
+    iso_region: str | None
+    """
+    ISO 3166-1 alpha-3 region code
+    See https://www.iso.org/iso-3166-country-codes.html
+    If `None`, there is no ISO region code for this region
+    """

esgvoc/api/data_descriptors/resolution.py ADDED Viewed

@@ -0,0 +1,7 @@
+from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
+class Resolution(PlainTermDataDescriptor):
+    """
+    Model (i.e. schema/definition) of the resolution data descriptor.
+
+    Adds three required string fields to `PlainTermDataDescriptor`.
+    """
+    # NOTE(review): the meaning of these fields is not documented here.
+    # Presumably `value` is the magnitude (kept as a string), `name` a
+    # human-readable name and `unit` the unit of `value` - confirm.
+    value: str
+    name: str
+    unit: str