esgvoc 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- esgvoc/__init__.py +3 -0
- esgvoc/api/__init__.py +91 -0
- esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
- esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
- esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
- esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
- esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
- esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
- esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
- esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
- esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
- esgvoc/api/data_descriptors/__init__.py +159 -0
- esgvoc/api/data_descriptors/activity.py +72 -0
- esgvoc/api/data_descriptors/archive.py +5 -0
- esgvoc/api/data_descriptors/area_label.py +30 -0
- esgvoc/api/data_descriptors/branded_suffix.py +30 -0
- esgvoc/api/data_descriptors/branded_variable.py +21 -0
- esgvoc/api/data_descriptors/citation_url.py +5 -0
- esgvoc/api/data_descriptors/contact.py +5 -0
- esgvoc/api/data_descriptors/conventions.py +28 -0
- esgvoc/api/data_descriptors/creation_date.py +18 -0
- esgvoc/api/data_descriptors/data_descriptor.py +127 -0
- esgvoc/api/data_descriptors/data_specs_version.py +25 -0
- esgvoc/api/data_descriptors/date.py +5 -0
- esgvoc/api/data_descriptors/directory_date.py +22 -0
- esgvoc/api/data_descriptors/drs_specs.py +38 -0
- esgvoc/api/data_descriptors/experiment.py +215 -0
- esgvoc/api/data_descriptors/forcing_index.py +21 -0
- esgvoc/api/data_descriptors/frequency.py +48 -0
- esgvoc/api/data_descriptors/further_info_url.py +5 -0
- esgvoc/api/data_descriptors/grid.py +43 -0
- esgvoc/api/data_descriptors/horizontal_label.py +20 -0
- esgvoc/api/data_descriptors/initialization_index.py +27 -0
- esgvoc/api/data_descriptors/institution.py +80 -0
- esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
- esgvoc/api/data_descriptors/license.py +31 -0
- esgvoc/api/data_descriptors/member_id.py +9 -0
- esgvoc/api/data_descriptors/mip_era.py +26 -0
- esgvoc/api/data_descriptors/model_component.py +32 -0
- esgvoc/api/data_descriptors/models_test/models.py +17 -0
- esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
- esgvoc/api/data_descriptors/obs_type.py +5 -0
- esgvoc/api/data_descriptors/organisation.py +22 -0
- esgvoc/api/data_descriptors/physics_index.py +21 -0
- esgvoc/api/data_descriptors/product.py +16 -0
- esgvoc/api/data_descriptors/publication_status.py +5 -0
- esgvoc/api/data_descriptors/realization_index.py +24 -0
- esgvoc/api/data_descriptors/realm.py +16 -0
- esgvoc/api/data_descriptors/regex.py +5 -0
- esgvoc/api/data_descriptors/region.py +35 -0
- esgvoc/api/data_descriptors/resolution.py +7 -0
- esgvoc/api/data_descriptors/source.py +120 -0
- esgvoc/api/data_descriptors/source_type.py +5 -0
- esgvoc/api/data_descriptors/sub_experiment.py +5 -0
- esgvoc/api/data_descriptors/table.py +28 -0
- esgvoc/api/data_descriptors/temporal_label.py +20 -0
- esgvoc/api/data_descriptors/time_range.py +17 -0
- esgvoc/api/data_descriptors/title.py +5 -0
- esgvoc/api/data_descriptors/tracking_id.py +67 -0
- esgvoc/api/data_descriptors/variable.py +56 -0
- esgvoc/api/data_descriptors/variant_label.py +25 -0
- esgvoc/api/data_descriptors/vertical_label.py +20 -0
- esgvoc/api/project_specs.py +143 -0
- esgvoc/api/projects.py +1253 -0
- esgvoc/api/py.typed +0 -0
- esgvoc/api/pydantic_handler.py +146 -0
- esgvoc/api/report.py +127 -0
- esgvoc/api/search.py +171 -0
- esgvoc/api/universe.py +434 -0
- esgvoc/apps/__init__.py +6 -0
- esgvoc/apps/cmor_tables/__init__.py +7 -0
- esgvoc/apps/cmor_tables/cvs_table.py +948 -0
- esgvoc/apps/drs/__init__.py +0 -0
- esgvoc/apps/drs/constants.py +2 -0
- esgvoc/apps/drs/generator.py +429 -0
- esgvoc/apps/drs/report.py +540 -0
- esgvoc/apps/drs/validator.py +312 -0
- esgvoc/apps/ga/__init__.py +104 -0
- esgvoc/apps/ga/example_usage.py +315 -0
- esgvoc/apps/ga/models/__init__.py +47 -0
- esgvoc/apps/ga/models/netcdf_header.py +306 -0
- esgvoc/apps/ga/models/validator.py +491 -0
- esgvoc/apps/ga/test_ga.py +161 -0
- esgvoc/apps/ga/validator.py +277 -0
- esgvoc/apps/jsg/json_schema_generator.py +341 -0
- esgvoc/apps/jsg/templates/template.jinja +241 -0
- esgvoc/apps/test_cv/README.md +214 -0
- esgvoc/apps/test_cv/__init__.py +0 -0
- esgvoc/apps/test_cv/cv_tester.py +1611 -0
- esgvoc/apps/test_cv/example_usage.py +216 -0
- esgvoc/apps/vr/__init__.py +12 -0
- esgvoc/apps/vr/build_variable_registry.py +71 -0
- esgvoc/apps/vr/example_usage.py +60 -0
- esgvoc/apps/vr/vr_app.py +333 -0
- esgvoc/cli/clean.py +304 -0
- esgvoc/cli/cmor.py +46 -0
- esgvoc/cli/config.py +1300 -0
- esgvoc/cli/drs.py +267 -0
- esgvoc/cli/find.py +138 -0
- esgvoc/cli/get.py +155 -0
- esgvoc/cli/install.py +41 -0
- esgvoc/cli/main.py +60 -0
- esgvoc/cli/offline.py +269 -0
- esgvoc/cli/status.py +79 -0
- esgvoc/cli/test_cv.py +258 -0
- esgvoc/cli/valid.py +147 -0
- esgvoc/core/constants.py +17 -0
- esgvoc/core/convert.py +0 -0
- esgvoc/core/data_handler.py +206 -0
- esgvoc/core/db/__init__.py +3 -0
- esgvoc/core/db/connection.py +40 -0
- esgvoc/core/db/models/mixins.py +25 -0
- esgvoc/core/db/models/project.py +102 -0
- esgvoc/core/db/models/universe.py +98 -0
- esgvoc/core/db/project_ingestion.py +231 -0
- esgvoc/core/db/universe_ingestion.py +172 -0
- esgvoc/core/exceptions.py +33 -0
- esgvoc/core/logging_handler.py +26 -0
- esgvoc/core/repo_fetcher.py +345 -0
- esgvoc/core/service/__init__.py +41 -0
- esgvoc/core/service/configuration/config_manager.py +196 -0
- esgvoc/core/service/configuration/setting.py +363 -0
- esgvoc/core/service/data_merger.py +634 -0
- esgvoc/core/service/esg_voc.py +77 -0
- esgvoc/core/service/resolver_config.py +56 -0
- esgvoc/core/service/state.py +324 -0
- esgvoc/core/service/string_heuristics.py +98 -0
- esgvoc/core/service/term_cache.py +108 -0
- esgvoc/core/service/uri_resolver.py +133 -0
- esgvoc-2.0.2.dist-info/METADATA +82 -0
- esgvoc-2.0.2.dist-info/RECORD +147 -0
- esgvoc-2.0.2.dist-info/WHEEL +4 -0
- esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
- esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Model (i.e. schema/definition) of the source descriptor
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from pydantic import Field
|
|
6
|
+
|
|
7
|
+
from esgvoc.api.data_descriptors.organisation import Organisation
|
|
8
|
+
from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
|
|
9
|
+
from esgvoc.api.data_descriptors.EMD_models.model_component import EMDModelComponent
|
|
10
|
+
from esgvoc.api.pydantic_handler import create_union
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class SourceCMIP7(PlainTermDataDescriptor):
    """
    Source of the dataset (CMIP7 format with contributors and model_components)

    Examples: "CanESM6-MR", "CR-CMIP-1-0-0"

    The more precise meaning of source depends on the kind of dataset this is.
    For model output, 'source' refers to a numerical representation of the Earth's climate system.
    This source is the model which was used to generate the dataset.
    Such models simulate the interactions between the atmosphere, oceans, land surface, and ice.
    They are based on fundamental physical, chemical, and biological processes
    and are used to understand past, present, and future climate conditions.
    Each source or model is typically associated with a specific research institution, center, or group.
    For instance, models like 'EC-Earth' are developed by a consortium of European institutes,
    while 'GFDL-CM4' is developed by the Geophysical Fluid Dynamics Laboratory (GFDL) in the United States.

    For model inputs i.e. forcings, the 'source' is a unique identifier
    for the group that produced the data and its version.
    This is a different convention from almost all other cases
    (which really muddies the meaning of the term).
    """

    label: str
    """
    Label to use for this source

    Unlike the `drs_name`, this can contain any characters
    """

    label_extended: str
    """
    Extended label to use for this source

    Unlike the `drs_name`, this can contain any characters.
    If desired, it can include lots of verbose information
    (unlike `label`, which should be more terse).
    It can also just be the same as `label`
    if the person registering the source wishes.
    """

    # Note: Allowing str is under discussion.
    # Using this to get things working.
    # Long-term, we might do something different.
    contributors: list[Organisation | str]
    """
    Organisation(s) using this source

    Using is a bit vaguely defined, but in practice it is the organisation(s)
    that submit data using this source.
    """

    # Note: Allowing str is under discussion.
    # Using this to get things working.
    # Long-term, we might do something different.
    model_components: list[EMDModelComponent | str]
    """
    Model components

    If this source is not a model, this can/will just be an empty list.
    """

    @property
    def source(self) -> str:
        """
        Source label as used by CMOR

        Not implemented yet: accessing this property always raises
        `NotImplementedError`.
        """
        raise NotImplementedError
        # Sketch of the intended output (unreachable until the raise above is replaced).
        # Something like:
        # label (release year from EMD if known):
        # (for each model component)\n component: component name (description)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class SourceLegacy(PlainTermDataDescriptor):
    """
    Legacy source model for CMIP6 and earlier versions.

    This version uses different field names and structure compared to the CMIP7 format.
    Of the fields declared in this class, only `label` has no default value.
    """

    activity_participation: list[str] | None = None
    """Activities this source participates in."""

    cohort: list[str] = Field(default_factory=list)
    """Cohort grouping for this source."""

    organisation_id: list[str] = Field(default_factory=list)
    """Organisation IDs associated with this source."""

    label: str
    """Label to use for this source."""

    label_extended: str | None = None
    """Extended label to use for this source."""

    license: dict = Field(default_factory=dict)
    """License information for this source."""

    # This is the only field here that passes an explicit `description` to Field().
    model_component: dict | None = Field(
        default=None,
        description="Dictionary containing the model components that make up this climate source"
    )
    """Model component information (legacy format)."""

    release_year: int | None = None
    """Year this source was released."""
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# Public `Source` name: the result of `create_union` applied to the CMIP7 and legacy models.
Source = create_union(SourceCMIP7, SourceLegacy)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from pydantic import Field, field_validator
|
|
2
|
+
|
|
3
|
+
from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Table(PlainTermDataDescriptor):
    # `product` and `table_date` are declared without a default value;
    # `variable_entry` falls back to a fresh empty list.
    product: str | None
    table_date: str | None
    variable_entry: list[str] = Field(default_factory=list)

    @field_validator("variable_entry", mode="before")
    @classmethod
    def normalize_variable_entry(cls, v):
        """
        Reduce resolved references in ``variable_entry`` to plain values.

        Registered as a ``mode="before"`` validator. When ``v`` is a list, every
        dict item is replaced by its ``"id"`` value (or by ``str(item)`` when the
        dict has no ``"id"`` key); all other items are kept unchanged.
        A value that is not a list is returned as received.
        """
        if isinstance(v, list):
            return [
                # A dict is a resolved object: keep only its identifier
                entry.get("id", str(entry)) if isinstance(entry, dict) else entry
                for entry in v
            ]
        return v
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Model (i.e. schema/definition) of the temporal label data descriptor
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TemporalLabel(PlainTermDataDescriptor):
    """
    Label that describes a specific temporal sampling approach

    Examples: "tavg", "tpt", "tclm"

    This is set to "ti" ("time-independent") when the data has no time axis.
    For underlying details and logic, please see
    [Taylor et al., 2025](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).

    This label is used as the temporal component of a branded variable's suffix
    (see :py:class:`BrandedSuffix`).
    """  # noqa: E501
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Model (i.e. schema/definition) of the time range data descriptor
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TimeRange(PatternTermDataDescriptor):
    """
    Time range spanned by the data

    Examples: "185001-202112", "18500101-20211231", "203101010130-203112312230", "185001-186412-clim"

    The right choice of time range is tightly coupled to the frequency of the data.
    This coupling is not captured within the CVs.
    (It is hopefully enforced elsewhere e.g. in QAQC workflows.)
    """
    # No fields are declared here: the class body consists of the docstring only.
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Model (i.e. schema/definition) of the tracking ID data descriptor
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TrackingId(PatternTermDataDescriptor):
    """
    Tracking ID, i.e. unique ID, of a file

    Examples: "hdl:21.14107/f6635404-8a1a-4aa9-918d-3792e8321f04",
    "hdl:21.14100/718ee427-4efb-46a8-9f89-8192593b15fe"

    This data descriptor applies only at the file level,
    not to datasets as a whole (each file in a dataset gets a unique ID).

    The regex is of the form `hdl:<prefix>/<uuid>`,
    where `<prefix>` is a prefix which is the same for all files in the same 'group'
    and `<uuid>` is a universally unique ID (UUID).

    The 'group' is a bit loosely defined and can be defined in different ways
    for different purposes.
    For CMIP phases, the 'group' is usually the CMIP phase
    i.e. all files that are part of the same CMIP phase use the same tracking ID prefix
    (e.g. all CMIP6 files have the same prefix, all CMIP7 files have the same prefix).
    (Also note that some projects haven't really got this right,
    e.g. the input4MIPs project has re-used the CMIP6 prefix, rather than using its own.)

    The prefixes come from [handle.net](https://www.handle.net/index.html).
    [handle.net](https://www.handle.net/index.html) prefixes
    are allotted to different CMIP (or other project) exercises.
    These prefixes are used by the
    [handle.net](https://www.handle.net/index.html)
    service to group all the entries for the given exercise together.
    These prefixes allow a) each file to have a unique ID
    and b) users to look up entries for all files using their unique ID
    via the [handle.net](https://www.handle.net/index.html) service.
    The prefixes are currently managed and registered by DKRZ
    on behalf of the ESGF team (we think, it's not 100% clear).

    The last part of the tracking ID is a UUID.
    The specification of a UUID is defined elsewhere
    (apparently in ISO/IEC 9834-8:2014).
    A new UUID must be generated for every single file
    such that every file has a unique tracking ID
    (this uniqueness is both within a project thanks to the differing UUIDs
    and across projects thanks to the differing prefixes).
    Most programming languages have native support for UUID generation
    (e.g. the `uuid` library is part of Python's standard library).
    For a standalone solution, the OSSP utility is available.
    It can be accessed from http://www.ossp.org/pkg/lib/uuid/.
    Since CMIP6, version 4 UUIDs (random number based) have been required.

    The tracking IDs are
    used by a PID service
    so that users can find further information about the file
    by going to `hdl.handle.net/<tracking_id_after_the_hdl_colon_prefix_is_removed>`
    e.g. `hdl.handle.net/21.14107/f6635404-8a1a-4aa9-918d-3792e8321f04`
    (a working link from CMIP6 for those who would like to see a live demonstration is
    [hdl.handle.net/21.14100/f2f502c9-9626-31c6-b016-3f7c0534803b](https://hdl.handle.net/21.14100/f2f502c9-9626-31c6-b016-3f7c0534803b),
    which was inferred from a file in which the tracking ID is
    `hdl:21.14100/f2f502c9-9626-31c6-b016-3f7c0534803b`).
    (Or at least this link with handle.net is the intention.
    It hasn't always happened
    e.g. this is not the case for all CMIP7 input4MIPs files.)
    """  # noqa: E501
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Model (i.e. schema/definition) of the variable data descriptor
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Variable(PlainTermDataDescriptor):
    """
    A climate-related quantity or measurement.

    Examples: "tas", "pr", "psl", "rlut"

    These quantities represent key physical, chemical or biological properties of the Earth system
    and can be the result of direct observation of the climate system or simulations.
    Variables cover a range of aspects of the climate system,
    such as temperature, precipitation, sea level, radiation, or atmospheric composition.
    Some more information for variables that have been used in CMIP:

    - *tas*: Near-surface air temperature (measured at 2 meters above the surface)
    - *pr*: Precipitation
    - *psl*: Sea-level pressure
    - *zg*: Geopotential height
    - *rlut*: Top-of-atmosphere longwave radiation
    - *siconc*: Sea-ice concentration
    - *co2*: Atmospheric CO2 concentration

    Since CMIP7, the concept of a variable has been augmented with the idea of 'branding',
    leading to the idea of a 'branded variable'.
    For details, see :py:class:`BrandedVariable`.

    Sometimes 'variable' is also referred to as 'root name' or 'out name'.
    There is mostly a one-to-one mapping between CF standard names and variables.
    However, this is not always possible so please don't assume this is always the case.
    """

    # NOTE(review): annotated `str | None` with no default, so presumably the field
    # must be supplied but may be None - confirm against the base model.
    long_name: str | None
    """
    Long name of the variable

    This is free text and can take any value
    """

    # NOTE(review): same `str | None` without default as `long_name` - confirm.
    standard_name: str | None
    """
    Standard name of the variable

    The standard names are defined by the CF-conventions.

    If `None`, this variable has no standard name according to the CF-conventions.
    """

    units: str
    """
    Units of the variable
    """
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Model (i.e. schema/definition) of the variant label data descriptor
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class VariantLabel(PatternTermDataDescriptor):
    """
    The variant which provides information about how a dataset was created

    Examples: "r1i1p1f1", "r2i2p2f1", "r1i198001p1f1", "r1i198001ap1f1", "r1i199001bp1f1"

    Really, this should be a composite term.
    However, as there is no separator between the parts, it has to be a pattern term
    (at least at the moment).

    As of the latest round of CMIP,
    the variant label is usually composed of the following components:

    #. :py:class:`RealizationIndex`
    #. :py:class:`InitializationIndex`
    #. :py:class:`PhysicsIndex`
    #. :py:class:`ForcingIndex`
    """
    # No fields are declared here: the class body consists of the docstring only.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Model (i.e. schema/definition) of the vertical label data descriptor
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class VerticalLabel(PlainTermDataDescriptor):
    """
    Label that describes a specific vertical sampling approach

    Examples: "h2m", "200hPa", "p19"

    This is set to "u" ("unspecified") when the data has no vertical dimension.
    For underlying details and logic, please see
    [Taylor et al., 2025](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).

    This label is used as the vertical component of a branded variable's suffix
    (see :py:class:`BrandedSuffix`).
    """  # noqa: E501
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, ConfigDict
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DrsType(str, Enum):
    """
    Kinds of DRS specification.

    There is one member per kind: directory, file name and dataset id.
    Because ``str`` is mixed in, each member is also a string equal to its value.
    """

    DIRECTORY = "directory"
    """DRS specification kind: directory."""

    FILE_NAME = "file_name"
    """DRS specification kind: file name."""

    DATASET_ID = "dataset_id"
    """DRS specification kind: dataset id."""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class DrsPart(BaseModel):
    """A fragment of a DRS specification"""

    source_collection: str
    """The collection id."""
    source_collection_term: str | None = None
    "Specifies a specific term in the collection."
    is_required: bool
    """Whether the collection is required for the DRS specification or not."""

    def __str__(self) -> str:
        """Return `source_collection` as the string form of this part."""
        return self.source_collection
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class DrsSpecification(BaseModel):
    """
    A DRS specification.
    """

    type: DrsType
    """The type of the specification."""
    regex: str
    """General pattern for simple checks"""
    separator: str
    """The textual separator string or character."""
    properties: dict | None = None
    """The other specifications (e.g., file name extension for file name DRS specification)."""
    parts: list[DrsPart]
    """The parts of the DRS specification."""
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class AttributeProperty(BaseModel):
    """
    A NetCDF global attribute property specification.
    """

    # The first three fields have no default value.
    source_collection: str
    "The project collection that originated the property."
    is_required: bool
    "Specifies if the attribute must be present in the NetCDF file."
    value_type: str
    "The type of the attribute value."
    # The remaining fields default to None.
    specific_key: str | None = None
    "Specifies a specific key in the collection."
    field_name: str | None = None
    "The name of the attribute field."
    default_value: str | None = None
    "The default value for the attribute."
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class CatalogProperty(BaseModel):
    """
    A dataset property described in a catalog.
    """

    # NOTE(review): `str | None` with no default, so presumably the field must be
    # supplied even when its value is None - confirm.
    source_collection: str | None
    "The project collection that originated the property. `None` value means that the property "
    "is not related to any collection of the project. So the property has limited specifications."
    catalog_field_value_type: str
    "The type of the field value."
    is_required: bool
    "Specifies if the property must be present in the dataset properties."
    # The remaining fields default to None.
    source_collection_term: str | None = None
    "Specifies a specific term in the collection."
    catalog_field_name: str | None = None
    "The name of the collection referenced in the catalog."
    source_collection_key: str | None = None
    "Specifies a key other than drs_name in the collection."
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# A name/version pair describing one catalog extension.
# Both fields are strings with no default value.
class CatalogExtension(BaseModel):
    name: str
    """The name of the extension"""
    version: str
    """The version of the extension"""
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# Description of a catalog system: its name, its URI template and its extensions.
# None of the three fields has a default value.
class CatalogProperties(BaseModel):
    name: str
    """The name of the catalog system."""
    url_template: str
    """The URI template of the catalog system."""
    extensions: list[CatalogExtension]
    """The extensions of the catalog."""
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# Alias: an attribute specification is a list of AttributeProperty entries.
AttributeSpecification = list[AttributeProperty]
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class CatalogSpecification(BaseModel):
    """
    A catalog specification.
    """

    version: str
    """The version of the catalog."""

    catalog_properties: CatalogProperties
    """The properties of the catalog."""

    dataset_properties: list[CatalogProperty]
    "The properties of the dataset described in a catalog."
    file_properties: list[CatalogProperty]
    "The properties of the files described in a catalog."
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class ProjectSpecs(BaseModel):
    """
    The specifications of a project.
    """

    project_id: str
    """The project id."""
    description: str
    """The description of the project."""
    version: str
    """The git_hash used as the version"""
    drs_specs: dict[DrsType, DrsSpecification] | None = None
    """The DRS specifications of the project (directory, file name and dataset id)."""
    # TODO: remove the `= None` default when all projects have catalog_specs.yaml.
    # NOTE(review): the original note read "release = None"; "remove" is assumed - confirm.
    catalog_specs: CatalogSpecification | None = None
    """The catalog specifications of the project."""
    attr_specs: AttributeSpecification | None = None
    """The NetCDF global attribute specifications of the project."""
    # extra="allow": input fields that are not declared above are accepted, not rejected.
    model_config = ConfigDict(extra="allow")
|