esgvoc 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. esgvoc/__init__.py +3 -0
  2. esgvoc/api/__init__.py +91 -0
  3. esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
  4. esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
  5. esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
  6. esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
  7. esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
  8. esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
  9. esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
  10. esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
  11. esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
  12. esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
  13. esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
  14. esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
  15. esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
  16. esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
  17. esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
  18. esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
  19. esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
  20. esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
  21. esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
  22. esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
  23. esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
  24. esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
  25. esgvoc/api/data_descriptors/__init__.py +159 -0
  26. esgvoc/api/data_descriptors/activity.py +72 -0
  27. esgvoc/api/data_descriptors/archive.py +5 -0
  28. esgvoc/api/data_descriptors/area_label.py +30 -0
  29. esgvoc/api/data_descriptors/branded_suffix.py +30 -0
  30. esgvoc/api/data_descriptors/branded_variable.py +21 -0
  31. esgvoc/api/data_descriptors/citation_url.py +5 -0
  32. esgvoc/api/data_descriptors/contact.py +5 -0
  33. esgvoc/api/data_descriptors/conventions.py +28 -0
  34. esgvoc/api/data_descriptors/creation_date.py +18 -0
  35. esgvoc/api/data_descriptors/data_descriptor.py +127 -0
  36. esgvoc/api/data_descriptors/data_specs_version.py +25 -0
  37. esgvoc/api/data_descriptors/date.py +5 -0
  38. esgvoc/api/data_descriptors/directory_date.py +22 -0
  39. esgvoc/api/data_descriptors/drs_specs.py +38 -0
  40. esgvoc/api/data_descriptors/experiment.py +215 -0
  41. esgvoc/api/data_descriptors/forcing_index.py +21 -0
  42. esgvoc/api/data_descriptors/frequency.py +48 -0
  43. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  44. esgvoc/api/data_descriptors/grid.py +43 -0
  45. esgvoc/api/data_descriptors/horizontal_label.py +20 -0
  46. esgvoc/api/data_descriptors/initialization_index.py +27 -0
  47. esgvoc/api/data_descriptors/institution.py +80 -0
  48. esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
  49. esgvoc/api/data_descriptors/license.py +31 -0
  50. esgvoc/api/data_descriptors/member_id.py +9 -0
  51. esgvoc/api/data_descriptors/mip_era.py +26 -0
  52. esgvoc/api/data_descriptors/model_component.py +32 -0
  53. esgvoc/api/data_descriptors/models_test/models.py +17 -0
  54. esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
  55. esgvoc/api/data_descriptors/obs_type.py +5 -0
  56. esgvoc/api/data_descriptors/organisation.py +22 -0
  57. esgvoc/api/data_descriptors/physics_index.py +21 -0
  58. esgvoc/api/data_descriptors/product.py +16 -0
  59. esgvoc/api/data_descriptors/publication_status.py +5 -0
  60. esgvoc/api/data_descriptors/realization_index.py +24 -0
  61. esgvoc/api/data_descriptors/realm.py +16 -0
  62. esgvoc/api/data_descriptors/regex.py +5 -0
  63. esgvoc/api/data_descriptors/region.py +35 -0
  64. esgvoc/api/data_descriptors/resolution.py +7 -0
  65. esgvoc/api/data_descriptors/source.py +120 -0
  66. esgvoc/api/data_descriptors/source_type.py +5 -0
  67. esgvoc/api/data_descriptors/sub_experiment.py +5 -0
  68. esgvoc/api/data_descriptors/table.py +28 -0
  69. esgvoc/api/data_descriptors/temporal_label.py +20 -0
  70. esgvoc/api/data_descriptors/time_range.py +17 -0
  71. esgvoc/api/data_descriptors/title.py +5 -0
  72. esgvoc/api/data_descriptors/tracking_id.py +67 -0
  73. esgvoc/api/data_descriptors/variable.py +56 -0
  74. esgvoc/api/data_descriptors/variant_label.py +25 -0
  75. esgvoc/api/data_descriptors/vertical_label.py +20 -0
  76. esgvoc/api/project_specs.py +143 -0
  77. esgvoc/api/projects.py +1253 -0
  78. esgvoc/api/py.typed +0 -0
  79. esgvoc/api/pydantic_handler.py +146 -0
  80. esgvoc/api/report.py +127 -0
  81. esgvoc/api/search.py +171 -0
  82. esgvoc/api/universe.py +434 -0
  83. esgvoc/apps/__init__.py +6 -0
  84. esgvoc/apps/cmor_tables/__init__.py +7 -0
  85. esgvoc/apps/cmor_tables/cvs_table.py +948 -0
  86. esgvoc/apps/drs/__init__.py +0 -0
  87. esgvoc/apps/drs/constants.py +2 -0
  88. esgvoc/apps/drs/generator.py +429 -0
  89. esgvoc/apps/drs/report.py +540 -0
  90. esgvoc/apps/drs/validator.py +312 -0
  91. esgvoc/apps/ga/__init__.py +104 -0
  92. esgvoc/apps/ga/example_usage.py +315 -0
  93. esgvoc/apps/ga/models/__init__.py +47 -0
  94. esgvoc/apps/ga/models/netcdf_header.py +306 -0
  95. esgvoc/apps/ga/models/validator.py +491 -0
  96. esgvoc/apps/ga/test_ga.py +161 -0
  97. esgvoc/apps/ga/validator.py +277 -0
  98. esgvoc/apps/jsg/json_schema_generator.py +341 -0
  99. esgvoc/apps/jsg/templates/template.jinja +241 -0
  100. esgvoc/apps/test_cv/README.md +214 -0
  101. esgvoc/apps/test_cv/__init__.py +0 -0
  102. esgvoc/apps/test_cv/cv_tester.py +1611 -0
  103. esgvoc/apps/test_cv/example_usage.py +216 -0
  104. esgvoc/apps/vr/__init__.py +12 -0
  105. esgvoc/apps/vr/build_variable_registry.py +71 -0
  106. esgvoc/apps/vr/example_usage.py +60 -0
  107. esgvoc/apps/vr/vr_app.py +333 -0
  108. esgvoc/cli/clean.py +304 -0
  109. esgvoc/cli/cmor.py +46 -0
  110. esgvoc/cli/config.py +1300 -0
  111. esgvoc/cli/drs.py +267 -0
  112. esgvoc/cli/find.py +138 -0
  113. esgvoc/cli/get.py +155 -0
  114. esgvoc/cli/install.py +41 -0
  115. esgvoc/cli/main.py +60 -0
  116. esgvoc/cli/offline.py +269 -0
  117. esgvoc/cli/status.py +79 -0
  118. esgvoc/cli/test_cv.py +258 -0
  119. esgvoc/cli/valid.py +147 -0
  120. esgvoc/core/constants.py +17 -0
  121. esgvoc/core/convert.py +0 -0
  122. esgvoc/core/data_handler.py +206 -0
  123. esgvoc/core/db/__init__.py +3 -0
  124. esgvoc/core/db/connection.py +40 -0
  125. esgvoc/core/db/models/mixins.py +25 -0
  126. esgvoc/core/db/models/project.py +102 -0
  127. esgvoc/core/db/models/universe.py +98 -0
  128. esgvoc/core/db/project_ingestion.py +231 -0
  129. esgvoc/core/db/universe_ingestion.py +172 -0
  130. esgvoc/core/exceptions.py +33 -0
  131. esgvoc/core/logging_handler.py +26 -0
  132. esgvoc/core/repo_fetcher.py +345 -0
  133. esgvoc/core/service/__init__.py +41 -0
  134. esgvoc/core/service/configuration/config_manager.py +196 -0
  135. esgvoc/core/service/configuration/setting.py +363 -0
  136. esgvoc/core/service/data_merger.py +634 -0
  137. esgvoc/core/service/esg_voc.py +77 -0
  138. esgvoc/core/service/resolver_config.py +56 -0
  139. esgvoc/core/service/state.py +324 -0
  140. esgvoc/core/service/string_heuristics.py +98 -0
  141. esgvoc/core/service/term_cache.py +108 -0
  142. esgvoc/core/service/uri_resolver.py +133 -0
  143. esgvoc-2.0.2.dist-info/METADATA +82 -0
  144. esgvoc-2.0.2.dist-info/RECORD +147 -0
  145. esgvoc-2.0.2.dist-info/WHEEL +4 -0
  146. esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
  147. esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0
@@ -0,0 +1,72 @@
1
+ """
2
+ Model (i.e. schema/definition) of the activity data descriptor
3
+ """
4
+
5
+ import re
6
+ from typing import TYPE_CHECKING
7
+
8
+ from pydantic import HttpUrl, field_validator
9
+
10
+ from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor, PlainTermDataDescriptor
11
+ from esgvoc.api.pydantic_handler import create_union
12
+
13
+ if TYPE_CHECKING:
14
+ from esgvoc.api.data_descriptors.experiment import Experiment
15
+
16
+
17
class ActivityCMIP7(PlainTermDataDescriptor):
    """
    Identifier of the CMIP activity to which a dataset belongs

    Examples: "PMIP", "CMIP", "CFMIP", "ScenarioMIP"

    An 'activity' refers to a coordinated set of modeling experiments
    designed to address specific scientific questions or objectives.
    Activities generally have the suffix "MIP",
    for "model intercomparison project"
    (even though they're not referred to as projects within CMIP CVs).

    Activity DRS names should not include a phase.
    For example, the activity should always be ScenarioMIP,
    not ScenarioMIP6, ScenarioMIP7 etc.

    It is now considered essential for each :py:class:`Experiment`
    to be associated with a single :py:class:`Activity`.
    However, this was not followed in CMIP6,
    which significantly complicates definition and validation
    of the schemas for these two classes.
    """

    experiments: list["Experiment"] | list[str]
    """
    Experiments 'sponsored' by this activity
    """

    urls: list[HttpUrl]
    """
    URLs with more information about this activity
    """

    @field_validator("drs_name")
    @classmethod
    def name_must_not_end_in_number(cls, v: str) -> str:
        """
        Reject a `drs_name` whose last character is a digit.

        This enforces the rule stated in the class docstring that
        activity DRS names must not include a phase
        (e.g. "ScenarioMIP", not "ScenarioMIP7").

        :param v: The candidate `drs_name`.
        :return: `v`, unchanged, when it does not end in a digit.
        :raises ValueError: If `v` ends in a digit.
        """
        # `\Z` anchors at the very end of the string. `$` would also match
        # just before a trailing newline, and `.*` cannot cross newlines,
        # so the previous `re.match(r".*\d$", v)` could mis-judge such values.
        if re.search(r"\d\Z", v):
            msg = f"`drs_name` for {cls} must not end in a number. Received: {v}"
            raise ValueError(msg)

        return v
57
+
58
+
59
+ class ActivityLegacy(DataDescriptor):
60
+ """
61
+ Legacy activity model for CMIP6 and earlier versions.
62
+
63
+ This version only contains basic fields (id, type, description)
64
+ without the additional requirements introduced in CMIP7.
65
+ """
66
+
67
+ def accept(self, visitor):
68
+ """Accept method for visitor pattern."""
69
+ return visitor.visit_plain_term(self)
70
+
71
+
72
+ Activity = create_union(ActivityCMIP7, ActivityLegacy)
@@ -0,0 +1,5 @@
1
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
2
+
3
+
4
+ class Archive(PlainTermDataDescriptor):
5
+ pass
@@ -0,0 +1,30 @@
1
+ """
2
+ Model (i.e. schema/definition) of the area label data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
6
+
7
+
8
+ class AreaLabel(PlainTermDataDescriptor):
9
+ """
10
+ Label that describes a specific area sampling approach
11
+
12
+ Examples: "lnd", "air", "sea", "u"
13
+
14
+ This label is used as the area component of a branded variable's suffix
15
+ (see :py:class:`BrandedSuffix`).
16
+ """
17
+
18
+ cf_area_type: str | None
19
+ """
20
+ CF-conventions area type
21
+ (https://cfconventions.org/Data/area-type-table/current/build/area-type-table.html).
22
+
23
+ This is set to "u" ("unmasked") when all areas are sampled
24
+ i.e. no mask is applied to the data.
25
+ For underlying details and logic, please see
26
+ [Taylor et al., 2025](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).
27
+
28
+ If `None`, there is no CF-conventions area type
29
+ associated with this area label.
30
+ """ # noqa: E501
@@ -0,0 +1,30 @@
1
+ """
2
+ Model (i.e. schema/definition) of the branded suffix data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import CompositeTermDataDescriptor
6
+
7
+
8
+ class BrandedSuffix(CompositeTermDataDescriptor):
9
+ """
10
+ The suffix of a branded variable.
11
+
12
+ Examples: "tavg-h2m-hxy-u", "tpt-u-hxy-u", "tavg-p19-hxy-air"
13
+
14
+ A branded variable is composed of two parts.
15
+ The first part is the root variable (see :py:class:`Variable`).
16
+ The second is the suffix, i.e. the component described here.
17
+ The suffix captures all the information
18
+ about the time sampling, horizontal sampling, vertical sampling
19
+ and area masking of the variable.
20
+
21
+ The suffix is composed of the following components:
22
+
23
+ #. :py:class:`TemporalLabel`
24
+ #. :py:class:`VerticalLabel`
25
+ #. :py:class:`HorizontalLabel`
26
+ #. :py:class:`AreaLabel`
27
+
28
+ For underlying details and logic, please see
29
+ [Taylor et al., 2025](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).
30
+ """ # noqa: E501
@@ -0,0 +1,21 @@
1
+ """
2
+ Model (i.e. schema/definition) of the branded variable data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import CompositeTermDataDescriptor
6
+
7
+
8
+ class BrandedVariable(CompositeTermDataDescriptor):
9
+ """
10
+ A climate-related quantity or measurement, including information about sampling.
11
+
12
+ Examples: "tas_tavg-h2m-hxy-u", "pr_tpt-u-hxy-u", "ua_tavg-p19-hxy-air"
13
+
14
+ The concept of a branded variable was introduced in CMIP7.
15
+ A branded variable is composed of two parts.
16
+ The first part is the root variable (see :py:class:`Variable`).
17
+ The second is the suffix (see :py:class:`BrandedSuffix`).
18
+
19
+ For underlying details and logic, please see
20
+ [Taylor et al., 2025](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).
21
+ """ # noqa: E501
@@ -0,0 +1,5 @@
1
+ from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
2
+
3
+
4
+ class CitationUrl(PatternTermDataDescriptor):
5
+ pass
@@ -0,0 +1,5 @@
1
+ from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
2
+
3
+
4
+ class Contact(PatternTermDataDescriptor):
5
+ pass
@@ -0,0 +1,28 @@
1
+ """
2
+ Model (i.e. schema/definition) of the conventions data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
6
+
7
+
8
+ class Convention(PlainTermDataDescriptor):
9
+ """
10
+ Conventions governing the data
11
+
12
+ Examples: "CF-1.10", "CF-1.12"
13
+
14
+ This data descriptor is actually defined by the CF-conventions.
15
+ However, it is often used in a more specific and restrictive form
16
+ within WCRP activities.
17
+ To support this possibility, this data descriptor must also be defined within esgvoc.
18
+
19
+ The most commonly specified conventions are the
20
+ climate and forecast metadata conventions (https://cfconventions.org/).
21
+ Other conventions can also be specified in the 'Conventions'
22
+ attribute of netCDF files/other metadata.
23
+ The different conventions are usually separated by a whitespace.
24
+ Within esgvoc, the 'components' (i.e. whitespace separated bits)
25
+ are all that is specified.
26
+ If users wish to combine them, they can,
27
+ but esgvoc does not treat this as either a pattern or composite term.
28
+ """
@@ -0,0 +1,18 @@
1
+ """
2
+ Model (i.e. schema/definition) of the creation date data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
6
+
7
+
8
+ class CreationDate(PatternTermDataDescriptor):
9
+ r"""
10
+ Date (more specifically timestamp) that the file was created
11
+
12
+ Examples: "2025-08-21T04:23:12Z", "2024-04-11T14:03:10Z"
13
+
14
+ Note that the examples above assume a `regex` of
15
+ `\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z`
16
+ (this matches ISO 8601 timestamps in UTC).
17
+ If you use a different regex, different examples would be needed.
18
+ """
@@ -0,0 +1,127 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Any, ClassVar, Protocol
3
+
4
+ from pydantic import BaseModel, ConfigDict
5
+
6
+
7
+ class ConfiguredBaseModel(BaseModel):
8
+ model_config = ConfigDict(
9
+ validate_assignment=True,
10
+ validate_default=True,
11
+ extra="allow",
12
+ arbitrary_types_allowed=True,
13
+ use_enum_values=True,
14
+ strict=False,
15
+ )
16
+
17
+
18
+ class DataDescriptorVisitor(Protocol):
19
+ """
20
+ The specifications for a term visitor.
21
+ """
22
+
23
+ def visit_sub_set_term(self, term: "DataDescriptorSubSet") -> Any:
24
+ """Visit a sub set of the information of a term."""
25
+ pass
26
+
27
+ def visit_plain_term(self, term: "PlainTermDataDescriptor") -> Any:
28
+ """Visit a plain term."""
29
+ pass
30
+
31
+ def visit_pattern_term(self, term: "PatternTermDataDescriptor") -> Any:
32
+ """Visit a pattern term."""
33
+ pass
34
+
35
+ def visit_composite_term(self, term: "CompositeTermDataDescriptor") -> Any:
36
+ """Visit a composite term."""
37
+
38
+
39
class DataDescriptor(ConfiguredBaseModel, ABC):
    """
    Generic class for the data descriptor classes.

    Concrete subclasses must implement :py:meth:`accept` so that a
    :py:class:`DataDescriptorVisitor` can be dispatched on the kind of term.
    """

    id: str
    """The identifier of the term."""
    type: str
    """The data descriptor to which the term belongs."""
    description: str = ""
    """The description of the term."""

    @abstractmethod
    def accept(self, visitor: DataDescriptorVisitor) -> Any:
        """
        Accept a term visitor.

        :param visitor: The term visitor.
        :type visitor: DataDescriptorVisitor
        :return: Depending on the visitor.
        :rtype: Any
        """
        pass

    @property
    def describe(self):
        """
        The field definitions (`model_fields`) of this term's model class.
        """
        # Read `model_fields` from the class rather than the instance:
        # instance access is deprecated in recent pydantic releases,
        # while class access returns the same mapping in every pydantic v2 version.
        return type(self).model_fields
+
67
+
68
+ class DataDescriptorSubSet(DataDescriptor):
69
+ """
70
+ A sub set of the information contained in a term.
71
+ """
72
+
73
+ MANDATORY_TERM_FIELDS: ClassVar[tuple[str, str]] = ("id", "type")
74
+ """The set of mandatory term fields."""
75
+
76
+ def accept(self, visitor: DataDescriptorVisitor) -> Any:
77
+ return visitor.visit_sub_set_term(self)
78
+
79
+
80
+ class PlainTermDataDescriptor(DataDescriptor):
81
+ """
82
+ A data descriptor that describes hand written terms.
83
+ """
84
+
85
+ drs_name: str
86
+
87
+ def accept(self, visitor: DataDescriptorVisitor) -> Any:
88
+ return visitor.visit_plain_term(self)
89
+
90
+
91
+ class PatternTermDataDescriptor(DataDescriptor):
92
+ """
93
+ A data descriptor that describes terms defined by a regular expression.
94
+ """
95
+
96
+ regex: str
97
+ """The regular expression."""
98
+
99
+ def accept(self, visitor: DataDescriptorVisitor) -> Any:
100
+ return visitor.visit_pattern_term(self)
101
+
102
+
103
+ class CompositeTermPart(ConfiguredBaseModel):
104
+ """
105
+ A reference to a term, part of a composite term.
106
+ """
107
+
108
+ id: str | list[str] | None = None
109
+ """The id of the referenced term."""
110
+ type: str
111
+ """The type of the referenced term."""
112
+ is_required: bool
113
+ """Denote if the term is required (i.e. not optional) as part of a composite term."""
114
+
115
+
116
+ class CompositeTermDataDescriptor(DataDescriptor):
117
+ """
118
+ A data descriptor that describes terms composed of other terms.
119
+ """
120
+
121
+ separator: str
122
+ """The components separator character."""
123
+ parts: list[CompositeTermPart]
124
+ """The components."""
125
+
126
+ def accept(self, visitor: DataDescriptorVisitor) -> Any:
127
+ return visitor.visit_composite_term(self)
@@ -0,0 +1,25 @@
1
+ """
2
+ Model (i.e. schema/definition) of the data specifications data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
6
+
7
+
8
+ class DataSpecsVersion(PlainTermDataDescriptor):
9
+ """
10
+ Data specifications version number
11
+
12
+ Examples: "MIPDS7-2025p10p1"
13
+
14
+ The data specifications describe the overall set of data specifications
15
+ used when writing the dataset.
16
+ This version number captures exactly which set of data specifications
17
+ are consistent (or intended to be consistent) with this dataset.
18
+ The DRS values can't contain '.' so we use 'p' instead.
19
+ To go from a DRS value back to a standard version,
20
+ get everything after the hyphen (everything before the hyphen is a prefix)
21
+ then replace "p" with ".".
22
+ Something like, `drs_name.split('-')[-1].replace('p', '.')`.
23
+ (At the moment, exactly what this means is still vague, particularly for CMIP7.
24
+ When it solidifies, more details and examples will be added here.)
25
+ """
@@ -0,0 +1,5 @@
1
+ from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
2
+
3
+
4
+ class Date(PatternTermDataDescriptor):
5
+ pass
@@ -0,0 +1,22 @@
1
+ """
2
+ Model (i.e. schema/definition) of the directory date data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
6
+
7
+
8
+ class DirectoryDate(PatternTermDataDescriptor):
9
+ """
10
+ Date included as part of data paths
11
+
12
+ Examples: "20240513", "20230202", "20250109"
13
+
14
+ In practice, this acts as a version ID for the dataset.
15
+ For most CMIP projects, it is the only version ID.
16
+ For some (e.g. input4MIPs), it is another (redundant) version ID
17
+ on top of other versioning conventions used by the project.
18
+
19
+ More detail than you could ever want on why this is only in the directory,
20
+ and not a file attribute, can be found in
21
+ https://github.com/WCRP-CMIP/CMIP7-CVs/issues/172.
22
+ """
@@ -0,0 +1,38 @@
1
+ """
2
+ Model (i.e. schema/definition) of the data reference syntax (DRS) specifications data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
6
+
7
+
8
+ class DRSSpecs(PlainTermDataDescriptor):
9
+ """
10
+ Data reference syntax (DRS) specification
11
+
12
+ Examples: "MIP-DRS7"
13
+
14
+ Identifier of the data reference syntax used to name files,
15
+ define directory trees, and uniquely identify datasets.
16
+ This data descriptor is self-referential:
17
+ for a given set of CVs (e.g. CMIP7 CVs),
18
+ it can only have a single value.
19
+
20
+ In practice, this term was a nice idea,
21
+ but the way things are architected at the moment,
22
+ we can't really exploit it.
23
+ As background, the idea was that multiple projects could use the same DRS
24
+ e.g. CMIP8 could use the same DRS as CMIP7 if it wanted.
25
+ In practice, `project_specs` is currently defined per project by esgvoc
26
+ so there is no way for one project to point at another project's specs
27
+ to specify the DRS.
28
+ The way of using the same DRS would be to simply copy the project specs.
29
+ I actually don't think this is a bad thing
30
+ (new projects spin up slowly so copying one file is not a big issue).
31
+ It just means that this label points basically nowhere,
32
+ there is no 'DRS registry' so people can say,
33
+ "I have DRS MIP-DRS7, so I go here and look up exactly what that means,
34
+ then off I go".
35
+ However, it does open up the possibility of such centralisation/re-use in future
36
+ so while it's a bit redundant now, having it adds only minor extra work
37
+ and may be useful so I guess we just go with it.
38
+ """
@@ -0,0 +1,215 @@
1
+ """
2
+ Model (i.e. schema/definition) of the experiment data descriptor
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from datetime import datetime
8
+ from typing import Union
9
+
10
+ from pydantic import BeforeValidator, Field
11
+ from typing_extensions import Annotated
12
+
13
+ from esgvoc.api.data_descriptors.EMD_models.component_type import ComponentType
14
+ from esgvoc.api.data_descriptors.activity import Activity
15
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
16
+ from esgvoc.api.data_descriptors.mip_era import MipEra
17
+
18
+ # from esgvoc.api.data_descriptors.model_component import ModelComponent
19
+ from esgvoc.api.pydantic_handler import create_union
20
+
21
+
22
def ensure_iso8601_compliant_or_none(value: str | datetime | None) -> datetime | None:
    """
    Ensure that a value is ISO-8601 compliant or `None`

    Parameters
    ----------
    value
        Value to check.

        May be an ISO-8601 string (a "Z" suffix is accepted for UTC),
        an already-parsed `datetime.datetime`, or `None`.

    Returns
    -------
    :
        Value, cast to `datetime.datetime` if `value is not None`

    Raises
    ------
    ValueError
        `value` is a string that is not ISO-8601 compliant,
        or `value` is of an unsupported type.
    """
    if value is None:
        return None

    # This function is used as a "before" validator, so it may be handed a value
    # that is already a `datetime` (e.g. when a parsed value is assigned back
    # onto a model). `datetime.replace` has a different signature from
    # `str.replace`, so such values must be passed through, not re-parsed.
    if isinstance(value, datetime):
        return value

    if not isinstance(value, str):
        # Raise `ValueError` (rather than letting an `AttributeError` escape)
        # so that the failure is reported as an ordinary validation problem.
        msg = f"Expected an ISO-8601 string, a datetime or None. Received: {value!r} (type: {type(value).__name__})"
        raise ValueError(msg)

    # `datetime.fromisoformat` only accepts the "Z" suffix from Python 3.11 onwards,
    # so spell UTC as an explicit offset first.
    return datetime.fromisoformat(value.replace("Z", "+00:00"))
42
+
43
+
44
+ class ExperimentCMIP7(PlainTermDataDescriptor):
45
+ """
46
+ Identifier of the CMIP experiment to which a dataset belongs/a dataset is derived from
47
+
48
+ Examples: "historical", "piControl", "ssp126"
49
+
50
+ An 'experiment' refers to a specific, controlled simulation
51
+ conducted using climate models to investigate particular aspects of the Earth's climate system.
52
+ These experiments are designed with set parameters, such as initial conditions,
53
+ external forcings (like greenhouse gas concentrations or solar radiation),
54
+ and duration, to explore and understand climate behavior under various conditions.
55
+
56
+ It is now considered essential for each :py:class:`Experiment`
57
+ to be associated with a single :py:class:`Activity`.
58
+ However, this was not followed in CMIP6,
59
+ which significantly complicates definition and validation
60
+ of the schemas for these two classes.
61
+ """
62
+
63
+ # # Can't be Activity to avoid circularity (?).
64
+ # # Or it can be, but you have to be very careful
65
+ # # (SQLModel makes this easier).
66
+ # activity: Activity
67
+ activity: str
68
+ """
69
+ Activity to which this experiment belongs
70
+
71
+ Could also be phrased as,
72
+ "activity with which this experiment is most strongly associated".
73
+ """
74
+
75
+ # Note: Allowing str or ModelComponent is under discussion.
76
+ # Using this to get things working.
77
+ # Long-term, we might do something different.
78
+ additional_allowed_model_components: list[str] | list[ComponentType]
79
+ """
80
+ Non-compulsory model components that are allowed when running this experiment
81
+ """
82
+
83
+ branch_information: str | None
84
+ """
85
+ Information about how this experiment should branch from its parent
86
+
87
+ If `None`, this experiment has no parent
88
+ and therefore no branching information is required.
89
+ """
90
+
91
+ end_timestamp: Annotated[datetime | None, BeforeValidator(ensure_iso8601_compliant_or_none)]
92
+ """
93
+ End timestamp (ISO-8601) of the experiment
94
+
95
+ A value of `None` indicates that simulations may end at any time,
96
+ no particular value is required.
97
+ """
98
+
99
+ min_ensemble_size: int
100
+ """
101
+ Minimum number of ensemble members to run for this experiment
102
+
103
+ This is the minimum ensemble size requested by the definer of the experiment.
104
+ For other uses, other ensemble sizes may be required
105
+ so please double check the application your simulations
106
+ (as defined in e.g. the data request)
107
+ are intended for too before deciding on your ensemble size.
108
+ """
109
+
110
+ min_number_yrs_per_sim: float | None
111
+ """
112
+ Minimum number of years required per simulation for this experiment
113
+
114
+ If `None`, then there is no minimum number of years required.
115
+ You can submit as short a simulation as you like.
116
+ """
117
+
118
+ # Note: Allowing str or Activity is under discussion.
119
+ # Using this to get things working.
120
+ # Long-term, we might do something different.
121
+ parent_activity: Activity | str | None
122
+ """
123
+ Activity to which this experiment's parent experiment belongs
124
+
125
+ If `None`, this experiment has no parent experiment.
126
+ """
127
+
128
+ # Note: Allowing str or Experiment is under discussion.
129
+ # Using this to get things working.
130
+ # Long-term, we might do something different.
131
+ parent_experiment: Union[str, "Experiment", None]
132
+ """
133
+ This experiment's parent experiment
134
+
135
+ If `None`, this experiment has no parent experiment.
136
+ """
137
+
138
+ # Note: Allowing str or MipEra is under discussion.
139
+ # Using this to get things working.
140
+ # Long-term, we might do something different.
141
+ parent_mip_era: MipEra | str | None
142
+ """
143
+ The MIP era to which this experiment's parent experiment belongs
144
+
145
+ If `None`, this experiment has no parent experiment.
146
+ """
147
+
148
+ # Note: Allowing str or ModelComponent is under discussion.
149
+ # Using this to get things working.
150
+ # Long-term, we might do something different.
151
+ required_model_components: list[ComponentType | str]
152
+ """
153
+ Model components required to run this experiment
154
+ """
155
+
156
+ start_timestamp: Annotated[datetime | None, BeforeValidator(ensure_iso8601_compliant_or_none)]
157
+ """
158
+ Start timestamp (ISO-8601) of the experiment
159
+
160
+ A value of `None` indicates that simulations may start at any time,
161
+ no particular value is required.
162
+ """
163
+
164
+ tier: int | None
165
+ """
166
+ Priority tier for this experiment
167
+
168
+ 1 is highest priority.
169
+ If `None`, no priority is specified for this experiment.
170
+ """
171
+
172
+
173
+ class ExperimentLegacy(PlainTermDataDescriptor):
174
+ """
175
+ An 'experiment' refers to a specific, controlled simulation conducted using climate models to \
176
+ investigate particular aspects of the Earth's climate system. These experiments are designed \
177
+ with set parameters, such as initial conditions, external forcings (like greenhouse gas \
178
+ concentrations or solar radiation), and duration, to explore and understand climate behavior \
179
+ under various scenarios and conditions.
180
+ """
181
+
182
+ # Required fields
183
+ experiment_id: str # Discriminator - distinguishes Legacy from CMIP7
184
+ activity_id: list[str]
185
+ experiment: str
186
+ tier: int | None
187
+
188
+ # Optional fields
189
+ sub_experiment_id: list[str] | None = None
190
+ start_year: str | int | None = None
191
+ end_year: str | int | None = None
192
+ min_number_yrs_per_sim: int | None = None
193
+ parent_activity_id: list[str] | None = None
194
+ parent_experiment_id: list[str] | None = None
195
+ required_model_components: list[ComponentType | str] | None = None
196
+ additional_allowed_model_components: list[ComponentType | str] = Field(default_factory=list)
197
+
198
+
199
+ class ExperimentBase(PlainTermDataDescriptor):
200
+ """
201
+ Base experiment model for Universe data.
202
+
203
+ This loose model accepts experiment data that doesn't fully conform to either
204
+ ExperimentLegacy or ExperimentCMIP7. Used as fallback for incomplete experiments.
205
+ Only contains fields common to both Legacy and CMIP7 models.
206
+ """
207
+
208
+ tier: int | None = None
209
+ min_number_yrs_per_sim: float | int | None = None
210
+ required_model_components: list[ComponentType | str] | None = None
211
+ additional_allowed_model_components: list[ComponentType | str] = Field(default_factory=list)
212
+
213
+
214
+ # Priority: Try strict models first (Legacy, CMIP7), then fall back to Base
215
+ Experiment = create_union(ExperimentLegacy, ExperimentCMIP7, ExperimentBase)
@@ -0,0 +1,21 @@
1
+ """
2
+ Model (i.e. schema/definition) of the forcing index data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
6
+
7
+
8
+ class ForcingIndex(PatternTermDataDescriptor):
9
+ """
10
+ Label that identifies the forcing variant used to produce a dataset
11
+
12
+ Examples: "f1", "f2", "f23"
13
+
14
+ This label can be used, for example, to distinguish between two historical simulations,
15
+ one forced with the recommended forcing data sets
16
+ and another forced by a different dataset,
17
+ which might yield information about how forcing uncertainty affects the simulation.
18
+ The value has no intrinsic meaning within the CVs.
19
+ However, in other external sources (to be confirmed which)
20
+ the meaning of this forcing label for a given simulation can be looked up.
21
+ """