esgvoc 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. esgvoc/__init__.py +3 -0
  2. esgvoc/api/__init__.py +91 -0
  3. esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
  4. esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
  5. esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
  6. esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
  7. esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
  8. esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
  9. esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
  10. esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
  11. esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
  12. esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
  13. esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
  14. esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
  15. esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
  16. esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
  17. esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
  18. esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
  19. esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
  20. esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
  21. esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
  22. esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
  23. esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
  24. esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
  25. esgvoc/api/data_descriptors/__init__.py +159 -0
  26. esgvoc/api/data_descriptors/activity.py +72 -0
  27. esgvoc/api/data_descriptors/archive.py +5 -0
  28. esgvoc/api/data_descriptors/area_label.py +30 -0
  29. esgvoc/api/data_descriptors/branded_suffix.py +30 -0
  30. esgvoc/api/data_descriptors/branded_variable.py +21 -0
  31. esgvoc/api/data_descriptors/citation_url.py +5 -0
  32. esgvoc/api/data_descriptors/contact.py +5 -0
  33. esgvoc/api/data_descriptors/conventions.py +28 -0
  34. esgvoc/api/data_descriptors/creation_date.py +18 -0
  35. esgvoc/api/data_descriptors/data_descriptor.py +127 -0
  36. esgvoc/api/data_descriptors/data_specs_version.py +25 -0
  37. esgvoc/api/data_descriptors/date.py +5 -0
  38. esgvoc/api/data_descriptors/directory_date.py +22 -0
  39. esgvoc/api/data_descriptors/drs_specs.py +38 -0
  40. esgvoc/api/data_descriptors/experiment.py +215 -0
  41. esgvoc/api/data_descriptors/forcing_index.py +21 -0
  42. esgvoc/api/data_descriptors/frequency.py +48 -0
  43. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  44. esgvoc/api/data_descriptors/grid.py +43 -0
  45. esgvoc/api/data_descriptors/horizontal_label.py +20 -0
  46. esgvoc/api/data_descriptors/initialization_index.py +27 -0
  47. esgvoc/api/data_descriptors/institution.py +80 -0
  48. esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
  49. esgvoc/api/data_descriptors/license.py +31 -0
  50. esgvoc/api/data_descriptors/member_id.py +9 -0
  51. esgvoc/api/data_descriptors/mip_era.py +26 -0
  52. esgvoc/api/data_descriptors/model_component.py +32 -0
  53. esgvoc/api/data_descriptors/models_test/models.py +17 -0
  54. esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
  55. esgvoc/api/data_descriptors/obs_type.py +5 -0
  56. esgvoc/api/data_descriptors/organisation.py +22 -0
  57. esgvoc/api/data_descriptors/physics_index.py +21 -0
  58. esgvoc/api/data_descriptors/product.py +16 -0
  59. esgvoc/api/data_descriptors/publication_status.py +5 -0
  60. esgvoc/api/data_descriptors/realization_index.py +24 -0
  61. esgvoc/api/data_descriptors/realm.py +16 -0
  62. esgvoc/api/data_descriptors/regex.py +5 -0
  63. esgvoc/api/data_descriptors/region.py +35 -0
  64. esgvoc/api/data_descriptors/resolution.py +7 -0
  65. esgvoc/api/data_descriptors/source.py +120 -0
  66. esgvoc/api/data_descriptors/source_type.py +5 -0
  67. esgvoc/api/data_descriptors/sub_experiment.py +5 -0
  68. esgvoc/api/data_descriptors/table.py +28 -0
  69. esgvoc/api/data_descriptors/temporal_label.py +20 -0
  70. esgvoc/api/data_descriptors/time_range.py +17 -0
  71. esgvoc/api/data_descriptors/title.py +5 -0
  72. esgvoc/api/data_descriptors/tracking_id.py +67 -0
  73. esgvoc/api/data_descriptors/variable.py +56 -0
  74. esgvoc/api/data_descriptors/variant_label.py +25 -0
  75. esgvoc/api/data_descriptors/vertical_label.py +20 -0
  76. esgvoc/api/project_specs.py +143 -0
  77. esgvoc/api/projects.py +1253 -0
  78. esgvoc/api/py.typed +0 -0
  79. esgvoc/api/pydantic_handler.py +146 -0
  80. esgvoc/api/report.py +127 -0
  81. esgvoc/api/search.py +171 -0
  82. esgvoc/api/universe.py +434 -0
  83. esgvoc/apps/__init__.py +6 -0
  84. esgvoc/apps/cmor_tables/__init__.py +7 -0
  85. esgvoc/apps/cmor_tables/cvs_table.py +948 -0
  86. esgvoc/apps/drs/__init__.py +0 -0
  87. esgvoc/apps/drs/constants.py +2 -0
  88. esgvoc/apps/drs/generator.py +429 -0
  89. esgvoc/apps/drs/report.py +540 -0
  90. esgvoc/apps/drs/validator.py +312 -0
  91. esgvoc/apps/ga/__init__.py +104 -0
  92. esgvoc/apps/ga/example_usage.py +315 -0
  93. esgvoc/apps/ga/models/__init__.py +47 -0
  94. esgvoc/apps/ga/models/netcdf_header.py +306 -0
  95. esgvoc/apps/ga/models/validator.py +491 -0
  96. esgvoc/apps/ga/test_ga.py +161 -0
  97. esgvoc/apps/ga/validator.py +277 -0
  98. esgvoc/apps/jsg/json_schema_generator.py +341 -0
  99. esgvoc/apps/jsg/templates/template.jinja +241 -0
  100. esgvoc/apps/test_cv/README.md +214 -0
  101. esgvoc/apps/test_cv/__init__.py +0 -0
  102. esgvoc/apps/test_cv/cv_tester.py +1611 -0
  103. esgvoc/apps/test_cv/example_usage.py +216 -0
  104. esgvoc/apps/vr/__init__.py +12 -0
  105. esgvoc/apps/vr/build_variable_registry.py +71 -0
  106. esgvoc/apps/vr/example_usage.py +60 -0
  107. esgvoc/apps/vr/vr_app.py +333 -0
  108. esgvoc/cli/clean.py +304 -0
  109. esgvoc/cli/cmor.py +46 -0
  110. esgvoc/cli/config.py +1300 -0
  111. esgvoc/cli/drs.py +267 -0
  112. esgvoc/cli/find.py +138 -0
  113. esgvoc/cli/get.py +155 -0
  114. esgvoc/cli/install.py +41 -0
  115. esgvoc/cli/main.py +60 -0
  116. esgvoc/cli/offline.py +269 -0
  117. esgvoc/cli/status.py +79 -0
  118. esgvoc/cli/test_cv.py +258 -0
  119. esgvoc/cli/valid.py +147 -0
  120. esgvoc/core/constants.py +17 -0
  121. esgvoc/core/convert.py +0 -0
  122. esgvoc/core/data_handler.py +206 -0
  123. esgvoc/core/db/__init__.py +3 -0
  124. esgvoc/core/db/connection.py +40 -0
  125. esgvoc/core/db/models/mixins.py +25 -0
  126. esgvoc/core/db/models/project.py +102 -0
  127. esgvoc/core/db/models/universe.py +98 -0
  128. esgvoc/core/db/project_ingestion.py +231 -0
  129. esgvoc/core/db/universe_ingestion.py +172 -0
  130. esgvoc/core/exceptions.py +33 -0
  131. esgvoc/core/logging_handler.py +26 -0
  132. esgvoc/core/repo_fetcher.py +345 -0
  133. esgvoc/core/service/__init__.py +41 -0
  134. esgvoc/core/service/configuration/config_manager.py +196 -0
  135. esgvoc/core/service/configuration/setting.py +363 -0
  136. esgvoc/core/service/data_merger.py +634 -0
  137. esgvoc/core/service/esg_voc.py +77 -0
  138. esgvoc/core/service/resolver_config.py +56 -0
  139. esgvoc/core/service/state.py +324 -0
  140. esgvoc/core/service/string_heuristics.py +98 -0
  141. esgvoc/core/service/term_cache.py +108 -0
  142. esgvoc/core/service/uri_resolver.py +133 -0
  143. esgvoc-2.0.2.dist-info/METADATA +82 -0
  144. esgvoc-2.0.2.dist-info/RECORD +147 -0
  145. esgvoc-2.0.2.dist-info/WHEEL +4 -0
  146. esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
  147. esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0
@@ -0,0 +1,48 @@
1
+ """
2
+ Model (i.e. schema/definition) of the frequency data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
6
+
7
+
8
+ class Frequency(PlainTermDataDescriptor):
9
+ """
10
+ Reporting/temporal sampling interval used when creating the dataset
11
+
12
+ Examples: "mon", "day", "3hr", "monC"
13
+
14
+ This is a bit of a trickier concept than it first appears.
15
+ For time average data, it is effectively the size of each time cell
16
+ (e.g. if each time point is the average of a month's worth of data,
17
+ then the data is assigned the term "mon").
18
+ For time point data, it is the time interval between each reported point
19
+ (e.g. if the data is reported at midday each day,
20
+ then the data is assigned the term "day",
21
+ although in practice the size of each time cell works in this case too).
22
+
23
+ This can usually be validated against the actual data in the file,
24
+ but it can be complicated with some calendars
25
+ (e.g. the Julian-Gregorian calendar which has 10 missing days in 1582),
26
+ reporting intervals (e.g. "mon", which changes length at each interval)
27
+ and when climatologies are involved
28
+ (as identifying these follows special rules covered by the CF conventions).
29
+ """
30
+
31
+ interval: float | None
32
+ """
33
+ Size of the interval
34
+
35
+ See `self.units` for units.
36
+
37
+ If `None`, then the interval for this frequency label is undefined,
38
+ either because it does not exist (e.g. the label for data that does not have a time dimension)
39
+ or because the label does not uniquely define the interval (e.g. sub-hour labels).
40
+ """
41
+
42
+ units: str | None
43
+ """
44
+ Units of the interval
45
+
46
+ If `None`, then the units for this frequency are not defined
47
+ because the interval does not exist (e.g. the label for data that does not have a time dimension).
48
+ """
@@ -0,0 +1,5 @@
1
+ from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
2
+
3
+
4
+ class FurtherInfoUrl(PatternTermDataDescriptor):
5
+ pass
@@ -0,0 +1,43 @@
1
+ """
2
+ Model (i.e. schema/definition) of the grid data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
6
+ from esgvoc.api.data_descriptors.region import Region
7
+
8
+
9
+ class Grid(PlainTermDataDescriptor):
10
+ """
11
+ Grid (horizontal) on which the data is reported
12
+
13
+ Examples: "g1", "g2", "g33"
14
+
15
+ The value has no intrinsic meaning within the CVs.
16
+ However, the other attributes of this model
17
+ provide information about the grid
18
+ and in other external sources (to be confirmed which)
19
+ further resources can be found e.g. cell areas.
20
+
21
+ Grids with the same id (also referred to as 'grid label')
22
+ are identical (details of how 'identical' is checked are to come; for discussion,
23
+ see https://github.com/WCRP-CMIP/CMIP7-CVs/issues/202)
24
+ and can be used by more than one model
25
+ (also referred to as 'source' in CMIP language).
26
+ Grids with different labels are different.
27
+ """
28
+
29
+ # Note: Allowing str is under discussion.
30
+ # Using this to get things working.
31
+ # Long-term, we might do something different.
32
+ region: Region | str
33
+ """
34
+ Region represented by this grid
35
+ """
36
+ # Developer note:
37
+ # There is a tight coupling to region
38
+ # (see https://github.com/WCRP-CMIP/CMIP7-CVs/issues/202#issue-3084934841).
39
+ # However, this region can't be the same as the regions used by EMD,
40
+ # as EMD has the 'limited_area' region, but that's not something
41
+ # which makes sense in the CMIP context (it's too vague).
42
+ # As a result, we need to have both Grid (CMIP) and HorizontalGrid (EMD)
43
+ # and both Region (CMIP) and HorizontalGridRegion (EMD).
@@ -0,0 +1,20 @@
1
+ """
2
+ Model (i.e. schema/definition) of the horizontal label data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
6
+
7
+
8
+ class HorizontalLabel(PlainTermDataDescriptor):
9
+ """
10
+ Label that describes a specific horizontal sampling approach
11
+
12
+ Examples: "hxy", "hs", "hm"
13
+
14
+ This is set to "hm" ("horizontal mean") when no other horizontal labels apply.
15
+ For underlying details and logic, please see
16
+ [Taylor et al., 2025](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).
17
+
18
+ This label is used as the horizontal component of a branded variable's suffix
19
+ (see :py:class:`BrandedSuffix`).
20
+ """ # noqa: E501
@@ -0,0 +1,27 @@
1
+ """
2
+ Model (i.e. schema/definition) of the initialization index data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
6
+
7
+
8
+ class InitializationIndex(PatternTermDataDescriptor):
9
+ r"""
10
+ Label that identifies the initialization variant used to produce a dataset
11
+
12
+ Examples: "i1", "i2", "i196001", "i201001", "i201001a", "i201001b"
13
+
14
+ This label can be used, for example, to distinguish between two simulations
15
+ that were initialised in different ways or on different dates
16
+ (this is most commonly used for decadal prediction simulations).
17
+
18
+ When this is of the form `i\d*`, the value has no intrinsic meaning within the CVs.
19
+ However, in other external sources (to be confirmed which)
20
+ the meaning of this initialization label for a given simulation can be looked up.
21
+
22
+ When this is of the form `i\d{6}[abcde]?`,
23
+ the digits indicate the year and month used for initialising the simulation,
24
+ with any suffix letter used to distinguish
25
+ between simulations that differ in their initialization
26
+ but nonetheless use the same year and month.
27
+ """
@@ -0,0 +1,80 @@
1
+ """
2
+ Model (i.e. schema/definition) of the institution data descriptor
3
+ """
4
+
5
+ from pydantic import BaseModel, Field, HttpUrl
6
+
7
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
8
+
9
+
10
+ class Location(BaseModel):
11
+ """Location"""
12
+
13
+ city: str
14
+ """
15
+ City
16
+ """
17
+
18
+ country: str
19
+ """
20
+ Country
21
+ """
22
+
23
+ lat: float = Field(ge=-90.0, le=90.0)
24
+ """
25
+ Latitude in degrees north
26
+ """
27
+
28
+ lon: float = Field(ge=-180.0, le=180.0)
29
+ """
30
+ Longitude in degrees east (range: -180 to 180)
31
+ """
32
+
33
+
34
+ class Institution(PlainTermDataDescriptor):
35
+ """
36
+ A registered institution
37
+
38
+ Examples: "IPSL", "CR", "NCAR", "CNRM"
39
+
40
+ Unlike :py:class:`Organisation`, this can only refer to a single entity
41
+ (institute, group, person).
42
+ """
43
+
44
+ acronyms: list[str]
45
+ """
46
+ Known acronyms for this member/entity apart from the registered one
47
+
48
+ The registered/official acronym is given in `self.drs_name`.
49
+ """
50
+
51
+ labels: list[str]
52
+ """
53
+ Labels that can be used for this institute
54
+
55
+ These are free-text and can be used when the member/entity needs to be referred to in full,
56
+ rather than by its acronym.
57
+ This can also be thought of as 'long names'.
58
+ """
59
+ # TODO: discuss whether there is any meaning to the order of these
60
+ # and what it means to have more than one label.
61
+ # TODO: discuss whether we should just call this long_names
62
+ # for consistency with other conventions.
63
+
64
+ location: list[Location]
65
+ """
66
+ Location(s) of the institute
67
+ """
68
+
69
+ ror: str | None
70
+ """
71
+ Research organisation registry (https://ror.org/) ID
72
+
73
+ If `None`, this institution is not registered with ROR
74
+ or the ROR was not supplied at the time of registration.
75
+ """
76
+
77
+ urls: list[HttpUrl]
78
+ """
79
+ URL(s) relevant for finding out more information about this member/entity
80
+ """
@@ -0,0 +1,75 @@
1
+ from typing import Any, Dict, List, Optional
2
+
3
+ from pydantic import Field
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
6
+
7
+ #
8
+ # class KnownBrandedVariable(PlainTermDataDescriptor):
9
+ # """
10
+ # A climate-related quantity or measurement, including information about sampling.
11
+ #
12
+ # The concept of a branded variable was introduced in CMIP7.
13
+ # A branded variable is composed of two parts.
14
+ # The first part is the root variable (see :py:class:`Variable`).
15
+ # The second is the suffix (see :py:class:`BrandedSuffix`).
16
+ #
17
+ # For further details on the development of branded variables,
18
+ # see [this paper draft](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).
19
+ # """
20
+ #
21
+ # description: str
22
+ # dimensions: list[str] = Field(default_factory=list)
23
+ # cell_methods: str
24
+ # variable: str
25
+ # label: str
26
+ #
27
+
28
+
29
+ class KnownBrandedVariable(PlainTermDataDescriptor):
30
+ """
31
+ A climate-related quantity or measurement, including information about sampling.
32
+
33
+ The concept of a branded variable was introduced in CMIP7.
34
+ A branded variable is composed of two parts.
35
+ The first part is the root variable (see :py:class:`Variable`).
36
+ The second is the suffix (see :py:class:`BrandedSuffix`).
37
+
38
+ For further details on the development of branded variables,
39
+ see [this paper draft](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).
40
+ """
41
+
42
+ # # ESGVoc required fields
43
+ # id: str = Field(description="Unique identifier, e.g., 'ta_tavg-p19-hxy-air'")
44
+ # type: str = Field(default="branded_variable", description="ESGVoc type identifier")
45
+ # drs_name: str = Field(description="DRS name, same as id")
46
+ # => already in PlainTermDataDescriptor
47
+
48
+ # CF Standard Name context (flattened from hierarchy)
49
+ cf_standard_name: str = Field(description="CF standard name, e.g., 'air_temperature'")
50
+ cf_units: str = Field(description="CF standard units, e.g., 'K'")
51
+ cf_sn_status: str = Field(description="CF standard name status, e.g., 'approved'")
52
+
53
+ # Variable Root context (flattened from hierarchy)
54
+ variable_root_name: str = Field(description="Variable root name, e.g., 'ta'")
55
+ var_def_qualifier: str = Field(default="", description="Variable definition qualifier")
56
+ branding_suffix_name: str = Field(description="Branding suffix, e.g., 'tavg-p19-hxy-air'")
57
+
58
+ # Variable metadata
59
+ dimensions: List[str] = Field(description="NetCDF dimensions")
60
+ cell_methods: str = Field(default="", description="CF cell_methods attribute")
61
+ cell_measures: str = Field(default="", description="CF cell_measures attribute")
62
+ history: str = Field(default="", description="Processing history")
63
+ realm: str = Field(description="Earth system realm, e.g., 'atmos'")
64
+
65
+ # Label components (embedded, not references)
66
+ temporal_label: str = Field(description="Temporal label, e.g., 'tavg'")
67
+ vertical_label: str = Field(description="Vertical label, e.g., 'p19'")
68
+ horizontal_label: str = Field(description="Horizontal label, e.g., 'hxy'")
69
+ area_label: str = Field(description="Area label, e.g., 'air'")
70
+
71
+ # Status
72
+ bn_status: str = Field(description="Branded variable status, e.g., 'accepted'")
73
+
74
+ # Additional fields from specifications (have defaults, so not required at validation)
75
+ positive_direction: str = Field(default="", description="Positive direction for the variable")
@@ -0,0 +1,31 @@
1
+ """
2
+ Model (i.e. schema/definition) of the license data descriptor
3
+ """
4
+
5
+ from pydantic import HttpUrl
6
+
7
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
8
+
9
+
10
+ class License(PlainTermDataDescriptor):
11
+ """
12
+ License that applies to the dataset
13
+
14
+ Examples: "CC-BY-4.0", "CC0-1.0"
15
+
16
+ Licenses must be approved by the WIP & CMIP panel
17
+ before they can be used in CMIP exercises.
18
+ """
19
+
20
+ spdx_id: str
21
+ """
22
+ SPDX license identifier (https://spdx.org/licenses/)
23
+ """
24
+ # Developer note: `id` will not match `spdx_id`
25
+ # exactly because SPDX IDs are not all lowercase,
26
+ # hence we need this extra attribute.
27
+
28
+ url: HttpUrl
29
+ """
30
+ URL with details of the full license and other information
31
+ """
@@ -0,0 +1,9 @@
1
+ from esgvoc.api.data_descriptors.data_descriptor import CompositeTermDataDescriptor
2
+
3
+
4
+ class MemberId(CompositeTermDataDescriptor):
5
+ """
6
+ The member_id uniquely identifies a specific model simulation within an experiment. It is created by combining the sub_experiment, which describes the setup or timing of the simulation (like a specific start year), and the variant_label, which details the configuration of the model (including initial conditions, physics, and forcings). Together, they form a code like s1960-r1i1p1f1. This allows users to distinguish between different ensemble members and understand how each run differs from others within the same experiment.
7
+ """
8
+
9
+ pass
@@ -0,0 +1,26 @@
1
+ """
2
+ Model (i.e. schema/definition) of the MIP era data descriptor
3
+ """
4
+
5
+ from pydantic import HttpUrl
6
+
7
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
8
+
9
+
10
+ class MipEra(PlainTermDataDescriptor):
11
+ """
12
+ Label that identifies the MIP era to which a dataset belongs
13
+
14
+ Examples: "CMIP6", "CMIP7"
15
+
16
+ The MIP era is useful to distinguish among experiments that appear in more than one CMIP phase
17
+ but follow a different experimental protocol in each phase.
18
+ For example, the "historical" experiments appear in multiple phases of CMIP
19
+ but have different input forcings in each.
20
+ This difference can be identified using the MIP era data descriptor.
21
+ """
22
+
23
+ url: HttpUrl
24
+ """
25
+ URL that links to further information about the MIP era
26
+ """
@@ -0,0 +1,32 @@
1
+ """
2
+ Model (i.e. schema/definition) of the model component data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
6
+
7
+
8
+ class ModelComponent(PlainTermDataDescriptor):
9
+ """
10
+ Model component
11
+
12
+ Examples: "AOGCM", "AER", "BGC"
13
+
14
+ These terms are intended to help with identifying required components for experiments
15
+ or filtering models based on having common components.
16
+ For example, an aerosol scheme or a circulation model or a biogeochemistry component.
17
+ However, model component is only an approximate term, there is no precise definition
18
+ of whether any given model has or does not have a given component.
19
+ """
20
+
21
+ # These should probably come back in.
22
+ # However, this level of detail is only relevant for EMD.
23
+ # For CMOR tables, the convention seems to just be to use the drs_name,
24
+ # which is awkward because there is more than one possible AOGCM (for example).
25
+ # Hence, I think we actually need two classes.
26
+ # `ModelComponent` and `EMDModelcomponent`
27
+ # (or we just get rid of this model component idea completely/
28
+ # leave it entirely up to EMD)
29
+ # name: str
30
+ # realm: dict
31
+ # nominal_resolution: dict
32
+ # version: int
@@ -0,0 +1,17 @@
1
+ from esgvoc.api.data_descriptors.data_descriptor import (
2
+ CompositeTermDataDescriptor,
3
+ PatternTermDataDescriptor,
4
+ PlainTermDataDescriptor,
5
+ )
6
+
7
+
8
+ class PlainTermDDex(PlainTermDataDescriptor):
9
+ pass
10
+
11
+
12
+ class PatternTermDDex(PatternTermDataDescriptor):
13
+ pass
14
+
15
+
16
+ class CompositeTermDDex(CompositeTermDataDescriptor):
17
+ pass
@@ -0,0 +1,50 @@
1
+ """
2
+ Model (i.e. schema/definition) of the nominal resolution data descriptor
3
+ """
4
+
5
+ from pydantic import field_validator
6
+
7
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
8
+
9
+
10
+ class NominalResolution(PlainTermDataDescriptor):
11
+ """
12
+ Approximate horizontal resolution of a dataset
13
+
14
+ Examples: "1 km", "250 km", "500 km"
15
+
16
+ This should be calculated following the algorithm implemented by
17
+ [PCMDI/nominal_resolution](https://github.com/PCMDI/nominal_resolution/blob/master/lib/api.py)
18
+ (although, of course, other implementations of the same algorithm could be used).
19
+ """
20
+
21
+ # Developer note: given this isn't a pattern term data descriptor,
22
+ # these are split so people don't have to parse the drs_name themselves.
23
+ magnitude: float
24
+ """
25
+ Magnitude of the nominal resolution
26
+ """
27
+
28
+ range: tuple[float, float]
29
+ """
30
+ Range of mean resolutions to which this nominal resolution applies
31
+ """
32
+
33
+ units: str
34
+ """
35
+ Units of the nominal resolution and range
36
+ """
37
+
38
+ @field_validator("range")
39
+ @classmethod
40
+ def validate_range(cls, v):
41
+ """Validate that range has exactly 2 values and min <= max."""
42
+ if len(v) != 2:
43
+ msg = f"range must contain exactly 2 values [min, max]. Received: {v}"
44
+ raise ValueError(msg)
45
+
46
+ if v[0] > v[1]:
47
+ msg = f"range[0] must be <= range[1]. Received: {v}"
48
+ raise ValueError(msg)
49
+
50
+ return v
@@ -0,0 +1,5 @@
1
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
2
+
3
+
4
+ class ObsType(PlainTermDataDescriptor):
5
+ pass
@@ -0,0 +1,22 @@
1
+ """
2
+ Model (i.e. schema/definition) of the organisation data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.institution import Institution
6
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
7
+
8
+
9
+ class Organisation(PlainTermDataDescriptor):
10
+ """
11
+ A registered organisation
12
+
13
+ Examples: "IPSL", "NCAR", "CNRM-CERFACS", "SOLARIS-HEPPA"
14
+ """
15
+
16
+ # Note: Allowing str is under discussion.
17
+ # Using this to get things working.
18
+ # Long-term, we might do something different.
19
+ members: list[Institution | str]
20
+ """
21
+ Members associated with this organisation
22
+ """
@@ -0,0 +1,21 @@
1
+ """
2
+ Model (i.e. schema/definition) of the physics index data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
6
+
7
+
8
+ class PhysicsIndex(PatternTermDataDescriptor):
9
+ """
10
+ Label that identifies the physics variant used to produce a dataset
11
+
12
+ Examples: "p1", "p2", "p20"
13
+
14
+ This label can be used, for example, to distinguish between two simulations,
15
+ one using a model's 'default physics'
16
+ and another using a model's 'other physics scheme',
17
+ which might yield information about how differences in physics within the model affect the simulation.
18
+ The value has no intrinsic meaning within the CVs.
19
+ However, in other external sources (to be confirmed which)
20
+ the meaning of this physics label for a given simulation can be looked up.
21
+ """
@@ -0,0 +1,16 @@
1
+ """
2
+ Model (i.e. schema/definition) of the product data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
6
+
7
+
8
+ class Product(PlainTermDataDescriptor):
9
+ """
10
+ Identifier of the data category
11
+
12
+ Examples: "model-output", "observations", "derived"
13
+
14
+ This is not a precisely defined data descriptor,
15
+ rather an approximate labelling.
16
+ """
@@ -0,0 +1,5 @@
1
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
2
+
3
+
4
+ class PublicationStatus(PlainTermDataDescriptor):
5
+ pass
@@ -0,0 +1,24 @@
1
+ """
2
+ Model (i.e. schema/definition) of the realisation index data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
6
+
7
+
8
+ class RealizationIndex(PatternTermDataDescriptor):
9
+ """
10
+ Label that identifies the realisation variant used to produce a dataset
11
+
12
+ Examples: "r1", "r2", "r23"
13
+
14
+ This label can be used to distinguish between two simulations
15
+ that are equally likely but differ only due to stochastic variations.
16
+ These differences can be purely stochastic
17
+ (i.e. arising simply from stochastic variations when re-running the same simulation,
18
+ even keeping all other conditions the same)
19
+ or arise from differences in initial conditions
20
+ e.g. starting/branching from different points in a control run/parent experiment.
21
+ The value has no intrinsic meaning within the CVs.
22
+ However, in other external sources (to be confirmed which)
23
+ the meaning of this realisation label for a given simulation can be looked up.
24
+ """
@@ -0,0 +1,16 @@
1
+ """
2
+ Model (i.e. schema/definition) of the realm data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
6
+
7
+
8
+ class Realm(PlainTermDataDescriptor):
9
+ """
10
+ Realm associated with the dataset
11
+
12
+ Examples: "atmos", "land", "ocean", "atmosChem"
13
+
14
+ This is intended as a rough categorisation only
15
+ and is not precisely defined.
16
+ """
@@ -0,0 +1,5 @@
1
+ from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
2
+
3
+
4
+ class Regex(PatternTermDataDescriptor):
5
+ pass
@@ -0,0 +1,35 @@
1
+ """
2
+ Model (i.e. schema/definition) of the region data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
6
+
7
+
8
+ class Region(PlainTermDataDescriptor):
9
+ """
10
+ Region associated with the dataset
11
+
12
+ Examples: "glb", "30s-90s", "grl"
13
+
14
+ In other words, the domain over which the dataset is provided.
15
+ This is intended as a rough categorisation only
16
+ and is not precisely defined.
17
+ """
18
+
19
+ cf_standard_region: str | None
20
+ """
21
+ CF standard region
22
+
23
+ See https://cfconventions.org/Data/standardized-region-list/standardized-region-list.current.html
24
+
25
+ If `None`, there is no CF standard region for this region
26
+ """
27
+
28
+ iso_region: str | None
29
+ """
30
+ ISO 3166-1 alpha-3 region code
31
+
32
+ See https://www.iso.org/iso-3166-country-codes.html
33
+
34
+ If `None`, there is no ISO region code for this region
35
+ """
@@ -0,0 +1,7 @@
1
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
2
+
3
+
4
+ class Resolution(PlainTermDataDescriptor):
5
+ value: str
6
+ name: str
7
+ unit: str