esgvoc 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. esgvoc/__init__.py +3 -0
  2. esgvoc/api/__init__.py +91 -0
  3. esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
  4. esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
  5. esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
  6. esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
  7. esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
  8. esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
  9. esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
  10. esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
  11. esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
  12. esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
  13. esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
  14. esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
  15. esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
  16. esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
  17. esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
  18. esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
  19. esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
  20. esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
  21. esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
  22. esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
  23. esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
  24. esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
  25. esgvoc/api/data_descriptors/__init__.py +159 -0
  26. esgvoc/api/data_descriptors/activity.py +72 -0
  27. esgvoc/api/data_descriptors/archive.py +5 -0
  28. esgvoc/api/data_descriptors/area_label.py +30 -0
  29. esgvoc/api/data_descriptors/branded_suffix.py +30 -0
  30. esgvoc/api/data_descriptors/branded_variable.py +21 -0
  31. esgvoc/api/data_descriptors/citation_url.py +5 -0
  32. esgvoc/api/data_descriptors/contact.py +5 -0
  33. esgvoc/api/data_descriptors/conventions.py +28 -0
  34. esgvoc/api/data_descriptors/creation_date.py +18 -0
  35. esgvoc/api/data_descriptors/data_descriptor.py +127 -0
  36. esgvoc/api/data_descriptors/data_specs_version.py +25 -0
  37. esgvoc/api/data_descriptors/date.py +5 -0
  38. esgvoc/api/data_descriptors/directory_date.py +22 -0
  39. esgvoc/api/data_descriptors/drs_specs.py +38 -0
  40. esgvoc/api/data_descriptors/experiment.py +215 -0
  41. esgvoc/api/data_descriptors/forcing_index.py +21 -0
  42. esgvoc/api/data_descriptors/frequency.py +48 -0
  43. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  44. esgvoc/api/data_descriptors/grid.py +43 -0
  45. esgvoc/api/data_descriptors/horizontal_label.py +20 -0
  46. esgvoc/api/data_descriptors/initialization_index.py +27 -0
  47. esgvoc/api/data_descriptors/institution.py +80 -0
  48. esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
  49. esgvoc/api/data_descriptors/license.py +31 -0
  50. esgvoc/api/data_descriptors/member_id.py +9 -0
  51. esgvoc/api/data_descriptors/mip_era.py +26 -0
  52. esgvoc/api/data_descriptors/model_component.py +32 -0
  53. esgvoc/api/data_descriptors/models_test/models.py +17 -0
  54. esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
  55. esgvoc/api/data_descriptors/obs_type.py +5 -0
  56. esgvoc/api/data_descriptors/organisation.py +22 -0
  57. esgvoc/api/data_descriptors/physics_index.py +21 -0
  58. esgvoc/api/data_descriptors/product.py +16 -0
  59. esgvoc/api/data_descriptors/publication_status.py +5 -0
  60. esgvoc/api/data_descriptors/realization_index.py +24 -0
  61. esgvoc/api/data_descriptors/realm.py +16 -0
  62. esgvoc/api/data_descriptors/regex.py +5 -0
  63. esgvoc/api/data_descriptors/region.py +35 -0
  64. esgvoc/api/data_descriptors/resolution.py +7 -0
  65. esgvoc/api/data_descriptors/source.py +120 -0
  66. esgvoc/api/data_descriptors/source_type.py +5 -0
  67. esgvoc/api/data_descriptors/sub_experiment.py +5 -0
  68. esgvoc/api/data_descriptors/table.py +28 -0
  69. esgvoc/api/data_descriptors/temporal_label.py +20 -0
  70. esgvoc/api/data_descriptors/time_range.py +17 -0
  71. esgvoc/api/data_descriptors/title.py +5 -0
  72. esgvoc/api/data_descriptors/tracking_id.py +67 -0
  73. esgvoc/api/data_descriptors/variable.py +56 -0
  74. esgvoc/api/data_descriptors/variant_label.py +25 -0
  75. esgvoc/api/data_descriptors/vertical_label.py +20 -0
  76. esgvoc/api/project_specs.py +143 -0
  77. esgvoc/api/projects.py +1253 -0
  78. esgvoc/api/py.typed +0 -0
  79. esgvoc/api/pydantic_handler.py +146 -0
  80. esgvoc/api/report.py +127 -0
  81. esgvoc/api/search.py +171 -0
  82. esgvoc/api/universe.py +434 -0
  83. esgvoc/apps/__init__.py +6 -0
  84. esgvoc/apps/cmor_tables/__init__.py +7 -0
  85. esgvoc/apps/cmor_tables/cvs_table.py +948 -0
  86. esgvoc/apps/drs/__init__.py +0 -0
  87. esgvoc/apps/drs/constants.py +2 -0
  88. esgvoc/apps/drs/generator.py +429 -0
  89. esgvoc/apps/drs/report.py +540 -0
  90. esgvoc/apps/drs/validator.py +312 -0
  91. esgvoc/apps/ga/__init__.py +104 -0
  92. esgvoc/apps/ga/example_usage.py +315 -0
  93. esgvoc/apps/ga/models/__init__.py +47 -0
  94. esgvoc/apps/ga/models/netcdf_header.py +306 -0
  95. esgvoc/apps/ga/models/validator.py +491 -0
  96. esgvoc/apps/ga/test_ga.py +161 -0
  97. esgvoc/apps/ga/validator.py +277 -0
  98. esgvoc/apps/jsg/json_schema_generator.py +341 -0
  99. esgvoc/apps/jsg/templates/template.jinja +241 -0
  100. esgvoc/apps/test_cv/README.md +214 -0
  101. esgvoc/apps/test_cv/__init__.py +0 -0
  102. esgvoc/apps/test_cv/cv_tester.py +1611 -0
  103. esgvoc/apps/test_cv/example_usage.py +216 -0
  104. esgvoc/apps/vr/__init__.py +12 -0
  105. esgvoc/apps/vr/build_variable_registry.py +71 -0
  106. esgvoc/apps/vr/example_usage.py +60 -0
  107. esgvoc/apps/vr/vr_app.py +333 -0
  108. esgvoc/cli/clean.py +304 -0
  109. esgvoc/cli/cmor.py +46 -0
  110. esgvoc/cli/config.py +1300 -0
  111. esgvoc/cli/drs.py +267 -0
  112. esgvoc/cli/find.py +138 -0
  113. esgvoc/cli/get.py +155 -0
  114. esgvoc/cli/install.py +41 -0
  115. esgvoc/cli/main.py +60 -0
  116. esgvoc/cli/offline.py +269 -0
  117. esgvoc/cli/status.py +79 -0
  118. esgvoc/cli/test_cv.py +258 -0
  119. esgvoc/cli/valid.py +147 -0
  120. esgvoc/core/constants.py +17 -0
  121. esgvoc/core/convert.py +0 -0
  122. esgvoc/core/data_handler.py +206 -0
  123. esgvoc/core/db/__init__.py +3 -0
  124. esgvoc/core/db/connection.py +40 -0
  125. esgvoc/core/db/models/mixins.py +25 -0
  126. esgvoc/core/db/models/project.py +102 -0
  127. esgvoc/core/db/models/universe.py +98 -0
  128. esgvoc/core/db/project_ingestion.py +231 -0
  129. esgvoc/core/db/universe_ingestion.py +172 -0
  130. esgvoc/core/exceptions.py +33 -0
  131. esgvoc/core/logging_handler.py +26 -0
  132. esgvoc/core/repo_fetcher.py +345 -0
  133. esgvoc/core/service/__init__.py +41 -0
  134. esgvoc/core/service/configuration/config_manager.py +196 -0
  135. esgvoc/core/service/configuration/setting.py +363 -0
  136. esgvoc/core/service/data_merger.py +634 -0
  137. esgvoc/core/service/esg_voc.py +77 -0
  138. esgvoc/core/service/resolver_config.py +56 -0
  139. esgvoc/core/service/state.py +324 -0
  140. esgvoc/core/service/string_heuristics.py +98 -0
  141. esgvoc/core/service/term_cache.py +108 -0
  142. esgvoc/core/service/uri_resolver.py +133 -0
  143. esgvoc-2.0.2.dist-info/METADATA +82 -0
  144. esgvoc-2.0.2.dist-info/RECORD +147 -0
  145. esgvoc-2.0.2.dist-info/WHEEL +4 -0
  146. esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
  147. esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0
@@ -0,0 +1,120 @@
1
+ """
2
+ Model (i.e. schema/definition) of the source descriptor
3
+ """
4
+
5
+ from pydantic import Field
6
+
7
+ from esgvoc.api.data_descriptors.organisation import Organisation
8
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
9
+ from esgvoc.api.data_descriptors.EMD_models.model_component import EMDModelComponent
10
+ from esgvoc.api.pydantic_handler import create_union
11
+
12
+
13
class SourceCMIP7(PlainTermDataDescriptor):
    """
    Source of the dataset (CMIP7 format with contributors and model_components)

    Examples: "CanESM6-MR", "CR-CMIP-1-0-0"

    The more precise meaning of source depends on the kind of dataset this is.
    For model output, 'source' refers to a numerical representation of the Earth's climate system.
    This source is the model which was used to generate the dataset.
    Such models simulate the interactions between the atmosphere, oceans, land surface, and ice.
    They are based on fundamental physical, chemical, and biological processes
    and are used to understand past, present, and future climate conditions.
    Each source or model is typically associated with a specific research institution, center, or group.
    For instance, models like 'EC-Earth' are developed by a consortium of European institutes,
    while 'GFDL-CM4' is developed by the Geophysical Fluid Dynamics Laboratory (GFDL) in the United States.

    For model inputs i.e. forcings, the 'source' is a unique identifier
    for the group that produced the data and its version.
    This is a different convention from almost all other cases
    (which really muddies the meaning of the term).
    """

    label: str
    """
    Label to use for this source

    Unlike the `drs_name`, this can contain any characters
    """

    label_extended: str
    """
    Extended label to use for this source

    Unlike the `drs_name`, this can contain any characters.
    If desired, it can include lots of verbose information
    (unlike `label`, which should be more terse).
    It can also just be the same as `label`
    if the person registering the source wishes.
    """

    # Note: Allowing str is under discussion.
    # Using this to get things working.
    # Long-term, we might do something different.
    contributors: list[Organisation | str]
    """
    Organisation(s) using this source

    Using is a bit vaguely defined, but in practice it is the organisation(s)
    that submit data using this source.
    """

    # Note: Allowing str is under discussion.
    # Using this to get things working.
    # Long-term, we might do something different.
    model_components: list[EMDModelComponent | str]
    """
    Model components

    If this source is not a model, this can/will just be an empty list.
    """

    @property
    def source(self) -> str:
        """
        Source label as used by CMOR

        Not implemented yet: accessing this property always raises
        `NotImplementedError`. The comments below sketch the intended format.
        """
        raise NotImplementedError
        # Something like:
        # label (release year from EMD if known):
        # (for each model component)\n component: component name (description)
83
+
84
+
85
class SourceLegacy(PlainTermDataDescriptor):
    """
    Legacy source model for CMIP6 and earlier versions.

    This version uses different field names and structure compared to the CMIP7 format
    (see `SourceCMIP7`). Only `label` has no default value here.
    """

    activity_participation: list[str] | None = None
    """Activities this source participates in."""

    cohort: list[str] = Field(default_factory=list)
    """Cohort grouping for this source."""

    organisation_id: list[str] = Field(default_factory=list)
    """Organisation IDs associated with this source."""

    label: str
    """Label to use for this source."""

    label_extended: str | None = None
    """Extended label to use for this source."""

    license: dict = Field(default_factory=dict)
    """License information for this source."""

    model_component: dict | None = Field(
        default=None,
        description="Dictionary containing the model components that make up this climate source"
    )
    """Model component information (legacy format)."""

    release_year: int | None = None
    """Year this source was released."""
118
+
119
+
120
# Combined `Source` type built from both source models via `create_union`
# (imported from esgvoc.api.pydantic_handler).
Source = create_union(SourceCMIP7, SourceLegacy)
@@ -0,0 +1,5 @@
1
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
2
+
3
+
4
class SourceType(PlainTermDataDescriptor):
    # Source type data descriptor: declares no fields or behaviour of its own,
    # everything is inherited from PlainTermDataDescriptor.
    pass
@@ -0,0 +1,5 @@
1
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
2
+
3
+
4
class SubExperiment(PlainTermDataDescriptor):
    # Sub-experiment data descriptor: declares no fields or behaviour of its own,
    # everything is inherited from PlainTermDataDescriptor.
    pass
@@ -0,0 +1,28 @@
1
+ from pydantic import Field, field_validator
2
+
3
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
4
+
5
+
6
class Table(PlainTermDataDescriptor):
    # Table data descriptor. `product` and `table_date` have no default value;
    # `variable_entry` defaults to an empty list.
    product: str | None
    table_date: str | None
    variable_entry: list[str] = Field(default_factory=list)

    @field_validator("variable_entry", mode="before")
    @classmethod
    def normalize_variable_entry(cls, v):
        """
        Reduce resolved references in variable_entry to plain identifiers.

        Registered with mode="before", so it receives the raw input value.
        Anything that is not a list is handed back untouched. Within a list,
        each dict (a resolved reference) is replaced by its 'id' entry, or by
        the dict's string form when it has no 'id'; every other item is kept
        as it is.
        """
        if isinstance(v, list):
            return [
                entry.get("id", str(entry)) if isinstance(entry, dict) else entry
                for entry in v
            ]
        return v
@@ -0,0 +1,20 @@
1
+ """
2
+ Model (i.e. schema/definition) of the temporal label data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
6
+
7
+
8
class TemporalLabel(PlainTermDataDescriptor):
    """
    Label that describes a specific temporal sampling approach

    Examples: "tavg", "tpt", "tclm"

    This is set to "ti" ("time-independent") when the data has no time axis.
    For underlying details and logic, please see
    [Taylor et al., 2025](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).

    This label is used as the temporal component of a branded variable's suffix
    (see :py:class:`BrandedSuffix`).
    """  # noqa: E501
@@ -0,0 +1,17 @@
1
+ """
2
+ Model (i.e. schema/definition) of the time range data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
6
+
7
+
8
class TimeRange(PatternTermDataDescriptor):
    """
    Time range spanned by the data

    Examples: "185001-202112", "18500101-20211231", "203101010130-203112312230", "185001-186412-clim"

    The right choice of time range is tightly coupled to the frequency of the data.
    This coupling is not captured within the CVs.
    (It is hopefully enforced elsewhere e.g. in QAQC workflows.)
    """
@@ -0,0 +1,5 @@
1
+ from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
2
+
3
+
4
class Title(PatternTermDataDescriptor):
    # Title data descriptor: declares no fields or behaviour of its own,
    # everything is inherited from PatternTermDataDescriptor.
    pass
@@ -0,0 +1,67 @@
1
+ """
2
+ Model (i.e. schema/definition) of the tracking ID data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
6
+
7
+
8
class TrackingId(PatternTermDataDescriptor):
    """
    Tracking ID, i.e. unique ID, of a file

    Examples: "hdl:21.14107/f6635404-8a1a-4aa9-918d-3792e8321f04",
    "hdl:21.14100/718ee427-4efb-46a8-9f89-8192593b15fe"

    This data descriptor applies only at the file level,
    not to datasets as a whole (each file in a dataset gets a unique ID).

    The regex is of the form `hdl:<prefix>/<uuid>`,
    where `<prefix>` is a prefix which is the same for all files in the same 'group'
    and `<uuid>` is a universally unique ID (UUID).

    The 'group' is a bit loosely defined and can be defined in different ways
    for different purposes.
    For CMIP phases, the 'group' is usually the CMIP phase
    i.e. all files that are part of the same CMIP phase use the same tracking ID prefix
    (e.g. all CMIP6 files have the same prefix, all CMIP7 files have the same prefix).
    (Also note that some projects haven't really got this right,
    e.g. the input4MIPs project has re-used the CMIP6 prefix, rather than using its own.)

    The prefixes come from [handle.net](https://www.handle.net/index.html).
    [handle.net](https://www.handle.net/index.html) prefixes
    are allotted to different CMIP (or other project) exercises.
    These prefixes are used by the
    [handle.net](https://www.handle.net/index.html)
    service to group all the entries for the given exercise together.
    These prefixes allow a) each file to have a unique ID
    and b) users to look up entries for all files using their unique ID
    via the [handle.net](https://www.handle.net/index.html) service.
    The prefixes are currently managed and registered by DKRZ
    on behalf of the ESGF team (we think, it's not 100% clear).

    The last part of the tracking ID is a UUID.
    The specification of a UUID is defined elsewhere
    (apparently in ISO/IEC 9834-8:2014).
    A new UUID must be generated for every single file
    such that every file has a unique tracking id
    (this uniqueness is both within a project thanks to the differing UUIDs
    and across projects thanks to the differing prefixes).
    Most programming languages have native support for UUID generation
    (e.g. the `uuid` library is part of Python's standard library).
    For a standalone solution, the OSSP utility is available.
    It can be accessed from http://www.ossp.org/pkg/lib/uuid/.
    Since CMIP6, version 4 UUIDs (random number based) have been required.

    The tracking IDs are
    used by a PID service
    so that users can find further information about the file
    by going to `hdl.handle.net/<tracking_id_after_the_hdl_colon_prefix_is_removed>`
    e.g. `hdl.handle.net/21.14107/f6635404-8a1a-4aa9-918d-3792e8321f04`
    (a working link from CMIP6 for those who would like to see a live demonstration is
    [hdl.handle.net/21.14100/f2f502c9-9626-31c6-b016-3f7c0534803b](https://hdl.handle.net/21.14100/f2f502c9-9626-31c6-b016-3f7c0534803b),
    which was inferred from a file in which the tracking ID is
    `hdl:21.14100/f2f502c9-9626-31c6-b016-3f7c0534803b`).
    (Or at least this link with handle.net is the intention.
    It hasn't always happened
    e.g. this is not the case for all CMIP7 input4MIPs files.)
    """
@@ -0,0 +1,56 @@
1
+ """
2
+ Model (i.e. schema/definition) of the variable data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
6
+
7
+
8
class Variable(PlainTermDataDescriptor):
    """
    A climate-related quantity or measurement.

    Examples: "tas", "pr", "psl", "rlut"

    These quantities represent key physical, chemical or biological properties of the Earth system
    and can be the result of direct observation of the climate system or simulations.
    Variables cover a range of aspects of the climate system,
    such as temperature, precipitation, sea level, radiation, or atmospheric composition.
    Some more information for variables that have been used in CMIP:

    - *tas*: Near-surface air temperature (measured at 2 meters above the surface)
    - *pr*: Precipitation
    - *psl*: Sea-level pressure
    - *zg*: Geopotential height
    - *rlut*: Top-of-atmosphere longwave radiation
    - *siconc*: Sea-ice concentration
    - *co2*: Atmospheric CO2 concentration

    Since CMIP7, the concept of a variable has been augmented with the idea of 'branding',
    leading to the idea of a 'branded variable'.
    For details, see :py:class:`BrandedVariable`.

    Sometimes 'variable' is also referred to as 'root name' or 'out name'.
    There is mostly a one-to-one mapping between CF standard names and variables.
    However, this is not always possible so please don't assume this is always the case.
    """

    long_name: str | None
    """
    Long name of the variable

    This is free text and can take any value
    """

    standard_name: str | None
    """
    Standard name of the variable

    The standard names are defined by the CF-conventions.

    If `None`, this variable has no standard name according to the CF-conventions.
    """

    units: str
    """
    Units of the variable
    """
@@ -0,0 +1,25 @@
1
+ """
2
+ Model (i.e. schema/definition) of the variant label data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
6
+
7
+
8
class VariantLabel(PatternTermDataDescriptor):
    """
    The variant which provides information about how a dataset was created

    Examples: "r1i1p1f1", "r2i2p2f1", "r1i198001p1f1", "r1i198001ap1f1", "r1i199001bp1f1"

    Really, this should be a composite term.
    However, as there is no separator between the parts, it has to be a pattern term
    (at least at the moment).

    As of the latest round of CMIP,
    the variant label is usually composed of the following components:

    #. :py:class:`RealizationIndex`
    #. :py:class:`InitializationIndex`
    #. :py:class:`PhysicsIndex`
    #. :py:class:`ForcingIndex`
    """
@@ -0,0 +1,20 @@
1
+ """
2
+ Model (i.e. schema/definition) of the vertical label data descriptor
3
+ """
4
+
5
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
6
+
7
+
8
class VerticalLabel(PlainTermDataDescriptor):
    """
    Label that describes a specific vertical sampling approach

    Examples: "h2m", "200hPa", "p19"

    This is set to "u" ("unspecified") when the data has no vertical dimension.
    For underlying details and logic, please see
    [Taylor et al., 2025](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).

    This label is used as the vertical component of a branded variable's suffix
    (see :py:class:`BrandedSuffix`).
    """  # noqa: E501
@@ -0,0 +1,143 @@
1
+ from enum import Enum
2
+
3
+ from pydantic import BaseModel, ConfigDict
4
+
5
+
6
class DrsType(str, Enum):
    """
    The types of DRS specification (directory, file name and dataset id).

    Members are also `str` instances, so each one compares equal to its string value.
    """

    DIRECTORY = "directory"
    """The DRS directory specification type."""
    FILE_NAME = "file_name"
    """The DRS file name specification type."""
    DATASET_ID = "dataset_id"
    """The DRS dataset id specification type."""
17
+
18
+
19
class DrsPart(BaseModel):
    """A fragment of a DRS specification"""

    source_collection: str
    """The collection id."""
    source_collection_term: str | None = None
    "Specifies a specific term in the collection."
    is_required: bool
    """Whether the collection is required for the DRS specification or not."""

    def __str__(self) -> str:
        """Return the collection id (`source_collection`) as the string form of this part."""
        return self.source_collection
31
+
32
+
33
class DrsSpecification(BaseModel):
    """
    A DRS specification.

    Describes one DRS type through a general regex, a separator,
    optional extra properties and the list of its parts.
    """

    type: DrsType
    """The type of the specification."""
    regex: str
    """General pattern for simples checks"""
    separator: str
    """The textual separator string or character."""
    properties: dict | None = None
    """The other specifications (e.g., file name extension for file name DRS specification)."""
    parts: list[DrsPart]
    """The parts of the DRS specification."""
48
+
49
+
50
class AttributeProperty(BaseModel):
    """
    A NetCDF global attribute property specification.

    `source_collection`, `is_required` and `value_type` have no default value;
    the remaining fields default to `None`.
    """

    source_collection: str
    "The project collection that originated the property."
    is_required: bool
    "Specifies if the attribute must be present in the NetCDF file."
    value_type: str
    "The type of the attribute value."
    specific_key: str | None = None
    "Specifies a specific key in the collection."
    field_name: str | None = None
    "The name of the attribute field."
    default_value: str | None = None
    "The default value for the attribute."
67
+
68
+
69
class CatalogProperty(BaseModel):
    """
    A dataset property described in a catalog.

    `source_collection` has no default value but may be `None`
    (property not tied to any collection of the project).
    """

    source_collection: str | None
    "The project collection that originated the property. `None` value means that the property "
    "is not related to any collection of the project. So the property has limited specifications."
    catalog_field_value_type: str
    "The type of the field value."
    is_required: bool
    "Specifies if the property must be present in the dataset properties."
    source_collection_term: str | None = None
    "Specifies a specific term in the collection."
    catalog_field_name: str | None = None
    "The name of the collection referenced in the catalog."
    source_collection_key: str | None = None
    "Specifies a key other than drs_name in the collection."
87
+
88
+
89
class CatalogExtension(BaseModel):
    # One catalog extension, identified by its name and version.
    name: str
    """The name of the extension"""
    version: str
    """The version of the extension"""
94
+
95
+
96
class CatalogProperties(BaseModel):
    # Properties of the catalog system itself: its name, URI template and extensions.
    name: str
    """The name of the catalog system."""
    url_template: str
    """The URI template of the catalog system."""
    extensions: list[CatalogExtension]
    """The extensions of the catalog."""
103
+
104
+
105
# Type alias: an attribute specification is a list of AttributeProperty entries.
AttributeSpecification = list[AttributeProperty]
106
+
107
+
108
class CatalogSpecification(BaseModel):
    """
    A catalog specification.
    """

    version: str
    """The version of the catalog."""

    catalog_properties: CatalogProperties
    """The properties of the catalog."""

    dataset_properties: list[CatalogProperty]
    "The properties of the dataset described in a catalog."
    file_properties: list[CatalogProperty]
    "The properties of the files described in a catalog."
123
+
124
+
125
class ProjectSpecs(BaseModel):
    """
    The specifications of a project.

    `drs_specs`, `catalog_specs` and `attr_specs` are optional and default to `None`.
    """

    project_id: str
    """The project id."""
    description: str
    """The description of the project."""
    version: str
    """The git_hash used as the version"""
    drs_specs: dict[DrsType, DrsSpecification] | None = None
    """The DRS specifications of the project (directory, file name and dataset id)."""
    # TODO: release = None when all projects have catalog_specs.yaml.
    catalog_specs: CatalogSpecification | None = None
    """The catalog specifications of the project."""
    attr_specs: AttributeSpecification | None = None
    """The NetCDF global attribute specifications of the project."""
    # extra="allow": fields not declared above are accepted and kept on the model
    # instead of being rejected or dropped.
    model_config = ConfigDict(extra="allow")