esgvoc 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- esgvoc/__init__.py +3 -0
- esgvoc/api/__init__.py +91 -0
- esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
- esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
- esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
- esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
- esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
- esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
- esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
- esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
- esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
- esgvoc/api/data_descriptors/__init__.py +159 -0
- esgvoc/api/data_descriptors/activity.py +72 -0
- esgvoc/api/data_descriptors/archive.py +5 -0
- esgvoc/api/data_descriptors/area_label.py +30 -0
- esgvoc/api/data_descriptors/branded_suffix.py +30 -0
- esgvoc/api/data_descriptors/branded_variable.py +21 -0
- esgvoc/api/data_descriptors/citation_url.py +5 -0
- esgvoc/api/data_descriptors/contact.py +5 -0
- esgvoc/api/data_descriptors/conventions.py +28 -0
- esgvoc/api/data_descriptors/creation_date.py +18 -0
- esgvoc/api/data_descriptors/data_descriptor.py +127 -0
- esgvoc/api/data_descriptors/data_specs_version.py +25 -0
- esgvoc/api/data_descriptors/date.py +5 -0
- esgvoc/api/data_descriptors/directory_date.py +22 -0
- esgvoc/api/data_descriptors/drs_specs.py +38 -0
- esgvoc/api/data_descriptors/experiment.py +215 -0
- esgvoc/api/data_descriptors/forcing_index.py +21 -0
- esgvoc/api/data_descriptors/frequency.py +48 -0
- esgvoc/api/data_descriptors/further_info_url.py +5 -0
- esgvoc/api/data_descriptors/grid.py +43 -0
- esgvoc/api/data_descriptors/horizontal_label.py +20 -0
- esgvoc/api/data_descriptors/initialization_index.py +27 -0
- esgvoc/api/data_descriptors/institution.py +80 -0
- esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
- esgvoc/api/data_descriptors/license.py +31 -0
- esgvoc/api/data_descriptors/member_id.py +9 -0
- esgvoc/api/data_descriptors/mip_era.py +26 -0
- esgvoc/api/data_descriptors/model_component.py +32 -0
- esgvoc/api/data_descriptors/models_test/models.py +17 -0
- esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
- esgvoc/api/data_descriptors/obs_type.py +5 -0
- esgvoc/api/data_descriptors/organisation.py +22 -0
- esgvoc/api/data_descriptors/physics_index.py +21 -0
- esgvoc/api/data_descriptors/product.py +16 -0
- esgvoc/api/data_descriptors/publication_status.py +5 -0
- esgvoc/api/data_descriptors/realization_index.py +24 -0
- esgvoc/api/data_descriptors/realm.py +16 -0
- esgvoc/api/data_descriptors/regex.py +5 -0
- esgvoc/api/data_descriptors/region.py +35 -0
- esgvoc/api/data_descriptors/resolution.py +7 -0
- esgvoc/api/data_descriptors/source.py +120 -0
- esgvoc/api/data_descriptors/source_type.py +5 -0
- esgvoc/api/data_descriptors/sub_experiment.py +5 -0
- esgvoc/api/data_descriptors/table.py +28 -0
- esgvoc/api/data_descriptors/temporal_label.py +20 -0
- esgvoc/api/data_descriptors/time_range.py +17 -0
- esgvoc/api/data_descriptors/title.py +5 -0
- esgvoc/api/data_descriptors/tracking_id.py +67 -0
- esgvoc/api/data_descriptors/variable.py +56 -0
- esgvoc/api/data_descriptors/variant_label.py +25 -0
- esgvoc/api/data_descriptors/vertical_label.py +20 -0
- esgvoc/api/project_specs.py +143 -0
- esgvoc/api/projects.py +1253 -0
- esgvoc/api/py.typed +0 -0
- esgvoc/api/pydantic_handler.py +146 -0
- esgvoc/api/report.py +127 -0
- esgvoc/api/search.py +171 -0
- esgvoc/api/universe.py +434 -0
- esgvoc/apps/__init__.py +6 -0
- esgvoc/apps/cmor_tables/__init__.py +7 -0
- esgvoc/apps/cmor_tables/cvs_table.py +948 -0
- esgvoc/apps/drs/__init__.py +0 -0
- esgvoc/apps/drs/constants.py +2 -0
- esgvoc/apps/drs/generator.py +429 -0
- esgvoc/apps/drs/report.py +540 -0
- esgvoc/apps/drs/validator.py +312 -0
- esgvoc/apps/ga/__init__.py +104 -0
- esgvoc/apps/ga/example_usage.py +315 -0
- esgvoc/apps/ga/models/__init__.py +47 -0
- esgvoc/apps/ga/models/netcdf_header.py +306 -0
- esgvoc/apps/ga/models/validator.py +491 -0
- esgvoc/apps/ga/test_ga.py +161 -0
- esgvoc/apps/ga/validator.py +277 -0
- esgvoc/apps/jsg/json_schema_generator.py +341 -0
- esgvoc/apps/jsg/templates/template.jinja +241 -0
- esgvoc/apps/test_cv/README.md +214 -0
- esgvoc/apps/test_cv/__init__.py +0 -0
- esgvoc/apps/test_cv/cv_tester.py +1611 -0
- esgvoc/apps/test_cv/example_usage.py +216 -0
- esgvoc/apps/vr/__init__.py +12 -0
- esgvoc/apps/vr/build_variable_registry.py +71 -0
- esgvoc/apps/vr/example_usage.py +60 -0
- esgvoc/apps/vr/vr_app.py +333 -0
- esgvoc/cli/clean.py +304 -0
- esgvoc/cli/cmor.py +46 -0
- esgvoc/cli/config.py +1300 -0
- esgvoc/cli/drs.py +267 -0
- esgvoc/cli/find.py +138 -0
- esgvoc/cli/get.py +155 -0
- esgvoc/cli/install.py +41 -0
- esgvoc/cli/main.py +60 -0
- esgvoc/cli/offline.py +269 -0
- esgvoc/cli/status.py +79 -0
- esgvoc/cli/test_cv.py +258 -0
- esgvoc/cli/valid.py +147 -0
- esgvoc/core/constants.py +17 -0
- esgvoc/core/convert.py +0 -0
- esgvoc/core/data_handler.py +206 -0
- esgvoc/core/db/__init__.py +3 -0
- esgvoc/core/db/connection.py +40 -0
- esgvoc/core/db/models/mixins.py +25 -0
- esgvoc/core/db/models/project.py +102 -0
- esgvoc/core/db/models/universe.py +98 -0
- esgvoc/core/db/project_ingestion.py +231 -0
- esgvoc/core/db/universe_ingestion.py +172 -0
- esgvoc/core/exceptions.py +33 -0
- esgvoc/core/logging_handler.py +26 -0
- esgvoc/core/repo_fetcher.py +345 -0
- esgvoc/core/service/__init__.py +41 -0
- esgvoc/core/service/configuration/config_manager.py +196 -0
- esgvoc/core/service/configuration/setting.py +363 -0
- esgvoc/core/service/data_merger.py +634 -0
- esgvoc/core/service/esg_voc.py +77 -0
- esgvoc/core/service/resolver_config.py +56 -0
- esgvoc/core/service/state.py +324 -0
- esgvoc/core/service/string_heuristics.py +98 -0
- esgvoc/core/service/term_cache.py +108 -0
- esgvoc/core/service/uri_resolver.py +133 -0
- esgvoc-2.0.2.dist-info/METADATA +82 -0
- esgvoc-2.0.2.dist-info/RECORD +147 -0
- esgvoc-2.0.2.dist-info/WHEEL +4 -0
- esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
- esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0
|
@@ -0,0 +1,948 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Support for generating CMOR CVs tables
|
|
3
|
+
|
|
4
|
+
Note: this really shouldn't be in esgvoc.
|
|
5
|
+
It should be in CMOR, as CMOR knows the structure it needs,
|
|
6
|
+
not esgvoc. Anyway, can do that later.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import itertools
|
|
10
|
+
import re
|
|
11
|
+
from functools import partial
|
|
12
|
+
from typing import Any, TypeAlias
|
|
13
|
+
|
|
14
|
+
from pydantic import BaseModel, ConfigDict, HttpUrl
|
|
15
|
+
|
|
16
|
+
import esgvoc.api as ev_api
|
|
17
|
+
|
|
18
|
+
AllowedDict: TypeAlias = dict[str, Any]
|
|
19
|
+
"""
|
|
20
|
+
Dictionary (key-value pairs). The keys define the allowed values for the given attribute
|
|
21
|
+
|
|
22
|
+
The values can be anything,
|
|
23
|
+
they generally provide extra information about the meaning of the keys.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
RegularExpressionValidators: TypeAlias = list[str]
|
|
27
|
+
"""
|
|
28
|
+
List of values which are assumed to be regular expressions
|
|
29
|
+
|
|
30
|
+
Attribute values provided by teams are then validated
|
|
31
|
+
against these regular expressions.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class CMORDRSDefinition(BaseModel):
    """
    Data reference syntax (DRS) section of the CMOR CVs table

    Carries one template and one example for directory paths,
    and one template and one example for filename paths.
    """

    directory_path_example: str
    """Sample directory path that conforms to this DRS"""

    directory_path_template: str
    """Template from which directory paths are generated"""

    filename_path_example: str
    """Sample filename path that conforms to this DRS"""

    filename_path_template: str
    """Template from which filename paths are generated"""
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class CMORExperimentDefinition(BaseModel):
    """
    Definition of a single experiment in the form CMOR expects
    """

    activity_id: list[str]
    """Activity ID(s) this experiment belongs to"""

    # NOTE: `required_model_components` and `additional_allowed_model_components`
    # (both `RegularExpressionValidators`) are currently disabled.
    # They described, respectively, the model components required to run
    # the experiment and the additional components that may be included.

    description: str
    """Description of the experiment"""

    experiment: str
    """Description of the experiment (duplicates `description`)"""

    # TODO: check if we should switch to timestamps
    start_year: int | None
    """Year in which the experiment starts"""

    end_year: int | None
    """Year in which the experiment ends"""

    min_number_yrs_per_sim: int | None
    """Minimum number of simulated years required"""

    experiment_id: str
    """ID of the experiment"""

    # NOTE: `host_collection: str` (host collection of this experiment)
    # is not a thing anymore and has therefore been removed.

    parent_activity_id: list[str]
    """Activity ID of this experiment's parent"""

    parent_experiment_id: list[str]
    """Experiment ID of this experiment's parent"""

    tier: int
    """
    Priority tier of this experiment

    Smaller numbers mean higher priority, so 1 is the highest priority.
    """
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class CMORFrequencyDefinition(BaseModel):
    """
    Definition of a single frequency in the form CMOR expects
    """

    approx_interval: float
    """Approximate interval, expressed in days"""

    description: str
    """Human-readable description of the frequency"""
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class CMORSpecificLicenseDefinition(BaseModel):
    """
    Definition of one specific license, CMOR-style
    """

    license_type: str
    """The type of license"""

    license_url: HttpUrl
    """URL at which the license is described"""
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class CMORLicenseDefinition(BaseModel):
    """
    License section of the CMOR CVs table
    """

    license_id: dict[str, CMORSpecificLicenseDefinition]
    """The supported licenses, keyed by license ID"""

    # (rightfully) not in esgvoc
    license_template: str
    """Template used when writing license strings"""
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
class CMORModelComponentDefintion(BaseModel):
    """
    Definition of a single model component in the form CMOR expects
    """

    description: str
    """Description of the component"""

    native_nominal_resolution: str
    """The component's native nominal resolution"""
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class CMORSourceDefinition(BaseModel):
    """
    Definition of a single source in the form CMOR expects

    What 'source' means is somewhat fuzzy from project to project.
    For CMIP phases it is the model that provided the simulation.
    """

    # NOTE: the following fields are intentionally disabled:
    # - `activity_participation: RegularExpressionValidators`
    #   (activities in which this source has participated):
    #   not thought to be used or relevant, hence dropped
    # - `cohort: RegularExpressionValidators`
    #   (cohort to which this source belongs):
    #   meaning unknown, hence dropped (TODO: clarify what this means)

    institution_id: RegularExpressionValidators
    """Institution ID(s) for this source"""

    label: str
    """
    Label used for this source ID

    TODO: check, does this mean in graphs/plots?
    """

    label_extended: str
    """
    Extended label used for this source ID

    TODO: check, does this mean in graphs/plots?
    """

    model_component: dict[str, CMORModelComponentDefintion]
    """The model components that make up this source"""

    # NOTE: `release_year: int | None` (release year of the model/source,
    # `None` if the release concept does not apply) is not relevant
    # and has therefore been dropped.

    source: str
    """
    Information about the source

    Combines the source name with information about each model component.
    """

    source_id: str
    """The source ID of `self`"""
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def convert_none_value_to_empty_string(v: Any) -> Any:
    """
    Replace `None` with an empty string, passing anything else through

    Parameters
    ----------
    v
        Value to convert

    Returns
    -------
    :
        `""` if `v` is `None`, otherwise `v` itself (unchanged)
    """
    if v is None:
        return ""

    return v
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def remove_none_values_from_dict(inv: dict[str, Any]) -> dict[str, Any]:
    """
    Return a copy of `inv` in which every `None` is replaced by an empty string

    The replacement is applied recursively, through nested dictionaries
    and lists at any depth (including dictionaries inside lists
    and lists inside lists).
    The input is not modified.

    Parameters
    ----------
    inv
        Dictionary to clean

    Returns
    -------
    :
        New dictionary with the same structure as `inv`
        but with all `None` values replaced by `""`
    """

    def _scrub(value: Any) -> Any:
        # Recurse into containers so no `None` survives at any depth
        if value is None:
            return ""

        if isinstance(value, dict):
            return {k: _scrub(v) for k, v in value.items()}

        if isinstance(value, list):
            return [_scrub(v) for v in value]

        return value

    return {k: _scrub(v) for k, v in inv.items()}
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
class CMORCVsTable(BaseModel):
    """
    Representation of the JSON table required by CMOR for CVs

    CMOR also takes in variable tables,
    as well as a user input table.
    This model doesn't consider those tables
    or their interactions with this table at the moment.

    Extra (undeclared) fields are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    DRS: CMORDRSDefinition
    """CMOR definition of the data reference syntax"""

    # NOTE: `archive_id: AllowedDict` (allowed values of `archive_id`)
    # is not a required global attribute and has therefore been dropped.

    activity_id: AllowedDict
    """Allowed values of `activity_id`"""

    area_label: AllowedDict
    """Allowed values of `area_label`"""

    branding_suffix: str
    """Template for branding suffix"""

    creation_date: RegularExpressionValidators
    """Allowed patterns for `creation_date`"""

    data_specs_version: str
    """Allowed value of `data_specs_version`"""

    drs_specs: AllowedDict
    """Allowed values of `drs_specs`"""

    experiment_id: dict[str, CMORExperimentDefinition]
    """CMOR-style experiment definitions"""

    forcing_index: RegularExpressionValidators
    """Allowed patterns for `forcing_index`"""

    frequency: AllowedDict
    """Allowed values of `frequency`"""

    grid_label: AllowedDict
    """Allowed values of `grid_label`"""

    horizontal_label: AllowedDict
    """Allowed values of `horizontal_label`"""

    initialization_index: RegularExpressionValidators
    """Allowed patterns for `initialization_index`"""

    institution_id: AllowedDict
    """Allowed values of `institution_id`"""

    license: CMORLicenseDefinition
    """CMOR-style license definition"""

    mip_era: str
    """Allowed value of `mip_era`"""

    nominal_resolution: RegularExpressionValidators
    """Allowed values of `nominal_resolution`"""

    physics_index: RegularExpressionValidators
    """Allowed patterns for `physics_index`"""

    product: AllowedDict
    """Allowed values of `product`"""

    realization_index: RegularExpressionValidators
    """Allowed patterns for `realization_index`"""

    realm: AllowedDict
    """Allowed values of `realm`"""

    region: AllowedDict
    """Allowed values of `region`"""

    required_global_attributes: list[str]
    """Required global attributes"""

    source_id: dict[str, CMORSourceDefinition]
    """CMOR-style source definitions"""

    temporal_label: AllowedDict
    """Allowed values of `temporal_label`"""

    tracking_id: RegularExpressionValidators
    """Allowed patterns for `tracking_id`"""

    variant_label: RegularExpressionValidators
    """Allowed patterns for `variant_label`"""

    vertical_label: AllowedDict
    """Allowed values of `vertical_label`"""

    def to_cvs_json(
        self, top_level_key: str = "CV"
    ) -> dict[str, dict[str, str | AllowedDict | RegularExpressionValidators]]:
        """
        Convert to the JSON-compatible dictionary used for the CVs table

        Parameters
        ----------
        top_level_key
            Key under which the whole table is nested in the output

        Returns
        -------
        :
            Dictionary with the single key `top_level_key`.
            Its value is the JSON-mode dump of `self`
            after passing through `remove_none_values_from_dict`.
        """
        md = self.model_dump(mode="json")

        # NOTE: an earlier approach hyphenised the keys of the DRS section,
        # rebuilt `experiment_id` via per-experiment `to_json` calls
        # and renamed `drs` to `DRS`. It was unclear why that was done
        # for some keys and not others, so none of it is applied here.

        md_no_none = remove_none_values_from_dict(md)

        return {top_level_key: md_no_none}
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
def get_project_attribute_property(
    attribute_value: str, attribute_to_match: str, ev_project: ev_api.project_specs.ProjectSpecs
) -> ev_api.project_specs.AttributeProperty:
    """
    Find the first attribute property of a project that has a given attribute value

    Parameters
    ----------
    attribute_value
        Value to look for

    attribute_to_match
        Name of the attribute, on each entry of `ev_project.attr_specs`,
        which is compared against `attribute_value`

    ev_project
        Project whose `attr_specs` are searched

    Returns
    -------
    :
        First entry of `ev_project.attr_specs` for which
        `attribute_to_match` equals `attribute_value`

    Raises
    ------
    KeyError
        No entry of `ev_project.attr_specs` matches
    """
    # Dedicated sentinel so that any real entry, whatever its value, counts as found
    not_found = object()
    match = next(
        (
            candidate
            for candidate in ev_project.attr_specs
            if getattr(candidate, attribute_to_match) == attribute_value
        ),
        not_found,
    )

    if match is not_found:
        msg = f"Nothing in attr_specs had {attribute_to_match} equal to {attribute_value}"
        raise KeyError(msg)

    return match
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
def get_allowed_dict_for_attribute(attribute_name: str, ev_project: ev_api.project_specs.ProjectSpecs) -> AllowedDict:
    """
    Get the allowed values, with descriptions, for an attribute

    Parameters
    ----------
    attribute_name
        Field name of the attribute, as it appears in the project's attribute specs

    ev_project
        Project from which to retrieve the terms

    Returns
    -------
    :
        Mapping from each term's `drs_name` to its `description`,
        for all terms in the attribute's source collection
    """
    attribute_spec = get_project_attribute_property(
        attribute_value=attribute_name,
        attribute_to_match="field_name",
        ev_project=ev_project,
    )
    collection_terms = ev_api.get_all_terms_in_collection(ev_project.project_id, attribute_spec.source_collection)

    allowed = {}
    for term in collection_terms:
        allowed[term.drs_name] = term.description

    return allowed
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
def convert_python_regex_to_cmor_regex(inv: str) -> list[str]:
    """
    Convert a Python regular expression into the form(s) written to the CMOR CVs table

    Alternations are not carried over:
    every bracketed alternation `(a|b)` is expanded,
    so one output expression is produced for each combination of alternatives.
    Python-style named groups are then unwrapped
    and a handful of constructs are rewritten
    (brackets and braces are backslash-escaped, `\\d` becomes `[[:digit:]]`,
    `+` becomes `\\{1,\\}` and `?` becomes `\\{0,1\\}`).

    Not ideal that we have to do this ourselves, but there is no obvious alternative
    (it doesn't make sense to use posix regex in the CV JSON
    because then esgvoc's Python API won't work).

    Parameters
    ----------
    inv
        Python regular expression

    Returns
    -------
    :
        Converted regular expressions, one per combination of alternatives
        (a single element if `inv` contains no `|`)

    Raises
    ------
    AssertionError
        `inv` contains `|` but no bracketed alternation could be found
    """
    if "|" in inv:
        or_sections = re.findall(r"\([^|(]*\|[^)]*\)", inv)
        if not or_sections:
            raise AssertionError(inv)

        # For each alternation, the list of (text to replace, alternative) pairs
        substitution_components = [
            [(or_section, alternative.strip("()")) for alternative in or_section.split("|")]
            for or_section in or_sections
        ]

        to_substitute = []
        for substitution_set in itertools.product(*substitution_components):
            filled = inv
            for old, new in substitution_set:
                filled = filled.replace(old, new)

            to_substitute.append(filled)

    else:
        to_substitute = [inv]

    res = []
    for start in to_substitute:
        # Get rid of Python style capturing groups.
        # Super brittle, might break if there are brackets inside the caught expression.
        # We'll have to fix as we find problems, regex is annoyingly complicated.
        tmp = re.sub(r"\(\?P\<[^>]*\>([^)]*)\)", r"\1", start)

        # Other things we seem to have to change.
        # Braces and brackets must be escaped before the quantifiers are rewritten,
        # otherwise the braces introduced below would be escaped a second time.
        tmp = tmp.replace("{", r"\{")
        tmp = tmp.replace("}", r"\}")
        tmp = tmp.replace("(", r"\(")
        tmp = tmp.replace(")", r"\)")
        tmp = tmp.replace(r"\d", "[[:digit:]]")
        tmp = tmp.replace("+", r"\{1,\}")
        # `?` means zero or one occurrence, hence the upper bound of 1
        tmp = tmp.replace("?", r"\{0,1\}")

        res.append(tmp)

    return res
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def get_regular_expression_validator_for_attribute(
    attribute_property: ev_api.project_specs.AttributeProperty,
    ev_project: ev_api.project_specs.ProjectSpecs,
) -> RegularExpressionValidators:
    """
    Get the CMOR-style regular expression validators for an attribute

    Parameters
    ----------
    attribute_property
        Attribute property whose source collection holds the regular expressions

    ev_project
        Project from which to retrieve the terms

    Returns
    -------
    :
        Converted form(s) of the `regex` of every term in the source collection,
        concatenated in term order
    """
    pattern_terms = ev_api.get_all_terms_in_collection(ev_project.project_id, attribute_property.source_collection)

    return [
        cmor_regex for pattern_term in pattern_terms for cmor_regex in convert_python_regex_to_cmor_regex(pattern_term.regex)
    ]
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
def get_template_for_composite_attribute(attribute_name: str, ev_project: ev_api.project_specs.ProjectSpecs) -> str:
    """
    Get the CMOR template for an attribute that is composed of other attributes

    Parameters
    ----------
    attribute_name
        Field name of the composite attribute

    ev_project
        Project from which to retrieve the terms

    Returns
    -------
    :
        Template of the form `<part_1><part_2>...`,
        where each part is the field name of the attribute
        whose source collection matches the part's type.
        The parts are concatenated without the separator.

    Raises
    ------
    AssertionError
        The attribute's source collection does not contain exactly one term

    NotImplementedError
        The term's separator is not `"-"`
    """
    ev_attribute_property = get_project_attribute_property(
        attribute_value=attribute_name,
        attribute_to_match="field_name",
        ev_project=ev_project,
    )
    terms = ev_api.get_all_terms_in_collection(ev_project.project_id, ev_attribute_property.source_collection)
    if not terms:
        # Fail with an explanation rather than an opaque IndexError below
        msg = f"No terms found in collection {ev_attribute_property.source_collection!r} for {attribute_name!r}"
        raise AssertionError(msg)

    if len(terms) > 1:
        raise AssertionError(terms)

    term = terms[0]

    parts_l = []
    for v in term.parts:
        va = get_project_attribute_property(v.type, "source_collection", ev_project)
        parts_l.append(f"<{va.field_name}>")

    if term.separator != "-":
        msg = f"CMOR only supports '-' as a separator, received {term.separator=} for {term=}"
        raise NotImplementedError(msg)

    return "".join(parts_l)
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
def get_single_allowed_value_for_attribute(attribute_name: str, ev_project: ev_api.project_specs.ProjectSpecs) -> str:
    """
    Get the one allowed value of an attribute that only permits a single value

    Parameters
    ----------
    attribute_name
        Field name of the attribute

    ev_project
        Project from which to retrieve the terms

    Returns
    -------
    :
        `drs_name` of the only term in the attribute's source collection

    Raises
    ------
    AssertionError
        The attribute's source collection does not contain exactly one term
    """
    ev_attribute_property = get_project_attribute_property(
        attribute_value=attribute_name,
        attribute_to_match="field_name",
        ev_project=ev_project,
    )
    terms = ev_api.get_all_terms_in_collection(ev_project.project_id, ev_attribute_property.source_collection)
    if not terms:
        # Fail with an explanation rather than an opaque IndexError below
        msg = f"No terms found in collection {ev_attribute_property.source_collection!r} for {attribute_name!r}"
        raise AssertionError(msg)

    if len(terms) > 1:
        raise AssertionError(terms)

    return terms[0].drs_name
|
|
588
|
+
|
|
589
|
+
|
|
590
|
+
def get_cmor_license_definition(
    source_collection: str, ev_project: ev_api.project_specs.ProjectSpecs
) -> CMORLicenseDefinition:
    """
    Build the license section of the CMOR CVs table

    Parameters
    ----------
    source_collection
        Collection that holds the license terms

    ev_project
        Project from which to retrieve the terms

    Returns
    -------
    :
        License definition holding one entry per term (keyed by `drs_name`,
        with the term's `description` as the license type and its `url` as the license URL)
        alongside a fixed license template
    """
    supported_licenses = {}
    for license_term in ev_api.get_all_terms_in_collection(ev_project.project_id, source_collection):
        supported_licenses[license_term.drs_name] = CMORSpecificLicenseDefinition(
            license_type=license_term.description,
            license_url=license_term.url,
        )

    # The template is hard-coded here rather than retrieved from the terms
    license_template = (
        "<license_id>; CMIP7 data produced by <institution_id> "
        "is licensed under a <license_type> License (<license_url>). "
        "Consult [TODO terms of use link] for terms of use governing CMIP7 output, "
        "including citation requirements and proper acknowledgment. "
        "The data producers and data providers make no warranty, "
        "either express or implied, including, but not limited to, "
        "warranties of merchantability and fitness for a particular purpose. "
        "All liabilities arising from the supply of the information "
        "(including any liability arising in negligence) "
        "are excluded to the fullest extent permitted by law."
    )

    return CMORLicenseDefinition(license_id=supported_licenses, license_template=license_template)
|
|
620
|
+
|
|
621
|
+
|
|
622
|
+
def get_approx_interval(interval: float, units: str) -> float:
    """
    Convert an interval to an approximate number of days

    Parameters
    ----------
    interval
        Size of the interval, in units of `units`

    units
        Units of `interval`.
        `"month"` is treated as exactly 30 days,
        anything else is converted to days with `pint`.

    Returns
    -------
    :
        Approximate interval in days

    Raises
    ------
    ImportError
        `units` is not `"month"` and the optional dependency `pint` is not installed
    """
    if units == "month":
        # Special case, month is 30 days.
        # Handled first because it does not need the optional dependency.
        return interval * 30.0

    try:
        import pint
    except ImportError as exc:
        msg = "Missing optional dependency `pint`, please install"
        raise ImportError(msg) from exc

    ur = pint.get_application_registry()

    return ur.Quantity(interval, units).to("day").m
|
|
638
|
+
|
|
639
|
+
|
|
640
|
+
def get_cmor_experiment_id_definitions(
    source_collection: str, ev_project: ev_api.project_specs.ProjectSpecs
) -> dict[str, CMORExperimentDefinition]:
    """
    Build the CMOR-style experiment definitions

    Parameters
    ----------
    source_collection
        Collection that holds the experiment terms

    ev_project
        Project from which to retrieve the terms

    Returns
    -------
    :
        One definition per experiment term, keyed by the term's `drs_name`
    """
    lookup_term = partial(ev_api.get_term_in_project, ev_project.project_id)

    def year_of(timestamp: Any) -> Any:
        # Falsy timestamps (e.g. `None`) are passed through untouched
        return timestamp.year if timestamp else timestamp

    definitions = {}
    for experiment in ev_api.get_all_terms_in_collection(ev_project.project_id, source_collection):
        parent_activity_ids = [experiment.parent_activity.drs_name] if experiment.parent_activity else []
        parent_experiment_ids = [experiment.parent_experiment.drs_name] if experiment.parent_experiment else []

        # NOTE: `required_model_components` and `additional_allowed_model_components`
        # are currently not passed through (the corresponding fields are disabled)
        definitions[experiment.drs_name] = CMORExperimentDefinition(
            activity_id=[lookup_term(experiment.activity).drs_name],
            description=experiment.description,
            experiment=experiment.description,
            start_year=year_of(experiment.start_timestamp),
            end_year=year_of(experiment.end_timestamp),
            min_number_yrs_per_sim=experiment.min_number_yrs_per_sim,
            experiment_id=experiment.drs_name,
            parent_activity_id=parent_activity_ids,
            parent_experiment_id=parent_experiment_ids,
            tier=experiment.tier,
        )

    return definitions
|
|
664
|
+
|
|
665
|
+
|
|
666
|
+
def get_cmor_nominal_resolution_defintions(
    source_collection: str, ev_project: ev_api.project_specs.ProjectSpecs
) -> list[str]:
    """
    Build the allowed nominal resolution values for the CMOR CVs table

    Parameters
    ----------
    source_collection
        Collection that holds the nominal resolution terms

    ev_project
        Project from which to retrieve the terms

    Returns
    -------
    :
        Sorted (as strings) list of values of the form `"<size> km"`.
        Whole-number sizes are written without decimals,
        all others with one decimal place.

    Raises
    ------
    ImportError
        The optional dependency `pint` is not installed
    """
    try:
        import pint

        unit_registry = pint.get_application_registry()
    except ImportError as exc:
        msg = "Missing optional dependency `pint`, please install"
        raise ImportError(msg) from exc

    allowed_values = []
    for resolution in ev_api.get_all_terms_in_collection(ev_project.project_id, source_collection):
        size_km = unit_registry.Quantity(resolution.magnitude, resolution.units).to("km").m
        # Only show a decimal place when the size is not a whole number of km
        decimal_places = 0 if int(size_km) == size_km else 1
        allowed_values.append(f"{size_km:.{decimal_places}f} km")

    allowed_values.sort()

    return allowed_values
|
|
689
|
+
|
|
690
|
+
|
|
691
|
+
def get_cmor_source_id_definitions(
    source_collection: str, ev_project: ev_api.project_specs.ProjectSpecs
) -> dict[str, CMORSourceDefinition]:
    """
    Build the CMOR-style source definitions

    Parameters
    ----------
    source_collection
        Collection that holds the source terms

    ev_project
        Project from which to retrieve the terms

    Returns
    -------
    :
        One definition per source term, keyed by the term's `drs_name`

    Raises
    ------
    NotImplementedError
        A source term has at least one model component
        (handling of model components is not implemented yet)
    """
    lookup_term = partial(ev_api.get_term_in_project, ev_project.project_id)

    definitions = {}
    for source_term in ev_api.get_all_terms_in_collection(ev_project.project_id, source_collection):
        model_components = {}
        for mc in source_term.model_components:
            raise NotImplementedError(mc)

        # First line is the source name, then one line per model component
        source_lines = [f"{source_term.drs_name}:"]
        source_lines.extend(f"{key}: {component.description}" for key, component in model_components.items())

        definitions[source_term.drs_name] = CMORSourceDefinition(
            institution_id=[lookup_term(contributor).drs_name for contributor in source_term.contributors],
            label=source_term.label,
            label_extended=source_term.label_extended,
            model_component=model_components,
            source="\n".join(source_lines),
            source_id=source_term.drs_name,
        )

    return definitions
|
|
714
|
+
|
|
715
|
+
|
|
716
|
+
def get_cmor_frequency_definitions(
    source_collection: str, ev_project: ev_api.project_specs.ProjectSpecs
) -> dict[str, CMORFrequencyDefinition]:
    """
    Get the frequency definitions to write into the CMOR CVs table.

    Parameters
    ----------
    source_collection
        Collection from which to retrieve the frequency terms

    ev_project
        Project specifications; its ``project_id`` selects the project to query

    Returns
    -------
    :
        Mapping from each frequency's DRS name to its definition.
        Terms with a falsy ``interval`` map to the plain string
        ``"fixed (time invariant) field"`` rather than a
        ``CMORFrequencyDefinition``.
    """
    definitions = {}
    for term in ev_api.get_all_terms_in_collection(ev_project.project_id, source_collection):
        name = term.drs_name
        if term.interval:
            definitions[name] = CMORFrequencyDefinition(
                description=term.description,
                approx_interval=get_approx_interval(term.interval, units=term.units),
            )
        else:
            # I'm still not convinced that it wouldn't be simpler to use the same schema for all types
            definitions[name] = "fixed (time invariant) field"

    return definitions
|
|
733
|
+
|
|
734
|
+
|
|
735
|
+
def get_cmor_drs_definition(ev_project: ev_api.project_specs.ProjectSpecs) -> CMORDRSDefinition:
    """
    Build the CMOR DRS definition (path templates and examples) for a project.

    The directory and file name templates are assembled from the parts of
    ``ev_project.drs_specs["directory"]`` and ``ev_project.drs_specs["file_name"]``.
    An example value is generated alongside each template placeholder.

    Parameters
    ----------
    ev_project
        Project specifications from which the DRS specs are read

    Returns
    -------
    :
        Definition holding the directory and file name templates
        and one example of each

    Raises
    ------
    AssertionError
        No term in the "source" collection lists the example institution
        among its contributors

    NotImplementedError
        A part of the directory specification is not required
    """
    # Creating a valid example is quite hard because of the coupling between elements.
    # Try and anticipate those here.
    # Note that a perfect way to do this is beyond me right now.
    # grid region
    activity_example = ev_api.get_term_in_collection(ev_project.project_id, "activity", "cmip")
    # Use an experiment listed by the example activity so the two are consistent
    experiment_example = ev_api.get_term_in_collection(
        ev_project.project_id, "experiment", activity_example.experiments[0]
    )

    institution_example = ev_api.get_all_terms_in_collection(ev_project.project_id, "organisation")[0]
    sources = ev_api.get_all_terms_in_collection(ev_project.project_id, "source")
    # Use the first source which lists the example institution as a contributor
    for source in sources:
        if institution_example.id in source.contributors:
            source_example = source
            break
    else:
        # Only reached if the loop finished without finding a matching source
        msg = f"No example source found for {institution_example.id}"
        raise AssertionError(msg)

    grid_example = ev_api.get_all_terms_in_collection(ev_project.project_id, "grid")[0]
    # Use the region referenced by the example grid so the two are consistent
    region_example = ev_api.get_term_in_collection(ev_project.project_id, "region", grid_example.region)

    # Hard-coded example values
    frequency_example = "mon"
    time_range_example = "185001-202112"

    # Creating example regexp terms on the fly also doesn't work
    variant_label_example = "r1i1p1f1"
    branded_suffix_example = "tavg-h2m-hxy-u"

    # Directory path: one template placeholder and one example value per part
    directory_path_template_l = []
    directory_path_example_l = []
    for part in ev_project.drs_specs["directory"].parts:
        if not part.is_required:
            # Optional directory parts are not supported
            raise NotImplementedError

        if part.source_collection == "directory_date":
            # Maybe should be using catalogue specs rather than attr specs?
            # Hard-coded CMOR weirdness
            directory_path_template_l.append("<version>")
            directory_path_example_l.append("20251104")

            continue

        # The placeholder is the field name of the attribute backed by this collection
        project_attribute_property = get_project_attribute_property(
            attribute_value=part.source_collection, attribute_to_match="source_collection", ev_project=ev_project
        )
        directory_path_template_l.append(f"<{project_attribute_property.field_name}>")

        # Use the pre-selected examples where they exist,
        # otherwise fall back to the first term in the part's collection
        if part.source_collection == "activity":
            directory_path_example_l.append(activity_example.drs_name)
        elif part.source_collection == "experiment":
            directory_path_example_l.append(experiment_example.drs_name)
        elif part.source_collection == "frequency":
            directory_path_example_l.append(frequency_example)
        elif part.source_collection == "institution":
            directory_path_example_l.append(institution_example.drs_name)
        elif part.source_collection == "source":
            directory_path_example_l.append(source_example.drs_name)
        elif part.source_collection == "grid":
            directory_path_example_l.append(grid_example.drs_name)
        elif part.source_collection == "region":
            directory_path_example_l.append(region_example.drs_name)
        elif part.source_collection == "variant_label":
            # Urgh
            directory_path_example_l.append(variant_label_example)
        elif part.source_collection == "branded_suffix":
            # Urgh
            directory_path_example_l.append(branded_suffix_example)
        else:
            example_drs_name = ev_api.get_all_terms_in_collection(ev_project.project_id, part.source_collection)[
                0
            ].drs_name
            directory_path_example_l.append(example_drs_name)

    directory_path_template = ev_project.drs_specs["directory"].separator.join(directory_path_template_l)
    directory_path_example = ev_project.drs_specs["directory"].separator.join(directory_path_example_l)

    # File name: the separator is attached to each part as a prefix
    # (rather than joined afterwards) so that it ends up inside
    # the square brackets which mark a non-required part in the template
    filename_path_template_l = []
    filename_path_example_l = []
    for i, part in enumerate(ev_project.drs_specs["file_name"].parts):
        if i > 0:
            prefix = ev_project.drs_specs["file_name"].separator
        else:
            # The first part has no leading separator
            prefix = ""

        if part.source_collection == "time_range":
            # Maybe should be using catalogue specs rather than attr specs?
            # Hard-coded CMOR weirdness
            cmor_placeholder = "timeRange"
            example_value = time_range_example

        else:
            project_attribute_property = get_project_attribute_property(
                attribute_value=part.source_collection, attribute_to_match="source_collection", ev_project=ev_project
            )
            cmor_placeholder = project_attribute_property.field_name

            # NOTE(review): unlike the directory loop, there are no "activity"
            # or "institution" branches here, so those collections would use
            # the generic fallback below - confirm this is intended
            if part.source_collection == "experiment":
                example_value = experiment_example.drs_name
            elif part.source_collection == "frequency":
                example_value = frequency_example
            elif part.source_collection == "source":
                example_value = source_example.drs_name
            elif part.source_collection == "grid":
                example_value = grid_example.drs_name
            elif part.source_collection == "region":
                example_value = region_example.drs_name
            elif part.source_collection == "variant_label":
                # Urgh
                example_value = variant_label_example
            elif part.source_collection == "branded_suffix":
                # Urgh
                example_value = branded_suffix_example
            else:
                example_value = ev_api.get_all_terms_in_collection(ev_project.project_id, part.source_collection)[
                    0
                ].drs_name

        if part.is_required:
            filename_path_template_l.append(f"{prefix}<{cmor_placeholder}>")
        else:
            # Non-required parts are wrapped in square brackets in the template
            filename_path_template_l.append(f"[{prefix}<{cmor_placeholder}>]")

        # The example always includes the part, whether it is required or not
        filename_path_example_l.append(f"{prefix}{example_value}")

    # Separators are already included in each part, hence the empty join string
    filename_path_template_excl_ext = "".join(filename_path_template_l)
    filename_path_template = f"{filename_path_template_excl_ext}.nc"
    filename_path_example_excl_ext = "".join(filename_path_example_l)
    filename_path_example = f"{filename_path_example_excl_ext}.nc"

    res = CMORDRSDefinition(
        directory_path_example=directory_path_example,
        directory_path_template=directory_path_template,
        filename_path_example=filename_path_example,
        filename_path_template=filename_path_template,
    )

    return res
|
|
874
|
+
|
|
875
|
+
|
|
876
|
+
def generate_cvs_table(project: str) -> CMORCVsTable:
    """
    Generate the CMOR CVs table for a project.

    Each attribute in the project's attribute specifications is converted
    to the keyword argument expected by ``CMORCVsTable``. The names of all
    required attributes are collected into ``required_global_attributes``,
    and the DRS definition is added under ``DRS``.

    Parameters
    ----------
    project
        Identifier of the project for which to generate the table

    Returns
    -------
    :
        The CMOR CVs table for ``project``

    Raises
    ------
    NotImplementedError
        An attribute's source collection is backed by a pydantic class
        which is not a plain, pattern or composite term data descriptor
    """
    ev_project = ev_api.projects.get_project(project)

    init_kwargs = {"required_global_attributes": []}
    for attr_property in ev_project.attr_specs:
        # Recorded before any filtering below, so attributes which are
        # skipped in the table itself are still listed as required.
        if attr_property.is_required:
            init_kwargs["required_global_attributes"].append(attr_property.field_name)

        # Logic: https://github.com/WCRP-CMIP/CMIP7-CVs/issues/271#issuecomment-3286291815
        if attr_property.field_name in [
            "Conventions",
            "branded_variable",
            "variable_id",
        ]:
            # Not handled in CMOR tables
            continue

        elif attr_property.field_name in [
            "data_specs_version",
            "mip_era",
        ]:
            # Special single value entries
            value = get_single_allowed_value_for_attribute(attr_property.field_name, ev_project)
            kwarg = attr_property.field_name

        elif attr_property.field_name == "license_id":
            value = get_cmor_license_definition(attr_property.source_collection, ev_project)
            # CMOR's key differs from the attribute name here
            kwarg = "license"

        elif attr_property.field_name == "frequency":
            value = get_cmor_frequency_definitions(attr_property.source_collection, ev_project)
            kwarg = attr_property.field_name

        elif attr_property.field_name == "experiment_id":
            value = get_cmor_experiment_id_definitions(attr_property.source_collection, ev_project)
            kwarg = attr_property.field_name

        elif attr_property.field_name == "nominal_resolution":
            # The helper looks terms up by collection,
            # so it must be given the source collection (not the field name),
            # exactly like the other collection-based helpers in this chain.
            value = get_cmor_nominal_resolution_defintions(attr_property.source_collection, ev_project)
            kwarg = attr_property.field_name

        elif attr_property.field_name == "source_id":
            value = get_cmor_source_id_definitions(attr_property.source_collection, ev_project)
            kwarg = attr_property.field_name

        elif attr_property.field_name in ("activity_id",):
            # Hard-code for now
            # TODO: figure out how to unpack typing.Annotated
            kwarg = attr_property.field_name
            value = get_allowed_dict_for_attribute(attr_property.field_name, ev_project)

        else:
            # Generic handling, chosen by the kind of term behind the source collection
            kwarg = attr_property.field_name
            pydantic_class = ev_api.pydantic_handler.get_pydantic_class(attr_property.source_collection)
            if issubclass(pydantic_class, ev_api.data_descriptors.data_descriptor.PlainTermDataDescriptor):
                value = get_allowed_dict_for_attribute(attr_property.field_name, ev_project)

            elif issubclass(pydantic_class, ev_api.data_descriptors.data_descriptor.PatternTermDataDescriptor):
                value = get_regular_expression_validator_for_attribute(attr_property, ev_project)

            elif issubclass(pydantic_class, ev_api.data_descriptors.data_descriptor.CompositeTermDataDescriptor):
                value = get_template_for_composite_attribute(attr_property.field_name, ev_project)

            else:
                raise NotImplementedError(pydantic_class)

        init_kwargs[kwarg] = value

    init_kwargs["DRS"] = get_cmor_drs_definition(ev_project)

    cmor_cvs_table = CMORCVsTable(**init_kwargs)

    return cmor_cvs_table
|