esgvoc 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of esgvoc might be problematic.
- esgvoc/__init__.py +1 -1
- esgvoc/api/__init__.py +95 -60
- esgvoc/api/data_descriptors/__init__.py +50 -28
- esgvoc/api/data_descriptors/activity.py +3 -3
- esgvoc/api/data_descriptors/area_label.py +16 -1
- esgvoc/api/data_descriptors/branded_suffix.py +20 -0
- esgvoc/api/data_descriptors/branded_variable.py +12 -0
- esgvoc/api/data_descriptors/consortium.py +14 -13
- esgvoc/api/data_descriptors/contact.py +5 -0
- esgvoc/api/data_descriptors/conventions.py +6 -0
- esgvoc/api/data_descriptors/creation_date.py +5 -0
- esgvoc/api/data_descriptors/data_descriptor.py +14 -9
- esgvoc/api/data_descriptors/data_specs_version.py +5 -0
- esgvoc/api/data_descriptors/date.py +1 -1
- esgvoc/api/data_descriptors/directory_date.py +1 -1
- esgvoc/api/data_descriptors/experiment.py +13 -11
- esgvoc/api/data_descriptors/forcing_index.py +1 -1
- esgvoc/api/data_descriptors/frequency.py +3 -3
- esgvoc/api/data_descriptors/further_info_url.py +5 -0
- esgvoc/api/data_descriptors/grid_label.py +2 -2
- esgvoc/api/data_descriptors/horizontal_label.py +15 -1
- esgvoc/api/data_descriptors/initialisation_index.py +1 -1
- esgvoc/api/data_descriptors/institution.py +8 -5
- esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
- esgvoc/api/data_descriptors/license.py +3 -3
- esgvoc/api/data_descriptors/mip_era.py +1 -1
- esgvoc/api/data_descriptors/model_component.py +1 -1
- esgvoc/api/data_descriptors/obs_type.py +5 -0
- esgvoc/api/data_descriptors/organisation.py +1 -1
- esgvoc/api/data_descriptors/physic_index.py +1 -1
- esgvoc/api/data_descriptors/product.py +2 -2
- esgvoc/api/data_descriptors/publication_status.py +5 -0
- esgvoc/api/data_descriptors/realisation_index.py +1 -1
- esgvoc/api/data_descriptors/realm.py +1 -1
- esgvoc/api/data_descriptors/region.py +5 -0
- esgvoc/api/data_descriptors/resolution.py +3 -3
- esgvoc/api/data_descriptors/source.py +9 -5
- esgvoc/api/data_descriptors/source_type.py +1 -1
- esgvoc/api/data_descriptors/table.py +3 -2
- esgvoc/api/data_descriptors/temporal_label.py +15 -1
- esgvoc/api/data_descriptors/time_range.py +4 -3
- esgvoc/api/data_descriptors/title.py +5 -0
- esgvoc/api/data_descriptors/tracking_id.py +5 -0
- esgvoc/api/data_descriptors/variable.py +25 -12
- esgvoc/api/data_descriptors/variant_label.py +3 -3
- esgvoc/api/data_descriptors/vertical_label.py +14 -0
- esgvoc/api/project_specs.py +120 -4
- esgvoc/api/projects.py +733 -505
- esgvoc/api/py.typed +0 -0
- esgvoc/api/report.py +12 -8
- esgvoc/api/search.py +168 -98
- esgvoc/api/universe.py +368 -157
- esgvoc/apps/drs/constants.py +1 -1
- esgvoc/apps/drs/generator.py +51 -69
- esgvoc/apps/drs/report.py +60 -15
- esgvoc/apps/drs/validator.py +60 -71
- esgvoc/apps/jsg/cmip6_template.json +74 -0
- esgvoc/apps/jsg/cmip6plus_template.json +74 -0
- esgvoc/apps/jsg/json_schema_generator.py +185 -0
- esgvoc/apps/py.typed +0 -0
- esgvoc/cli/config.py +500 -0
- esgvoc/cli/drs.py +3 -2
- esgvoc/cli/find.py +138 -0
- esgvoc/cli/get.py +46 -38
- esgvoc/cli/main.py +10 -3
- esgvoc/cli/status.py +27 -18
- esgvoc/cli/valid.py +10 -15
- esgvoc/core/constants.py +1 -1
- esgvoc/core/db/__init__.py +2 -4
- esgvoc/core/db/connection.py +5 -3
- esgvoc/core/db/models/project.py +57 -15
- esgvoc/core/db/models/universe.py +49 -10
- esgvoc/core/db/project_ingestion.py +79 -65
- esgvoc/core/db/universe_ingestion.py +71 -40
- esgvoc/core/exceptions.py +33 -0
- esgvoc/core/logging_handler.py +24 -2
- esgvoc/core/repo_fetcher.py +61 -59
- esgvoc/core/service/data_merger.py +47 -34
- esgvoc/core/service/state.py +107 -83
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/METADATA +7 -20
- esgvoc-1.0.0.dist-info/RECORD +95 -0
- esgvoc/api/_utils.py +0 -53
- esgvoc/core/logging.conf +0 -21
- esgvoc-0.3.0.dist-info/RECORD +0 -78
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/WHEEL +0 -0
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/entry_points.txt +0 -0
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
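Among the additions above is a new `esgvoc/core/exceptions.py` module; the `validator.py` diff below replaces its bare `raise` statements with these typed exceptions. A minimal caller-side sketch, assuming only the names that actually appear in this diff (`DrsValidator`, `EsgvocNotFoundError`, `EsgvocDbError`):

```python
# Hedged sketch: only names visible in this diff are assumed to exist.
from esgvoc.apps.drs.validator import DrsValidator
from esgvoc.core.exceptions import EsgvocDbError, EsgvocNotFoundError

try:
    validator = DrsValidator("no_such_project")  # hypothetical, unknown project id
except EsgvocNotFoundError as error:
    # DrsApplication.__init__ raises this when projects.get_project() finds nothing.
    print(f"unknown project: {error}")
except EsgvocDbError as error:
    # Raised when the stored DRS specs are inconsistent (unsupported specs type, etc.).
    print(f"inconsistent project specs: {error}")
```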
esgvoc/apps/drs/validator.py
CHANGED

```diff
@@ -2,17 +2,33 @@ from typing import cast
 
 import esgvoc.api.projects as projects
 import esgvoc.apps.drs.constants as constants
-from esgvoc.api import
-
-
-
-
-
-
-
-
-
+from esgvoc.api.project_specs import (
+    DrsCollection,
+    DrsConstant,
+    DrsPart,
+    DrsPartKind,
+    DrsSpecification,
+    DrsType,
+    ProjectSpecs,
+)
+from esgvoc.apps.drs.report import (
+    BlankTerm,
+    ComplianceIssue,
+    DrsIssue,
+    DrsValidationReport,
+    ExtraChar,
+    ExtraSeparator,
+    ExtraTerm,
+    FileNameExtensionIssue,
+    InvalidTerm,
+    MissingTerm,
+    ParsingIssue,
+    Space,
+    Unparsable,
+    ValidationError,
+    ValidationWarning,
+)
+from esgvoc.core.exceptions import EsgvocDbError, EsgvocNotFoundError
 
 
 class DrsApplication:
@@ -25,9 +41,9 @@ class DrsApplication:
         """The project id."""
         self.pedantic: bool = pedantic
         """Same as the option of GCC: turn warnings into errors. Default False."""
-        project_specs: ProjectSpecs|None = projects.
+        project_specs: ProjectSpecs | None = projects.get_project(project_id)
         if not project_specs:
-            raise
+            raise EsgvocNotFoundError(f"unable to find project '{project_id}'")
         for specs in project_specs.drs_specs:
             match specs.type:
                 case DrsType.DIRECTORY:
@@ -40,7 +56,7 @@ class DrsApplication:
                     self.dataset_id_specs: DrsSpecification = specs
                     """The DRS dataset id specs of the project."""
                 case _:
-                    raise
+                    raise EsgvocDbError(f"unsupported DRS specs type '{specs.type}'")
 
     def _get_full_file_name_extension(self) -> str:
         """
@@ -55,8 +71,8 @@ class DrsApplication:
             full_extension = specs.properties[constants.FILE_NAME_EXTENSION_SEPARATOR_KEY] + \
                 specs.properties[constants.FILE_NAME_EXTENSION_KEY]
         else:
-            raise
-
+            raise EsgvocDbError('missing properties in the DRS file name specifications of the ' +
+                                f"project '{self.project_id}'")
         return full_extension
 
 
@@ -66,7 +82,7 @@ class DrsValidator(DrsApplication):
     """
 
     def validate_directory(self, drs_expression: str,
-                           prefix: str|None = None) -> DrsValidationReport:
+                           prefix: str | None = None) -> DrsValidationReport:
         """
         Validate a DRS directory expression.
 
@@ -112,7 +128,7 @@ class DrsValidator(DrsApplication):
                                         [issue], [])
             return result
 
-    def validate(self, drs_expression: str, drs_type: DrsType|str) -> DrsValidationReport:
+    def validate(self, drs_expression: str, drs_type: DrsType | str) -> DrsValidationReport:
         """
         Validate a DRS expression.
 
@@ -131,14 +147,14 @@ class DrsValidator(DrsApplication):
             case DrsType.DATASET_ID:
                 return self.validate_dataset_id(drs_expression=drs_expression)
             case _:
-                raise
+                raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
 
     def _parse(self,
                drs_expression: str,
               separator: str,
-               drs_type: DrsType) -> tuple[list[str]|None, # terms
+               drs_type: DrsType) -> tuple[list[str] | None,  # terms
                                            list[DrsIssue],   # Errors
-                                           list[DrsIssue]]:
+                                           list[DrsIssue]]:  # Warnings
         errors: list[DrsIssue] = list()
         warnings: list[DrsIssue] = list()
         cursor_offset = 0
@@ -160,7 +176,7 @@ class DrsValidator(DrsApplication):
         terms = drs_expression.split(separator)
         if len(terms) < 2:
             errors.append(Unparsable(expected_drs_type=drs_type))
-            return None, errors, warnings
+            return None, errors, warnings  # Early exit
         max_term_index = len(terms)
         cursor_position = initial_cursor_position = len(drs_expression) + 1
         has_white_term = False
@@ -178,7 +194,10 @@ class DrsValidator(DrsApplication):
                 column = cursor_position+cursor_offset
                 if (drs_type == DrsType.DIRECTORY) and (not has_white_term):
                     issue = ExtraSeparator(column=column)
-
+                    if self.pedantic:
+                        errors.append(issue)
+                    else:
+                        warnings.append(issue)
                 else:
                     issue = ExtraChar(column=column)
                     errors.append(issue)
@@ -188,7 +207,7 @@ class DrsValidator(DrsApplication):
             if not term:
                 column = cursor_position + cursor_offset
                 issue = ExtraSeparator(column=column)
-                if
+                if self.pedantic or drs_type != DrsType.DIRECTORY or index == 0:
                     errors.append(issue)
                 else:
                     warnings.append(issue)
@@ -200,10 +219,10 @@ class DrsValidator(DrsApplication):
                 del terms[index]
                 cursor_position -= len_term + 1
 
-        #
-        sorted_errors = DrsValidator._sort_parser_issues(errors)
-        sorted_warnings = DrsValidator._sort_parser_issues(warnings)
-        return terms, sorted_errors, sorted_warnings
+        # Mypy doesn't understand that ParsingIssues are DrsIssues...
+        sorted_errors = DrsValidator._sort_parser_issues(errors)  # type: ignore
+        sorted_warnings = DrsValidator._sort_parser_issues(warnings)  # type: ignore
+        return terms, sorted_errors, sorted_warnings  # type: ignore
 
     @staticmethod
     def _sort_parser_issues(issues: list[ParsingIssue]) -> list[ParsingIssue]:
@@ -213,13 +232,9 @@ class DrsValidator(DrsApplication):
         match part.kind:
             case DrsPartKind.COLLECTION:
                 casted_part: DrsCollection = cast(DrsCollection, part)
-
-
-
-                                                                   casted_part.collection_id)
-                except Exception as e:
-                    msg = f'problem while validating term: {e}.Abort.'
-                    raise APIException(msg) from e
+                matching_terms = projects.valid_term_in_collection(term,
+                                                                   self.project_id,
+                                                                   casted_part.collection_id)
                 if len(matching_terms) > 0:
                     return True
                 else:
@@ -228,7 +243,7 @@ class DrsValidator(DrsApplication):
                 part_casted: DrsConstant = cast(DrsConstant, part)
                 return part_casted.value != term
             case _:
-                raise
+                raise EsgvocDbError(f"unsupported DRS specs part type '{part.kind}'")
 
     def _create_report(self,
                        type: DrsType,
@@ -245,7 +260,7 @@ class DrsValidator(DrsApplication):
                   specs: DrsSpecification) -> DrsValidationReport:
         terms, errors, warnings = self._parse(drs_expression, specs.separator, specs.type)
         if not terms:
-            return self._create_report(specs.type, drs_expression, errors, warnings)
+            return self._create_report(specs.type, drs_expression, errors, warnings)  # Early exit.
         term_index = 0
         term_max_index = len(terms)
         part_index = 0
@@ -259,27 +274,27 @@ class DrsValidator(DrsApplication):
                 part_index += 1
                 matching_code_mapping[part.__str__()] = 0
             elif part.kind == DrsPartKind.CONSTANT or \
-                    cast(DrsCollection, part).is_required:
+                    cast(DrsCollection, part).is_required:  # noqa E127
                 issue: ComplianceIssue = InvalidTerm(term=term,
-
-
+                                                     term_position=term_index+1,
+                                                     collection_id_or_constant_value=str(part))
                 errors.append(issue)
                 matching_code_mapping[part.__str__()] = 1
                 term_index += 1
                 part_index += 1
-            else:
+            else:  # The part is not required so try to match the term with the next part.
                 part_index += 1
                 matching_code_mapping[part.__str__()] = -1
             if term_index == term_max_index:
                 break
         # Cases:
         # - All terms and collections have been processed.
-        #
+        # - Not enough term to process all collections.
         # - Extra terms left whereas all collections have been processed:
         # + The last collections are required => report extra terms.
         # + The last collections are not required and these terms were not validated by them.
         # => Should report error even if the collections are not required.
-        if part_index < part_max_index:
+        if part_index < part_max_index:  # Missing terms.
             for index in range(part_index, part_max_index):
                 part = specs.parts[index]
                 issue = MissingTerm(collection_id=str(part), collection_position=index+1)
@@ -288,43 +303,17 @@ class DrsValidator(DrsApplication):
                     errors.append(issue)
                 else:
                     warnings.append(issue)
-        elif term_index < term_max_index:
+        elif term_index < term_max_index:  # Extra terms.
             part_index -= term_max_index - term_index
             for index in range(term_index, term_max_index):
                 term = terms[index]
                 part = specs.parts[part_index]
                 if part.kind != DrsPartKind.CONSTANT and \
                     (not cast(DrsCollection, part).is_required) and \
-                    matching_code_mapping[part.__str__()] < 0:
+                    matching_code_mapping[part.__str__()] < 0:  # noqa E125
                     issue = ExtraTerm(term=term, term_position=index, collection_id=str(part))
                 else:
                     issue = ExtraTerm(term=term, term_position=index, collection_id=None)
                 errors.append(issue)
                 part_index += 1
         return self._create_report(specs.type, drs_expression, errors, warnings)
-
-
-if __name__ == "__main__":
-    project_id = 'cmip6plus'
-    validator = DrsValidator(project_id)
-    drs_expressions = [
-        ".CMIP6Plus.CMIP.IPSL. .MIROC6.amip..r2i2p1f2.ACmon.od550aer. ..gn",
-    ]
-    import time
-    for drs_expression in drs_expressions:
-        start_time = time.perf_counter_ns()
-        report = validator.validate_dataset_id(drs_expression)
-        stop_time = time.perf_counter_ns()
-        print(f'elapsed time: {(stop_time-start_time)/1000000} ms')
-        if report.nb_errors > 0:
-            print(f'error(s): {report.nb_errors}')
-            for error in report.errors:
-                print(error)
-        else:
-            print('error(s): 0')
-        if report.nb_warnings > 0:
-            print(f'warning(s): {report.nb_warnings}')
-            for warning in report.warnings:
-                print(warning)
-        else:
-            print('warning(s): 0')
```
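The removed `__main__` block doubled as the only usage example for the validator. An equivalent standalone sketch, restricted to calls that appear in this diff (`validate_dataset_id` and the `nb_errors`/`errors`/`nb_warnings`/`warnings` fields of the returned `DrsValidationReport`); the dataset id string is illustrative only:

```python
from esgvoc.apps.drs.validator import DrsValidator

validator = DrsValidator("cmip6plus")
# Illustrative expression; real dataset ids come from the project's DRS specs.
report = validator.validate_dataset_id("CMIP6Plus.CMIP.IPSL.MIROC6.amip.r2i2p1f2.ACmon.od550aer.gn")
print(f"error(s): {report.nb_errors}")
for error in report.errors:
    print(error)
print(f"warning(s): {report.nb_warnings}")
for warning in report.warnings:
    print(warning)
```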
esgvoc/apps/jsg/cmip6_template.json
ADDED

```json
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "$id": "https://stac-extensions.github.io/cmip6/v1.0.0/schema.json#",
  "title": "CMIP6 Extension",
  "description": "STAC CMIP6 Extension for STAC Items and STAC Collection Summaries.",
  "type": "object",
  "required": [
    "stac_extensions"
  ],
  "properties": {
    "stac_extensions": {
      "type": "array",
      "contains": {
        "const": "https://stac-extensions.github.io/cmip6/v1.0.0/schema.json"
      }
    }
  },
  "oneOf": [
    {
      "$comment": "This is the schema for STAC Items.",
      "type": "object",
      "required": [
        "type",
        "properties"
      ],
      "properties": {
        "type": {
          "const": "Feature"
        },
        "properties": {
          "allOf": [
            {
              "$ref": "#/definitions/require_any"
            },
            {
              "$ref": "#/definitions/fields"
            }
          ]
        }
      }
    },
    {
      "$comment": "This is the schema for STAC Collections, or more specifically only Collection Summaries in this case. By default, only checks the existence of the properties, but not the schema of the summaries.",
      "type": "object",
      "required": [
        "type",
        "summaries"
      ],
      "properties": {
        "type": {
          "const": "Collection"
        },
        "summaries": {
          "$ref": "#/definitions/require_any"
        }
      }
    }
  ],
  "definitions": {
    "require_any": {
      "$comment": "Please list all fields here so that we can force the existence of one of them in other parts of the schemas."
    },
    "fields": {
      "$comment": " Don't require fields here, do that above in the corresponding schema.",
      "type": "object",
      "properties": {
      },
      "patternProperties": {
        "^(?!cmip6:)": {}
      },
      "additionalProperties": false
    }
  }
}
```
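The template is an ordinary JSON Schema draft-07 document, so once the generator below has filled in `definitions/require_any` and `definitions/fields` it can be exercised with any draft-07 validator. A rough sketch using the third-party `jsonschema` package; the saved schema path, the item payload and the `cmip6:activity_id` key are invented for illustration:

```python
import json

import jsonschema  # third-party draft-07 validator, not part of esgvoc

with open("cmip6_schema.json") as fh:  # a schema previously produced by generate_json_schema("cmip6")
    schema = json.load(fh)

item = {
    "type": "Feature",
    "stac_extensions": ["https://stac-extensions.github.io/cmip6/v1.0.0/schema.json"],
    "properties": {"cmip6:activity_id": "CMIP"},  # hypothetical namespaced attribute
}
jsonschema.validate(instance=item, schema=schema)  # raises ValidationError if the item does not comply
```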
esgvoc/apps/jsg/cmip6plus_template.json
ADDED

```json
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "$id": "https://stac-extensions.github.io/cmip6plus/v1.0.0/schema.json#",
  "title": "CMIP6Plus Extension",
  "description": "STAC CMIP6Plus Extension for STAC Items and STAC Collection Summaries.",
  "type": "object",
  "required": [
    "stac_extensions"
  ],
  "properties": {
    "stac_extensions": {
      "type": "array",
      "contains": {
        "const": "https://stac-extensions.github.io/cmip6plus/v1.0.0/schema.json"
      }
    }
  },
  "oneOf": [
    {
      "$comment": "This is the schema for STAC Items.",
      "type": "object",
      "required": [
        "type",
        "properties"
      ],
      "properties": {
        "type": {
          "const": "Feature"
        },
        "properties": {
          "allOf": [
            {
              "$ref": "#/definitions/require_any"
            },
            {
              "$ref": "#/definitions/fields"
            }
          ]
        }
      }
    },
    {
      "$comment": "This is the schema for STAC Collections, or more specifically only Collection Summaries in this case. By default, only checks the existence of the properties, but not the schema of the summaries.",
      "type": "object",
      "required": [
        "type",
        "summaries"
      ],
      "properties": {
        "type": {
          "const": "Collection"
        },
        "summaries": {
          "$ref": "#/definitions/require_any"
        }
      }
    }
  ],
  "definitions": {
    "require_any": {
      "$comment": "Please list all fields here so that we can force the existence of one of them in other parts of the schemas."
    },
    "fields": {
      "$comment": " Don't require fields here, do that above in the corresponding schema.",
      "type": "object",
      "properties": {
      },
      "patternProperties": {
        "^(?!cmip6plus:)": {}
      },
      "additionalProperties": false
    }
  }
}
```
esgvoc/apps/jsg/json_schema_generator.py
ADDED

```python
import contextlib
import json
from pathlib import Path
from typing import Iterable

from sqlmodel import Session

from esgvoc.api import projects, search
from esgvoc.api.project_specs import (
    GlobalAttributeSpecBase,
    GlobalAttributeSpecSpecific,
    GlobalAttributeVisitor,
)
from esgvoc.core.constants import DRS_SPECS_JSON_KEY, PATTERN_JSON_KEY
from esgvoc.core.db.models.project import PCollection, TermKind
from esgvoc.core.exceptions import EsgvocNotFoundError, EsgvocNotImplementedError

KEY_SEPARATOR = ':'
JSON_SCHEMA_TEMPLATE_DIR_PATH = Path(__file__).parent
JSON_SCHEMA_TEMPLATE_FILE_NAME_TEMPLATE = '{project_id}_template.json'
JSON_INDENTATION = 2


def _process_plain(collection: PCollection, selected_field: str) -> list[str]:
    result: list[str] = list()
    for term in collection.terms:
        if selected_field in term.specs:
            value = term.specs[selected_field]
            result.append(value)
        else:
            raise EsgvocNotFoundError(f'missing key {selected_field} for term {term.id} in ' +
                                      f'collection {collection.id}')
    return result


def _process_composite(collection: PCollection, universe_session: Session,
                       project_session: Session) -> str:
    result = ""
    for term in collection.terms:
        _, parts = projects._get_composite_term_separator_parts(term)
        for part in parts:
            resolved_term = projects._resolve_term(part, universe_session, project_session)
            if resolved_term.kind == TermKind.PATTERN:
                result += resolved_term.specs[PATTERN_JSON_KEY]
            else:
                raise EsgvocNotImplementedError(f'{term.kind} term is not supported yet')
    # Patterns terms are meant to be validated individually.
    # So their regex are defined as a whole (begins by a ^, ends by a $).
    # As the pattern is a concatenation of plain or regex, multiple ^ and $ can exist.
    # The later, must be removed.
    result = result.replace('^', '').replace('$', '')
    result = f'^{result}$'
    return result


def _process_pattern(collection: PCollection) -> str:
    # The generation of the value of the field pattern for the collections with more than one term
    # is not specified yet.
    if len(collection.terms) == 1:
        term = collection.terms[0]
        return term.specs[PATTERN_JSON_KEY]
    else:
        msg = f"unsupported collection of term pattern with more than one term for '{collection.id}'"
        raise EsgvocNotImplementedError(msg)


def _generate_attribute_key(project_id: str, attribute_name) -> str:
    return f'{project_id}{KEY_SEPARATOR}{attribute_name}'


class JsonPropertiesVisitor(GlobalAttributeVisitor, contextlib.AbstractContextManager):
    def __init__(self, project_id: str) -> None:
        self.project_id = project_id
        # Project session can't be None here.
        self.universe_session: Session = search.get_universe_session()
        self.project_session: Session = projects._get_project_session_with_exception(project_id)
        self.collections: dict[str, PCollection] = dict()
        for collection in projects._get_all_collections_in_project(self.project_session):
            self.collections[collection.id] = collection

    def __exit__(self, exception_type, exception_value, exception_traceback):
        self.project_session.close()
        self.universe_session.close()
        if exception_type is not None:
            raise exception_value
        return True

    def _generate_attribute_property(self, attribute_name: str, source_collection: str,
                                     selected_field: str) -> tuple[str, str | list[str]]:
        property_value: str | list[str]
        property_key: str
        if source_collection not in self.collections:
            raise EsgvocNotFoundError(f"collection '{source_collection}' referenced by attribute " +
                                      f"{attribute_name} is not found")
        collection = self.collections[source_collection]
        match collection.term_kind:
            case TermKind.PLAIN:
                property_value = _process_plain(collection=collection,
                                                selected_field=selected_field)
                property_key = 'enum'
            case TermKind.COMPOSITE:
                property_value = _process_composite(collection=collection,
                                                    universe_session=self.universe_session,
                                                    project_session=self.project_session)
                property_key = 'pattern'
            case TermKind.PATTERN:
                property_value = _process_pattern(collection)
                property_key = 'pattern'
            case _:
                msg = f"unsupported term kind '{collection.term_kind}' " + \
                      f"for global attribute {attribute_name}"
                raise EsgvocNotImplementedError(msg)
        return property_key, property_value

    def visit_base_attribute(self, attribute_name: str, attribute: GlobalAttributeSpecBase) \
            -> tuple[str, dict[str, str | list[str]]]:
        attribute_key = _generate_attribute_key(self.project_id, attribute_name)
        attribute_properties: dict[str, str | list[str]] = dict()
        attribute_properties['type'] = attribute.value_type.value
        property_key, property_value = self._generate_attribute_property(attribute_name,
                                                                         attribute.source_collection,
                                                                         DRS_SPECS_JSON_KEY)
        attribute_properties[property_key] = property_value
        return attribute_key, attribute_properties

    def visit_specific_attribute(self, attribute_name: str, attribute: GlobalAttributeSpecSpecific) \
            -> tuple[str, dict[str, str | list[str]]]:
        attribute_key = _generate_attribute_key(self.project_id, attribute_name)
        attribute_properties: dict[str, str | list[str]] = dict()
        attribute_properties['type'] = attribute.value_type.value
        property_key, property_value = self._generate_attribute_property(attribute_name,
                                                                         attribute.source_collection,
                                                                         attribute.specific_key)
        attribute_properties[property_key] = property_value
        return attribute_key, attribute_properties


def _inject_global_attributes(json_root: dict, project_id: str, attribute_names: Iterable[str]) -> None:
    attribute_properties = list()
    for attribute_name in attribute_names:
        attribute_key = _generate_attribute_key(project_id, attribute_name)
        attribute_properties.append({"required": [attribute_key]})
    json_root['definitions']['require_any']['anyOf'] = attribute_properties


def _inject_properties(json_root: dict, properties: list[tuple]) -> None:
    for property in properties:
        json_root['definitions']['fields']['properties'][property[0]] = property[1]


def generate_json_schema(project_id: str) -> str:
    """
    Generate json schema for the given project.

    :param project_id: The id of the given project.
    :type project_id: str
    :returns: The content of a json schema
    :rtype: str
    :raises EsgvocNotFoundError: On missing information
    :raises EsgvocNotImplementedError: On unexpected operations
    """
    file_name = JSON_SCHEMA_TEMPLATE_FILE_NAME_TEMPLATE.format(project_id=project_id)
    template_file_path = JSON_SCHEMA_TEMPLATE_DIR_PATH.joinpath(file_name)
    if template_file_path.exists():
        project_specs = projects.get_project(project_id)
        if project_specs:
            if project_specs.global_attributes_specs:
                with open(file=template_file_path, mode='r') as file, \
                        JsonPropertiesVisitor(project_id) as visitor:
                    file_content = file.read()
                    root = json.loads(file_content)
                    properties: list[tuple[str, dict[str, str | list[str]]]] = list()
                    for attribute_name, attribute in project_specs.global_attributes_specs.items():
                        attribute_key, attribute_properties = attribute.accept(attribute_name, visitor)
                        properties.append((attribute_key, attribute_properties))
                    _inject_properties(root, properties)
                    _inject_global_attributes(root, project_id, project_specs.global_attributes_specs.keys())
                    return json.dumps(root, indent=JSON_INDENTATION)
            else:
                raise EsgvocNotFoundError(f"global attributes for the project '{project_id}' " +
                                          "are not provided")
        else:
            raise EsgvocNotFoundError(f"project '{project_id}' is not found")
    else:
        raise EsgvocNotFoundError(f"template for project '{project_id}' is not found")
```
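`generate_json_schema` is the only public entry point of the new module; everything else is a private helper or the visitor class. A short driving sketch; the output path is arbitrary and the project id must have a matching `<project_id>_template.json` shipped next to the module:

```python
from pathlib import Path

from esgvoc.apps.jsg.json_schema_generator import generate_json_schema

# Returns the completed schema as a JSON string, or raises EsgvocNotFoundError /
# EsgvocNotImplementedError as documented in the docstring above.
schema_content = generate_json_schema("cmip6")
Path("cmip6_schema.json").write_text(schema_content)
```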
esgvoc/apps/py.typed
ADDED
File without changes