esgvoc 0.1.2__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +3 -1
- esgvoc/api/__init__.py +30 -30
- esgvoc/api/_utils.py +28 -14
- esgvoc/api/data_descriptors/__init__.py +19 -10
- esgvoc/api/data_descriptors/activity.py +8 -45
- esgvoc/api/data_descriptors/area_label.py +6 -0
- esgvoc/api/data_descriptors/branded_suffix.py +5 -0
- esgvoc/api/data_descriptors/branded_variable.py +5 -0
- esgvoc/api/data_descriptors/consortium.py +16 -56
- esgvoc/api/data_descriptors/data_descriptor.py +106 -0
- esgvoc/api/data_descriptors/date.py +3 -46
- esgvoc/api/data_descriptors/directory_date.py +5 -0
- esgvoc/api/data_descriptors/experiment.py +19 -54
- esgvoc/api/data_descriptors/forcing_index.py +3 -45
- esgvoc/api/data_descriptors/frequency.py +6 -43
- esgvoc/api/data_descriptors/grid_label.py +6 -44
- esgvoc/api/data_descriptors/horizontal_label.py +6 -0
- esgvoc/api/data_descriptors/initialisation_index.py +3 -44
- esgvoc/api/data_descriptors/institution.py +11 -54
- esgvoc/api/data_descriptors/license.py +4 -44
- esgvoc/api/data_descriptors/mip_era.py +6 -44
- esgvoc/api/data_descriptors/model_component.py +7 -45
- esgvoc/api/data_descriptors/organisation.py +3 -40
- esgvoc/api/data_descriptors/physic_index.py +3 -45
- esgvoc/api/data_descriptors/product.py +4 -43
- esgvoc/api/data_descriptors/realisation_index.py +3 -44
- esgvoc/api/data_descriptors/realm.py +4 -42
- esgvoc/api/data_descriptors/resolution.py +6 -44
- esgvoc/api/data_descriptors/source.py +18 -53
- esgvoc/api/data_descriptors/source_type.py +3 -41
- esgvoc/api/data_descriptors/sub_experiment.py +3 -41
- esgvoc/api/data_descriptors/table.py +6 -48
- esgvoc/api/data_descriptors/temporal_label.py +6 -0
- esgvoc/api/data_descriptors/time_range.py +3 -27
- esgvoc/api/data_descriptors/variable.py +13 -71
- esgvoc/api/data_descriptors/variant_label.py +3 -47
- esgvoc/api/data_descriptors/vertical_label.py +5 -0
- esgvoc/api/project_specs.py +82 -0
- esgvoc/api/projects.py +284 -238
- esgvoc/api/report.py +89 -52
- esgvoc/api/search.py +31 -11
- esgvoc/api/universe.py +57 -48
- esgvoc/apps/__init__.py +6 -0
- esgvoc/apps/drs/__init__.py +0 -16
- esgvoc/apps/drs/constants.py +2 -0
- esgvoc/apps/drs/generator.py +429 -0
- esgvoc/apps/drs/report.py +492 -0
- esgvoc/apps/drs/validator.py +330 -0
- esgvoc/cli/drs.py +248 -0
- esgvoc/cli/get.py +26 -25
- esgvoc/cli/install.py +11 -8
- esgvoc/cli/main.py +4 -5
- esgvoc/cli/status.py +14 -2
- esgvoc/cli/valid.py +41 -45
- esgvoc/core/db/models/mixins.py +7 -0
- esgvoc/core/db/models/project.py +3 -8
- esgvoc/core/db/models/universe.py +3 -3
- esgvoc/core/db/project_ingestion.py +4 -1
- esgvoc/core/db/universe_ingestion.py +8 -7
- esgvoc/core/logging_handler.py +1 -1
- esgvoc/core/repo_fetcher.py +4 -3
- esgvoc/core/service/__init__.py +37 -5
- esgvoc/core/service/configuration/config_manager.py +188 -0
- esgvoc/core/service/configuration/setting.py +88 -0
- esgvoc/core/service/state.py +66 -42
- esgvoc-0.3.0.dist-info/METADATA +89 -0
- esgvoc-0.3.0.dist-info/RECORD +78 -0
- esgvoc-0.3.0.dist-info/licenses/LICENSE.txt +519 -0
- esgvoc/apps/drs/models.py +0 -43
- esgvoc/apps/drs/parser.py +0 -27
- esgvoc/cli/config.py +0 -79
- esgvoc/core/service/settings.py +0 -64
- esgvoc/core/service/settings.toml +0 -12
- esgvoc/core/service/settings_default.toml +0 -20
- esgvoc-0.1.2.dist-info/METADATA +0 -54
- esgvoc-0.1.2.dist-info/RECORD +0 -66
- {esgvoc-0.1.2.dist-info → esgvoc-0.3.0.dist-info}/WHEEL +0 -0
- {esgvoc-0.1.2.dist-info → esgvoc-0.3.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
from typing import cast
|
|
2
|
+
|
|
3
|
+
import esgvoc.api.projects as projects
|
|
4
|
+
import esgvoc.apps.drs.constants as constants
|
|
5
|
+
from esgvoc.api import APIException
|
|
6
|
+
from esgvoc.api.project_specs import (DrsCollection, DrsConstant, DrsPart,
|
|
7
|
+
DrsPartKind, DrsSpecification, DrsType,
|
|
8
|
+
ProjectSpecs)
|
|
9
|
+
from esgvoc.apps.drs.report import (BlankTerm, ComplianceIssue, DrsIssue,
|
|
10
|
+
DrsValidationReport, ExtraChar,
|
|
11
|
+
ExtraSeparator, ExtraTerm,
|
|
12
|
+
FileNameExtensionIssue, InvalidTerm,
|
|
13
|
+
MissingTerm, ParsingIssue, Space,
|
|
14
|
+
Unparsable, ValidationError,
|
|
15
|
+
ValidationWarning)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class DrsApplication:
    """
    Base class of the DRS applications.

    It looks up a project and keeps a reference to each of its DRS
    specifications (directory, file name and dataset id).
    """

    def __init__(self, project_id: str, pedantic: bool = False) -> None:
        """
        :param project_id: The id of the project whose DRS specs are loaded.
        :type project_id: str
        :param pedantic: Turn warnings into errors. Default False.
        :type pedantic: bool
        :raises APIException: If the project cannot be found.
        :raises RuntimeError: If one of the DRS specs has an unsupported type.
        """
        self.project_id: str = project_id
        """The project id."""
        self.pedantic: bool = pedantic
        """Same as the option of GCC: turn warnings into errors. Default False."""
        found_specs: ProjectSpecs|None = projects.find_project(project_id)
        if not found_specs:
            raise APIException(f'unable to find project {project_id}')
        for drs_spec in found_specs.drs_specs:
            if drs_spec.type == DrsType.DIRECTORY:
                self.directory_specs: DrsSpecification = drs_spec
                """The DRS directory specs of the project."""
            elif drs_spec.type == DrsType.FILE_NAME:
                self.file_name_specs: DrsSpecification = drs_spec
                """The DRS file name specs of the project."""
            elif drs_spec.type == DrsType.DATASET_ID:
                self.dataset_id_specs: DrsSpecification = drs_spec
                """The DRS dataset id specs of the project."""
            else:
                raise RuntimeError(f'unsupported DRS specs type {drs_spec.type}')

    def _get_full_file_name_extension(self) -> str:
        """
        Build the full file name extension of the project, that is the extension
        separator immediately followed by the extension, both read from the
        properties of the DRS file name specs.

        :returns: The full file name extension.
        :rtype: str
        :raises RuntimeError: If the DRS file name specs have no properties.
        """
        file_name_specs: DrsSpecification = self.file_name_specs
        if not file_name_specs.properties:
            raise RuntimeError('missing properties in the DRS file name specifications of the ' +
                               f'project {self.project_id}')
        extension_separator = file_name_specs.properties[constants.FILE_NAME_EXTENSION_SEPARATOR_KEY]
        extension = file_name_specs.properties[constants.FILE_NAME_EXTENSION_KEY]
        return extension_separator + extension
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class DrsValidator(DrsApplication):
|
|
64
|
+
"""
|
|
65
|
+
Validate a DRS directory, dataset id or file name expression against a project.
|
|
66
|
+
"""
|
|
67
|
+
|
|
68
|
+
def validate_directory(self, drs_expression: str,
                       prefix: str|None = None) -> DrsValidationReport:
    """
    Check a DRS directory expression against the directory specs of the project.

    :param drs_expression: A DRS directory expression.
    :type drs_expression: str
    :param prefix: A leading directory prefix to strip from the expression before
                   validation. Nothing is stripped when it is None or empty.
    :type prefix: str|None
    :returns: A validation report.
    :rtype: DrsValidationReport
    """
    # removeprefix leaves the expression untouched when the prefix is absent.
    expression = drs_expression.removeprefix(prefix) if prefix else drs_expression
    return self._validate(expression, self.directory_specs)
|
|
84
|
+
|
|
85
|
+
def validate_dataset_id(self, drs_expression: str) -> DrsValidationReport:
    """
    Check a DRS dataset id expression against the dataset id specs of the project.

    :param drs_expression: A DRS dataset id expression.
    :type drs_expression: str
    :returns: A validation report.
    :rtype: DrsValidationReport
    """
    report = self._validate(drs_expression, self.dataset_id_specs)
    return report
|
|
95
|
+
|
|
96
|
+
def validate_file_name(self, drs_expression: str) -> DrsValidationReport:
    """
    Validate a file name expression.

    The expression must end with the full file name extension of the project
    (separator plus extension). The extension is stripped from the end of the
    expression and the remainder is validated against the DRS file name specs.
    When the expression does not end with the extension, the report carries a
    single FileNameExtensionIssue error and no further validation is done.

    :param drs_expression: A DRS file name expression.
    :type drs_expression: str
    :returns: A validation report.
    :rtype: DrsValidationReport
    """
    full_extension = self._get_full_file_name_extension()
    if not drs_expression.endswith(full_extension):
        issue = FileNameExtensionIssue(expected_extension=full_extension)
        return self._create_report(self.file_name_specs.type, drs_expression,
                                   [issue], [])
    # Strip the extension at the end only: str.replace would also delete any
    # occurrence of the extension text inside the file name itself.
    stem = drs_expression.removesuffix(full_extension)
    return self._validate(stem, self.file_name_specs)
|
|
114
|
+
|
|
115
|
+
def validate(self, drs_expression: str, drs_type: DrsType|str) -> DrsValidationReport:
    """
    Dispatch the validation of a DRS expression according to its type.

    :param drs_expression: A DRS expression.
    :type drs_expression: str
    :param drs_type: The type of the given DRS expression (directory, file_name or dataset_id)
    :type drs_type: DrsType|str
    :returns: A validation report.
    :rtype: DrsValidationReport
    :raises RuntimeError: If the DRS type is not one of the supported types.
    """
    if drs_type == DrsType.DIRECTORY:
        return self.validate_directory(drs_expression=drs_expression)
    if drs_type == DrsType.FILE_NAME:
        return self.validate_file_name(drs_expression=drs_expression)
    if drs_type == DrsType.DATASET_ID:
        return self.validate_dataset_id(drs_expression=drs_expression)
    raise RuntimeError(f'unsupported drs type {drs_type}')
|
|
135
|
+
|
|
136
|
+
def _parse(self,
           drs_expression: str,
           separator: str,
           drs_type: DrsType) -> tuple[list[str]|None,  # terms
                                       list[DrsIssue],   # Errors
                                       list[DrsIssue]]:  # Warnings
    """
    Split a DRS expression into terms and collect the parsing issues.

    The expression is scanned from its end towards its beginning so that the
    column of each issue can be computed from a cursor that walks backwards.

    :param drs_expression: The DRS expression to parse.
    :type drs_expression: str
    :param separator: The separator between the terms of the expression.
    :type separator: str
    :param drs_type: The type of the DRS expression.
    :type drs_type: DrsType
    :returns: The terms (None if the expression is unparsable), the errors and
              the warnings. Errors and warnings are sorted by column.
    :rtype: tuple[list[str]|None, list[DrsIssue], list[DrsIssue]]
    """
    errors: list[DrsIssue] = list()
    warnings: list[DrsIssue] = list()
    if not drs_expression:
        # An empty expression has no first/last character to inspect and cannot
        # hold any term: report it as unparsable instead of raising IndexError.
        errors.append(Unparsable(expected_drs_type=drs_type))
        return None, errors, warnings  # Early exit
    cursor_offset = 0
    # Spaces at the beginning/end of expression:
    start_with_space = drs_expression[0].isspace()
    end_with_space = drs_expression[-1].isspace()
    if start_with_space or end_with_space:
        issue: ParsingIssue = Space()
        if self.pedantic:
            errors.append(issue)
        else:
            warnings.append(issue)
        if start_with_space:
            previous_len = len(drs_expression)
            drs_expression = drs_expression.lstrip()
            # Columns are reported relative to the original (unstripped) expression.
            cursor_offset = previous_len - len(drs_expression)
        if end_with_space:
            drs_expression = drs_expression.rstrip()
    terms = drs_expression.split(separator)
    if len(terms) < 2:
        errors.append(Unparsable(expected_drs_type=drs_type))
        return None, errors, warnings  # Early exit
    max_term_index = len(terms)
    cursor_position = initial_cursor_position = len(drs_expression) + 1
    has_white_term = False
    # First pass: drop the empty or blank terms located at the end of the expression.
    for index in range(max_term_index-1, -1, -1):
        term = terms[index]
        if (is_white_term := term.isspace()) or (not term):
            has_white_term = has_white_term or is_white_term
            cursor_position -= len(term) + 1
            del terms[index]
            continue
        else:
            break
    if cursor_position != initial_cursor_position:
        # Some trailing terms were dropped: report them once.
        max_term_index = len(terms)
        column = cursor_position+cursor_offset
        if (drs_type == DrsType.DIRECTORY) and (not has_white_term):
            issue = ExtraSeparator(column=column)
            warnings.append(issue)
        else:
            issue = ExtraChar(column=column)
            errors.append(issue)
    # Second pass: report and drop the empty or blank terms inside the expression.
    for index in range(max_term_index-1, -1, -1):
        term = terms[index]
        len_term = len(term)
        if not term:
            column = cursor_position + cursor_offset
            issue = ExtraSeparator(column=column)
            if (drs_type != DrsType.DIRECTORY) or self.pedantic or (index == 0):
                errors.append(issue)
            else:
                warnings.append(issue)
            del terms[index]
        if term.isspace():
            column = cursor_position + cursor_offset - len_term
            issue = BlankTerm(column=column)
            errors.append(issue)
            del terms[index]
        cursor_position -= len_term + 1

    # Mypy doesn't understand that ParsingIssues are DrsIssues...
    sorted_errors = DrsValidator._sort_parser_issues(errors)  # type: ignore
    sorted_warnings = DrsValidator._sort_parser_issues(warnings)  # type: ignore
    return terms, sorted_errors, sorted_warnings  # type: ignore
|
|
207
|
+
|
|
208
|
+
@staticmethod
|
|
209
|
+
def _sort_parser_issues(issues: list[ParsingIssue]) -> list[ParsingIssue]:
|
|
210
|
+
return sorted(issues, key=lambda issue: issue.column if issue.column else 0)
|
|
211
|
+
|
|
212
|
+
def _validate_term(self, term: str, part: DrsPart) -> bool:
|
|
213
|
+
match part.kind:
|
|
214
|
+
case DrsPartKind.COLLECTION:
|
|
215
|
+
casted_part: DrsCollection = cast(DrsCollection, part)
|
|
216
|
+
try:
|
|
217
|
+
matching_terms = projects.valid_term_in_collection(term,
|
|
218
|
+
self.project_id,
|
|
219
|
+
casted_part.collection_id)
|
|
220
|
+
except Exception as e:
|
|
221
|
+
msg = f'problem while validating term: {e}.Abort.'
|
|
222
|
+
raise APIException(msg) from e
|
|
223
|
+
if len(matching_terms) > 0:
|
|
224
|
+
return True
|
|
225
|
+
else:
|
|
226
|
+
return False
|
|
227
|
+
case DrsPartKind.CONSTANT:
|
|
228
|
+
part_casted: DrsConstant = cast(DrsConstant, part)
|
|
229
|
+
return part_casted.value != term
|
|
230
|
+
case _:
|
|
231
|
+
raise RuntimeError(f'unsupported DRS specs part type {part.kind}')
|
|
232
|
+
|
|
233
|
+
def _create_report(self,
                   type: DrsType,
                   drs_expression: str,
                   errors: list[DrsIssue],
                   warnings: list[DrsIssue]) -> DrsValidationReport:
    """
    Assemble a validation report for the project of this application.

    :param type: The type of the validated DRS expression.
    :type type: DrsType
    :param drs_expression: The validated DRS expression.
    :type drs_expression: str
    :param errors: The errors found during the validation.
    :type errors: list[DrsIssue]
    :param warnings: The warnings found during the validation.
    :type warnings: list[DrsIssue]
    :returns: A validation report.
    :rtype: DrsValidationReport
    """
    # The casts only serve the type checker: the lists are passed through as is.
    report_errors = cast(list[ValidationError], errors)
    report_warnings = cast(list[ValidationWarning], warnings)
    return DrsValidationReport(project_id=self.project_id,
                               type=type,
                               expression=drs_expression,
                               errors=report_errors,
                               warnings=report_warnings)
|
|
242
|
+
|
|
243
|
+
def _validate(self,
              drs_expression: str,
              specs: DrsSpecification) -> DrsValidationReport:
    """
    Validate a DRS expression against DRS specs.

    The expression is parsed into terms, then terms and parts of the specs are
    walked in parallel. A term that does not match an optional collection is
    tried against the next part. Remaining parts produce missing term issues
    and remaining terms produce extra term errors.

    :param drs_expression: The DRS expression to validate.
    :type drs_expression: str
    :param specs: The DRS specs the expression is validated against.
    :type specs: DrsSpecification
    :returns: A validation report.
    :rtype: DrsValidationReport
    """
    terms, errors, warnings = self._parse(drs_expression, specs.separator, specs.type)
    if not terms:
        return self._create_report(specs.type, drs_expression, errors, warnings)  # Early exit.
    term_index = 0
    term_max_index = len(terms)
    part_index = 0
    part_max_index = len(specs.parts)
    # Outcome per part: 0 = matched, 1 = invalid term, -1 = optional part skipped.
    matching_code_mapping = dict()
    while part_index < part_max_index:
        term = terms[term_index]
        part = specs.parts[part_index]
        if self._validate_term(term, part):
            term_index += 1
            part_index += 1
            matching_code_mapping[part.__str__()] = 0
        elif part.kind == DrsPartKind.CONSTANT or \
             cast(DrsCollection, part).is_required:
            issue: ComplianceIssue = InvalidTerm(term=term,
                                                 term_position=term_index+1,
                                                 collection_id_or_constant_value=str(part))
            errors.append(issue)
            matching_code_mapping[part.__str__()] = 1
            term_index += 1
            part_index += 1
        else:  # The part is not required so try to match the term with the next part.
            part_index += 1
            matching_code_mapping[part.__str__()] = -1
        if term_index == term_max_index:
            break
    # Cases:
    # - All terms and collections have been processed.
    # - Not enough term to process all collections.
    # - Extra terms left whereas all collections have been processed:
    #   + The last collections are required => report extra terms.
    #   + The last collections are not required and these terms were not validated by them.
    #     => Should report error even if the collections are not required.
    if part_index < part_max_index:  # Missing terms.
        for index in range(part_index, part_max_index):
            part = specs.parts[index]
            issue = MissingTerm(collection_id=str(part), collection_position=index+1)
            if part.kind == DrsPartKind.CONSTANT or \
               cast(DrsCollection, part).is_required:
                errors.append(issue)
            else:
                warnings.append(issue)
    elif term_index < term_max_index:  # Extra terms.
        # Line up the extra terms with the last parts of the specs.
        part_index -= term_max_index - term_index
        for index in range(term_index, term_max_index):
            term = terms[index]
            collection_id = None
            # With more extra terms than parts, part_index starts negative: such a
            # term has no part to be attributed to (a negative index would wrap
            # around specs.parts or raise IndexError).
            if part_index >= 0:
                part = specs.parts[part_index]
                if part.kind != DrsPartKind.CONSTANT and \
                   (not cast(DrsCollection, part).is_required) and \
                   matching_code_mapping[part.__str__()] < 0:
                    collection_id = str(part)
            issue = ExtraTerm(term=term, term_position=index, collection_id=collection_id)
            errors.append(issue)
            part_index += 1
    return self._create_report(specs.type, drs_expression, errors, warnings)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
if __name__ == "__main__":
|
|
308
|
+
project_id = 'cmip6plus'
|
|
309
|
+
validator = DrsValidator(project_id)
|
|
310
|
+
drs_expressions = [
|
|
311
|
+
".CMIP6Plus.CMIP.IPSL. .MIROC6.amip..r2i2p1f2.ACmon.od550aer. ..gn",
|
|
312
|
+
]
|
|
313
|
+
import time
|
|
314
|
+
for drs_expression in drs_expressions:
|
|
315
|
+
start_time = time.perf_counter_ns()
|
|
316
|
+
report = validator.validate_dataset_id(drs_expression)
|
|
317
|
+
stop_time = time.perf_counter_ns()
|
|
318
|
+
print(f'elapsed time: {(stop_time-start_time)/1000000} ms')
|
|
319
|
+
if report.nb_errors > 0:
|
|
320
|
+
print(f'error(s): {report.nb_errors}')
|
|
321
|
+
for error in report.errors:
|
|
322
|
+
print(error)
|
|
323
|
+
else:
|
|
324
|
+
print('error(s): 0')
|
|
325
|
+
if report.nb_warnings > 0:
|
|
326
|
+
print(f'warning(s): {report.nb_warnings}')
|
|
327
|
+
for warning in report.warnings:
|
|
328
|
+
print(warning)
|
|
329
|
+
else:
|
|
330
|
+
print('warning(s): 0')
|
esgvoc/cli/drs.py
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
import shlex
|
|
2
|
+
import sys
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from rich.table import Table
|
|
8
|
+
|
|
9
|
+
import esgvoc.api as ev
|
|
10
|
+
from esgvoc.apps.drs.generator import DrsGenerator
|
|
11
|
+
from esgvoc.apps.drs.report import DrsGenerationReport, DrsValidationReport
|
|
12
|
+
from esgvoc.apps.drs.validator import DrsValidator
|
|
13
|
+
|
|
14
|
+
app = typer.Typer()
|
|
15
|
+
console = Console()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Predefined list of projects and DRS types
|
|
20
|
+
# projects = ["cmip5", "cmip6","cmip6plus", "cmip7"]
|
|
21
|
+
projects = ev.get_all_projects()
|
|
22
|
+
drs_types = ["filename", "directory", "dataset"]
|
|
23
|
+
|
|
24
|
+
def display(table):
    """
    Print a rich table on a dedicated recording console that is 200 columns wide.

    :param table: The table to be displayed
    """
    Console(record=True, width=200).print(table)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@app.command()
|
|
35
|
+
def drsvalid(
|
|
36
|
+
drs_entries: Optional[List[str]] = typer.Argument(None, help="List of DRS validation inputs in the form <project> <drstype> <string>"),
|
|
37
|
+
file: Optional[typer.FileText] = typer.Option(None, "--file", "-f", help="File containing DRS validation inputs, one per line in the form <project> <drstype> <string>"),
|
|
38
|
+
verbose: bool = typer.Option(False, "-v", "--verbose", help="Provide detailed validation results"),
|
|
39
|
+
output: Optional[str] = typer.Option(None, "-o", "--output", help="File to save the DRS entries validation"),
|
|
40
|
+
rm_prefix: Optional[str] = typer.Option(None,"-p","--prefix", help="Remove given prefix from all checked directory"),
|
|
41
|
+
pedantic: Optional[bool] = typer.Option(False,"-e","--enforce", help="Enable pedantic mode, enforcing strict compliance, mean that warnings are now errors.")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
) -> List[DrsValidationReport]:
|
|
46
|
+
"""
|
|
47
|
+
Validates DRS strings for a specific project and type.
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
drs_entries (Optional[List[str]]): A list of DRS validation inputs in the form <project> <drstype> <string>.
|
|
51
|
+
file (Optional[typer.FileText]): File containing DRS validation inputs, one per line.
|
|
52
|
+
verbose (bool): If true, prints detailed validation results.
|
|
53
|
+
|
|
54
|
+
Usage Examples:
|
|
55
|
+
# Validate multiple filenames for CMIP6
|
|
56
|
+
drsvalid cmip6 filename file1.nc file2.nc file3.nc
|
|
57
|
+
|
|
58
|
+
# Validate using a file
|
|
59
|
+
drsvalid --file drs_input.txt
|
|
60
|
+
"""
|
|
61
|
+
current_project = None
|
|
62
|
+
current_drs_type = None
|
|
63
|
+
reports = []
|
|
64
|
+
|
|
65
|
+
entries = drs_entries or []
|
|
66
|
+
|
|
67
|
+
if not sys.stdin.isatty(): # Check if input is being piped via stdin
|
|
68
|
+
entries.extend(el for line in sys.stdin for el in shlex.split(line))
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
if file:
|
|
72
|
+
entries.extend(el for line in file for el in line.strip().split(" "))
|
|
73
|
+
|
|
74
|
+
i = 0
|
|
75
|
+
while i < len(entries):
|
|
76
|
+
if entries[i] in [""," "]:
|
|
77
|
+
i+=1
|
|
78
|
+
continue
|
|
79
|
+
|
|
80
|
+
if entries[i] in projects:
|
|
81
|
+
current_project = entries[i]
|
|
82
|
+
i += 1
|
|
83
|
+
continue
|
|
84
|
+
if entries[i] in drs_types:
|
|
85
|
+
current_drs_type = entries[i]
|
|
86
|
+
i += 1
|
|
87
|
+
continue
|
|
88
|
+
|
|
89
|
+
if current_project is None:
|
|
90
|
+
raise typer.BadParameter(f"Invalid project: {entries[i]}")
|
|
91
|
+
|
|
92
|
+
if current_drs_type is None:
|
|
93
|
+
raise typer.BadParameter(f"Invalid drs_type: {entries[i]}")
|
|
94
|
+
|
|
95
|
+
string = entries[i]
|
|
96
|
+
i += 1
|
|
97
|
+
validator = DrsValidator(current_project, pedantic=pedantic)
|
|
98
|
+
report = None
|
|
99
|
+
match current_drs_type:
|
|
100
|
+
case "filename":
|
|
101
|
+
report = validator.validate_file_name(string)
|
|
102
|
+
case "directory":
|
|
103
|
+
if rm_prefix:
|
|
104
|
+
prefix = rm_prefix+"/" if rm_prefix[-1]!="/" else ""
|
|
105
|
+
else:
|
|
106
|
+
prefix=None
|
|
107
|
+
report = validator.validate_directory(string, prefix)
|
|
108
|
+
case "dataset":
|
|
109
|
+
report = validator.validate_dataset_id(string)
|
|
110
|
+
case _:
|
|
111
|
+
raise RuntimeError("drstype is not known")
|
|
112
|
+
reports.append(report)
|
|
113
|
+
|
|
114
|
+
if verbose:
|
|
115
|
+
table = Table(title="Validation result")
|
|
116
|
+
table.add_column("entry", style="cyan")
|
|
117
|
+
table.add_column("project & drs_type", style="cyan")
|
|
118
|
+
table.add_column("warnings", style="magenta")
|
|
119
|
+
table.add_column("errors", style="red")
|
|
120
|
+
table.add_column("valid")
|
|
121
|
+
|
|
122
|
+
for report in reports:
|
|
123
|
+
entry = str(report.expression)
|
|
124
|
+
proj_and_type = str(report.project_id) + " " + report.type + " "
|
|
125
|
+
warnings = "\n".join(["⚠️ " + str(warning) for warning in report.warnings])
|
|
126
|
+
errors = "\n".join(["⚠️ " + str(error) for error in report.errors])
|
|
127
|
+
valid = "✅ Valid" if report else "❌ Invalid"
|
|
128
|
+
|
|
129
|
+
table.add_row("-"*4,"-"*4,"-"*4,"-"*4,"-"*4)
|
|
130
|
+
table.add_row(entry,proj_and_type, warnings, errors, valid)
|
|
131
|
+
|
|
132
|
+
console.print(table)
|
|
133
|
+
elif output:
|
|
134
|
+
with open(output, "w") as f:
|
|
135
|
+
for report in reports:
|
|
136
|
+
f.write(str(report) + "\n")
|
|
137
|
+
console.print(f"DRS validation entries saved to [green]{output}[/green]")
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
else:
|
|
141
|
+
for report in reports:
|
|
142
|
+
console.print(str(report))
|
|
143
|
+
|
|
144
|
+
return reports
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
@app.command()
|
|
148
|
+
def drsgen(
|
|
149
|
+
drs_entries: Optional[List[str]] = typer.Argument(None, help="List of inputs to generate DRS in the form <project> <drstype> <bag_of_terms>"),
|
|
150
|
+
file: Optional[typer.FileText] = typer.Option(None, "--file", "-f", help="File containing DRS generation inputs, one per line in the form <project> <drstype> <bag_of_terms>"),
|
|
151
|
+
verbose: bool = typer.Option(False, "-v", "--verbose", help="Provide detailed generation results"),
|
|
152
|
+
output: Optional[str] = typer.Option(None, "-o", "--output", help="File to save the generated DRS entries"),
|
|
153
|
+
) -> List[DrsGenerationReport]:
|
|
154
|
+
"""
|
|
155
|
+
Generates DRS strings for a specific project and type based on input bag of terms.
|
|
156
|
+
|
|
157
|
+
Args:
|
|
158
|
+
drs_entries (Optional[List[str]]): A list of inputs in the form <project> <drstype> <bag_of_terms>.
|
|
159
|
+
file (Optional[typer.FileText]): File containing DRS generation inputs, one per line.
|
|
160
|
+
verbose (bool): If true, prints detailed generation results.
|
|
161
|
+
output (Optional[str]): File path to save the generated DRS entries.
|
|
162
|
+
|
|
163
|
+
Usage Examples:
|
|
164
|
+
# Generate multiple filenames for CMIP6
|
|
165
|
+
drsgen cmip6 filename var1=tas var2=pr
|
|
166
|
+
|
|
167
|
+
# Generate using a file
|
|
168
|
+
drsgen --file drs_input.txt
|
|
169
|
+
"""
|
|
170
|
+
current_project = None
|
|
171
|
+
current_drs_type = None
|
|
172
|
+
generated_reports = []
|
|
173
|
+
|
|
174
|
+
entries = drs_entries or []
|
|
175
|
+
|
|
176
|
+
if not sys.stdin.isatty(): # Check if input is being piped via stdin
|
|
177
|
+
entries.extend(el for line in sys.stdin for el in shlex.split(line))
|
|
178
|
+
|
|
179
|
+
if file:
|
|
180
|
+
entries.extend(el for line in file for el in shlex.split(line))
|
|
181
|
+
|
|
182
|
+
i = 0
|
|
183
|
+
while i < len(entries):
|
|
184
|
+
if entries[i] in [""," "]:
|
|
185
|
+
i+=1
|
|
186
|
+
continue
|
|
187
|
+
if entries[i] in projects:
|
|
188
|
+
current_project = entries[i]
|
|
189
|
+
i += 1
|
|
190
|
+
continue
|
|
191
|
+
if entries[i] in drs_types:
|
|
192
|
+
current_drs_type = entries[i]
|
|
193
|
+
i += 1
|
|
194
|
+
continue
|
|
195
|
+
|
|
196
|
+
if current_project is None:
|
|
197
|
+
raise typer.BadParameter(f"Invalid project: {entries[i]}")
|
|
198
|
+
|
|
199
|
+
if current_drs_type is None:
|
|
200
|
+
raise typer.BadParameter(f"Invalid drs_type: {entries[i]}")
|
|
201
|
+
|
|
202
|
+
bag_of_terms = entries[i]
|
|
203
|
+
bag_of_terms = set(entries[i].split(" "))
|
|
204
|
+
i += 1
|
|
205
|
+
|
|
206
|
+
generator = DrsGenerator(current_project)
|
|
207
|
+
report = None
|
|
208
|
+
match current_drs_type:
|
|
209
|
+
case "filename":
|
|
210
|
+
report = generator.generate_file_name_from_bag_of_terms(bag_of_terms)
|
|
211
|
+
case "directory":
|
|
212
|
+
report = generator.generate_directory_from_bag_of_terms(bag_of_terms)
|
|
213
|
+
case "dataset":
|
|
214
|
+
report = generator.generate_dataset_id_from_bag_of_terms(bag_of_terms)
|
|
215
|
+
case _:
|
|
216
|
+
raise RuntimeError("drstype is not known")
|
|
217
|
+
generated_reports.append(report)
|
|
218
|
+
|
|
219
|
+
if verbose:
|
|
220
|
+
table = Table(title="Generation result")
|
|
221
|
+
table.add_column("deduced mapping entry", style="cyan")
|
|
222
|
+
table.add_column("warnings", style="magenta")
|
|
223
|
+
table.add_column("errors", style="red")
|
|
224
|
+
table.add_column("result", style="green", width=10)
|
|
225
|
+
for report in generated_reports:
|
|
226
|
+
entry = str(report.mapping_used)
|
|
227
|
+
warnings = "\n".join(["⚠️ " + str(warning) for warning in report.warnings])
|
|
228
|
+
errors = "\n".join([f"🔍 {error}" for error in report.errors])
|
|
229
|
+
result = report.generated_drs_expression
|
|
230
|
+
table.add_row(entry, warnings, errors, result)
|
|
231
|
+
table.add_row("----", "----", "----", "----")
|
|
232
|
+
if table.columns[3].width is not None and len(result) > table.columns[3].width:
|
|
233
|
+
table.columns[3].width = len(result)+1
|
|
234
|
+
console.print(table)
|
|
235
|
+
|
|
236
|
+
elif output:
|
|
237
|
+
with open(output, "w") as f:
|
|
238
|
+
for report in generated_reports:
|
|
239
|
+
f.write(str(report) + "\n")
|
|
240
|
+
console.print(f"Generated entries saved to [green]{output}[/green]")
|
|
241
|
+
|
|
242
|
+
else:
|
|
243
|
+
for report in generated_reports:
|
|
244
|
+
console.print(str(report))
|
|
245
|
+
|
|
246
|
+
return generated_reports
|
|
247
|
+
if __name__ == "__main__":
|
|
248
|
+
app()
|
esgvoc/cli/get.py
CHANGED
|
@@ -19,7 +19,7 @@ def validate_key_format(key: str):
|
|
|
19
19
|
"""
|
|
20
20
|
Validate if the key matches the XXXX:YYYY:ZZZZ format.
|
|
21
21
|
"""
|
|
22
|
-
if not re.match(r"^
|
|
22
|
+
if not re.match(r"^[a-zA-Z0-9\/_]*:[a-zA-Z0-9\/_]*:[a-zA-Z0-9\/_.]*$", key):
|
|
23
23
|
raise typer.BadParameter(f"Invalid key format: {key}. Must be XXXX:YYYY:ZZZZ.")
|
|
24
24
|
return key.split(":")
|
|
25
25
|
|
|
@@ -96,30 +96,31 @@ def display(data:Any):
|
|
|
96
96
|
@app.command()
|
|
97
97
|
def get(keys: list[str] = typer.Argument(..., help="List of keys in XXXX:YYYY:ZZZZ format")):
|
|
98
98
|
"""
|
|
99
|
-
Retrieve a specific value from the database system
|
|
100
|
-
This command allows you to fetch a value by specifying the universe/project, data_descriptor/collection,
|
|
101
|
-
and term in a structured format
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
<
|
|
109
|
-
<
|
|
110
|
-
|
|
99
|
+
Retrieve a specific value from the database system.\n
|
|
100
|
+
This command allows you to fetch a value by specifying the universe/project, data_descriptor/collection,
|
|
101
|
+
and term in a structured format.\n
|
|
102
|
+
\n
|
|
103
|
+
|
|
104
|
+
Usage:\n
|
|
105
|
+
`get <project>:<collection>:<term>`\n
|
|
106
|
+
\n
|
|
107
|
+
Arguments:\n
|
|
108
|
+
<project>\tThe project id to query. like `cmip6plus`\n
|
|
109
|
+
<collection>\tThe collection id in the specified database.\n
|
|
110
|
+
<term>\t\tThe term id within the specified collection.\n
|
|
111
|
+
\n
|
|
111
112
|
Example:
|
|
112
|
-
To retrieve the value from the "cmip6plus" project, under the "institution_id" column,
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
Notes
|
|
119
|
-
- Ensure data exist in your system before using this command (use status command to see whats available)
|
|
120
|
-
- Use a colon (`:`) to separate the parts of the argument.
|
|
121
|
-
- if more than one argument is given i.e get X:Y:Z A:B:C the 2 results are appended.
|
|
122
|
-
|
|
113
|
+
To retrieve the value from the "cmip6plus" project, under the "institution_id" column, the term with the identifier "ipsl", you would use: \n
|
|
114
|
+
`get cmip6plus:institution_id:ipsl`\n
|
|
115
|
+
The default project is the universe CV : the argument would be like `universe:institution:ipsl` or `:institution:ipsl` \n
|
|
116
|
+
- to get list of available term from universe institution `:institution:` \n
|
|
117
|
+
\n
|
|
118
|
+
\n
|
|
119
|
+
Notes:\n
|
|
120
|
+
- Ensure data exist in your system before using this command (use `esgvoc status` command to see whats available).\n
|
|
121
|
+
- Use a colon (`:`) to separate the parts of the argument. \n
|
|
122
|
+
- if more than one argument is given i.e get X:Y:Z A:B:C the 2 results are appended. \n
|
|
123
|
+
\n
|
|
123
124
|
"""
|
|
124
125
|
known_projects = get_all_projects()
|
|
125
126
|
|
|
@@ -133,7 +134,7 @@ def get(keys: list[str] = typer.Argument(..., help="List of keys in XXXX:YYYY:ZZ
|
|
|
133
134
|
if where == "" or where=="universe":
|
|
134
135
|
res = handle_universe(what,who)
|
|
135
136
|
elif where in known_projects:
|
|
136
|
-
res = handle_project(where,what,who,
|
|
137
|
+
res = handle_project(where,what,who,None)
|
|
137
138
|
else:
|
|
138
139
|
res = handle_unknown(where,what,who)
|
|
139
140
|
|
esgvoc/cli/install.py
CHANGED
|
@@ -1,14 +1,17 @@
|
|
|
1
1
|
import typer
|
|
2
|
-
from esgvoc.core.service import
|
|
2
|
+
from esgvoc.core.service import current_state
|
|
3
3
|
|
|
4
4
|
app = typer.Typer()
|
|
5
5
|
|
|
6
6
|
@app.command()
|
|
7
7
|
def install():
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
8
|
+
"""Initialize default config and apply settings"""
|
|
9
|
+
try:
|
|
10
|
+
typer.echo("Initialized default configuration")
|
|
11
|
+
current_state.synchronize_all()
|
|
12
|
+
except Exception as e:
|
|
13
|
+
typer.echo(f"Error during installation: {str(e)}", err=True)
|
|
14
|
+
raise typer.Exit(1)
|
|
15
|
+
|
|
16
|
+
if __name__ == "__main__":
|
|
17
|
+
app()
|