esgvoc 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/api/__init__.py +15 -4
- esgvoc/api/data_descriptors/__init__.py +3 -0
- esgvoc/api/data_descriptors/directory_date.py +48 -0
- esgvoc/api/project_specs.py +82 -0
- esgvoc/api/projects.py +160 -130
- esgvoc/api/report.py +78 -50
- esgvoc/api/search.py +28 -10
- esgvoc/api/universe.py +17 -18
- esgvoc/apps/__init__.py +7 -0
- esgvoc/apps/drs/__init__.py +0 -16
- esgvoc/apps/drs/constants.py +2 -0
- esgvoc/apps/drs/generator.py +424 -0
- esgvoc/apps/drs/report.py +401 -0
- esgvoc/apps/drs/validator.py +332 -0
- esgvoc/cli/config.py +3 -0
- esgvoc/cli/drs.py +238 -0
- esgvoc/cli/get.py +1 -1
- esgvoc/cli/main.py +4 -3
- esgvoc/cli/status.py +13 -1
- esgvoc/cli/valid.py +1 -5
- esgvoc/core/db/models/mixins.py +7 -0
- esgvoc/core/db/models/project.py +3 -8
- esgvoc/core/db/project_ingestion.py +4 -1
- esgvoc/core/db/universe_ingestion.py +3 -3
- esgvoc/core/service/settings.py +17 -8
- esgvoc/core/service/settings.toml +11 -6
- esgvoc/core/service/settings_default.toml +11 -14
- esgvoc/core/service/state.py +19 -12
- esgvoc-0.2.1.dist-info/METADATA +58 -0
- {esgvoc-0.1.2.dist-info → esgvoc-0.2.1.dist-info}/RECORD +33 -26
- esgvoc-0.2.1.dist-info/licenses/LICENSE.txt +519 -0
- esgvoc/apps/drs/models.py +0 -43
- esgvoc/apps/drs/parser.py +0 -27
- esgvoc-0.1.2.dist-info/METADATA +0 -54
- {esgvoc-0.1.2.dist-info → esgvoc-0.2.1.dist-info}/WHEEL +0 -0
- {esgvoc-0.1.2.dist-info → esgvoc-0.2.1.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
from typing import cast
|
|
2
|
+
from esgvoc.api.project_specs import (ProjectSpecs,
|
|
3
|
+
DrsType,
|
|
4
|
+
DrsPart,
|
|
5
|
+
DrsSpecification,
|
|
6
|
+
DrsPartKind,
|
|
7
|
+
DrsCollection,
|
|
8
|
+
DrsConstant)
|
|
9
|
+
import esgvoc.api.projects as projects
|
|
10
|
+
import esgvoc.apps.drs.constants as constants
|
|
11
|
+
from esgvoc.apps.drs.report import (DrsValidationReport,
|
|
12
|
+
DrsIssue,
|
|
13
|
+
ParserIssue,
|
|
14
|
+
ValidationIssue,
|
|
15
|
+
Space,
|
|
16
|
+
Unparsable,
|
|
17
|
+
ExtraSeparator,
|
|
18
|
+
ExtraChar,
|
|
19
|
+
BlankToken,
|
|
20
|
+
InvalidToken,
|
|
21
|
+
ExtraToken,
|
|
22
|
+
MissingToken,
|
|
23
|
+
FileNameExtensionIssue)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class DrsApplication:
    """
    Base class shared by the DRS applications of a project.

    It loads the DRS specifications of the project once, at construction time,
    and exposes them by DRS type.
    """

    def __init__(self, project_id: str, pedantic: bool = False) -> None:
        """
        :param project_id: The id of the project whose DRS specs are loaded.
        :type project_id: str
        :param pedantic: Turn warnings into errors. Default False.
        :type pedantic: bool
        :raises ValueError: If the project declares an unsupported DRS specs type.
        """
        self.project_id: str = project_id
        """The project id."""
        self.pedantic: bool = pedantic
        """Same as the option of GCC: turn warnings into errors. Default False."""
        project_specs: ProjectSpecs = projects.get_project_specs(project_id)
        # Only the specs types present in the project are bound to attributes.
        for drs_specs in project_specs.drs_specs:
            specs_type = drs_specs.type
            if specs_type == DrsType.DIRECTORY:
                self.directory_specs: DrsSpecification = drs_specs
                """The DRS directory specs of the project."""
            elif specs_type == DrsType.FILE_NAME:
                self.file_name_specs: DrsSpecification = drs_specs
                """The DRS file name specs of the project."""
            elif specs_type == DrsType.DATASET_ID:
                self.dataset_id_specs: DrsSpecification = drs_specs
                """The DRS dataset id specs of the project."""
            else:
                raise ValueError(f'unsupported DRS specs type {drs_specs.type}')

    def _get_full_file_name_extension(self) -> str:
        """
        Returns the full file name extension (the separator plus the extension) of the DRS file
        name specs of the project.

        :returns: The full file name extension.
        :rtype: str
        :raises ValueError: If the file name specs have no properties.
        """
        file_name_specs: DrsSpecification = self.file_name_specs
        if not file_name_specs.properties:
            raise ValueError('missing properties in the DRS file name specifications of the ' +
                             f'project {self.project_id}')
        separator = file_name_specs.properties[constants.FILE_NAME_EXTENSION_SEPARATOR_KEY]
        extension = file_name_specs.properties[constants.FILE_NAME_EXTENSION_KEY]
        return separator + extension

    def _get_specs(self, drs_type: DrsType|str) -> DrsSpecification:
        """
        Returns the DRS specs of the project that correspond to the given DRS type.

        :param drs_type: The DRS type (directory, file name or dataset id).
        :type drs_type: DrsType|str
        :returns: The matching DRS specs.
        :rtype: DrsSpecification
        :raises ValueError: If the DRS type is not supported.
        """
        if drs_type == DrsType.DIRECTORY:
            return self.directory_specs
        if drs_type == DrsType.FILE_NAME:
            return self.file_name_specs
        if drs_type == DrsType.DATASET_ID:
            return self.dataset_id_specs
        raise ValueError(f'unsupported DRS type {drs_type}')
|
|
79
|
+
|
|
80
|
+
class DrsValidator(DrsApplication):
|
|
81
|
+
"""
|
|
82
|
+
Valid a DRS directory, dataset id and file name expression against a project.
|
|
83
|
+
"""
|
|
84
|
+
|
|
85
|
+
def validate_directory(self, drs_expression: str) -> DrsValidationReport:
|
|
86
|
+
"""
|
|
87
|
+
Validate a DRS directory expression.
|
|
88
|
+
|
|
89
|
+
:param drs_expression: A DRS directory expression.
|
|
90
|
+
:type drs_expression: str
|
|
91
|
+
:returns: A validation report.
|
|
92
|
+
:rtype: DrsValidationReport
|
|
93
|
+
"""
|
|
94
|
+
return self._validate(drs_expression, self.directory_specs)
|
|
95
|
+
|
|
96
|
+
def validate_dataset_id(self, drs_expression: str) -> DrsValidationReport:
|
|
97
|
+
"""
|
|
98
|
+
Validate a DRS dataset id expression.
|
|
99
|
+
|
|
100
|
+
:param drs_expression: A DRS dataset id expression.
|
|
101
|
+
:type drs_expression: str
|
|
102
|
+
:returns: A validation report.
|
|
103
|
+
:rtype: DrsValidationReport
|
|
104
|
+
"""
|
|
105
|
+
return self._validate(drs_expression, self.dataset_id_specs)
|
|
106
|
+
|
|
107
|
+
def validate_file_name(self, drs_expression: str) -> DrsValidationReport:
|
|
108
|
+
"""
|
|
109
|
+
Validate a file name expression.
|
|
110
|
+
|
|
111
|
+
:param drs_expression: A DRS file name expression.
|
|
112
|
+
:type drs_expression: str
|
|
113
|
+
:returns: A validation report.
|
|
114
|
+
:rtype: DrsValidationReport
|
|
115
|
+
"""
|
|
116
|
+
full_extension = self._get_full_file_name_extension()
|
|
117
|
+
if drs_expression.endswith(full_extension):
|
|
118
|
+
drs_expression = drs_expression.replace(full_extension, '')
|
|
119
|
+
result = self._validate(drs_expression, self.file_name_specs)
|
|
120
|
+
else:
|
|
121
|
+
issue = FileNameExtensionIssue(expected_extension=full_extension)
|
|
122
|
+
result = self._create_report(self.file_name_specs.type, drs_expression,
|
|
123
|
+
[issue], [])
|
|
124
|
+
return result
|
|
125
|
+
|
|
126
|
+
def validate(self, drs_expression: str, drs_type: DrsType|str) -> DrsValidationReport:
|
|
127
|
+
"""
|
|
128
|
+
Validate a DRS expression.
|
|
129
|
+
|
|
130
|
+
:param drs_expression: A DRS expression.
|
|
131
|
+
:type drs_expression: str
|
|
132
|
+
:param drs_type: The type of the given DRS expression (directory, file_name or dataset_id)
|
|
133
|
+
:type drs_type: DrsType|str
|
|
134
|
+
:returns: A validation report.
|
|
135
|
+
:rtype: DrsValidationReport
|
|
136
|
+
"""
|
|
137
|
+
specs = self._get_specs(drs_type)
|
|
138
|
+
return self._validate(drs_expression, specs)
|
|
139
|
+
|
|
140
|
+
def _parse(self,
|
|
141
|
+
drs_expression: str,
|
|
142
|
+
separator: str,
|
|
143
|
+
drs_type: DrsType) -> tuple[list[str]|None, # Tokens
|
|
144
|
+
list[DrsIssue], # Errors
|
|
145
|
+
list[DrsIssue]]: # Warnings
|
|
146
|
+
errors: list[DrsIssue] = list()
|
|
147
|
+
warnings: list[DrsIssue] = list()
|
|
148
|
+
cursor_offset = 0
|
|
149
|
+
# Spaces at the beginning/end of expression:
|
|
150
|
+
start_with_space = drs_expression[0].isspace()
|
|
151
|
+
end_with_space = drs_expression[-1].isspace()
|
|
152
|
+
if start_with_space or end_with_space:
|
|
153
|
+
issue: ParserIssue = Space()
|
|
154
|
+
if self.pedantic:
|
|
155
|
+
errors.append(issue)
|
|
156
|
+
else:
|
|
157
|
+
warnings.append(issue)
|
|
158
|
+
if start_with_space:
|
|
159
|
+
previous_len = len(drs_expression)
|
|
160
|
+
drs_expression = drs_expression.lstrip()
|
|
161
|
+
cursor_offset = previous_len - len(drs_expression)
|
|
162
|
+
if end_with_space:
|
|
163
|
+
drs_expression = drs_expression.rstrip()
|
|
164
|
+
tokens = drs_expression.split(separator)
|
|
165
|
+
if len(tokens) < 2:
|
|
166
|
+
errors.append(Unparsable(expected_drs_type=drs_type))
|
|
167
|
+
return None, errors, warnings # Early exit
|
|
168
|
+
max_token_index = len(tokens)
|
|
169
|
+
cursor_position = initial_cursor_position = len(drs_expression) + 1
|
|
170
|
+
has_white_token = False
|
|
171
|
+
for index in range(max_token_index-1, -1, -1):
|
|
172
|
+
token = tokens[index]
|
|
173
|
+
if (is_white_token := token.isspace()) or (not token):
|
|
174
|
+
has_white_token = has_white_token or is_white_token
|
|
175
|
+
cursor_position -= len(token) + 1
|
|
176
|
+
del tokens[index]
|
|
177
|
+
continue
|
|
178
|
+
else:
|
|
179
|
+
break
|
|
180
|
+
if cursor_position != initial_cursor_position:
|
|
181
|
+
max_token_index = len(tokens)
|
|
182
|
+
column = cursor_position+cursor_offset
|
|
183
|
+
if (drs_type == DrsType.DIRECTORY) and (not has_white_token):
|
|
184
|
+
issue = ExtraSeparator(column=column)
|
|
185
|
+
warnings.append(issue)
|
|
186
|
+
else:
|
|
187
|
+
issue = ExtraChar(column=column)
|
|
188
|
+
errors.append(issue)
|
|
189
|
+
for index in range(max_token_index-1, -1, -1):
|
|
190
|
+
token = tokens[index]
|
|
191
|
+
len_token = len(token)
|
|
192
|
+
if not token:
|
|
193
|
+
column = cursor_position + cursor_offset
|
|
194
|
+
issue = ExtraSeparator(column=column)
|
|
195
|
+
if (drs_type != DrsType.DIRECTORY) or self.pedantic or (index == 0):
|
|
196
|
+
errors.append(issue)
|
|
197
|
+
else:
|
|
198
|
+
warnings.append(issue)
|
|
199
|
+
del tokens[index]
|
|
200
|
+
if token.isspace():
|
|
201
|
+
column = cursor_position + cursor_offset - len_token
|
|
202
|
+
issue = BlankToken(column=column)
|
|
203
|
+
errors.append(issue)
|
|
204
|
+
del tokens[index]
|
|
205
|
+
cursor_position -= len_token + 1
|
|
206
|
+
|
|
207
|
+
# Mypy doesn't understand that ParserIssues are DrsIssues...
|
|
208
|
+
sorted_errors = DrsValidator._sort_parser_issues(errors) # type: ignore
|
|
209
|
+
sorted_warnings = DrsValidator._sort_parser_issues(warnings) # type: ignore
|
|
210
|
+
return tokens, sorted_errors, sorted_warnings # type: ignore
|
|
211
|
+
|
|
212
|
+
@staticmethod
|
|
213
|
+
def _sort_parser_issues(issues: list[ParserIssue]) -> list[ParserIssue]:
|
|
214
|
+
return sorted(issues, key=lambda issue: issue.column if issue.column else 0)
|
|
215
|
+
|
|
216
|
+
def _validate_token(self, token: str, part: DrsPart) -> bool:
|
|
217
|
+
match part.kind:
|
|
218
|
+
case DrsPartKind.COLLECTION:
|
|
219
|
+
casted_part: DrsCollection = cast(DrsCollection, part)
|
|
220
|
+
try:
|
|
221
|
+
matching_terms = projects.valid_term_in_collection(token,
|
|
222
|
+
self.project_id,
|
|
223
|
+
casted_part.collection_id)
|
|
224
|
+
except Exception as e:
|
|
225
|
+
msg = f'problem while validating token: {e}.Abort.'
|
|
226
|
+
raise ValueError(msg) from e
|
|
227
|
+
if len(matching_terms) > 0:
|
|
228
|
+
return True
|
|
229
|
+
else:
|
|
230
|
+
return False
|
|
231
|
+
case DrsPartKind.CONSTANT:
|
|
232
|
+
part_casted: DrsConstant = cast(DrsConstant, part)
|
|
233
|
+
return part_casted.value != token
|
|
234
|
+
case _:
|
|
235
|
+
raise ValueError(f'unsupported DRS specs part type {part.kind}')
|
|
236
|
+
|
|
237
|
+
def _create_report(self,
|
|
238
|
+
type: DrsType,
|
|
239
|
+
drs_expression: str,
|
|
240
|
+
errors: list[DrsIssue],
|
|
241
|
+
warnings: list[DrsIssue]) -> DrsValidationReport:
|
|
242
|
+
return DrsValidationReport(project_id=self.project_id, type=type,
|
|
243
|
+
expression=drs_expression, errors=errors, warnings=warnings)
|
|
244
|
+
|
|
245
|
+
def _validate(self,
|
|
246
|
+
drs_expression: str,
|
|
247
|
+
specs: DrsSpecification) -> DrsValidationReport:
|
|
248
|
+
tokens, errors, warnings = self._parse(drs_expression, specs.separator, specs.type)
|
|
249
|
+
if not tokens:
|
|
250
|
+
return self._create_report(specs.type, drs_expression, errors, warnings) # Early exit.
|
|
251
|
+
token_index = 0
|
|
252
|
+
token_max_index = len(tokens)
|
|
253
|
+
part_index = 0
|
|
254
|
+
part_max_index = len(specs.parts)
|
|
255
|
+
matching_code_mapping = dict()
|
|
256
|
+
while part_index < part_max_index:
|
|
257
|
+
token = tokens[token_index]
|
|
258
|
+
part = specs.parts[part_index]
|
|
259
|
+
if self._validate_token(token, part):
|
|
260
|
+
token_index += 1
|
|
261
|
+
part_index += 1
|
|
262
|
+
matching_code_mapping[part.__str__()] = 0
|
|
263
|
+
elif part.kind == DrsPartKind.CONSTANT or \
|
|
264
|
+
cast(DrsCollection, part).is_required:
|
|
265
|
+
issue: ValidationIssue = InvalidToken(token=token,
|
|
266
|
+
token_position=token_index+1,
|
|
267
|
+
collection_id_or_constant_value=str(part))
|
|
268
|
+
errors.append(issue)
|
|
269
|
+
matching_code_mapping[part.__str__()] = 1
|
|
270
|
+
token_index += 1
|
|
271
|
+
part_index += 1
|
|
272
|
+
else: # The part is not required so try to match the token with the next part.
|
|
273
|
+
part_index += 1
|
|
274
|
+
matching_code_mapping[part.__str__()] = -1
|
|
275
|
+
if token_index == token_max_index:
|
|
276
|
+
break
|
|
277
|
+
# Cases:
|
|
278
|
+
# - All tokens and collections have been processed.
|
|
279
|
+
# - Not enough token to process all collections.
|
|
280
|
+
# - Extra tokens left whereas all collections have been processed:
|
|
281
|
+
# + The last collections are required => report extra tokens.
|
|
282
|
+
# + The last collections are not required and these tokens were not validated by them.
|
|
283
|
+
# => Should report error even if the collections are not required.
|
|
284
|
+
if part_index < part_max_index: # Missing tokens.
|
|
285
|
+
for index in range(part_index, part_max_index):
|
|
286
|
+
part = specs.parts[index]
|
|
287
|
+
issue = MissingToken(collection_id=str(part), collection_position=index+1)
|
|
288
|
+
if part.kind == DrsPartKind.CONSTANT or \
|
|
289
|
+
cast(DrsCollection, part).is_required:
|
|
290
|
+
errors.append(issue)
|
|
291
|
+
else:
|
|
292
|
+
warnings.append(issue)
|
|
293
|
+
elif token_index < token_max_index: # Extra tokens.
|
|
294
|
+
part_index -= token_max_index - token_index
|
|
295
|
+
for index in range(token_index, token_max_index):
|
|
296
|
+
token = tokens[index]
|
|
297
|
+
part = specs.parts[part_index]
|
|
298
|
+
if part.kind != DrsPartKind.CONSTANT and \
|
|
299
|
+
(not cast(DrsCollection, part).is_required) and \
|
|
300
|
+
matching_code_mapping[part.__str__()] < 0:
|
|
301
|
+
issue = ExtraToken(token=token, token_position=index, collection_id=str(part))
|
|
302
|
+
else:
|
|
303
|
+
issue = ExtraToken(token=token, token_position=index, collection_id=None)
|
|
304
|
+
errors.append(issue)
|
|
305
|
+
part_index += 1
|
|
306
|
+
return self._create_report(specs.type, drs_expression, errors, warnings)
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
if __name__ == "__main__":
    # Manual smoke test: validate a deliberately malformed dataset id and print
    # the elapsed time together with the issues of the report.
    import time

    project_id = 'cmip6plus'
    validator = DrsValidator(project_id)
    drs_expressions = [
        ".CMIP6Plus.CMIP.IPSL. .MIROC6.amip..r2i2p1f2.ACmon.od550aer. ..gn",
    ]
    for drs_expression in drs_expressions:
        start_time = time.perf_counter_ns()
        report = validator.validate_dataset_id(drs_expression)
        stop_time = time.perf_counter_ns()
        print(f'elapsed time: {(stop_time-start_time)/1000000} ms')

        if not report.nb_errors > 0:
            print('error(s): 0')
        else:
            print(f'error(s): {report.nb_errors}')
            for error in report.errors:
                print(error)

        if not report.nb_warnings > 0:
            print('warning(s): 0')
        else:
            print(f'warning(s): {report.nb_warnings}')
            for warning in report.warnings:
                print(warning)
|
esgvoc/cli/config.py
CHANGED
|
@@ -34,6 +34,9 @@ def config(key: str |None = typer.Argument(None), value: str|None = typer.Argume
|
|
|
34
34
|
- With no arguments: display all settings.
|
|
35
35
|
- With one argument (key): display the value of the key.
|
|
36
36
|
- With two arguments (key and value): modify the key's value and save.
|
|
37
|
+
|
|
38
|
+
usage :
|
|
39
|
+
esgvoc config universe.db_path .cache/dbs/somethingelse
|
|
37
40
|
"""
|
|
38
41
|
|
|
39
42
|
settings = load_settings()
|
esgvoc/cli/drs.py
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
from esgvoc.apps.drs.generator import DrsGenerator
|
|
2
|
+
from esgvoc.apps.drs.report import DrsValidationReport, DrsGeneratorReport
|
|
3
|
+
from esgvoc.apps.drs.validator import DrsValidator
|
|
4
|
+
import sys
|
|
5
|
+
import typer
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from rich.table import Table
|
|
8
|
+
from typing import List, Optional
|
|
9
|
+
import esgvoc.api as ev
|
|
10
|
+
import shlex
|
|
11
|
+
|
|
12
|
+
app = typer.Typer()
console = Console()

# DRS types accepted on the command line.
drs_types = ["filename", "directory", "dataset"]
# Projects accepted on the command line, queried once when the module is imported.
projects = ev.get_all_projects()
|
|
21
|
+
|
|
22
|
+
def display(table):
    """
    Print a rich table on a dedicated, 200 columns wide, recording console.

    :param table: The table to be displayed
    """
    Console(record=True, width=200).print(table)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@app.command()
|
|
33
|
+
def drsvalid(
|
|
34
|
+
drs_entries: Optional[List[str]] = typer.Argument(None, help="List of DRS validation inputs in the form <project> <drstype> <string>"),
|
|
35
|
+
file: Optional[typer.FileText] = typer.Option(None, "--file", "-f", help="File containing DRS validation inputs, one per line in the form <project> <drstype> <string>"),
|
|
36
|
+
verbose: bool = typer.Option(False, "-v", "--verbose", help="Provide detailed validation results"),
|
|
37
|
+
output: Optional[str] = typer.Option(None, "-o", "--output", help="File to save the DRS entries validation"),
|
|
38
|
+
|
|
39
|
+
) -> List[DrsValidationReport]:
|
|
40
|
+
"""
|
|
41
|
+
Validates DRS strings for a specific project and type.
|
|
42
|
+
|
|
43
|
+
Args:
|
|
44
|
+
drs_entries (Optional[List[str]]): A list of DRS validation inputs in the form <project> <drstype> <string>.
|
|
45
|
+
file (Optional[typer.FileText]): File containing DRS validation inputs, one per line.
|
|
46
|
+
verbose (bool): If true, prints detailed validation results.
|
|
47
|
+
|
|
48
|
+
Usage Examples:
|
|
49
|
+
# Validate multiple filenames for CMIP6
|
|
50
|
+
drsvalid cmip6 filename file1.nc file2.nc file3.nc
|
|
51
|
+
|
|
52
|
+
# Validate using a file
|
|
53
|
+
drsvalid --file drs_input.txt
|
|
54
|
+
"""
|
|
55
|
+
current_project = None
|
|
56
|
+
current_drs_type = None
|
|
57
|
+
reports = []
|
|
58
|
+
|
|
59
|
+
entries = drs_entries or []
|
|
60
|
+
|
|
61
|
+
if not sys.stdin.isatty(): # Check if input is being piped via stdin
|
|
62
|
+
entries.extend(el for line in sys.stdin for el in shlex.split(line))
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
if file:
|
|
66
|
+
entries.extend(el for line in file for el in line.strip().split(" "))
|
|
67
|
+
|
|
68
|
+
i = 0
|
|
69
|
+
while i < len(entries):
|
|
70
|
+
if entries[i] in [""," "]:
|
|
71
|
+
i+=1
|
|
72
|
+
continue
|
|
73
|
+
|
|
74
|
+
if entries[i] in projects:
|
|
75
|
+
current_project = entries[i]
|
|
76
|
+
i += 1
|
|
77
|
+
continue
|
|
78
|
+
if entries[i] in drs_types:
|
|
79
|
+
current_drs_type = entries[i]
|
|
80
|
+
i += 1
|
|
81
|
+
continue
|
|
82
|
+
|
|
83
|
+
if current_project is None:
|
|
84
|
+
raise typer.BadParameter(f"Invalid project: {entries[i]}")
|
|
85
|
+
|
|
86
|
+
if current_drs_type is None:
|
|
87
|
+
raise typer.BadParameter(f"Invalid drs_type: {entries[i]}")
|
|
88
|
+
|
|
89
|
+
string = entries[i]
|
|
90
|
+
i += 1
|
|
91
|
+
validator = DrsValidator(current_project)
|
|
92
|
+
report = None
|
|
93
|
+
match current_drs_type:
|
|
94
|
+
case "filename":
|
|
95
|
+
report = validator.validate_file_name(string)
|
|
96
|
+
case "directory":
|
|
97
|
+
report = validator.validate_directory(string)
|
|
98
|
+
case "dataset":
|
|
99
|
+
report = validator.validate_dataset_id(string)
|
|
100
|
+
case _:
|
|
101
|
+
raise RuntimeError("drstype is not known")
|
|
102
|
+
reports.append(report)
|
|
103
|
+
|
|
104
|
+
if verbose:
|
|
105
|
+
table = Table(title="Validation result")
|
|
106
|
+
table.add_column("entry", style="cyan")
|
|
107
|
+
table.add_column("project & drs_type", style="cyan")
|
|
108
|
+
table.add_column("warnings", style="magenta")
|
|
109
|
+
table.add_column("errors", style="red")
|
|
110
|
+
table.add_column("valid")
|
|
111
|
+
|
|
112
|
+
for report in reports:
|
|
113
|
+
entry = str(report.expression)
|
|
114
|
+
proj_and_type = str(report.project_id) + " " + report.type + " "
|
|
115
|
+
warnings = "\n".join(["⚠️ " + str(warning) for warning in report.warnings])
|
|
116
|
+
errors = "\n".join(["⚠️ " + str(error) for error in report.errors])
|
|
117
|
+
valid = "✅ Valid" if report else "❌ Invalid"
|
|
118
|
+
|
|
119
|
+
table.add_row("-"*4,"-"*4,"-"*4,"-"*4,"-"*4)
|
|
120
|
+
table.add_row(entry,proj_and_type, warnings, errors, valid)
|
|
121
|
+
|
|
122
|
+
console.print(table)
|
|
123
|
+
elif output:
|
|
124
|
+
with open(output, "w") as f:
|
|
125
|
+
for report in reports:
|
|
126
|
+
f.write(str(report) + "\n")
|
|
127
|
+
console.print(f"DRS validation entries saved to [green]{output}[/green]")
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
else:
|
|
131
|
+
for report in reports:
|
|
132
|
+
console.print(str(report))
|
|
133
|
+
|
|
134
|
+
return reports
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@app.command()
|
|
138
|
+
def drsgen(
|
|
139
|
+
drs_entries: Optional[List[str]] = typer.Argument(None, help="List of inputs to generate DRS in the form <project> <drstype> <bag_of_tokens>"),
|
|
140
|
+
file: Optional[typer.FileText] = typer.Option(None, "--file", "-f", help="File containing DRS generation inputs, one per line in the form <project> <drstype> <bag_of_tokens>"),
|
|
141
|
+
verbose: bool = typer.Option(False, "-v", "--verbose", help="Provide detailed generation results"),
|
|
142
|
+
output: Optional[str] = typer.Option(None, "-o", "--output", help="File to save the generated DRS entries"),
|
|
143
|
+
) -> List[DrsGeneratorReport]:
|
|
144
|
+
"""
|
|
145
|
+
Generates DRS strings for a specific project and type based on input bag of tokens.
|
|
146
|
+
|
|
147
|
+
Args:
|
|
148
|
+
drs_entries (Optional[List[str]]): A list of inputs in the form <project> <drstype> <bag_of_tokens>.
|
|
149
|
+
file (Optional[typer.FileText]): File containing DRS generation inputs, one per line.
|
|
150
|
+
verbose (bool): If true, prints detailed generation results.
|
|
151
|
+
output (Optional[str]): File path to save the generated DRS entries.
|
|
152
|
+
|
|
153
|
+
Usage Examples:
|
|
154
|
+
# Generate multiple filenames for CMIP6
|
|
155
|
+
drsgen cmip6 filename var1=tas var2=pr
|
|
156
|
+
|
|
157
|
+
# Generate using a file
|
|
158
|
+
drsgen --file drs_input.txt
|
|
159
|
+
"""
|
|
160
|
+
current_project = None
|
|
161
|
+
current_drs_type = None
|
|
162
|
+
generated_reports = []
|
|
163
|
+
|
|
164
|
+
entries = drs_entries or []
|
|
165
|
+
|
|
166
|
+
if not sys.stdin.isatty(): # Check if input is being piped via stdin
|
|
167
|
+
entries.extend(el for line in sys.stdin for el in shlex.split(line))
|
|
168
|
+
|
|
169
|
+
if file:
|
|
170
|
+
entries.extend(el for line in file for el in shlex.split(line))
|
|
171
|
+
|
|
172
|
+
i = 0
|
|
173
|
+
while i < len(entries):
|
|
174
|
+
if entries[i] in [""," "]:
|
|
175
|
+
i+=1
|
|
176
|
+
continue
|
|
177
|
+
if entries[i] in projects:
|
|
178
|
+
current_project = entries[i]
|
|
179
|
+
i += 1
|
|
180
|
+
continue
|
|
181
|
+
if entries[i] in drs_types:
|
|
182
|
+
current_drs_type = entries[i]
|
|
183
|
+
i += 1
|
|
184
|
+
continue
|
|
185
|
+
|
|
186
|
+
if current_project is None:
|
|
187
|
+
raise typer.BadParameter(f"Invalid project: {entries[i]}")
|
|
188
|
+
|
|
189
|
+
if current_drs_type is None:
|
|
190
|
+
raise typer.BadParameter(f"Invalid drs_type: {entries[i]}")
|
|
191
|
+
|
|
192
|
+
bag_of_tokens = entries[i]
|
|
193
|
+
bag_of_tokens = set(entries[i].split(" "))
|
|
194
|
+
i += 1
|
|
195
|
+
|
|
196
|
+
generator = DrsGenerator(current_project)
|
|
197
|
+
report = None
|
|
198
|
+
match current_drs_type:
|
|
199
|
+
case "filename":
|
|
200
|
+
report = generator.generate_file_name_from_bag_of_tokens(bag_of_tokens)
|
|
201
|
+
case "directory":
|
|
202
|
+
report = generator.generate_directory_from_bag_of_tokens(bag_of_tokens)
|
|
203
|
+
case "dataset":
|
|
204
|
+
report = generator.generate_dataset_id_from_bag_of_tokens(bag_of_tokens)
|
|
205
|
+
case _:
|
|
206
|
+
raise RuntimeError("drstype is not known")
|
|
207
|
+
generated_reports.append(report)
|
|
208
|
+
|
|
209
|
+
if verbose:
|
|
210
|
+
table = Table(title="Generation result")
|
|
211
|
+
table.add_column("deduced mapping entry", style="cyan")
|
|
212
|
+
table.add_column("warnings", style="magenta")
|
|
213
|
+
table.add_column("errors", style="red")
|
|
214
|
+
table.add_column("result", style="green", width=10)
|
|
215
|
+
for report in generated_reports:
|
|
216
|
+
entry = str(report.mapping_used)
|
|
217
|
+
warnings = "\n".join(["⚠️ " + str(warning) for warning in report.warnings])
|
|
218
|
+
errors = "\n".join([f"🔍 {error}" for error in report.errors])
|
|
219
|
+
result = report.computed_drs_expression
|
|
220
|
+
table.add_row(entry, warnings, errors, result)
|
|
221
|
+
table.add_row("----", "----", "----", "----")
|
|
222
|
+
if table.columns[3].width is not None and len(result) > table.columns[3].width:
|
|
223
|
+
table.columns[3].width = len(result)+1
|
|
224
|
+
console.print(table)
|
|
225
|
+
|
|
226
|
+
elif output:
|
|
227
|
+
with open(output, "w") as f:
|
|
228
|
+
for report in generated_reports:
|
|
229
|
+
f.write(str(report) + "\n")
|
|
230
|
+
console.print(f"Generated entries saved to [green]{output}[/green]")
|
|
231
|
+
|
|
232
|
+
else:
|
|
233
|
+
for report in generated_reports:
|
|
234
|
+
console.print(str(report))
|
|
235
|
+
|
|
236
|
+
return generated_reports
|
|
237
|
+
if __name__ == "__main__":
    # Run the DRS commands directly when this module is executed as a script.
    app()
|
esgvoc/cli/get.py
CHANGED
|
@@ -19,7 +19,7 @@ def validate_key_format(key: str):
|
|
|
19
19
|
"""
|
|
20
20
|
Validate if the key matches the XXXX:YYYY:ZZZZ format.
|
|
21
21
|
"""
|
|
22
|
-
if not re.match(r"^
|
|
22
|
+
if not re.match(r"^[a-zA-Z0-9\/_]*:[a-zA-Z0-9\/_]*:[a-zA-Z0-9\/_]*$", key):
|
|
23
23
|
raise typer.BadParameter(f"Invalid key format: {key}. Must be XXXX:YYYY:ZZZZ.")
|
|
24
24
|
return key.split(":")
|
|
25
25
|
|
esgvoc/cli/main.py
CHANGED
|
@@ -1,12 +1,10 @@
|
|
|
1
|
-
|
|
2
1
|
import typer
|
|
3
2
|
from esgvoc.cli.config import app as config_app
|
|
4
3
|
from esgvoc.cli.get import app as get_app
|
|
5
4
|
from esgvoc.cli.status import app as status_app
|
|
6
5
|
from esgvoc.cli.valid import app as valid_app
|
|
7
6
|
from esgvoc.cli.install import app as install_app
|
|
8
|
-
|
|
9
|
-
|
|
7
|
+
from esgvoc.cli.drs import app as drs_app
|
|
10
8
|
app = typer.Typer()
|
|
11
9
|
|
|
12
10
|
# Register the subcommands
|
|
@@ -15,8 +13,11 @@ app.add_typer(get_app)
|
|
|
15
13
|
app.add_typer(status_app)
|
|
16
14
|
app.add_typer(valid_app)
|
|
17
15
|
app.add_typer(install_app)
|
|
16
|
+
app.add_typer(drs_app)
|
|
18
17
|
|
|
19
18
|
def main():
    """Run the typer application with all the registered subcommands."""
    app()
|
|
20
|
+
|
|
21
|
+
|
|
21
22
|
if __name__ == "__main__":
|
|
22
23
|
main()
|
esgvoc/cli/status.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from esgvoc.core import service
|
|
2
|
+
from rich.table import Table
|
|
2
3
|
import typer
|
|
3
4
|
from rich.console import Console
|
|
4
5
|
|
|
@@ -21,6 +22,17 @@ def status():
|
|
|
21
22
|
"""
|
|
22
23
|
|
|
23
24
|
service.state_service.get_state_summary()
|
|
24
|
-
display(service.state_service.table())
|
|
25
|
+
#display(service.state_service.table())
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
table = Table(show_header=False, show_lines=True)
|
|
29
|
+
|
|
30
|
+
table.add_row("","Remote github repo","Local repository","Cache Database", style = "bright_green")
|
|
31
|
+
table.add_row("Universe path",service.state_service.universe.github_repo,service.state_service.universe.local_path,service.state_service.universe.db_path, style = "white")
|
|
32
|
+
table.add_row("Version",service.state_service.universe.github_version,service.state_service.universe.local_version,service.state_service.universe.db_version, style="bright_blue")
|
|
33
|
+
for proj_name,proj in service.state_service.projects.items():
|
|
34
|
+
table.add_row(f"{proj_name} path",proj.github_repo,proj.local_path,proj.db_path, style="white")
|
|
35
|
+
table.add_row("Version",proj.github_version,proj.local_version,proj.db_version,style ="bright_blue")
|
|
36
|
+
display(table)
|
|
25
37
|
|
|
26
38
|
|
esgvoc/cli/valid.py
CHANGED
|
@@ -6,10 +6,8 @@ from esgvoc.api.projects import (
|
|
|
6
6
|
valid_term_in_project,
|
|
7
7
|
valid_term_in_all_projects
|
|
8
8
|
)
|
|
9
|
-
from esgvoc.api import BasicValidationErrorVisitor
|
|
10
9
|
from requests import logging
|
|
11
10
|
from rich.table import Table
|
|
12
|
-
from sqlmodel import except_
|
|
13
11
|
import typer
|
|
14
12
|
import re
|
|
15
13
|
from rich.console import Console
|
|
@@ -121,11 +119,9 @@ def valid(
|
|
|
121
119
|
# Parse and collect errors for verbose mode
|
|
122
120
|
if validation_result == []:
|
|
123
121
|
detailed_results.append({"validation":validation, "errors":["did not found matching term"]})
|
|
124
|
-
|
|
125
122
|
results.append(False)
|
|
126
123
|
if project and collection and term and exception_message is None:
|
|
127
|
-
|
|
128
|
-
errors = [error.accept(visitor) for error in validation_result.errors]
|
|
124
|
+
errors = [str(error) for error in validation_result.errors]
|
|
129
125
|
detailed_results.append({"validation": validation, "errors": errors})
|
|
130
126
|
if exception_message is not None:
|
|
131
127
|
detailed_results.append({"validation": validation, "errors": [exception_message]})
|
esgvoc/core/db/models/mixins.py
CHANGED
|
@@ -4,10 +4,17 @@ from sqlmodel import Field
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class TermKind(Enum):
    """
    Enumeration of the kinds of term.
    """

    PLAIN = "plain"
    """End written term."""

    PATTERN = "pattern"
    """Regex based terms"""

    COMPOSITE = "composite"
    """Term composed of terms."""

    MIXED = "mixed"
    """To be defined."""
|
|
11
18
|
|
|
12
19
|
|
|
13
20
|
class PkMixin:
|