esgvoc-1.0.1-py3-none-any.whl → esgvoc-1.1.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +1 -1
- esgvoc/api/__init__.py +0 -6
- esgvoc/api/data_descriptors/__init__.py +6 -0
- esgvoc/api/data_descriptors/archive.py +5 -0
- esgvoc/api/data_descriptors/citation_url.py +5 -0
- esgvoc/api/data_descriptors/experiment.py +2 -2
- esgvoc/api/data_descriptors/known_branded_variable.py +58 -5
- esgvoc/api/data_descriptors/regex.py +5 -0
- esgvoc/api/data_descriptors/vertical_label.py +2 -2
- esgvoc/api/project_specs.py +48 -130
- esgvoc/api/projects.py +104 -63
- esgvoc/apps/drs/generator.py +47 -42
- esgvoc/apps/drs/validator.py +22 -38
- esgvoc/apps/jsg/json_schema_generator.py +252 -136
- esgvoc/apps/jsg/templates/template.jinja +249 -0
- esgvoc/apps/test_cv/README.md +214 -0
- esgvoc/apps/test_cv/cv_tester.py +1368 -0
- esgvoc/apps/test_cv/example_usage.py +216 -0
- esgvoc/apps/vr/__init__.py +12 -0
- esgvoc/apps/vr/build_variable_registry.py +71 -0
- esgvoc/apps/vr/example_usage.py +60 -0
- esgvoc/apps/vr/vr_app.py +333 -0
- esgvoc/cli/config.py +671 -86
- esgvoc/cli/drs.py +39 -21
- esgvoc/cli/main.py +2 -0
- esgvoc/cli/test_cv.py +257 -0
- esgvoc/core/constants.py +10 -7
- esgvoc/core/data_handler.py +24 -22
- esgvoc/core/db/connection.py +7 -0
- esgvoc/core/db/project_ingestion.py +34 -9
- esgvoc/core/db/universe_ingestion.py +1 -2
- esgvoc/core/service/configuration/setting.py +192 -21
- esgvoc/core/service/data_merger.py +1 -1
- esgvoc/core/service/state.py +18 -2
- {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/METADATA +2 -1
- {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/RECORD +40 -29
- esgvoc/apps/jsg/cmip6_template.json +0 -74
- /esgvoc/apps/{py.typed → test_cv/__init__.py} +0 -0
- {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/WHEEL +0 -0
- {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/entry_points.txt +0 -0
- {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/apps/drs/generator.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from typing import Any, Iterable, Mapping, cast
|
|
2
2
|
|
|
3
3
|
import esgvoc.api.projects as projects
|
|
4
|
-
from esgvoc.api.project_specs import
|
|
4
|
+
from esgvoc.api.project_specs import DrsSpecification, DrsType
|
|
5
5
|
from esgvoc.api.search import MatchingTerm
|
|
6
6
|
from esgvoc.apps.drs.report import (
|
|
7
7
|
AssignedTerm,
|
|
@@ -177,47 +177,54 @@ class DrsGenerator(DrsApplication):
|
|
|
177
177
|
part_position: int = 0
|
|
178
178
|
for part in specs.parts:
|
|
179
179
|
part_position += 1
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
if
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
issue = InvalidTerm(
|
|
189
|
-
term=part_value,
|
|
190
|
-
term_position=part_position,
|
|
191
|
-
collection_id_or_constant_value=collection_id,
|
|
192
|
-
)
|
|
193
|
-
errors.append(issue)
|
|
194
|
-
part_value = DrsGenerationReport.INVALID_TAG
|
|
195
|
-
else:
|
|
196
|
-
other_issue = MissingTerm(collection_id=collection_id, collection_position=part_position)
|
|
197
|
-
if collection_part.is_required:
|
|
198
|
-
errors.append(other_issue)
|
|
199
|
-
part_value = DrsGenerationReport.MISSING_TAG
|
|
180
|
+
collection_id = part.source_collection
|
|
181
|
+
if collection_id in mapping:
|
|
182
|
+
part_value = mapping[collection_id]
|
|
183
|
+
if has_to_valid_terms:
|
|
184
|
+
if part.source_collection_term is None:
|
|
185
|
+
matching_terms = projects.valid_term_in_collection(part_value,
|
|
186
|
+
self.project_id,
|
|
187
|
+
collection_id)
|
|
200
188
|
else:
|
|
201
|
-
|
|
202
|
-
|
|
189
|
+
matching_terms = projects.valid_term(
|
|
190
|
+
part_value,
|
|
191
|
+
self.project_id,
|
|
192
|
+
collection_id,
|
|
193
|
+
part.source_collection_term).validated
|
|
194
|
+
if not matching_terms:
|
|
195
|
+
issue = InvalidTerm(term=part_value,
|
|
196
|
+
term_position=part_position,
|
|
197
|
+
collection_id_or_constant_value=collection_id)
|
|
198
|
+
errors.append(issue)
|
|
199
|
+
part_value = DrsGenerationReport.INVALID_TAG
|
|
203
200
|
else:
|
|
204
|
-
|
|
205
|
-
|
|
201
|
+
other_issue = MissingTerm(collection_id=collection_id, collection_position=part_position)
|
|
202
|
+
if part.is_required:
|
|
203
|
+
errors.append(other_issue)
|
|
204
|
+
part_value = DrsGenerationReport.MISSING_TAG
|
|
205
|
+
else:
|
|
206
|
+
warnings.append(other_issue)
|
|
207
|
+
continue # The for loop.
|
|
206
208
|
|
|
207
209
|
drs_expression += part_value + specs.separator
|
|
208
210
|
|
|
209
|
-
drs_expression = drs_expression[0
|
|
211
|
+
drs_expression = drs_expression[0: len(drs_expression) - len(specs.separator)]
|
|
210
212
|
return drs_expression, errors, warnings
|
|
211
213
|
|
|
212
214
|
def _generate_from_bag_of_terms(self, terms: Iterable[str], specs: DrsSpecification) -> DrsGenerationReport: # noqa E127
|
|
213
215
|
collection_terms_mapping: dict[str, set[str]] = dict()
|
|
214
216
|
for term in terms:
|
|
215
|
-
matching_terms: list[MatchingTerm] =
|
|
216
|
-
for
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
217
|
+
matching_terms: list[MatchingTerm] = list()
|
|
218
|
+
for part in specs.parts:
|
|
219
|
+
if part.source_collection_term is None:
|
|
220
|
+
matching_terms.extend(projects.valid_term_in_collection(term, self.project_id,
|
|
221
|
+
part.source_collection))
|
|
222
|
+
else:
|
|
223
|
+
if projects.valid_term(term, self.project_id, part.source_collection,
|
|
224
|
+
part.source_collection_term).validated:
|
|
225
|
+
matching_terms.append(MatchingTerm(project_id=self.project_id,
|
|
226
|
+
collection_id=part.source_collection,
|
|
227
|
+
term_id=part.source_collection_term))
|
|
221
228
|
for matching_term in matching_terms:
|
|
222
229
|
if matching_term.collection_id not in collection_terms_mapping:
|
|
223
230
|
collection_terms_mapping[matching_term.collection_id] = set()
|
|
@@ -230,15 +237,13 @@ class DrsGenerator(DrsApplication):
|
|
|
230
237
|
if self.pedantic:
|
|
231
238
|
errors.extend(warnings)
|
|
232
239
|
warnings.clear()
|
|
233
|
-
return DrsGenerationReport(
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
warnings=cast(list[GenerationWarning], warnings),
|
|
241
|
-
)
|
|
240
|
+
return DrsGenerationReport(project_id=self.project_id,
|
|
241
|
+
type=specs.type,
|
|
242
|
+
given_mapping_or_bag_of_terms=terms,
|
|
243
|
+
mapping_used=mapping,
|
|
244
|
+
generated_drs_expression=drs_expression,
|
|
245
|
+
errors=cast(list[GenerationError], errors),
|
|
246
|
+
warnings=cast(list[GenerationWarning], warnings))
|
|
242
247
|
|
|
243
248
|
@staticmethod
|
|
244
249
|
def _resolve_conflicts(
|
|
@@ -329,7 +334,7 @@ class DrsGenerator(DrsApplication):
|
|
|
329
334
|
wining_id_and_term_pairs: list[tuple[str, str]] = list()
|
|
330
335
|
for collection_ids in conflicting_collection_ids_list:
|
|
331
336
|
for collection_index in range(0, len(collection_ids)):
|
|
332
|
-
collection_set = collection_ids[collection_index + 1
|
|
337
|
+
collection_set = collection_ids[collection_index + 1:] + collection_ids[:collection_index]
|
|
333
338
|
diff: set[str] = collection_terms_mapping[collection_ids[collection_index]].difference(
|
|
334
339
|
*[
|
|
335
340
|
collection_terms_mapping[index] # noqa E127
|
esgvoc/apps/drs/validator.py
CHANGED
|
@@ -3,10 +3,7 @@ from typing import cast
|
|
|
3
3
|
import esgvoc.api.projects as projects
|
|
4
4
|
import esgvoc.apps.drs.constants as constants
|
|
5
5
|
from esgvoc.api.project_specs import (
|
|
6
|
-
DrsCollection,
|
|
7
|
-
DrsConstant,
|
|
8
6
|
DrsPart,
|
|
9
|
-
DrsPartKind,
|
|
10
7
|
DrsSpecification,
|
|
11
8
|
DrsType,
|
|
12
9
|
ProjectSpecs,
|
|
@@ -44,19 +41,12 @@ class DrsApplication:
|
|
|
44
41
|
project_specs: ProjectSpecs | None = projects.get_project(project_id)
|
|
45
42
|
if not project_specs:
|
|
46
43
|
raise EsgvocNotFoundError(f"unable to find project '{project_id}'")
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
self.file_name_specs: DrsSpecification = specs
|
|
54
|
-
"""The DRS file name specs of the project."""
|
|
55
|
-
case DrsType.DATASET_ID:
|
|
56
|
-
self.dataset_id_specs: DrsSpecification = specs
|
|
57
|
-
"""The DRS dataset id specs of the project."""
|
|
58
|
-
case _:
|
|
59
|
-
raise EsgvocDbError(f"unsupported DRS specs type '{specs.type}'")
|
|
44
|
+
self.directory_specs: DrsSpecification = project_specs.drs_specs[DrsType.DIRECTORY]
|
|
45
|
+
"""The DRS directory specs of the project."""
|
|
46
|
+
self.file_name_specs: DrsSpecification = project_specs.drs_specs[DrsType.FILE_NAME]
|
|
47
|
+
"""The DRS file name specs of the project."""
|
|
48
|
+
self.dataset_id_specs: DrsSpecification = project_specs.drs_specs[DrsType.DATASET_ID]
|
|
49
|
+
"""The DRS dataset id specs of the project."""
|
|
60
50
|
|
|
61
51
|
def _get_full_file_name_extension(self) -> str:
|
|
62
52
|
"""
|
|
@@ -229,21 +219,18 @@ class DrsValidator(DrsApplication):
|
|
|
229
219
|
return sorted(issues, key=lambda issue: issue.column if issue.column else 0)
|
|
230
220
|
|
|
231
221
|
def _validate_term(self, term: str, part: DrsPart) -> bool:
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
return part_casted.value != term
|
|
245
|
-
case _:
|
|
246
|
-
raise EsgvocDbError(f"unsupported DRS specs part type '{part.kind}'")
|
|
222
|
+
if part.source_collection_term is None:
|
|
223
|
+
matching_terms = projects.valid_term_in_collection(
|
|
224
|
+
term,
|
|
225
|
+
self.project_id,
|
|
226
|
+
part.source_collection)
|
|
227
|
+
if len(matching_terms) > 0:
|
|
228
|
+
return True
|
|
229
|
+
else:
|
|
230
|
+
return False
|
|
231
|
+
else:
|
|
232
|
+
return projects.valid_term(term, self.project_id, part.source_collection,
|
|
233
|
+
part.source_collection_term).validated
|
|
247
234
|
|
|
248
235
|
def _create_report(self,
|
|
249
236
|
type: DrsType,
|
|
@@ -268,13 +255,12 @@ class DrsValidator(DrsApplication):
|
|
|
268
255
|
matching_code_mapping = dict()
|
|
269
256
|
while part_index < part_max_index:
|
|
270
257
|
term = terms[term_index]
|
|
271
|
-
part = specs.parts[part_index]
|
|
258
|
+
part: DrsPart = specs.parts[part_index]
|
|
272
259
|
if self._validate_term(term, part):
|
|
273
260
|
term_index += 1
|
|
274
261
|
part_index += 1
|
|
275
262
|
matching_code_mapping[part.__str__()] = 0
|
|
276
|
-
elif part.
|
|
277
|
-
cast(DrsCollection, part).is_required: # noqa E127
|
|
263
|
+
elif part.is_required:
|
|
278
264
|
issue: ComplianceIssue = InvalidTerm(term=term,
|
|
279
265
|
term_position=term_index+1,
|
|
280
266
|
collection_id_or_constant_value=str(part))
|
|
@@ -298,8 +284,7 @@ class DrsValidator(DrsApplication):
|
|
|
298
284
|
for index in range(part_index, part_max_index):
|
|
299
285
|
part = specs.parts[index]
|
|
300
286
|
issue = MissingTerm(collection_id=str(part), collection_position=index+1)
|
|
301
|
-
if part.
|
|
302
|
-
cast(DrsCollection, part).is_required:
|
|
287
|
+
if part.is_required:
|
|
303
288
|
errors.append(issue)
|
|
304
289
|
else:
|
|
305
290
|
warnings.append(issue)
|
|
@@ -308,8 +293,7 @@ class DrsValidator(DrsApplication):
|
|
|
308
293
|
for index in range(term_index, term_max_index):
|
|
309
294
|
term = terms[index]
|
|
310
295
|
part = specs.parts[part_index]
|
|
311
|
-
if part.
|
|
312
|
-
(not cast(DrsCollection, part).is_required) and \
|
|
296
|
+
if (not part.is_required) and \
|
|
313
297
|
matching_code_mapping[part.__str__()] < 0: # noqa E125
|
|
314
298
|
issue = ExtraTerm(term=term, term_position=index, collection_id=str(part))
|
|
315
299
|
else:
|