esgvoc 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +3 -1
- esgvoc/api/__init__.py +96 -72
- esgvoc/api/data_descriptors/__init__.py +18 -12
- esgvoc/api/data_descriptors/activity.py +8 -45
- esgvoc/api/data_descriptors/area_label.py +6 -0
- esgvoc/api/data_descriptors/branded_suffix.py +5 -0
- esgvoc/api/data_descriptors/branded_variable.py +5 -0
- esgvoc/api/data_descriptors/consortium.py +16 -56
- esgvoc/api/data_descriptors/data_descriptor.py +106 -0
- esgvoc/api/data_descriptors/date.py +3 -46
- esgvoc/api/data_descriptors/directory_date.py +3 -46
- esgvoc/api/data_descriptors/experiment.py +19 -54
- esgvoc/api/data_descriptors/forcing_index.py +3 -45
- esgvoc/api/data_descriptors/frequency.py +6 -43
- esgvoc/api/data_descriptors/grid_label.py +6 -44
- esgvoc/api/data_descriptors/horizontal_label.py +6 -0
- esgvoc/api/data_descriptors/initialisation_index.py +3 -44
- esgvoc/api/data_descriptors/institution.py +11 -54
- esgvoc/api/data_descriptors/license.py +4 -44
- esgvoc/api/data_descriptors/mip_era.py +6 -44
- esgvoc/api/data_descriptors/model_component.py +7 -45
- esgvoc/api/data_descriptors/organisation.py +3 -40
- esgvoc/api/data_descriptors/physic_index.py +3 -45
- esgvoc/api/data_descriptors/product.py +4 -43
- esgvoc/api/data_descriptors/realisation_index.py +3 -44
- esgvoc/api/data_descriptors/realm.py +4 -42
- esgvoc/api/data_descriptors/resolution.py +6 -44
- esgvoc/api/data_descriptors/source.py +18 -53
- esgvoc/api/data_descriptors/source_type.py +3 -41
- esgvoc/api/data_descriptors/sub_experiment.py +3 -41
- esgvoc/api/data_descriptors/table.py +6 -48
- esgvoc/api/data_descriptors/temporal_label.py +6 -0
- esgvoc/api/data_descriptors/time_range.py +3 -27
- esgvoc/api/data_descriptors/variable.py +13 -71
- esgvoc/api/data_descriptors/variant_label.py +3 -47
- esgvoc/api/data_descriptors/vertical_label.py +5 -0
- esgvoc/api/project_specs.py +3 -2
- esgvoc/api/projects.py +727 -446
- esgvoc/api/py.typed +0 -0
- esgvoc/api/report.py +29 -16
- esgvoc/api/search.py +140 -95
- esgvoc/api/universe.py +362 -156
- esgvoc/apps/__init__.py +3 -4
- esgvoc/apps/drs/constants.py +1 -1
- esgvoc/apps/drs/generator.py +185 -198
- esgvoc/apps/drs/report.py +272 -136
- esgvoc/apps/drs/validator.py +132 -145
- esgvoc/apps/py.typed +0 -0
- esgvoc/cli/drs.py +32 -21
- esgvoc/cli/get.py +35 -31
- esgvoc/cli/install.py +11 -8
- esgvoc/cli/main.py +0 -2
- esgvoc/cli/status.py +5 -5
- esgvoc/cli/valid.py +40 -40
- esgvoc/core/constants.py +1 -1
- esgvoc/core/db/__init__.py +2 -4
- esgvoc/core/db/connection.py +5 -3
- esgvoc/core/db/models/project.py +50 -8
- esgvoc/core/db/models/universe.py +51 -12
- esgvoc/core/db/project_ingestion.py +60 -46
- esgvoc/core/db/universe_ingestion.py +58 -29
- esgvoc/core/exceptions.py +33 -0
- esgvoc/core/logging_handler.py +1 -1
- esgvoc/core/repo_fetcher.py +4 -3
- esgvoc/core/service/__init__.py +37 -5
- esgvoc/core/service/configuration/config_manager.py +188 -0
- esgvoc/core/service/configuration/setting.py +88 -0
- esgvoc/core/service/state.py +49 -32
- {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/METADATA +34 -3
- esgvoc-0.4.0.dist-info/RECORD +80 -0
- esgvoc/api/_utils.py +0 -39
- esgvoc/cli/config.py +0 -82
- esgvoc/core/service/settings.py +0 -73
- esgvoc/core/service/settings.toml +0 -17
- esgvoc/core/service/settings_default.toml +0 -17
- esgvoc-0.2.1.dist-info/RECORD +0 -73
- {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/WHEEL +0 -0
- {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/entry_points.txt +0 -0
- {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/apps/drs/generator.py
CHANGED
|
@@ -1,27 +1,25 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Any, Iterable, Mapping, cast
|
|
2
2
|
|
|
3
3
|
import esgvoc.api.projects as projects
|
|
4
|
-
|
|
5
|
-
from esgvoc.
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
4
|
+
from esgvoc.api.project_specs import DrsCollection, DrsConstant, DrsPartKind, DrsSpecification, DrsType
|
|
5
|
+
from esgvoc.apps.drs.report import (
|
|
6
|
+
AssignedTerm,
|
|
7
|
+
ConflictingCollections,
|
|
8
|
+
DrsGenerationReport,
|
|
9
|
+
GenerationError,
|
|
10
|
+
GenerationIssue,
|
|
11
|
+
GenerationWarning,
|
|
12
|
+
InvalidTerm,
|
|
13
|
+
MissingTerm,
|
|
14
|
+
TooManyTermCollection,
|
|
15
|
+
)
|
|
11
16
|
from esgvoc.apps.drs.validator import DrsApplication
|
|
12
|
-
from esgvoc.
|
|
13
|
-
DrsIssue,
|
|
14
|
-
GeneratorIssue,
|
|
15
|
-
TooManyTokensCollection,
|
|
16
|
-
InvalidToken,
|
|
17
|
-
MissingToken,
|
|
18
|
-
ConflictingCollections,
|
|
19
|
-
AssignedToken)
|
|
17
|
+
from esgvoc.core.exceptions import EsgvocDbError
|
|
20
18
|
|
|
21
19
|
|
|
22
20
|
def _get_first_item(items: set[Any]) -> Any:
|
|
23
21
|
result = None
|
|
24
|
-
for result in items:
|
|
22
|
+
for result in items: # noqa: B007
|
|
25
23
|
break
|
|
26
24
|
return result
|
|
27
25
|
|
|
@@ -35,137 +33,148 @@ def _transform_set_and_sort(_set: set[Any]) -> list[Any]:
|
|
|
35
33
|
class DrsGenerator(DrsApplication):
|
|
36
34
|
"""
|
|
37
35
|
Generate a directory, dataset id and file name expression specified by the given project from
|
|
38
|
-
a mapping of collection ids and
|
|
36
|
+
a mapping of collection ids and terms or an unordered bag of terms.
|
|
39
37
|
"""
|
|
40
|
-
|
|
41
|
-
def generate_directory_from_mapping(self, mapping: Mapping[str, str]) ->
|
|
38
|
+
|
|
39
|
+
def generate_directory_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
    """
    Generate a directory DRS expression from a mapping of collection ids and terms.

    :param mapping: A mapping of collection ids (keys) and terms (values).
    :type mapping: Mapping[str, str]
    :returns: A generation report.
    :rtype: DrsGenerationReport
    """
    # Delegate to the mapping-based generation, using the directory specs.
    return self._generate_from_mapping(mapping, self.directory_specs)
|
|
51
|
-
|
|
52
|
-
def
|
|
49
|
+
|
|
50
|
+
def generate_directory_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
    """
    Generate a directory DRS expression from an unordered bag of terms.

    :param terms: An unordered bag of terms.
    :type terms: Iterable[str]
    :returns: A generation report.
    :rtype: DrsGenerationReport
    """
    # Delegate to the bag-of-terms generation, using the directory specs.
    return self._generate_from_bag_of_terms(terms, self.directory_specs)
|
|
62
60
|
|
|
63
|
-
def generate_dataset_id_from_mapping(self, mapping: Mapping[str, str]) ->
|
|
61
|
+
def generate_dataset_id_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
    """
    Generate a dataset id DRS expression from a mapping of collection ids and terms.

    :param mapping: A mapping of collection ids (keys) and terms (values).
    :type mapping: Mapping[str, str]
    :returns: A generation report.
    :rtype: DrsGenerationReport
    """
    # Delegate to the mapping-based generation, using the dataset id specs.
    return self._generate_from_mapping(mapping, self.dataset_id_specs)
|
|
73
|
-
|
|
74
|
-
def
|
|
71
|
+
|
|
72
|
+
def generate_dataset_id_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
    """
    Generate a dataset id DRS expression from an unordered bag of terms.

    :param terms: An unordered bag of terms.
    :type terms: Iterable[str]
    :returns: A generation report.
    :rtype: DrsGenerationReport
    """
    # Delegate to the bag-of-terms generation, using the dataset id specs.
    return self._generate_from_bag_of_terms(terms, self.dataset_id_specs)
|
|
85
82
|
|
|
86
|
-
def generate_file_name_from_mapping(self, mapping: Mapping[str, str]) ->
|
|
83
|
+
def generate_file_name_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
    """
    Generate a file name DRS expression from a mapping of collection ids and terms.
    The file name extension is appended automatically, according to the DRS specification,
    so none of the given terms must include the extension.

    :param mapping: A mapping of collection ids (keys) and terms (values).
    :type mapping: Mapping[str, str]
    :returns: A generation report.
    :rtype: DrsGenerationReport
    """
    report = self._generate_from_mapping(mapping, self.file_name_specs)
    # Complete the generated expression with the full file name extension.
    report.generated_drs_expression += self._get_full_file_name_extension()
    return report
|
|
98
|
+
|
|
99
|
+
def generate_file_name_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
    """
    Generate a file name DRS expression from an unordered bag of terms.
    The file name extension is appended automatically, according to the DRS specification,
    so none of the given terms must include the extension.

    :param terms: An unordered bag of terms.
    :type terms: Iterable[str]
    :returns: A generation report.
    :rtype: DrsGenerationReport
    """
    report = self._generate_from_bag_of_terms(terms, self.file_name_specs)
    # Complete the generated expression with the full file name extension.
    report.generated_drs_expression += self._get_full_file_name_extension()
    return report
|
|
115
114
|
|
|
116
115
|
def generate_from_mapping(self, mapping: Mapping[str, str],
                          drs_type: DrsType | str) -> DrsGenerationReport:
    """
    Generate a DRS expression from a mapping of collection ids and terms.

    :param mapping: A mapping of collection ids (keys) and terms (values).
    :type mapping: Mapping[str, str]
    :param drs_type: The type of the given DRS expression (directory, file_name or dataset_id)
    :type drs_type: DrsType|str
    :returns: A generation report.
    :rtype: DrsGenerationReport
    :raises EsgvocDbError: If the given DRS type equals none of the supported types.
    """
    # Plain equality tests: the same comparison a value pattern performs,
    # tried in the same order (directory, file name, dataset id).
    if drs_type == DrsType.DIRECTORY:
        return self.generate_directory_from_mapping(mapping=mapping)
    if drs_type == DrsType.FILE_NAME:
        return self.generate_file_name_from_mapping(mapping=mapping)
    if drs_type == DrsType.DATASET_ID:
        return self.generate_dataset_id_from_mapping(mapping=mapping)
    raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
|
|
136
|
+
|
|
137
|
+
def generate_from_bag_of_terms(self, terms: Iterable[str],
                               drs_type: DrsType | str) -> DrsGenerationReport:
    """
    Generate a DRS expression from an unordered bag of terms.

    :param terms: An unordered bag of terms.
    :type terms: Iterable[str]
    :param drs_type: The type of the given DRS expression (directory, file_name or dataset_id)
    :type drs_type: DrsType|str
    :returns: A generation report.
    :rtype: DrsGenerationReport
    :raises EsgvocDbError: If the given DRS type equals none of the supported types.
    """
    # Plain equality tests: the same comparison a value pattern performs,
    # tried in the same order (directory, file name, dataset id).
    if drs_type == DrsType.DIRECTORY:
        return self.generate_directory_from_bag_of_terms(terms=terms)
    if drs_type == DrsType.FILE_NAME:
        return self.generate_file_name_from_bag_of_terms(terms=terms)
    if drs_type == DrsType.DATASET_ID:
        return self.generate_dataset_id_from_bag_of_terms(terms=terms)
    raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
|
|
149
158
|
|
|
150
159
|
def _generate_from_mapping(self, mapping: Mapping[str, str],
                           specs: DrsSpecification) -> DrsGenerationReport:
    """
    Generate a DRS expression from a mapping and wrap the outcome in a report.

    :param mapping: A mapping of collection ids (keys) and terms (values).
    :type mapping: Mapping[str, str]
    :param specs: The DRS specification to generate the expression for.
    :type specs: DrsSpecification
    :returns: A generation report, in which the given mapping is recorded both
              as the given input and as the mapping used.
    :rtype: DrsGenerationReport
    """
    expression, found_errors, found_warnings = self.__generate_from_mapping(mapping, specs, True)
    # In pedantic mode, every warning is reported as an error.
    if self.pedantic:
        found_errors.extend(found_warnings)
        found_warnings.clear()
    report = DrsGenerationReport(
        project_id=self.project_id,
        type=specs.type,
        given_mapping_or_bag_of_terms=mapping,
        mapping_used=mapping,
        generated_drs_expression=expression,
        errors=cast(list[GenerationError], found_errors),
        warnings=cast(list[GenerationWarning], found_warnings),
    )
    return report
|
|
162
171
|
|
|
163
172
|
def __generate_from_mapping(self, mapping: Mapping[str, str],
|
|
164
173
|
specs: DrsSpecification,
|
|
165
|
-
has_to_valid_terms: bool)\
|
|
166
|
-
|
|
167
|
-
errors: list[
|
|
168
|
-
warnings: list[
|
|
174
|
+
has_to_valid_terms: bool) \
|
|
175
|
+
-> tuple[str, list[GenerationIssue], list[GenerationIssue]]: # noqa E127
|
|
176
|
+
errors: list[GenerationIssue] = list()
|
|
177
|
+
warnings: list[GenerationIssue] = list()
|
|
169
178
|
drs_expression = ""
|
|
170
179
|
part_position: int = 0
|
|
171
180
|
for part in specs.parts:
|
|
@@ -180,65 +189,65 @@ class DrsGenerator(DrsApplication):
|
|
|
180
189
|
self.project_id,
|
|
181
190
|
collection_id)
|
|
182
191
|
if not matching_terms:
|
|
183
|
-
issue =
|
|
184
|
-
|
|
185
|
-
|
|
192
|
+
issue = InvalidTerm(term=part_value,
|
|
193
|
+
term_position=part_position,
|
|
194
|
+
collection_id_or_constant_value=collection_id)
|
|
186
195
|
errors.append(issue)
|
|
187
|
-
part_value =
|
|
196
|
+
part_value = DrsGenerationReport.INVALID_TAG
|
|
188
197
|
else:
|
|
189
|
-
other_issue =
|
|
190
|
-
|
|
198
|
+
other_issue = MissingTerm(collection_id=collection_id,
|
|
199
|
+
collection_position=part_position)
|
|
191
200
|
if collection_part.is_required:
|
|
192
201
|
errors.append(other_issue)
|
|
193
|
-
part_value =
|
|
202
|
+
part_value = DrsGenerationReport.MISSING_TAG
|
|
194
203
|
else:
|
|
195
204
|
warnings.append(other_issue)
|
|
196
|
-
continue
|
|
205
|
+
continue # The for loop.
|
|
197
206
|
else:
|
|
198
207
|
constant_part = cast(DrsConstant, part)
|
|
199
208
|
part_value = constant_part.value
|
|
200
|
-
|
|
209
|
+
|
|
201
210
|
drs_expression += part_value + specs.separator
|
|
202
|
-
|
|
211
|
+
|
|
203
212
|
drs_expression = drs_expression[0:len(drs_expression)-len(specs.separator)]
|
|
204
213
|
return drs_expression, errors, warnings
|
|
205
214
|
|
|
206
|
-
def
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
for
|
|
210
|
-
matching_terms = projects.valid_term_in_project(
|
|
215
|
+
def _generate_from_bag_of_terms(self, terms: Iterable[str],
                                specs: DrsSpecification) -> DrsGenerationReport:
    """
    Generate a DRS expression from an unordered bag of terms and wrap the outcome in a report.

    Each term is grouped under the collections in which it is valid for the project,
    the conflicts between collections are resolved, then the resulting mapping is used
    to generate the expression.

    :param terms: An unordered bag of terms.
    :type terms: Iterable[str]
    :param specs: The DRS specification to generate the expression for.
    :type specs: DrsSpecification
    :returns: A generation report.
    :rtype: DrsGenerationReport
    """
    # A one-shot iterator would be exhausted by the grouping loop below and
    # then recorded, empty, in the report: materialize it first. Containers
    # are left untouched (an iterator is its own iterator, a container is not).
    if iter(terms) is terms:
        terms = list(terms)
    collection_terms_mapping: dict[str, set[str]] = dict()
    for term in terms:
        matching_terms = projects.valid_term_in_project(term, self.project_id)
        for matching_term in matching_terms:
            collection_terms_mapping.setdefault(matching_term.collection_id, set()).add(term)
    collection_terms_mapping, warnings = DrsGenerator._resolve_conflicts(collection_terms_mapping)
    mapping, errors = DrsGenerator._check_collection_terms_mapping(collection_terms_mapping)
    # The terms were already validated during grouping: no need to validate them again.
    drs_expression, errs, warns = self.__generate_from_mapping(mapping, specs, False)
    errors.extend(errs)
    warnings.extend(warns)
    # In pedantic mode, every warning is reported as an error.
    if self.pedantic:
        errors.extend(warnings)
        warnings.clear()
    return DrsGenerationReport(project_id=self.project_id, type=specs.type,
                               given_mapping_or_bag_of_terms=terms,
                               mapping_used=mapping, generated_drs_expression=drs_expression,
                               errors=cast(list[GenerationError], errors),
                               warnings=cast(list[GenerationWarning], warnings))
|
|
237
|
+
|
|
229
238
|
@staticmethod
|
|
230
|
-
def _resolve_conflicts(
|
|
231
|
-
|
|
232
|
-
warnings: list[
|
|
239
|
+
def _resolve_conflicts(collection_terms_mapping: dict[str, set[str]]) \
|
|
240
|
+
-> tuple[dict[str, set[str]], list[GenerationIssue]]: # noqa E127
|
|
241
|
+
warnings: list[GenerationIssue] = list()
|
|
233
242
|
conflicting_collection_ids_list: list[list[str]] = list()
|
|
234
|
-
collection_ids: list[str] = list(
|
|
243
|
+
collection_ids: list[str] = list(collection_terms_mapping.keys())
|
|
235
244
|
len_collection_ids: int = len(collection_ids)
|
|
236
|
-
|
|
245
|
+
|
|
237
246
|
for l_collection_index in range(0, len_collection_ids - 1):
|
|
238
247
|
conflicting_collection_ids: list[str] = list()
|
|
239
248
|
for r_collection_index in range(l_collection_index + 1, len_collection_ids):
|
|
240
|
-
if
|
|
241
|
-
|
|
249
|
+
if collection_terms_mapping[collection_ids[l_collection_index]].isdisjoint(
|
|
250
|
+
collection_terms_mapping[collection_ids[r_collection_index]]):
|
|
242
251
|
continue
|
|
243
252
|
else:
|
|
244
253
|
not_registered = True
|
|
@@ -256,105 +265,102 @@ class DrsGenerator(DrsApplication):
|
|
|
256
265
|
# Each time a collection is resolved, we must restart the loop so as to check if others can be,
|
|
257
266
|
# until no progress is made.
|
|
258
267
|
while True:
|
|
259
|
-
# 1. Non-conflicting collections with only one
|
|
260
|
-
# Non-conflicting collections with more than one
|
|
268
|
+
# 1. Non-conflicting collections with only one term are assigned.
|
|
269
|
+
# Non-conflicting collections with more than one term will be raise an error
|
|
261
270
|
# in the _check method.
|
|
262
|
-
|
|
271
|
+
|
|
263
272
|
# Nothing to do.
|
|
264
273
|
|
|
265
|
-
# 2a. Collections with one
|
|
266
|
-
# We don't search for collection with more than one
|
|
267
|
-
# the same, because we cannot choose which
|
|
268
|
-
# So stick with one
|
|
274
|
+
# 2a. Collections with one term that are conflicting to each other will raise an error.
|
|
275
|
+
# We don't search for collection with more than one term which term sets are exactly
|
|
276
|
+
# the same, because we cannot choose which term will be removed in 2b.
|
|
277
|
+
# So stick with one term collections: those collection will be detected in method _check.
|
|
269
278
|
collection_ids_with_len_eq_1_list: list[list[str]] = list()
|
|
270
279
|
for collection_ids in conflicting_collection_ids_list:
|
|
271
280
|
tmp_conflicting_collection_ids: list[str] = list()
|
|
272
281
|
for collection_id in collection_ids:
|
|
273
|
-
if len(
|
|
282
|
+
if len(collection_terms_mapping[collection_id]) == 1:
|
|
274
283
|
tmp_conflicting_collection_ids.append(collection_id)
|
|
275
284
|
if len(tmp_conflicting_collection_ids) > 1:
|
|
276
285
|
collection_ids_with_len_eq_1_list.append(tmp_conflicting_collection_ids)
|
|
277
|
-
# 2b. As it is not possible to resolve collections sharing the same unique
|
|
278
|
-
# raise errors, remove the faulty collections and their
|
|
286
|
+
# 2b. As it is not possible to resolve collections sharing the same unique term:
|
|
287
|
+
# raise errors, remove the faulty collections and their term.
|
|
279
288
|
if collection_ids_with_len_eq_1_list:
|
|
280
289
|
for collection_ids_to_be_removed in collection_ids_with_len_eq_1_list:
|
|
281
290
|
DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
|
|
282
291
|
collection_ids_to_be_removed)
|
|
283
|
-
DrsGenerator.
|
|
284
|
-
|
|
292
|
+
DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
|
|
293
|
+
collection_ids_to_be_removed)
|
|
285
294
|
# Every time conflicting_collection_ids_list is modified, we must restart the loop,
|
|
286
295
|
# as conflicting collections may be resolved.
|
|
287
296
|
continue
|
|
288
297
|
|
|
289
|
-
# 3.a For each collections with only one
|
|
290
|
-
# collections with more than one
|
|
298
|
+
# 3.a For each collections with only one term, assign their term to the detriment of
|
|
299
|
+
# collections with more than one term.
|
|
291
300
|
wining_collection_ids: list[str] = list()
|
|
292
301
|
for collection_ids in conflicting_collection_ids_list:
|
|
293
302
|
for collection_id in collection_ids:
|
|
294
|
-
if len(
|
|
303
|
+
if len(collection_terms_mapping[collection_id]) == 1:
|
|
295
304
|
wining_collection_ids.append(collection_id)
|
|
296
|
-
|
|
297
|
-
issue =
|
|
305
|
+
term = _get_first_item(collection_terms_mapping[collection_id])
|
|
306
|
+
issue = AssignedTerm(collection_id=collection_id, term=term)
|
|
298
307
|
warnings.append(issue)
|
|
299
308
|
# 3.b Update conflicting collections.
|
|
300
309
|
if wining_collection_ids:
|
|
301
310
|
DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
|
|
302
311
|
wining_collection_ids)
|
|
303
|
-
DrsGenerator.
|
|
304
|
-
|
|
312
|
+
DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
|
|
313
|
+
wining_collection_ids)
|
|
305
314
|
# Every time conflicting_collection_ids_list is modified, we must restart the loop,
|
|
306
315
|
# as conflicting collections may be resolved.
|
|
307
316
|
continue
|
|
308
317
|
|
|
309
|
-
# 4.a For each
|
|
310
|
-
# If the difference is one
|
|
311
|
-
|
|
318
|
+
# 4.a For each term set of the remaining conflicting collections, compute their difference.
|
|
319
|
+
# If the difference is one term, this term is assigned to the collection that owns it.
|
|
320
|
+
wining_id_and_term_pairs: list[tuple[str, str]] = list()
|
|
312
321
|
for collection_ids in conflicting_collection_ids_list:
|
|
313
322
|
for collection_index in range(0, len(collection_ids)):
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
collection_ids[:collection_index]
|
|
319
|
-
]
|
|
320
|
-
)
|
|
323
|
+
collection_set = collection_ids[collection_index + 1:] + collection_ids[:collection_index]
|
|
324
|
+
diff: set[str] = collection_terms_mapping[collection_ids[collection_index]]\
|
|
325
|
+
.difference(*[collection_terms_mapping[index] # noqa E127
|
|
326
|
+
for index in collection_set])
|
|
321
327
|
if len(diff) == 1:
|
|
322
|
-
|
|
328
|
+
wining_id_and_term_pairs.append((collection_ids[collection_index],
|
|
323
329
|
_get_first_item(diff)))
|
|
324
330
|
# 4.b Update conflicting collections.
|
|
325
|
-
if
|
|
331
|
+
if wining_id_and_term_pairs:
|
|
326
332
|
wining_collection_ids = list()
|
|
327
|
-
for collection_id,
|
|
333
|
+
for collection_id, term in wining_id_and_term_pairs:
|
|
328
334
|
wining_collection_ids.append(collection_id)
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
issue =
|
|
335
|
+
collection_terms_mapping[collection_id].clear()
|
|
336
|
+
collection_terms_mapping[collection_id].add(term)
|
|
337
|
+
issue = AssignedTerm(collection_id=collection_id, term=term)
|
|
332
338
|
warnings.append(issue)
|
|
333
339
|
DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
|
|
334
340
|
wining_collection_ids)
|
|
335
|
-
DrsGenerator.
|
|
341
|
+
DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
|
|
336
342
|
wining_collection_ids)
|
|
337
343
|
continue
|
|
338
344
|
else:
|
|
339
|
-
break
|
|
340
|
-
return
|
|
345
|
+
break # Stop the loop when no progress is made.
|
|
346
|
+
return collection_terms_mapping, warnings
|
|
341
347
|
|
|
342
348
|
@staticmethod
|
|
343
|
-
def
|
|
344
|
-
|
|
345
|
-
errors: list[
|
|
346
|
-
# 1. Looking for collections that share strictly the same
|
|
347
|
-
collection_ids: list[str] = list(
|
|
349
|
+
def _check_collection_terms_mapping(collection_terms_mapping: dict[str, set[str]]) \
|
|
350
|
+
-> tuple[dict[str, str], list[GenerationIssue]]: # noqa E127
|
|
351
|
+
errors: list[GenerationIssue] = list()
|
|
352
|
+
# 1. Looking for collections that share strictly the same term(s).
|
|
353
|
+
collection_ids: list[str] = list(collection_terms_mapping.keys())
|
|
348
354
|
len_collection_ids: int = len(collection_ids)
|
|
349
355
|
faulty_collections_list: list[set[str]] = list()
|
|
350
356
|
for l_collection_index in range(0, len_collection_ids - 1):
|
|
351
357
|
l_collection_id = collection_ids[l_collection_index]
|
|
352
|
-
|
|
358
|
+
l_term_set = collection_terms_mapping[l_collection_id]
|
|
353
359
|
for r_collection_index in range(l_collection_index + 1, len_collection_ids):
|
|
354
360
|
r_collection_id = collection_ids[r_collection_index]
|
|
355
|
-
|
|
356
|
-
#
|
|
357
|
-
if
|
|
361
|
+
r_term_set = collection_terms_mapping[r_collection_id]
|
|
362
|
+
# Check if the set is empty because the difference will always be an empty set!
|
|
363
|
+
if l_term_set and (not l_term_set.difference(r_term_set)):
|
|
358
364
|
not_registered = True
|
|
359
365
|
for faulty_collections in faulty_collections_list:
|
|
360
366
|
if l_collection_id in faulty_collections or \
|
|
@@ -366,35 +372,35 @@ class DrsGenerator(DrsApplication):
|
|
|
366
372
|
if not_registered:
|
|
367
373
|
faulty_collections_list.append({l_collection_id, r_collection_id})
|
|
368
374
|
for faulty_collections in faulty_collections_list:
|
|
369
|
-
|
|
375
|
+
terms = collection_terms_mapping[_get_first_item(faulty_collections)]
|
|
370
376
|
issue = ConflictingCollections(collection_ids=_transform_set_and_sort(faulty_collections),
|
|
371
|
-
|
|
377
|
+
terms=_transform_set_and_sort(terms))
|
|
372
378
|
errors.append(issue)
|
|
373
379
|
for collection_id in faulty_collections:
|
|
374
|
-
del
|
|
375
|
-
|
|
376
|
-
# 2. Looking for collections with more than one
|
|
380
|
+
del collection_terms_mapping[collection_id]
|
|
381
|
+
|
|
382
|
+
# 2. Looking for collections with more than one term.
|
|
377
383
|
result: dict[str, str] = dict()
|
|
378
|
-
for collection_id,
|
|
379
|
-
|
|
380
|
-
if
|
|
381
|
-
result[collection_id] = _get_first_item(
|
|
382
|
-
elif
|
|
383
|
-
other_issue =
|
|
384
|
-
|
|
384
|
+
for collection_id, term_set in collection_terms_mapping.items():
|
|
385
|
+
len_term_set = len(term_set)
|
|
386
|
+
if len_term_set == 1:
|
|
387
|
+
result[collection_id] = _get_first_item(term_set)
|
|
388
|
+
elif len_term_set > 1:
|
|
389
|
+
other_issue = TooManyTermCollection(collection_id=collection_id,
|
|
390
|
+
terms=_transform_set_and_sort(term_set))
|
|
385
391
|
errors.append(other_issue)
|
|
386
|
-
#else: Don't add emptied collection to the result.
|
|
392
|
+
# else: Don't add emptied collection to the result.
|
|
387
393
|
return result, errors
|
|
388
394
|
|
|
389
395
|
@staticmethod
|
|
390
|
-
def
|
|
396
|
+
def _remove_term_from_other_term_sets(collection_terms_mapping: dict[str, set[str]],
|
|
391
397
|
collection_ids_to_be_removed: list[str]) -> None:
|
|
392
398
|
for collection_id_to_be_removed in collection_ids_to_be_removed:
|
|
393
|
-
# Should only be one
|
|
394
|
-
|
|
395
|
-
for collection_id in
|
|
399
|
+
# Should only be one term.
|
|
400
|
+
term_to_be_removed: str = _get_first_item(collection_terms_mapping[collection_id_to_be_removed])
|
|
401
|
+
for collection_id in collection_terms_mapping.keys():
|
|
396
402
|
if (collection_id not in collection_ids_to_be_removed):
|
|
397
|
-
|
|
403
|
+
collection_terms_mapping[collection_id].discard(term_to_be_removed)
|
|
398
404
|
|
|
399
405
|
@staticmethod
|
|
400
406
|
def _remove_ids_from_conflicts(conflicting_collection_ids_list: list[list[str]],
|
|
@@ -403,22 +409,3 @@ class DrsGenerator(DrsApplication):
|
|
|
403
409
|
for conflicting_collection_ids in conflicting_collection_ids_list:
|
|
404
410
|
if collection_id_to_be_removed in conflicting_collection_ids:
|
|
405
411
|
conflicting_collection_ids.remove(collection_id_to_be_removed)
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
if __name__ == "__main__":
|
|
409
|
-
project_id = 'cmip6plus'
|
|
410
|
-
generator = DrsGenerator(project_id)
|
|
411
|
-
mapping = \
|
|
412
|
-
{
|
|
413
|
-
'member_id': 'r2i2p1f2',
|
|
414
|
-
'activity_id': 'CMIP',
|
|
415
|
-
'source_id': 'MIROC6',
|
|
416
|
-
'mip_era': 'CMIP6Plus',
|
|
417
|
-
'experiment_id': 'amip',
|
|
418
|
-
'variable_id': 'od550aer',
|
|
419
|
-
'table_id': 'ACmon',
|
|
420
|
-
'grid_label': 'gn',
|
|
421
|
-
'institution_id': 'IPSL',
|
|
422
|
-
}
|
|
423
|
-
report = generator.generate_file_name_from_mapping(mapping)
|
|
424
|
-
print(report.warnings)
|