esgvoc 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic.
- esgvoc/__init__.py +3 -1
- esgvoc/api/__init__.py +23 -34
- esgvoc/api/_utils.py +28 -14
- esgvoc/api/data_descriptors/__init__.py +18 -12
- esgvoc/api/data_descriptors/activity.py +8 -45
- esgvoc/api/data_descriptors/area_label.py +6 -0
- esgvoc/api/data_descriptors/branded_suffix.py +5 -0
- esgvoc/api/data_descriptors/branded_variable.py +5 -0
- esgvoc/api/data_descriptors/consortium.py +16 -56
- esgvoc/api/data_descriptors/data_descriptor.py +106 -0
- esgvoc/api/data_descriptors/date.py +3 -46
- esgvoc/api/data_descriptors/directory_date.py +3 -46
- esgvoc/api/data_descriptors/experiment.py +19 -54
- esgvoc/api/data_descriptors/forcing_index.py +3 -45
- esgvoc/api/data_descriptors/frequency.py +6 -43
- esgvoc/api/data_descriptors/grid_label.py +6 -44
- esgvoc/api/data_descriptors/horizontal_label.py +6 -0
- esgvoc/api/data_descriptors/initialisation_index.py +3 -44
- esgvoc/api/data_descriptors/institution.py +11 -54
- esgvoc/api/data_descriptors/license.py +4 -44
- esgvoc/api/data_descriptors/mip_era.py +6 -44
- esgvoc/api/data_descriptors/model_component.py +7 -45
- esgvoc/api/data_descriptors/organisation.py +3 -40
- esgvoc/api/data_descriptors/physic_index.py +3 -45
- esgvoc/api/data_descriptors/product.py +4 -43
- esgvoc/api/data_descriptors/realisation_index.py +3 -44
- esgvoc/api/data_descriptors/realm.py +4 -42
- esgvoc/api/data_descriptors/resolution.py +6 -44
- esgvoc/api/data_descriptors/source.py +18 -53
- esgvoc/api/data_descriptors/source_type.py +3 -41
- esgvoc/api/data_descriptors/sub_experiment.py +3 -41
- esgvoc/api/data_descriptors/table.py +6 -48
- esgvoc/api/data_descriptors/temporal_label.py +6 -0
- esgvoc/api/data_descriptors/time_range.py +3 -27
- esgvoc/api/data_descriptors/variable.py +13 -71
- esgvoc/api/data_descriptors/variant_label.py +3 -47
- esgvoc/api/data_descriptors/vertical_label.py +5 -0
- esgvoc/api/projects.py +187 -171
- esgvoc/api/report.py +21 -12
- esgvoc/api/search.py +3 -1
- esgvoc/api/universe.py +44 -34
- esgvoc/apps/__init__.py +3 -4
- esgvoc/apps/drs/generator.py +166 -161
- esgvoc/apps/drs/report.py +222 -131
- esgvoc/apps/drs/validator.py +103 -105
- esgvoc/cli/drs.py +29 -19
- esgvoc/cli/get.py +26 -25
- esgvoc/cli/install.py +11 -8
- esgvoc/cli/main.py +0 -2
- esgvoc/cli/status.py +5 -5
- esgvoc/cli/valid.py +40 -40
- esgvoc/core/db/models/universe.py +3 -3
- esgvoc/core/db/project_ingestion.py +1 -1
- esgvoc/core/db/universe_ingestion.py +6 -5
- esgvoc/core/logging_handler.py +1 -1
- esgvoc/core/repo_fetcher.py +4 -3
- esgvoc/core/service/__init__.py +37 -5
- esgvoc/core/service/configuration/config_manager.py +188 -0
- esgvoc/core/service/configuration/setting.py +88 -0
- esgvoc/core/service/state.py +49 -32
- {esgvoc-0.2.1.dist-info → esgvoc-0.3.0.dist-info}/METADATA +34 -3
- esgvoc-0.3.0.dist-info/RECORD +78 -0
- esgvoc/cli/config.py +0 -82
- esgvoc/core/service/settings.py +0 -73
- esgvoc/core/service/settings.toml +0 -17
- esgvoc/core/service/settings_default.toml +0 -17
- esgvoc-0.2.1.dist-info/RECORD +0 -73
- {esgvoc-0.2.1.dist-info → esgvoc-0.3.0.dist-info}/WHEEL +0 -0
- {esgvoc-0.2.1.dist-info → esgvoc-0.3.0.dist-info}/entry_points.txt +0 -0
- {esgvoc-0.2.1.dist-info → esgvoc-0.3.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/apps/drs/generator.py
CHANGED
@@ -1,22 +1,14 @@
-from typing import
+from typing import Any, Iterable, Mapping, cast
 
 import esgvoc.api.projects as projects
-
-
-
-
-
-
-
+from esgvoc.api.project_specs import (DrsCollection, DrsConstant, DrsPartKind,
+                                      DrsSpecification, DrsType)
+from esgvoc.apps.drs.report import (AssignedTerm, ConflictingCollections,
+                                    DrsGenerationReport, GenerationError,
+                                    GenerationIssue, GenerationWarning,
+                                    InvalidTerm, MissingTerm,
+                                    TooManyTermCollection)
 from esgvoc.apps.drs.validator import DrsApplication
-from esgvoc.apps.drs.report import (DrsGeneratorReport,
-                                    DrsIssue,
-                                    GeneratorIssue,
-                                    TooManyTokensCollection,
-                                    InvalidToken,
-                                    MissingToken,
-                                    ConflictingCollections,
-                                    AssignedToken)
 
 
 def _get_first_item(items: set[Any]) -> Any:
@@ -35,137 +27,150 @@ def _transform_set_and_sort(_set: set[Any]) -> list[Any]:
 class DrsGenerator(DrsApplication):
     """
     Generate a directory, dataset id and file name expression specified by the given project from
-    a mapping of collection ids and
+    a mapping of collection ids and terms or an unordered bag of terms.
     """
-
-    def generate_directory_from_mapping(self, mapping: Mapping[str, str]) ->
+
+    def generate_directory_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
         """
-        Generate a directory DRS expression from a mapping of collection ids and
+        Generate a directory DRS expression from a mapping of collection ids and terms.
 
-        :param mapping: A mapping of collection ids (keys) and
+        :param mapping: A mapping of collection ids (keys) and terms (values).
         :type mapping: Mapping[str, str]
         :returns: A generation report.
         :rtype: DrsGeneratorReport
         """
         return self._generate_from_mapping(mapping, self.directory_specs)
-
-    def
+
+    def generate_directory_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
         """
-        Generate a directory DRS expression from an unordered bag of
+        Generate a directory DRS expression from an unordered bag of terms.
 
-        :param
-        :type
+        :param terms: An unordered bag of terms.
+        :type terms: Iterable[str]
        :returns: A generation report.
         :rtype: DrsGeneratorReport
         """
-        return self.
+        return self._generate_from_bag_of_terms(terms, self.directory_specs)
 
-    def generate_dataset_id_from_mapping(self, mapping: Mapping[str, str]) ->
+    def generate_dataset_id_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
         """
-        Generate a dataset id DRS expression from a mapping of collection ids and
+        Generate a dataset id DRS expression from a mapping of collection ids and terms.
 
-        :param mapping: A mapping of collection ids (keys) and
+        :param mapping: A mapping of collection ids (keys) and terms (values).
         :type mapping: Mapping[str, str]
         :returns: A generation report.
         :rtype: DrsGeneratorReport
         """
         return self._generate_from_mapping(mapping, self.dataset_id_specs)
-
-    def
+
+    def generate_dataset_id_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
         """
-        Generate a dataset id DRS expression from an unordered bag of
+        Generate a dataset id DRS expression from an unordered bag of terms.
 
-        :param
-        :type
+        :param terms: An unordered bag of terms.
+        :type terms: Iterable[str]
         :returns: A generation report.
         :rtype: DrsGeneratorReport
         """
-        return self.
-
+        return self._generate_from_bag_of_terms(terms, self.dataset_id_specs)
 
-
+
+    def generate_file_name_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
         """
-        Generate a file name DRS expression from a mapping of collection ids and
+        Generate a file name DRS expression from a mapping of collection ids and terms.
         The file name extension is append automatically, according to the DRS specification,
-        so none of the
+        so none of the terms given must include the extension.
 
-        :param mapping: A mapping of collection ids (keys) and
+        :param mapping: A mapping of collection ids (keys) and terms (values).
         :type mapping: Mapping[str, str]
         :returns: A generation report.
         :rtype: DrsGeneratorReport
         """
         report = self._generate_from_mapping(mapping, self.file_name_specs)
-        report.generated_drs_expression = report.generated_drs_expression +
-
-
-
+        report.generated_drs_expression = report.generated_drs_expression + \
+            self._get_full_file_name_extension()
+        return report
+
+    def generate_file_name_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
         """
-        Generate a file name DRS expression from an unordered bag of
+        Generate a file name DRS expression from an unordered bag of terms.
         The file name extension is append automatically, according to the DRS specification,
-        so none of the
+        so none of the terms given must include the extension.
 
-        :param
-        :type
+        :param terms: An unordered bag of terms.
+        :type terms: Iterable[str]
         :returns: A generation report.
         :rtype: DrsGeneratorReport
         """
-        report = self.
-        report.generated_drs_expression = report.generated_drs_expression +
-
+        report = self._generate_from_bag_of_terms(terms, self.file_name_specs)
+        report.generated_drs_expression = report.generated_drs_expression + \
+            self._get_full_file_name_extension()
+        return report
 
     def generate_from_mapping(self, mapping: Mapping[str, str],
-                              drs_type: DrsType|str) ->
+                              drs_type: DrsType|str) -> DrsGenerationReport:
         """
-        Generate a DRS expression from a mapping of collection ids and
+        Generate a DRS expression from a mapping of collection ids and terms.
 
-        :param mapping: A mapping of collection ids (keys) and
+        :param mapping: A mapping of collection ids (keys) and terms (values).
         :type mapping: Mapping[str, str]
         :param drs_type: The type of the given DRS expression (directory, file_name or dataset_id)
         :type drs_type: DrsType|str
         :returns: A generation report.
         :rtype: DrsGeneratorReport
         """
-
-
-
-
-
-
-
-
+        match drs_type:
+            case DrsType.DIRECTORY:
+                return self.generate_directory_from_mapping(mapping=mapping)
+            case DrsType.FILE_NAME:
+                return self.generate_file_name_from_mapping(mapping=mapping)
+            case DrsType.DATASET_ID:
+                return self.generate_dataset_id_from_mapping(mapping=mapping)
+            case _:
+                raise RuntimeError(f'unsupported drs type {drs_type}')
+
+    def generate_from_bag_of_terms(self, terms: Iterable[str], drs_type: DrsType|str) \
+            -> DrsGenerationReport:
         """
-        Generate a DRS expression from an unordered bag of
+        Generate a DRS expression from an unordered bag of terms.
 
-        :param
-        :type
+        :param terms: An unordered bag of terms.
+        :type terms: Iterable[str]
         :param drs_type: The type of the given DRS expression (directory, file_name or dataset_id)
         :type drs_type: DrsType|str
         :returns: A generation report.
         :rtype: DrsGeneratorReport
         """
-
-
+        match drs_type:
+            case DrsType.DIRECTORY:
+                return self.generate_directory_from_bag_of_terms(terms=terms)
+            case DrsType.FILE_NAME:
+                return self.generate_file_name_from_bag_of_terms(terms=terms)
+            case DrsType.DATASET_ID:
+                return self.generate_dataset_id_from_bag_of_terms(terms=terms)
+            case _:
+                raise RuntimeError(f'unsupported drs type {drs_type}')
 
 
     def _generate_from_mapping(self, mapping: Mapping[str, str], specs: DrsSpecification) \
-            ->
+            -> DrsGenerationReport:
         drs_expression, errors, warnings = self.__generate_from_mapping(mapping, specs, True)
         if self.pedantic:
             errors.extend(warnings)
             warnings.clear()
-        return
-
+        return DrsGenerationReport(project_id=self.project_id, type=specs.type,
+                                   given_mapping_or_bag_of_terms=mapping,
                                    mapping_used=mapping,
                                    generated_drs_expression=drs_expression,
-                                   errors=cast(list[
-                                   warnings=cast(list[
+                                   errors=cast(list[GenerationError], errors),
+                                   warnings=cast(list[GenerationWarning], warnings))
 
     def __generate_from_mapping(self, mapping: Mapping[str, str],
                                 specs: DrsSpecification,
                                 has_to_valid_terms: bool)\
-            -> tuple[str, list[
-        errors: list[
-        warnings: list[
+            -> tuple[str, list[GenerationIssue], list[GenerationIssue]]:
+        errors: list[GenerationIssue] = list()
+        warnings: list[GenerationIssue] = list()
         drs_expression = ""
         part_position: int = 0
         for part in specs.parts:
@@ -180,65 +185,65 @@ class DrsGenerator(DrsApplication):
                                                                    self.project_id,
                                                                    collection_id)
                 if not matching_terms:
-                    issue =
-
+                    issue = InvalidTerm(term=part_value,
+                                        term_position=part_position,
                                         collection_id_or_constant_value=collection_id)
                     errors.append(issue)
-                    part_value =
+                    part_value = DrsGenerationReport.INVALID_TAG
             else:
-                other_issue =
+                other_issue = MissingTerm(collection_id=collection_id,
                                           collection_position=part_position)
                 if collection_part.is_required:
                     errors.append(other_issue)
-                    part_value =
+                    part_value = DrsGenerationReport.MISSING_TAG
                 else:
                     warnings.append(other_issue)
                     continue  # The for loop.
         else:
             constant_part = cast(DrsConstant, part)
             part_value = constant_part.value
-
+
            drs_expression += part_value + specs.separator
-
+
         drs_expression = drs_expression[0:len(drs_expression)-len(specs.separator)]
         return drs_expression, errors, warnings
 
-    def
-            ->
-
-        for
-        matching_terms = projects.valid_term_in_project(
+    def _generate_from_bag_of_terms(self, terms: Iterable[str], specs: DrsSpecification) \
+            -> DrsGenerationReport:
+        collection_terms_mapping: dict[str, set[str]] = dict()
+        for term in terms:
+            matching_terms = projects.valid_term_in_project(term, self.project_id)
             for matching_term in matching_terms:
-                if matching_term.collection_id not in
-
-
-
-        mapping, errors = DrsGenerator.
+                if matching_term.collection_id not in collection_terms_mapping:
+                    collection_terms_mapping[matching_term.collection_id] = set()
+                collection_terms_mapping[matching_term.collection_id].add(term)
+        collection_terms_mapping, warnings = DrsGenerator._resolve_conflicts(collection_terms_mapping)
+        mapping, errors = DrsGenerator._check_collection_terms_mapping(collection_terms_mapping)
         drs_expression, errs, warns = self.__generate_from_mapping(mapping, specs, False)
         errors.extend(errs)
         warnings.extend(warns)
         if self.pedantic:
             errors.extend(warnings)
             warnings.clear()
-        return
-
+        return DrsGenerationReport(project_id=self.project_id, type=specs.type,
+                                   given_mapping_or_bag_of_terms=terms,
                                    mapping_used=mapping,generated_drs_expression=drs_expression,
-                                   errors=cast(list[
-                                   warnings=cast(list[
-
+                                   errors=cast(list[GenerationError], errors),
+                                   warnings=cast(list[GenerationWarning], warnings))
+
     @staticmethod
-    def _resolve_conflicts(
-            -> tuple[dict[str, set[str]], list[
-        warnings: list[
+    def _resolve_conflicts(collection_terms_mapping: dict[str, set[str]]) \
+            -> tuple[dict[str, set[str]], list[GenerationIssue]]:
+        warnings: list[GenerationIssue] = list()
         conflicting_collection_ids_list: list[list[str]] = list()
-        collection_ids: list[str] = list(
+        collection_ids: list[str] = list(collection_terms_mapping.keys())
         len_collection_ids: int = len(collection_ids)
-
+
         for l_collection_index in range(0, len_collection_ids - 1):
             conflicting_collection_ids: list[str] = list()
             for r_collection_index in range(l_collection_index + 1, len_collection_ids):
-                if
-                    (
+                if collection_terms_mapping[collection_ids[l_collection_index]].isdisjoint \
+                   (collection_terms_mapping[collection_ids[r_collection_index]]):
                     continue
                 else:
                     not_registered = True
@@ -256,105 +261,105 @@ class DrsGenerator(DrsApplication):
         # Each time a collection is resolved, we must restart the loop so as to check if others can be,
         # until no progress is made.
         while True:
-            # 1. Non-conflicting collections with only one
-            # Non-conflicting collections with more than one
+            # 1. Non-conflicting collections with only one term are assigned.
+            # Non-conflicting collections with more than one term will be raise an error
             # in the _check method.
-
+
             # Nothing to do.
 
-            # 2a. Collections with one
-            # We don't search for collection with more than one
-            # the same, because we cannot choose which
-            # So stick with one
+            # 2a. Collections with one term that are conflicting to each other will raise an error.
+            # We don't search for collection with more than one term which term sets are exactly
+            # the same, because we cannot choose which term will be removed in 2b.
+            # So stick with one term collections: those collection will be detected in method _check.
             collection_ids_with_len_eq_1_list: list[list[str]] = list()
             for collection_ids in conflicting_collection_ids_list:
                 tmp_conflicting_collection_ids: list[str] = list()
                 for collection_id in collection_ids:
-                    if len(
+                    if len(collection_terms_mapping[collection_id]) == 1:
                         tmp_conflicting_collection_ids.append(collection_id)
                 if len(tmp_conflicting_collection_ids) > 1:
                     collection_ids_with_len_eq_1_list.append(tmp_conflicting_collection_ids)
-            # 2b. As it is not possible to resolve collections sharing the same unique
-            # raise errors, remove the faulty collections and their
+            # 2b. As it is not possible to resolve collections sharing the same unique term:
+            # raise errors, remove the faulty collections and their term.
             if collection_ids_with_len_eq_1_list:
                 for collection_ids_to_be_removed in collection_ids_with_len_eq_1_list:
                     DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
                                                             collection_ids_to_be_removed)
-                    DrsGenerator.
+                    DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
                                                                    collection_ids_to_be_removed)
                 # Every time conflicting_collection_ids_list is modified, we must restart the loop,
                 # as conflicting collections may be resolved.
                 continue
 
-            # 3.a For each collections with only one
-            # collections with more than one
+            # 3.a For each collections with only one term, assign their term to the detriment of
+            # collections with more than one term.
             wining_collection_ids: list[str] = list()
             for collection_ids in conflicting_collection_ids_list:
                 for collection_id in collection_ids:
-                    if len(
+                    if len(collection_terms_mapping[collection_id]) == 1:
                         wining_collection_ids.append(collection_id)
-
-                        issue =
+                        term = _get_first_item(collection_terms_mapping[collection_id])
+                        issue = AssignedTerm(collection_id=collection_id, term=term)
                         warnings.append(issue)
             # 3.b Update conflicting collections.
             if wining_collection_ids:
                 DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
                                                         wining_collection_ids)
-                DrsGenerator.
+                DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
                                                                wining_collection_ids)
                 # Every time conflicting_collection_ids_list is modified, we must restart the loop,
                 # as conflicting collections may be resolved.
                 continue
 
-            # 4.a For each
-            # If the difference is one
-
+            # 4.a For each term set of the remaining conflicting collections, compute their difference.
+            # If the difference is one term, this term is assigned to the collection that owns it.
+            wining_id_and_term_pairs: list[tuple[str, str]] = list()
             for collection_ids in conflicting_collection_ids_list:
                 for collection_index in range(0, len(collection_ids)):
-                    diff: set[str] =
+                    diff: set[str] = collection_terms_mapping[collection_ids[collection_index]]\
                         .difference(
-                            *[
+                            *[collection_terms_mapping[index]
                               for index in collection_ids[collection_index + 1 :] +\
                                   collection_ids[:collection_index]
                              ]
                         )
                     if len(diff) == 1:
-
+                        wining_id_and_term_pairs.append((collection_ids[collection_index],
                                                          _get_first_item(diff)))
             # 4.b Update conflicting collections.
-            if
+            if wining_id_and_term_pairs:
                 wining_collection_ids = list()
-                for collection_id,
+                for collection_id, term in wining_id_and_term_pairs:
                     wining_collection_ids.append(collection_id)
-
-
-                    issue =
+                    collection_terms_mapping[collection_id].clear()
+                    collection_terms_mapping[collection_id].add(term)
+                    issue = AssignedTerm(collection_id=collection_id, term=term)
                     warnings.append(issue)
                 DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
                                                         wining_collection_ids)
-                DrsGenerator.
+                DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
                                                                wining_collection_ids)
                 continue
             else:
                 break  # Stop the loop when no progress is made.
-        return
+        return collection_terms_mapping, warnings
 
     @staticmethod
-    def
-            -> tuple[dict[str, str], list[
-        errors: list[
-        # 1. Looking for collections that share strictly the same
-        collection_ids: list[str] = list(
+    def _check_collection_terms_mapping(collection_terms_mapping: dict[str, set[str]]) \
+            -> tuple[dict[str, str], list[GenerationIssue]]:
+        errors: list[GenerationIssue] = list()
+        # 1. Looking for collections that share strictly the same term(s).
+        collection_ids: list[str] = list(collection_terms_mapping.keys())
         len_collection_ids: int = len(collection_ids)
         faulty_collections_list: list[set[str]] = list()
         for l_collection_index in range(0, len_collection_ids - 1):
             l_collection_id = collection_ids[l_collection_index]
-
+            l_term_set = collection_terms_mapping[l_collection_id]
             for r_collection_index in range(l_collection_index + 1, len_collection_ids):
                 r_collection_id = collection_ids[r_collection_index]
-
+                r_term_set = collection_terms_mapping[r_collection_id]
                 # check if the set is empty because the difference will always be an empty set!
-                if
+                if l_term_set and (not l_term_set.difference(r_term_set)):
                     not_registered = True
                     for faulty_collections in faulty_collections_list:
                         if l_collection_id in faulty_collections or \
@@ -366,35 +371,35 @@ class DrsGenerator(DrsApplication):
                     if not_registered:
                         faulty_collections_list.append({l_collection_id, r_collection_id})
         for faulty_collections in faulty_collections_list:
-
+            terms = collection_terms_mapping[_get_first_item(faulty_collections)]
             issue = ConflictingCollections(collection_ids=_transform_set_and_sort(faulty_collections),
-
+                                           terms=_transform_set_and_sort(terms))
             errors.append(issue)
             for collection_id in faulty_collections:
-                del
-
-        # 2. Looking for collections with more than one
+                del collection_terms_mapping[collection_id]
+
+        # 2. Looking for collections with more than one term.
         result: dict[str, str] = dict()
-        for collection_id,
-
-            if
-                result[collection_id] = _get_first_item(
-            elif
-                other_issue =
-
+        for collection_id, term_set in collection_terms_mapping.items():
+            len_term_set = len(term_set)
+            if len_term_set == 1:
+                result[collection_id] = _get_first_item(term_set)
+            elif len_term_set > 1:
+                other_issue = TooManyTermCollection(collection_id=collection_id,
+                                                    terms=_transform_set_and_sort(term_set))
                 errors.append(other_issue)
             #else: Don't add emptied collection to the result.
         return result, errors
 
     @staticmethod
-    def
+    def _remove_term_from_other_term_sets(collection_terms_mapping: dict[str, set[str]],
                                           collection_ids_to_be_removed: list[str]) -> None:
         for collection_id_to_be_removed in collection_ids_to_be_removed:
-            # Should only be one
-
-            for collection_id in
+            # Should only be one term.
+            term_to_be_removed: str = _get_first_item(collection_terms_mapping[collection_id_to_be_removed])
+            for collection_id in collection_terms_mapping.keys():
                 if (collection_id not in collection_ids_to_be_removed):
-
+                    collection_terms_mapping[collection_id].discard(term_to_be_removed)
 
     @staticmethod
     def _remove_ids_from_conflicts(conflicting_collection_ids_list: list[list[str]],
@@ -421,4 +426,4 @@ if __name__ == "__main__":
         'institution_id': 'IPSL',
     }
     report = generator.generate_file_name_from_mapping(mapping)
-    print(report.warnings)
+    print(report.warnings)