esgvoc 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- esgvoc/__init__.py +3 -0
- esgvoc/api/__init__.py +91 -0
- esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
- esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
- esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
- esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
- esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
- esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
- esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
- esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
- esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
- esgvoc/api/data_descriptors/__init__.py +159 -0
- esgvoc/api/data_descriptors/activity.py +72 -0
- esgvoc/api/data_descriptors/archive.py +5 -0
- esgvoc/api/data_descriptors/area_label.py +30 -0
- esgvoc/api/data_descriptors/branded_suffix.py +30 -0
- esgvoc/api/data_descriptors/branded_variable.py +21 -0
- esgvoc/api/data_descriptors/citation_url.py +5 -0
- esgvoc/api/data_descriptors/contact.py +5 -0
- esgvoc/api/data_descriptors/conventions.py +28 -0
- esgvoc/api/data_descriptors/creation_date.py +18 -0
- esgvoc/api/data_descriptors/data_descriptor.py +127 -0
- esgvoc/api/data_descriptors/data_specs_version.py +25 -0
- esgvoc/api/data_descriptors/date.py +5 -0
- esgvoc/api/data_descriptors/directory_date.py +22 -0
- esgvoc/api/data_descriptors/drs_specs.py +38 -0
- esgvoc/api/data_descriptors/experiment.py +215 -0
- esgvoc/api/data_descriptors/forcing_index.py +21 -0
- esgvoc/api/data_descriptors/frequency.py +48 -0
- esgvoc/api/data_descriptors/further_info_url.py +5 -0
- esgvoc/api/data_descriptors/grid.py +43 -0
- esgvoc/api/data_descriptors/horizontal_label.py +20 -0
- esgvoc/api/data_descriptors/initialization_index.py +27 -0
- esgvoc/api/data_descriptors/institution.py +80 -0
- esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
- esgvoc/api/data_descriptors/license.py +31 -0
- esgvoc/api/data_descriptors/member_id.py +9 -0
- esgvoc/api/data_descriptors/mip_era.py +26 -0
- esgvoc/api/data_descriptors/model_component.py +32 -0
- esgvoc/api/data_descriptors/models_test/models.py +17 -0
- esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
- esgvoc/api/data_descriptors/obs_type.py +5 -0
- esgvoc/api/data_descriptors/organisation.py +22 -0
- esgvoc/api/data_descriptors/physics_index.py +21 -0
- esgvoc/api/data_descriptors/product.py +16 -0
- esgvoc/api/data_descriptors/publication_status.py +5 -0
- esgvoc/api/data_descriptors/realization_index.py +24 -0
- esgvoc/api/data_descriptors/realm.py +16 -0
- esgvoc/api/data_descriptors/regex.py +5 -0
- esgvoc/api/data_descriptors/region.py +35 -0
- esgvoc/api/data_descriptors/resolution.py +7 -0
- esgvoc/api/data_descriptors/source.py +120 -0
- esgvoc/api/data_descriptors/source_type.py +5 -0
- esgvoc/api/data_descriptors/sub_experiment.py +5 -0
- esgvoc/api/data_descriptors/table.py +28 -0
- esgvoc/api/data_descriptors/temporal_label.py +20 -0
- esgvoc/api/data_descriptors/time_range.py +17 -0
- esgvoc/api/data_descriptors/title.py +5 -0
- esgvoc/api/data_descriptors/tracking_id.py +67 -0
- esgvoc/api/data_descriptors/variable.py +56 -0
- esgvoc/api/data_descriptors/variant_label.py +25 -0
- esgvoc/api/data_descriptors/vertical_label.py +20 -0
- esgvoc/api/project_specs.py +143 -0
- esgvoc/api/projects.py +1253 -0
- esgvoc/api/py.typed +0 -0
- esgvoc/api/pydantic_handler.py +146 -0
- esgvoc/api/report.py +127 -0
- esgvoc/api/search.py +171 -0
- esgvoc/api/universe.py +434 -0
- esgvoc/apps/__init__.py +6 -0
- esgvoc/apps/cmor_tables/__init__.py +7 -0
- esgvoc/apps/cmor_tables/cvs_table.py +948 -0
- esgvoc/apps/drs/__init__.py +0 -0
- esgvoc/apps/drs/constants.py +2 -0
- esgvoc/apps/drs/generator.py +429 -0
- esgvoc/apps/drs/report.py +540 -0
- esgvoc/apps/drs/validator.py +312 -0
- esgvoc/apps/ga/__init__.py +104 -0
- esgvoc/apps/ga/example_usage.py +315 -0
- esgvoc/apps/ga/models/__init__.py +47 -0
- esgvoc/apps/ga/models/netcdf_header.py +306 -0
- esgvoc/apps/ga/models/validator.py +491 -0
- esgvoc/apps/ga/test_ga.py +161 -0
- esgvoc/apps/ga/validator.py +277 -0
- esgvoc/apps/jsg/json_schema_generator.py +341 -0
- esgvoc/apps/jsg/templates/template.jinja +241 -0
- esgvoc/apps/test_cv/README.md +214 -0
- esgvoc/apps/test_cv/__init__.py +0 -0
- esgvoc/apps/test_cv/cv_tester.py +1611 -0
- esgvoc/apps/test_cv/example_usage.py +216 -0
- esgvoc/apps/vr/__init__.py +12 -0
- esgvoc/apps/vr/build_variable_registry.py +71 -0
- esgvoc/apps/vr/example_usage.py +60 -0
- esgvoc/apps/vr/vr_app.py +333 -0
- esgvoc/cli/clean.py +304 -0
- esgvoc/cli/cmor.py +46 -0
- esgvoc/cli/config.py +1300 -0
- esgvoc/cli/drs.py +267 -0
- esgvoc/cli/find.py +138 -0
- esgvoc/cli/get.py +155 -0
- esgvoc/cli/install.py +41 -0
- esgvoc/cli/main.py +60 -0
- esgvoc/cli/offline.py +269 -0
- esgvoc/cli/status.py +79 -0
- esgvoc/cli/test_cv.py +258 -0
- esgvoc/cli/valid.py +147 -0
- esgvoc/core/constants.py +17 -0
- esgvoc/core/convert.py +0 -0
- esgvoc/core/data_handler.py +206 -0
- esgvoc/core/db/__init__.py +3 -0
- esgvoc/core/db/connection.py +40 -0
- esgvoc/core/db/models/mixins.py +25 -0
- esgvoc/core/db/models/project.py +102 -0
- esgvoc/core/db/models/universe.py +98 -0
- esgvoc/core/db/project_ingestion.py +231 -0
- esgvoc/core/db/universe_ingestion.py +172 -0
- esgvoc/core/exceptions.py +33 -0
- esgvoc/core/logging_handler.py +26 -0
- esgvoc/core/repo_fetcher.py +345 -0
- esgvoc/core/service/__init__.py +41 -0
- esgvoc/core/service/configuration/config_manager.py +196 -0
- esgvoc/core/service/configuration/setting.py +363 -0
- esgvoc/core/service/data_merger.py +634 -0
- esgvoc/core/service/esg_voc.py +77 -0
- esgvoc/core/service/resolver_config.py +56 -0
- esgvoc/core/service/state.py +324 -0
- esgvoc/core/service/string_heuristics.py +98 -0
- esgvoc/core/service/term_cache.py +108 -0
- esgvoc/core/service/uri_resolver.py +133 -0
- esgvoc-2.0.2.dist-info/METADATA +82 -0
- esgvoc-2.0.2.dist-info/RECORD +147 -0
- esgvoc-2.0.2.dist-info/WHEEL +4 -0
- esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
- esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0
|
File without changes
|
|
@@ -0,0 +1,429 @@
|
|
|
1
|
+
from typing import Any, Iterable, Mapping, cast
|
|
2
|
+
|
|
3
|
+
import esgvoc.api.projects as projects
|
|
4
|
+
from esgvoc.api.project_specs import DrsSpecification, DrsType
|
|
5
|
+
from esgvoc.api.search import MatchingTerm
|
|
6
|
+
from esgvoc.apps.drs.report import (
|
|
7
|
+
AssignedTerm,
|
|
8
|
+
ConflictingCollections,
|
|
9
|
+
DrsGenerationReport,
|
|
10
|
+
GenerationError,
|
|
11
|
+
GenerationIssue,
|
|
12
|
+
GenerationWarning,
|
|
13
|
+
InvalidTerm,
|
|
14
|
+
MissingTerm,
|
|
15
|
+
TooManyTermCollection,
|
|
16
|
+
)
|
|
17
|
+
from esgvoc.apps.drs.validator import DrsApplication
|
|
18
|
+
from esgvoc.core.exceptions import EsgvocDbError
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _get_first_item(items: set[Any]) -> Any:
|
|
22
|
+
result = None
|
|
23
|
+
for result in items: # noqa: B007
|
|
24
|
+
break
|
|
25
|
+
return result
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _transform_set_and_sort(_set: set[Any]) -> list[Any]:
|
|
29
|
+
result = list(_set)
|
|
30
|
+
result.sort()
|
|
31
|
+
return result
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class DrsGenerator(DrsApplication):
    """
    Generate a directory, dataset id and file name expression specified by the given project from
    a mapping of collection ids and terms or an unordered bag of terms.

    Mapping-based generation validates each term against its collection; bag-of-terms
    generation first assigns each term to a collection (resolving conflicts where
    possible) and then builds the expression from the resulting mapping.
    """
|
|
39
|
+
|
|
40
|
+
def generate_directory_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
|
|
41
|
+
"""
|
|
42
|
+
Generate a directory DRS expression from a mapping of collection ids and terms.
|
|
43
|
+
|
|
44
|
+
:param mapping: A mapping of collection ids (keys) and terms (values).
|
|
45
|
+
:type mapping: Mapping[str, str]
|
|
46
|
+
:returns: A generation report.
|
|
47
|
+
:rtype: DrsGeneratorReport
|
|
48
|
+
"""
|
|
49
|
+
return self._generate_from_mapping(mapping, self.directory_specs)
|
|
50
|
+
|
|
51
|
+
def generate_directory_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
|
|
52
|
+
"""
|
|
53
|
+
Generate a directory DRS expression from an unordered bag of terms.
|
|
54
|
+
|
|
55
|
+
:param terms: An unordered bag of terms.
|
|
56
|
+
:type terms: Iterable[str]
|
|
57
|
+
:returns: A generation report.
|
|
58
|
+
:rtype: DrsGeneratorReport
|
|
59
|
+
"""
|
|
60
|
+
return self._generate_from_bag_of_terms(terms, self.directory_specs)
|
|
61
|
+
|
|
62
|
+
def generate_dataset_id_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
|
|
63
|
+
"""
|
|
64
|
+
Generate a dataset id DRS expression from a mapping of collection ids and terms.
|
|
65
|
+
|
|
66
|
+
:param mapping: A mapping of collection ids (keys) and terms (values).
|
|
67
|
+
:type mapping: Mapping[str, str]
|
|
68
|
+
:returns: A generation report.
|
|
69
|
+
:rtype: DrsGeneratorReport
|
|
70
|
+
"""
|
|
71
|
+
return self._generate_from_mapping(mapping, self.dataset_id_specs)
|
|
72
|
+
|
|
73
|
+
def generate_dataset_id_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
|
|
74
|
+
"""
|
|
75
|
+
Generate a dataset id DRS expression from an unordered bag of terms.
|
|
76
|
+
|
|
77
|
+
:param terms: An unordered bag of terms.
|
|
78
|
+
:type terms: Iterable[str]
|
|
79
|
+
:returns: A generation report.
|
|
80
|
+
:rtype: DrsGeneratorReport
|
|
81
|
+
"""
|
|
82
|
+
return self._generate_from_bag_of_terms(terms, self.dataset_id_specs)
|
|
83
|
+
|
|
84
|
+
def generate_file_name_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
|
|
85
|
+
"""
|
|
86
|
+
Generate a file name DRS expression from a mapping of collection ids and terms.
|
|
87
|
+
The file name extension is append automatically, according to the DRS specification,
|
|
88
|
+
so none of the terms given must include the extension.
|
|
89
|
+
|
|
90
|
+
:param mapping: A mapping of collection ids (keys) and terms (values).
|
|
91
|
+
:type mapping: Mapping[str, str]
|
|
92
|
+
:returns: A generation report.
|
|
93
|
+
:rtype: DrsGeneratorReport
|
|
94
|
+
"""
|
|
95
|
+
report = self._generate_from_mapping(mapping, self.file_name_specs)
|
|
96
|
+
report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension() # noqa E127
|
|
97
|
+
return report
|
|
98
|
+
|
|
99
|
+
def generate_file_name_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
|
|
100
|
+
"""
|
|
101
|
+
Generate a file name DRS expression from an unordered bag of terms.
|
|
102
|
+
The file name extension is append automatically, according to the DRS specification,
|
|
103
|
+
so none of the terms given must include the extension.
|
|
104
|
+
|
|
105
|
+
:param terms: An unordered bag of terms.
|
|
106
|
+
:type terms: Iterable[str]
|
|
107
|
+
:returns: A generation report.
|
|
108
|
+
:rtype: DrsGeneratorReport
|
|
109
|
+
"""
|
|
110
|
+
report = self._generate_from_bag_of_terms(terms, self.file_name_specs)
|
|
111
|
+
report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension() # noqa E127
|
|
112
|
+
return report
|
|
113
|
+
|
|
114
|
+
def generate_from_mapping(self, mapping: Mapping[str, str], drs_type: DrsType | str) -> DrsGenerationReport:
|
|
115
|
+
"""
|
|
116
|
+
Generate a DRS expression from a mapping of collection ids and terms.
|
|
117
|
+
|
|
118
|
+
:param mapping: A mapping of collection ids (keys) and terms (values).
|
|
119
|
+
:type mapping: Mapping[str, str]
|
|
120
|
+
:param drs_type: The type of the given DRS expression (directory, file_name or dataset_id)
|
|
121
|
+
:type drs_type: DrsType|str
|
|
122
|
+
:returns: A generation report.
|
|
123
|
+
:rtype: DrsGeneratorReport
|
|
124
|
+
"""
|
|
125
|
+
match drs_type:
|
|
126
|
+
case DrsType.DIRECTORY:
|
|
127
|
+
return self.generate_directory_from_mapping(mapping=mapping)
|
|
128
|
+
case DrsType.FILE_NAME:
|
|
129
|
+
return self.generate_file_name_from_mapping(mapping=mapping)
|
|
130
|
+
case DrsType.DATASET_ID:
|
|
131
|
+
return self.generate_dataset_id_from_mapping(mapping=mapping)
|
|
132
|
+
case _:
|
|
133
|
+
raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
|
|
134
|
+
|
|
135
|
+
def generate_from_bag_of_terms(self, terms: Iterable[str], drs_type: DrsType | str) -> DrsGenerationReport: # noqa E127
|
|
136
|
+
"""
|
|
137
|
+
Generate a DRS expression from an unordered bag of terms.
|
|
138
|
+
|
|
139
|
+
:param terms: An unordered bag of terms.
|
|
140
|
+
:type terms: Iterable[str]
|
|
141
|
+
:param drs_type: The type of the given DRS expression (directory, file_name or dataset_id)
|
|
142
|
+
:type drs_type: DrsType|str
|
|
143
|
+
:returns: A generation report.
|
|
144
|
+
:rtype: DrsGeneratorReport
|
|
145
|
+
"""
|
|
146
|
+
match drs_type:
|
|
147
|
+
case DrsType.DIRECTORY:
|
|
148
|
+
return self.generate_directory_from_bag_of_terms(terms=terms)
|
|
149
|
+
case DrsType.FILE_NAME:
|
|
150
|
+
return self.generate_file_name_from_bag_of_terms(terms=terms)
|
|
151
|
+
case DrsType.DATASET_ID:
|
|
152
|
+
return self.generate_dataset_id_from_bag_of_terms(terms=terms)
|
|
153
|
+
case _:
|
|
154
|
+
raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
|
|
155
|
+
|
|
156
|
+
def _generate_from_mapping(self, mapping: Mapping[str, str], specs: DrsSpecification) -> DrsGenerationReport: # noqa E127
|
|
157
|
+
drs_expression, errors, warnings = self.__generate_from_mapping(mapping, specs, True)
|
|
158
|
+
if self.pedantic:
|
|
159
|
+
errors.extend(warnings)
|
|
160
|
+
warnings.clear()
|
|
161
|
+
return DrsGenerationReport(
|
|
162
|
+
project_id=self.project_id,
|
|
163
|
+
type=specs.type,
|
|
164
|
+
given_mapping_or_bag_of_terms=mapping,
|
|
165
|
+
mapping_used=mapping,
|
|
166
|
+
generated_drs_expression=drs_expression,
|
|
167
|
+
errors=cast(list[GenerationError], errors),
|
|
168
|
+
warnings=cast(list[GenerationWarning], warnings),
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
def __generate_from_mapping(
|
|
172
|
+
self, mapping: Mapping[str, str], specs: DrsSpecification, has_to_valid_terms: bool
|
|
173
|
+
) -> tuple[str, list[GenerationIssue], list[GenerationIssue]]: # noqa E127
|
|
174
|
+
errors: list[GenerationIssue] = list()
|
|
175
|
+
warnings: list[GenerationIssue] = list()
|
|
176
|
+
drs_expression = ""
|
|
177
|
+
part_position: int = 0
|
|
178
|
+
for part in specs.parts:
|
|
179
|
+
part_position += 1
|
|
180
|
+
collection_id = part.source_collection
|
|
181
|
+
if collection_id in mapping:
|
|
182
|
+
part_value = mapping[collection_id]
|
|
183
|
+
if has_to_valid_terms:
|
|
184
|
+
if part.source_collection_term is None:
|
|
185
|
+
matching_terms = projects.valid_term_in_collection(part_value,
|
|
186
|
+
self.project_id,
|
|
187
|
+
collection_id)
|
|
188
|
+
else:
|
|
189
|
+
matching_terms = projects.valid_term(
|
|
190
|
+
part_value,
|
|
191
|
+
self.project_id,
|
|
192
|
+
collection_id,
|
|
193
|
+
part.source_collection_term).validated
|
|
194
|
+
if not matching_terms:
|
|
195
|
+
issue = InvalidTerm(term=part_value,
|
|
196
|
+
term_position=part_position,
|
|
197
|
+
collection_id_or_constant_value=collection_id)
|
|
198
|
+
errors.append(issue)
|
|
199
|
+
part_value = DrsGenerationReport.INVALID_TAG
|
|
200
|
+
else:
|
|
201
|
+
other_issue = MissingTerm(collection_id=collection_id, collection_position=part_position)
|
|
202
|
+
if part.is_required:
|
|
203
|
+
errors.append(other_issue)
|
|
204
|
+
part_value = DrsGenerationReport.MISSING_TAG
|
|
205
|
+
else:
|
|
206
|
+
warnings.append(other_issue)
|
|
207
|
+
continue # The for loop.
|
|
208
|
+
|
|
209
|
+
drs_expression += part_value + specs.separator
|
|
210
|
+
|
|
211
|
+
drs_expression = drs_expression[0: len(drs_expression) - len(specs.separator)]
|
|
212
|
+
return drs_expression, errors, warnings
|
|
213
|
+
|
|
214
|
+
def _generate_from_bag_of_terms(self, terms: Iterable[str], specs: DrsSpecification) -> DrsGenerationReport: # noqa E127
|
|
215
|
+
collection_terms_mapping: dict[str, set[str]] = dict()
|
|
216
|
+
for term in terms:
|
|
217
|
+
matching_terms: list[MatchingTerm] = list()
|
|
218
|
+
for part in specs.parts:
|
|
219
|
+
if part.source_collection_term is None:
|
|
220
|
+
matching_terms.extend(projects.valid_term_in_collection(term, self.project_id,
|
|
221
|
+
part.source_collection))
|
|
222
|
+
else:
|
|
223
|
+
if projects.valid_term(term, self.project_id, part.source_collection,
|
|
224
|
+
part.source_collection_term).validated:
|
|
225
|
+
matching_terms.append(MatchingTerm(project_id=self.project_id,
|
|
226
|
+
collection_id=part.source_collection,
|
|
227
|
+
term_id=part.source_collection_term))
|
|
228
|
+
for matching_term in matching_terms:
|
|
229
|
+
if matching_term.collection_id not in collection_terms_mapping:
|
|
230
|
+
collection_terms_mapping[matching_term.collection_id] = set()
|
|
231
|
+
collection_terms_mapping[matching_term.collection_id].add(term)
|
|
232
|
+
collection_terms_mapping, warnings = DrsGenerator._resolve_conflicts(collection_terms_mapping)
|
|
233
|
+
mapping, errors = DrsGenerator._check_collection_terms_mapping(collection_terms_mapping)
|
|
234
|
+
drs_expression, errs, warns = self.__generate_from_mapping(mapping, specs, False)
|
|
235
|
+
errors.extend(errs)
|
|
236
|
+
warnings.extend(warns)
|
|
237
|
+
if self.pedantic:
|
|
238
|
+
errors.extend(warnings)
|
|
239
|
+
warnings.clear()
|
|
240
|
+
return DrsGenerationReport(project_id=self.project_id,
|
|
241
|
+
type=specs.type,
|
|
242
|
+
given_mapping_or_bag_of_terms=terms,
|
|
243
|
+
mapping_used=mapping,
|
|
244
|
+
generated_drs_expression=drs_expression,
|
|
245
|
+
errors=cast(list[GenerationError], errors),
|
|
246
|
+
warnings=cast(list[GenerationWarning], warnings))
|
|
247
|
+
|
|
248
|
+
    @staticmethod
    def _resolve_conflicts(
        collection_terms_mapping: dict[str, set[str]],
    ) -> tuple[dict[str, set[str]], list[GenerationIssue]]:  # noqa E127
        """
        Try to assign a single term to each collection when several collections share terms.

        Runs a fixed-point loop: every time an assignment is made, the conflicting
        groups and the other collections' term sets are updated and the loop restarts,
        until no further progress can be made. Mutates *collection_terms_mapping* in
        place and also returns it, along with warnings describing the assignments made.

        :param collection_terms_mapping: Collection ids mapped to the candidate terms.
        :returns: The (mutated) mapping and the list of warning issues.
        """
        warnings: list[GenerationIssue] = []
        conflicting_collection_ids_list: list[list[str]] = []
        collection_ids: list[str] = list(collection_terms_mapping.keys())
        len_collection_ids: int = len(collection_ids)

        # Build the groups of collections whose term sets overlap (pairwise non-disjoint).
        for l_collection_index in range(0, len_collection_ids - 1):
            conflicting_collection_ids: list[str] = []
            for r_collection_index in range(l_collection_index + 1, len_collection_ids):
                if collection_terms_mapping[collection_ids[l_collection_index]].isdisjoint(
                    collection_terms_mapping[collection_ids[r_collection_index]]
                ):
                    continue
                else:
                    # Skip the pair if it is already covered by a registered group.
                    not_registered = True
                    for cc_ids in conflicting_collection_ids_list:
                        if (
                            collection_ids[l_collection_index] in cc_ids
                            and collection_ids[r_collection_index] in cc_ids
                        ):
                            not_registered = False
                            break
                    if not_registered:
                        conflicting_collection_ids.append(collection_ids[r_collection_index])
            if conflicting_collection_ids:
                conflicting_collection_ids.append(collection_ids[l_collection_index])
                conflicting_collection_ids_list.append(conflicting_collection_ids)

        # Each time a collection is resolved, we must restart the loop so as to check if others can be,
        # until no progress is made.
        # NOTE(review): inside this loop the name `collection_ids` is reused as the loop
        # variable over the conflict groups, shadowing the outer list built above.
        while True:
            # 1. Non-conflicting collections with only one term are assigned.
            # Non-conflicting collections with more than one term will be raise an error
            # in the _check method.

            # Nothing to do.

            # 2a. Collections with one term that are conflicting to each other will raise an error.
            # We don't search for collection with more than one term which term sets are exactly
            # the same, because we cannot choose which term will be removed in 2b.
            # So stick with one term collections: those collection will be detected in method _check.
            collection_ids_with_len_eq_1_list: list[list[str]] = []
            for collection_ids in conflicting_collection_ids_list:
                tmp_conflicting_collection_ids: list[str] = []
                for collection_id in collection_ids:
                    if len(collection_terms_mapping[collection_id]) == 1:
                        tmp_conflicting_collection_ids.append(collection_id)
                if len(tmp_conflicting_collection_ids) > 1:
                    collection_ids_with_len_eq_1_list.append(tmp_conflicting_collection_ids)
            # 2b. As it is not possible to resolve collections sharing the same unique term:
            # raise errors, remove the faulty collections and their term.
            if collection_ids_with_len_eq_1_list:
                for collection_ids_to_be_removed in collection_ids_with_len_eq_1_list:
                    DrsGenerator._remove_ids_from_conflicts(
                        conflicting_collection_ids_list, collection_ids_to_be_removed
                    )
                    DrsGenerator._remove_term_from_other_term_sets(
                        collection_terms_mapping, collection_ids_to_be_removed
                    )
                # Every time conflicting_collection_ids_list is modified, we must restart the loop,
                # as conflicting collections may be resolved.
                continue

            # 3.a For each collections with only one term, assign their term to the detriment of
            # collections with more than one term.
            wining_collection_ids: list[str] = []
            for collection_ids in conflicting_collection_ids_list:
                for collection_id in collection_ids:
                    if len(collection_terms_mapping[collection_id]) == 1:
                        wining_collection_ids.append(collection_id)
                        term = _get_first_item(collection_terms_mapping[collection_id])
                        issue = AssignedTerm(collection_id=collection_id, term=term)
                        warnings.append(issue)
            # 3.b Update conflicting collections.
            if wining_collection_ids:
                DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list, wining_collection_ids)
                DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping, wining_collection_ids)
                # Every time conflicting_collection_ids_list is modified, we must restart the loop,
                # as conflicting collections may be resolved.
                continue

            # 4.a For each term set of the remaining conflicting collections, compute their difference.
            # If the difference is one term, this term is assigned to the collection that owns it.
            wining_id_and_term_pairs: list[tuple[str, str]] = []
            for collection_ids in conflicting_collection_ids_list:
                for collection_index in range(0, len(collection_ids)):
                    # All the other collections of the group, in rotation order.
                    collection_set = collection_ids[collection_index + 1:] + collection_ids[:collection_index]
                    diff: set[str] = collection_terms_mapping[collection_ids[collection_index]].difference(
                        *[
                            collection_terms_mapping[index]  # noqa E127
                            for index in collection_set
                        ]
                    )
                    if len(diff) == 1:
                        wining_id_and_term_pairs.append((collection_ids[collection_index], _get_first_item(diff)))
            # 4.b Update conflicting collections.
            if wining_id_and_term_pairs:
                wining_collection_ids = []
                for collection_id, term in wining_id_and_term_pairs:
                    wining_collection_ids.append(collection_id)
                    # Pin the winning term as the collection's sole candidate.
                    collection_terms_mapping[collection_id].clear()
                    collection_terms_mapping[collection_id].add(term)
                    issue = AssignedTerm(collection_id=collection_id, term=term)
                    warnings.append(issue)
                DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list, wining_collection_ids)
                DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping, wining_collection_ids)
                continue
            else:
                break  # Stop the loop when no progress is made.
        return collection_terms_mapping, warnings
|
|
361
|
+
|
|
362
|
+
    @staticmethod
    def _check_collection_terms_mapping(
        collection_terms_mapping: dict[str, set[str]],
    ) -> tuple[dict[str, str], list[GenerationIssue]]:  # noqa E127
        """
        Flatten the resolved term sets into a one-term-per-collection mapping.

        Detects the remaining irresolvable situations: collections that still share
        the same term(s) (ConflictingCollections) and collections left with more
        than one candidate term (TooManyTermCollection). Mutates
        *collection_terms_mapping* in place (faulty collections are deleted).

        :param collection_terms_mapping: Collection ids mapped to their candidate terms.
        :returns: The collection id to term mapping and the list of error issues.
        """
        errors: list[GenerationIssue] = list()
        # 1. Looking for collections that share strictly the same term(s).
        # NOTE(review): the condition below actually tests that the left set is a
        # subset of the right one (empty difference), not strict equality — confirm
        # this is intended for proper-subset cases.
        collection_ids: list[str] = list(collection_terms_mapping.keys())
        len_collection_ids: int = len(collection_ids)
        faulty_collections_list: list[set[str]] = list()
        for l_collection_index in range(0, len_collection_ids - 1):
            l_collection_id = collection_ids[l_collection_index]
            l_term_set = collection_terms_mapping[l_collection_id]
            for r_collection_index in range(l_collection_index + 1, len_collection_ids):
                r_collection_id = collection_ids[r_collection_index]
                r_term_set = collection_terms_mapping[r_collection_id]
                # Check if the set is empty because the difference will always be an empty set!
                if l_term_set and (not l_term_set.difference(r_term_set)):
                    # Merge the pair into an existing faulty group when possible.
                    not_registered = True
                    for faulty_collections in faulty_collections_list:
                        if l_collection_id in faulty_collections or r_collection_id in faulty_collections:
                            faulty_collections.add(l_collection_id)
                            faulty_collections.add(r_collection_id)
                            not_registered = False
                            break
                    if not_registered:
                        faulty_collections_list.append({l_collection_id, r_collection_id})
        for faulty_collections in faulty_collections_list:
            # All collections of the group share the same terms: pick any one's set.
            terms = collection_terms_mapping[_get_first_item(faulty_collections)]
            issue = ConflictingCollections(
                collection_ids=_transform_set_and_sort(faulty_collections), terms=_transform_set_and_sort(terms)
            )
            errors.append(issue)
            for collection_id in faulty_collections:
                del collection_terms_mapping[collection_id]

        # 2. Looking for collections with more than one term.
        result: dict[str, str] = dict()
        for collection_id, term_set in collection_terms_mapping.items():
            len_term_set = len(term_set)
            if len_term_set == 1:
                result[collection_id] = _get_first_item(term_set)
            elif len_term_set > 1:
                other_issue = TooManyTermCollection(
                    collection_id=collection_id, terms=_transform_set_and_sort(term_set)
                )
                errors.append(other_issue)
            # else: Don't add emptied collection to the result.
        return result, errors
|
|
410
|
+
|
|
411
|
+
@staticmethod
|
|
412
|
+
def _remove_term_from_other_term_sets(
|
|
413
|
+
collection_terms_mapping: dict[str, set[str]], collection_ids_to_be_removed: list[str]
|
|
414
|
+
) -> None:
|
|
415
|
+
for collection_id_to_be_removed in collection_ids_to_be_removed:
|
|
416
|
+
# Should only be one term.
|
|
417
|
+
term_to_be_removed: str = _get_first_item(collection_terms_mapping[collection_id_to_be_removed])
|
|
418
|
+
for collection_id in collection_terms_mapping.keys():
|
|
419
|
+
if collection_id not in collection_ids_to_be_removed:
|
|
420
|
+
collection_terms_mapping[collection_id].discard(term_to_be_removed)
|
|
421
|
+
|
|
422
|
+
@staticmethod
|
|
423
|
+
def _remove_ids_from_conflicts(
|
|
424
|
+
conflicting_collection_ids_list: list[list[str]], collection_ids_to_be_removed: list[str]
|
|
425
|
+
) -> None:
|
|
426
|
+
for collection_id_to_be_removed in collection_ids_to_be_removed:
|
|
427
|
+
for conflicting_collection_ids in conflicting_collection_ids_list:
|
|
428
|
+
if collection_id_to_be_removed in conflicting_collection_ids:
|
|
429
|
+
conflicting_collection_ids.remove(collection_id_to_be_removed)
|