esgvoc 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. esgvoc/__init__.py +3 -0
  2. esgvoc/api/__init__.py +91 -0
  3. esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
  4. esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
  5. esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
  6. esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
  7. esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
  8. esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
  9. esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
  10. esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
  11. esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
  12. esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
  13. esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
  14. esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
  15. esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
  16. esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
  17. esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
  18. esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
  19. esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
  20. esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
  21. esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
  22. esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
  23. esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
  24. esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
  25. esgvoc/api/data_descriptors/__init__.py +159 -0
  26. esgvoc/api/data_descriptors/activity.py +72 -0
  27. esgvoc/api/data_descriptors/archive.py +5 -0
  28. esgvoc/api/data_descriptors/area_label.py +30 -0
  29. esgvoc/api/data_descriptors/branded_suffix.py +30 -0
  30. esgvoc/api/data_descriptors/branded_variable.py +21 -0
  31. esgvoc/api/data_descriptors/citation_url.py +5 -0
  32. esgvoc/api/data_descriptors/contact.py +5 -0
  33. esgvoc/api/data_descriptors/conventions.py +28 -0
  34. esgvoc/api/data_descriptors/creation_date.py +18 -0
  35. esgvoc/api/data_descriptors/data_descriptor.py +127 -0
  36. esgvoc/api/data_descriptors/data_specs_version.py +25 -0
  37. esgvoc/api/data_descriptors/date.py +5 -0
  38. esgvoc/api/data_descriptors/directory_date.py +22 -0
  39. esgvoc/api/data_descriptors/drs_specs.py +38 -0
  40. esgvoc/api/data_descriptors/experiment.py +215 -0
  41. esgvoc/api/data_descriptors/forcing_index.py +21 -0
  42. esgvoc/api/data_descriptors/frequency.py +48 -0
  43. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  44. esgvoc/api/data_descriptors/grid.py +43 -0
  45. esgvoc/api/data_descriptors/horizontal_label.py +20 -0
  46. esgvoc/api/data_descriptors/initialization_index.py +27 -0
  47. esgvoc/api/data_descriptors/institution.py +80 -0
  48. esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
  49. esgvoc/api/data_descriptors/license.py +31 -0
  50. esgvoc/api/data_descriptors/member_id.py +9 -0
  51. esgvoc/api/data_descriptors/mip_era.py +26 -0
  52. esgvoc/api/data_descriptors/model_component.py +32 -0
  53. esgvoc/api/data_descriptors/models_test/models.py +17 -0
  54. esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
  55. esgvoc/api/data_descriptors/obs_type.py +5 -0
  56. esgvoc/api/data_descriptors/organisation.py +22 -0
  57. esgvoc/api/data_descriptors/physics_index.py +21 -0
  58. esgvoc/api/data_descriptors/product.py +16 -0
  59. esgvoc/api/data_descriptors/publication_status.py +5 -0
  60. esgvoc/api/data_descriptors/realization_index.py +24 -0
  61. esgvoc/api/data_descriptors/realm.py +16 -0
  62. esgvoc/api/data_descriptors/regex.py +5 -0
  63. esgvoc/api/data_descriptors/region.py +35 -0
  64. esgvoc/api/data_descriptors/resolution.py +7 -0
  65. esgvoc/api/data_descriptors/source.py +120 -0
  66. esgvoc/api/data_descriptors/source_type.py +5 -0
  67. esgvoc/api/data_descriptors/sub_experiment.py +5 -0
  68. esgvoc/api/data_descriptors/table.py +28 -0
  69. esgvoc/api/data_descriptors/temporal_label.py +20 -0
  70. esgvoc/api/data_descriptors/time_range.py +17 -0
  71. esgvoc/api/data_descriptors/title.py +5 -0
  72. esgvoc/api/data_descriptors/tracking_id.py +67 -0
  73. esgvoc/api/data_descriptors/variable.py +56 -0
  74. esgvoc/api/data_descriptors/variant_label.py +25 -0
  75. esgvoc/api/data_descriptors/vertical_label.py +20 -0
  76. esgvoc/api/project_specs.py +143 -0
  77. esgvoc/api/projects.py +1253 -0
  78. esgvoc/api/py.typed +0 -0
  79. esgvoc/api/pydantic_handler.py +146 -0
  80. esgvoc/api/report.py +127 -0
  81. esgvoc/api/search.py +171 -0
  82. esgvoc/api/universe.py +434 -0
  83. esgvoc/apps/__init__.py +6 -0
  84. esgvoc/apps/cmor_tables/__init__.py +7 -0
  85. esgvoc/apps/cmor_tables/cvs_table.py +948 -0
  86. esgvoc/apps/drs/__init__.py +0 -0
  87. esgvoc/apps/drs/constants.py +2 -0
  88. esgvoc/apps/drs/generator.py +429 -0
  89. esgvoc/apps/drs/report.py +540 -0
  90. esgvoc/apps/drs/validator.py +312 -0
  91. esgvoc/apps/ga/__init__.py +104 -0
  92. esgvoc/apps/ga/example_usage.py +315 -0
  93. esgvoc/apps/ga/models/__init__.py +47 -0
  94. esgvoc/apps/ga/models/netcdf_header.py +306 -0
  95. esgvoc/apps/ga/models/validator.py +491 -0
  96. esgvoc/apps/ga/test_ga.py +161 -0
  97. esgvoc/apps/ga/validator.py +277 -0
  98. esgvoc/apps/jsg/json_schema_generator.py +341 -0
  99. esgvoc/apps/jsg/templates/template.jinja +241 -0
  100. esgvoc/apps/test_cv/README.md +214 -0
  101. esgvoc/apps/test_cv/__init__.py +0 -0
  102. esgvoc/apps/test_cv/cv_tester.py +1611 -0
  103. esgvoc/apps/test_cv/example_usage.py +216 -0
  104. esgvoc/apps/vr/__init__.py +12 -0
  105. esgvoc/apps/vr/build_variable_registry.py +71 -0
  106. esgvoc/apps/vr/example_usage.py +60 -0
  107. esgvoc/apps/vr/vr_app.py +333 -0
  108. esgvoc/cli/clean.py +304 -0
  109. esgvoc/cli/cmor.py +46 -0
  110. esgvoc/cli/config.py +1300 -0
  111. esgvoc/cli/drs.py +267 -0
  112. esgvoc/cli/find.py +138 -0
  113. esgvoc/cli/get.py +155 -0
  114. esgvoc/cli/install.py +41 -0
  115. esgvoc/cli/main.py +60 -0
  116. esgvoc/cli/offline.py +269 -0
  117. esgvoc/cli/status.py +79 -0
  118. esgvoc/cli/test_cv.py +258 -0
  119. esgvoc/cli/valid.py +147 -0
  120. esgvoc/core/constants.py +17 -0
  121. esgvoc/core/convert.py +0 -0
  122. esgvoc/core/data_handler.py +206 -0
  123. esgvoc/core/db/__init__.py +3 -0
  124. esgvoc/core/db/connection.py +40 -0
  125. esgvoc/core/db/models/mixins.py +25 -0
  126. esgvoc/core/db/models/project.py +102 -0
  127. esgvoc/core/db/models/universe.py +98 -0
  128. esgvoc/core/db/project_ingestion.py +231 -0
  129. esgvoc/core/db/universe_ingestion.py +172 -0
  130. esgvoc/core/exceptions.py +33 -0
  131. esgvoc/core/logging_handler.py +26 -0
  132. esgvoc/core/repo_fetcher.py +345 -0
  133. esgvoc/core/service/__init__.py +41 -0
  134. esgvoc/core/service/configuration/config_manager.py +196 -0
  135. esgvoc/core/service/configuration/setting.py +363 -0
  136. esgvoc/core/service/data_merger.py +634 -0
  137. esgvoc/core/service/esg_voc.py +77 -0
  138. esgvoc/core/service/resolver_config.py +56 -0
  139. esgvoc/core/service/state.py +324 -0
  140. esgvoc/core/service/string_heuristics.py +98 -0
  141. esgvoc/core/service/term_cache.py +108 -0
  142. esgvoc/core/service/uri_resolver.py +133 -0
  143. esgvoc-2.0.2.dist-info/METADATA +82 -0
  144. esgvoc-2.0.2.dist-info/RECORD +147 -0
  145. esgvoc-2.0.2.dist-info/WHEEL +4 -0
  146. esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
  147. esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0
File without changes
@@ -0,0 +1,2 @@
# NOTE(review): presumably the key under which the file name extension is looked up
# in the DRS specifications -- the consumers are not visible here, confirm against callers.
FILE_NAME_EXTENSION_KEY = 'extension'
# NOTE(review): presumably the key of the separator placed before the file name
# extension -- confirm against callers.
FILE_NAME_EXTENSION_SEPARATOR_KEY = 'extension_separator'
@@ -0,0 +1,429 @@
1
+ from typing import Any, Iterable, Mapping, cast
2
+
3
+ import esgvoc.api.projects as projects
4
+ from esgvoc.api.project_specs import DrsSpecification, DrsType
5
+ from esgvoc.api.search import MatchingTerm
6
+ from esgvoc.apps.drs.report import (
7
+ AssignedTerm,
8
+ ConflictingCollections,
9
+ DrsGenerationReport,
10
+ GenerationError,
11
+ GenerationIssue,
12
+ GenerationWarning,
13
+ InvalidTerm,
14
+ MissingTerm,
15
+ TooManyTermCollection,
16
+ )
17
+ from esgvoc.apps.drs.validator import DrsApplication
18
+ from esgvoc.core.exceptions import EsgvocDbError
19
+
20
+
21
+ def _get_first_item(items: set[Any]) -> Any:
22
+ result = None
23
+ for result in items: # noqa: B007
24
+ break
25
+ return result
26
+
27
+
28
+ def _transform_set_and_sort(_set: set[Any]) -> list[Any]:
29
+ result = list(_set)
30
+ result.sort()
31
+ return result
32
+
33
+
34
+ class DrsGenerator(DrsApplication):
35
+ """
36
+ Generate a directory, dataset id and file name expression specified by the given project from
37
+ a mapping of collection ids and terms or an unordered bag of terms.
38
+ """
39
+
def generate_directory_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
    """
    Build a directory DRS expression out of terms indexed by their collection id.

    :param mapping: The terms (values) indexed by the id of their collection (keys).
    :type mapping: Mapping[str, str]
    :returns: The report of the generation.
    :rtype: DrsGenerationReport
    """
    # Delegate to the generic generation, driven by the directory specifications.
    return self._generate_from_mapping(mapping=mapping, specs=self.directory_specs)
50
+
def generate_directory_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
    """
    Build a directory DRS expression out of terms given in any order.

    :param terms: The terms, in no particular order.
    :type terms: Iterable[str]
    :returns: The report of the generation.
    :rtype: DrsGenerationReport
    """
    # Delegate to the generic generation, driven by the directory specifications.
    return self._generate_from_bag_of_terms(terms=terms, specs=self.directory_specs)
61
+
def generate_dataset_id_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
    """
    Build a dataset id DRS expression out of terms indexed by their collection id.

    :param mapping: The terms (values) indexed by the id of their collection (keys).
    :type mapping: Mapping[str, str]
    :returns: The report of the generation.
    :rtype: DrsGenerationReport
    """
    # Delegate to the generic generation, driven by the dataset id specifications.
    return self._generate_from_mapping(mapping=mapping, specs=self.dataset_id_specs)
72
+
def generate_dataset_id_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
    """
    Build a dataset id DRS expression out of terms given in any order.

    :param terms: The terms, in no particular order.
    :type terms: Iterable[str]
    :returns: The report of the generation.
    :rtype: DrsGenerationReport
    """
    # Delegate to the generic generation, driven by the dataset id specifications.
    return self._generate_from_bag_of_terms(terms=terms, specs=self.dataset_id_specs)
83
+
def generate_file_name_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
    """
    Build a file name DRS expression out of terms indexed by their collection id.

    The file name extension is appended automatically to the generated expression,
    so none of the given terms must include it.

    :param mapping: The terms (values) indexed by the id of their collection (keys).
    :type mapping: Mapping[str, str]
    :returns: The report of the generation, whose expression ends with the extension.
    :rtype: DrsGenerationReport
    """
    report = self._generate_from_mapping(mapping=mapping, specs=self.file_name_specs)
    # The extension is appended whatever the issues recorded in the report.
    report.generated_drs_expression += self._get_full_file_name_extension()  # noqa E127
    return report
98
+
def generate_file_name_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
    """
    Build a file name DRS expression out of terms given in any order.

    The file name extension is appended automatically to the generated expression,
    so none of the given terms must include it.

    :param terms: The terms, in no particular order.
    :type terms: Iterable[str]
    :returns: The report of the generation, whose expression ends with the extension.
    :rtype: DrsGenerationReport
    """
    report = self._generate_from_bag_of_terms(terms=terms, specs=self.file_name_specs)
    # The extension is appended whatever the issues recorded in the report.
    report.generated_drs_expression += self._get_full_file_name_extension()  # noqa E127
    return report
113
+
def generate_from_mapping(self, mapping: Mapping[str, str], drs_type: DrsType | str) -> DrsGenerationReport:
    """
    Build the DRS expression of the requested type out of terms indexed by their collection id.

    :param mapping: The terms (values) indexed by the id of their collection (keys).
    :type mapping: Mapping[str, str]
    :param drs_type: The type of DRS expression to generate (directory, file_name or dataset_id).
    :type drs_type: DrsType|str
    :returns: The report of the generation.
    :rtype: DrsGenerationReport
    :raises EsgvocDbError: If `drs_type` is equal to none of the supported DRS types.
    """
    # Same equality tests as a `match` on value patterns, written as guard clauses.
    if drs_type == DrsType.DIRECTORY:
        return self.generate_directory_from_mapping(mapping=mapping)
    if drs_type == DrsType.FILE_NAME:
        return self.generate_file_name_from_mapping(mapping=mapping)
    if drs_type == DrsType.DATASET_ID:
        return self.generate_dataset_id_from_mapping(mapping=mapping)
    raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
134
+
def generate_from_bag_of_terms(self, terms: Iterable[str], drs_type: DrsType | str) -> DrsGenerationReport:  # noqa E127
    """
    Build the DRS expression of the requested type out of terms given in any order.

    :param terms: The terms, in no particular order.
    :type terms: Iterable[str]
    :param drs_type: The type of DRS expression to generate (directory, file_name or dataset_id).
    :type drs_type: DrsType|str
    :returns: The report of the generation.
    :rtype: DrsGenerationReport
    :raises EsgvocDbError: If `drs_type` is equal to none of the supported DRS types.
    """
    # Same equality tests as a `match` on value patterns, written as guard clauses.
    if drs_type == DrsType.DIRECTORY:
        return self.generate_directory_from_bag_of_terms(terms=terms)
    if drs_type == DrsType.FILE_NAME:
        return self.generate_file_name_from_bag_of_terms(terms=terms)
    if drs_type == DrsType.DATASET_ID:
        return self.generate_dataset_id_from_bag_of_terms(terms=terms)
    raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
155
+
def _generate_from_mapping(self, mapping: Mapping[str, str], specs: DrsSpecification) -> DrsGenerationReport:  # noqa E127
    """
    Generate a DRS expression from a mapping, validating each given term, and wrap
    the outcome into a report.

    :param mapping: The terms (values) indexed by the id of their collection (keys).
    :type mapping: Mapping[str, str]
    :param specs: The DRS specification that drives the generation.
    :type specs: DrsSpecification
    :returns: The report of the generation.
    :rtype: DrsGenerationReport
    """
    expression, found_errors, found_warnings = self.__generate_from_mapping(mapping, specs, True)
    if self.pedantic:
        # In pedantic mode every warning is reported as an error.
        found_errors.extend(found_warnings)
        found_warnings.clear()
    report_fields = dict(
        project_id=self.project_id,
        type=specs.type,
        given_mapping_or_bag_of_terms=mapping,
        mapping_used=mapping,
        generated_drs_expression=expression,
        errors=cast(list[GenerationError], found_errors),
        warnings=cast(list[GenerationWarning], found_warnings),
    )
    return DrsGenerationReport(**report_fields)
170
+
def __generate_from_mapping(
    self, mapping: Mapping[str, str], specs: DrsSpecification, has_to_valid_terms: bool
) -> tuple[str, list[GenerationIssue], list[GenerationIssue]]:  # noqa E127
    """
    Assemble a DRS expression by walking the parts of the given specification.

    For each part, the term is looked up in the mapping with the source collection id
    of the part. An invalid term is replaced by `DrsGenerationReport.INVALID_TAG`,
    a missing required term by `DrsGenerationReport.MISSING_TAG`, and a missing
    optional term is skipped.

    :param mapping: The terms (values) indexed by the id of their collection (keys).
    :type mapping: Mapping[str, str]
    :param specs: The DRS specification that provides the parts and the separator.
    :type specs: DrsSpecification
    :param has_to_valid_terms: Whether the given terms must be validated against the project.
    :type has_to_valid_terms: bool
    :returns: The generated expression, the errors and the warnings.
    :rtype: tuple[str, list[GenerationIssue], list[GenerationIssue]]
    """
    errors: list[GenerationIssue] = []
    warnings: list[GenerationIssue] = []
    rendered_parts: list[str] = []
    # Positions reported in the issues are 1-based.
    for position, part in enumerate(specs.parts, start=1):
        collection_id = part.source_collection
        if collection_id not in mapping:
            missing = MissingTerm(collection_id=collection_id, collection_position=position)
            if not part.is_required:
                # A missing optional part is only worth a warning and leaves no trace in the expression.
                warnings.append(missing)
                continue
            errors.append(missing)
            rendered_parts.append(DrsGenerationReport.MISSING_TAG)
            continue

        value = mapping[collection_id]
        if has_to_valid_terms:
            if part.source_collection_term is None:
                # The term may be any term of the collection.
                is_valid = projects.valid_term_in_collection(value, self.project_id, collection_id)
            else:
                # The term must match the specific term designated by the part.
                is_valid = projects.valid_term(
                    value, self.project_id, collection_id, part.source_collection_term
                ).validated
            if not is_valid:
                errors.append(
                    InvalidTerm(term=value, term_position=position, collection_id_or_constant_value=collection_id)
                )
                value = DrsGenerationReport.INVALID_TAG
        rendered_parts.append(value)

    return specs.separator.join(rendered_parts), errors, warnings
213
+
def _generate_from_bag_of_terms(self, terms: Iterable[str], specs: DrsSpecification) -> DrsGenerationReport:  # noqa E127
    """
    Generate a DRS expression from an unordered bag of terms and wrap the outcome into a report.

    Each term is matched against the collections referenced by the parts of the
    specification; conflicts between collections are then resolved (or reported)
    before the expression is assembled.

    :param terms: The terms, in no particular order. A one-shot iterator is accepted.
    :type terms: Iterable[str]
    :param specs: The DRS specification that drives the generation.
    :type specs: DrsSpecification
    :returns: The report of the generation.
    :rtype: DrsGenerationReport
    """
    # The terms are iterated below and handed to the report afterwards: a one-shot
    # iterator (e.g. a generator) would reach the report already exhausted, so it is
    # materialized first. Re-iterable inputs are passed to the report untouched.
    if iter(terms) is terms:
        terms = list(terms)

    collection_terms_mapping: dict[str, set[str]] = dict()
    for term in terms:
        matching_terms: list[MatchingTerm] = list()
        for part in specs.parts:
            if part.source_collection_term is None:
                # The term may match any term of the collection.
                matching_terms.extend(
                    projects.valid_term_in_collection(term, self.project_id, part.source_collection)
                )
            elif projects.valid_term(
                term, self.project_id, part.source_collection, part.source_collection_term
            ).validated:
                # The term matches the specific term designated by the part.
                matching_terms.append(
                    MatchingTerm(
                        project_id=self.project_id,
                        collection_id=part.source_collection,
                        term_id=part.source_collection_term,
                    )
                )
        # Register the term as a candidate of every collection it matched.
        for matching_term in matching_terms:
            collection_terms_mapping.setdefault(matching_term.collection_id, set()).add(term)

    collection_terms_mapping, warnings = DrsGenerator._resolve_conflicts(collection_terms_mapping)
    mapping, errors = DrsGenerator._check_collection_terms_mapping(collection_terms_mapping)
    # The terms have already been validated above: no need to validate them again.
    drs_expression, errs, warns = self.__generate_from_mapping(mapping, specs, False)
    errors.extend(errs)
    warnings.extend(warns)
    if self.pedantic:
        # In pedantic mode every warning is reported as an error.
        errors.extend(warnings)
        warnings.clear()
    return DrsGenerationReport(
        project_id=self.project_id,
        type=specs.type,
        given_mapping_or_bag_of_terms=terms,
        mapping_used=mapping,
        generated_drs_expression=drs_expression,
        errors=cast(list[GenerationError], errors),
        warnings=cast(list[GenerationWarning], warnings),
    )
247
+
@staticmethod
def _resolve_conflicts(
    collection_terms_mapping: dict[str, set[str]],
) -> tuple[dict[str, set[str]], list[GenerationIssue]]:  # noqa E127
    """
    Try to settle the collections that compete for the same terms.

    Two collections are conflicting when their term sets are not disjoint. Conflicting
    collections are gathered into groups, then the groups are processed repeatedly until
    no more progress is made. The given mapping is modified in place.

    :param collection_terms_mapping: The candidate terms (values) of each collection id (keys).
    :type collection_terms_mapping: dict[str, set[str]]
    :returns: The given mapping (mutated) and an AssignedTerm warning for each term assignment
              made in steps 3 and 4.
    :rtype: tuple[dict[str, set[str]], list[GenerationIssue]]
    """
    warnings: list[GenerationIssue] = list()
    # Each inner list is a group of collection ids that conflict with the last id of the list.
    conflicting_collection_ids_list: list[list[str]] = list()
    collection_ids: list[str] = list(collection_terms_mapping.keys())
    len_collection_ids: int = len(collection_ids)

    # Pairwise comparison of the term sets: each pair (left, right) is visited once.
    for l_collection_index in range(0, len_collection_ids - 1):
        conflicting_collection_ids: list[str] = list()
        for r_collection_index in range(l_collection_index + 1, len_collection_ids):
            if collection_terms_mapping[collection_ids[l_collection_index]].isdisjoint(
                collection_terms_mapping[collection_ids[r_collection_index]]
            ):
                continue
            else:
                # The pair is skipped when both ids already belong to a same registered group.
                not_registered = True
                for cc_ids in conflicting_collection_ids_list:
                    if (
                        collection_ids[l_collection_index] in cc_ids
                        and collection_ids[r_collection_index] in cc_ids
                    ):
                        not_registered = False
                        break
                if not_registered:
                    conflicting_collection_ids.append(collection_ids[r_collection_index])
        if conflicting_collection_ids:
            # The left collection closes the group of the collections it conflicts with.
            conflicting_collection_ids.append(collection_ids[l_collection_index])
            conflicting_collection_ids_list.append(conflicting_collection_ids)

    # Each time a collection is resolved, we must restart the loop so as to check if others can be,
    # until no progress is made.
    while True:
        # 1. Non-conflicting collections with only one term are assigned.
        #    Non-conflicting collections with more than one term will raise an error
        #    in the _check method.

        # Nothing to do.

        # 2a. Collections with one term that are conflicting to each other will raise an error.
        #     We don't search for collections with more than one term whose term sets are exactly
        #     the same, because we cannot choose which term will be removed in 2b.
        #     So stick with one term collections: those collections will be detected in method _check.
        #     NOTE(review): every one term collection of a group is gathered here, without checking
        #     that they actually hold the same term.
        #     NOTE: from here on, the name collection_ids is reused as loop variable (one group).
        collection_ids_with_len_eq_1_list: list[list[str]] = list()
        for collection_ids in conflicting_collection_ids_list:
            tmp_conflicting_collection_ids: list[str] = list()
            for collection_id in collection_ids:
                if len(collection_terms_mapping[collection_id]) == 1:
                    tmp_conflicting_collection_ids.append(collection_id)
            if len(tmp_conflicting_collection_ids) > 1:
                collection_ids_with_len_eq_1_list.append(tmp_conflicting_collection_ids)
        # 2b. As it is not possible to resolve collections sharing the same unique term:
        #     remove the faulty collections from the groups and their term from the other
        #     term sets (the error itself is raised later, in method _check).
        if collection_ids_with_len_eq_1_list:
            for collection_ids_to_be_removed in collection_ids_with_len_eq_1_list:
                DrsGenerator._remove_ids_from_conflicts(
                    conflicting_collection_ids_list, collection_ids_to_be_removed
                )
                DrsGenerator._remove_term_from_other_term_sets(
                    collection_terms_mapping, collection_ids_to_be_removed
                )
            # Every time conflicting_collection_ids_list is modified, we must restart the loop,
            # as conflicting collections may be resolved.
            continue

        # 3.a For each collection with only one term, assign its term to the detriment of
        #     collections with more than one term.
        wining_collection_ids: list[str] = list()
        for collection_ids in conflicting_collection_ids_list:
            for collection_id in collection_ids:
                if len(collection_terms_mapping[collection_id]) == 1:
                    wining_collection_ids.append(collection_id)
                    term = _get_first_item(collection_terms_mapping[collection_id])
                    issue = AssignedTerm(collection_id=collection_id, term=term)
                    warnings.append(issue)
        # 3.b Update conflicting collections.
        if wining_collection_ids:
            DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list, wining_collection_ids)
            DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping, wining_collection_ids)
            # Every time conflicting_collection_ids_list is modified, we must restart the loop,
            # as conflicting collections may be resolved.
            continue

        # 4.a For each term set of the remaining conflicting collections, compute their difference.
        #     If the difference is one term, this term is assigned to the collection that owns it.
        wining_id_and_term_pairs: list[tuple[str, str]] = list()
        for collection_ids in conflicting_collection_ids_list:
            for collection_index in range(0, len(collection_ids)):
                # All the collections of the group but the current one.
                collection_set = collection_ids[collection_index + 1:] + collection_ids[:collection_index]
                diff: set[str] = collection_terms_mapping[collection_ids[collection_index]].difference(
                    *[
                        collection_terms_mapping[index]  # noqa E127
                        for index in collection_set
                    ]
                )
                if len(diff) == 1:
                    wining_id_and_term_pairs.append((collection_ids[collection_index], _get_first_item(diff)))
        # 4.b Update conflicting collections.
        if wining_id_and_term_pairs:
            wining_collection_ids = list()
            for collection_id, term in wining_id_and_term_pairs:
                wining_collection_ids.append(collection_id)
                # The winning collection keeps the term it is the only one to own.
                collection_terms_mapping[collection_id].clear()
                collection_terms_mapping[collection_id].add(term)
                issue = AssignedTerm(collection_id=collection_id, term=term)
                warnings.append(issue)
            DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list, wining_collection_ids)
            DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping, wining_collection_ids)
            continue
        else:
            break  # Stop the loop when no progress is made.
    return collection_terms_mapping, warnings
361
+
362
+ @staticmethod
363
+ def _check_collection_terms_mapping(
364
+ collection_terms_mapping: dict[str, set[str]],
365
+ ) -> tuple[dict[str, str], list[GenerationIssue]]: # noqa E127
366
+ errors: list[GenerationIssue] = list()
367
+ # 1. Looking for collections that share strictly the same term(s).
368
+ collection_ids: list[str] = list(collection_terms_mapping.keys())
369
+ len_collection_ids: int = len(collection_ids)
370
+ faulty_collections_list: list[set[str]] = list()
371
+ for l_collection_index in range(0, len_collection_ids - 1):
372
+ l_collection_id = collection_ids[l_collection_index]
373
+ l_term_set = collection_terms_mapping[l_collection_id]
374
+ for r_collection_index in range(l_collection_index + 1, len_collection_ids):
375
+ r_collection_id = collection_ids[r_collection_index]
376
+ r_term_set = collection_terms_mapping[r_collection_id]
377
+ # Check if the set is empty because the difference will always be an empty set!
378
+ if l_term_set and (not l_term_set.difference(r_term_set)):
379
+ not_registered = True
380
+ for faulty_collections in faulty_collections_list:
381
+ if l_collection_id in faulty_collections or r_collection_id in faulty_collections:
382
+ faulty_collections.add(l_collection_id)
383
+ faulty_collections.add(r_collection_id)
384
+ not_registered = False
385
+ break
386
+ if not_registered:
387
+ faulty_collections_list.append({l_collection_id, r_collection_id})
388
+ for faulty_collections in faulty_collections_list:
389
+ terms = collection_terms_mapping[_get_first_item(faulty_collections)]
390
+ issue = ConflictingCollections(
391
+ collection_ids=_transform_set_and_sort(faulty_collections), terms=_transform_set_and_sort(terms)
392
+ )
393
+ errors.append(issue)
394
+ for collection_id in faulty_collections:
395
+ del collection_terms_mapping[collection_id]
396
+
397
+ # 2. Looking for collections with more than one term.
398
+ result: dict[str, str] = dict()
399
+ for collection_id, term_set in collection_terms_mapping.items():
400
+ len_term_set = len(term_set)
401
+ if len_term_set == 1:
402
+ result[collection_id] = _get_first_item(term_set)
403
+ elif len_term_set > 1:
404
+ other_issue = TooManyTermCollection(
405
+ collection_id=collection_id, terms=_transform_set_and_sort(term_set)
406
+ )
407
+ errors.append(other_issue)
408
+ # else: Don't add emptied collection to the result.
409
+ return result, errors
410
+
411
+ @staticmethod
412
+ def _remove_term_from_other_term_sets(
413
+ collection_terms_mapping: dict[str, set[str]], collection_ids_to_be_removed: list[str]
414
+ ) -> None:
415
+ for collection_id_to_be_removed in collection_ids_to_be_removed:
416
+ # Should only be one term.
417
+ term_to_be_removed: str = _get_first_item(collection_terms_mapping[collection_id_to_be_removed])
418
+ for collection_id in collection_terms_mapping.keys():
419
+ if collection_id not in collection_ids_to_be_removed:
420
+ collection_terms_mapping[collection_id].discard(term_to_be_removed)
421
+
422
+ @staticmethod
423
+ def _remove_ids_from_conflicts(
424
+ conflicting_collection_ids_list: list[list[str]], collection_ids_to_be_removed: list[str]
425
+ ) -> None:
426
+ for collection_id_to_be_removed in collection_ids_to_be_removed:
427
+ for conflicting_collection_ids in conflicting_collection_ids_list:
428
+ if collection_id_to_be_removed in conflicting_collection_ids:
429
+ conflicting_collection_ids.remove(collection_id_to_be_removed)