esgvoc 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic. Click here for more details.

Files changed (70)
  1. esgvoc/__init__.py +3 -1
  2. esgvoc/api/__init__.py +23 -34
  3. esgvoc/api/_utils.py +28 -14
  4. esgvoc/api/data_descriptors/__init__.py +18 -12
  5. esgvoc/api/data_descriptors/activity.py +8 -45
  6. esgvoc/api/data_descriptors/area_label.py +6 -0
  7. esgvoc/api/data_descriptors/branded_suffix.py +5 -0
  8. esgvoc/api/data_descriptors/branded_variable.py +5 -0
  9. esgvoc/api/data_descriptors/consortium.py +16 -56
  10. esgvoc/api/data_descriptors/data_descriptor.py +106 -0
  11. esgvoc/api/data_descriptors/date.py +3 -46
  12. esgvoc/api/data_descriptors/directory_date.py +3 -46
  13. esgvoc/api/data_descriptors/experiment.py +19 -54
  14. esgvoc/api/data_descriptors/forcing_index.py +3 -45
  15. esgvoc/api/data_descriptors/frequency.py +6 -43
  16. esgvoc/api/data_descriptors/grid_label.py +6 -44
  17. esgvoc/api/data_descriptors/horizontal_label.py +6 -0
  18. esgvoc/api/data_descriptors/initialisation_index.py +3 -44
  19. esgvoc/api/data_descriptors/institution.py +11 -54
  20. esgvoc/api/data_descriptors/license.py +4 -44
  21. esgvoc/api/data_descriptors/mip_era.py +6 -44
  22. esgvoc/api/data_descriptors/model_component.py +7 -45
  23. esgvoc/api/data_descriptors/organisation.py +3 -40
  24. esgvoc/api/data_descriptors/physic_index.py +3 -45
  25. esgvoc/api/data_descriptors/product.py +4 -43
  26. esgvoc/api/data_descriptors/realisation_index.py +3 -44
  27. esgvoc/api/data_descriptors/realm.py +4 -42
  28. esgvoc/api/data_descriptors/resolution.py +6 -44
  29. esgvoc/api/data_descriptors/source.py +18 -53
  30. esgvoc/api/data_descriptors/source_type.py +3 -41
  31. esgvoc/api/data_descriptors/sub_experiment.py +3 -41
  32. esgvoc/api/data_descriptors/table.py +6 -48
  33. esgvoc/api/data_descriptors/temporal_label.py +6 -0
  34. esgvoc/api/data_descriptors/time_range.py +3 -27
  35. esgvoc/api/data_descriptors/variable.py +13 -71
  36. esgvoc/api/data_descriptors/variant_label.py +3 -47
  37. esgvoc/api/data_descriptors/vertical_label.py +5 -0
  38. esgvoc/api/projects.py +187 -171
  39. esgvoc/api/report.py +21 -12
  40. esgvoc/api/search.py +3 -1
  41. esgvoc/api/universe.py +44 -34
  42. esgvoc/apps/__init__.py +3 -4
  43. esgvoc/apps/drs/generator.py +166 -161
  44. esgvoc/apps/drs/report.py +222 -131
  45. esgvoc/apps/drs/validator.py +103 -105
  46. esgvoc/cli/drs.py +29 -19
  47. esgvoc/cli/get.py +26 -25
  48. esgvoc/cli/install.py +11 -8
  49. esgvoc/cli/main.py +0 -2
  50. esgvoc/cli/status.py +5 -5
  51. esgvoc/cli/valid.py +40 -40
  52. esgvoc/core/db/models/universe.py +3 -3
  53. esgvoc/core/db/project_ingestion.py +1 -1
  54. esgvoc/core/db/universe_ingestion.py +6 -5
  55. esgvoc/core/logging_handler.py +1 -1
  56. esgvoc/core/repo_fetcher.py +4 -3
  57. esgvoc/core/service/__init__.py +37 -5
  58. esgvoc/core/service/configuration/config_manager.py +188 -0
  59. esgvoc/core/service/configuration/setting.py +88 -0
  60. esgvoc/core/service/state.py +49 -32
  61. {esgvoc-0.2.1.dist-info → esgvoc-0.3.0.dist-info}/METADATA +34 -3
  62. esgvoc-0.3.0.dist-info/RECORD +78 -0
  63. esgvoc/cli/config.py +0 -82
  64. esgvoc/core/service/settings.py +0 -73
  65. esgvoc/core/service/settings.toml +0 -17
  66. esgvoc/core/service/settings_default.toml +0 -17
  67. esgvoc-0.2.1.dist-info/RECORD +0 -73
  68. {esgvoc-0.2.1.dist-info → esgvoc-0.3.0.dist-info}/WHEEL +0 -0
  69. {esgvoc-0.2.1.dist-info → esgvoc-0.3.0.dist-info}/entry_points.txt +0 -0
  70. {esgvoc-0.2.1.dist-info → esgvoc-0.3.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -1,22 +1,14 @@
1
- from typing import cast, Iterable, Mapping, Any
1
+ from typing import Any, Iterable, Mapping, cast
2
2
 
3
3
  import esgvoc.api.projects as projects
4
-
5
- from esgvoc.api.project_specs import (DrsSpecification,
6
- DrsPartKind,
7
- DrsCollection,
8
- DrsConstant,
9
- DrsType)
10
-
4
+ from esgvoc.api.project_specs import (DrsCollection, DrsConstant, DrsPartKind,
5
+ DrsSpecification, DrsType)
6
+ from esgvoc.apps.drs.report import (AssignedTerm, ConflictingCollections,
7
+ DrsGenerationReport, GenerationError,
8
+ GenerationIssue, GenerationWarning,
9
+ InvalidTerm, MissingTerm,
10
+ TooManyTermCollection)
11
11
  from esgvoc.apps.drs.validator import DrsApplication
12
- from esgvoc.apps.drs.report import (DrsGeneratorReport,
13
- DrsIssue,
14
- GeneratorIssue,
15
- TooManyTokensCollection,
16
- InvalidToken,
17
- MissingToken,
18
- ConflictingCollections,
19
- AssignedToken)
20
12
 
21
13
 
22
14
  def _get_first_item(items: set[Any]) -> Any:
@@ -35,137 +27,150 @@ def _transform_set_and_sort(_set: set[Any]) -> list[Any]:
35
27
  class DrsGenerator(DrsApplication):
36
28
  """
37
29
  Generate a directory, dataset id and file name expression specified by the given project from
38
- a mapping of collection ids and tokens or an unordered bag of tokens.
30
+ a mapping of collection ids and terms or an unordered bag of terms.
39
31
  """
40
-
41
- def generate_directory_from_mapping(self, mapping: Mapping[str, str]) -> DrsGeneratorReport:
32
+
33
+ def generate_directory_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
42
34
  """
43
- Generate a directory DRS expression from a mapping of collection ids and tokens.
35
+ Generate a directory DRS expression from a mapping of collection ids and terms.
44
36
 
45
- :param mapping: A mapping of collection ids (keys) and tokens (values).
37
+ :param mapping: A mapping of collection ids (keys) and terms (values).
46
38
  :type mapping: Mapping[str, str]
47
39
  :returns: A generation report.
48
40
  :rtype: DrsGeneratorReport
49
41
  """
50
42
  return self._generate_from_mapping(mapping, self.directory_specs)
51
-
52
- def generate_directory_from_bag_of_tokens(self, tokens: Iterable[str]) -> DrsGeneratorReport:
43
+
44
+ def generate_directory_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
53
45
  """
54
- Generate a directory DRS expression from an unordered bag of tokens.
46
+ Generate a directory DRS expression from an unordered bag of terms.
55
47
 
56
- :param tokens: An unordered bag of tokens.
57
- :type tokens: Iterable[str]
48
+ :param terms: An unordered bag of terms.
49
+ :type terms: Iterable[str]
58
50
  :returns: A generation report.
59
51
  :rtype: DrsGeneratorReport
60
52
  """
61
- return self._generate_from_bag_of_tokens(tokens, self.directory_specs)
53
+ return self._generate_from_bag_of_terms(terms, self.directory_specs)
62
54
 
63
- def generate_dataset_id_from_mapping(self, mapping: Mapping[str, str]) -> DrsGeneratorReport:
55
+ def generate_dataset_id_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
64
56
  """
65
- Generate a dataset id DRS expression from a mapping of collection ids and tokens.
57
+ Generate a dataset id DRS expression from a mapping of collection ids and terms.
66
58
 
67
- :param mapping: A mapping of collection ids (keys) and tokens (values).
59
+ :param mapping: A mapping of collection ids (keys) and terms (values).
68
60
  :type mapping: Mapping[str, str]
69
61
  :returns: A generation report.
70
62
  :rtype: DrsGeneratorReport
71
63
  """
72
64
  return self._generate_from_mapping(mapping, self.dataset_id_specs)
73
-
74
- def generate_dataset_id_from_bag_of_tokens(self, tokens: Iterable[str]) -> DrsGeneratorReport:
65
+
66
+ def generate_dataset_id_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
75
67
  """
76
- Generate a dataset id DRS expression from an unordered bag of tokens.
68
+ Generate a dataset id DRS expression from an unordered bag of terms.
77
69
 
78
- :param tokens: An unordered bag of tokens.
79
- :type tokens: Iterable[str]
70
+ :param terms: An unordered bag of terms.
71
+ :type terms: Iterable[str]
80
72
  :returns: A generation report.
81
73
  :rtype: DrsGeneratorReport
82
74
  """
83
- return self._generate_from_bag_of_tokens(tokens, self.dataset_id_specs)
84
-
75
+ return self._generate_from_bag_of_terms(terms, self.dataset_id_specs)
85
76
 
86
- def generate_file_name_from_mapping(self, mapping: Mapping[str, str]) -> DrsGeneratorReport:
77
+
78
+ def generate_file_name_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
87
79
  """
88
- Generate a file name DRS expression from a mapping of collection ids and tokens.
80
+ Generate a file name DRS expression from a mapping of collection ids and terms.
89
81
  The file name extension is append automatically, according to the DRS specification,
90
- so none of the tokens given must include the extension.
82
+ so none of the terms given must include the extension.
91
83
 
92
- :param mapping: A mapping of collection ids (keys) and tokens (values).
84
+ :param mapping: A mapping of collection ids (keys) and terms (values).
93
85
  :type mapping: Mapping[str, str]
94
86
  :returns: A generation report.
95
87
  :rtype: DrsGeneratorReport
96
88
  """
97
89
  report = self._generate_from_mapping(mapping, self.file_name_specs)
98
- report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension()
99
- return report
100
-
101
- def generate_file_name_from_bag_of_tokens(self, tokens: Iterable[str]) -> DrsGeneratorReport:
90
+ report.generated_drs_expression = report.generated_drs_expression + \
91
+ self._get_full_file_name_extension()
92
+ return report
93
+
94
+ def generate_file_name_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
102
95
  """
103
- Generate a file name DRS expression from an unordered bag of tokens.
96
+ Generate a file name DRS expression from an unordered bag of terms.
104
97
  The file name extension is append automatically, according to the DRS specification,
105
- so none of the tokens given must include the extension.
98
+ so none of the terms given must include the extension.
106
99
 
107
- :param tokens: An unordered bag of tokens.
108
- :type tokens: Iterable[str]
100
+ :param terms: An unordered bag of terms.
101
+ :type terms: Iterable[str]
109
102
  :returns: A generation report.
110
103
  :rtype: DrsGeneratorReport
111
104
  """
112
- report = self._generate_from_bag_of_tokens(tokens, self.file_name_specs)
113
- report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension()
114
- return report
105
+ report = self._generate_from_bag_of_terms(terms, self.file_name_specs)
106
+ report.generated_drs_expression = report.generated_drs_expression + \
107
+ self._get_full_file_name_extension()
108
+ return report
115
109
 
116
110
  def generate_from_mapping(self, mapping: Mapping[str, str],
117
- drs_type: DrsType|str) -> DrsGeneratorReport:
111
+ drs_type: DrsType|str) -> DrsGenerationReport:
118
112
  """
119
- Generate a DRS expression from a mapping of collection ids and tokens.
113
+ Generate a DRS expression from a mapping of collection ids and terms.
120
114
 
121
- :param mapping: A mapping of collection ids (keys) and tokens (values).
115
+ :param mapping: A mapping of collection ids (keys) and terms (values).
122
116
  :type mapping: Mapping[str, str]
123
117
  :param drs_type: The type of the given DRS expression (directory, file_name or dataset_id)
124
118
  :type drs_type: DrsType|str
125
119
  :returns: A generation report.
126
120
  :rtype: DrsGeneratorReport
127
121
  """
128
- specs = self._get_specs(drs_type)
129
- report = self._generate_from_mapping(mapping, specs)
130
- if DrsType.FILE_NAME == drs_type:
131
- report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension()
132
- return report
133
-
134
- def generate_from_bag_of_tokens(self, tokens: Iterable[str], drs_type: DrsType|str) \
135
- -> DrsGeneratorReport:
122
+ match drs_type:
123
+ case DrsType.DIRECTORY:
124
+ return self.generate_directory_from_mapping(mapping=mapping)
125
+ case DrsType.FILE_NAME:
126
+ return self.generate_file_name_from_mapping(mapping=mapping)
127
+ case DrsType.DATASET_ID:
128
+ return self.generate_dataset_id_from_mapping(mapping=mapping)
129
+ case _:
130
+ raise RuntimeError(f'unsupported drs type {drs_type}')
131
+
132
+ def generate_from_bag_of_terms(self, terms: Iterable[str], drs_type: DrsType|str) \
133
+ -> DrsGenerationReport:
136
134
  """
137
- Generate a DRS expression from an unordered bag of tokens.
135
+ Generate a DRS expression from an unordered bag of terms.
138
136
 
139
- :param tokens: An unordered bag of tokens.
140
- :type tokens: Iterable[str]
137
+ :param terms: An unordered bag of terms.
138
+ :type terms: Iterable[str]
141
139
  :param drs_type: The type of the given DRS expression (directory, file_name or dataset_id)
142
140
  :type drs_type: DrsType|str
143
141
  :returns: A generation report.
144
142
  :rtype: DrsGeneratorReport
145
143
  """
146
- specs = self._get_specs(drs_type)
147
- return self._generate_from_bag_of_tokens(tokens, specs)
144
+ match drs_type:
145
+ case DrsType.DIRECTORY:
146
+ return self.generate_directory_from_bag_of_terms(terms=terms)
147
+ case DrsType.FILE_NAME:
148
+ return self.generate_file_name_from_bag_of_terms(terms=terms)
149
+ case DrsType.DATASET_ID:
150
+ return self.generate_dataset_id_from_bag_of_terms(terms=terms)
151
+ case _:
152
+ raise RuntimeError(f'unsupported drs type {drs_type}')
148
153
 
149
154
 
150
155
  def _generate_from_mapping(self, mapping: Mapping[str, str], specs: DrsSpecification) \
151
- -> DrsGeneratorReport:
156
+ -> DrsGenerationReport:
152
157
  drs_expression, errors, warnings = self.__generate_from_mapping(mapping, specs, True)
153
158
  if self.pedantic:
154
159
  errors.extend(warnings)
155
160
  warnings.clear()
156
- return DrsGeneratorReport(project_id=self.project_id, type=specs.type,
157
- given_mapping_or_bag_of_tokens=mapping,
161
+ return DrsGenerationReport(project_id=self.project_id, type=specs.type,
162
+ given_mapping_or_bag_of_terms=mapping,
158
163
  mapping_used=mapping,
159
164
  generated_drs_expression=drs_expression,
160
- errors=cast(list[DrsIssue], errors),
161
- warnings=cast(list[DrsIssue], warnings))
165
+ errors=cast(list[GenerationError], errors),
166
+ warnings=cast(list[GenerationWarning], warnings))
162
167
 
163
168
  def __generate_from_mapping(self, mapping: Mapping[str, str],
164
169
  specs: DrsSpecification,
165
170
  has_to_valid_terms: bool)\
166
- -> tuple[str, list[GeneratorIssue], list[GeneratorIssue]]:
167
- errors: list[GeneratorIssue] = list()
168
- warnings: list[GeneratorIssue] = list()
171
+ -> tuple[str, list[GenerationIssue], list[GenerationIssue]]:
172
+ errors: list[GenerationIssue] = list()
173
+ warnings: list[GenerationIssue] = list()
169
174
  drs_expression = ""
170
175
  part_position: int = 0
171
176
  for part in specs.parts:
@@ -180,65 +185,65 @@ class DrsGenerator(DrsApplication):
180
185
  self.project_id,
181
186
  collection_id)
182
187
  if not matching_terms:
183
- issue = InvalidToken(token=part_value,
184
- token_position=part_position,
188
+ issue = InvalidTerm(term=part_value,
189
+ term_position=part_position,
185
190
  collection_id_or_constant_value=collection_id)
186
191
  errors.append(issue)
187
- part_value = DrsGeneratorReport.INVALID_TAG
192
+ part_value = DrsGenerationReport.INVALID_TAG
188
193
  else:
189
- other_issue = MissingToken(collection_id=collection_id,
194
+ other_issue = MissingTerm(collection_id=collection_id,
190
195
  collection_position=part_position)
191
196
  if collection_part.is_required:
192
197
  errors.append(other_issue)
193
- part_value = DrsGeneratorReport.MISSING_TAG
198
+ part_value = DrsGenerationReport.MISSING_TAG
194
199
  else:
195
200
  warnings.append(other_issue)
196
201
  continue # The for loop.
197
202
  else:
198
203
  constant_part = cast(DrsConstant, part)
199
204
  part_value = constant_part.value
200
-
205
+
201
206
  drs_expression += part_value + specs.separator
202
-
207
+
203
208
  drs_expression = drs_expression[0:len(drs_expression)-len(specs.separator)]
204
209
  return drs_expression, errors, warnings
205
210
 
206
- def _generate_from_bag_of_tokens(self, tokens: Iterable[str], specs: DrsSpecification) \
207
- -> DrsGeneratorReport:
208
- collection_tokens_mapping: dict[str, set[str]] = dict()
209
- for token in tokens:
210
- matching_terms = projects.valid_term_in_project(token, self.project_id)
211
+ def _generate_from_bag_of_terms(self, terms: Iterable[str], specs: DrsSpecification) \
212
+ -> DrsGenerationReport:
213
+ collection_terms_mapping: dict[str, set[str]] = dict()
214
+ for term in terms:
215
+ matching_terms = projects.valid_term_in_project(term, self.project_id)
211
216
  for matching_term in matching_terms:
212
- if matching_term.collection_id not in collection_tokens_mapping:
213
- collection_tokens_mapping[matching_term.collection_id] = set()
214
- collection_tokens_mapping[matching_term.collection_id].add(token)
215
- collection_tokens_mapping, warnings = DrsGenerator._resolve_conflicts(collection_tokens_mapping)
216
- mapping, errors = DrsGenerator._check_collection_tokens_mapping(collection_tokens_mapping)
217
+ if matching_term.collection_id not in collection_terms_mapping:
218
+ collection_terms_mapping[matching_term.collection_id] = set()
219
+ collection_terms_mapping[matching_term.collection_id].add(term)
220
+ collection_terms_mapping, warnings = DrsGenerator._resolve_conflicts(collection_terms_mapping)
221
+ mapping, errors = DrsGenerator._check_collection_terms_mapping(collection_terms_mapping)
217
222
  drs_expression, errs, warns = self.__generate_from_mapping(mapping, specs, False)
218
223
  errors.extend(errs)
219
224
  warnings.extend(warns)
220
225
  if self.pedantic:
221
226
  errors.extend(warnings)
222
227
  warnings.clear()
223
- return DrsGeneratorReport(project_id=self.project_id, type=specs.type,
224
- given_mapping_or_bag_of_tokens=tokens,
228
+ return DrsGenerationReport(project_id=self.project_id, type=specs.type,
229
+ given_mapping_or_bag_of_terms=terms,
225
230
  mapping_used=mapping,generated_drs_expression=drs_expression,
226
- errors=cast(list[DrsIssue], errors),
227
- warnings=cast(list[DrsIssue], warnings))
228
-
231
+ errors=cast(list[GenerationError], errors),
232
+ warnings=cast(list[GenerationWarning], warnings))
233
+
229
234
  @staticmethod
230
- def _resolve_conflicts(collection_tokens_mapping: dict[str, set[str]]) \
231
- -> tuple[dict[str, set[str]], list[GeneratorIssue]]:
232
- warnings: list[GeneratorIssue] = list()
235
+ def _resolve_conflicts(collection_terms_mapping: dict[str, set[str]]) \
236
+ -> tuple[dict[str, set[str]], list[GenerationIssue]]:
237
+ warnings: list[GenerationIssue] = list()
233
238
  conflicting_collection_ids_list: list[list[str]] = list()
234
- collection_ids: list[str] = list(collection_tokens_mapping.keys())
239
+ collection_ids: list[str] = list(collection_terms_mapping.keys())
235
240
  len_collection_ids: int = len(collection_ids)
236
-
241
+
237
242
  for l_collection_index in range(0, len_collection_ids - 1):
238
243
  conflicting_collection_ids: list[str] = list()
239
244
  for r_collection_index in range(l_collection_index + 1, len_collection_ids):
240
- if collection_tokens_mapping[collection_ids[l_collection_index]].isdisjoint \
241
- (collection_tokens_mapping[collection_ids[r_collection_index]]):
245
+ if collection_terms_mapping[collection_ids[l_collection_index]].isdisjoint \
246
+ (collection_terms_mapping[collection_ids[r_collection_index]]):
242
247
  continue
243
248
  else:
244
249
  not_registered = True
@@ -256,105 +261,105 @@ class DrsGenerator(DrsApplication):
256
261
  # Each time a collection is resolved, we must restart the loop so as to check if others can be,
257
262
  # until no progress is made.
258
263
  while True:
259
- # 1. Non-conflicting collections with only one token are assigned.
260
- # Non-conflicting collections with more than one token will be raise an error
264
+ # 1. Non-conflicting collections with only one term are assigned.
265
+ # Non-conflicting collections with more than one term will be raise an error
261
266
  # in the _check method.
262
-
267
+
263
268
  # Nothing to do.
264
269
 
265
- # 2a. Collections with one token that are conflicting to each other will raise an error.
266
- # We don't search for collection with more than one token which token sets are exactly
267
- # the same, because we cannot choose which token will be removed in 2b.
268
- # So stick with one token collections: those collection will be detected in method _check.
270
+ # 2a. Collections with one term that are conflicting to each other will raise an error.
271
+ # We don't search for collection with more than one term which term sets are exactly
272
+ # the same, because we cannot choose which term will be removed in 2b.
273
+ # So stick with one term collections: those collection will be detected in method _check.
269
274
  collection_ids_with_len_eq_1_list: list[list[str]] = list()
270
275
  for collection_ids in conflicting_collection_ids_list:
271
276
  tmp_conflicting_collection_ids: list[str] = list()
272
277
  for collection_id in collection_ids:
273
- if len(collection_tokens_mapping[collection_id]) == 1:
278
+ if len(collection_terms_mapping[collection_id]) == 1:
274
279
  tmp_conflicting_collection_ids.append(collection_id)
275
280
  if len(tmp_conflicting_collection_ids) > 1:
276
281
  collection_ids_with_len_eq_1_list.append(tmp_conflicting_collection_ids)
277
- # 2b. As it is not possible to resolve collections sharing the same unique token:
278
- # raise errors, remove the faulty collections and their token.
282
+ # 2b. As it is not possible to resolve collections sharing the same unique term:
283
+ # raise errors, remove the faulty collections and their term.
279
284
  if collection_ids_with_len_eq_1_list:
280
285
  for collection_ids_to_be_removed in collection_ids_with_len_eq_1_list:
281
286
  DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
282
287
  collection_ids_to_be_removed)
283
- DrsGenerator._remove_token_from_other_token_sets(collection_tokens_mapping,
288
+ DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
284
289
  collection_ids_to_be_removed)
285
290
  # Every time conflicting_collection_ids_list is modified, we must restart the loop,
286
291
  # as conflicting collections may be resolved.
287
292
  continue
288
293
 
289
- # 3.a For each collections with only one token, assign their token to the detriment of
290
- # collections with more than one token.
294
+ # 3.a For each collections with only one term, assign their term to the detriment of
295
+ # collections with more than one term.
291
296
  wining_collection_ids: list[str] = list()
292
297
  for collection_ids in conflicting_collection_ids_list:
293
298
  for collection_id in collection_ids:
294
- if len(collection_tokens_mapping[collection_id]) == 1:
299
+ if len(collection_terms_mapping[collection_id]) == 1:
295
300
  wining_collection_ids.append(collection_id)
296
- token = _get_first_item(collection_tokens_mapping[collection_id])
297
- issue = AssignedToken(collection_id=collection_id, token=token)
301
+ term = _get_first_item(collection_terms_mapping[collection_id])
302
+ issue = AssignedTerm(collection_id=collection_id, term=term)
298
303
  warnings.append(issue)
299
304
  # 3.b Update conflicting collections.
300
305
  if wining_collection_ids:
301
306
  DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
302
307
  wining_collection_ids)
303
- DrsGenerator._remove_token_from_other_token_sets(collection_tokens_mapping,
308
+ DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
304
309
  wining_collection_ids)
305
310
  # Every time conflicting_collection_ids_list is modified, we must restart the loop,
306
311
  # as conflicting collections may be resolved.
307
312
  continue
308
313
 
309
- # 4.a For each token set of the remaining conflicting collections, compute their difference.
310
- # If the difference is one token, this token is assigned to the collection that owns it.
311
- wining_id_and_token_pairs: list[tuple[str, str]] = list()
314
+ # 4.a For each term set of the remaining conflicting collections, compute their difference.
315
+ # If the difference is one term, this term is assigned to the collection that owns it.
316
+ wining_id_and_term_pairs: list[tuple[str, str]] = list()
312
317
  for collection_ids in conflicting_collection_ids_list:
313
318
  for collection_index in range(0, len(collection_ids)):
314
- diff: set[str] = collection_tokens_mapping[collection_ids[collection_index]]\
319
+ diff: set[str] = collection_terms_mapping[collection_ids[collection_index]]\
315
320
  .difference(
316
- *[collection_tokens_mapping[index]
321
+ *[collection_terms_mapping[index]
317
322
  for index in collection_ids[collection_index + 1 :] +\
318
323
  collection_ids[:collection_index]
319
324
  ]
320
325
  )
321
326
  if len(diff) == 1:
322
- wining_id_and_token_pairs.append((collection_ids[collection_index],
327
+ wining_id_and_term_pairs.append((collection_ids[collection_index],
323
328
  _get_first_item(diff)))
324
329
  # 4.b Update conflicting collections.
325
- if wining_id_and_token_pairs:
330
+ if wining_id_and_term_pairs:
326
331
  wining_collection_ids = list()
327
- for collection_id, token in wining_id_and_token_pairs:
332
+ for collection_id, term in wining_id_and_term_pairs:
328
333
  wining_collection_ids.append(collection_id)
329
- collection_tokens_mapping[collection_id].clear()
330
- collection_tokens_mapping[collection_id].add(token)
331
- issue = AssignedToken(collection_id=collection_id, token=token)
334
+ collection_terms_mapping[collection_id].clear()
335
+ collection_terms_mapping[collection_id].add(term)
336
+ issue = AssignedTerm(collection_id=collection_id, term=term)
332
337
  warnings.append(issue)
333
338
  DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
334
339
  wining_collection_ids)
335
- DrsGenerator._remove_token_from_other_token_sets(collection_tokens_mapping,
340
+ DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
336
341
  wining_collection_ids)
337
342
  continue
338
343
  else:
339
344
  break # Stop the loop when no progress is made.
340
- return collection_tokens_mapping, warnings
345
+ return collection_terms_mapping, warnings
341
346
 
342
347
  @staticmethod
343
- def _check_collection_tokens_mapping(collection_tokens_mapping: dict[str, set[str]]) \
344
- -> tuple[dict[str, str], list[GeneratorIssue]]:
345
- errors: list[GeneratorIssue] = list()
346
- # 1. Looking for collections that share strictly the same token(s).
347
- collection_ids: list[str] = list(collection_tokens_mapping.keys())
348
+ def _check_collection_terms_mapping(collection_terms_mapping: dict[str, set[str]]) \
349
+ -> tuple[dict[str, str], list[GenerationIssue]]:
350
+ errors: list[GenerationIssue] = list()
351
+ # 1. Looking for collections that share strictly the same term(s).
352
+ collection_ids: list[str] = list(collection_terms_mapping.keys())
348
353
  len_collection_ids: int = len(collection_ids)
349
354
  faulty_collections_list: list[set[str]] = list()
350
355
  for l_collection_index in range(0, len_collection_ids - 1):
351
356
  l_collection_id = collection_ids[l_collection_index]
352
- l_token_set = collection_tokens_mapping[l_collection_id]
357
+ l_term_set = collection_terms_mapping[l_collection_id]
353
358
  for r_collection_index in range(l_collection_index + 1, len_collection_ids):
354
359
  r_collection_id = collection_ids[r_collection_index]
355
- r_token_set = collection_tokens_mapping[r_collection_id]
360
+ r_term_set = collection_terms_mapping[r_collection_id]
356
361
  # check if the set is empty because the difference will always be an empty set!
357
- if l_token_set and (not l_token_set.difference(r_token_set)):
362
+ if l_term_set and (not l_term_set.difference(r_term_set)):
358
363
  not_registered = True
359
364
  for faulty_collections in faulty_collections_list:
360
365
  if l_collection_id in faulty_collections or \
@@ -366,35 +371,35 @@ class DrsGenerator(DrsApplication):
366
371
  if not_registered:
367
372
  faulty_collections_list.append({l_collection_id, r_collection_id})
368
373
  for faulty_collections in faulty_collections_list:
369
- tokens = collection_tokens_mapping[_get_first_item(faulty_collections)]
374
+ terms = collection_terms_mapping[_get_first_item(faulty_collections)]
370
375
  issue = ConflictingCollections(collection_ids=_transform_set_and_sort(faulty_collections),
371
- tokens=_transform_set_and_sort(tokens))
376
+ terms=_transform_set_and_sort(terms))
372
377
  errors.append(issue)
373
378
  for collection_id in faulty_collections:
374
- del collection_tokens_mapping[collection_id]
375
-
376
- # 2. Looking for collections with more than one token.
379
+ del collection_terms_mapping[collection_id]
380
+
381
+ # 2. Looking for collections with more than one term.
377
382
  result: dict[str, str] = dict()
378
- for collection_id, token_set in collection_tokens_mapping.items():
379
- len_token_set = len(token_set)
380
- if len_token_set == 1:
381
- result[collection_id] = _get_first_item(token_set)
382
- elif len_token_set > 1:
383
- other_issue = TooManyTokensCollection(collection_id=collection_id,
384
- tokens=_transform_set_and_sort(token_set))
383
+ for collection_id, term_set in collection_terms_mapping.items():
384
+ len_term_set = len(term_set)
385
+ if len_term_set == 1:
386
+ result[collection_id] = _get_first_item(term_set)
387
+ elif len_term_set > 1:
388
+ other_issue = TooManyTermCollection(collection_id=collection_id,
389
+ terms=_transform_set_and_sort(term_set))
385
390
  errors.append(other_issue)
386
391
  #else: Don't add emptied collection to the result.
387
392
  return result, errors
388
393
 
389
394
  @staticmethod
390
- def _remove_token_from_other_token_sets(collection_tokens_mapping: dict[str, set[str]],
395
+ def _remove_term_from_other_term_sets(collection_terms_mapping: dict[str, set[str]],
391
396
  collection_ids_to_be_removed: list[str]) -> None:
392
397
  for collection_id_to_be_removed in collection_ids_to_be_removed:
393
- # Should only be one token.
394
- token_to_be_removed: str = _get_first_item(collection_tokens_mapping[collection_id_to_be_removed])
395
- for collection_id in collection_tokens_mapping.keys():
398
+ # Should only be one term.
399
+ term_to_be_removed: str = _get_first_item(collection_terms_mapping[collection_id_to_be_removed])
400
+ for collection_id in collection_terms_mapping.keys():
396
401
  if (collection_id not in collection_ids_to_be_removed):
397
- collection_tokens_mapping[collection_id].discard(token_to_be_removed)
402
+ collection_terms_mapping[collection_id].discard(term_to_be_removed)
398
403
 
399
404
  @staticmethod
400
405
  def _remove_ids_from_conflicts(conflicting_collection_ids_list: list[list[str]],
@@ -421,4 +426,4 @@ if __name__ == "__main__":
421
426
  'institution_id': 'IPSL',
422
427
  }
423
428
  report = generator.generate_file_name_from_mapping(mapping)
424
- print(report.warnings)
429
+ print(report.warnings)