esgvoc 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic; see the registry's advisory page for more details.

Files changed (79)
  1. esgvoc/__init__.py +3 -1
  2. esgvoc/api/__init__.py +96 -72
  3. esgvoc/api/data_descriptors/__init__.py +18 -12
  4. esgvoc/api/data_descriptors/activity.py +8 -45
  5. esgvoc/api/data_descriptors/area_label.py +6 -0
  6. esgvoc/api/data_descriptors/branded_suffix.py +5 -0
  7. esgvoc/api/data_descriptors/branded_variable.py +5 -0
  8. esgvoc/api/data_descriptors/consortium.py +16 -56
  9. esgvoc/api/data_descriptors/data_descriptor.py +106 -0
  10. esgvoc/api/data_descriptors/date.py +3 -46
  11. esgvoc/api/data_descriptors/directory_date.py +3 -46
  12. esgvoc/api/data_descriptors/experiment.py +19 -54
  13. esgvoc/api/data_descriptors/forcing_index.py +3 -45
  14. esgvoc/api/data_descriptors/frequency.py +6 -43
  15. esgvoc/api/data_descriptors/grid_label.py +6 -44
  16. esgvoc/api/data_descriptors/horizontal_label.py +6 -0
  17. esgvoc/api/data_descriptors/initialisation_index.py +3 -44
  18. esgvoc/api/data_descriptors/institution.py +11 -54
  19. esgvoc/api/data_descriptors/license.py +4 -44
  20. esgvoc/api/data_descriptors/mip_era.py +6 -44
  21. esgvoc/api/data_descriptors/model_component.py +7 -45
  22. esgvoc/api/data_descriptors/organisation.py +3 -40
  23. esgvoc/api/data_descriptors/physic_index.py +3 -45
  24. esgvoc/api/data_descriptors/product.py +4 -43
  25. esgvoc/api/data_descriptors/realisation_index.py +3 -44
  26. esgvoc/api/data_descriptors/realm.py +4 -42
  27. esgvoc/api/data_descriptors/resolution.py +6 -44
  28. esgvoc/api/data_descriptors/source.py +18 -53
  29. esgvoc/api/data_descriptors/source_type.py +3 -41
  30. esgvoc/api/data_descriptors/sub_experiment.py +3 -41
  31. esgvoc/api/data_descriptors/table.py +6 -48
  32. esgvoc/api/data_descriptors/temporal_label.py +6 -0
  33. esgvoc/api/data_descriptors/time_range.py +3 -27
  34. esgvoc/api/data_descriptors/variable.py +13 -71
  35. esgvoc/api/data_descriptors/variant_label.py +3 -47
  36. esgvoc/api/data_descriptors/vertical_label.py +5 -0
  37. esgvoc/api/project_specs.py +3 -2
  38. esgvoc/api/projects.py +727 -446
  39. esgvoc/api/py.typed +0 -0
  40. esgvoc/api/report.py +29 -16
  41. esgvoc/api/search.py +140 -95
  42. esgvoc/api/universe.py +362 -156
  43. esgvoc/apps/__init__.py +3 -4
  44. esgvoc/apps/drs/constants.py +1 -1
  45. esgvoc/apps/drs/generator.py +185 -198
  46. esgvoc/apps/drs/report.py +272 -136
  47. esgvoc/apps/drs/validator.py +132 -145
  48. esgvoc/apps/py.typed +0 -0
  49. esgvoc/cli/drs.py +32 -21
  50. esgvoc/cli/get.py +35 -31
  51. esgvoc/cli/install.py +11 -8
  52. esgvoc/cli/main.py +0 -2
  53. esgvoc/cli/status.py +5 -5
  54. esgvoc/cli/valid.py +40 -40
  55. esgvoc/core/constants.py +1 -1
  56. esgvoc/core/db/__init__.py +2 -4
  57. esgvoc/core/db/connection.py +5 -3
  58. esgvoc/core/db/models/project.py +50 -8
  59. esgvoc/core/db/models/universe.py +51 -12
  60. esgvoc/core/db/project_ingestion.py +60 -46
  61. esgvoc/core/db/universe_ingestion.py +58 -29
  62. esgvoc/core/exceptions.py +33 -0
  63. esgvoc/core/logging_handler.py +1 -1
  64. esgvoc/core/repo_fetcher.py +4 -3
  65. esgvoc/core/service/__init__.py +37 -5
  66. esgvoc/core/service/configuration/config_manager.py +188 -0
  67. esgvoc/core/service/configuration/setting.py +88 -0
  68. esgvoc/core/service/state.py +49 -32
  69. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/METADATA +34 -3
  70. esgvoc-0.4.0.dist-info/RECORD +80 -0
  71. esgvoc/api/_utils.py +0 -39
  72. esgvoc/cli/config.py +0 -82
  73. esgvoc/core/service/settings.py +0 -73
  74. esgvoc/core/service/settings.toml +0 -17
  75. esgvoc/core/service/settings_default.toml +0 -17
  76. esgvoc-0.2.1.dist-info/RECORD +0 -73
  77. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/WHEEL +0 -0
  78. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/entry_points.txt +0 -0
  79. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -1,27 +1,25 @@
1
- from typing import cast, Iterable, Mapping, Any
1
+ from typing import Any, Iterable, Mapping, cast
2
2
 
3
3
  import esgvoc.api.projects as projects
4
-
5
- from esgvoc.api.project_specs import (DrsSpecification,
6
- DrsPartKind,
7
- DrsCollection,
8
- DrsConstant,
9
- DrsType)
10
-
4
+ from esgvoc.api.project_specs import DrsCollection, DrsConstant, DrsPartKind, DrsSpecification, DrsType
5
+ from esgvoc.apps.drs.report import (
6
+ AssignedTerm,
7
+ ConflictingCollections,
8
+ DrsGenerationReport,
9
+ GenerationError,
10
+ GenerationIssue,
11
+ GenerationWarning,
12
+ InvalidTerm,
13
+ MissingTerm,
14
+ TooManyTermCollection,
15
+ )
11
16
  from esgvoc.apps.drs.validator import DrsApplication
12
- from esgvoc.apps.drs.report import (DrsGeneratorReport,
13
- DrsIssue,
14
- GeneratorIssue,
15
- TooManyTokensCollection,
16
- InvalidToken,
17
- MissingToken,
18
- ConflictingCollections,
19
- AssignedToken)
17
+ from esgvoc.core.exceptions import EsgvocDbError
20
18
 
21
19
 
22
20
  def _get_first_item(items: set[Any]) -> Any:
23
21
  result = None
24
- for result in items:
22
+ for result in items: # noqa: B007
25
23
  break
26
24
  return result
27
25
 
@@ -35,137 +33,148 @@ def _transform_set_and_sort(_set: set[Any]) -> list[Any]:
35
33
  class DrsGenerator(DrsApplication):
36
34
  """
37
35
  Generate a directory, dataset id and file name expression specified by the given project from
38
- a mapping of collection ids and tokens or an unordered bag of tokens.
36
+ a mapping of collection ids and terms or an unordered bag of terms.
39
37
  """
40
-
41
- def generate_directory_from_mapping(self, mapping: Mapping[str, str]) -> DrsGeneratorReport:
38
+
39
+ def generate_directory_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
42
40
  """
43
- Generate a directory DRS expression from a mapping of collection ids and tokens.
41
+ Generate a directory DRS expression from a mapping of collection ids and terms.
44
42
 
45
- :param mapping: A mapping of collection ids (keys) and tokens (values).
43
+ :param mapping: A mapping of collection ids (keys) and terms (values).
46
44
  :type mapping: Mapping[str, str]
47
45
  :returns: A generation report.
48
46
  :rtype: DrsGeneratorReport
49
47
  """
50
48
  return self._generate_from_mapping(mapping, self.directory_specs)
51
-
52
- def generate_directory_from_bag_of_tokens(self, tokens: Iterable[str]) -> DrsGeneratorReport:
49
+
50
+ def generate_directory_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
53
51
  """
54
- Generate a directory DRS expression from an unordered bag of tokens.
52
+ Generate a directory DRS expression from an unordered bag of terms.
55
53
 
56
- :param tokens: An unordered bag of tokens.
57
- :type tokens: Iterable[str]
54
+ :param terms: An unordered bag of terms.
55
+ :type terms: Iterable[str]
58
56
  :returns: A generation report.
59
57
  :rtype: DrsGeneratorReport
60
58
  """
61
- return self._generate_from_bag_of_tokens(tokens, self.directory_specs)
59
+ return self._generate_from_bag_of_terms(terms, self.directory_specs)
62
60
 
63
- def generate_dataset_id_from_mapping(self, mapping: Mapping[str, str]) -> DrsGeneratorReport:
61
+ def generate_dataset_id_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
64
62
  """
65
- Generate a dataset id DRS expression from a mapping of collection ids and tokens.
63
+ Generate a dataset id DRS expression from a mapping of collection ids and terms.
66
64
 
67
- :param mapping: A mapping of collection ids (keys) and tokens (values).
65
+ :param mapping: A mapping of collection ids (keys) and terms (values).
68
66
  :type mapping: Mapping[str, str]
69
67
  :returns: A generation report.
70
68
  :rtype: DrsGeneratorReport
71
69
  """
72
70
  return self._generate_from_mapping(mapping, self.dataset_id_specs)
73
-
74
- def generate_dataset_id_from_bag_of_tokens(self, tokens: Iterable[str]) -> DrsGeneratorReport:
71
+
72
+ def generate_dataset_id_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
75
73
  """
76
- Generate a dataset id DRS expression from an unordered bag of tokens.
74
+ Generate a dataset id DRS expression from an unordered bag of terms.
77
75
 
78
- :param tokens: An unordered bag of tokens.
79
- :type tokens: Iterable[str]
76
+ :param terms: An unordered bag of terms.
77
+ :type terms: Iterable[str]
80
78
  :returns: A generation report.
81
79
  :rtype: DrsGeneratorReport
82
80
  """
83
- return self._generate_from_bag_of_tokens(tokens, self.dataset_id_specs)
84
-
81
+ return self._generate_from_bag_of_terms(terms, self.dataset_id_specs)
85
82
 
86
- def generate_file_name_from_mapping(self, mapping: Mapping[str, str]) -> DrsGeneratorReport:
83
+ def generate_file_name_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
87
84
  """
88
- Generate a file name DRS expression from a mapping of collection ids and tokens.
85
+ Generate a file name DRS expression from a mapping of collection ids and terms.
89
86
  The file name extension is append automatically, according to the DRS specification,
90
- so none of the tokens given must include the extension.
87
+ so none of the terms given must include the extension.
91
88
 
92
- :param mapping: A mapping of collection ids (keys) and tokens (values).
89
+ :param mapping: A mapping of collection ids (keys) and terms (values).
93
90
  :type mapping: Mapping[str, str]
94
91
  :returns: A generation report.
95
92
  :rtype: DrsGeneratorReport
96
93
  """
97
94
  report = self._generate_from_mapping(mapping, self.file_name_specs)
98
- report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension()
99
- return report
100
-
101
- def generate_file_name_from_bag_of_tokens(self, tokens: Iterable[str]) -> DrsGeneratorReport:
95
+ report.generated_drs_expression = report.generated_drs_expression + \
96
+ self._get_full_file_name_extension() # noqa E127
97
+ return report
98
+
99
+ def generate_file_name_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
102
100
  """
103
- Generate a file name DRS expression from an unordered bag of tokens.
101
+ Generate a file name DRS expression from an unordered bag of terms.
104
102
  The file name extension is append automatically, according to the DRS specification,
105
- so none of the tokens given must include the extension.
103
+ so none of the terms given must include the extension.
106
104
 
107
- :param tokens: An unordered bag of tokens.
108
- :type tokens: Iterable[str]
105
+ :param terms: An unordered bag of terms.
106
+ :type terms: Iterable[str]
109
107
  :returns: A generation report.
110
108
  :rtype: DrsGeneratorReport
111
109
  """
112
- report = self._generate_from_bag_of_tokens(tokens, self.file_name_specs)
113
- report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension()
114
- return report
110
+ report = self._generate_from_bag_of_terms(terms, self.file_name_specs)
111
+ report.generated_drs_expression = report.generated_drs_expression + \
112
+ self._get_full_file_name_extension() # noqa E127
113
+ return report
115
114
 
116
115
  def generate_from_mapping(self, mapping: Mapping[str, str],
117
- drs_type: DrsType|str) -> DrsGeneratorReport:
116
+ drs_type: DrsType | str) -> DrsGenerationReport:
118
117
  """
119
- Generate a DRS expression from a mapping of collection ids and tokens.
118
+ Generate a DRS expression from a mapping of collection ids and terms.
120
119
 
121
- :param mapping: A mapping of collection ids (keys) and tokens (values).
120
+ :param mapping: A mapping of collection ids (keys) and terms (values).
122
121
  :type mapping: Mapping[str, str]
123
122
  :param drs_type: The type of the given DRS expression (directory, file_name or dataset_id)
124
123
  :type drs_type: DrsType|str
125
124
  :returns: A generation report.
126
125
  :rtype: DrsGeneratorReport
127
126
  """
128
- specs = self._get_specs(drs_type)
129
- report = self._generate_from_mapping(mapping, specs)
130
- if DrsType.FILE_NAME == drs_type:
131
- report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension()
132
- return report
133
-
134
- def generate_from_bag_of_tokens(self, tokens: Iterable[str], drs_type: DrsType|str) \
135
- -> DrsGeneratorReport:
127
+ match drs_type:
128
+ case DrsType.DIRECTORY:
129
+ return self.generate_directory_from_mapping(mapping=mapping)
130
+ case DrsType.FILE_NAME:
131
+ return self.generate_file_name_from_mapping(mapping=mapping)
132
+ case DrsType.DATASET_ID:
133
+ return self.generate_dataset_id_from_mapping(mapping=mapping)
134
+ case _:
135
+ raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
136
+
137
+ def generate_from_bag_of_terms(self, terms: Iterable[str], drs_type: DrsType | str) \
138
+ -> DrsGenerationReport: # noqa E127
136
139
  """
137
- Generate a DRS expression from an unordered bag of tokens.
140
+ Generate a DRS expression from an unordered bag of terms.
138
141
 
139
- :param tokens: An unordered bag of tokens.
140
- :type tokens: Iterable[str]
142
+ :param terms: An unordered bag of terms.
143
+ :type terms: Iterable[str]
141
144
  :param drs_type: The type of the given DRS expression (directory, file_name or dataset_id)
142
145
  :type drs_type: DrsType|str
143
146
  :returns: A generation report.
144
147
  :rtype: DrsGeneratorReport
145
148
  """
146
- specs = self._get_specs(drs_type)
147
- return self._generate_from_bag_of_tokens(tokens, specs)
148
-
149
+ match drs_type:
150
+ case DrsType.DIRECTORY:
151
+ return self.generate_directory_from_bag_of_terms(terms=terms)
152
+ case DrsType.FILE_NAME:
153
+ return self.generate_file_name_from_bag_of_terms(terms=terms)
154
+ case DrsType.DATASET_ID:
155
+ return self.generate_dataset_id_from_bag_of_terms(terms=terms)
156
+ case _:
157
+ raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
149
158
 
150
159
  def _generate_from_mapping(self, mapping: Mapping[str, str], specs: DrsSpecification) \
151
- -> DrsGeneratorReport:
160
+ -> DrsGenerationReport: # noqa E127
152
161
  drs_expression, errors, warnings = self.__generate_from_mapping(mapping, specs, True)
153
162
  if self.pedantic:
154
163
  errors.extend(warnings)
155
164
  warnings.clear()
156
- return DrsGeneratorReport(project_id=self.project_id, type=specs.type,
157
- given_mapping_or_bag_of_tokens=mapping,
158
- mapping_used=mapping,
159
- generated_drs_expression=drs_expression,
160
- errors=cast(list[DrsIssue], errors),
161
- warnings=cast(list[DrsIssue], warnings))
165
+ return DrsGenerationReport(project_id=self.project_id, type=specs.type,
166
+ given_mapping_or_bag_of_terms=mapping,
167
+ mapping_used=mapping,
168
+ generated_drs_expression=drs_expression,
169
+ errors=cast(list[GenerationError], errors),
170
+ warnings=cast(list[GenerationWarning], warnings))
162
171
 
163
172
  def __generate_from_mapping(self, mapping: Mapping[str, str],
164
173
  specs: DrsSpecification,
165
- has_to_valid_terms: bool)\
166
- -> tuple[str, list[GeneratorIssue], list[GeneratorIssue]]:
167
- errors: list[GeneratorIssue] = list()
168
- warnings: list[GeneratorIssue] = list()
174
+ has_to_valid_terms: bool) \
175
+ -> tuple[str, list[GenerationIssue], list[GenerationIssue]]: # noqa E127
176
+ errors: list[GenerationIssue] = list()
177
+ warnings: list[GenerationIssue] = list()
169
178
  drs_expression = ""
170
179
  part_position: int = 0
171
180
  for part in specs.parts:
@@ -180,65 +189,65 @@ class DrsGenerator(DrsApplication):
180
189
  self.project_id,
181
190
  collection_id)
182
191
  if not matching_terms:
183
- issue = InvalidToken(token=part_value,
184
- token_position=part_position,
185
- collection_id_or_constant_value=collection_id)
192
+ issue = InvalidTerm(term=part_value,
193
+ term_position=part_position,
194
+ collection_id_or_constant_value=collection_id)
186
195
  errors.append(issue)
187
- part_value = DrsGeneratorReport.INVALID_TAG
196
+ part_value = DrsGenerationReport.INVALID_TAG
188
197
  else:
189
- other_issue = MissingToken(collection_id=collection_id,
190
- collection_position=part_position)
198
+ other_issue = MissingTerm(collection_id=collection_id,
199
+ collection_position=part_position)
191
200
  if collection_part.is_required:
192
201
  errors.append(other_issue)
193
- part_value = DrsGeneratorReport.MISSING_TAG
202
+ part_value = DrsGenerationReport.MISSING_TAG
194
203
  else:
195
204
  warnings.append(other_issue)
196
- continue # The for loop.
205
+ continue # The for loop.
197
206
  else:
198
207
  constant_part = cast(DrsConstant, part)
199
208
  part_value = constant_part.value
200
-
209
+
201
210
  drs_expression += part_value + specs.separator
202
-
211
+
203
212
  drs_expression = drs_expression[0:len(drs_expression)-len(specs.separator)]
204
213
  return drs_expression, errors, warnings
205
214
 
206
- def _generate_from_bag_of_tokens(self, tokens: Iterable[str], specs: DrsSpecification) \
207
- -> DrsGeneratorReport:
208
- collection_tokens_mapping: dict[str, set[str]] = dict()
209
- for token in tokens:
210
- matching_terms = projects.valid_term_in_project(token, self.project_id)
215
+ def _generate_from_bag_of_terms(self, terms: Iterable[str], specs: DrsSpecification) \
216
+ -> DrsGenerationReport: # noqa E127
217
+ collection_terms_mapping: dict[str, set[str]] = dict()
218
+ for term in terms:
219
+ matching_terms = projects.valid_term_in_project(term, self.project_id)
211
220
  for matching_term in matching_terms:
212
- if matching_term.collection_id not in collection_tokens_mapping:
213
- collection_tokens_mapping[matching_term.collection_id] = set()
214
- collection_tokens_mapping[matching_term.collection_id].add(token)
215
- collection_tokens_mapping, warnings = DrsGenerator._resolve_conflicts(collection_tokens_mapping)
216
- mapping, errors = DrsGenerator._check_collection_tokens_mapping(collection_tokens_mapping)
221
+ if matching_term.collection_id not in collection_terms_mapping:
222
+ collection_terms_mapping[matching_term.collection_id] = set()
223
+ collection_terms_mapping[matching_term.collection_id].add(term)
224
+ collection_terms_mapping, warnings = DrsGenerator._resolve_conflicts(collection_terms_mapping)
225
+ mapping, errors = DrsGenerator._check_collection_terms_mapping(collection_terms_mapping)
217
226
  drs_expression, errs, warns = self.__generate_from_mapping(mapping, specs, False)
218
227
  errors.extend(errs)
219
228
  warnings.extend(warns)
220
229
  if self.pedantic:
221
230
  errors.extend(warnings)
222
231
  warnings.clear()
223
- return DrsGeneratorReport(project_id=self.project_id, type=specs.type,
224
- given_mapping_or_bag_of_tokens=tokens,
225
- mapping_used=mapping,generated_drs_expression=drs_expression,
226
- errors=cast(list[DrsIssue], errors),
227
- warnings=cast(list[DrsIssue], warnings))
228
-
232
+ return DrsGenerationReport(project_id=self.project_id, type=specs.type,
233
+ given_mapping_or_bag_of_terms=terms,
234
+ mapping_used=mapping, generated_drs_expression=drs_expression,
235
+ errors=cast(list[GenerationError], errors),
236
+ warnings=cast(list[GenerationWarning], warnings))
237
+
229
238
  @staticmethod
230
- def _resolve_conflicts(collection_tokens_mapping: dict[str, set[str]]) \
231
- -> tuple[dict[str, set[str]], list[GeneratorIssue]]:
232
- warnings: list[GeneratorIssue] = list()
239
+ def _resolve_conflicts(collection_terms_mapping: dict[str, set[str]]) \
240
+ -> tuple[dict[str, set[str]], list[GenerationIssue]]: # noqa E127
241
+ warnings: list[GenerationIssue] = list()
233
242
  conflicting_collection_ids_list: list[list[str]] = list()
234
- collection_ids: list[str] = list(collection_tokens_mapping.keys())
243
+ collection_ids: list[str] = list(collection_terms_mapping.keys())
235
244
  len_collection_ids: int = len(collection_ids)
236
-
245
+
237
246
  for l_collection_index in range(0, len_collection_ids - 1):
238
247
  conflicting_collection_ids: list[str] = list()
239
248
  for r_collection_index in range(l_collection_index + 1, len_collection_ids):
240
- if collection_tokens_mapping[collection_ids[l_collection_index]].isdisjoint \
241
- (collection_tokens_mapping[collection_ids[r_collection_index]]):
249
+ if collection_terms_mapping[collection_ids[l_collection_index]].isdisjoint(
250
+ collection_terms_mapping[collection_ids[r_collection_index]]):
242
251
  continue
243
252
  else:
244
253
  not_registered = True
@@ -256,105 +265,102 @@ class DrsGenerator(DrsApplication):
256
265
  # Each time a collection is resolved, we must restart the loop so as to check if others can be,
257
266
  # until no progress is made.
258
267
  while True:
259
- # 1. Non-conflicting collections with only one token are assigned.
260
- # Non-conflicting collections with more than one token will be raise an error
268
+ # 1. Non-conflicting collections with only one term are assigned.
269
+ # Non-conflicting collections with more than one term will be raise an error
261
270
  # in the _check method.
262
-
271
+
263
272
  # Nothing to do.
264
273
 
265
- # 2a. Collections with one token that are conflicting to each other will raise an error.
266
- # We don't search for collection with more than one token which token sets are exactly
267
- # the same, because we cannot choose which token will be removed in 2b.
268
- # So stick with one token collections: those collection will be detected in method _check.
274
+ # 2a. Collections with one term that are conflicting to each other will raise an error.
275
+ # We don't search for collection with more than one term which term sets are exactly
276
+ # the same, because we cannot choose which term will be removed in 2b.
277
+ # So stick with one term collections: those collection will be detected in method _check.
269
278
  collection_ids_with_len_eq_1_list: list[list[str]] = list()
270
279
  for collection_ids in conflicting_collection_ids_list:
271
280
  tmp_conflicting_collection_ids: list[str] = list()
272
281
  for collection_id in collection_ids:
273
- if len(collection_tokens_mapping[collection_id]) == 1:
282
+ if len(collection_terms_mapping[collection_id]) == 1:
274
283
  tmp_conflicting_collection_ids.append(collection_id)
275
284
  if len(tmp_conflicting_collection_ids) > 1:
276
285
  collection_ids_with_len_eq_1_list.append(tmp_conflicting_collection_ids)
277
- # 2b. As it is not possible to resolve collections sharing the same unique token:
278
- # raise errors, remove the faulty collections and their token.
286
+ # 2b. As it is not possible to resolve collections sharing the same unique term:
287
+ # raise errors, remove the faulty collections and their term.
279
288
  if collection_ids_with_len_eq_1_list:
280
289
  for collection_ids_to_be_removed in collection_ids_with_len_eq_1_list:
281
290
  DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
282
291
  collection_ids_to_be_removed)
283
- DrsGenerator._remove_token_from_other_token_sets(collection_tokens_mapping,
284
- collection_ids_to_be_removed)
292
+ DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
293
+ collection_ids_to_be_removed)
285
294
  # Every time conflicting_collection_ids_list is modified, we must restart the loop,
286
295
  # as conflicting collections may be resolved.
287
296
  continue
288
297
 
289
- # 3.a For each collections with only one token, assign their token to the detriment of
290
- # collections with more than one token.
298
+ # 3.a For each collections with only one term, assign their term to the detriment of
299
+ # collections with more than one term.
291
300
  wining_collection_ids: list[str] = list()
292
301
  for collection_ids in conflicting_collection_ids_list:
293
302
  for collection_id in collection_ids:
294
- if len(collection_tokens_mapping[collection_id]) == 1:
303
+ if len(collection_terms_mapping[collection_id]) == 1:
295
304
  wining_collection_ids.append(collection_id)
296
- token = _get_first_item(collection_tokens_mapping[collection_id])
297
- issue = AssignedToken(collection_id=collection_id, token=token)
305
+ term = _get_first_item(collection_terms_mapping[collection_id])
306
+ issue = AssignedTerm(collection_id=collection_id, term=term)
298
307
  warnings.append(issue)
299
308
  # 3.b Update conflicting collections.
300
309
  if wining_collection_ids:
301
310
  DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
302
311
  wining_collection_ids)
303
- DrsGenerator._remove_token_from_other_token_sets(collection_tokens_mapping,
304
- wining_collection_ids)
312
+ DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
313
+ wining_collection_ids)
305
314
  # Every time conflicting_collection_ids_list is modified, we must restart the loop,
306
315
  # as conflicting collections may be resolved.
307
316
  continue
308
317
 
309
- # 4.a For each token set of the remaining conflicting collections, compute their difference.
310
- # If the difference is one token, this token is assigned to the collection that owns it.
311
- wining_id_and_token_pairs: list[tuple[str, str]] = list()
318
+ # 4.a For each term set of the remaining conflicting collections, compute their difference.
319
+ # If the difference is one term, this term is assigned to the collection that owns it.
320
+ wining_id_and_term_pairs: list[tuple[str, str]] = list()
312
321
  for collection_ids in conflicting_collection_ids_list:
313
322
  for collection_index in range(0, len(collection_ids)):
314
- diff: set[str] = collection_tokens_mapping[collection_ids[collection_index]]\
315
- .difference(
316
- *[collection_tokens_mapping[index]
317
- for index in collection_ids[collection_index + 1 :] +\
318
- collection_ids[:collection_index]
319
- ]
320
- )
323
+ collection_set = collection_ids[collection_index + 1:] + collection_ids[:collection_index]
324
+ diff: set[str] = collection_terms_mapping[collection_ids[collection_index]]\
325
+ .difference(*[collection_terms_mapping[index] # noqa E127
326
+ for index in collection_set])
321
327
  if len(diff) == 1:
322
- wining_id_and_token_pairs.append((collection_ids[collection_index],
328
+ wining_id_and_term_pairs.append((collection_ids[collection_index],
323
329
  _get_first_item(diff)))
324
330
  # 4.b Update conflicting collections.
325
- if wining_id_and_token_pairs:
331
+ if wining_id_and_term_pairs:
326
332
  wining_collection_ids = list()
327
- for collection_id, token in wining_id_and_token_pairs:
333
+ for collection_id, term in wining_id_and_term_pairs:
328
334
  wining_collection_ids.append(collection_id)
329
- collection_tokens_mapping[collection_id].clear()
330
- collection_tokens_mapping[collection_id].add(token)
331
- issue = AssignedToken(collection_id=collection_id, token=token)
335
+ collection_terms_mapping[collection_id].clear()
336
+ collection_terms_mapping[collection_id].add(term)
337
+ issue = AssignedTerm(collection_id=collection_id, term=term)
332
338
  warnings.append(issue)
333
339
  DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
334
340
  wining_collection_ids)
335
- DrsGenerator._remove_token_from_other_token_sets(collection_tokens_mapping,
341
+ DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
336
342
  wining_collection_ids)
337
343
  continue
338
344
  else:
339
- break Stop the loop when no progress is made.
340
- return collection_tokens_mapping, warnings
345
+ break # Stop the loop when no progress is made.
346
+ return collection_terms_mapping, warnings
341
347
 
342
348
  @staticmethod
343
- def _check_collection_tokens_mapping(collection_tokens_mapping: dict[str, set[str]]) \
344
- -> tuple[dict[str, str], list[GeneratorIssue]]:
345
- errors: list[GeneratorIssue] = list()
346
- # 1. Looking for collections that share strictly the same token(s).
347
- collection_ids: list[str] = list(collection_tokens_mapping.keys())
349
+ def _check_collection_terms_mapping(collection_terms_mapping: dict[str, set[str]]) \
350
+ -> tuple[dict[str, str], list[GenerationIssue]]: # noqa E127
351
+ errors: list[GenerationIssue] = list()
352
+ # 1. Looking for collections that share strictly the same term(s).
353
+ collection_ids: list[str] = list(collection_terms_mapping.keys())
348
354
  len_collection_ids: int = len(collection_ids)
349
355
  faulty_collections_list: list[set[str]] = list()
350
356
  for l_collection_index in range(0, len_collection_ids - 1):
351
357
  l_collection_id = collection_ids[l_collection_index]
352
- l_token_set = collection_tokens_mapping[l_collection_id]
358
+ l_term_set = collection_terms_mapping[l_collection_id]
353
359
  for r_collection_index in range(l_collection_index + 1, len_collection_ids):
354
360
  r_collection_id = collection_ids[r_collection_index]
355
- r_token_set = collection_tokens_mapping[r_collection_id]
356
- # check if the set is empty because the difference will always be an empty set!
357
- if l_token_set and (not l_token_set.difference(r_token_set)):
361
+ r_term_set = collection_terms_mapping[r_collection_id]
362
+ # Check if the set is empty because the difference will always be an empty set!
363
+ if l_term_set and (not l_term_set.difference(r_term_set)):
358
364
  not_registered = True
359
365
  for faulty_collections in faulty_collections_list:
360
366
  if l_collection_id in faulty_collections or \
@@ -366,35 +372,35 @@ class DrsGenerator(DrsApplication):
366
372
  if not_registered:
367
373
  faulty_collections_list.append({l_collection_id, r_collection_id})
368
374
  for faulty_collections in faulty_collections_list:
369
- tokens = collection_tokens_mapping[_get_first_item(faulty_collections)]
375
+ terms = collection_terms_mapping[_get_first_item(faulty_collections)]
370
376
  issue = ConflictingCollections(collection_ids=_transform_set_and_sort(faulty_collections),
371
- tokens=_transform_set_and_sort(tokens))
377
+ terms=_transform_set_and_sort(terms))
372
378
  errors.append(issue)
373
379
  for collection_id in faulty_collections:
374
- del collection_tokens_mapping[collection_id]
375
-
376
- # 2. Looking for collections with more than one token.
380
+ del collection_terms_mapping[collection_id]
381
+
382
+ # 2. Looking for collections with more than one term.
377
383
  result: dict[str, str] = dict()
378
- for collection_id, token_set in collection_tokens_mapping.items():
379
- len_token_set = len(token_set)
380
- if len_token_set == 1:
381
- result[collection_id] = _get_first_item(token_set)
382
- elif len_token_set > 1:
383
- other_issue = TooManyTokensCollection(collection_id=collection_id,
384
- tokens=_transform_set_and_sort(token_set))
384
+ for collection_id, term_set in collection_terms_mapping.items():
385
+ len_term_set = len(term_set)
386
+ if len_term_set == 1:
387
+ result[collection_id] = _get_first_item(term_set)
388
+ elif len_term_set > 1:
389
+ other_issue = TooManyTermCollection(collection_id=collection_id,
390
+ terms=_transform_set_and_sort(term_set))
385
391
  errors.append(other_issue)
386
- #else: Don't add emptied collection to the result.
392
+ # else: Don't add emptied collection to the result.
387
393
  return result, errors
388
394
 
389
395
  @staticmethod
390
- def _remove_token_from_other_token_sets(collection_tokens_mapping: dict[str, set[str]],
396
+ def _remove_term_from_other_term_sets(collection_terms_mapping: dict[str, set[str]],
391
397
  collection_ids_to_be_removed: list[str]) -> None:
392
398
  for collection_id_to_be_removed in collection_ids_to_be_removed:
393
- # Should only be one token.
394
- token_to_be_removed: str = _get_first_item(collection_tokens_mapping[collection_id_to_be_removed])
395
- for collection_id in collection_tokens_mapping.keys():
399
+ # Should only be one term.
400
+ term_to_be_removed: str = _get_first_item(collection_terms_mapping[collection_id_to_be_removed])
401
+ for collection_id in collection_terms_mapping.keys():
396
402
  if (collection_id not in collection_ids_to_be_removed):
397
- collection_tokens_mapping[collection_id].discard(token_to_be_removed)
403
+ collection_terms_mapping[collection_id].discard(term_to_be_removed)
398
404
 
399
405
  @staticmethod
400
406
  def _remove_ids_from_conflicts(conflicting_collection_ids_list: list[list[str]],
@@ -403,22 +409,3 @@ class DrsGenerator(DrsApplication):
403
409
  for conflicting_collection_ids in conflicting_collection_ids_list:
404
410
  if collection_id_to_be_removed in conflicting_collection_ids:
405
411
  conflicting_collection_ids.remove(collection_id_to_be_removed)
406
-
407
-
408
- if __name__ == "__main__":
409
- project_id = 'cmip6plus'
410
- generator = DrsGenerator(project_id)
411
- mapping = \
412
- {
413
- 'member_id': 'r2i2p1f2',
414
- 'activity_id': 'CMIP',
415
- 'source_id': 'MIROC6',
416
- 'mip_era': 'CMIP6Plus',
417
- 'experiment_id': 'amip',
418
- 'variable_id': 'od550aer',
419
- 'table_id': 'ACmon',
420
- 'grid_label': 'gn',
421
- 'institution_id': 'IPSL',
422
- }
423
- report = generator.generate_file_name_from_mapping(mapping)
424
- print(report.warnings)