PyPI - esgvoc - Versions diffs - 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

esgvoc: 0.2.1 (py3-none-any.whl) → 0.4.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of esgvoc might be problematic. Click here for more details.

Files changed (79)

esgvoc/__init__.py +3 -1
esgvoc/api/__init__.py +96 -72
esgvoc/api/data_descriptors/__init__.py +18 -12
esgvoc/api/data_descriptors/activity.py +8 -45
esgvoc/api/data_descriptors/area_label.py +6 -0
esgvoc/api/data_descriptors/branded_suffix.py +5 -0
esgvoc/api/data_descriptors/branded_variable.py +5 -0
esgvoc/api/data_descriptors/consortium.py +16 -56
esgvoc/api/data_descriptors/data_descriptor.py +106 -0
esgvoc/api/data_descriptors/date.py +3 -46
esgvoc/api/data_descriptors/directory_date.py +3 -46
esgvoc/api/data_descriptors/experiment.py +19 -54
esgvoc/api/data_descriptors/forcing_index.py +3 -45
esgvoc/api/data_descriptors/frequency.py +6 -43
esgvoc/api/data_descriptors/grid_label.py +6 -44
esgvoc/api/data_descriptors/horizontal_label.py +6 -0
esgvoc/api/data_descriptors/initialisation_index.py +3 -44
esgvoc/api/data_descriptors/institution.py +11 -54
esgvoc/api/data_descriptors/license.py +4 -44
esgvoc/api/data_descriptors/mip_era.py +6 -44
esgvoc/api/data_descriptors/model_component.py +7 -45
esgvoc/api/data_descriptors/organisation.py +3 -40
esgvoc/api/data_descriptors/physic_index.py +3 -45
esgvoc/api/data_descriptors/product.py +4 -43
esgvoc/api/data_descriptors/realisation_index.py +3 -44
esgvoc/api/data_descriptors/realm.py +4 -42
esgvoc/api/data_descriptors/resolution.py +6 -44
esgvoc/api/data_descriptors/source.py +18 -53
esgvoc/api/data_descriptors/source_type.py +3 -41
esgvoc/api/data_descriptors/sub_experiment.py +3 -41
esgvoc/api/data_descriptors/table.py +6 -48
esgvoc/api/data_descriptors/temporal_label.py +6 -0
esgvoc/api/data_descriptors/time_range.py +3 -27
esgvoc/api/data_descriptors/variable.py +13 -71
esgvoc/api/data_descriptors/variant_label.py +3 -47
esgvoc/api/data_descriptors/vertical_label.py +5 -0
esgvoc/api/project_specs.py +3 -2
esgvoc/api/projects.py +727 -446
esgvoc/api/py.typed +0 -0
esgvoc/api/report.py +29 -16
esgvoc/api/search.py +140 -95
esgvoc/api/universe.py +362 -156
esgvoc/apps/__init__.py +3 -4
esgvoc/apps/drs/constants.py +1 -1
esgvoc/apps/drs/generator.py +185 -198
esgvoc/apps/drs/report.py +272 -136
esgvoc/apps/drs/validator.py +132 -145
esgvoc/apps/py.typed +0 -0
esgvoc/cli/drs.py +32 -21
esgvoc/cli/get.py +35 -31
esgvoc/cli/install.py +11 -8
esgvoc/cli/main.py +0 -2
esgvoc/cli/status.py +5 -5
esgvoc/cli/valid.py +40 -40
esgvoc/core/constants.py +1 -1
esgvoc/core/db/__init__.py +2 -4
esgvoc/core/db/connection.py +5 -3
esgvoc/core/db/models/project.py +50 -8
esgvoc/core/db/models/universe.py +51 -12
esgvoc/core/db/project_ingestion.py +60 -46
esgvoc/core/db/universe_ingestion.py +58 -29
esgvoc/core/exceptions.py +33 -0
esgvoc/core/logging_handler.py +1 -1
esgvoc/core/repo_fetcher.py +4 -3
esgvoc/core/service/__init__.py +37 -5
esgvoc/core/service/configuration/config_manager.py +188 -0
esgvoc/core/service/configuration/setting.py +88 -0
esgvoc/core/service/state.py +49 -32
{esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/METADATA +34 -3
esgvoc-0.4.0.dist-info/RECORD +80 -0
esgvoc/api/_utils.py +0 -39
esgvoc/cli/config.py +0 -82
esgvoc/core/service/settings.py +0 -73
esgvoc/core/service/settings.toml +0 -17
esgvoc/core/service/settings_default.toml +0 -17
esgvoc-0.2.1.dist-info/RECORD +0 -73
{esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/WHEEL +0 -0
{esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/entry_points.txt +0 -0
{esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/licenses/LICENSE.txt +0 -0

esgvoc/apps/drs/generator.py CHANGED

@@ -1,27 +1,25 @@
-from typing import cast, Iterable, Mapping, Any
+from typing import Any, Iterable, Mapping, cast
 import esgvoc.api.projects as projects
-from esgvoc.api.project_specs import (DrsSpecification,
-                               DrsPartKind,
-                               DrsCollection,
-                               DrsConstant,
-                               DrsType)
+from esgvoc.api.project_specs import DrsCollection, DrsConstant, DrsPartKind, DrsSpecification, DrsType
+from esgvoc.apps.drs.report import (
+    AssignedTerm,
+    ConflictingCollections,
+    DrsGenerationReport,
+    GenerationError,
+    GenerationIssue,
+    GenerationWarning,
+    InvalidTerm,
+    MissingTerm,
+    TooManyTermCollection,
+)
 from esgvoc.apps.drs.validator import DrsApplication
-from esgvoc.apps.drs.report import (DrsGeneratorReport,
-                                    DrsIssue,
-                                    GeneratorIssue,
-                                    TooManyTokensCollection,
-                                    InvalidToken,
-                                    MissingToken,
-                                    ConflictingCollections,
-                                    AssignedToken)
+from esgvoc.core.exceptions import EsgvocDbError
 def _get_first_item(items: set[Any]) -> Any:
     result = None
-    for result in items:
+    for result in items:  # noqa: B007
         break
     return result
@@ -35,137 +33,148 @@ def _transform_set_and_sort(_set: set[Any]) -> list[Any]:
 class DrsGenerator(DrsApplication):
     """
     Generate a directory, dataset id and file name expression specified by the given project from
-    a mapping of collection ids and tokens or an unordered bag of tokens.
+    a mapping of collection ids and terms or an unordered bag of terms.
     """
-    def generate_directory_from_mapping(self, mapping: Mapping[str, str]) -> DrsGeneratorReport:
+    def generate_directory_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
         """
-        Generate a directory DRS expression from a mapping of collection ids and tokens.
+        Generate a directory DRS expression from a mapping of collection ids and terms.
-        :param mapping: A mapping of collection ids (keys) and tokens (values).
+        :param mapping: A mapping of collection ids (keys) and terms (values).
         :type mapping: Mapping[str, str]
         :returns: A generation report.
         :rtype: DrsGeneratorReport
         """
         return self._generate_from_mapping(mapping, self.directory_specs)
-    def generate_directory_from_bag_of_tokens(self, tokens: Iterable[str]) -> DrsGeneratorReport:
+    def generate_directory_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
         """
-        Generate a directory DRS expression from an unordered bag of tokens.
+        Generate a directory DRS expression from an unordered bag of terms.
-        :param tokens: An unordered bag of tokens.
-        :type tokens: Iterable[str]
+        :param terms: An unordered bag of terms.
+        :type terms: Iterable[str]
         :returns: A generation report.
         :rtype: DrsGeneratorReport
         """
-        return self._generate_from_bag_of_tokens(tokens, self.directory_specs)
+        return self._generate_from_bag_of_terms(terms, self.directory_specs)
-    def generate_dataset_id_from_mapping(self, mapping: Mapping[str, str]) -> DrsGeneratorReport:
+    def generate_dataset_id_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
         """
-        Generate a dataset id DRS expression from a mapping of collection ids and tokens.
+        Generate a dataset id DRS expression from a mapping of collection ids and terms.
-        :param mapping: A mapping of collection ids (keys) and tokens (values).
+        :param mapping: A mapping of collection ids (keys) and terms (values).
         :type mapping: Mapping[str, str]
         :returns: A generation report.
         :rtype: DrsGeneratorReport
         """
         return self._generate_from_mapping(mapping, self.dataset_id_specs)
-    def generate_dataset_id_from_bag_of_tokens(self, tokens: Iterable[str]) -> DrsGeneratorReport:
+    def generate_dataset_id_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
         """
-        Generate a dataset id DRS expression from an unordered bag of tokens.
+        Generate a dataset id DRS expression from an unordered bag of terms.
-        :param tokens: An unordered bag of tokens.
-        :type tokens: Iterable[str]
+        :param terms: An unordered bag of terms.
+        :type terms: Iterable[str]
         :returns: A generation report.
         :rtype: DrsGeneratorReport
         """
-        return self._generate_from_bag_of_tokens(tokens, self.dataset_id_specs)
+        return self._generate_from_bag_of_terms(terms, self.dataset_id_specs)
-    def generate_file_name_from_mapping(self, mapping: Mapping[str, str]) -> DrsGeneratorReport:
+    def generate_file_name_from_mapping(self, mapping: Mapping[str, str]) -> DrsGenerationReport:
         """
-        Generate a file name DRS expression from a mapping of collection ids and tokens.
+        Generate a file name DRS expression from a mapping of collection ids and terms.
         The file name extension is append automatically, according to the DRS specification,
-        so none of the tokens given must include the extension.
+        so none of the terms given must include the extension.
-        :param mapping: A mapping of collection ids (keys) and tokens (values).
+        :param mapping: A mapping of collection ids (keys) and terms (values).
         :type mapping: Mapping[str, str]
         :returns: A generation report.
         :rtype: DrsGeneratorReport
         """
         report = self._generate_from_mapping(mapping, self.file_name_specs)
-        report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension()
-        return report
-    def generate_file_name_from_bag_of_tokens(self, tokens: Iterable[str]) -> DrsGeneratorReport:
+        report.generated_drs_expression = report.generated_drs_expression + \
+                                          self._get_full_file_name_extension() # noqa E127
+        return report
+    def generate_file_name_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
         """
-        Generate a file name DRS expression from an unordered bag of tokens.
+        Generate a file name DRS expression from an unordered bag of terms.
         The file name extension is append automatically, according to the DRS specification,
-        so none of the tokens given must include the extension.
+        so none of the terms given must include the extension.
-        :param tokens: An unordered bag of tokens.
-        :type tokens: Iterable[str]
+        :param terms: An unordered bag of terms.
+        :type terms: Iterable[str]
         :returns: A generation report.
         :rtype: DrsGeneratorReport
         """
-        report = self._generate_from_bag_of_tokens(tokens, self.file_name_specs)
-        report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension()
-        return report
+        report = self._generate_from_bag_of_terms(terms, self.file_name_specs)
+        report.generated_drs_expression = report.generated_drs_expression + \
+                                          self._get_full_file_name_extension() # noqa E127
+        return report
     def generate_from_mapping(self, mapping: Mapping[str, str],
-                              drs_type: DrsType|str) -> DrsGeneratorReport:
+                              drs_type: DrsType | str) -> DrsGenerationReport:
         """
-        Generate a DRS expression from a mapping of collection ids and tokens.
+        Generate a DRS expression from a mapping of collection ids and terms.
-        :param mapping: A mapping of collection ids (keys) and tokens (values).
+        :param mapping: A mapping of collection ids (keys) and terms (values).
         :type mapping: Mapping[str, str]
         :param drs_type: The type of the given DRS expression (directory, file_name or dataset_id)
         :type drs_type: DrsType|str
         :returns: A generation report.
         :rtype: DrsGeneratorReport
         """
-        specs = self._get_specs(drs_type)
-        report = self._generate_from_mapping(mapping, specs)
-        if DrsType.FILE_NAME == drs_type:
-            report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension()
-        return report
-    def generate_from_bag_of_tokens(self, tokens: Iterable[str], drs_type: DrsType|str) \
-                                                                              -> DrsGeneratorReport:
+        match drs_type:
+            case DrsType.DIRECTORY:
+                return self.generate_directory_from_mapping(mapping=mapping)
+            case DrsType.FILE_NAME:
+                return self.generate_file_name_from_mapping(mapping=mapping)
+            case DrsType.DATASET_ID:
+                return self.generate_dataset_id_from_mapping(mapping=mapping)
+            case _:
+                raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
+    def generate_from_bag_of_terms(self, terms: Iterable[str], drs_type: DrsType | str) \
+                                                                             -> DrsGenerationReport: # noqa E127
         """
-        Generate a DRS expression from an unordered bag of tokens.
+        Generate a DRS expression from an unordered bag of terms.
-        :param tokens: An unordered bag of tokens.
-        :type tokens: Iterable[str]
+        :param terms: An unordered bag of terms.
+        :type terms: Iterable[str]
         :param drs_type: The type of the given DRS expression (directory, file_name or dataset_id)
         :type drs_type: DrsType|str
         :returns: A generation report.
         :rtype: DrsGeneratorReport
         """
-        specs = self._get_specs(drs_type)
-        return self._generate_from_bag_of_tokens(tokens, specs)
+        match drs_type:
+            case DrsType.DIRECTORY:
+                return self.generate_directory_from_bag_of_terms(terms=terms)
+            case DrsType.FILE_NAME:
+                return self.generate_file_name_from_bag_of_terms(terms=terms)
+            case DrsType.DATASET_ID:
+                return self.generate_dataset_id_from_bag_of_terms(terms=terms)
+            case _:
+                raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
     def _generate_from_mapping(self, mapping: Mapping[str, str], specs: DrsSpecification) \
-                                                                              -> DrsGeneratorReport:
+                                                                            -> DrsGenerationReport: # noqa E127
         drs_expression, errors, warnings = self.__generate_from_mapping(mapping, specs, True)
         if self.pedantic:
             errors.extend(warnings)
             warnings.clear()
-        return DrsGeneratorReport(project_id=self.project_id, type=specs.type,
-                                  given_mapping_or_bag_of_tokens=mapping,
-                                  mapping_used=mapping,
-                                  generated_drs_expression=drs_expression,
-                                  errors=cast(list[DrsIssue], errors),
-                                  warnings=cast(list[DrsIssue], warnings))
+        return DrsGenerationReport(project_id=self.project_id, type=specs.type,
+                                   given_mapping_or_bag_of_terms=mapping,
+                                   mapping_used=mapping,
+                                   generated_drs_expression=drs_expression,
+                                   errors=cast(list[GenerationError], errors),
+                                   warnings=cast(list[GenerationWarning], warnings))
     def __generate_from_mapping(self, mapping: Mapping[str, str],
                                 specs: DrsSpecification,
-                                has_to_valid_terms: bool)\
-                                          -> tuple[str, list[GeneratorIssue], list[GeneratorIssue]]:
-        errors: list[GeneratorIssue] = list()
-        warnings: list[GeneratorIssue] = list()
+                                has_to_valid_terms: bool) \
+                                        -> tuple[str, list[GenerationIssue], list[GenerationIssue]]: # noqa E127
+        errors: list[GenerationIssue] = list()
+        warnings: list[GenerationIssue] = list()
         drs_expression = ""
         part_position: int = 0
         for part in specs.parts:
@@ -180,65 +189,65 @@ class DrsGenerator(DrsApplication):
                                                                            self.project_id,
                                                                            collection_id)
                         if not matching_terms:
-                            issue = InvalidToken(token=part_value,
-                                                 token_position=part_position,
-                                                 collection_id_or_constant_value=collection_id)
+                            issue = InvalidTerm(term=part_value,
+                                                term_position=part_position,
+                                                collection_id_or_constant_value=collection_id)
                             errors.append(issue)
-                            part_value = DrsGeneratorReport.INVALID_TAG
+                            part_value = DrsGenerationReport.INVALID_TAG
                 else:
-                    other_issue = MissingToken(collection_id=collection_id,
-                                               collection_position=part_position)
+                    other_issue = MissingTerm(collection_id=collection_id,
+                                              collection_position=part_position)
                     if collection_part.is_required:
                         errors.append(other_issue)
-                        part_value = DrsGeneratorReport.MISSING_TAG
+                        part_value = DrsGenerationReport.MISSING_TAG
                     else:
                         warnings.append(other_issue)
-                        continue # The for loop.
+                        continue  # The for loop.
             else:
                 constant_part = cast(DrsConstant, part)
                 part_value = constant_part.value
             drs_expression += part_value + specs.separator
         drs_expression = drs_expression[0:len(drs_expression)-len(specs.separator)]
         return drs_expression, errors, warnings
-    def _generate_from_bag_of_tokens(self, tokens: Iterable[str], specs: DrsSpecification) \
-                                                                              -> DrsGeneratorReport:
-        collection_tokens_mapping: dict[str, set[str]] = dict()
-        for token in tokens:
-            matching_terms = projects.valid_term_in_project(token, self.project_id)
+    def _generate_from_bag_of_terms(self, terms: Iterable[str], specs: DrsSpecification) \
+                                                                             -> DrsGenerationReport: # noqa E127
+        collection_terms_mapping: dict[str, set[str]] = dict()
+        for term in terms:
+            matching_terms = projects.valid_term_in_project(term, self.project_id)
             for matching_term in matching_terms:
-                if matching_term.collection_id not in collection_tokens_mapping:
-                    collection_tokens_mapping[matching_term.collection_id] = set()
-                collection_tokens_mapping[matching_term.collection_id].add(token)
-        collection_tokens_mapping, warnings = DrsGenerator._resolve_conflicts(collection_tokens_mapping)
-        mapping, errors = DrsGenerator._check_collection_tokens_mapping(collection_tokens_mapping)
+                if matching_term.collection_id not in collection_terms_mapping:
+                    collection_terms_mapping[matching_term.collection_id] = set()
+                collection_terms_mapping[matching_term.collection_id].add(term)
+        collection_terms_mapping, warnings = DrsGenerator._resolve_conflicts(collection_terms_mapping)
+        mapping, errors = DrsGenerator._check_collection_terms_mapping(collection_terms_mapping)
         drs_expression, errs, warns = self.__generate_from_mapping(mapping, specs, False)
         errors.extend(errs)
         warnings.extend(warns)
         if self.pedantic:
             errors.extend(warnings)
             warnings.clear()
-        return DrsGeneratorReport(project_id=self.project_id, type=specs.type,
-                                  given_mapping_or_bag_of_tokens=tokens,
-                                  mapping_used=mapping,generated_drs_expression=drs_expression,
-                                  errors=cast(list[DrsIssue], errors),
-                                  warnings=cast(list[DrsIssue], warnings))
+        return DrsGenerationReport(project_id=self.project_id, type=specs.type,
+                                   given_mapping_or_bag_of_terms=terms,
+                                   mapping_used=mapping, generated_drs_expression=drs_expression,
+                                   errors=cast(list[GenerationError], errors),
+                                   warnings=cast(list[GenerationWarning], warnings))
     @staticmethod
-    def _resolve_conflicts(collection_tokens_mapping: dict[str, set[str]]) \
-                                            -> tuple[dict[str, set[str]], list[GeneratorIssue]]:
-        warnings: list[GeneratorIssue] = list()
+    def _resolve_conflicts(collection_terms_mapping: dict[str, set[str]]) \
+                                               -> tuple[dict[str, set[str]], list[GenerationIssue]]: # noqa E127
+        warnings: list[GenerationIssue] = list()
         conflicting_collection_ids_list: list[list[str]] = list()
-        collection_ids: list[str] = list(collection_tokens_mapping.keys())
+        collection_ids: list[str] = list(collection_terms_mapping.keys())
         len_collection_ids: int = len(collection_ids)
         for l_collection_index in range(0, len_collection_ids - 1):
             conflicting_collection_ids: list[str] = list()
             for r_collection_index in range(l_collection_index + 1, len_collection_ids):
-                if collection_tokens_mapping[collection_ids[l_collection_index]].isdisjoint \
-                       (collection_tokens_mapping[collection_ids[r_collection_index]]):
+                if collection_terms_mapping[collection_ids[l_collection_index]].isdisjoint(
+                       collection_terms_mapping[collection_ids[r_collection_index]]):
                     continue
                 else:
                     not_registered = True
@@ -256,105 +265,102 @@ class DrsGenerator(DrsApplication):
         # Each time a collection is resolved, we must restart the loop so as to check if others can be,
         # until no progress is made.
         while True:
-            # 1. Non-conflicting collections with only one token are assigned.
-            #    Non-conflicting collections with more than one token will be raise an error
+            # 1. Non-conflicting collections with only one term are assigned.
+            #    Non-conflicting collections with more than one term will be raise an error
             #    in the _check method.
             #    Nothing to do.
-            # 2a. Collections with one token that are conflicting to each other will raise an error.
-            #     We don't search for collection with more than one token which token sets are exactly
-            #     the same, because we cannot choose which token will be removed in 2b.
-            #     So stick with one token collections: those collection will be detected in method _check.
+            # 2a. Collections with one term that are conflicting to each other will raise an error.
+            #     We don't search for collection with more than one term which term sets are exactly
+            #     the same, because we cannot choose which term will be removed in 2b.
+            #     So stick with one term collections: those collection will be detected in method _check.
             collection_ids_with_len_eq_1_list: list[list[str]] = list()
             for collection_ids in conflicting_collection_ids_list:
                 tmp_conflicting_collection_ids: list[str] = list()
                 for collection_id in collection_ids:
-                    if len(collection_tokens_mapping[collection_id]) == 1:
+                    if len(collection_terms_mapping[collection_id]) == 1:
                         tmp_conflicting_collection_ids.append(collection_id)
                 if len(tmp_conflicting_collection_ids) > 1:
                     collection_ids_with_len_eq_1_list.append(tmp_conflicting_collection_ids)
-            # 2b. As it is not possible to resolve collections sharing the same unique token:
-            #     raise errors, remove the faulty collections and their token.
+            # 2b. As it is not possible to resolve collections sharing the same unique term:
+            #     raise errors, remove the faulty collections and their term.
             if collection_ids_with_len_eq_1_list:
                 for collection_ids_to_be_removed in collection_ids_with_len_eq_1_list:
                     DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
                                                             collection_ids_to_be_removed)
-                    DrsGenerator._remove_token_from_other_token_sets(collection_tokens_mapping,
-                                                      collection_ids_to_be_removed)
+                    DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
+                                                                   collection_ids_to_be_removed)
                 # Every time conflicting_collection_ids_list is modified, we must restart the loop,
                 # as conflicting collections may be resolved.
                 continue
-            # 3.a For each collections with only one token, assign their token to the detriment of
-            #    collections with more than one token.
+            # 3.a For each collections with only one term, assign their term to the detriment of
+            #    collections with more than one term.
             wining_collection_ids: list[str] = list()
             for collection_ids in conflicting_collection_ids_list:
                 for collection_id in collection_ids:
-                    if len(collection_tokens_mapping[collection_id]) == 1:
+                    if len(collection_terms_mapping[collection_id]) == 1:
                         wining_collection_ids.append(collection_id)
-                        token = _get_first_item(collection_tokens_mapping[collection_id])
-                        issue = AssignedToken(collection_id=collection_id, token=token)
+                        term = _get_first_item(collection_terms_mapping[collection_id])
+                        issue = AssignedTerm(collection_id=collection_id, term=term)
                         warnings.append(issue)
             # 3.b Update conflicting collections.
             if wining_collection_ids:
                 DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
                                                         wining_collection_ids)
-                DrsGenerator._remove_token_from_other_token_sets(collection_tokens_mapping,
-                                                  wining_collection_ids)
+                DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
+                                                               wining_collection_ids)
                 # Every time conflicting_collection_ids_list is modified, we must restart the loop,
                 # as conflicting collections may be resolved.
                 continue
-            # 4.a For each token set of the remaining conflicting collections, compute their difference.
-            #    If the difference is one token, this token is assigned to the collection that owns it.
-            wining_id_and_token_pairs: list[tuple[str, str]] = list()
+            # 4.a For each term set of the remaining conflicting collections, compute their difference.
+            #    If the difference is one term, this term is assigned to the collection that owns it.
+            wining_id_and_term_pairs: list[tuple[str, str]] = list()
             for collection_ids in conflicting_collection_ids_list:
                 for collection_index in range(0, len(collection_ids)):
-                    diff: set[str] = collection_tokens_mapping[collection_ids[collection_index]]\
-                                         .difference(
-                                                     *[collection_tokens_mapping[index]
-                                               for index in collection_ids[collection_index + 1 :] +\
-                                                        collection_ids[:collection_index]
-                                                      ]
-                                                    )
+                    collection_set = collection_ids[collection_index + 1:] + collection_ids[:collection_index]
+                    diff: set[str] = collection_terms_mapping[collection_ids[collection_index]]\
+                                         .difference(*[collection_terms_mapping[index] # noqa E127
+                                                     for index in collection_set])
                     if len(diff) == 1:
-                        wining_id_and_token_pairs.append((collection_ids[collection_index],
+                        wining_id_and_term_pairs.append((collection_ids[collection_index],
                                                          _get_first_item(diff)))
             # 4.b Update conflicting collections.
-            if wining_id_and_token_pairs:
+            if wining_id_and_term_pairs:
                 wining_collection_ids = list()
-                for collection_id, token in wining_id_and_token_pairs:
+                for collection_id, term in wining_id_and_term_pairs:
                     wining_collection_ids.append(collection_id)
-                    collection_tokens_mapping[collection_id].clear()
-                    collection_tokens_mapping[collection_id].add(token)
-                    issue = AssignedToken(collection_id=collection_id, token=token)
+                    collection_terms_mapping[collection_id].clear()
+                    collection_terms_mapping[collection_id].add(term)
+                    issue = AssignedTerm(collection_id=collection_id, term=term)
                     warnings.append(issue)
                 DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
                                                         wining_collection_ids)
-                DrsGenerator._remove_token_from_other_token_sets(collection_tokens_mapping,
+                DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
                                                                wining_collection_ids)
                 continue
             else:
-                break # Stop the loop when no progress is made.
-        return collection_tokens_mapping, warnings
+                break  # Stop the loop when no progress is made.
+        return collection_terms_mapping, warnings
     @staticmethod
-    def _check_collection_tokens_mapping(collection_tokens_mapping: dict[str, set[str]]) \
-                                                     -> tuple[dict[str, str], list[GeneratorIssue]]:
-        errors: list[GeneratorIssue] = list()
-        # 1. Looking for collections that share strictly the same token(s).
-        collection_ids: list[str] = list(collection_tokens_mapping.keys())
+    def _check_collection_terms_mapping(collection_terms_mapping: dict[str, set[str]]) \
+                                                    -> tuple[dict[str, str], list[GenerationIssue]]: # noqa E127
+        errors: list[GenerationIssue] = list()
+        # 1. Looking for collections that share strictly the same term(s).
+        collection_ids: list[str] = list(collection_terms_mapping.keys())
         len_collection_ids: int = len(collection_ids)
         faulty_collections_list: list[set[str]] = list()
         for l_collection_index in range(0, len_collection_ids - 1):
             l_collection_id = collection_ids[l_collection_index]
-            l_token_set = collection_tokens_mapping[l_collection_id]
+            l_term_set = collection_terms_mapping[l_collection_id]
             for r_collection_index in range(l_collection_index + 1, len_collection_ids):
                 r_collection_id = collection_ids[r_collection_index]
-                r_token_set = collection_tokens_mapping[r_collection_id]
-                # check if the set is empty because the difference will always be an empty set!
-                if l_token_set and (not l_token_set.difference(r_token_set)):
+                r_term_set = collection_terms_mapping[r_collection_id]
+                # Check if the set is empty because the difference will always be an empty set!
+                if l_term_set and (not l_term_set.difference(r_term_set)):
                     not_registered = True
                     for faulty_collections in faulty_collections_list:
                         if l_collection_id in faulty_collections or \
@@ -366,35 +372,35 @@ class DrsGenerator(DrsApplication):
                     if not_registered:
                         faulty_collections_list.append({l_collection_id, r_collection_id})
         for faulty_collections in faulty_collections_list:
-            tokens = collection_tokens_mapping[_get_first_item(faulty_collections)]
+            terms = collection_terms_mapping[_get_first_item(faulty_collections)]
             issue = ConflictingCollections(collection_ids=_transform_set_and_sort(faulty_collections),
-                                           tokens=_transform_set_and_sort(tokens))
+                                           terms=_transform_set_and_sort(terms))
             errors.append(issue)
             for collection_id in faulty_collections:
-                del collection_tokens_mapping[collection_id]
-        # 2. Looking for collections with more than one token.
+                del collection_terms_mapping[collection_id]
+        # 2. Looking for collections with more than one term.
         result: dict[str, str] = dict()
-        for collection_id, token_set in collection_tokens_mapping.items():
-            len_token_set = len(token_set)
-            if len_token_set == 1:
-                result[collection_id] = _get_first_item(token_set)
-            elif len_token_set > 1:
-                other_issue = TooManyTokensCollection(collection_id=collection_id,
-                                                     tokens=_transform_set_and_sort(token_set))
+        for collection_id, term_set in collection_terms_mapping.items():
+            len_term_set = len(term_set)
+            if len_term_set == 1:
+                result[collection_id] = _get_first_item(term_set)
+            elif len_term_set > 1:
+                other_issue = TooManyTermCollection(collection_id=collection_id,
+                                                    terms=_transform_set_and_sort(term_set))
                 errors.append(other_issue)
-            #else: Don't add emptied collection to the result.
+            # else: Don't add emptied collection to the result.
         return result, errors
     @staticmethod
-    def _remove_token_from_other_token_sets(collection_tokens_mapping: dict[str, set[str]],
+    def _remove_term_from_other_term_sets(collection_terms_mapping: dict[str, set[str]],
                                           collection_ids_to_be_removed: list[str]) -> None:
         for collection_id_to_be_removed in collection_ids_to_be_removed:
-            # Should only be one token.
-            token_to_be_removed: str = _get_first_item(collection_tokens_mapping[collection_id_to_be_removed])
-            for collection_id in collection_tokens_mapping.keys():
+            # Should only be one term.
+            term_to_be_removed: str = _get_first_item(collection_terms_mapping[collection_id_to_be_removed])
+            for collection_id in collection_terms_mapping.keys():
                 if (collection_id not in collection_ids_to_be_removed):
-                    collection_tokens_mapping[collection_id].discard(token_to_be_removed)
+                    collection_terms_mapping[collection_id].discard(term_to_be_removed)
     @staticmethod
     def _remove_ids_from_conflicts(conflicting_collection_ids_list: list[list[str]],
@@ -403,22 +409,3 @@ class DrsGenerator(DrsApplication):
             for conflicting_collection_ids in conflicting_collection_ids_list:
                 if collection_id_to_be_removed in conflicting_collection_ids:
                     conflicting_collection_ids.remove(collection_id_to_be_removed)
-if __name__ == "__main__":
-    project_id = 'cmip6plus'
-    generator = DrsGenerator(project_id)
-    mapping = \
-    {
-        'member_id': 'r2i2p1f2',
-        'activity_id': 'CMIP',
-        'source_id': 'MIROC6',
-        'mip_era': 'CMIP6Plus',
-        'experiment_id': 'amip',
-        'variable_id': 'od550aer',
-        'table_id': 'ACmon',
-        'grid_label': 'gn',
-        'institution_id': 'IPSL',
-    }
-    report = generator.generate_file_name_from_mapping(mapping)
-    print(report.warnings)

esgvoc 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

Potentially problematic release.

esgvoc 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl