PyPI - esgvoc - Versions diffs - 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl - Mend

esgvoc 1.0.0 (py3-none-any.whl) → 1.1.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of esgvoc might be problematic. Click here for more details.

Files changed (43)

esgvoc/__init__.py +1 -1
esgvoc/api/__init__.py +0 -6
esgvoc/api/data_descriptors/__init__.py +8 -0
esgvoc/api/data_descriptors/archive.py +5 -0
esgvoc/api/data_descriptors/citation_url.py +5 -0
esgvoc/api/data_descriptors/experiment.py +2 -2
esgvoc/api/data_descriptors/known_branded_variable.py +58 -5
esgvoc/api/data_descriptors/member_id.py +9 -0
esgvoc/api/data_descriptors/regex.py +5 -0
esgvoc/api/data_descriptors/vertical_label.py +2 -2
esgvoc/api/project_specs.py +48 -130
esgvoc/api/projects.py +185 -66
esgvoc/apps/drs/generator.py +103 -85
esgvoc/apps/drs/validator.py +22 -38
esgvoc/apps/jsg/json_schema_generator.py +255 -130
esgvoc/apps/jsg/templates/template.jinja +249 -0
esgvoc/apps/test_cv/README.md +214 -0
esgvoc/apps/test_cv/cv_tester.py +1368 -0
esgvoc/apps/test_cv/example_usage.py +216 -0
esgvoc/apps/vr/__init__.py +12 -0
esgvoc/apps/vr/build_variable_registry.py +71 -0
esgvoc/apps/vr/example_usage.py +60 -0
esgvoc/apps/vr/vr_app.py +333 -0
esgvoc/cli/config.py +671 -86
esgvoc/cli/drs.py +39 -21
esgvoc/cli/main.py +2 -0
esgvoc/cli/test_cv.py +257 -0
esgvoc/core/constants.py +10 -7
esgvoc/core/data_handler.py +24 -22
esgvoc/core/db/connection.py +7 -0
esgvoc/core/db/project_ingestion.py +34 -9
esgvoc/core/db/universe_ingestion.py +1 -2
esgvoc/core/service/configuration/setting.py +192 -21
esgvoc/core/service/data_merger.py +1 -1
esgvoc/core/service/state.py +18 -2
{esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/METADATA +2 -3
{esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/RECORD +41 -30
esgvoc/apps/jsg/cmip6_template.json +0 -74
esgvoc/apps/jsg/cmip6plus_template.json +0 -74
/esgvoc/apps/{py.typed → test_cv/__init__.py} +0 -0
{esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/WHEEL +0 -0
{esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/entry_points.txt +0 -0
{esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/licenses/LICENSE.txt +0 -0

esgvoc/api/projects.py CHANGED

@@ -1,5 +1,6 @@
+import itertools
 import re
-from typing import Iterable, Sequence
+from typing import Iterable, Sequence, cast
 from sqlalchemy import text
 from sqlmodel import Session, and_, col, select
@@ -48,22 +49,36 @@ def _get_project_session_with_exception(project_id: str) -> Session:
         raise EsgvocNotFoundError(f"unable to find project '{project_id}'")
-def _resolve_term(composite_term_part: dict, universe_session: Session, project_session: Session) -> UTerm | PTerm:
-    # First find the term in the universe than in the current project
-    term_id = composite_term_part[constants.TERM_ID_JSON_KEY]
-    term_type = composite_term_part[constants.TERM_TYPE_JSON_KEY]
-    uterm = universe._get_term_in_data_descriptor(
-        data_descriptor_id=term_type, term_id=term_id, session=universe_session
-    )
-    if uterm:
-        return uterm
-    else:
-        pterm = _get_term_in_collection(collection_id=term_type, term_id=term_id, session=project_session)
-    if pterm:
-        return pterm
+def _resolve_composite_term_part(composite_term_part: dict,
+                                 universe_session: Session,
+                                 project_session: Session) -> UTerm | PTerm | Sequence[UTerm | PTerm]:
+    if constants.TERM_ID_JSON_KEY in composite_term_part:
+        # First find the term in the universe than in the current project
+        term_id = composite_term_part[constants.TERM_ID_JSON_KEY]
+        term_type = composite_term_part[constants.TERM_TYPE_JSON_KEY]
+        uterm = universe._get_term_in_data_descriptor(data_descriptor_id=term_type,
+                                                      term_id=term_id, session=universe_session)
+        if uterm:
+            return uterm
+        else:
+            pterm = _get_term_in_collection(collection_id=term_type, term_id=term_id, session=project_session)
+        if pterm:
+            return pterm
+        else:
+            msg = f"unable to find the term '{term_id}' in '{term_type}'"
+            raise EsgvocNotFoundError(msg)
     else:
-        msg = f"unable to find the term '{term_id}' in '{term_type}'"
-        raise EsgvocNotFoundError(msg)
+        term_type = composite_term_part[constants.TERM_TYPE_JSON_KEY]
+        data_descriptor = universe._get_data_descriptor_in_universe(term_type, universe_session)
+        if data_descriptor is not None:
+            return data_descriptor.terms
+        else:
+            collection = _get_collection_in_project(term_type, project_session)
+            if collection is not None:
+                return collection.terms
+            else:
+                msg = f"unable to find the terms of '{term_type}'"
+                raise EsgvocNotFoundError(msg)
 def _get_composite_term_separator_parts(term: UTerm | PTerm) -> tuple[str, list]:
@@ -72,42 +87,82 @@ def _get_composite_term_separator_parts(term: UTerm | PTerm) -> tuple[str, list]
     return separator, parts
-# TODO: support optionality of parts of composite.
-# It is backtrack possible for more than one missing parts.
 def _valid_value_composite_term_with_separator(
     value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
 ) -> list[UniverseTermError | ProjectTermError]:
-    result = list()
     separator, parts = _get_composite_term_separator_parts(term)
-    if separator in value:
-        splits = value.split(separator)
-        if len(splits) == len(parts):
-            for index in range(0, len(splits)):
-                given_value = splits[index]
-                if "id" not in parts[index].keys():
-                    terms = universe.get_all_terms_in_data_descriptor(parts[index]["type"], None)
-                    parts[index]["id"] = [term.id for term in terms]
-                if type(parts[index]["id"]) is str:
-                    parts[index]["id"] = [parts[index]["id"]]
-                errors_list = list()
-                for id in parts[index]["id"]:
-                    part_parts = dict(parts[index])
-                    part_parts["id"] = id
-                    resolved_term = _resolve_term(part_parts, universe_session, project_session)
-                    errors = _valid_value(given_value, resolved_term, universe_session, project_session)
-                    if len(errors) == 0:
-                        errors_list = errors
-                        break
-                    else:
-                        errors_list.extend(errors)
-                else:
-                    result.append(_create_term_error(value, term))
-        else:
-            result.append(_create_term_error(value, term))
-    else:
-        result.append(_create_term_error(value, term))
-    return result
+    required_indices = {i for i, p in enumerate(parts) if p.get("is_required", False)}
+    splits = value.split(separator)
+    nb_splits = len(splits)
+    nb_parts = len(parts)
+    if nb_splits > nb_parts:
+        return [_create_term_error(value, term)]
+    # Generate all possible assignments of split values into parts
+    # Only keep those that include all required parts
+    all_positions = [i for i in range(nb_parts)]
+    valid_combinations = [
+        comb for comb in itertools.combinations(all_positions, nb_splits) if required_indices.issubset(comb)
+    ]
+    for positions in valid_combinations:
+        candidate = [None] * nb_parts
+        for idx, pos in enumerate(positions):
+            candidate[pos] = splits[idx]
+        # Separator structure validation:
+        # - No leading separator if the first part is None
+        # - No trailing separator if the last part is None
+        # - No double separators where two adjacent optional parts are missing
+        if candidate[0] is None and value.startswith(separator):
+            continue
+        if candidate[-1] is None and value.endswith(separator):
+            continue
+        if any(
+            candidate[i] is None and candidate[i + 1] is None and separator * 2 in value for i in range(nb_parts - 1)
+        ):
+            continue  # invalid double separator between two missing parts
+        # Validate each filled part value
+        all_valid = True
+        for i, given_value in enumerate(candidate):
+            if given_value is None:
+                if parts[i].get("is_required", False):
+                    all_valid = False
+                    break
+                continue  # optional and missing part is allowed
+            part = parts[i]
+            # Resolve term ID list if not present
+            if "id" not in part:
+                terms = universe.get_all_terms_in_data_descriptor(part["type"], None)
+                part["id"] = [term.id for term in terms]
+            if isinstance(part["id"], str):
+                part["id"] = [part["id"]]
+            # Try all possible term IDs to find a valid match
+            valid_for_this_part = False
+            for id in part["id"]:
+                part_copy = dict(part)
+                part_copy["id"] = id
+                resolved_term = _resolve_composite_term_part(part_copy, universe_session, project_session)
+                # resolved_term can't be a list of terms here.
+                resolved_term = cast(UTerm | PTerm, resolved_term)
+                errors = _valid_value(given_value, resolved_term, universe_session, project_session)
+                if not errors:
+                    valid_for_this_part = True
+                    break
+            if not valid_for_this_part:
+                all_valid = False
+                break
+        if all_valid:
+            return []  # At least one valid combination found
+    return [_create_term_error(value, term)]  # No valid combination found
 def _transform_to_pattern(term: UTerm | PTerm, universe_session: Session, project_session: Session) -> str:
@@ -123,8 +178,13 @@ def _transform_to_pattern(term: UTerm | PTerm, universe_session: Session, projec
             separator, parts = _get_composite_term_separator_parts(term)
             result = ""
             for part in parts:
-                resolved_term = _resolve_term(part, universe_session, project_session)
-                pattern = _transform_to_pattern(resolved_term, universe_session, project_session)
+                resolved_term = _resolve_composite_term_part(part, universe_session, project_session)
+                if isinstance(resolved_term, Sequence):
+                    pattern = ""
+                    for r_term in resolved_term:
+                        pattern += _transform_to_pattern(r_term, universe_session, project_session)
+                else:
+                    pattern = _transform_to_pattern(resolved_term, universe_session, project_session)
                 result = f"{result}{pattern}{separator}"
             result = result.rstrip(separator)
         case _:
@@ -452,7 +512,52 @@ def get_all_terms_in_collection(
 def _get_all_collections_in_project(session: Session) -> list[PCollection]:
     project = session.get(Project, constants.SQLITE_FIRST_PK)
     # Project can't be missing if session exists.
-    return project.collections  # type: ignore
+    try:
+        return project.collections  # type: ignore
+    except Exception as e:
+        # Enhanced error context for collection retrieval failures
+        import logging
+        logger = logging.getLogger(__name__)
+        logger.error(f"Failed to retrieve collections for project '{project.id}': {str(e)}")
+        # Use raw SQL to inspect collections without Pydantic validation
+        from sqlalchemy import text
+        try:
+            # Query raw data to identify problematic collections
+            raw_query = text("""
+                SELECT id, term_kind, data_descriptor_id
+                FROM pcollections
+                WHERE project_pk = :project_pk
+            """)
+            result = session.execute(raw_query, {"project_pk": project.pk})
+            problematic_collections = []
+            for row in result:
+                collection_id, term_kind_value, data_descriptor_id = row
+                # Only empty string is invalid - indicates ingestion couldn't determine termkind
+                if term_kind_value == '' or term_kind_value is None:
+                    problematic_collections.append((collection_id, term_kind_value, data_descriptor_id))
+                    msg = f"Collection '{collection_id}' has empty term_kind (data_descriptor: " + \
+                          f"{data_descriptor_id}) - CV ingestion failed to determine termkind"
+                    logger.error(msg)
+            if problematic_collections:
+                error_details = []
+                for col_id, _, data_desc in problematic_collections:
+                    error_details.append(f"  • Collection '{col_id}' (data_descriptor: {data_desc}): EMPTY termkind")
+                error_msg = (
+                    f"Found {len(problematic_collections)} collections with empty term_kind:\n" +
+                    "\n".join(error_details)
+                )
+                raise ValueError(error_msg) from e
+        except Exception as inner_e:
+            logger.error(f"Failed to analyze problematic collections using raw SQL: {inner_e}")
+        raise e
 def get_all_collections_in_project(project_id: str) -> list[str]:
@@ -469,10 +574,24 @@ def get_all_collections_in_project(project_id: str) -> list[str]:
     """
     result = list()
     if connection := _get_project_connection(project_id):
-        with connection.create_session() as session:
-            collections = _get_all_collections_in_project(session)
-            for collection in collections:
-                result.append(collection.id)
+        try:
+            with connection.create_session() as session:
+                collections = _get_all_collections_in_project(session)
+                for collection in collections:
+                    result.append(collection.id)
+        except Exception as e:
+            # Enhanced error context for project collection retrieval
+            import logging
+            logger = logging.getLogger(__name__)
+            logger.error(f"Failed to get collections for project '{project_id}': {str(e)}")
+            # Re-raise with enhanced context
+            raise ValueError(
+                f"Failed to retrieve collections for project '{project_id}'. "
+                f"This may be due to invalid termkind values in the database. "
+                f"Check the project database for collections with empty or invalid termkind values. "
+                f"Original error: {str(e)}"
+            ) from e
     return result
@@ -1113,16 +1232,16 @@ def find_items_in_project(
                 collection_column = col(PCollectionFTS5.id)  # TODO: use specs when implemented!
                 term_column = col(PTermFTS5.specs)  # type: ignore
             collection_where_condition = collection_column.match(processed_expression)
-            collection_statement = select(PCollectionFTS5.id,
-                                          text("'collection' AS TYPE"),
-                                          text(f"'{project_id}' AS TYPE"),
-                                          text('rank')).where(collection_where_condition)
+            collection_statement = select(
+                PCollectionFTS5.id, text("'collection' AS TYPE"), text(f"'{project_id}' AS TYPE"), text("rank")
+            ).where(collection_where_condition)
             term_where_condition = term_column.match(processed_expression)
-            term_statement = select(PTermFTS5.id,
-                                    text("'term' AS TYPE"),
-                                    PCollection.id,
-                                    text('rank')).join(PCollection) \
-                                                 .where(term_where_condition)
-            result = execute_find_item_statements(session, processed_expression, collection_statement,
-                                                  term_statement, limit, offset)
+            term_statement = (
+                select(PTermFTS5.id, text("'term' AS TYPE"), PCollection.id, text("rank"))
+                .join(PCollection)
+                .where(term_where_condition)
+            )
+            result = execute_find_item_statements(
+                session, processed_expression, collection_statement, term_statement, limit, offset
+            )
     return result

esgvoc/apps/drs/generator.py CHANGED

@@ -1,7 +1,8 @@
 from typing import Any, Iterable, Mapping, cast
 import esgvoc.api.projects as projects
-from esgvoc.api.project_specs import DrsCollection, DrsConstant, DrsPartKind, DrsSpecification, DrsType
+from esgvoc.api.project_specs import DrsSpecification, DrsType
+from esgvoc.api.search import MatchingTerm
 from esgvoc.apps.drs.report import (
     AssignedTerm,
     ConflictingCollections,
@@ -92,8 +93,7 @@ class DrsGenerator(DrsApplication):
         :rtype: DrsGeneratorReport
         """
         report = self._generate_from_mapping(mapping, self.file_name_specs)
-        report.generated_drs_expression = report.generated_drs_expression + \
-                                          self._get_full_file_name_extension() # noqa E127
+        report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension()  # noqa E127
         return report
     def generate_file_name_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
@@ -108,12 +108,10 @@ class DrsGenerator(DrsApplication):
         :rtype: DrsGeneratorReport
         """
         report = self._generate_from_bag_of_terms(terms, self.file_name_specs)
-        report.generated_drs_expression = report.generated_drs_expression + \
-                                          self._get_full_file_name_extension() # noqa E127
+        report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension()  # noqa E127
         return report
-    def generate_from_mapping(self, mapping: Mapping[str, str],
-                              drs_type: DrsType | str) -> DrsGenerationReport:
+    def generate_from_mapping(self, mapping: Mapping[str, str], drs_type: DrsType | str) -> DrsGenerationReport:
         """
         Generate a DRS expression from a mapping of collection ids and terms.
@@ -134,8 +132,7 @@ class DrsGenerator(DrsApplication):
             case _:
                 raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
-    def generate_from_bag_of_terms(self, terms: Iterable[str], drs_type: DrsType | str) \
-                                                                             -> DrsGenerationReport: # noqa E127
+    def generate_from_bag_of_terms(self, terms: Iterable[str], drs_type: DrsType | str) -> DrsGenerationReport:  # noqa E127
         """
         Generate a DRS expression from an unordered bag of terms.
@@ -156,67 +153,78 @@ class DrsGenerator(DrsApplication):
             case _:
                 raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
-    def _generate_from_mapping(self, mapping: Mapping[str, str], specs: DrsSpecification) \
-                                                                            -> DrsGenerationReport: # noqa E127
+    def _generate_from_mapping(self, mapping: Mapping[str, str], specs: DrsSpecification) -> DrsGenerationReport:  # noqa E127
         drs_expression, errors, warnings = self.__generate_from_mapping(mapping, specs, True)
         if self.pedantic:
             errors.extend(warnings)
             warnings.clear()
-        return DrsGenerationReport(project_id=self.project_id, type=specs.type,
-                                   given_mapping_or_bag_of_terms=mapping,
-                                   mapping_used=mapping,
-                                   generated_drs_expression=drs_expression,
-                                   errors=cast(list[GenerationError], errors),
-                                   warnings=cast(list[GenerationWarning], warnings))
-    def __generate_from_mapping(self, mapping: Mapping[str, str],
-                                specs: DrsSpecification,
-                                has_to_valid_terms: bool) \
-                                        -> tuple[str, list[GenerationIssue], list[GenerationIssue]]: # noqa E127
+        return DrsGenerationReport(
+            project_id=self.project_id,
+            type=specs.type,
+            given_mapping_or_bag_of_terms=mapping,
+            mapping_used=mapping,
+            generated_drs_expression=drs_expression,
+            errors=cast(list[GenerationError], errors),
+            warnings=cast(list[GenerationWarning], warnings),
+        )
+    def __generate_from_mapping(
+        self, mapping: Mapping[str, str], specs: DrsSpecification, has_to_valid_terms: bool
+    ) -> tuple[str, list[GenerationIssue], list[GenerationIssue]]:  # noqa E127
         errors: list[GenerationIssue] = list()
         warnings: list[GenerationIssue] = list()
         drs_expression = ""
         part_position: int = 0
         for part in specs.parts:
             part_position += 1
-            if part.kind == DrsPartKind.COLLECTION:
-                collection_part = cast(DrsCollection, part)
-                collection_id = collection_part.collection_id
-                if collection_id in mapping:
-                    part_value = mapping[collection_id]
-                    if has_to_valid_terms:
+            collection_id = part.source_collection
+            if collection_id in mapping:
+                part_value = mapping[collection_id]
+                if has_to_valid_terms:
+                    if part.source_collection_term is None:
                         matching_terms = projects.valid_term_in_collection(part_value,
                                                                            self.project_id,
                                                                            collection_id)
-                        if not matching_terms:
-                            issue = InvalidTerm(term=part_value,
-                                                term_position=part_position,
-                                                collection_id_or_constant_value=collection_id)
-                            errors.append(issue)
-                            part_value = DrsGenerationReport.INVALID_TAG
-                else:
-                    other_issue = MissingTerm(collection_id=collection_id,
-                                              collection_position=part_position)
-                    if collection_part.is_required:
-                        errors.append(other_issue)
-                        part_value = DrsGenerationReport.MISSING_TAG
                     else:
-                        warnings.append(other_issue)
-                        continue  # The for loop.
+                        matching_terms = projects.valid_term(
+                            part_value,
+                            self.project_id,
+                            collection_id,
+                            part.source_collection_term).validated
+                    if not matching_terms:
+                        issue = InvalidTerm(term=part_value,
+                                            term_position=part_position,
+                                            collection_id_or_constant_value=collection_id)
+                        errors.append(issue)
+                        part_value = DrsGenerationReport.INVALID_TAG
             else:
-                constant_part = cast(DrsConstant, part)
-                part_value = constant_part.value
+                other_issue = MissingTerm(collection_id=collection_id, collection_position=part_position)
+                if part.is_required:
+                    errors.append(other_issue)
+                    part_value = DrsGenerationReport.MISSING_TAG
+                else:
+                    warnings.append(other_issue)
+                    continue  # The for loop.
             drs_expression += part_value + specs.separator
-        drs_expression = drs_expression[0:len(drs_expression)-len(specs.separator)]
+        drs_expression = drs_expression[0: len(drs_expression) - len(specs.separator)]
         return drs_expression, errors, warnings
-    def _generate_from_bag_of_terms(self, terms: Iterable[str], specs: DrsSpecification) \
-                                                                             -> DrsGenerationReport: # noqa E127
+    def _generate_from_bag_of_terms(self, terms: Iterable[str], specs: DrsSpecification) -> DrsGenerationReport:  # noqa E127
         collection_terms_mapping: dict[str, set[str]] = dict()
         for term in terms:
-            matching_terms = projects.valid_term_in_project(term, self.project_id)
+            matching_terms: list[MatchingTerm] = list()
+            for part in specs.parts:
+                if part.source_collection_term is None:
+                    matching_terms.extend(projects.valid_term_in_collection(term, self.project_id,
+                                                                            part.source_collection))
+                else:
+                    if projects.valid_term(term, self.project_id, part.source_collection,
+                                           part.source_collection_term).validated:
+                        matching_terms.append(MatchingTerm(project_id=self.project_id,
+                                                           collection_id=part.source_collection,
+                                                           term_id=part.source_collection_term))
             for matching_term in matching_terms:
                 if matching_term.collection_id not in collection_terms_mapping:
                     collection_terms_mapping[matching_term.collection_id] = set()
@@ -229,15 +237,18 @@ class DrsGenerator(DrsApplication):
         if self.pedantic:
             errors.extend(warnings)
             warnings.clear()
-        return DrsGenerationReport(project_id=self.project_id, type=specs.type,
+        return DrsGenerationReport(project_id=self.project_id,
+                                   type=specs.type,
                                    given_mapping_or_bag_of_terms=terms,
-                                   mapping_used=mapping, generated_drs_expression=drs_expression,
+                                   mapping_used=mapping,
+                                   generated_drs_expression=drs_expression,
                                    errors=cast(list[GenerationError], errors),
                                    warnings=cast(list[GenerationWarning], warnings))
     @staticmethod
-    def _resolve_conflicts(collection_terms_mapping: dict[str, set[str]]) \
-                                               -> tuple[dict[str, set[str]], list[GenerationIssue]]: # noqa E127
+    def _resolve_conflicts(
+        collection_terms_mapping: dict[str, set[str]],
+    ) -> tuple[dict[str, set[str]], list[GenerationIssue]]:  # noqa E127
         warnings: list[GenerationIssue] = list()
         conflicting_collection_ids_list: list[list[str]] = list()
         collection_ids: list[str] = list(collection_terms_mapping.keys())
@@ -247,13 +258,16 @@ class DrsGenerator(DrsApplication):
             conflicting_collection_ids: list[str] = list()
             for r_collection_index in range(l_collection_index + 1, len_collection_ids):
                 if collection_terms_mapping[collection_ids[l_collection_index]].isdisjoint(
-                       collection_terms_mapping[collection_ids[r_collection_index]]):
+                    collection_terms_mapping[collection_ids[r_collection_index]]
+                ):
                     continue
                 else:
                     not_registered = True
                     for cc_ids in conflicting_collection_ids_list:
-                        if collection_ids[l_collection_index] in cc_ids and \
-                           collection_ids[r_collection_index] in cc_ids:
+                        if (
+                            collection_ids[l_collection_index] in cc_ids
+                            and collection_ids[r_collection_index] in cc_ids
+                        ):
                             not_registered = False
                             break
                     if not_registered:
@@ -287,10 +301,12 @@ class DrsGenerator(DrsApplication):
             #     raise errors, remove the faulty collections and their term.
             if collection_ids_with_len_eq_1_list:
                 for collection_ids_to_be_removed in collection_ids_with_len_eq_1_list:
-                    DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
-                                                            collection_ids_to_be_removed)
-                    DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
-                                                                   collection_ids_to_be_removed)
+                    DrsGenerator._remove_ids_from_conflicts(
+                        conflicting_collection_ids_list, collection_ids_to_be_removed
+                    )
+                    DrsGenerator._remove_term_from_other_term_sets(
+                        collection_terms_mapping, collection_ids_to_be_removed
+                    )
                 # Every time conflicting_collection_ids_list is modified, we must restart the loop,
                 # as conflicting collections may be resolved.
                 continue
@@ -307,10 +323,8 @@ class DrsGenerator(DrsApplication):
                         warnings.append(issue)
             # 3.b Update conflicting collections.
             if wining_collection_ids:
-                DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
-                                                        wining_collection_ids)
-                DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
-                                                               wining_collection_ids)
+                DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list, wining_collection_ids)
+                DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping, wining_collection_ids)
                 # Every time conflicting_collection_ids_list is modified, we must restart the loop,
                 # as conflicting collections may be resolved.
                 continue
@@ -321,12 +335,14 @@ class DrsGenerator(DrsApplication):
             for collection_ids in conflicting_collection_ids_list:
                 for collection_index in range(0, len(collection_ids)):
                     collection_set = collection_ids[collection_index + 1:] + collection_ids[:collection_index]
-                    diff: set[str] = collection_terms_mapping[collection_ids[collection_index]]\
-                                         .difference(*[collection_terms_mapping[index] # noqa E127
-                                                     for index in collection_set])
+                    diff: set[str] = collection_terms_mapping[collection_ids[collection_index]].difference(
+                        *[
+                            collection_terms_mapping[index]  # noqa E127
+                            for index in collection_set
+                        ]
+                    )
                     if len(diff) == 1:
-                        wining_id_and_term_pairs.append((collection_ids[collection_index],
-                                                         _get_first_item(diff)))
+                        wining_id_and_term_pairs.append((collection_ids[collection_index], _get_first_item(diff)))
             # 4.b Update conflicting collections.
             if wining_id_and_term_pairs:
                 wining_collection_ids = list()
@@ -336,18 +352,17 @@ class DrsGenerator(DrsApplication):
                     collection_terms_mapping[collection_id].add(term)
                     issue = AssignedTerm(collection_id=collection_id, term=term)
                     warnings.append(issue)
-                DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
-                                                        wining_collection_ids)
-                DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
-                                                               wining_collection_ids)
+                DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list, wining_collection_ids)
+                DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping, wining_collection_ids)
                 continue
             else:
                 break  # Stop the loop when no progress is made.
         return collection_terms_mapping, warnings
     @staticmethod
-    def _check_collection_terms_mapping(collection_terms_mapping: dict[str, set[str]]) \
-                                                    -> tuple[dict[str, str], list[GenerationIssue]]: # noqa E127
+    def _check_collection_terms_mapping(
+        collection_terms_mapping: dict[str, set[str]],
+    ) -> tuple[dict[str, str], list[GenerationIssue]]:  # noqa E127
         errors: list[GenerationIssue] = list()
         # 1. Looking for collections that share strictly the same term(s).
         collection_ids: list[str] = list(collection_terms_mapping.keys())
@@ -363,8 +378,7 @@ class DrsGenerator(DrsApplication):
                 if l_term_set and (not l_term_set.difference(r_term_set)):
                     not_registered = True
                     for faulty_collections in faulty_collections_list:
-                        if l_collection_id in faulty_collections or \
-                           r_collection_id in faulty_collections:
+                        if l_collection_id in faulty_collections or r_collection_id in faulty_collections:
                             faulty_collections.add(l_collection_id)
                             faulty_collections.add(r_collection_id)
                             not_registered = False
@@ -373,8 +387,9 @@ class DrsGenerator(DrsApplication):
                         faulty_collections_list.append({l_collection_id, r_collection_id})
         for faulty_collections in faulty_collections_list:
             terms = collection_terms_mapping[_get_first_item(faulty_collections)]
-            issue = ConflictingCollections(collection_ids=_transform_set_and_sort(faulty_collections),
-                                           terms=_transform_set_and_sort(terms))
+            issue = ConflictingCollections(
+                collection_ids=_transform_set_and_sort(faulty_collections), terms=_transform_set_and_sort(terms)
+            )
             errors.append(issue)
             for collection_id in faulty_collections:
                 del collection_terms_mapping[collection_id]
@@ -386,25 +401,28 @@ class DrsGenerator(DrsApplication):
             if len_term_set == 1:
                 result[collection_id] = _get_first_item(term_set)
             elif len_term_set > 1:
-                other_issue = TooManyTermCollection(collection_id=collection_id,
-                                                    terms=_transform_set_and_sort(term_set))
+                other_issue = TooManyTermCollection(
+                    collection_id=collection_id, terms=_transform_set_and_sort(term_set)
+                )
                 errors.append(other_issue)
             # else: Don't add emptied collection to the result.
         return result, errors
     @staticmethod
-    def _remove_term_from_other_term_sets(collection_terms_mapping: dict[str, set[str]],
-                                          collection_ids_to_be_removed: list[str]) -> None:
+    def _remove_term_from_other_term_sets(
+        collection_terms_mapping: dict[str, set[str]], collection_ids_to_be_removed: list[str]
+    ) -> None:
         for collection_id_to_be_removed in collection_ids_to_be_removed:
             # Should only be one term.
             term_to_be_removed: str = _get_first_item(collection_terms_mapping[collection_id_to_be_removed])
             for collection_id in collection_terms_mapping.keys():
-                if (collection_id not in collection_ids_to_be_removed):
+                if collection_id not in collection_ids_to_be_removed:
                     collection_terms_mapping[collection_id].discard(term_to_be_removed)
     @staticmethod
-    def _remove_ids_from_conflicts(conflicting_collection_ids_list: list[list[str]],
-                                   collection_ids_to_be_removed: list[str]) -> None:
+    def _remove_ids_from_conflicts(
+        conflicting_collection_ids_list: list[list[str]], collection_ids_to_be_removed: list[str]
+    ) -> None:
         for collection_id_to_be_removed in collection_ids_to_be_removed:
             for conflicting_collection_ids in conflicting_collection_ids_list:
                 if collection_id_to_be_removed in conflicting_collection_ids:

esgvoc 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

Potentially problematic release.

esgvoc 1.0.0py3-none-any.whl → 1.1.1py3-none-any.whl