esgvoc 0.4.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of esgvoc might be problematic.

Files changed (74)
  1. esgvoc/__init__.py +1 -1
  2. esgvoc/api/data_descriptors/__init__.py +52 -28
  3. esgvoc/api/data_descriptors/activity.py +3 -3
  4. esgvoc/api/data_descriptors/area_label.py +16 -1
  5. esgvoc/api/data_descriptors/branded_suffix.py +20 -0
  6. esgvoc/api/data_descriptors/branded_variable.py +12 -0
  7. esgvoc/api/data_descriptors/consortium.py +14 -13
  8. esgvoc/api/data_descriptors/contact.py +5 -0
  9. esgvoc/api/data_descriptors/conventions.py +6 -0
  10. esgvoc/api/data_descriptors/creation_date.py +5 -0
  11. esgvoc/api/data_descriptors/data_descriptor.py +14 -9
  12. esgvoc/api/data_descriptors/data_specs_version.py +5 -0
  13. esgvoc/api/data_descriptors/date.py +1 -1
  14. esgvoc/api/data_descriptors/directory_date.py +1 -1
  15. esgvoc/api/data_descriptors/experiment.py +13 -11
  16. esgvoc/api/data_descriptors/forcing_index.py +1 -1
  17. esgvoc/api/data_descriptors/frequency.py +3 -3
  18. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  19. esgvoc/api/data_descriptors/grid_label.py +2 -2
  20. esgvoc/api/data_descriptors/horizontal_label.py +15 -1
  21. esgvoc/api/data_descriptors/initialisation_index.py +1 -1
  22. esgvoc/api/data_descriptors/institution.py +8 -5
  23. esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
  24. esgvoc/api/data_descriptors/license.py +3 -3
  25. esgvoc/api/data_descriptors/member_id.py +9 -0
  26. esgvoc/api/data_descriptors/mip_era.py +1 -1
  27. esgvoc/api/data_descriptors/model_component.py +1 -1
  28. esgvoc/api/data_descriptors/obs_type.py +5 -0
  29. esgvoc/api/data_descriptors/organisation.py +1 -1
  30. esgvoc/api/data_descriptors/physic_index.py +1 -1
  31. esgvoc/api/data_descriptors/product.py +2 -2
  32. esgvoc/api/data_descriptors/publication_status.py +5 -0
  33. esgvoc/api/data_descriptors/realisation_index.py +1 -1
  34. esgvoc/api/data_descriptors/realm.py +1 -1
  35. esgvoc/api/data_descriptors/region.py +5 -0
  36. esgvoc/api/data_descriptors/resolution.py +3 -3
  37. esgvoc/api/data_descriptors/source.py +9 -5
  38. esgvoc/api/data_descriptors/source_type.py +1 -1
  39. esgvoc/api/data_descriptors/table.py +3 -2
  40. esgvoc/api/data_descriptors/temporal_label.py +15 -1
  41. esgvoc/api/data_descriptors/time_range.py +4 -3
  42. esgvoc/api/data_descriptors/title.py +5 -0
  43. esgvoc/api/data_descriptors/tracking_id.py +5 -0
  44. esgvoc/api/data_descriptors/variable.py +25 -12
  45. esgvoc/api/data_descriptors/variant_label.py +3 -3
  46. esgvoc/api/data_descriptors/vertical_label.py +14 -0
  47. esgvoc/api/project_specs.py +117 -2
  48. esgvoc/api/projects.py +328 -287
  49. esgvoc/api/search.py +30 -3
  50. esgvoc/api/universe.py +42 -27
  51. esgvoc/apps/drs/generator.py +87 -74
  52. esgvoc/apps/jsg/cmip6_template.json +74 -0
  53. esgvoc/apps/jsg/json_schema_generator.py +194 -0
  54. esgvoc/cli/config.py +500 -0
  55. esgvoc/cli/find.py +138 -0
  56. esgvoc/cli/get.py +43 -38
  57. esgvoc/cli/main.py +10 -3
  58. esgvoc/cli/status.py +27 -18
  59. esgvoc/cli/valid.py +10 -15
  60. esgvoc/core/db/models/project.py +11 -11
  61. esgvoc/core/db/models/universe.py +3 -3
  62. esgvoc/core/db/project_ingestion.py +40 -40
  63. esgvoc/core/db/universe_ingestion.py +36 -33
  64. esgvoc/core/logging_handler.py +24 -2
  65. esgvoc/core/repo_fetcher.py +61 -59
  66. esgvoc/core/service/data_merger.py +47 -34
  67. esgvoc/core/service/state.py +107 -83
  68. {esgvoc-0.4.0.dist-info → esgvoc-1.0.1.dist-info}/METADATA +5 -20
  69. esgvoc-1.0.1.dist-info/RECORD +95 -0
  70. esgvoc/core/logging.conf +0 -21
  71. esgvoc-0.4.0.dist-info/RECORD +0 -80
  72. {esgvoc-0.4.0.dist-info → esgvoc-1.0.1.dist-info}/WHEEL +0 -0
  73. {esgvoc-0.4.0.dist-info → esgvoc-1.0.1.dist-info}/entry_points.txt +0 -0
  74. {esgvoc-0.4.0.dist-info → esgvoc-1.0.1.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/api/search.py CHANGED
@@ -76,18 +76,45 @@ def instantiate_pydantic_terms(db_terms: Iterable[UTerm | PTerm],
         list_to_populate.append(term)
 
 
+def process_expression(expression: str) -> str:
+    """
+    Allow only the SQLite FTS operators AND, OR and NOT, and perform a prefix search for single-word expressions.
+    """
+    # 1. Remove single and double quotes.
+    result = expression.replace('"', '')
+    result = result.replace("'", '')
+
+    # 2. Escape keywords.
+    result = result.replace('NEAR', '"NEAR"')
+    result = result.replace('+', '"+"')
+    result = result.replace('-', '"-"')
+    result = result.replace(':', '":"')
+    result = result.replace('^', '"^"')
+    result = result.replace('(', '"("')
+    result = result.replace(')', '")"')
+    result = result.replace(',', '","')
+
+    # 3. Make a single-word request a prefix search.
+    if not result.endswith('*'):
+        tokens = result.split(sep=None)
+        if len(tokens) == 1:
+            result += '*'
+    return result
+
+
 def generate_matching_condition(cls: type[UTermFTS5] | type[UDataDescriptorFTS5] |
                                 type[PTermFTS5] | type[PCollectionFTS5],
                                 expression: str,
                                 only_id: bool) -> ColumnElement[bool]:
+    processed_expression = process_expression(expression)
     # TODO: fix this when specs will be available in collections and data descriptors.
     if cls is PTermFTS5 or cls is UTermFTS5:
         if only_id:
-            result = col(cls.id).match(expression)
+            result = col(cls.id).match(processed_expression)
         else:
-            result = col(cls.specs).match(expression)  # type: ignore
+            result = col(cls.specs).match(processed_expression)  # type: ignore
     else:
-        result = col(cls.id).match(expression)
+        result = col(cls.id).match(processed_expression)
     return result
 
 
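For reference, here is how the new process_expression sanitizer behaves on a few representative inputs. This is a minimal standalone sketch that condenses the function added above without changing its behaviour; the example inputs are illustrative only, not taken from the esgvoc test suite.

def process_expression(expression: str) -> str:
    # Condensed copy of the sanitizer added in this release.
    result = expression.replace('"', '').replace("'", '')
    # Neutralize FTS5 operators other than AND, OR and NOT by quoting them.
    for token in ('NEAR', '+', '-', ':', '^', '(', ')', ','):
        result = result.replace(token, f'"{token}"')
    # A single-keyword expression becomes a prefix search.
    if not result.endswith('*') and len(result.split()) == 1:
        result += '*'
    return result

print(process_expression('ocean'))              # ocean*  (single keyword -> prefix search)
print(process_expression('ocean temperature'))  # ocean temperature  (implicit AND, unchanged)
print(process_expression('near-surface'))       # near"-"surface*  (the '-' operator is quoted away)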
esgvoc/api/universe.py CHANGED
@@ -13,6 +13,7 @@ from esgvoc.api.search import (
     handle_rank_limit_offset,
     instantiate_pydantic_term,
     instantiate_pydantic_terms,
+    process_expression,
 )
 from esgvoc.core.db.models.universe import UDataDescriptor, UDataDescriptorFTS5, UTerm, UTermFTS5
 
@@ -211,12 +212,15 @@ def find_data_descriptors_in_universe(expression: str,
                                       offset: int | None = None) -> list[tuple[str, dict]]:
     """
     Find data descriptors in the universe based on a full text search defined by the given `expression`.
-    The `expression` comes from the powerful
-    `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
-    and corresponds to the expression of the `MATCH` operator.
-    It can be composed of one or multiple keywords combined with boolean
-    operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
-    with the wildcard `*`.
+    The `expression` can be composed of one or multiple keywords.
+    The keywords can be combined with the boolean operators `AND`,
+    `OR` and `NOT` (case sensitive). Keywords are separated by whitespace;
+    if no boolean operator is provided, whitespace is handled as an
+    implicit AND operator between each pair of keywords. Note that this
+    function does not provide any priority operator (parentheses).
+    Keywords can define prefixes by adding a `*` at their end.
+    If the expression is composed of only one keyword, the function
+    automatically treats it as a prefix.
     The function returns a list of data descriptor ids and contexts, sorted according to the
     bm25 ranking metric (list index `0` has the highest rank).
     If the provided `expression` does not hit any data descriptor, the function returns an empty list.
@@ -266,12 +270,15 @@ def find_terms_in_universe(expression: str,
                            selected_term_fields: Iterable[str] | None = None) -> list[DataDescriptor]:
     """
     Find terms in the universe based on a full-text search defined by the given `expression`.
-    The `expression` comes from the powerful
-    `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
-    and corresponds to the expression of the `MATCH` operator.
-    It can be composed of one or multiple keywords combined with boolean
-    operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
-    with the wildcard `*`.
+    The `expression` can be composed of one or multiple keywords.
+    The keywords can be combined with the boolean operators `AND`,
+    `OR` and `NOT` (case sensitive). Keywords are separated by whitespace;
+    if no boolean operator is provided, whitespace is handled as an
+    implicit AND operator between each pair of keywords. Note that this
+    function does not provide any priority operator (parentheses).
+    Keywords can define prefixes by adding a `*` at their end.
+    If the expression is composed of only one keyword, the function
+    automatically treats it as a prefix.
     The function returns a list of term instances sorted according to the
     bm25 ranking metric (list index `0` has the highest rank).
     If the provided `expression` does not hit any term, the function returns an empty list.
@@ -323,12 +330,15 @@ def find_terms_in_data_descriptor(expression: str, data_descriptor_id: str,
                                   -> list[DataDescriptor]:
     """
     Find terms in the given data descriptor based on a full-text search defined by the given `expression`.
-    The `expression` comes from the powerful
-    `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
-    and corresponds to the expression of the `MATCH` operator.
-    It can be composed of one or multiple keywords combined with boolean
-    operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
-    with the wildcard `*`.
+    The `expression` can be composed of one or multiple keywords.
+    The keywords can be combined with the boolean operators `AND`,
+    `OR` and `NOT` (case sensitive). Keywords are separated by whitespace;
+    if no boolean operator is provided, whitespace is handled as an
+    implicit AND operator between each pair of keywords. Note that this
+    function does not provide any priority operator (parentheses).
+    Keywords can define prefixes by adding a `*` at their end.
+    If the expression is composed of only one keyword, the function
+    automatically treats it as a prefix.
     The function returns a list of term instances sorted according to the
     bm25 ranking metric (list index `0` has the highest rank).
     This function performs an exact match on the `data_descriptor_id`,
@@ -370,12 +380,16 @@ def find_items_in_universe(expression: str,
                            offset: int | None = None) -> list[Item]:
     """
     Find items, at the moment terms and data descriptors, in the universe based on a full-text
-    search defined by the given `expression`. The `expression` comes from the powerful
-    `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
-    and corresponds to the expression of the `MATCH` operator.
-    It can be composed of one or multiple keywords combined with boolean
-    operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
-    with the wildcard `*`.
+    search defined by the given `expression`.
+    The `expression` can be composed of one or multiple keywords.
+    The keywords can be combined with the boolean operators `AND`,
+    `OR` and `NOT` (case sensitive). Keywords are separated by whitespace;
+    if no boolean operator is provided, whitespace is handled as an
+    implicit AND operator between each pair of keywords. Note that this
+    function does not provide any priority operator (parentheses).
+    Keywords can define prefixes by adding a `*` at their end.
+    If the expression is composed of only one keyword, the function
+    automatically treats it as a prefix.
     The function returns a list of item instances sorted according to the
     bm25 ranking metric (list index `0` has the highest rank).
     If the provided `expression` does not hit any item, the function returns an empty list.
@@ -401,23 +415,24 @@ def find_items_in_universe(expression: str,
     # TODO: execute union query when it will be possible to compute parent of terms and data descriptors.
     result = list()
     with get_universe_session() as session:
+        processed_expression = process_expression(expression)
         if only_id:
             dd_column = col(UDataDescriptorFTS5.id)
             term_column = col(UTermFTS5.id)
         else:
             dd_column = col(UDataDescriptorFTS5.id)  # TODO: use specs when implemented!
             term_column = col(UTermFTS5.specs)  # type: ignore
-        dd_where_condition = dd_column.match(expression)
+        dd_where_condition = dd_column.match(processed_expression)
         dd_statement = select(UDataDescriptorFTS5.id,
                               text("'data_descriptor' AS TYPE"),
                               text("'universe' AS TYPE"),
                               text('rank')).where(dd_where_condition)
-        term_where_condition = term_column.match(expression)
+        term_where_condition = term_column.match(processed_expression)
         term_statement = select(UTermFTS5.id,
                                 text("'term' AS TYPE"),
                                 UDataDescriptor.id,
                                 text('rank')).join(UDataDescriptor) \
                          .where(term_where_condition)
-        result = execute_find_item_statements(session, expression, dd_statement,
+        result = execute_find_item_statements(session, processed_expression, dd_statement,
                                               term_statement, limit, offset)
     return result
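To make the documented semantics concrete, a few hypothetical calls are sketched below. The function and its `expression` parameter come from the diff above; the keyword values are invented for illustration.

from esgvoc.api.universe import find_terms_in_universe

# One keyword: automatically treated as a prefix ('temp' also hits 'temperature').
terms = find_terms_in_universe('temp')

# Several keywords separated by whitespace: implicit AND between each pair.
terms = find_terms_in_universe('air temperature')

# Explicit boolean operators, case sensitive; no parentheses are supported.
terms = find_terms_in_universe('ocean OR atmosphere NOT ice')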
esgvoc/apps/drs/generator.py CHANGED
@@ -2,6 +2,7 @@ from typing import Any, Iterable, Mapping, cast
 
 import esgvoc.api.projects as projects
 from esgvoc.api.project_specs import DrsCollection, DrsConstant, DrsPartKind, DrsSpecification, DrsType
+from esgvoc.api.search import MatchingTerm
 from esgvoc.apps.drs.report import (
     AssignedTerm,
     ConflictingCollections,
@@ -92,8 +93,7 @@ class DrsGenerator(DrsApplication):
         :rtype: DrsGeneratorReport
         """
         report = self._generate_from_mapping(mapping, self.file_name_specs)
-        report.generated_drs_expression = report.generated_drs_expression + \
-            self._get_full_file_name_extension()  # noqa E127
+        report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension()  # noqa E127
         return report
 
     def generate_file_name_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
@@ -108,12 +108,10 @@ class DrsGenerator(DrsApplication):
         :rtype: DrsGeneratorReport
         """
         report = self._generate_from_bag_of_terms(terms, self.file_name_specs)
-        report.generated_drs_expression = report.generated_drs_expression + \
-            self._get_full_file_name_extension()  # noqa E127
+        report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension()  # noqa E127
         return report
 
-    def generate_from_mapping(self, mapping: Mapping[str, str],
-                              drs_type: DrsType | str) -> DrsGenerationReport:
+    def generate_from_mapping(self, mapping: Mapping[str, str], drs_type: DrsType | str) -> DrsGenerationReport:
         """
         Generate a DRS expression from a mapping of collection ids and terms.
 
@@ -134,8 +132,7 @@ class DrsGenerator(DrsApplication):
             case _:
                 raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
 
-    def generate_from_bag_of_terms(self, terms: Iterable[str], drs_type: DrsType | str) \
-            -> DrsGenerationReport:  # noqa E127
+    def generate_from_bag_of_terms(self, terms: Iterable[str], drs_type: DrsType | str) -> DrsGenerationReport:  # noqa E127
         """
         Generate a DRS expression from an unordered bag of terms.
 
@@ -156,23 +153,24 @@ class DrsGenerator(DrsApplication):
             case _:
                 raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
 
-    def _generate_from_mapping(self, mapping: Mapping[str, str], specs: DrsSpecification) \
-            -> DrsGenerationReport:  # noqa E127
+    def _generate_from_mapping(self, mapping: Mapping[str, str], specs: DrsSpecification) -> DrsGenerationReport:  # noqa E127
         drs_expression, errors, warnings = self.__generate_from_mapping(mapping, specs, True)
         if self.pedantic:
             errors.extend(warnings)
             warnings.clear()
-        return DrsGenerationReport(project_id=self.project_id, type=specs.type,
-                                   given_mapping_or_bag_of_terms=mapping,
-                                   mapping_used=mapping,
-                                   generated_drs_expression=drs_expression,
-                                   errors=cast(list[GenerationError], errors),
-                                   warnings=cast(list[GenerationWarning], warnings))
-
-    def __generate_from_mapping(self, mapping: Mapping[str, str],
-                                specs: DrsSpecification,
-                                has_to_valid_terms: bool) \
-            -> tuple[str, list[GenerationIssue], list[GenerationIssue]]:  # noqa E127
+        return DrsGenerationReport(
+            project_id=self.project_id,
+            type=specs.type,
+            given_mapping_or_bag_of_terms=mapping,
+            mapping_used=mapping,
+            generated_drs_expression=drs_expression,
+            errors=cast(list[GenerationError], errors),
+            warnings=cast(list[GenerationWarning], warnings),
+        )
+
+    def __generate_from_mapping(
+        self, mapping: Mapping[str, str], specs: DrsSpecification, has_to_valid_terms: bool
+    ) -> tuple[str, list[GenerationIssue], list[GenerationIssue]]:  # noqa E127
         errors: list[GenerationIssue] = list()
         warnings: list[GenerationIssue] = list()
         drs_expression = ""
@@ -185,18 +183,17 @@ class DrsGenerator(DrsApplication):
             if collection_id in mapping:
                 part_value = mapping[collection_id]
                 if has_to_valid_terms:
-                    matching_terms = projects.valid_term_in_collection(part_value,
-                                                                       self.project_id,
-                                                                       collection_id)
+                    matching_terms = projects.valid_term_in_collection(part_value, self.project_id, collection_id)
                     if not matching_terms:
-                        issue = InvalidTerm(term=part_value,
-                                            term_position=part_position,
-                                            collection_id_or_constant_value=collection_id)
+                        issue = InvalidTerm(
+                            term=part_value,
+                            term_position=part_position,
+                            collection_id_or_constant_value=collection_id,
+                        )
                         errors.append(issue)
                         part_value = DrsGenerationReport.INVALID_TAG
             else:
-                other_issue = MissingTerm(collection_id=collection_id,
-                                          collection_position=part_position)
+                other_issue = MissingTerm(collection_id=collection_id, collection_position=part_position)
                 if collection_part.is_required:
                     errors.append(other_issue)
                     part_value = DrsGenerationReport.MISSING_TAG
@@ -209,14 +206,18 @@ class DrsGenerator(DrsApplication):
 
             drs_expression += part_value + specs.separator
 
-        drs_expression = drs_expression[0:len(drs_expression)-len(specs.separator)]
+        drs_expression = drs_expression[0 : len(drs_expression) - len(specs.separator)]
         return drs_expression, errors, warnings
 
-    def _generate_from_bag_of_terms(self, terms: Iterable[str], specs: DrsSpecification) \
-            -> DrsGenerationReport:  # noqa E127
+    def _generate_from_bag_of_terms(self, terms: Iterable[str], specs: DrsSpecification) -> DrsGenerationReport:  # noqa E127
         collection_terms_mapping: dict[str, set[str]] = dict()
         for term in terms:
-            matching_terms = projects.valid_term_in_project(term, self.project_id)
+            matching_terms: list[MatchingTerm] = []
+            for col in [part.collection_id for part in specs.parts if part.kind == DrsPartKind.COLLECTION]:
+                matching_terms_in_col = projects.valid_term_in_collection(term, self.project_id, col)
+                for mtic in matching_terms_in_col:
+                    matching_terms.append(mtic)
+            # matching_terms = projects.valid_term_in_project(term, self.project_id)
             for matching_term in matching_terms:
                 if matching_term.collection_id not in collection_terms_mapping:
                     collection_terms_mapping[matching_term.collection_id] = set()
@@ -229,15 +230,20 @@ class DrsGenerator(DrsApplication):
         if self.pedantic:
             errors.extend(warnings)
             warnings.clear()
-        return DrsGenerationReport(project_id=self.project_id, type=specs.type,
-                                   given_mapping_or_bag_of_terms=terms,
-                                   mapping_used=mapping, generated_drs_expression=drs_expression,
-                                   errors=cast(list[GenerationError], errors),
-                                   warnings=cast(list[GenerationWarning], warnings))
+        return DrsGenerationReport(
+            project_id=self.project_id,
+            type=specs.type,
+            given_mapping_or_bag_of_terms=terms,
+            mapping_used=mapping,
+            generated_drs_expression=drs_expression,
+            errors=cast(list[GenerationError], errors),
+            warnings=cast(list[GenerationWarning], warnings),
+        )
 
     @staticmethod
-    def _resolve_conflicts(collection_terms_mapping: dict[str, set[str]]) \
-            -> tuple[dict[str, set[str]], list[GenerationIssue]]:  # noqa E127
+    def _resolve_conflicts(
+        collection_terms_mapping: dict[str, set[str]],
+    ) -> tuple[dict[str, set[str]], list[GenerationIssue]]:  # noqa E127
         warnings: list[GenerationIssue] = list()
         conflicting_collection_ids_list: list[list[str]] = list()
         collection_ids: list[str] = list(collection_terms_mapping.keys())
@@ -247,13 +253,16 @@ class DrsGenerator(DrsApplication):
             conflicting_collection_ids: list[str] = list()
             for r_collection_index in range(l_collection_index + 1, len_collection_ids):
                 if collection_terms_mapping[collection_ids[l_collection_index]].isdisjoint(
-                        collection_terms_mapping[collection_ids[r_collection_index]]):
+                    collection_terms_mapping[collection_ids[r_collection_index]]
+                ):
                     continue
                 else:
                     not_registered = True
                     for cc_ids in conflicting_collection_ids_list:
-                        if collection_ids[l_collection_index] in cc_ids and \
-                                collection_ids[r_collection_index] in cc_ids:
+                        if (
+                            collection_ids[l_collection_index] in cc_ids
+                            and collection_ids[r_collection_index] in cc_ids
+                        ):
                             not_registered = False
                             break
                     if not_registered:
@@ -287,10 +296,12 @@ class DrsGenerator(DrsApplication):
             # raise errors, remove the faulty collections and their term.
             if collection_ids_with_len_eq_1_list:
                 for collection_ids_to_be_removed in collection_ids_with_len_eq_1_list:
-                    DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
-                                                            collection_ids_to_be_removed)
-                    DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
-                                                                   collection_ids_to_be_removed)
+                    DrsGenerator._remove_ids_from_conflicts(
+                        conflicting_collection_ids_list, collection_ids_to_be_removed
+                    )
+                    DrsGenerator._remove_term_from_other_term_sets(
+                        collection_terms_mapping, collection_ids_to_be_removed
+                    )
                 # Every time conflicting_collection_ids_list is modified, we must restart the loop,
                 # as conflicting collections may be resolved.
                 continue
@@ -307,10 +318,8 @@ class DrsGenerator(DrsApplication):
                     warnings.append(issue)
                 # 3.b Update conflicting collections.
                 if wining_collection_ids:
-                    DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
-                                                            wining_collection_ids)
-                    DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
-                                                                   wining_collection_ids)
+                    DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list, wining_collection_ids)
+                    DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping, wining_collection_ids)
                 # Every time conflicting_collection_ids_list is modified, we must restart the loop,
                 # as conflicting collections may be resolved.
                 continue
@@ -320,13 +329,15 @@ class DrsGenerator(DrsApplication):
             wining_id_and_term_pairs: list[tuple[str, str]] = list()
             for collection_ids in conflicting_collection_ids_list:
                 for collection_index in range(0, len(collection_ids)):
-                    collection_set = collection_ids[collection_index + 1:] + collection_ids[:collection_index]
-                    diff: set[str] = collection_terms_mapping[collection_ids[collection_index]]\
-                        .difference(*[collection_terms_mapping[index]  # noqa E127
-                                      for index in collection_set])
+                    collection_set = collection_ids[collection_index + 1 :] + collection_ids[:collection_index]
+                    diff: set[str] = collection_terms_mapping[collection_ids[collection_index]].difference(
+                        *[
+                            collection_terms_mapping[index]  # noqa E127
+                            for index in collection_set
+                        ]
+                    )
                     if len(diff) == 1:
-                        wining_id_and_term_pairs.append((collection_ids[collection_index],
-                                                         _get_first_item(diff)))
+                        wining_id_and_term_pairs.append((collection_ids[collection_index], _get_first_item(diff)))
             # 4.b Update conflicting collections.
             if wining_id_and_term_pairs:
                 wining_collection_ids = list()
@@ -336,18 +347,17 @@ class DrsGenerator(DrsApplication):
                     collection_terms_mapping[collection_id].add(term)
                     issue = AssignedTerm(collection_id=collection_id, term=term)
                     warnings.append(issue)
-                DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
-                                                        wining_collection_ids)
-                DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
-                                                               wining_collection_ids)
+                DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list, wining_collection_ids)
+                DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping, wining_collection_ids)
                 continue
             else:
                 break  # Stop the loop when no progress is made.
         return collection_terms_mapping, warnings
 
     @staticmethod
-    def _check_collection_terms_mapping(collection_terms_mapping: dict[str, set[str]]) \
-            -> tuple[dict[str, str], list[GenerationIssue]]:  # noqa E127
+    def _check_collection_terms_mapping(
+        collection_terms_mapping: dict[str, set[str]],
+    ) -> tuple[dict[str, str], list[GenerationIssue]]:  # noqa E127
         errors: list[GenerationIssue] = list()
         # 1. Looking for collections that share strictly the same term(s).
         collection_ids: list[str] = list(collection_terms_mapping.keys())
@@ -363,8 +373,7 @@ class DrsGenerator(DrsApplication):
                 if l_term_set and (not l_term_set.difference(r_term_set)):
                     not_registered = True
                     for faulty_collections in faulty_collections_list:
-                        if l_collection_id in faulty_collections or \
-                                r_collection_id in faulty_collections:
+                        if l_collection_id in faulty_collections or r_collection_id in faulty_collections:
                             faulty_collections.add(l_collection_id)
                             faulty_collections.add(r_collection_id)
                             not_registered = False
@@ -373,8 +382,9 @@ class DrsGenerator(DrsApplication):
                     faulty_collections_list.append({l_collection_id, r_collection_id})
         for faulty_collections in faulty_collections_list:
             terms = collection_terms_mapping[_get_first_item(faulty_collections)]
-            issue = ConflictingCollections(collection_ids=_transform_set_and_sort(faulty_collections),
-                                           terms=_transform_set_and_sort(terms))
+            issue = ConflictingCollections(
+                collection_ids=_transform_set_and_sort(faulty_collections), terms=_transform_set_and_sort(terms)
+            )
             errors.append(issue)
             for collection_id in faulty_collections:
                 del collection_terms_mapping[collection_id]
@@ -386,25 +396,28 @@ class DrsGenerator(DrsApplication):
             if len_term_set == 1:
                 result[collection_id] = _get_first_item(term_set)
             elif len_term_set > 1:
-                other_issue = TooManyTermCollection(collection_id=collection_id,
-                                                    terms=_transform_set_and_sort(term_set))
+                other_issue = TooManyTermCollection(
+                    collection_id=collection_id, terms=_transform_set_and_sort(term_set)
+                )
                 errors.append(other_issue)
             # else: Don't add emptied collection to the result.
         return result, errors
 
     @staticmethod
-    def _remove_term_from_other_term_sets(collection_terms_mapping: dict[str, set[str]],
-                                          collection_ids_to_be_removed: list[str]) -> None:
+    def _remove_term_from_other_term_sets(
+        collection_terms_mapping: dict[str, set[str]], collection_ids_to_be_removed: list[str]
+    ) -> None:
        for collection_id_to_be_removed in collection_ids_to_be_removed:
            # Should only be one term.
            term_to_be_removed: str = _get_first_item(collection_terms_mapping[collection_id_to_be_removed])
            for collection_id in collection_terms_mapping.keys():
-                if (collection_id not in collection_ids_to_be_removed):
+                if collection_id not in collection_ids_to_be_removed:
                    collection_terms_mapping[collection_id].discard(term_to_be_removed)
 
     @staticmethod
-    def _remove_ids_from_conflicts(conflicting_collection_ids_list: list[list[str]],
-                                   collection_ids_to_be_removed: list[str]) -> None:
+    def _remove_ids_from_conflicts(
+        conflicting_collection_ids_list: list[list[str]], collection_ids_to_be_removed: list[str]
+    ) -> None:
        for collection_id_to_be_removed in collection_ids_to_be_removed:
            for conflicting_collection_ids in conflicting_collection_ids_list:
                if collection_id_to_be_removed in conflicting_collection_ids:
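The behavioural change in _generate_from_bag_of_terms is that each candidate term is now validated only against the collections referenced by the DRS specification parts, instead of against every collection of the project (the former valid_term_in_project call is kept as a comment). A hypothetical end-to-end use of the public wrapper is sketched below; the project id, the terms and the DrsGenerator constructor argument are assumptions, as the constructor is not shown in this diff:

from esgvoc.apps.drs.generator import DrsGenerator

generator = DrsGenerator("cmip6")  # assumed constructor argument
# Unordered bag of terms; only collections present in the DRS spec parts
# are consulted when matching each term (new in this release).
report = generator.generate_from_bag_of_terms(
    ["CMIP", "IPSL", "historical", "r1i1p1f1", "gr"],
    "directory",  # DrsType | str, per the signature above; value assumed
)
print(report.generated_drs_expression)
print(report.errors, report.warnings)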
esgvoc/apps/jsg/cmip6_template.json ADDED
@@ -0,0 +1,74 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "https://stac-extensions.github.io/cmip6/v1.0.0/schema.json#",
+  "title": "CMIP6 Extension",
+  "description": "STAC CMIP6 Extension for STAC Items and STAC Collection Summaries.",
+  "type": "object",
+  "required": [
+    "stac_extensions"
+  ],
+  "properties": {
+    "stac_extensions": {
+      "type": "array",
+      "contains": {
+        "const": "https://stac-extensions.github.io/cmip6/v1.0.0/schema.json"
+      }
+    }
+  },
+  "oneOf": [
+    {
+      "$comment": "This is the schema for STAC Items.",
+      "type": "object",
+      "required": [
+        "type",
+        "properties"
+      ],
+      "properties": {
+        "type": {
+          "const": "Feature"
+        },
+        "properties": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/require_any"
+            },
+            {
+              "$ref": "#/definitions/fields"
+            }
+          ]
+        }
+      }
+    },
+    {
+      "$comment": "This is the schema for STAC Collections, or more specifically only Collection Summaries in this case. By default, only checks the existence of the properties, but not the schema of the summaries.",
+      "type": "object",
+      "required": [
+        "type",
+        "summaries"
+      ],
+      "properties": {
+        "type": {
+          "const": "Collection"
+        },
+        "summaries": {
+          "$ref": "#/definitions/require_any"
+        }
+      }
+    }
+  ],
+  "definitions": {
+    "require_any": {
+      "$comment": "Please list all fields here so that we can force the existence of one of them in other parts of the schemas."
+    },
+    "fields": {
+      "$comment": "Don't require fields here, do that above in the corresponding schema.",
+      "type": "object",
+      "properties": {
+      },
+      "patternProperties": {
+        "^(?!cmip6:)": {}
+      },
+      "additionalProperties": false
+    }
+  }
+}
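This template leaves definitions/require_any and definitions/fields to be completed, presumably by the new json_schema_generator.py, which injects the cmip6: fields from the controlled vocabularies. A minimal sketch of validating a STAC item against a completed schema with the third-party jsonschema package follows; the file path and the item are illustrative:

import json

from jsonschema import Draft7Validator  # pip install jsonschema

# Hypothetical check of a STAC item against a generated CMIP6 schema.
with open("cmip6_schema.json") as f:  # illustrative path
    schema = json.load(f)

item = {
    "type": "Feature",
    "stac_extensions": ["https://stac-extensions.github.io/cmip6/v1.0.0/schema.json"],
    "properties": {},  # cmip6:* fields would go here
}

# Raises jsonschema.exceptions.ValidationError if the item does not conform.
Draft7Validator(schema).validate(item)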