PyPI - esgvoc - Versions diffs - 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

esgvoc 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of esgvoc might be problematic. Click here for more details.

Files changed (87)

esgvoc/__init__.py +1 -1
esgvoc/api/__init__.py +95 -60
esgvoc/api/data_descriptors/__init__.py +50 -28
esgvoc/api/data_descriptors/activity.py +3 -3
esgvoc/api/data_descriptors/area_label.py +16 -1
esgvoc/api/data_descriptors/branded_suffix.py +20 -0
esgvoc/api/data_descriptors/branded_variable.py +12 -0
esgvoc/api/data_descriptors/consortium.py +14 -13
esgvoc/api/data_descriptors/contact.py +5 -0
esgvoc/api/data_descriptors/conventions.py +6 -0
esgvoc/api/data_descriptors/creation_date.py +5 -0
esgvoc/api/data_descriptors/data_descriptor.py +14 -9
esgvoc/api/data_descriptors/data_specs_version.py +5 -0
esgvoc/api/data_descriptors/date.py +1 -1
esgvoc/api/data_descriptors/directory_date.py +1 -1
esgvoc/api/data_descriptors/experiment.py +13 -11
esgvoc/api/data_descriptors/forcing_index.py +1 -1
esgvoc/api/data_descriptors/frequency.py +3 -3
esgvoc/api/data_descriptors/further_info_url.py +5 -0
esgvoc/api/data_descriptors/grid_label.py +2 -2
esgvoc/api/data_descriptors/horizontal_label.py +15 -1
esgvoc/api/data_descriptors/initialisation_index.py +1 -1
esgvoc/api/data_descriptors/institution.py +8 -5
esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
esgvoc/api/data_descriptors/license.py +3 -3
esgvoc/api/data_descriptors/mip_era.py +1 -1
esgvoc/api/data_descriptors/model_component.py +1 -1
esgvoc/api/data_descriptors/obs_type.py +5 -0
esgvoc/api/data_descriptors/organisation.py +1 -1
esgvoc/api/data_descriptors/physic_index.py +1 -1
esgvoc/api/data_descriptors/product.py +2 -2
esgvoc/api/data_descriptors/publication_status.py +5 -0
esgvoc/api/data_descriptors/realisation_index.py +1 -1
esgvoc/api/data_descriptors/realm.py +1 -1
esgvoc/api/data_descriptors/region.py +5 -0
esgvoc/api/data_descriptors/resolution.py +3 -3
esgvoc/api/data_descriptors/source.py +9 -5
esgvoc/api/data_descriptors/source_type.py +1 -1
esgvoc/api/data_descriptors/table.py +3 -2
esgvoc/api/data_descriptors/temporal_label.py +15 -1
esgvoc/api/data_descriptors/time_range.py +4 -3
esgvoc/api/data_descriptors/title.py +5 -0
esgvoc/api/data_descriptors/tracking_id.py +5 -0
esgvoc/api/data_descriptors/variable.py +25 -12
esgvoc/api/data_descriptors/variant_label.py +3 -3
esgvoc/api/data_descriptors/vertical_label.py +14 -0
esgvoc/api/project_specs.py +120 -4
esgvoc/api/projects.py +733 -505
esgvoc/api/py.typed +0 -0
esgvoc/api/report.py +12 -8
esgvoc/api/search.py +168 -98
esgvoc/api/universe.py +368 -157
esgvoc/apps/drs/constants.py +1 -1
esgvoc/apps/drs/generator.py +51 -69
esgvoc/apps/drs/report.py +60 -15
esgvoc/apps/drs/validator.py +60 -71
esgvoc/apps/jsg/cmip6_template.json +74 -0
esgvoc/apps/jsg/cmip6plus_template.json +74 -0
esgvoc/apps/jsg/json_schema_generator.py +185 -0
esgvoc/apps/py.typed +0 -0
esgvoc/cli/config.py +500 -0
esgvoc/cli/drs.py +3 -2
esgvoc/cli/find.py +138 -0
esgvoc/cli/get.py +46 -38
esgvoc/cli/main.py +10 -3
esgvoc/cli/status.py +27 -18
esgvoc/cli/valid.py +10 -15
esgvoc/core/constants.py +1 -1
esgvoc/core/db/__init__.py +2 -4
esgvoc/core/db/connection.py +5 -3
esgvoc/core/db/models/project.py +57 -15
esgvoc/core/db/models/universe.py +49 -10
esgvoc/core/db/project_ingestion.py +79 -65
esgvoc/core/db/universe_ingestion.py +71 -40
esgvoc/core/exceptions.py +33 -0
esgvoc/core/logging_handler.py +24 -2
esgvoc/core/repo_fetcher.py +61 -59
esgvoc/core/service/data_merger.py +47 -34
esgvoc/core/service/state.py +107 -83
{esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/METADATA +7 -20
esgvoc-1.0.0.dist-info/RECORD +95 -0
esgvoc/api/_utils.py +0 -53
esgvoc/core/logging.conf +0 -21
esgvoc-0.3.0.dist-info/RECORD +0 -78
{esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/WHEEL +0 -0
{esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/entry_points.txt +0 -0
{esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0

esgvoc/api/py.typed ADDED Viewed

File without changes

esgvoc/api/report.py CHANGED Viewed

@@ -30,11 +30,12 @@ class ValidationError(BaseModel, ABC):
     """JSON specification of the term."""
     term_kind: TermKind
     """The kind of term."""
-    @computed_field # type: ignore
+    @computed_field  # type: ignore
     @property
     def class_name(self) -> str:
-       """The class name of the issue for JSON serialization."""
-       return self.__class__.__name__
+        """The class name of the issue for JSON serialization."""
+        return self.__class__.__name__
     @abstractmethod
     def accept(self, visitor: ValidationErrorVisitor) -> Any:
         """
@@ -47,6 +48,7 @@ class ValidationError(BaseModel, ABC):
         """
         pass
 class UniverseTermError(ValidationError):
     """
     A validation error on a term from the universe.
@@ -60,9 +62,10 @@ class UniverseTermError(ValidationError):
     def __str__(self) -> str:
         term_id = self.term[api_settings.TERM_ID_JSON_KEY]
-        result = f"The term {term_id} from the data descriptor {self.data_descriptor_id} "+\
+        result = f"The term {term_id} from the data descriptor {self.data_descriptor_id} " + \
                  f"does not validate the given value '{self.value}'"
         return result
     def __repr__(self) -> str:
         return self.__str__()
@@ -80,9 +83,10 @@ class ProjectTermError(ValidationError):
     def __str__(self) -> str:
         term_id = self.term[api_settings.TERM_ID_JSON_KEY]
-        result = f"The term {term_id} from the collection {self.collection_id} "+\
+        result = f"The term {term_id} from the collection {self.collection_id} " + \
                  f"does not validate the given value '{self.value}'"
         return result
     def __repr__(self) -> str:
         return self.__str__()
@@ -95,16 +99,16 @@ class ValidationReport(BaseModel):
     expression: str
     """The given expression."""
-    errors: list[UniverseTermError|ProjectTermError]
+    errors: list[UniverseTermError | ProjectTermError]
     """The validation errors."""
-    @computed_field # type: ignore
+    @computed_field  # type: ignore
     @property
     def nb_errors(self) -> int:
         """The number of validation errors."""
         return len(self.errors) if self.errors else 0
-    @computed_field # type: ignore
+    @computed_field  # type: ignore
     @property
     def validated(self) -> bool:
         """The expression is validated or not."""

esgvoc/api/search.py CHANGED Viewed

@@ -1,8 +1,173 @@
-from typing import Iterable
 from enum import Enum
+from typing import Any, Iterable, MutableSequence, Sequence
+import sqlalchemy as sa
 from pydantic import BaseModel
-from sqlalchemy import ColumnElement, func
-from sqlmodel import col
+from sqlalchemy import ColumnElement
+from sqlalchemy.exc import OperationalError
+from sqlalchemy.sql.expression import Select
+from sqlalchemy.sql.selectable import ExecutableReturnsRows
+from sqlmodel import Column, Field, Session, col
+import esgvoc.core.constants as api_settings
+import esgvoc.core.service as service
+from esgvoc.api.data_descriptors import DATA_DESCRIPTOR_CLASS_MAPPING
+from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor, DataDescriptorSubSet
+from esgvoc.core.db.models.project import PCollectionFTS5, PTerm, PTermFTS5
+from esgvoc.core.db.models.universe import UDataDescriptorFTS5, UTerm, UTermFTS5
+from esgvoc.core.exceptions import EsgvocDbError, EsgvocValueError
+class ItemKind(Enum):
+    DATA_DESCRIPTOR = "data_descriptor"
+    """Corresponds to a data descriptor"""
+    COLLECTION = "collection"
+    """Corresponds to a collection"""
+    TERM = "term"
+    """Corresponds to a term"""
+class Item(BaseModel):
+    """An item from the universe or a project (data descriptor, collection or term)."""
+    id: str
+    """The id of the item."""
+    kind: ItemKind = Field(sa_column=Column(sa.Enum(ItemKind)))
+    """The kind of the item."""
+    parent_id: str
+    """The id of the parent of the item."""
+def get_pydantic_class(data_descriptor_id_or_term_type: str) -> type[DataDescriptor]:
+    if data_descriptor_id_or_term_type in DATA_DESCRIPTOR_CLASS_MAPPING:
+        return DATA_DESCRIPTOR_CLASS_MAPPING[data_descriptor_id_or_term_type]
+    else:
+        raise EsgvocDbError(f"'{data_descriptor_id_or_term_type}' pydantic class not found")
+def get_universe_session() -> Session:
+    UNIVERSE_DB_CONNECTION = service.current_state.universe.db_connection
+    if UNIVERSE_DB_CONNECTION:
+        return UNIVERSE_DB_CONNECTION.create_session()
+    else:
+        raise EsgvocDbError('universe connection is not initialized')
+def instantiate_pydantic_term(term: UTerm | PTerm,
+                              selected_term_fields: Iterable[str] | None) -> DataDescriptor:
+    type = term.specs[api_settings.TERM_TYPE_JSON_KEY]
+    if selected_term_fields is not None:
+        subset = DataDescriptorSubSet(id=term.id, type=type)
+        for field in selected_term_fields:
+            setattr(subset, field, term.specs.get(field, None))
+        for field in DataDescriptorSubSet.MANDATORY_TERM_FIELDS:
+            setattr(subset, field, term.specs.get(field, None))
+        return subset
+    else:
+        term_class = get_pydantic_class(type)
+        return term_class(**term.specs)
+def instantiate_pydantic_terms(db_terms: Iterable[UTerm | PTerm],
+                               list_to_populate: MutableSequence[DataDescriptor],
+                               selected_term_fields: Iterable[str] | None) -> None:
+    for db_term in db_terms:
+        term = instantiate_pydantic_term(db_term, selected_term_fields)
+        list_to_populate.append(term)
+def process_expression(expression: str) -> str:
+    """
+    Allows only SQLite FST operators AND OR NOT and perform prefix search for single word expressions.
+    """
+    # 1. Remove single and double quotes.
+    result = expression.replace('"', '')
+    result = result.replace("'", '')
+    # 2. Escape keywords.
+    result = result.replace('NEAR', '"NEAR"')
+    result = result.replace('+', '"+"')
+    result = result.replace('-', '"-"')
+    result = result.replace(':', '":"')
+    result = result.replace('^', '"^"')
+    result = result.replace('(', '"("')
+    result = result.replace(')', '")"')
+    result = result.replace(',', '","')
+    # 3. Make single word request a prefix search.
+    if not result.endswith('*'):
+        tokens = result.split(sep=None)
+        if len(tokens) == 1:
+            result += '*'
+    return result
+def generate_matching_condition(cls: type[UTermFTS5] | type[UDataDescriptorFTS5] |
+                                type[PTermFTS5] | type[PCollectionFTS5],
+                                expression: str,
+                                only_id: bool) -> ColumnElement[bool]:
+    processed_expression = process_expression(expression)
+    # TODO: fix this when specs will ba available in collections and Data descriptors.
+    if cls is PTermFTS5 or cls is UTermFTS5:
+        if only_id:
+            result = col(cls.id).match(processed_expression)
+        else:
+            result = col(cls.specs).match(processed_expression)  # type: ignore
+    else:
+        result = col(cls.id).match(processed_expression)
+    return result
+def handle_rank_limit_offset(statement: Select, limit: int | None, offset: int | None) -> Select:
+    statement = statement.order_by(sa.text('rank'))
+    if limit and limit > 0:  # False if == 0 and is None ; True if != 0 and is not None.
+        statement = statement.limit(limit)
+    if offset and offset > 0:  # False if == 0 and is None ; True if != 0 and is not None.
+        statement = statement.offset(offset)
+    return statement
+def execute_match_statement(expression: str, statement: ExecutableReturnsRows, session: Session) \
+                                                                                        -> Sequence:
+    try:
+        raw_results = session.exec(statement)  # type: ignore
+        # raw_results.all() returns a list of sqlalquemy rows.
+        results = [result[0] for result in raw_results.all()]
+        return results
+    except OperationalError as e:
+        raise EsgvocValueError(f"unable to interpret expression '{expression}'") from e
+def execute_find_item_statements(session: Session,
+                                 expression: str,
+                                 first_statement: Select,
+                                 second_statement: Select,
+                                 limit: int | None,
+                                 offset: int | None) -> list[Item]:
+    try:
+        # Items found are kind of tuple with an object, a kindness, a parent id and a rank.
+        first_statement_found = session.exec(first_statement).all()  # type: ignore
+        second_statement_found = session.exec(second_statement).all()  # type: ignore
+        tmp_result: list[Any] = list()
+        tmp_result.extend(first_statement_found)
+        tmp_result.extend(second_statement_found)
+        # According to https://sqlite.org/fts5.html#the_bm25_function,
+        # "the better matches are assigned numerically lower scores."
+        # Sort on the rank column (index 3).
+        sorted_tmp_result = sorted(tmp_result, key=lambda r: r[3], reverse=False)
+        if offset and offset > 0:  # False if == 0 and is None ; True if != 0 and is not None.
+            start = offset
+        else:
+            start = 0
+        if limit and limit > 0:  # False if == 0 and is None ; True if != 0 and is not None.
+            stop = start + limit
+            framed_tmp_result = sorted_tmp_result[start: stop]  # is OK if stop > len of the list.
+        else:
+            framed_tmp_result = sorted_tmp_result[start:]
+        result = [Item(id=r[0], kind=r[1], parent_id=r[2]) for r in framed_tmp_result]
+    except OperationalError as e:
+        raise EsgvocValueError(f"unable to interpret expression '{expression}'") from e
+    return result
 class MatchingTerm(BaseModel):
@@ -15,98 +180,3 @@ class MatchingTerm(BaseModel):
     """The collection id to which the term belongs."""
     term_id: str
     """The term id."""
-class SearchType(Enum):
-    """
-    The search types used for to find terms.
-    """
-    EXACT = "exact"
-    """Performs exact match."""
-    LIKE = "like"  # can interpret %
-    """As SQL operator, it can interpret % as a wildcard."""
-    STARTS_WITH = "starts_with"  # can interpret %
-    """Prefix based search."""
-    ENDS_WITH = "ends_with"  # can interpret %
-    """Suffix based search."""
-    REGEX = "regex"
-    """Search based on regex."""
-class SearchSettings(BaseModel):
-    """
-    Search configuration.
-    """
-    type: SearchType = SearchType.EXACT
-    """The type of search."""
-    case_sensitive: bool = True
-    """Enable case sensitivity or not."""
-    not_operator: bool = False
-    """Give the opposite result like the NOT SQL operator."""
-    selected_term_fields: Iterable[str]|None = None
-    """Term fields to select"""
-def _create_str_comparison_expression(field: str,
-                                      value: str,
-                                      settings: SearchSettings|None) -> ColumnElement:
-    '''
-    SQLite LIKE is case insensitive (and so STARTS/ENDS_WITH which are implemented with LIKE).
-    So the case sensitive LIKE is implemented with REGEX.
-    The i versions of SQLAlchemy operators (icontains, etc.) are not useful
-    (but other dbs than SQLite should use them).
-    If the provided `settings` is None, this functions returns an exact search expression.
-    '''
-    does_wild_cards_in_value_have_to_be_interpreted = False
-    # Shortcut.
-    if settings is None:
-        return col(field).is_(other=value)
-    else:
-        match settings.type:
-            # Early return because not operator is not implement with tilde symbol.
-            case SearchType.EXACT:
-                if settings.case_sensitive:
-                    if settings.not_operator:
-                        return col(field).is_not(other=value)
-                    else:
-                        return col(field).is_(other=value)
-                else:
-                    if settings.not_operator:
-                        return func.lower(field) != func.lower(value)
-                    else:
-                        return func.lower(field) == func.lower(value)
-            case SearchType.LIKE:
-                if settings.case_sensitive:
-                    result = col(field).regexp_match(pattern=f".*{value}.*")
-                else:
-                    result = col(field).contains(
-                        other=value,
-                        autoescape=not does_wild_cards_in_value_have_to_be_interpreted,
-                    )
-            case SearchType.STARTS_WITH:
-                if settings.case_sensitive:
-                    result = col(field).regexp_match(pattern=f"^{value}.*")
-                else:
-                    result = col(field).startswith(
-                        other=value,
-                        autoescape=not does_wild_cards_in_value_have_to_be_interpreted,
-                    )
-            case SearchType.ENDS_WITH:
-                if settings.case_sensitive:
-                    result = col(field).regexp_match(pattern=f"{value}$")
-                else:
-                    result = col(field).endswith(
-                        other=value,
-                        autoescape=not does_wild_cards_in_value_have_to_be_interpreted,
-                    )
-            case SearchType.REGEX:
-                if settings.case_sensitive:
-                    result = col(field).regexp_match(pattern=value)
-                else:
-                    raise NotImplementedError(
-                        "regex string comparison case insensitive is not implemented"
-                    )
-        if settings.not_operator:
-            return ~result
-        else:
-            return result

esgvoc 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

Potentially problematic release.

esgvoc 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl