PyPI - esgvoc - Versions diffs - 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

esgvoc 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of esgvoc might be problematic. Click here for more details.

Files changed (79) hide show

esgvoc/__init__.py +3 -1
esgvoc/api/__init__.py +96 -72
esgvoc/api/data_descriptors/__init__.py +18 -12
esgvoc/api/data_descriptors/activity.py +8 -45
esgvoc/api/data_descriptors/area_label.py +6 -0
esgvoc/api/data_descriptors/branded_suffix.py +5 -0
esgvoc/api/data_descriptors/branded_variable.py +5 -0
esgvoc/api/data_descriptors/consortium.py +16 -56
esgvoc/api/data_descriptors/data_descriptor.py +106 -0
esgvoc/api/data_descriptors/date.py +3 -46
esgvoc/api/data_descriptors/directory_date.py +3 -46
esgvoc/api/data_descriptors/experiment.py +19 -54
esgvoc/api/data_descriptors/forcing_index.py +3 -45
esgvoc/api/data_descriptors/frequency.py +6 -43
esgvoc/api/data_descriptors/grid_label.py +6 -44
esgvoc/api/data_descriptors/horizontal_label.py +6 -0
esgvoc/api/data_descriptors/initialisation_index.py +3 -44
esgvoc/api/data_descriptors/institution.py +11 -54
esgvoc/api/data_descriptors/license.py +4 -44
esgvoc/api/data_descriptors/mip_era.py +6 -44
esgvoc/api/data_descriptors/model_component.py +7 -45
esgvoc/api/data_descriptors/organisation.py +3 -40
esgvoc/api/data_descriptors/physic_index.py +3 -45
esgvoc/api/data_descriptors/product.py +4 -43
esgvoc/api/data_descriptors/realisation_index.py +3 -44
esgvoc/api/data_descriptors/realm.py +4 -42
esgvoc/api/data_descriptors/resolution.py +6 -44
esgvoc/api/data_descriptors/source.py +18 -53
esgvoc/api/data_descriptors/source_type.py +3 -41
esgvoc/api/data_descriptors/sub_experiment.py +3 -41
esgvoc/api/data_descriptors/table.py +6 -48
esgvoc/api/data_descriptors/temporal_label.py +6 -0
esgvoc/api/data_descriptors/time_range.py +3 -27
esgvoc/api/data_descriptors/variable.py +13 -71
esgvoc/api/data_descriptors/variant_label.py +3 -47
esgvoc/api/data_descriptors/vertical_label.py +5 -0
esgvoc/api/project_specs.py +3 -2
esgvoc/api/projects.py +727 -446
esgvoc/api/py.typed +0 -0
esgvoc/api/report.py +29 -16
esgvoc/api/search.py +140 -95
esgvoc/api/universe.py +362 -156
esgvoc/apps/__init__.py +3 -4
esgvoc/apps/drs/constants.py +1 -1
esgvoc/apps/drs/generator.py +185 -198
esgvoc/apps/drs/report.py +272 -136
esgvoc/apps/drs/validator.py +132 -145
esgvoc/apps/py.typed +0 -0
esgvoc/cli/drs.py +32 -21
esgvoc/cli/get.py +35 -31
esgvoc/cli/install.py +11 -8
esgvoc/cli/main.py +0 -2
esgvoc/cli/status.py +5 -5
esgvoc/cli/valid.py +40 -40
esgvoc/core/constants.py +1 -1
esgvoc/core/db/__init__.py +2 -4
esgvoc/core/db/connection.py +5 -3
esgvoc/core/db/models/project.py +50 -8
esgvoc/core/db/models/universe.py +51 -12
esgvoc/core/db/project_ingestion.py +60 -46
esgvoc/core/db/universe_ingestion.py +58 -29
esgvoc/core/exceptions.py +33 -0
esgvoc/core/logging_handler.py +1 -1
esgvoc/core/repo_fetcher.py +4 -3
esgvoc/core/service/__init__.py +37 -5
esgvoc/core/service/configuration/config_manager.py +188 -0
esgvoc/core/service/configuration/setting.py +88 -0
esgvoc/core/service/state.py +49 -32
{esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/METADATA +34 -3
esgvoc-0.4.0.dist-info/RECORD +80 -0
esgvoc/api/_utils.py +0 -39
esgvoc/cli/config.py +0 -82
esgvoc/core/service/settings.py +0 -73
esgvoc/core/service/settings.toml +0 -17
esgvoc/core/service/settings_default.toml +0 -17
esgvoc-0.2.1.dist-info/RECORD +0 -73
{esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/WHEEL +0 -0
{esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/entry_points.txt +0 -0
{esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/licenses/LICENSE.txt +0 -0

esgvoc/api/py.typed ADDED Viewed

File without changes

esgvoc/api/report.py CHANGED Viewed

@@ -1,7 +1,8 @@
-from pydantic import BaseModel, computed_field
 from abc import ABC, abstractmethod
 from typing import Any, Protocol
+from pydantic import BaseModel, computed_field
 import esgvoc.core.constants as api_settings
 from esgvoc.core.db.models.mixins import TermKind
@@ -29,7 +30,12 @@ class ValidationError(BaseModel, ABC):
     """JSON specification of the term."""
     term_kind: TermKind
     """The kind of term."""
+    @computed_field  # type: ignore
+    @property
+    def class_name(self) -> str:
+        """The class name of the issue for JSON serialization."""
+        return self.__class__.__name__
     @abstractmethod
     def accept(self, visitor: ValidationErrorVisitor) -> Any:
         """
@@ -42,22 +48,24 @@ class ValidationError(BaseModel, ABC):
         """
         pass
 class UniverseTermError(ValidationError):
     """
     A validation error on a term from the universe.
     """
     data_descriptor_id: str
     """The data descriptor that the term belongs."""
     def accept(self, visitor: ValidationErrorVisitor) -> Any:
         return visitor.visit_universe_term_error(self)
     def __str__(self) -> str:
         term_id = self.term[api_settings.TERM_ID_JSON_KEY]
-        result = f"The term {term_id} from the data descriptor {self.data_descriptor_id} "+\
+        result = f"The term {term_id} from the data descriptor {self.data_descriptor_id} " + \
                  f"does not validate the given value '{self.value}'"
         return result
     def __repr__(self) -> str:
         return self.__str__()
@@ -66,18 +74,19 @@ class ProjectTermError(ValidationError):
     """
     A validation error on a term from a project.
     """
     collection_id: str
     """The collection id that the term belongs"""
     def accept(self, visitor: ValidationErrorVisitor) -> Any:
         return visitor.visit_project_term_error(self)
     def __str__(self) -> str:
         term_id = self.term[api_settings.TERM_ID_JSON_KEY]
-        result = f"The term {term_id} from the collection {self.collection_id} "+\
+        result = f"The term {term_id} from the collection {self.collection_id} " + \
                  f"does not validate the given value '{self.value}'"
         return result
     def __repr__(self) -> str:
         return self.__str__()
@@ -86,29 +95,33 @@ class ValidationReport(BaseModel):
     """
     Term validation report.
     """
     expression: str
     """The given expression."""
-    errors: list[ValidationError]
+    errors: list[UniverseTermError | ProjectTermError]
     """The validation errors."""
-    @computed_field # type: ignore
+    @computed_field  # type: ignore
     @property
     def nb_errors(self) -> int:
         """The number of validation errors."""
         return len(self.errors) if self.errors else 0
-    @computed_field # type: ignore
+    @computed_field  # type: ignore
     @property
     def validated(self) -> bool:
         """The expression is validated or not."""
         return False if self.errors else True
     def __len__(self) -> int:
         return self.nb_errors
     def __bool__(self) -> bool:
         return self.validated
     def __str__(self) -> str:
         return f"'{self.expression}' has {self.nb_errors} error(s)"
     def __repr__(self) -> str:
-        return self.__str__()
+        return self.__str__()

esgvoc/api/search.py CHANGED Viewed

@@ -1,8 +1,146 @@
 from enum import Enum
+from typing import Any, Iterable, MutableSequence, Sequence
+import sqlalchemy as sa
 from pydantic import BaseModel
-from sqlalchemy import ColumnElement, func
-from sqlmodel import col
+from sqlalchemy import ColumnElement
+from sqlalchemy.exc import OperationalError
+from sqlalchemy.sql.expression import Select
+from sqlalchemy.sql.selectable import ExecutableReturnsRows
+from sqlmodel import Column, Field, Session, col
+import esgvoc.core.constants as api_settings
+import esgvoc.core.service as service
+from esgvoc.api.data_descriptors import DATA_DESCRIPTOR_CLASS_MAPPING
+from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor, DataDescriptorSubSet
+from esgvoc.core.db.models.project import PCollectionFTS5, PTerm, PTermFTS5
+from esgvoc.core.db.models.universe import UDataDescriptorFTS5, UTerm, UTermFTS5
+from esgvoc.core.exceptions import EsgvocDbError, EsgvocValueError
+class ItemKind(Enum):
+    DATA_DESCRIPTOR = "data_descriptor"
+    """Corresponds to a data descriptor"""
+    COLLECTION = "collection"
+    """Corresponds to a collection"""
+    TERM = "term"
+    """Corresponds to a term"""
+class Item(BaseModel):
+    """An item from the universe or a project (data descriptor, collection or term)."""
+    id: str
+    """The id of the item."""
+    kind: ItemKind = Field(sa_column=Column(sa.Enum(ItemKind)))
+    """The kind of the item."""
+    parent_id: str
+    """The id of the parent of the item."""
+def get_pydantic_class(data_descriptor_id_or_term_type: str) -> type[DataDescriptor]:
+    if data_descriptor_id_or_term_type in DATA_DESCRIPTOR_CLASS_MAPPING:
+        return DATA_DESCRIPTOR_CLASS_MAPPING[data_descriptor_id_or_term_type]
+    else:
+        raise EsgvocDbError(f"'{data_descriptor_id_or_term_type}' pydantic class not found")
+def get_universe_session() -> Session:
+    UNIVERSE_DB_CONNECTION = service.current_state.universe.db_connection
+    if UNIVERSE_DB_CONNECTION:
+        return UNIVERSE_DB_CONNECTION.create_session()
+    else:
+        raise EsgvocDbError('universe connection is not initialized')
+def instantiate_pydantic_term(term: UTerm | PTerm,
+                              selected_term_fields: Iterable[str] | None) -> DataDescriptor:
+    type = term.specs[api_settings.TERM_TYPE_JSON_KEY]
+    if selected_term_fields is not None:
+        subset = DataDescriptorSubSet(id=term.id, type=type)
+        for field in selected_term_fields:
+            setattr(subset, field, term.specs.get(field, None))
+        for field in DataDescriptorSubSet.MANDATORY_TERM_FIELDS:
+            setattr(subset, field, term.specs.get(field, None))
+        return subset
+    else:
+        term_class = get_pydantic_class(type)
+        return term_class(**term.specs)
+def instantiate_pydantic_terms(db_terms: Iterable[UTerm | PTerm],
+                               list_to_populate: MutableSequence[DataDescriptor],
+                               selected_term_fields: Iterable[str] | None) -> None:
+    for db_term in db_terms:
+        term = instantiate_pydantic_term(db_term, selected_term_fields)
+        list_to_populate.append(term)
+def generate_matching_condition(cls: type[UTermFTS5] | type[UDataDescriptorFTS5] |
+                                type[PTermFTS5] | type[PCollectionFTS5],
+                                expression: str,
+                                only_id: bool) -> ColumnElement[bool]:
+    # TODO: fix this when specs will ba available in collections and Data descriptors.
+    if cls is PTermFTS5 or cls is UTermFTS5:
+        if only_id:
+            result = col(cls.id).match(expression)
+        else:
+            result = col(cls.specs).match(expression)  # type: ignore
+    else:
+        result = col(cls.id).match(expression)
+    return result
+def handle_rank_limit_offset(statement: Select, limit: int | None, offset: int | None) -> Select:
+    statement = statement.order_by(sa.text('rank'))
+    if limit and limit > 0:  # False if == 0 and is None ; True if != 0 and is not None.
+        statement = statement.limit(limit)
+    if offset and offset > 0:  # False if == 0 and is None ; True if != 0 and is not None.
+        statement = statement.offset(offset)
+    return statement
+def execute_match_statement(expression: str, statement: ExecutableReturnsRows, session: Session) \
+                                                                                        -> Sequence:
+    try:
+        raw_results = session.exec(statement)  # type: ignore
+        # raw_results.all() returns a list of sqlalquemy rows.
+        results = [result[0] for result in raw_results.all()]
+        return results
+    except OperationalError as e:
+        raise EsgvocValueError(f"unable to interpret expression '{expression}'") from e
+def execute_find_item_statements(session: Session,
+                                 expression: str,
+                                 first_statement: Select,
+                                 second_statement: Select,
+                                 limit: int | None,
+                                 offset: int | None) -> list[Item]:
+    try:
+        # Items found are kind of tuple with an object, a kindness, a parent id and a rank.
+        first_statement_found = session.exec(first_statement).all()  # type: ignore
+        second_statement_found = session.exec(second_statement).all()  # type: ignore
+        tmp_result: list[Any] = list()
+        tmp_result.extend(first_statement_found)
+        tmp_result.extend(second_statement_found)
+        # According to https://sqlite.org/fts5.html#the_bm25_function,
+        # "the better matches are assigned numerically lower scores."
+        # Sort on the rank column (index 3).
+        sorted_tmp_result = sorted(tmp_result, key=lambda r: r[3], reverse=False)
+        if offset and offset > 0:  # False if == 0 and is None ; True if != 0 and is not None.
+            start = offset
+        else:
+            start = 0
+        if limit and limit > 0:  # False if == 0 and is None ; True if != 0 and is not None.
+            stop = start + limit
+            framed_tmp_result = sorted_tmp_result[start: stop]  # is OK if stop > len of the list.
+        else:
+            framed_tmp_result = sorted_tmp_result[start:]
+        result = [Item(id=r[0], kind=r[1], parent_id=r[2]) for r in framed_tmp_result]
+    except OperationalError as e:
+        raise EsgvocValueError(f"unable to interpret expression '{expression}'") from e
+    return result
 class MatchingTerm(BaseModel):
@@ -15,96 +153,3 @@ class MatchingTerm(BaseModel):
     """The collection id to which the term belongs."""
     term_id: str
     """The term id."""
-class SearchType(Enum):
-    """
-    The search types used for to find terms.
-    """
-    EXACT = "exact"
-    """Performs exact match."""
-    LIKE = "like"  # can interpret %
-    """As SQL operator, it can interpret % as a wildcard."""
-    STARTS_WITH = "starts_with"  # can interpret %
-    """Prefix based search."""
-    ENDS_WITH = "ends_with"  # can interpret %
-    """Suffix based search."""
-    REGEX = "regex"
-    """Search based on regex."""
-class SearchSettings(BaseModel):
-    """
-    Search configuration.
-    """
-    type: SearchType = SearchType.EXACT
-    """The type of search."""
-    case_sensitive: bool = True
-    """Enable case sensitivity or not."""
-    not_operator: bool = False
-    """Give the opposite result like the NOT SQL operator."""
-def _create_str_comparison_expression(field: str,
-                                      value: str,
-                                      settings: SearchSettings|None) -> ColumnElement:
-    '''
-    SQLite LIKE is case insensitive (and so STARTS/ENDS_WITH which are implemented with LIKE).
-    So the case sensitive LIKE is implemented with REGEX.
-    The i versions of SQLAlchemy operators (icontains, etc.) are not useful
-    (but other dbs than SQLite should use them).
-    If the provided `settings` is None, this functions returns an exact search expression.
-    '''
-    does_wild_cards_in_value_have_to_be_interpreted = False
-    # Shortcut.
-    if settings is None:
-        return col(field).is_(other=value)
-    else:
-        match settings.type:
-            # Early return because not operator is not implement with tilde symbol.
-            case SearchType.EXACT:
-                if settings.case_sensitive:
-                    if settings.not_operator:
-                        return col(field).is_not(other=value)
-                    else:
-                        return col(field).is_(other=value)
-                else:
-                    if settings.not_operator:
-                        return func.lower(field) != func.lower(value)
-                    else:
-                        return func.lower(field) == func.lower(value)
-            case SearchType.LIKE:
-                if settings.case_sensitive:
-                    result = col(field).regexp_match(pattern=f".*{value}.*")
-                else:
-                    result = col(field).contains(
-                        other=value,
-                        autoescape=not does_wild_cards_in_value_have_to_be_interpreted,
-                    )
-            case SearchType.STARTS_WITH:
-                if settings.case_sensitive:
-                    result = col(field).regexp_match(pattern=f"^{value}.*")
-                else:
-                    result = col(field).startswith(
-                        other=value,
-                        autoescape=not does_wild_cards_in_value_have_to_be_interpreted,
-                    )
-            case SearchType.ENDS_WITH:
-                if settings.case_sensitive:
-                    result = col(field).regexp_match(pattern=f"{value}$")
-                else:
-                    result = col(field).endswith(
-                        other=value,
-                        autoescape=not does_wild_cards_in_value_have_to_be_interpreted,
-                    )
-            case SearchType.REGEX:
-                if settings.case_sensitive:
-                    result = col(field).regexp_match(pattern=value)
-                else:
-                    raise NotImplementedError(
-                        "regex string comparison case insensitive is not implemented"
-                    )
-        if settings.not_operator:
-            return ~result
-        else:
-            return result

esgvoc 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

Potentially problematic release.

esgvoc 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl