PyPI - esgvoc - Versions diffs - 0.1.2__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

esgvoc-0.1.2-py3-none-any.whl → esgvoc-0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of esgvoc might be problematic. Click here for more details.

Files changed (78)

esgvoc/__init__.py +3 -1
esgvoc/api/__init__.py +30 -30
esgvoc/api/_utils.py +28 -14
esgvoc/api/data_descriptors/__init__.py +19 -10
esgvoc/api/data_descriptors/activity.py +8 -45
esgvoc/api/data_descriptors/area_label.py +6 -0
esgvoc/api/data_descriptors/branded_suffix.py +5 -0
esgvoc/api/data_descriptors/branded_variable.py +5 -0
esgvoc/api/data_descriptors/consortium.py +16 -56
esgvoc/api/data_descriptors/data_descriptor.py +106 -0
esgvoc/api/data_descriptors/date.py +3 -46
esgvoc/api/data_descriptors/directory_date.py +5 -0
esgvoc/api/data_descriptors/experiment.py +19 -54
esgvoc/api/data_descriptors/forcing_index.py +3 -45
esgvoc/api/data_descriptors/frequency.py +6 -43
esgvoc/api/data_descriptors/grid_label.py +6 -44
esgvoc/api/data_descriptors/horizontal_label.py +6 -0
esgvoc/api/data_descriptors/initialisation_index.py +3 -44
esgvoc/api/data_descriptors/institution.py +11 -54
esgvoc/api/data_descriptors/license.py +4 -44
esgvoc/api/data_descriptors/mip_era.py +6 -44
esgvoc/api/data_descriptors/model_component.py +7 -45
esgvoc/api/data_descriptors/organisation.py +3 -40
esgvoc/api/data_descriptors/physic_index.py +3 -45
esgvoc/api/data_descriptors/product.py +4 -43
esgvoc/api/data_descriptors/realisation_index.py +3 -44
esgvoc/api/data_descriptors/realm.py +4 -42
esgvoc/api/data_descriptors/resolution.py +6 -44
esgvoc/api/data_descriptors/source.py +18 -53
esgvoc/api/data_descriptors/source_type.py +3 -41
esgvoc/api/data_descriptors/sub_experiment.py +3 -41
esgvoc/api/data_descriptors/table.py +6 -48
esgvoc/api/data_descriptors/temporal_label.py +6 -0
esgvoc/api/data_descriptors/time_range.py +3 -27
esgvoc/api/data_descriptors/variable.py +13 -71
esgvoc/api/data_descriptors/variant_label.py +3 -47
esgvoc/api/data_descriptors/vertical_label.py +5 -0
esgvoc/api/project_specs.py +82 -0
esgvoc/api/projects.py +284 -238
esgvoc/api/report.py +89 -52
esgvoc/api/search.py +31 -11
esgvoc/api/universe.py +57 -48
esgvoc/apps/__init__.py +6 -0
esgvoc/apps/drs/__init__.py +0 -16
esgvoc/apps/drs/constants.py +2 -0
esgvoc/apps/drs/generator.py +429 -0
esgvoc/apps/drs/report.py +492 -0
esgvoc/apps/drs/validator.py +330 -0
esgvoc/cli/drs.py +248 -0
esgvoc/cli/get.py +26 -25
esgvoc/cli/install.py +11 -8
esgvoc/cli/main.py +4 -5
esgvoc/cli/status.py +14 -2
esgvoc/cli/valid.py +41 -45
esgvoc/core/db/models/mixins.py +7 -0
esgvoc/core/db/models/project.py +3 -8
esgvoc/core/db/models/universe.py +3 -3
esgvoc/core/db/project_ingestion.py +4 -1
esgvoc/core/db/universe_ingestion.py +8 -7
esgvoc/core/logging_handler.py +1 -1
esgvoc/core/repo_fetcher.py +4 -3
esgvoc/core/service/__init__.py +37 -5
esgvoc/core/service/configuration/config_manager.py +188 -0
esgvoc/core/service/configuration/setting.py +88 -0
esgvoc/core/service/state.py +66 -42
esgvoc-0.3.0.dist-info/METADATA +89 -0
esgvoc-0.3.0.dist-info/RECORD +78 -0
esgvoc-0.3.0.dist-info/licenses/LICENSE.txt +519 -0
esgvoc/apps/drs/models.py +0 -43
esgvoc/apps/drs/parser.py +0 -27
esgvoc/cli/config.py +0 -79
esgvoc/core/service/settings.py +0 -64
esgvoc/core/service/settings.toml +0 -12
esgvoc/core/service/settings_default.toml +0 -20
esgvoc-0.1.2.dist-info/METADATA +0 -54
esgvoc-0.1.2.dist-info/RECORD +0 -66
{esgvoc-0.1.2.dist-info → esgvoc-0.3.0.dist-info}/WHEEL +0 -0
{esgvoc-0.1.2.dist-info → esgvoc-0.3.0.dist-info}/entry_points.txt +0 -0

esgvoc/api/projects.py CHANGED

@@ -1,39 +1,51 @@
 import re
-from typing import Sequence
+from collections.abc import Iterable, Sequence
+from sqlmodel import Session, and_, select
 import esgvoc.api.universe as universe
-import esgvoc.core.constants
+import esgvoc.core.constants as constants
 import esgvoc.core.service as service
-from esgvoc.api._utils import (get_universe_session, instantiate_pydantic_term,
+from esgvoc.api._utils import (APIException, get_universe_session,
+                               instantiate_pydantic_term,
                                instantiate_pydantic_terms)
+from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor
+from esgvoc.api.project_specs import ProjectSpecs
 from esgvoc.api.report import (ProjectTermError, UniverseTermError,
-                               ValidationError, ValidationReport)
-from esgvoc.api.search import MatchingTerm, SearchSettings, create_str_comparison_expression
+                               ValidationReport)
+from esgvoc.api.search import (MatchingTerm, SearchSettings,
+                               _create_str_comparison_expression)
 from esgvoc.core.db.connection import DBConnection
 from esgvoc.core.db.models.mixins import TermKind
 from esgvoc.core.db.models.project import Collection, Project, PTerm
 from esgvoc.core.db.models.universe import UTerm
-from pydantic import BaseModel
-from sqlmodel import Session, and_, select
+# [OPTIMIZATION]
+_VALID_TERM_IN_COLLECTION_CACHE: dict[str, list[MatchingTerm]] = dict()
+_VALID_VALUE_AGAINST_GIVEN_TERM_CACHE: dict[str, list[UniverseTermError|ProjectTermError]] = dict()
 def _get_project_connection(project_id: str) -> DBConnection|None:
-    return service.state_service.projects[project_id].db_connection
+    if project_id in service.current_state.projects:
+        return service.current_state.projects[project_id].db_connection
+    else:
+        return None
 def _get_project_session_with_exception(project_id: str) -> Session:
     if connection:=_get_project_connection(project_id):
         project_session = connection.create_session()
         return project_session
     else:
-        raise ValueError(f'unable to find project {project_id}')
+        raise APIException(f'unable to find project {project_id}')
-def _resolve_term(term_composite_part: dict,
+def _resolve_term(composite_term_part: dict,
                   universe_session: Session,
                   project_session: Session) -> UTerm|PTerm:
-    '''First find the term in the universe than in the current project'''
-    term_id = term_composite_part[esgvoc.core.constants.TERM_ID_JSON_KEY]
-    term_type = term_composite_part[esgvoc.core.constants.TERM_TYPE_JSON_KEY]
+    # First find the term in the universe than in the current project
+    term_id = composite_term_part[constants.TERM_ID_JSON_KEY]
+    term_type = composite_term_part[constants.TERM_TYPE_JSON_KEY]
     uterms = universe._find_terms_in_data_descriptor(data_descriptor_id=term_type,
                                                      term_id=term_id,
                                                      session=universe_session,
@@ -49,24 +61,24 @@ def _resolve_term(term_composite_part: dict,
         return pterms[0]
     else:
         msg = f'unable to find the term {term_id} in {term_type}'
-        raise RuntimeError(msg)
+        raise RuntimeError(msg)
-def _get_term_composite_separator_parts(term: UTerm|PTerm) -> tuple[str, list]:
-    separator = term.specs[esgvoc.core.constants.COMPOSITE_SEPARATOR_JSON_KEY]
-    parts = term.specs[esgvoc.core.constants.COMPOSITE_PARTS_JSON_KEY]
+def _get_composite_term_separator_parts(term: UTerm|PTerm) -> tuple[str, list]:
+    separator = term.specs[constants.COMPOSITE_SEPARATOR_JSON_KEY]
+    parts = term.specs[constants.COMPOSITE_PARTS_JSON_KEY]
     return separator, parts
 # TODO: support optionality of parts of composite.
 # It is backtrack possible for more than one missing parts.
-def _valid_value_term_composite_with_separator(value: str,
+def _valid_value_composite_term_with_separator(value: str,
                                                term: UTerm|PTerm,
                                                universe_session: Session,
                                                project_session: Session)\
-                                                   -> list[ValidationError]:
+                                                   -> list[UniverseTermError|ProjectTermError]:
     result = list()
-    separator, parts = _get_term_composite_separator_parts(term)
+    separator, parts = _get_composite_term_separator_parts(term)
     if separator in value:
         splits = value.split(separator)
         if len(splits) == len(parts):
@@ -92,11 +104,15 @@ def _transform_to_pattern(term: UTerm|PTerm,
                           project_session: Session) -> str:
     match term.kind:
         case TermKind.PLAIN:
-            result = term.specs[esgvoc.core.constants.DRS_SPECS_JSON_KEY]
+            if constants.DRS_SPECS_JSON_KEY in term.specs:
+                result = term.specs[constants.DRS_SPECS_JSON_KEY]
+            else:
+                raise APIException(f"the term {term.id} doesn't have drs name. " +
+                                    "Can't validate it.")
         case TermKind.PATTERN:
-            result = term.specs[esgvoc.core.constants.PATTERN_JSON_KEY]
+            result = term.specs[constants.PATTERN_JSON_KEY]
         case TermKind.COMPOSITE:
-            separator, parts =  _get_term_composite_separator_parts(term)
+            separator, parts =  _get_composite_term_separator_parts(term)
             result = ""
             for part in parts:
                 resolved_term = _resolve_term(part, universe_session, project_session)
@@ -104,22 +120,22 @@ def _transform_to_pattern(term: UTerm|PTerm,
                 result = f'{result}{pattern}{separator}'
             result = result.rstrip(separator)
         case _:
-            raise NotImplementedError(f'unsupported term kind {term.kind}')
+            raise RuntimeError(f'unsupported term kind {term.kind}')
     return result
 # TODO: support optionality of parts of composite.
 # It is backtrack possible for more than one missing parts.
-def _valid_value_term_composite_separator_less(value: str,
+def _valid_value_composite_term_separator_less(value: str,
                                                term: UTerm|PTerm,
                                                universe_session: Session,
                                                project_session: Session)\
-                                                   -> list[ValidationError]:
+                                                   -> list[UniverseTermError|ProjectTermError]:
     result = list()
     try:
         pattern = _transform_to_pattern(term, universe_session, project_session)
         try:
-            # Term patterns are meant to be validated individually.
+            # Patterns terms are meant to be validated individually.
             # So their regex are defined as a whole (begins by a ^, ends by a $).
             # As the pattern is a concatenation of plain or regex, multiple ^ and $ can exist.
             # The later, must be removed.
@@ -127,70 +143,74 @@ def _valid_value_term_composite_separator_less(value: str,
             pattern = f'^{pattern}$'
             regex = re.compile(pattern)
         except Exception as e:
-            msg = f'regex compilation error:\n{e}'
-            raise ValueError(msg) from e
+            msg = f'regex compilation error while processing term {term.id}:\n{e}'
+            raise RuntimeError(msg) from e
         match = regex.match(value)
         if match is None:
             result.append(_create_term_error(value, term))
         return result
     except Exception as e:
-        msg = f'cannot validate separator less composite term {term.id}:\n{e}'
+        msg = f'cannot validate separator less composite term {term.id}:\n{e}'
         raise RuntimeError(msg) from e
-def _valid_value_for_term_composite(value: str,
+def _valid_value_for_composite_term(value: str,
                                     term: UTerm|PTerm,
                                     universe_session: Session,
                                     project_session: Session)\
-                                        -> list[ValidationError]:
+                                        -> list[UniverseTermError|ProjectTermError]:
     result = list()
-    separator, _ = _get_term_composite_separator_parts(term)
+    separator, _ = _get_composite_term_separator_parts(term)
     if separator:
-        result = _valid_value_term_composite_with_separator(value, term, universe_session,
+        result = _valid_value_composite_term_with_separator(value, term, universe_session,
                                                             project_session)
     else:
-        result = _valid_value_term_composite_separator_less(value, term, universe_session,
+        result = _valid_value_composite_term_separator_less(value, term, universe_session,
                                                             project_session)
     return result
-def _create_term_error(value: str, term: UTerm|PTerm) -> ValidationError:
+def _create_term_error(value: str, term: UTerm|PTerm) -> UniverseTermError|ProjectTermError:
     if isinstance(term, UTerm):
-        return UniverseTermError(value, term)
+        return UniverseTermError(value=value, term=term.specs, term_kind=term.kind,
+                                 data_descriptor_id=term.data_descriptor.id)
     else:
-        return ProjectTermError(value, term)
+        return ProjectTermError(value=value, term=term.specs, term_kind=term.kind,
+                                collection_id=term.collection.id)
 def _valid_value(value: str,
                  term: UTerm|PTerm,
                  universe_session: Session,
-                 project_session: Session) -> list[ValidationError]:
+                 project_session: Session) -> list[UniverseTermError|ProjectTermError]:
     result = list()
     match term.kind:
         case TermKind.PLAIN:
-            if term.specs[esgvoc.core.constants.DRS_SPECS_JSON_KEY] != value:
-                result.append(_create_term_error(value, term))
+            if constants.DRS_SPECS_JSON_KEY in term.specs:
+                if term.specs[constants.DRS_SPECS_JSON_KEY] != value:
+                    result.append(_create_term_error(value, term))
+            else:
+                raise APIException(f"the term {term.id} doesn't have drs name. " +
+                                    "Can't validate it.")
         case TermKind.PATTERN:
             # OPTIM: Pattern can be compiled and stored for further matching.
-            pattern_match = re.match(term.specs[esgvoc.core.constants.PATTERN_JSON_KEY], value)
+            pattern_match = re.match(term.specs[constants.PATTERN_JSON_KEY], value)
             if pattern_match is None:
                 result.append(_create_term_error(value, term))
         case TermKind.COMPOSITE:
-            result.extend(_valid_value_for_term_composite(value, term,
+            result.extend(_valid_value_for_composite_term(value, term,
                                                           universe_session,
                                                           project_session))
         case _:
-            raise NotImplementedError(f'unsupported term kind {term.kind}')
+            raise RuntimeError(f'unsupported term kind {term.kind}')
     return result
-def _check_and_strip_value(value: str) -> str:
-    if not value:
-        raise ValueError('value should be set')
-    if result:= value.strip():
-        return result
+def _check_value(value: str) -> str:
+    if not value or value.isspace():
+        raise APIException('value should be set')
     else:
-        raise ValueError('value should not be empty')
+        return value
 def _search_plain_term_and_valid_value(value: str,
@@ -198,7 +218,7 @@ def _search_plain_term_and_valid_value(value: str,
                                        project_session: Session) \
                                         -> str|None:
     where_expression = and_(Collection.id == collection_id,
-                            PTerm.specs[esgvoc.core.constants.DRS_SPECS_JSON_KEY] == f'"{value}"')
+                            PTerm.specs[constants.DRS_SPECS_JSON_KEY] == f'"{value}"')
     statement = select(PTerm).join(Collection).where(where_expression)
     term = project_session.exec(statement).one_or_none()
     return term.id if term else None
@@ -223,12 +243,17 @@ def _valid_value_against_all_terms_of_collection(value: str,
 def _valid_value_against_given_term(value: str,
+                                    project_id: str,
                                     collection_id: str,
                                     term_id: str,
                                     universe_session: Session,
                                     project_session: Session)\
-                                        -> list[ValidationError]:
-    try:
+                                        -> list[UniverseTermError|ProjectTermError]:
+    # [OPTIMIZATION]
+    key = value + project_id + collection_id + term_id
+    if key in _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE:
+        result = _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key]
+    else:
         terms = _find_terms_in_collection(collection_id,
                                           term_id,
                                           project_session,
@@ -237,12 +262,9 @@ def _valid_value_against_given_term(value: str,
             term = terms[0]
             result = _valid_value(value, term, universe_session, project_session)
         else:
-            raise ValueError(f'unable to find term {term_id} ' +
-                             f'in collection {collection_id}')
-    except Exception as e:
-        msg = f'unable to valid term {term_id} ' +\
-              f'in collection {collection_id}'
-        raise RuntimeError(msg) from e
+            raise APIException(f'unable to find term {term_id} ' +
+                               f'in collection {collection_id}')
+        _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key] = result
     return result
@@ -254,19 +276,19 @@ def valid_term(value: str,
     """
     Check if the given value may or may not represent the given term. The functions returns
     a report that contains the possible errors.
     Behavior based on the nature of the term:
-    - plain term: the function try to match the value on the drs_name field.
-    - term pattern: the function try to match the value on the pattern field (regex).
-    - term composite:
-        - if the composite has got a separator, the function splits the value according to the
-          separator of the term then it try to match every part of the composite
-          with every split of the value.
-        - if the composite hasn't got a separator, the function aggregates the parts of the composite
-          so as to compare it as a regex to the value.
+        - plain term: the function try to match the value on the drs_name field.
+        - pattern term: the function try to match the value on the pattern field (regex).
+        - composite term:
+            - if the composite has got a separator, the function splits the value according to the\
+              separator of the term then it try to match every part of the composite\
+              with every split of the value.
+            - if the composite hasn't got a separator, the function aggregates the parts of the \
+              composite so as to compare it as a regex to the value.
     If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
-    the function raises a ValueError.
+    the function raises a APIException.
     :param value: A value to be validated
     :type value: str
@@ -278,14 +300,14 @@ def valid_term(value: str,
     :type term_id: str
     :returns: A validation report that contains the possible errors
     :rtype: ValidationReport
-    :raises ValueError: If any of the provided ids is not found
+    :raises APIException: If any of the provided ids is not found
     """
-    value = _check_and_strip_value(value)
+    value = _check_value(value)
     with get_universe_session() as universe_session, \
          _get_project_session_with_exception(project_id) as project_session:
-        errors = _valid_value_against_given_term(value, collection_id, term_id,
+        errors = _valid_value_against_given_term(value, project_id, collection_id, term_id,
                                                  universe_session, project_session)
-        return ValidationReport(value, errors)
+        return ValidationReport(expression=value, errors=errors)
 def _valid_term_in_collection(value: str,
@@ -294,28 +316,38 @@ def _valid_term_in_collection(value: str,
                               universe_session: Session,
                               project_session: Session) \
                                 -> list[MatchingTerm]:
-    value = _check_and_strip_value(value)
-    result = list()
-    collections = _find_collections_in_project(collection_id,
-                                               project_session,
-                                               None)
-    if collections:
-        collection = collections[0]
-        match collection.term_kind:
-            case TermKind.PLAIN:
-                term_id_found = _search_plain_term_and_valid_value(value, collection_id,
-                                                                   project_session)
-                if term_id_found:
-                    result.append(MatchingTerm(project_id, collection_id, term_id_found))
-            case _:
-                term_ids_found = _valid_value_against_all_terms_of_collection(value, collection,
-                                                                              universe_session,
-                                                                              project_session)
-                for term_id_found in term_ids_found:
-                    result.append(MatchingTerm(project_id, collection_id, term_id_found))
+    # [OPTIMIZATION]
+    key = value + project_id + collection_id
+    if key in _VALID_TERM_IN_COLLECTION_CACHE:
+        result = _VALID_TERM_IN_COLLECTION_CACHE[key]
     else:
-        msg = f'unable to find collection {collection_id}'
-        raise ValueError(msg)
+        value = _check_value(value)
+        result = list()
+        collections = _find_collections_in_project(collection_id,
+                                                   project_session,
+                                                   None)
+        if collections:
+            collection = collections[0]
+            match collection.term_kind:
+                case TermKind.PLAIN:
+                    term_id_found = _search_plain_term_and_valid_value(value, collection_id,
+                                                                       project_session)
+                    if term_id_found:
+                        result.append(MatchingTerm(project_id=project_id,
+                                                   collection_id=collection_id,
+                                                   term_id=term_id_found))
+                case _:
+                    term_ids_found = _valid_value_against_all_terms_of_collection(value, collection,
+                                                                                  universe_session,
+                                                                                  project_session)
+                    for term_id_found in term_ids_found:
+                        result.append(MatchingTerm(project_id=project_id,
+                                                   collection_id=collection_id,
+                                                   term_id=term_id_found))
+        else:
+            msg = f'unable to find collection {collection_id}'
+            raise APIException(msg)
+        _VALID_TERM_IN_COLLECTION_CACHE[key] = result
     return result
@@ -326,19 +358,19 @@ def valid_term_in_collection(value: str,
     """
     Check if the given value may or may not represent a term in the given collection. The function
     returns the terms that the value matches.
     Behavior based on the nature of the term:
-    - plain term: the function try to match the value on the drs_name field.
-    - term pattern: the function try to match the value on the pattern field (regex).
-    - term composite:
-        - if the composite has got a separator, the function splits the value according to the
-          separator of the term then it try to match every part of the composite
-          with every split of the value.
-        - if the composite hasn't got a separator, the function aggregates the parts of the composite
-          so as to compare it as a regex to the value.
+        - plain term: the function try to match the value on the drs_name field.
+        - pattern term: the function try to match the value on the pattern field (regex).
+        - composite term:
+            - if the composite has got a separator, the function splits the value according to the \
+              separator of the term then it try to match every part of the composite \
+              with every split of the value.
+            - if the composite hasn't got a separator, the function aggregates the parts of the \
+              composite so as to compare it as a regex to the value.
     If any of the provided ids (`project_id` or `collection_id`) is not found,
-    the function raises a ValueError.
+    the function raises a APIException.
     :param value: A value to be validated
     :type value: str
@@ -348,7 +380,7 @@ def valid_term_in_collection(value: str,
     :type collection_id: str
     :returns: The list of terms that the value matches.
     :rtype: list[MatchingTerm]
-    :raises ValueError: If any of the provided ids is not found
+    :raises APIException: If any of the provided ids is not found
     """
     with get_universe_session() as universe_session, \
          _get_project_session_with_exception(project_id) as project_session:
@@ -372,18 +404,18 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
     """
     Check if the given value may or may not represent a term in the given project. The function
     returns the terms that the value matches.
     Behavior based on the nature of the term:
-    - plain term: the function try to match the value on the drs_name field.
-    - term pattern: the function try to match the value on the pattern field (regex).
-    - term composite:
-        - if the composite has got a separator, the function splits the value according to the
-          separator of the term then it try to match every part of the composite
-          with every split of the value.
-        - if the composite hasn't got a separator, the function aggregates the parts of the composite
-          so as to compare it as a regex to the value.
+        - plain term: the function try to match the value on the drs_name field.
+        - pattern term: the function try to match the value on the pattern field (regex).
+        - composite term:
+            - if the composite has got a separator, the function splits the value according to the \
+              separator of the term then it try to match every part of the composite \
+              with every split of the value.
+            - if the composite hasn't got a separator, the function aggregates the parts of the \
+              composite so as to compare it as a regex to the value.
-    If the `project_id` is not found, the function raises a ValueError.
+    If the `project_id` is not found, the function raises a APIException.
     :param value: A value to be validated
     :type value: str
@@ -391,7 +423,7 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
     :type project_id: str
     :returns: The list of terms that the value matches.
     :rtype: list[MatchingTerm]
-    :raises ValueError: If the `project_id` is not found
+    :raises APIException: If the `project_id` is not found
     """
     with get_universe_session() as universe_session, \
          _get_project_session_with_exception(project_id) as project_session:
@@ -402,16 +434,16 @@ def valid_term_in_all_projects(value: str) -> list[MatchingTerm]:
     """
     Check if the given value may or may not represent a term in all projects. The function
     returns the terms that the value matches.
     Behavior based on the nature of the term:
-    - plain term: the function try to match the value on the drs_name field.
-    - term pattern: the function try to match the value on the pattern field (regex).
-    - term composite:
-        - if the composite has got a separator, the function splits the value according to the
-          separator of the term then it try to match every part of the composite
-          with every split of the value.
-        - if the composite hasn't got a separator, the function aggregates the parts of the composite
-          so as to compare it as a regex to the value.
+        - plain term: the function try to match the value on the drs_name field.
+        - pattern term: the function try to match the value on the pattern field (regex).
+        - composite term:
+            - if the composite has got a separator, the function splits the value according to the \
+              separator of the term then it try to match every part of the composite \
+              with every split of the value.
+            - if the composite hasn't got a separator, the function aggregates the parts of the \
+              composite so as to compare it as a regex to the value.
     :param value: A value to be validated
     :type value: str
@@ -431,14 +463,14 @@ def _find_terms_in_collection(collection_id: str,
                               term_id: str,
                               session: Session,
                               settings: SearchSettings|None = None) -> Sequence[PTerm]:
-    """Settings only apply on the term_id comparison."""
-    where_expression = create_str_comparison_expression(field=PTerm.id,
-                                                        value=term_id,
-                                                        settings=settings)
+    # Settings only apply on the term_id comparison.
+    where_expression = _create_str_comparison_expression(field=PTerm.id,
+                                                         value=term_id,
+                                                         settings=settings)
     statement = select(PTerm).join(Collection).where(Collection.id==collection_id,
                                                      where_expression)
     results = session.exec(statement)
-    result = results.all()
+    result = results.all()
     return result
@@ -446,21 +478,21 @@ def find_terms_in_collection(project_id:str,
                              collection_id: str,
                              term_id: str,
                              settings: SearchSettings|None = None) \
-                                -> list[BaseModel]:
+                                -> list[DataDescriptor]:
     """
     Finds one or more terms, based on the specified search settings, in the given collection of a project.
-    This function performs an exact match on the `project_id` and `collection_id`,
+    This function performs an exact match on the `project_id` and `collection_id`,
     and does **not** search for similar or related projects and collections.
     The given `term_id` is searched according to the search type specified in the parameter `settings`,
     which allows a flexible matching (e.g., `LIKE` may return multiple results).
     If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
     If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
     the function returns an empty list.
     Behavior based on search type:
-    - `EXACT` and absence of `settings`: returns zero or one Pydantic term instance in the list.
-    - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more
-      Pydantic term instances in the list.
+        - `EXACT` and absence of `settings`: returns zero or one term instance in the list.
+        - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
+          term instances in the list.
     :param project_id: A project id
     :type project_id: str
@@ -470,14 +502,15 @@ def find_terms_in_collection(project_id:str,
     :type term_id: str
     :param settings: The search settings
     :type settings: SearchSettings|None
-    :returns: A list of Pydantic term instances. Returns an empty list if no matches are found.
-    :rtype: list[BaseModel]
+    :returns: A list of term instances. Returns an empty list if no matches are found.
+    :rtype: list[DataDescriptor]
     """
-    result: list[BaseModel] = list()
+    result: list[DataDescriptor] = list()
     if connection:=_get_project_connection(project_id):
         with connection.create_session() as session:
             terms = _find_terms_in_collection(collection_id, term_id, session, settings)
-            instantiate_pydantic_terms(terms, result)
+            instantiate_pydantic_terms(terms, result,
+                                       settings.selected_term_fields if settings else None)
     return result
@@ -486,14 +519,14 @@ def _find_terms_from_data_descriptor_in_project(data_descriptor_id: str,
                                                 session: Session,
                                                 settings: SearchSettings|None = None) \
                                                    -> Sequence[PTerm]:
-    """Settings only apply on the term_id comparison."""
-    where_expression = create_str_comparison_expression(field=PTerm.id,
-                                                        value=term_id,
-                                                        settings=settings)
+    # Settings only apply on the term_id comparison.
+    where_expression = _create_str_comparison_expression(field=PTerm.id,
+                                                         value=term_id,
+                                                         settings=settings)
     statement = select(PTerm).join(Collection).where(Collection.data_descriptor_id==data_descriptor_id,
                                                      where_expression)
     results = session.exec(statement)
-    result = results.all()
+    result = results.all()
     return result
@@ -501,23 +534,23 @@ def find_terms_from_data_descriptor_in_project(project_id: str,
                                                data_descriptor_id: str,
                                                term_id: str,
                                                settings: SearchSettings|None = None) \
-                                                  -> list[tuple[BaseModel, str]]:
+                                                  -> list[tuple[DataDescriptor, str]]:
     """
     Finds one or more terms in the given project which are instances of the given data descriptor
     in the universe, based on the specified search settings, in the given collection of a project.
-    This function performs an exact match on the `project_id` and `data_descriptor_id`,
+    This function performs an exact match on the `project_id` and `data_descriptor_id`,
     and does **not** search for similar or related projects and data descriptors.
     The given `term_id` is searched according to the search type specified in the parameter `settings`,
     which allows a flexible matching (e.g., `LIKE` may return multiple results).
     If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
     If any of the provided ids (`project_id`, `data_descriptor_id` or `term_id`) is not found,
     the function returns an empty list.
     Behavior based on search type:
-    - `EXACT` and absence of `settings`: returns zero or one Pydantic term instance and
-      collection id in the list.
-    - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more
-      Pydantic term instances and collection ids in the list.
+        - `EXACT` and absence of `settings`: returns zero or one term instance and \
+          collection id in the list.
+        - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
+          term instances and collection ids in the list.
     :param project_id: A project id
     :type project_id: str
@@ -527,9 +560,9 @@ def find_terms_from_data_descriptor_in_project(project_id: str,
     :type term_id: str
     :param settings: The search settings
     :type settings: SearchSettings|None
-    :returns: A list of tuple of Pydantic term instances and related collection ids.
+    :returns: A list of tuple of term instances and related collection ids. \
     Returns an empty list if no matches are found.
-    :rtype: list[tuple[BaseModel, str]]
+    :rtype: list[tuple[DataDescriptor, str]]
     """
     result = list()
     if connection:=_get_project_connection(project_id):
@@ -540,7 +573,8 @@ def find_terms_from_data_descriptor_in_project(project_id: str,
                                                                 settings)
             for pterm in terms:
                 collection_id = pterm.collection.id
-                term = instantiate_pydantic_term(pterm)
+                term = instantiate_pydantic_term(pterm,
+                                                 settings.selected_term_fields if settings else None)
                 result.append((term, collection_id))
     return result
@@ -548,23 +582,23 @@ def find_terms_from_data_descriptor_in_project(project_id: str,
 def find_terms_from_data_descriptor_in_all_projects(data_descriptor_id: str,
                                                     term_id: str,
                                                     settings: SearchSettings|None = None) \
-                                                       -> list[tuple[BaseModel, str]]:
+                                                    -> list[tuple[list[tuple[DataDescriptor, str]], str]]:
     """
     Finds one or more terms in all projects which are instances of the given data descriptor
     in the universe, based on the specified search settings, in the given collection of a project.
-    This function performs an exact match on the `data_descriptor_id`,
+    This function performs an exact match on the `data_descriptor_id`,
     and does **not** search for similar or related data descriptors.
     The given `term_id` is searched according to the search type specified in the parameter `settings`,
     which allows a flexible matching (e.g., `LIKE` may return multiple results).
     If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
     If any of the provided ids (`data_descriptor_id` or `term_id`) is not found,
     the function returns an empty list.
     Behavior based on search type:
-    - `EXACT` and absence of `settings`: returns zero or one Pydantic term instance and
-      collection id in the list.
-    - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more
-      Pydantic term instances and collection ids in the list.
+        - `EXACT` and absence of `settings`: returns zero or one term instance and \
+          collection id in the list.
+        - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
+          term instances and collection ids in the list.
     :param data_descriptor_id: A data descriptor
     :type data_descriptor_id: str
@@ -572,26 +606,28 @@ def find_terms_from_data_descriptor_in_all_projects(data_descriptor_id: str,
     :type term_id: str
     :param settings: The search settings
     :type settings: SearchSettings|None
-    :returns: A list of tuple of Pydantic term instances and related collection ids.
+    :returns: A list of tuple of matching terms with their collection id, per project. \
     Returns an empty list if no matches are found.
-    :rtype: list[tuple[BaseModel, str]]
+    :rtype: list[tuple[list[tuple[DataDescriptor, str]], str]]
     """
     project_ids = get_all_projects()
-    result = list()
+    result: list[tuple[list[tuple[DataDescriptor, str]], str]] = list()
     for project_id in project_ids:
-        result.extend(find_terms_from_data_descriptor_in_project(project_id,
-                                                                 data_descriptor_id,
-                                                                 term_id,
-                                                                 settings))
+        matching_terms = find_terms_from_data_descriptor_in_project(project_id,
+                                                                    data_descriptor_id,
+                                                                    term_id,
+                                                                    settings)
+        if matching_terms:
+            result.append((matching_terms, project_id))
     return result
 def _find_terms_in_project(term_id: str,
                            session: Session,
                            settings: SearchSettings|None) -> Sequence[PTerm]:
-    where_expression = create_str_comparison_expression(field=PTerm.id,
-                                                        value=term_id,
-                                                        settings=settings)
+    where_expression = _create_str_comparison_expression(field=PTerm.id,
+                                                         value=term_id,
+                                                         settings=settings)
     statement = select(PTerm).where(where_expression)
     results = session.exec(statement).all()
     return results
@@ -599,7 +635,7 @@ def _find_terms_in_project(term_id: str,
 def find_terms_in_all_projects(term_id: str,
                                settings: SearchSettings|None = None) \
-                                  -> list[BaseModel]:
+                                  -> list[DataDescriptor]:
     """
     Finds one or more terms, based on the specified search settings, in all projects.
     The given `term_id` is searched according to the search type specified in the parameter `settings`,
@@ -612,8 +648,8 @@ def find_terms_in_all_projects(term_id: str,
     :type term_id: str
     :param settings: The search settings
     :type settings: SearchSettings|None
-    :returns: A list of Pydantic term instances. Returns an empty list if no matches are found.
-    :rtype: list[BaseModel]
+    :returns: A list of term instances. Returns an empty list if no matches are found.
+    :rtype: list[DataDescriptor]
     """
     project_ids = get_all_projects()
     result = list()
@@ -625,10 +661,10 @@ def find_terms_in_all_projects(term_id: str,
 def find_terms_in_project(project_id: str,
                           term_id: str,
                           settings: SearchSettings|None = None) \
-                            -> list[BaseModel]:
+                            -> list[DataDescriptor]:
     """
     Finds one or more terms, based on the specified search settings, in a project.
-    This function performs an exact match on the `project_id` and
+    This function performs an exact match on the `project_id` and
     does **not** search for similar or related projects.
     The given `term_id` is searched according to the search type specified in the parameter `settings`,
     which allows a flexible matching (e.g., `LIKE` may return multiple results).
@@ -643,20 +679,22 @@ def find_terms_in_project(project_id: str,
     :type term_id: str
     :param settings: The search settings
     :type settings: SearchSettings|None
-    :returns: A list of Pydantic term instances. Returns an empty list if no matches are found.
-    :rtype: list[BaseModel]
+    :returns: A list of term instances. Returns an empty list if no matches are found.
+    :rtype: list[DataDescriptor]
     """
-    result: list[BaseModel] = list()
+    result: list[DataDescriptor] = list()
     if connection:=_get_project_connection(project_id):
         with connection.create_session() as session:
             terms = _find_terms_in_project(term_id, session, settings)
-            instantiate_pydantic_terms(terms, result)
+            instantiate_pydantic_terms(terms, result,
+                                       settings.selected_term_fields if settings else None)
     return result
 def get_all_terms_in_collection(project_id: str,
-                                collection_id: str)\
-                                   -> list[BaseModel]:
+                                collection_id: str,
+                                selected_term_fields: Iterable[str]|None = None)\
+                                   -> list[DataDescriptor]:
     """
     Gets all terms of the given collection of a project.
     This function performs an exact match on the `project_id` and `collection_id`,
@@ -668,9 +706,11 @@ def get_all_terms_in_collection(project_id: str,
     :type project_id: str
     :param collection_id: A collection id
     :type collection_id: str
-    :returns: a list of Pydantic term instances.
-    Returns an empty list if no matches are found.
-    :rtype: list[BaseModel]
+    :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
+    fields of the terms are returned.
+    :type selected_term_fields: Iterable[str]|None
+    :returns: a list of term instances. Returns an empty list if no matches are found.
+    :rtype: list[DataDescriptor]
     """
     result = list()
     if connection:=_get_project_connection(project_id):
@@ -680,7 +720,7 @@ def get_all_terms_in_collection(project_id: str,
                                                        None)
             if collections:
                 collection = collections[0]
-                result = _get_all_terms_in_collection(collection)
+                result = _get_all_terms_in_collection(collection, selected_term_fields)
     return result
@@ -688,9 +728,9 @@ def _find_collections_in_project(collection_id: str,
                                  session: Session,
                                  settings: SearchSettings|None) \
                                     -> Sequence[Collection]:
-    where_exp = create_str_comparison_expression(field=Collection.id,
-                                                 value=collection_id,
-                                                 settings=settings)
+    where_exp = _create_str_comparison_expression(field=Collection.id,
+                                                  value=collection_id,
+                                                  settings=settings)
     statement = select(Collection).where(where_exp)
     results = session.exec(statement)
     result = results.all()
@@ -703,19 +743,19 @@ def find_collections_in_project(project_id: str,
                                     -> list[dict]:
     """
     Finds one or more collections of the given project.
-    This function performs an exact match on the `project_id` and
+    This function performs an exact match on the `project_id` and
     does **not** search for similar or related projects.
-    The given `collection_id` is searched according to the search type specified in
+    The given `collection_id` is searched according to the search type specified in
     the parameter `settings`,
     which allows a flexible matching (e.g., `LIKE` may return multiple results).
     If the parameter `settings` is `None`, this function performs an exact match on the `collection_id`.
     If any of the provided ids (`project_id` or `collection_id`) is not found, the function returns
     an empty list.
     Behavior based on search type:
-    - `EXACT` and absence of `settings`: returns zero or one collection context in the list.
-    - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more
-      collection contexts in the list.
+        - `EXACT` and absence of `settings`: returns zero or one collection context in the list.
+        - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
+          collection contexts in the list.
     :param project_id: A project id
     :type project_id: str
@@ -723,8 +763,7 @@ def find_collections_in_project(project_id: str,
     :type collection_id: str
     :param settings: The search settings
     :type settings: SearchSettings|None
-    :returns: A list of collection contexts.
-    Returns an empty list if no matches are found.
+    :returns: A list of collection contexts. Returns an empty list if no matches are found.
     :rtype: list[dict]
     """
     result = list()
@@ -739,7 +778,7 @@ def find_collections_in_project(project_id: str,
 def _get_all_collections_in_project(session: Session) -> list[Collection]:
-    project = session.get(Project, esgvoc.core.constants.SQLITE_FIRST_PK)
+    project = session.get(Project, constants.SQLITE_FIRST_PK)
     # Project can't be missing if session exists.
     return project.collections # type: ignore
@@ -747,14 +786,13 @@ def _get_all_collections_in_project(session: Session) -> list[Collection]:
 def get_all_collections_in_project(project_id: str) -> list[str]:
     """
     Gets all collections of the given project.
-    This function performs an exact match on the `project_id` and
+    This function performs an exact match on the `project_id` and
     does **not** search for similar or related projects.
     If the provided `project_id` is not found, the function returns an empty list.
     :param project_id: A project id
     :type project_id: str
-    :returns: A list of collection ids.
-    Returns an empty list if no matches are found.
+    :returns: A list of collection ids. Returns an empty list if no matches are found.
     :rtype: list[str]
     """
     result = list()
@@ -766,25 +804,29 @@ def get_all_collections_in_project(project_id: str) -> list[str]:
     return result
-def _get_all_terms_in_collection(collection: Collection) -> list[BaseModel]:
-    result: list[BaseModel] = list()
-    instantiate_pydantic_terms(collection.terms, result)
+def _get_all_terms_in_collection(collection: Collection,
+                                 selected_term_fields: Iterable[str]|None) -> list[DataDescriptor]:
+    result: list[DataDescriptor] = list()
+    instantiate_pydantic_terms(collection.terms, result, selected_term_fields)
     return result
-def get_all_terms_in_project(project_id: str) -> list[BaseModel]:
+def get_all_terms_in_project(project_id: str,
+                             selected_term_fields: Iterable[str]|None = None) -> list[DataDescriptor]:
     """
     Gets all terms of the given project.
-    This function performs an exact match on the `project_id` and
+    This function performs an exact match on the `project_id` and
     does **not** search for similar or related projects.
     Terms are unique within a collection but may have some synonyms in a project.
     If the provided `project_id` is not found, the function returns an empty list.
     :param project_id: A project id
     :type project_id: str
-    :returns: A list of Pydantic term instances.
-    Returns an empty list if no matches are found.
-    :rtype: list[BaseModel]
+    :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
+    fields of the terms are returned.
+    :type selected_term_fields: Iterable[str]|None
+    :returns: A list of term instances. Returns an empty list if no matches are found.
+    :rtype: list[DataDescriptor]
     """
     result = list()
     if connection:=_get_project_connection(project_id):
@@ -792,63 +834,67 @@ def get_all_terms_in_project(project_id: str) -> list[BaseModel]:
             collections = _get_all_collections_in_project(session)
             for collection in collections:
                 # Term may have some synonyms in a project.
-                result.extend(_get_all_terms_in_collection(collection))
+                result.extend(_get_all_terms_in_collection(collection, selected_term_fields))
     return result
-def get_all_terms_in_all_projects() -> list[BaseModel]:
+def get_all_terms_in_all_projects(selected_term_fields: Iterable[str]|None = None) \
+                                                          -> list[tuple[str, list[DataDescriptor]]]:
     """
     Gets all terms of all projects.
-    :returns: A list of Pydantic term instances.
-    :rtype: list[BaseModel]
+    :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
+    fields of the terms are returned.
+    :type selected_term_fields: Iterable[str]|None
+    :returns: A list of tuple project_id and term instances of that project.
+    :rtype: list[tuple[str, list[DataDescriptor]]]
     """
     project_ids = get_all_projects()
     result = list()
     for project_id in project_ids:
-        result.extend(get_all_terms_in_project(project_id))
+        terms = get_all_terms_in_project(project_id, selected_term_fields)
+        result.append((project_id, terms))
     return result
-def find_project(project_id: str) -> dict|None:
+def find_project(project_id: str) -> ProjectSpecs|None:
     """
-    Finds a project.
-    This function performs an exact match on the `project_id` and
+    Finds a project and returns its specifications.
+    This function performs an exact match on the `project_id` and
     does **not** search for similar or related projects.
     If the provided `project_id` is not found, the function returns `None`.
     :param project_id: A project id to be found
     :type project_id: str
-    :returns: The specs of the project found.
-    Returns `None` if no matches are found.
-    :rtype: dict|None
+    :returns: The specs of the project found. Returns `None` if no matches are found.
+    :rtype: ProjectSpecs|None
     """
-    result = None
+    result: ProjectSpecs|None = None
     if connection:=_get_project_connection(project_id):
         with connection.create_session() as session:
-            project = session.get(Project, esgvoc.core.constants.SQLITE_FIRST_PK)
-            # Project can't be missing if session exists.
-            result = project.specs # type: ignore
+            project = session.get(Project, constants.SQLITE_FIRST_PK)
+            try:
+                # Project can't be missing if session exists.
+                result = ProjectSpecs(**project.specs) # type: ignore
+            except Exception as e:
+                msg = f'Unable to read specs in project {project_id}'
+                raise RuntimeError(msg) from e
     return result
 def get_all_projects() -> list[str]:
     """
     Gets all projects.
     :returns: A list of project ids.
     :rtype: list[str]
     """
-    return list(service.state_service.projects.keys())
+    return list(service.current_state.projects.keys())
 if __name__ == "__main__":
-    vr = valid_term('r1i1p1f111', 'cmip6plus', 'member_id', 'ripf')
-    if vr:
-        print('OK')
-    else:
-        print(vr)
-        from esgvoc.api import BasicValidationErrorVisitor
-        visitor = BasicValidationErrorVisitor()
-        for error in vr.errors:
-            print(error.accept(visitor))
+    settings = SearchSettings()
+    settings.selected_term_fields = ('id', 'drs_name')
+    settings.case_sensitive = False
+    matching_terms = find_terms_from_data_descriptor_in_all_projects('organisation', 'IpsL', settings)
+    print(matching_terms)

esgvoc 0.1.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

Potentially problematic release.

esgvoc 0.1.2-py3-none-any.whl → 0.3.0-py3-none-any.whl