PyPI - esgvoc - Versions diffs - 0.1.2__py3-none-any.whl - Mend

esgvoc 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of esgvoc might be problematic. Click here for more details.

Files changed (66) hide show

esgvoc/__init__.py +1 -0
esgvoc/api/__init__.py +62 -0
esgvoc/api/_utils.py +39 -0
esgvoc/api/data_descriptors/__init__.py +60 -0
esgvoc/api/data_descriptors/activity.py +51 -0
esgvoc/api/data_descriptors/consortium.py +66 -0
esgvoc/api/data_descriptors/date.py +48 -0
esgvoc/api/data_descriptors/experiment.py +60 -0
esgvoc/api/data_descriptors/forcing_index.py +47 -0
esgvoc/api/data_descriptors/frequency.py +45 -0
esgvoc/api/data_descriptors/grid_label.py +46 -0
esgvoc/api/data_descriptors/initialisation_index.py +46 -0
esgvoc/api/data_descriptors/institution.py +58 -0
esgvoc/api/data_descriptors/license.py +47 -0
esgvoc/api/data_descriptors/mip_era.py +46 -0
esgvoc/api/data_descriptors/model_component.py +47 -0
esgvoc/api/data_descriptors/organisation.py +42 -0
esgvoc/api/data_descriptors/physic_index.py +47 -0
esgvoc/api/data_descriptors/product.py +45 -0
esgvoc/api/data_descriptors/realisation_index.py +46 -0
esgvoc/api/data_descriptors/realm.py +44 -0
esgvoc/api/data_descriptors/resolution.py +46 -0
esgvoc/api/data_descriptors/source.py +57 -0
esgvoc/api/data_descriptors/source_type.py +43 -0
esgvoc/api/data_descriptors/sub_experiment.py +43 -0
esgvoc/api/data_descriptors/table.py +50 -0
esgvoc/api/data_descriptors/time_range.py +28 -0
esgvoc/api/data_descriptors/variable.py +77 -0
esgvoc/api/data_descriptors/variant_label.py +49 -0
esgvoc/api/projects.py +854 -0
esgvoc/api/report.py +86 -0
esgvoc/api/search.py +92 -0
esgvoc/api/universe.py +218 -0
esgvoc/apps/drs/__init__.py +16 -0
esgvoc/apps/drs/models.py +43 -0
esgvoc/apps/drs/parser.py +27 -0
esgvoc/cli/config.py +79 -0
esgvoc/cli/get.py +142 -0
esgvoc/cli/install.py +14 -0
esgvoc/cli/main.py +22 -0
esgvoc/cli/status.py +26 -0
esgvoc/cli/valid.py +156 -0
esgvoc/core/constants.py +13 -0
esgvoc/core/convert.py +0 -0
esgvoc/core/data_handler.py +133 -0
esgvoc/core/db/__init__.py +5 -0
esgvoc/core/db/connection.py +31 -0
esgvoc/core/db/models/mixins.py +18 -0
esgvoc/core/db/models/project.py +65 -0
esgvoc/core/db/models/universe.py +59 -0
esgvoc/core/db/project_ingestion.py +152 -0
esgvoc/core/db/universe_ingestion.py +120 -0
esgvoc/core/logging.conf +21 -0
esgvoc/core/logging_handler.py +4 -0
esgvoc/core/repo_fetcher.py +259 -0
esgvoc/core/service/__init__.py +8 -0
esgvoc/core/service/data_merger.py +83 -0
esgvoc/core/service/esg_voc.py +79 -0
esgvoc/core/service/settings.py +64 -0
esgvoc/core/service/settings.toml +12 -0
esgvoc/core/service/settings_default.toml +20 -0
esgvoc/core/service/state.py +222 -0
esgvoc-0.1.2.dist-info/METADATA +54 -0
esgvoc-0.1.2.dist-info/RECORD +66 -0
esgvoc-0.1.2.dist-info/WHEEL +4 -0
esgvoc-0.1.2.dist-info/entry_points.txt +2 -0

esgvoc/api/report.py ADDED Viewed

@@ -0,0 +1,86 @@
+from abc import ABC, abstractmethod
+from typing import Any
+import esgvoc.core.constants as api_settings
+from esgvoc.core.db.models.mixins import TermKind
+from esgvoc.core.db.models.project import PTerm
+from esgvoc.core.db.models.universe import UTerm
+class ValidationErrorVisitor(ABC):
+    """
+    Visitor interface over the kinds of validation error.
+    A concrete visitor implements one `visit_*` method per error class.
+    """
+    @abstractmethod
+    def visit_universe_term_error(self, error: "UniverseTermError") -> Any:
+        """Handle an error reported against a term of the universe."""
+    @abstractmethod
+    def visit_project_term_error(self, error: "ProjectTermError") -> Any:
+        """Handle an error reported against a term of a project."""
+class BasicValidationErrorVisitor(ValidationErrorVisitor):
+    """Visitor that turns each validation error into a one-sentence message."""
+    def visit_universe_term_error(self, error: "UniverseTermError") -> Any:
+        """Return the message for a universe term that does not validate the value."""
+        term_id = error.term[api_settings.TERM_ID_JSON_KEY]
+        return (f"The term {term_id} from the data descriptor {error.data_descriptor_id} "
+                f"does not validate the given value '{error.value}'")
+    def visit_project_term_error(self, error: "ProjectTermError") -> Any:
+        """Return the message for a project term that does not validate the value."""
+        term_id = error.term[api_settings.TERM_ID_JSON_KEY]
+        return (f"The term {term_id} from the collection {error.collection_id} "
+                f"does not validate the given value '{error.value}'")
+class ValidationError(ABC):
+    """
+    Base class of the validation errors.
+    It keeps the value that was checked; subclasses implement `accept`.
+    """
+    def __init__(self, value: str):
+        self.value: str = value
+    @abstractmethod
+    def accept(self, visitor: ValidationErrorVisitor) -> Any:
+        """Dispatch this error to the given visitor."""
+class UniverseTermError(ValidationError):
+    """Validation error reported against a term of the universe."""
+    def __init__(self, value: str, term: UTerm):
+        super().__init__(value)
+        # Keep the specs, the kind and the owning data descriptor id of the term.
+        self.term: dict = term.specs
+        self.term_kind: TermKind = term.kind
+        self.data_descriptor_id: str = term.data_descriptor.id
+    def accept(self, visitor: ValidationErrorVisitor) -> Any:
+        """Dispatch to `visit_universe_term_error` and return its result."""
+        return visitor.visit_universe_term_error(self)
+class ProjectTermError(ValidationError):
+    """Validation error reported against a term of a project."""
+    def __init__(self, value: str, term: PTerm):
+        super().__init__(value)
+        # Keep the specs, the kind and the owning collection id of the term.
+        self.term: dict = term.specs
+        self.term_kind: TermKind = term.kind
+        self.collection_id: str = term.collection.id
+    def accept(self, visitor: ValidationErrorVisitor) -> Any:
+        """Dispatch to `visit_project_term_error` and return its result."""
+        return visitor.visit_project_term_error(self)
+class ValidationReport:
+    """
+    Outcome of the validation of an expression.
+    A report is truthy when it holds no error, its length is the number of errors
+    and its representation is a one-line summary message.
+    """
+    def __init__(self, given_expression: str, errors: list[ValidationError]):
+        self.expression: str = given_expression
+        self.errors: list[ValidationError] = errors
+        # An empty (or missing) list of errors means that the expression is valid.
+        self.validated: bool = not errors
+        self.nb_errors = len(errors) if errors else 0
+        self.message = f"'{self.expression}' has {self.nb_errors} error(s)"
+    def __len__(self) -> int:
+        """Return the number of errors."""
+        return self.nb_errors
+    def __bool__(self) -> bool:
+        """Return True when the expression has been validated."""
+        return self.validated
+    def __repr__(self) -> str:
+        """Return the summary message."""
+        return self.message

esgvoc/api/search.py ADDED Viewed

@@ -0,0 +1,92 @@
+from dataclasses import dataclass
+from enum import Enum
+from pydantic import BaseModel
+from sqlalchemy import ColumnElement, func
+from sqlmodel import col
+@dataclass
+class MatchingTerm:
+    # Triple of ids: a project, a collection and a term.
+    project_id: str
+    collection_id: str
+    term_id: str
+class SearchType(Enum):
+    """
+    Kinds of string comparison supported by the search functions.
+    Every member value is a plain string, so a member can be looked up from its
+    name in lower case (e.g. `SearchType("exact")`).
+    Some values used to be one-element tuples (e.g. `("exact",)`) because of stray
+    trailing commas; looking up a member with such a tuple is still accepted.
+    """
+    EXACT = "exact"
+    LIKE = "like"  # can interpret %
+    STARTS_WITH = "starts_with"  # can interpret %
+    ENDS_WITH = "ends_with"  # can interpret %
+    REGEX = "regex"
+    @classmethod
+    def _missing_(cls, value):
+        """Resolve the legacy one-element tuple form of a value; None lets Enum raise ValueError."""
+        for member in cls:
+            if value == (member.value,):
+                return member
+        return None
+# Options that drive how a searched id is compared (pydantic model).
+class SearchSettings(BaseModel):
+    # Kind of comparison; exact match by default.
+    type: SearchType = SearchType.EXACT
+    # Whether the comparison is case sensitive; it is by default.
+    case_sensitive: bool = True
+    # Whether the comparison is negated; it is not by default.
+    not_operator: bool = False
+def create_str_comparison_expression(field: str,
+                                     value: str,
+                                     settings: SearchSettings|None) -> ColumnElement:
+    '''
+    Builds the SQL expression that compares `field` with `value` according to `settings`.
+    SQLite LIKE is case insensitive (and so STARTS/ENDS_WITH which are implemented with LIKE).
+    So the case sensitive LIKE is implemented with REGEX.
+    In that case, `value` is escaped beforehand so as to be matched literally, like the case
+    insensitive versions do (they are built with `autoescape`).
+    The i versions of SQLAlchemy operators (icontains, etc.) are not useful
+    (but other dbs than SQLite should use them).
+    If the provided `settings` is None, this functions returns an exact search expression.
+    :param field: The column to compare
+    :param value: The value the column is compared with
+    :param settings: The search settings or None for an exact search
+    :returns: The comparison expression, negated if `settings.not_operator` is set.
+    :raises NotImplementedError: If a case insensitive REGEX search is requested.
+    '''
+    import re
+    does_wild_cards_in_value_have_to_be_interpreted = False
+    # Shortcut.
+    if settings is None:
+        return col(field).is_(other=value)
+    else:
+        # Only the REGEX search type is allowed to interpret regex metacharacters.
+        literal_value = re.escape(value)
+        match settings.type:
+            # Early return because the not operator is not implemented with the tilde symbol.
+            case SearchType.EXACT:
+                if settings.case_sensitive:
+                    if settings.not_operator:
+                        return col(field).is_not(other=value)
+                    else:
+                        return col(field).is_(other=value)
+                else:
+                    if settings.not_operator:
+                        return func.lower(field) != func.lower(value)
+                    else:
+                        return func.lower(field) == func.lower(value)
+            case SearchType.LIKE:
+                if settings.case_sensitive:
+                    result = col(field).regexp_match(pattern=f".*{literal_value}.*")
+                else:
+                    result = col(field).contains(
+                        other=value,
+                        autoescape=not does_wild_cards_in_value_have_to_be_interpreted,
+                    )
+            case SearchType.STARTS_WITH:
+                if settings.case_sensitive:
+                    result = col(field).regexp_match(pattern=f"^{literal_value}.*")
+                else:
+                    result = col(field).startswith(
+                        other=value,
+                        autoescape=not does_wild_cards_in_value_have_to_be_interpreted,
+                    )
+            case SearchType.ENDS_WITH:
+                if settings.case_sensitive:
+                    result = col(field).regexp_match(pattern=f"{literal_value}$")
+                else:
+                    result = col(field).endswith(
+                        other=value,
+                        autoescape=not does_wild_cards_in_value_have_to_be_interpreted,
+                    )
+            case SearchType.REGEX:
+                if settings.case_sensitive:
+                    # The value is the pattern itself: it is passed as is.
+                    result = col(field).regexp_match(pattern=value)
+                else:
+                    raise NotImplementedError(
+                        "regex string comparison case insensitive is not implemented"
+                    )
+        if settings.not_operator:
+            return ~result
+        else:
+            return result

esgvoc/api/universe.py ADDED Viewed

@@ -0,0 +1,218 @@
+from typing import Sequence
+from esgvoc.api._utils import (get_universe_session,
+                               instantiate_pydantic_terms)
+from esgvoc.api.search import SearchSettings, create_str_comparison_expression
+from esgvoc.core.db.models.universe import DataDescriptor, UTerm
+from pydantic import BaseModel
+from sqlmodel import Session, select
+def _find_terms_in_data_descriptor(data_descriptor_id: str,
+                                   term_id: str,
+                                   session: Session,
+                                   settings: SearchSettings|None) -> Sequence[UTerm]:
+    """
+    Queries the terms of the given data descriptor whose id matches `term_id`.
+    The data descriptor id is compared for equality: settings only apply on the
+    term_id comparison.
+    """
+    term_filter = create_str_comparison_expression(field=UTerm.id,
+                                                   value=term_id,
+                                                   settings=settings)
+    data_descriptor_filter = DataDescriptor.id==data_descriptor_id
+    query = select(UTerm).join(DataDescriptor).where(data_descriptor_filter, term_filter)
+    return session.exec(query).all()
+def find_terms_in_data_descriptor(data_descriptor_id: str,
+                                  term_id: str,
+                                  settings: SearchSettings|None = None) \
+                                     -> list[BaseModel]:
+    """
+    Searches the given data descriptor for the terms that match `term_id`.
+    The `data_descriptor_id` is always matched exactly: similar or related descriptors
+    are **not** searched.
+    The `term_id` is matched according to the search type specified in the parameter `settings`
+    (e.g., `LIKE` may return multiple results).
+    If the parameter `settings` is `None`, the `term_id` is matched exactly.
+    If any of the provided ids (`data_descriptor_id` or `term_id`) is not found,
+    an empty list is returned.
+    Number of results according to the search type:
+    - `EXACT` and absence of `settings`: zero or one Pydantic term instance.
+    - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: zero, one or more Pydantic term instances.
+    :param data_descriptor_id: A data descriptor id
+    :type data_descriptor_id: str
+    :param term_id: A term id to be found
+    :type term_id: str
+    :param settings: The search settings
+    :type settings: SearchSettings|None
+    :returns: A list of Pydantic model term instances, empty if nothing matches.
+    :rtype: list[BaseModel]
+    """
+    found: list[BaseModel] = []
+    with get_universe_session() as session:
+        db_terms = _find_terms_in_data_descriptor(data_descriptor_id, term_id, session, settings)
+        # The Pydantic instances are collected in `found`.
+        instantiate_pydantic_terms(db_terms, found)
+    return found
+def _find_terms_in_universe(term_id: str,
+                            session: Session,
+                            settings: SearchSettings|None) -> Sequence[UTerm]:
+    """Queries the terms of the universe whose id matches `term_id` according to `settings`."""
+    term_filter = create_str_comparison_expression(field=UTerm.id,
+                                                   value=term_id,
+                                                   settings=settings)
+    query = select(UTerm).where(term_filter)
+    return session.exec(query).all()
+def find_terms_in_universe(term_id: str,
+                           settings: SearchSettings|None = None) \
+                              -> list[BaseModel]:
+    """
+    Searches the whole universe for the terms that match `term_id`.
+    The `term_id` is matched according to the search type specified in the parameter `settings`
+    (e.g., `LIKE` may return multiple results).
+    If the parameter `settings` is `None`, the `term_id` is matched exactly.
+    Terms are unique within a data descriptor but may have some synonyms in the universe.
+    If the provided `term_id` is not found, an empty list is returned.
+    :param term_id: A term id to be found
+    :type term_id: str
+    :param settings: The search settings
+    :type settings: SearchSettings|None
+    :returns: A list of Pydantic term instances, empty if nothing matches.
+    :rtype: list[BaseModel]
+    """
+    found: list[BaseModel] = []
+    with get_universe_session() as session:
+        db_terms = _find_terms_in_universe(term_id, session, settings)
+        # The Pydantic instances are collected in `found`.
+        instantiate_pydantic_terms(db_terms, found)
+    return found
+def _get_all_terms_in_data_descriptor(data_descriptor: DataDescriptor) -> list[BaseModel]:
+    """Returns the list filled by `instantiate_pydantic_terms` from the terms of the data descriptor."""
+    terms: list[BaseModel] = []
+    instantiate_pydantic_terms(data_descriptor.terms, terms)
+    return terms
+def _find_data_descriptors_in_universe(data_descriptor_id: str,
+                                       session: Session,
+                                       settings: SearchSettings|None) -> Sequence[DataDescriptor]:
+    """Queries the data descriptors whose id matches `data_descriptor_id` according to `settings`."""
+    id_filter = create_str_comparison_expression(field=DataDescriptor.id,
+                                                 value=data_descriptor_id,
+                                                 settings=settings)
+    query = select(DataDescriptor).where(id_filter)
+    return session.exec(query).all()
+def get_all_terms_in_data_descriptor(data_descriptor_id: str) \
+                                        -> list[BaseModel]:
+    """
+    Returns every term of the given data descriptor.
+    The `data_descriptor_id` is matched exactly: similar or related descriptors
+    are **not** searched.
+    If the provided `data_descriptor_id` is not found, an empty list is returned.
+    :param data_descriptor_id: A data descriptor id
+    :type data_descriptor_id: str
+    :returns: A list of Pydantic term instances, empty if the data descriptor is not found.
+    :rtype: list[BaseModel]
+    """
+    with get_universe_session() as session:
+        # No settings: exact search on the id.
+        matches = _find_data_descriptors_in_universe(data_descriptor_id, session, None)
+        if not matches:
+            return list()
+        # The terms are read while the session is still open.
+        return _get_all_terms_in_data_descriptor(matches[0])
+def find_data_descriptors_in_universe(data_descriptor_id: str,
+                                      settings: SearchSettings|None = None) \
+                                        -> list[dict]:
+    """
+    Searches the universe for the data descriptors that match `data_descriptor_id`.
+    The `data_descriptor_id` is matched according to the search type specified in
+    the parameter `settings` (e.g., `LIKE` may return multiple results).
+    If the parameter `settings` is `None`, the `data_descriptor_id` is matched exactly.
+    If the provided `data_descriptor_id` is not found, an empty list is returned.
+    Number of results according to the search type:
+    - `EXACT` and absence of `settings`: zero or one data descriptor context.
+    - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: zero, one or more data descriptor contexts.
+    :param data_descriptor_id: A data descriptor id to be found
+    :type data_descriptor_id: str
+    :param settings: The search settings
+    :type settings: SearchSettings|None
+    :returns: A list of data descriptor contexts, empty if nothing matches.
+    :rtype: list[dict]
+    """
+    with get_universe_session() as session:
+        matches = _find_data_descriptors_in_universe(data_descriptor_id, session, settings)
+        # The contexts are read while the session is still open.
+        return [data_descriptor.context for data_descriptor in matches]
+def _get_all_data_descriptors_in_universe(session: Session) -> Sequence[DataDescriptor]:
+    """Queries all the data descriptors through the given session."""
+    return session.exec(select(DataDescriptor)).all()
+def get_all_data_descriptors_in_universe() -> list[str]:
+    """
+    Returns the id of every data descriptor of the universe.
+    :returns: A list of data descriptor ids.
+    :rtype: list[str]
+    """
+    with get_universe_session() as session:
+        return [data_descriptor.id
+                for data_descriptor in _get_all_data_descriptors_in_universe(session)]
+def get_all_terms_in_universe() -> list[BaseModel]:
+    """
+    Returns every term of the universe, data descriptor after data descriptor.
+    Terms are unique within a data descriptor but may have some synonyms in the universe.
+    :returns: A list of Pydantic term instances.
+    :rtype: list[BaseModel]
+    """
+    all_terms = list()
+    with get_universe_session() as session:
+        for data_descriptor in _get_all_data_descriptors_in_universe(session):
+            # Term may have some synonyms within the whole universe.
+            all_terms.extend(_get_all_terms_in_data_descriptor(data_descriptor))
+    return all_terms
+if __name__ == "__main__":
+    # Manual check: exact search of the term 'ipsl' in the data descriptor 'institution'.
+    found_terms = find_terms_in_data_descriptor('institution', 'ipsl')
+    print(found_terms)

esgvoc/apps/drs/__init__.py ADDED Viewed

@@ -0,0 +1,16 @@
+from esgvoc.apps.drs.models import (DrsType,
+                                    DrsPartType,
+                                    DrsConstant,
+                                    DrsCollection,
+                                    DrsPart,
+                                    DrsSpecification,
+                                    ProjectSpecs)
+# Names exported by the package: the DRS models imported above.
+__all__ = [
+    "DrsType",
+    "DrsPartType",
+    "DrsConstant",
+    "DrsCollection",
+    "DrsPart",
+    "DrsSpecification",
+    "ProjectSpecs",
+]

esgvoc/apps/drs/models.py ADDED Viewed

@@ -0,0 +1,43 @@
+from enum import Enum
+from typing import Annotated, Literal
+from pydantic import BaseModel, ConfigDict, Field
+class DrsType(str, Enum):
+    """Types of DRS specification; each member is a string equal to its name."""
+    directory = "directory"
+    filename = "filename"
+    dataset_id = "dataset_id"
+class DrsPartType(str, Enum):
+    """Kinds of part of a DRS specification; each member is a string equal to its name."""
+    constant = "constant"
+    collection = "collection"
+# Part of a DRS specification of kind `constant` (pydantic model).
+class DrsConstant(BaseModel):
+    # The string carried by the part.
+    value: str
+    # Tag of the part, fixed to `constant`; it is the discriminator used by `DrsPart`.
+    kind: Literal[DrsPartType.constant] = DrsPartType.constant
+# Part of a DRS specification of kind `collection` (pydantic model).
+class DrsCollection(BaseModel):
+    # Id of the collection the part refers to.
+    collection_id: str
+    # Mandatory flag (no default value).
+    is_required: bool
+    # Tag of the part, fixed to `collection`; it is the discriminator used by `DrsPart`.
+    kind: Literal[DrsPartType.collection] = DrsPartType.collection
+# A DRS part is either a constant or a collection, told apart by their `kind` field.
+DrsPart = Annotated[DrsConstant | DrsCollection, Field(discriminator="kind")]
+# One DRS specification of a project (pydantic model).
+class DrsSpecification(BaseModel):
+    # Type of the specification (directory, filename or dataset id).
+    type: DrsType
+    # Separator string; presumably placed between the parts (TODO confirm against the DRS apps).
+    separator: str
+    # Optional free-form properties; None by default.
+    properties: dict|None = None
+    # The parts (constants or collections) of the specification.
+    parts: list[DrsPart]
+# Specifications of a project (pydantic model).
+class ProjectSpecs(BaseModel):
+    project_id: str
+    description: str
+    # The DRS specifications of the project.
+    drs_specs: list[DrsSpecification]
+    # Fields that are not declared above are allowed and kept.
+    model_config = ConfigDict(extra = "allow")

esgvoc/apps/drs/parser.py ADDED Viewed

@@ -0,0 +1,27 @@
+import logging
+from esgvoc.apps.drs.models import ProjectSpecs
+import esgvoc.api.projects as projects
+# Logger of the DRS application.
+_LOGGER = logging.getLogger("drs")
+def parse_project_specs(project_id: str) -> ProjectSpecs:
+    """
+    Finds the given project and reads its specifications as a `ProjectSpecs` instance.
+    Both failures are logged before being raised.
+    :param project_id: A project id
+    :type project_id: str
+    :returns: The specifications of the project.
+    :rtype: ProjectSpecs
+    :raises ValueError: If the project is not found.
+    :raises RuntimeError: If the specifications of the project cannot be read.
+    """
+    raw_specs = projects.find_project(project_id)
+    if not raw_specs:
+        msg = f'Unable to find project {project_id}'
+        _LOGGER.fatal(msg)
+        raise ValueError(msg)
+    try:
+        return ProjectSpecs(**raw_specs)
+    except Exception as e:
+        msg = f'Unable to read specs in project {project_id}'
+        _LOGGER.fatal(msg)
+        # The original error is kept as the cause.
+        raise RuntimeError(msg) from e
+if __name__ == "__main__":
+    # Manual check: show the second DRS specification of the project 'cmip6plus'.
+    cmip6plus_specs = parse_project_specs('cmip6plus')
+    print(cmip6plus_specs.drs_specs[1])

esgvoc/cli/config.py ADDED Viewed

@@ -0,0 +1,79 @@
+import json
+from rich.syntax import Syntax
+import typer
+from esgvoc.core.service.settings import SETTINGS_FILE, ServiceSettings, load_settings
+from rich import print
+import toml
+# Typer application on which the `config` command below is registered.
+app = typer.Typer()
+def get_nested_value(settings_dict: dict, key_path: str):
+    """
+    Return the value reached by following a dot-separated key path through nested dictionaries.
+    The error raised by a failing lookup (e.g. `KeyError` for a missing key) is not caught.
+    """
+    current = settings_dict
+    for part in key_path.split("."):
+        current = current[part]
+    return current
+def set_nested_value(settings_dict: dict, key_path: str, new_value):
+    """
+    Set the value located by a dot-separated key path in a nested dictionary.
+    The dictionary is modified in place and returned; the dictionaries leading to the last
+    key must already exist.
+    """
+    *parent_keys, last_key = key_path.split(".")
+    target = settings_dict
+    for parent_key in parent_keys:
+        target = target[parent_key]
+    target[last_key] = new_value
+    return settings_dict
+@app.command()
+def config(key: str |None = typer.Argument(None), value: str|None = typer.Argument(None)):
+    """
+    Manage configuration settings.
+    - With no arguments: display all settings.
+    - With one argument (key): display the value of the key.
+    - With two arguments (key and value): modify the key's value and save.
+    """
+    # NOTE(review): the docstring above is presumably shown by typer as the help of the
+    # command, so it is kept as is; details are given in the comments below.
+    settings = load_settings()
+    if key is None:
+        # No key provided, print all settings
+        syntax = Syntax(toml.dumps(settings.model_dump()), "toml")
+        print(syntax)
+        return
+    # JSON round trip of the settings, done once and shared by the lookups below.
+    settings_dict = json.loads(settings.model_dump_json())
+    if value is None:
+        # Key provided but no value, print the specific key's value.
+        # The key is looked up as given, then under the "projects" section.
+        for candidate_key in (key, "projects."+key):
+            try:
+                selected_value = get_nested_value(settings_dict, candidate_key)
+            except (KeyError, TypeError):
+                # TypeError: the key path goes through a value that is not a dictionary.
+                continue
+            typer.echo(selected_value)
+            return
+        typer.echo(f"Key '{key}' not found in settings.")
+        return
+    # Modify the key's value
+    try:
+        get_nested_value(settings_dict, key)
+    except Exception:
+        # Not found as given: fall back on the "projects" section.
+        key = "projects."+key
+    try:
+        # The lookup raises if the key does not exist. Its result is deliberately not tested:
+        # a key whose current value is falsy (empty string, 0, False, None) does exist and
+        # must be updatable.
+        get_nested_value(settings_dict, key)
+        new_settings_dict = set_nested_value(settings_dict, key, value)
+        new_settings = ServiceSettings(**new_settings_dict)
+        new_settings.save_to_file(str(SETTINGS_FILE))  # TODO: improve that, remove the SETTINGS_FILE dependency
+        typer.echo(f"New settings {new_settings.model_dump_json(indent=4)}")
+        typer.echo(f"Updated '{key}' to '{value}'.")
+    except Exception as e:
+        typer.echo(f"Error updating settings: {e}")