PyPI - esgvoc - Versions diffs - 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

esgvoc 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of esgvoc might be problematic. Click here for more details.

Files changed (29) hide show

esgvoc/__init__.py +1 -1
esgvoc/api/__init__.py +95 -60
esgvoc/api/project_specs.py +3 -2
esgvoc/api/projects.py +671 -406
esgvoc/api/py.typed +0 -0
esgvoc/api/report.py +12 -8
esgvoc/api/search.py +141 -98
esgvoc/api/universe.py +353 -157
esgvoc/apps/drs/constants.py +1 -1
esgvoc/apps/drs/generator.py +51 -69
esgvoc/apps/drs/report.py +60 -15
esgvoc/apps/drs/validator.py +60 -71
esgvoc/apps/py.typed +0 -0
esgvoc/cli/drs.py +3 -2
esgvoc/cli/get.py +9 -6
esgvoc/core/constants.py +1 -1
esgvoc/core/db/__init__.py +2 -4
esgvoc/core/db/connection.py +5 -3
esgvoc/core/db/models/project.py +50 -8
esgvoc/core/db/models/universe.py +48 -9
esgvoc/core/db/project_ingestion.py +60 -46
esgvoc/core/db/universe_ingestion.py +55 -27
esgvoc/core/exceptions.py +33 -0
{esgvoc-0.3.0.dist-info → esgvoc-0.4.0.dist-info}/METADATA +1 -1
{esgvoc-0.3.0.dist-info → esgvoc-0.4.0.dist-info}/RECORD +28 -26
esgvoc/api/_utils.py +0 -53
{esgvoc-0.3.0.dist-info → esgvoc-0.4.0.dist-info}/WHEEL +0 -0
{esgvoc-0.3.0.dist-info → esgvoc-0.4.0.dist-info}/entry_points.txt +0 -0
{esgvoc-0.3.0.dist-info → esgvoc-0.4.0.dist-info}/licenses/LICENSE.txt +0 -0

esgvoc/apps/drs/validator.py CHANGED Viewed

@@ -2,17 +2,33 @@ from typing import cast
 import esgvoc.api.projects as projects
 import esgvoc.apps.drs.constants as constants
-from esgvoc.api import APIException
-from esgvoc.api.project_specs import (DrsCollection, DrsConstant, DrsPart,
-                                      DrsPartKind, DrsSpecification, DrsType,
-                                      ProjectSpecs)
-from esgvoc.apps.drs.report import (BlankTerm, ComplianceIssue, DrsIssue,
-                                    DrsValidationReport, ExtraChar,
-                                    ExtraSeparator, ExtraTerm,
-                                    FileNameExtensionIssue, InvalidTerm,
-                                    MissingTerm, ParsingIssue, Space,
-                                    Unparsable, ValidationError,
-                                    ValidationWarning)
+from esgvoc.api.project_specs import (
+    DrsCollection,
+    DrsConstant,
+    DrsPart,
+    DrsPartKind,
+    DrsSpecification,
+    DrsType,
+    ProjectSpecs,
+)
+from esgvoc.apps.drs.report import (
+    BlankTerm,
+    ComplianceIssue,
+    DrsIssue,
+    DrsValidationReport,
+    ExtraChar,
+    ExtraSeparator,
+    ExtraTerm,
+    FileNameExtensionIssue,
+    InvalidTerm,
+    MissingTerm,
+    ParsingIssue,
+    Space,
+    Unparsable,
+    ValidationError,
+    ValidationWarning,
+)
+from esgvoc.core.exceptions import EsgvocDbError, EsgvocNotFoundError
 class DrsApplication:
@@ -25,9 +41,9 @@ class DrsApplication:
         """The project id."""
         self.pedantic: bool = pedantic
         """Same as the option of GCC: turn warnings into errors. Default False."""
-        project_specs: ProjectSpecs|None = projects.find_project(project_id)
+        project_specs: ProjectSpecs | None = projects.get_project(project_id)
         if not project_specs:
-            raise APIException(f'unable to find project {project_id}')
+            raise EsgvocNotFoundError(f"unable to find project '{project_id}'")
         for specs in project_specs.drs_specs:
             match specs.type:
                 case DrsType.DIRECTORY:
@@ -40,7 +56,7 @@ class DrsApplication:
                     self.dataset_id_specs: DrsSpecification = specs
                     """The DRS dataset id specs of the project."""
                 case _:
-                    raise RuntimeError(f'unsupported DRS specs type {specs.type}')
+                    raise EsgvocDbError(f"unsupported DRS specs type '{specs.type}'")
     def _get_full_file_name_extension(self) -> str:
         """
@@ -55,8 +71,8 @@ class DrsApplication:
             full_extension = specs.properties[constants.FILE_NAME_EXTENSION_SEPARATOR_KEY] + \
                              specs.properties[constants.FILE_NAME_EXTENSION_KEY]
         else:
-            raise RuntimeError('missing properties in the DRS file name specifications of the ' +
-                               f'project {self.project_id}')
+            raise EsgvocDbError('missing properties in the DRS file name specifications of the ' +
+                                f"project '{self.project_id}'")
         return full_extension
@@ -66,7 +82,7 @@ class DrsValidator(DrsApplication):
     """
     def validate_directory(self, drs_expression: str,
-                           prefix: str|None = None) -> DrsValidationReport:
+                           prefix: str | None = None) -> DrsValidationReport:
         """
         Validate a DRS directory expression.
@@ -112,7 +128,7 @@ class DrsValidator(DrsApplication):
                                          [issue], [])
         return result
-    def validate(self, drs_expression: str, drs_type: DrsType|str) -> DrsValidationReport:
+    def validate(self, drs_expression: str, drs_type: DrsType | str) -> DrsValidationReport:
         """
         Validate a DRS expression.
@@ -131,14 +147,14 @@ class DrsValidator(DrsApplication):
             case DrsType.DATASET_ID:
                 return self.validate_dataset_id(drs_expression=drs_expression)
             case _:
-                raise RuntimeError(f'unsupported drs type {drs_type}')
+                raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
     def _parse(self,
                drs_expression: str,
                separator: str,
-               drs_type: DrsType) -> tuple[list[str]|None,  # terms
+               drs_type: DrsType) -> tuple[list[str] | None,  # terms
                                            list[DrsIssue],  # Errors
-                                           list[DrsIssue]]: # Warnings
+                                           list[DrsIssue]]:  # Warnings
         errors: list[DrsIssue] = list()
         warnings: list[DrsIssue] = list()
         cursor_offset = 0
@@ -160,7 +176,7 @@ class DrsValidator(DrsApplication):
         terms = drs_expression.split(separator)
         if len(terms) < 2:
             errors.append(Unparsable(expected_drs_type=drs_type))
-            return None, errors, warnings # Early exit
+            return None, errors, warnings  # Early exit
         max_term_index = len(terms)
         cursor_position = initial_cursor_position = len(drs_expression) + 1
         has_white_term = False
@@ -178,7 +194,10 @@ class DrsValidator(DrsApplication):
             column = cursor_position+cursor_offset
             if (drs_type == DrsType.DIRECTORY) and (not has_white_term):
                 issue = ExtraSeparator(column=column)
-                warnings.append(issue)
+                if self.pedantic:
+                    errors.append(issue)
+                else:
+                    warnings.append(issue)
             else:
                 issue = ExtraChar(column=column)
                 errors.append(issue)
@@ -188,7 +207,7 @@ class DrsValidator(DrsApplication):
             if not term:
                 column = cursor_position + cursor_offset
                 issue = ExtraSeparator(column=column)
-                if (drs_type != DrsType.DIRECTORY) or self.pedantic or (index == 0):
+                if self.pedantic or drs_type != DrsType.DIRECTORY or index == 0:
                     errors.append(issue)
                 else:
                     warnings.append(issue)
@@ -200,10 +219,10 @@ class DrsValidator(DrsApplication):
                 del terms[index]
             cursor_position -= len_term + 1
-        # Mypy doesn't understand that ParsingIssues are DrsIssues...
-        sorted_errors = DrsValidator._sort_parser_issues(errors) # type: ignore
-        sorted_warnings = DrsValidator._sort_parser_issues(warnings) # type: ignore
-        return terms, sorted_errors, sorted_warnings # type: ignore
+        # Mypy doesn't understand that ParsingIssues are DrsIssues...
+        sorted_errors = DrsValidator._sort_parser_issues(errors)  # type: ignore
+        sorted_warnings = DrsValidator._sort_parser_issues(warnings)  # type: ignore
+        return terms, sorted_errors, sorted_warnings  # type: ignore
     @staticmethod
     def _sort_parser_issues(issues: list[ParsingIssue]) -> list[ParsingIssue]:
@@ -213,13 +232,9 @@ class DrsValidator(DrsApplication):
         match part.kind:
             case DrsPartKind.COLLECTION:
                 casted_part: DrsCollection = cast(DrsCollection, part)
-                try:
-                    matching_terms = projects.valid_term_in_collection(term,
-                                                                       self.project_id,
-                                                                       casted_part.collection_id)
-                except Exception as e:
-                    msg = f'problem while validating term: {e}.Abort.'
-                    raise APIException(msg) from e
+                matching_terms = projects.valid_term_in_collection(term,
+                                                                   self.project_id,
+                                                                   casted_part.collection_id)
                 if len(matching_terms) > 0:
                     return True
                 else:
@@ -228,7 +243,7 @@ class DrsValidator(DrsApplication):
                 part_casted: DrsConstant = cast(DrsConstant, part)
                 return part_casted.value != term
             case _:
-                raise RuntimeError(f'unsupported DRS specs part type {part.kind}')
+                raise EsgvocDbError(f"unsupported DRS specs part type '{part.kind}'")
     def _create_report(self,
                        type: DrsType,
@@ -245,7 +260,7 @@ class DrsValidator(DrsApplication):
                   specs: DrsSpecification) -> DrsValidationReport:
         terms, errors, warnings = self._parse(drs_expression, specs.separator, specs.type)
         if not terms:
-            return self._create_report(specs.type, drs_expression, errors, warnings) # Early exit.
+            return self._create_report(specs.type, drs_expression, errors, warnings)  # Early exit.
         term_index = 0
         term_max_index = len(terms)
         part_index = 0
@@ -259,27 +274,27 @@ class DrsValidator(DrsApplication):
                 part_index += 1
                 matching_code_mapping[part.__str__()] = 0
             elif part.kind == DrsPartKind.CONSTANT or \
-                 cast(DrsCollection, part).is_required:
+                 cast(DrsCollection, part).is_required:  # noqa E127
                 issue: ComplianceIssue = InvalidTerm(term=term,
-                                                      term_position=term_index+1,
-                                                      collection_id_or_constant_value=str(part))
+                                                     term_position=term_index+1,
+                                                     collection_id_or_constant_value=str(part))
                 errors.append(issue)
                 matching_code_mapping[part.__str__()] = 1
                 term_index += 1
                 part_index += 1
-            else: # The part is not required so try to match the term with the next part.
+            else:  # The part is not required so try to match the term with the next part.
                 part_index += 1
                 matching_code_mapping[part.__str__()] = -1
             if term_index == term_max_index:
                 break
         # Cases:
         # - All terms and collections have been processed.
-        # - Not enough term to process all collections.
+        # - Not enough term to process all collections.
         # - Extra terms left whereas all collections have been processed:
         #   + The last collections are required => report extra terms.
         #   + The last collections are not required and these terms were not validated by them.
         #     => Should report error even if the collections are not required.
-        if part_index < part_max_index: # Missing terms.
+        if part_index < part_max_index:  # Missing terms.
             for index in range(part_index, part_max_index):
                 part = specs.parts[index]
                 issue = MissingTerm(collection_id=str(part), collection_position=index+1)
@@ -288,43 +303,17 @@ class DrsValidator(DrsApplication):
                     errors.append(issue)
                 else:
                     warnings.append(issue)
-        elif term_index < term_max_index: # Extra terms.
+        elif term_index < term_max_index:  # Extra terms.
             part_index -= term_max_index - term_index
             for index in range(term_index, term_max_index):
                 term = terms[index]
                 part = specs.parts[part_index]
                 if part.kind != DrsPartKind.CONSTANT           and \
                    (not cast(DrsCollection, part).is_required) and \
-                    matching_code_mapping[part.__str__()] < 0:
+                    matching_code_mapping[part.__str__()] < 0: # noqa E125
                     issue = ExtraTerm(term=term, term_position=index, collection_id=str(part))
                 else:
                     issue = ExtraTerm(term=term, term_position=index, collection_id=None)
                 errors.append(issue)
                 part_index += 1
         return self._create_report(specs.type, drs_expression, errors, warnings)
-if __name__ == "__main__":
-    project_id = 'cmip6plus'
-    validator = DrsValidator(project_id)
-    drs_expressions = [
-".CMIP6Plus.CMIP.IPSL.  .MIROC6.amip..r2i2p1f2.ACmon.od550aer. ..gn",
-]
-    import time
-    for drs_expression in drs_expressions:
-        start_time = time.perf_counter_ns()
-        report = validator.validate_dataset_id(drs_expression)
-        stop_time = time.perf_counter_ns()
-        print(f'elapsed time: {(stop_time-start_time)/1000000} ms')
-        if report.nb_errors > 0:
-            print(f'error(s): {report.nb_errors}')
-            for error in report.errors:
-                print(error)
-        else:
-            print('error(s): 0')
-        if report.nb_warnings > 0:
-            print(f'warning(s): {report.nb_warnings}')
-            for warning in report.warnings:
-                print(warning)
-        else:
-            print('warning(s): 0')

esgvoc/apps/py.typed ADDED Viewed

File without changes

esgvoc/cli/drs.py CHANGED Viewed

@@ -10,6 +10,7 @@ import esgvoc.api as ev
 from esgvoc.apps.drs.generator import DrsGenerator
 from esgvoc.apps.drs.report import DrsGenerationReport, DrsValidationReport
 from esgvoc.apps.drs.validator import DrsValidator
+from esgvoc.core.exceptions import EsgvocValueError
 app = typer.Typer()
 console = Console()
@@ -108,7 +109,7 @@ def drsvalid(
             case "dataset":
                 report = validator.validate_dataset_id(string)
             case _:
-                raise RuntimeError("drstype is not known")
+                raise EsgvocValueError(f"unsupported drs type '{current_drs_type}'")
         reports.append(report)
     if verbose:
@@ -213,7 +214,7 @@ def drsgen(
             case "dataset":
                 report = generator.generate_dataset_id_from_bag_of_terms(bag_of_terms)
             case _:
-                raise RuntimeError("drstype is not known")
+                raise EsgvocValueError(f"unsupported drs type '{current_drs_type}'")
         generated_reports.append(report)
     if verbose:

esgvoc/cli/get.py CHANGED Viewed

@@ -1,7 +1,10 @@
 from typing import Any
-from esgvoc.api.projects import find_terms_in_collection, find_terms_in_project, get_all_collections_in_project, get_all_projects, get_all_terms_in_collection
-from esgvoc.api.universe import find_terms_in_data_descriptor, find_terms_in_universe, get_all_data_descriptors_in_universe, get_all_terms_in_data_descriptor
+from esgvoc.api.projects import get_all_collections_in_project, get_all_projects, \
+    get_all_terms_in_collection, get_term_in_project, get_term_in_collection
+from esgvoc.api.universe import find_terms_in_data_descriptor, find_terms_in_universe, \
+    get_all_data_descriptors_in_universe, get_all_terms_in_data_descriptor, get_term_in_data_descriptor, \
+    get_term_in_universe
 from pydantic import BaseModel
 from requests import logging
 from rich.table import Table
@@ -28,11 +31,11 @@ def handle_universe(data_descriptor_id:str|None,term_id:str|None, options=None):
     _LOGGER.debug(f"Handling universe with data_descriptor_id={data_descriptor_id}, term_id={term_id}")
     if data_descriptor_id and term_id:
-        return find_terms_in_data_descriptor(data_descriptor_id,term_id,options)
+        return get_term_in_data_descriptor(data_descriptor_id, term_id, options)
         # BaseModel|dict[str: BaseModel]|None:
     elif term_id:
-        return find_terms_in_universe(term_id,options)
+        return get_term_in_universe(term_id, options)
         # dict[str, BaseModel] | dict[str, dict[str, BaseModel]] | None:
@@ -48,11 +51,11 @@ def handle_project(project_id:str,collection_id:str|None,term_id:str|None,option
     _LOGGER.debug(f"Handling project {project_id} with Y={collection_id}, Z={term_id}, options = {options}")
     if project_id and collection_id and term_id:
-        return find_terms_in_collection(project_id,collection_id,term_id)
+        return get_term_in_collection(project_id, collection_id, term_id, options)
         # BaseModel|dict[str: BaseModel]|None:
     elif term_id:
-        return find_terms_in_project(project_id, term_id,options)
+        return get_term_in_project(project_id, term_id,options)
         # dict[str, BaseModel] | dict[str, dict[str, BaseModel]] | None:

esgvoc/core/constants.py CHANGED Viewed

@@ -10,4 +10,4 @@ PATTERN_JSON_KEY = 'regex'
 TERM_TYPE_JSON_KEY = 'type'
 DRS_SPECS_JSON_KEY = 'drs_name'
 SQLITE_FIRST_PK = 1
-DATA_DESCRIPTOR_JSON_KEY = "@base"
+DATA_DESCRIPTOR_JSON_KEY = "@base"

esgvoc/core/db/__init__.py CHANGED Viewed

@@ -1,5 +1,3 @@
-from esgvoc.core.db.connection import read_json_file
-from esgvoc.core.db.connection import DBConnection
+from esgvoc.core.db.connection import DBConnection, read_json_file
-__all__ = ["DBConnection", "read_json_file"]
+__all__ = ["DBConnection", "read_json_file"]

esgvoc/core/db/connection.py CHANGED Viewed

@@ -1,11 +1,13 @@
-from pathlib import Path
 import json
+from pathlib import Path
 from sqlalchemy import Engine
 from sqlmodel import Session, create_engine
 class DBConnection:
     SQLITE_URL_PREFIX = 'sqlite://'
     def __init__(self, db_file_path: Path, echo: bool = False) -> None:
         self.engine = create_engine(f'{DBConnection.SQLITE_URL_PREFIX}/{db_file_path}', echo=echo)
         self.name = db_file_path.stem
@@ -20,7 +22,7 @@ class DBConnection:
     def create_session(self) -> Session:
         return Session(self.engine)
-    def get_name(self) -> str|None:
+    def get_name(self) -> str | None:
         return self.name
     def get_file_path(self) -> Path:
@@ -28,4 +30,4 @@ class DBConnection:
 def read_json_file(json_file_path: Path) -> dict:
-    return json.loads(json_file_path.read_text())
+    return json.loads(json_file_path.read_text())

esgvoc/core/db/models/project.py CHANGED Viewed

@@ -2,11 +2,14 @@ import logging
 from pathlib import Path
 import sqlalchemy as sa
+from sqlalchemy import text
 from sqlalchemy.dialects.sqlite import JSON
 from sqlmodel import Column, Field, Relationship, SQLModel
 import esgvoc.core.db.connection as db
 from esgvoc.core.db.models.mixins import IdMixin, PkMixin, TermKind
+from esgvoc.core.exceptions import EsgvocDbError
 _LOGGER = logging.getLogger("project_db_creation")
@@ -27,34 +30,73 @@ class Collection(SQLModel, PkMixin, IdMixin, table=True):
     term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
+# Well, the following instructions are not data duplication. It is more building an index.
+# Read: https://sqlite.org/fts5.html
+class PCollectionFTS5(SQLModel, PkMixin, IdMixin, table=True):
+    __tablename__ = "pcollections_fts5"
+    data_descriptor_id: str
+    context: dict = Field(sa_column=sa.Column(JSON))
+    project_pk: int | None = Field(default=None, foreign_key="projects.pk")
+    term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
 class PTerm(SQLModel, PkMixin, IdMixin, table=True):
     __tablename__ = "pterms"
     specs: dict = Field(sa_column=sa.Column(JSON))
     kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
     collection_pk: int | None = Field(default=None, foreign_key="collections.pk")
     collection: Collection = Relationship(back_populates="terms")
-    __table_args__ = (sa.Index(
-             "drs_name_index", specs.sa_column["drs_name"]
+    __table_args__ = (sa.Index("drs_name_index", specs.sa_column["drs_name"]), )  # type: ignore
+# Well, the following instructions are not data duplication. It is more building an index.
+# Read: https://sqlite.org/fts5.html
+class PTermFTS5(SQLModel, PkMixin, IdMixin, table=True):
+    __tablename__ = "pterms_fts5"
+    specs: dict = Field(sa_column=sa.Column(JSON))
+    kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
+    collection_pk: int | None = Field(default=None, foreign_key="collections.pk")
-         ),)
 def project_create_db(db_file_path: Path):
     try:
         connection = db.DBConnection(db_file_path)
     except Exception as e:
-        msg = f'Unable to create SQlite file at {db_file_path}. Abort.'
+        msg = f'unable to create SQlite file at {db_file_path}'
         _LOGGER.fatal(msg)
-        raise RuntimeError(msg) from e
+        raise EsgvocDbError(msg) from e
     try:
+        # Do not include pterms_fts5 table: it is build from a raw SQL query.
         tables_to_be_created = [SQLModel.metadata.tables['projects'],
                                 SQLModel.metadata.tables['collections'],
                                 SQLModel.metadata.tables['pterms']]
         SQLModel.metadata.create_all(connection.get_engine(), tables=tables_to_be_created)
     except Exception as e:
-        msg = f'Unable to create tables in SQLite database at {db_file_path}. Abort.'
+        msg = f'unable to create tables in SQLite database at {db_file_path}'
+        _LOGGER.fatal(msg)
+        raise EsgvocDbError(msg) from e
+    try:
+        with connection.create_session() as session:
+            sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS pterms_fts5 USING ' + \
+                        'fts5(pk, id, specs, kind, collection_pk, content=pterms, content_rowid=pk);'
+            session.exec(text(sql_query))  # type: ignore
+            session.commit()
+    except Exception as e:
+        msg = f'unable to create table pterms_fts5 for {db_file_path}'
+        _LOGGER.fatal(msg)
+        raise EsgvocDbError(msg) from e
+    try:
+        with connection.create_session() as session:
+            sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS pcollections_fts5 USING ' + \
+                        'fts5(pk, id, data_descriptor_id, context, project_pk, ' + \
+                        'term_kind, content=collections, content_rowid=pk);'
+            session.exec(text(sql_query))  # type: ignore
+            session.commit()
+    except Exception as e:
+        msg = f'unable to create table pcollections_fts5 for {db_file_path}'
         _LOGGER.fatal(msg)
-        raise RuntimeError(msg) from e
+        raise EsgvocDbError(msg) from e
 if __name__ == "__main__":
-    pass
+    pass

esgvoc/core/db/models/universe.py CHANGED Viewed

@@ -2,11 +2,13 @@ import logging
 from pathlib import Path
 import sqlalchemy as sa
+from sqlalchemy import text
 from sqlalchemy.dialects.sqlite import JSON
 from sqlmodel import Column, Field, Relationship, SQLModel
 import esgvoc.core.db.connection as db
 from esgvoc.core.db.models.mixins import IdMixin, PkMixin, TermKind
+from esgvoc.core.exceptions import EsgvocDbError
 _LOGGER = logging.getLogger("universe_db_creation")
@@ -18,7 +20,7 @@ class Universe(SQLModel, PkMixin, table=True):
 class UDataDescriptor(SQLModel, PkMixin, IdMixin, table=True):
-    __tablename__ = "data_descriptors"
+    __tablename__ = "udata_descriptors"
     context: dict = Field(sa_column=sa.Column(JSON))
     universe_pk: int | None = Field(default=None, foreign_key="universes.pk")
     universe: Universe = Relationship(back_populates="data_descriptors")
@@ -26,33 +28,70 @@ class UDataDescriptor(SQLModel, PkMixin, IdMixin, table=True):
     term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
+# Well, the following instructions are not data duplication. It is more building an index.
+# Read: https://sqlite.org/fts5.html
+class UDataDescriptorFTS5(SQLModel, PkMixin, IdMixin, table=True):
+    __tablename__ = "udata_descriptors_fts5"
+    context: dict = Field(sa_column=sa.Column(JSON))
+    universe_pk: int | None = Field(default=None, foreign_key="universes.pk")
+    term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
 class UTerm(SQLModel, PkMixin, IdMixin, table=True):
     __tablename__ = "uterms"
     specs: dict = Field(sa_column=sa.Column(JSON))
     kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
-    data_descriptor_pk: int | None = Field(
-        default=None, foreign_key="data_descriptors.pk"
-    )
+    data_descriptor_pk: int | None = Field(default=None, foreign_key="udata_descriptors.pk")
     data_descriptor: UDataDescriptor = Relationship(back_populates="terms")
+# Well, the following instructions are not data duplication. It is more building an index.
+# Read: https://sqlite.org/fts5.html
+class UTermFTS5(SQLModel, PkMixin, IdMixin, table=True):
+    __tablename__ = "uterms_fts5"
+    specs: dict = Field(sa_column=sa.Column(JSON))
+    kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
+    data_descriptor_pk: int | None = Field(default=None, foreign_key="udata_descriptors.pk")
 def universe_create_db(db_file_path: Path) -> None:
     try:
         connection = db.DBConnection(db_file_path)
     except Exception as e:
-        msg = f'Unable to create SQLite file at {db_file_path}. Abort.'
+        msg = f'unable to create SQLite file at {db_file_path}'
         _LOGGER.fatal(msg)
-        raise RuntimeError(msg) from e
+        raise EsgvocDbError(msg) from e
     try:
         # Avoid creating project tables.
         tables_to_be_created = [SQLModel.metadata.tables['uterms'],
-                                SQLModel.metadata.tables['data_descriptors'],
+                                SQLModel.metadata.tables['udata_descriptors'],
                                 SQLModel.metadata.tables['universes']]
         SQLModel.metadata.create_all(connection.get_engine(), tables=tables_to_be_created)
     except Exception as e:
-        msg = f'Unable to create tables in SQLite database at {db_file_path}. Abort.'
+        msg = f'unable to create tables in SQLite database at {db_file_path}'
+        _LOGGER.fatal(msg)
+        raise EsgvocDbError(msg) from e
+    try:
+        with connection.create_session() as session:
+            sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS uterms_fts5 USING ' + \
+                        'fts5(pk, id, specs, kind, data_descriptor_pk, content=uterms, content_rowid=pk);'
+            session.exec(text(sql_query))  # type: ignore
+            session.commit()
+    except Exception as e:
+        msg = f'unable to create table uterms_fts5 for {db_file_path}'
+        _LOGGER.fatal(msg)
+        raise EsgvocDbError(msg) from e
+    try:
+        with connection.create_session() as session:
+            sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS udata_descriptors_fts5 USING ' + \
+                        'fts5(pk, id, universe_pk, context, ' + \
+                        'term_kind, content=udata_descriptors, content_rowid=pk);'
+            session.exec(text(sql_query))  # type: ignore
+            session.commit()
+    except Exception as e:
+        msg = f'unable to create table udata_descriptors_fts5 for {db_file_path}'
         _LOGGER.fatal(msg)
-        raise RuntimeError(msg) from e
+        raise EsgvocDbError(msg) from e
 if __name__ == "__main__":

esgvoc 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

Potentially problematic release.

esgvoc 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl