esgvoc 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic. Click here for more details.

Files changed (79)
  1. esgvoc/__init__.py +3 -1
  2. esgvoc/api/__init__.py +96 -72
  3. esgvoc/api/data_descriptors/__init__.py +18 -12
  4. esgvoc/api/data_descriptors/activity.py +8 -45
  5. esgvoc/api/data_descriptors/area_label.py +6 -0
  6. esgvoc/api/data_descriptors/branded_suffix.py +5 -0
  7. esgvoc/api/data_descriptors/branded_variable.py +5 -0
  8. esgvoc/api/data_descriptors/consortium.py +16 -56
  9. esgvoc/api/data_descriptors/data_descriptor.py +106 -0
  10. esgvoc/api/data_descriptors/date.py +3 -46
  11. esgvoc/api/data_descriptors/directory_date.py +3 -46
  12. esgvoc/api/data_descriptors/experiment.py +19 -54
  13. esgvoc/api/data_descriptors/forcing_index.py +3 -45
  14. esgvoc/api/data_descriptors/frequency.py +6 -43
  15. esgvoc/api/data_descriptors/grid_label.py +6 -44
  16. esgvoc/api/data_descriptors/horizontal_label.py +6 -0
  17. esgvoc/api/data_descriptors/initialisation_index.py +3 -44
  18. esgvoc/api/data_descriptors/institution.py +11 -54
  19. esgvoc/api/data_descriptors/license.py +4 -44
  20. esgvoc/api/data_descriptors/mip_era.py +6 -44
  21. esgvoc/api/data_descriptors/model_component.py +7 -45
  22. esgvoc/api/data_descriptors/organisation.py +3 -40
  23. esgvoc/api/data_descriptors/physic_index.py +3 -45
  24. esgvoc/api/data_descriptors/product.py +4 -43
  25. esgvoc/api/data_descriptors/realisation_index.py +3 -44
  26. esgvoc/api/data_descriptors/realm.py +4 -42
  27. esgvoc/api/data_descriptors/resolution.py +6 -44
  28. esgvoc/api/data_descriptors/source.py +18 -53
  29. esgvoc/api/data_descriptors/source_type.py +3 -41
  30. esgvoc/api/data_descriptors/sub_experiment.py +3 -41
  31. esgvoc/api/data_descriptors/table.py +6 -48
  32. esgvoc/api/data_descriptors/temporal_label.py +6 -0
  33. esgvoc/api/data_descriptors/time_range.py +3 -27
  34. esgvoc/api/data_descriptors/variable.py +13 -71
  35. esgvoc/api/data_descriptors/variant_label.py +3 -47
  36. esgvoc/api/data_descriptors/vertical_label.py +5 -0
  37. esgvoc/api/project_specs.py +3 -2
  38. esgvoc/api/projects.py +727 -446
  39. esgvoc/api/py.typed +0 -0
  40. esgvoc/api/report.py +29 -16
  41. esgvoc/api/search.py +140 -95
  42. esgvoc/api/universe.py +362 -156
  43. esgvoc/apps/__init__.py +3 -4
  44. esgvoc/apps/drs/constants.py +1 -1
  45. esgvoc/apps/drs/generator.py +185 -198
  46. esgvoc/apps/drs/report.py +272 -136
  47. esgvoc/apps/drs/validator.py +132 -145
  48. esgvoc/apps/py.typed +0 -0
  49. esgvoc/cli/drs.py +32 -21
  50. esgvoc/cli/get.py +35 -31
  51. esgvoc/cli/install.py +11 -8
  52. esgvoc/cli/main.py +0 -2
  53. esgvoc/cli/status.py +5 -5
  54. esgvoc/cli/valid.py +40 -40
  55. esgvoc/core/constants.py +1 -1
  56. esgvoc/core/db/__init__.py +2 -4
  57. esgvoc/core/db/connection.py +5 -3
  58. esgvoc/core/db/models/project.py +50 -8
  59. esgvoc/core/db/models/universe.py +51 -12
  60. esgvoc/core/db/project_ingestion.py +60 -46
  61. esgvoc/core/db/universe_ingestion.py +58 -29
  62. esgvoc/core/exceptions.py +33 -0
  63. esgvoc/core/logging_handler.py +1 -1
  64. esgvoc/core/repo_fetcher.py +4 -3
  65. esgvoc/core/service/__init__.py +37 -5
  66. esgvoc/core/service/configuration/config_manager.py +188 -0
  67. esgvoc/core/service/configuration/setting.py +88 -0
  68. esgvoc/core/service/state.py +49 -32
  69. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/METADATA +34 -3
  70. esgvoc-0.4.0.dist-info/RECORD +80 -0
  71. esgvoc/api/_utils.py +0 -39
  72. esgvoc/cli/config.py +0 -82
  73. esgvoc/core/service/settings.py +0 -73
  74. esgvoc/core/service/settings.toml +0 -17
  75. esgvoc/core/service/settings_default.toml +0 -17
  76. esgvoc-0.2.1.dist-info/RECORD +0 -73
  77. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/WHEEL +0 -0
  78. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/entry_points.txt +0 -0
  79. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/cli/get.py CHANGED
@@ -1,7 +1,10 @@
1
1
 
2
2
  from typing import Any
3
- from esgvoc.api.projects import find_terms_in_collection, find_terms_in_project, get_all_collections_in_project, get_all_projects, get_all_terms_in_collection
4
- from esgvoc.api.universe import find_terms_in_data_descriptor, find_terms_in_universe, get_all_data_descriptors_in_universe, get_all_terms_in_data_descriptor
3
+ from esgvoc.api.projects import get_all_collections_in_project, get_all_projects, \
4
+ get_all_terms_in_collection, get_term_in_project, get_term_in_collection
5
+ from esgvoc.api.universe import find_terms_in_data_descriptor, find_terms_in_universe, \
6
+ get_all_data_descriptors_in_universe, get_all_terms_in_data_descriptor, get_term_in_data_descriptor, \
7
+ get_term_in_universe
5
8
  from pydantic import BaseModel
6
9
  from requests import logging
7
10
  from rich.table import Table
@@ -19,7 +22,7 @@ def validate_key_format(key: str):
19
22
  """
20
23
  Validate if the key matches the XXXX:YYYY:ZZZZ format.
21
24
  """
22
- if not re.match(r"^[a-zA-Z0-9\/_]*:[a-zA-Z0-9\/_]*:[a-zA-Z0-9\/_]*$", key):
25
+ if not re.match(r"^[a-zA-Z0-9\/_]*:[a-zA-Z0-9\/_]*:[a-zA-Z0-9\/_.]*$", key):
23
26
  raise typer.BadParameter(f"Invalid key format: {key}. Must be XXXX:YYYY:ZZZZ.")
24
27
  return key.split(":")
25
28
 
@@ -28,11 +31,11 @@ def handle_universe(data_descriptor_id:str|None,term_id:str|None, options=None):
28
31
  _LOGGER.debug(f"Handling universe with data_descriptor_id={data_descriptor_id}, term_id={term_id}")
29
32
 
30
33
  if data_descriptor_id and term_id:
31
- return find_terms_in_data_descriptor(data_descriptor_id,term_id,options)
34
+ return get_term_in_data_descriptor(data_descriptor_id, term_id, options)
32
35
  # BaseModel|dict[str: BaseModel]|None:
33
36
 
34
37
  elif term_id:
35
- return find_terms_in_universe(term_id,options)
38
+ return get_term_in_universe(term_id, options)
36
39
  # dict[str, BaseModel] | dict[str, dict[str, BaseModel]] | None:
37
40
 
38
41
 
@@ -48,11 +51,11 @@ def handle_project(project_id:str,collection_id:str|None,term_id:str|None,option
48
51
  _LOGGER.debug(f"Handling project {project_id} with Y={collection_id}, Z={term_id}, options = {options}")
49
52
 
50
53
  if project_id and collection_id and term_id:
51
- return find_terms_in_collection(project_id,collection_id,term_id)
54
+ return get_term_in_collection(project_id, collection_id, term_id, options)
52
55
  # BaseModel|dict[str: BaseModel]|None:
53
56
 
54
57
  elif term_id:
55
- return find_terms_in_project(project_id, term_id,options)
58
+ return get_term_in_project(project_id, term_id,options)
56
59
  # dict[str, BaseModel] | dict[str, dict[str, BaseModel]] | None:
57
60
 
58
61
 
@@ -96,30 +99,31 @@ def display(data:Any):
96
99
  @app.command()
97
100
  def get(keys: list[str] = typer.Argument(..., help="List of keys in XXXX:YYYY:ZZZZ format")):
98
101
  """
99
- Retrieve a specific value from the database system.
100
- This command allows you to fetch a value by specifying the universe/project, data_descriptor/collection,
101
- and term in a structured format.
102
-
103
- Usage:
104
- `get <project>:<collection>:<term>`
105
-
106
- Arguments:
107
- <project> The name of the project to query. like `cmip6plus`
108
- <collection> The name of the collection in the specified database.
109
- <term> The name or term within the specified collection.
110
-
102
+ Retrieve a specific value from the database system.\n
103
+ This command allows you to fetch a value by specifying the universe/project, data_descriptor/collection,
104
+ and term in a structured format.\n
105
+ \n
106
+
107
+ Usage:\n
108
+ `get <project>:<collection>:<term>`\n
109
+ \n
110
+ Arguments:\n
111
+ <project>\tThe project id to query. like `cmip6plus`\n
112
+ <collection>\tThe collection id in the specified database.\n
113
+ <term>\t\tThe term id within the specified collection.\n
114
+ \n
111
115
  Example:
112
- To retrieve the value from the "cmip6plus" project, under the "institution_id" column,
113
- in the term with the identifier "ipsl", you would use:
114
- `get cmip6plus:institution_id:ipsl`
115
- The default project is the universe CV : the argument would be like `universe:institution:ipsl` or `:institution:ipsl`
116
- - to get list of available term from universe institution `:institution:`
117
-
118
- Notes:
119
- - Ensure data exist in your system before using this command (use status command to see whats available).
120
- - Use a colon (`:`) to separate the parts of the argument.
121
- - if more than one argument is given i.e get X:Y:Z A:B:C the 2 results are appended.
122
-
116
+ To retrieve the value from the "cmip6plus" project, under the "institution_id" column, the term with the identifier "ipsl", you would use: \n
117
+ `get cmip6plus:institution_id:ipsl`\n
118
+ The default project is the universe CV : the argument would be like `universe:institution:ipsl` or `:institution:ipsl` \n
119
+ - to get list of available term from universe institution `:institution:` \n
120
+ \n
121
+ \n
122
+ Notes:\n
123
+ - Ensure data exist in your system before using this command (use `esgvoc status` command to see whats available).\n
124
+ - Use a colon (`:`) to separate the parts of the argument. \n
125
+ - if more than one argument is given i.e get X:Y:Z A:B:C the 2 results are appended. \n
126
+ \n
123
127
  """
124
128
  known_projects = get_all_projects()
125
129
 
@@ -133,7 +137,7 @@ def get(keys: list[str] = typer.Argument(..., help="List of keys in XXXX:YYYY:ZZ
133
137
  if where == "" or where=="universe":
134
138
  res = handle_universe(what,who)
135
139
  elif where in known_projects:
136
- res = handle_project(where,what,who,{})
140
+ res = handle_project(where,what,who,None)
137
141
  else:
138
142
  res = handle_unknown(where,what,who)
139
143
 
esgvoc/cli/install.py CHANGED
@@ -1,14 +1,17 @@
1
1
  import typer
2
- from esgvoc.core.service import esg_voc
2
+ from esgvoc.core.service import current_state
3
3
 
4
4
  app = typer.Typer()
5
5
 
6
6
  @app.command()
7
7
  def install():
8
- """
9
- Command to clone and build necessary db with the latest available version
10
-
11
- """
12
- esg_voc.install()
13
-
14
-
8
+ """Initialize default config and apply settings"""
9
+ try:
10
+ typer.echo("Initialized default configuration")
11
+ current_state.synchronize_all()
12
+ except Exception as e:
13
+ typer.echo(f"Error during installation: {str(e)}", err=True)
14
+ raise typer.Exit(1)
15
+
16
+ if __name__ == "__main__":
17
+ app()
esgvoc/cli/main.py CHANGED
@@ -1,5 +1,4 @@
1
1
  import typer
2
- from esgvoc.cli.config import app as config_app
3
2
  from esgvoc.cli.get import app as get_app
4
3
  from esgvoc.cli.status import app as status_app
5
4
  from esgvoc.cli.valid import app as valid_app
@@ -8,7 +7,6 @@ from esgvoc.cli.drs import app as drs_app
8
7
  app = typer.Typer()
9
8
 
10
9
  # Register the subcommands
11
- app.add_typer(config_app)
12
10
  app.add_typer(get_app)
13
11
  app.add_typer(status_app)
14
12
  app.add_typer(valid_app)
esgvoc/cli/status.py CHANGED
@@ -20,17 +20,17 @@ def status():
20
20
  i.e summary of version of usable ressources (between remote/cached)
21
21
 
22
22
  """
23
-
24
- service.state_service.get_state_summary()
23
+ assert(service.current_state is not None)
24
+ service.current_state.get_state_summary()
25
25
  #display(service.state_service.table())
26
26
 
27
27
 
28
28
  table = Table(show_header=False, show_lines=True)
29
29
 
30
30
  table.add_row("","Remote github repo","Local repository","Cache Database", style = "bright_green")
31
- table.add_row("Universe path",service.state_service.universe.github_repo,service.state_service.universe.local_path,service.state_service.universe.db_path, style = "white")
32
- table.add_row("Version",service.state_service.universe.github_version,service.state_service.universe.local_version,service.state_service.universe.db_version, style="bright_blue")
33
- for proj_name,proj in service.state_service.projects.items():
31
+ table.add_row("Universe path",service.current_state.universe.github_repo,service.current_state.universe.local_path,service.current_state.universe.db_path, style = "white")
32
+ table.add_row("Version",service.current_state.universe.github_version,service.current_state.universe.local_version,service.current_state.universe.db_version, style="bright_blue")
33
+ for proj_name,proj in service.current_state.projects.items():
34
34
  table.add_row(f"{proj_name} path",proj.github_repo,proj.local_path,proj.db_path, style="white")
35
35
  table.add_row("Version",proj.github_version,proj.local_version,proj.db_version,style ="bright_blue")
36
36
  display(table)
esgvoc/cli/valid.py CHANGED
@@ -38,52 +38,52 @@ def valid(
38
38
  verbose: bool = typer.Option(False, "-v", "--verbose", help="Provide detailed validation results")
39
39
  ):
40
40
  """
41
- Validates one or more strings against specified Project:Collection:Term configurations.
42
-
43
- Depending on the provided key structure, the function performs different validation operations:
44
- - If all are None (e.g., "::"), validates the term across all projects (`valid_term_in_all_projects`).
45
- - If Term is None (e.g., "Project:Collection:"), validates the term in the specified collection (`valid_term_in_collection`).
46
- - If Term and Collection are None (e.g., "Project::"), validates the term in the specified project (`valid_term_in_project`).
47
- - If all are specified (e.g., "Project:Collection:Term"), validates the term exactly (`valid_term`).
48
-
49
- Parameters:
50
- strings_targets (List[str]): A list of validation pairs, where each pair consists of:
51
- - A string to validate.
52
- - A key in the form '<Project:Collection:Term>'.
53
- Usage :
54
- Valid one:
55
- esgvocab valid IPSL cmip6plus:institution_id:ipsl
56
- esgvocab valid IPSL cmip6plus:institution_id:
57
- esgvocab valid IPSL cmip6plus::
58
- esgvocab valid IPSL ::
59
-
60
- Unvalid one:
61
- esgvocab valid IPSL_invalid cmip6plus:institution_id:ipsl
62
- esgvocab valid IPSL cmip6plus:institution_id:isl <= term cant be found
63
- esgvocab valid IPSL cmip6plus:institutin_id:ispl <= collection cant be found
64
- esgvocab valid IPSL cmip6pls:institution_id:ispl <= project cant be found
65
-
66
- Multiple validation for all known projects:
67
- esgvocab valid IPSL :: IPS ::
68
- result will be [True, False]
69
-
70
- esgvocab valid --verbose IPS :: IPSL ::
71
- result will be
72
- ┏━━━━━━━━┳━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
73
- ┃ String ┃ Key ┃ Result ┃ Errors
74
- ┡━━━━━━━━╇━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
75
- │ IPS │ :: │ ❌ Invalid │ did not found matching term
76
- │ IPSL │ :: │ ✅ Valid │ None
77
- └────────┴─────┴────────────┴─────────────────────────────┘
78
- Returns:
79
- List[bool]: Validation results for each pair in the input.
41
+ Validates one or more strings against specified Project:Collection:Term configurations.\n
42
+ \n
43
+ Depending on the provided key structure, the function performs different validation operations:\n
44
+ - If all are None (e.g., "::"), validates the term across all projects (`valid_term_in_all_projects`).\n
45
+ - If Term is None (e.g., "Project:Collection:"), validates the term in the specified collection (`valid_term_in_collection`).\n
46
+ - If Term and Collection are None (e.g., "Project::"), validates the term in the specified project (`valid_term_in_project`).\n
47
+ - If all are specified (e.g., "Project:Collection:Term"), validates the term exactly (`valid_term`).\n
48
+ \n
49
+ Parameters:\n
50
+ \tstrings_targets (List[str]): A list of validation pairs, where each pair consists of:\n
51
+ \t\t- A string to validate.\n
52
+ \t\t- A key in the form '<Project:Collection:Term>'.\n
53
+ Usage :\n
54
+ \tValid one:\n
55
+ \tesgvocab valid IPSL cmip6plus:institution_id:ipsl\n
56
+ \tesgvocab valid IPSL cmip6plus:institution_id:\n
57
+ \tesgvocab valid IPSL cmip6plus::\n
58
+ \tesgvocab valid IPSL ::\n
59
+ \n
60
+ \tUnvalid one:\n
61
+ \tesgvocab valid IPSL_invalid cmip6plus:institution_id:ipsl\n
62
+ \tesgvocab valid IPSL cmip6plus:institution_id:isl <= term cant be found\n
63
+ \tesgvocab valid IPSL cmip6plus:institutin_id:ispl <= collection cant be found\n
64
+ \tesgvocab valid IPSL cmip6pls:institution_id:ispl <= project cant be found\n
65
+ \n
66
+ \tMultiple validation for all known projects: \n
67
+ \tesgvocab valid IPSL :: IPS :: \n
68
+ \t\tresult will be [True, False]\n
69
+ \n
70
+ \tesgvocab valid --verbose IPS :: IPSL ::\n
71
+ \tresult will be \n
72
+ \t\t┏━━━━━━━━┳━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n
73
+ \t\t┃ String ┃ Key ┃ Result ┃ Errors ┃\n
74
+ \t\t┡━━━━━━━━╇━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n
75
+ \t\t│ IPS │ :: │ ❌ Invalid │ did not found matching term │\n
76
+ \t\t│ IPSL │ :: │ ✅ Valid │ None │\n
77
+ \t\t└────────┴─────┴────────────┴─────────────────────────────┘\n
78
+ Returns:\n
79
+ \tList[bool]: Validation results for each pair in the input.\n
80
80
  """
81
81
  results = []
82
82
  detailed_results = []
83
83
 
84
84
  # Combine string and target into pairs
85
85
  pairs = [strings_targets[i] + " " + strings_targets[i + 1] for i in range(0, len(strings_targets), 2)]
86
-
86
+
87
87
  # Validate each string against each target
88
88
  for validation in pairs:
89
89
  match = re.match(r"(.+)\s+([^:]*):([^:]*):([^:]*)", validation)
esgvoc/core/constants.py CHANGED
@@ -10,4 +10,4 @@ PATTERN_JSON_KEY = 'regex'
10
10
  TERM_TYPE_JSON_KEY = 'type'
11
11
  DRS_SPECS_JSON_KEY = 'drs_name'
12
12
  SQLITE_FIRST_PK = 1
13
- DATA_DESCRIPTOR_JSON_KEY = "@base"
13
+ DATA_DESCRIPTOR_JSON_KEY = "@base"
esgvoc/core/db/__init__.py CHANGED
@@ -1,5 +1,3 @@
1
- from esgvoc.core.db.connection import read_json_file
2
- from esgvoc.core.db.connection import DBConnection
1
+ from esgvoc.core.db.connection import DBConnection, read_json_file
3
2
 
4
-
5
- __all__ = ["DBConnection", "read_json_file"]
3
+ __all__ = ["DBConnection", "read_json_file"]
esgvoc/core/db/connection.py CHANGED
@@ -1,11 +1,13 @@
1
- from pathlib import Path
2
1
  import json
2
+ from pathlib import Path
3
+
3
4
  from sqlalchemy import Engine
4
5
  from sqlmodel import Session, create_engine
5
6
 
6
7
 
7
8
  class DBConnection:
8
9
  SQLITE_URL_PREFIX = 'sqlite://'
10
+
9
11
  def __init__(self, db_file_path: Path, echo: bool = False) -> None:
10
12
  self.engine = create_engine(f'{DBConnection.SQLITE_URL_PREFIX}/{db_file_path}', echo=echo)
11
13
  self.name = db_file_path.stem
@@ -20,7 +22,7 @@ class DBConnection:
20
22
  def create_session(self) -> Session:
21
23
  return Session(self.engine)
22
24
 
23
- def get_name(self) -> str|None:
25
+ def get_name(self) -> str | None:
24
26
  return self.name
25
27
 
26
28
  def get_file_path(self) -> Path:
@@ -28,4 +30,4 @@ class DBConnection:
28
30
 
29
31
 
30
32
  def read_json_file(json_file_path: Path) -> dict:
31
- return json.loads(json_file_path.read_text())
33
+ return json.loads(json_file_path.read_text())
esgvoc/core/db/models/project.py CHANGED
@@ -2,11 +2,14 @@ import logging
2
2
  from pathlib import Path
3
3
 
4
4
  import sqlalchemy as sa
5
+ from sqlalchemy import text
5
6
  from sqlalchemy.dialects.sqlite import JSON
6
7
  from sqlmodel import Column, Field, Relationship, SQLModel
7
8
 
8
9
  import esgvoc.core.db.connection as db
9
10
  from esgvoc.core.db.models.mixins import IdMixin, PkMixin, TermKind
11
+ from esgvoc.core.exceptions import EsgvocDbError
12
+
10
13
  _LOGGER = logging.getLogger("project_db_creation")
11
14
 
12
15
 
@@ -27,34 +30,73 @@ class Collection(SQLModel, PkMixin, IdMixin, table=True):
27
30
  term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
28
31
 
29
32
 
33
+ # Well, the following instructions are not data duplication. It is more building an index.
34
+ # Read: https://sqlite.org/fts5.html
35
+ class PCollectionFTS5(SQLModel, PkMixin, IdMixin, table=True):
36
+ __tablename__ = "pcollections_fts5"
37
+ data_descriptor_id: str
38
+ context: dict = Field(sa_column=sa.Column(JSON))
39
+ project_pk: int | None = Field(default=None, foreign_key="projects.pk")
40
+ term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
41
+
42
+
30
43
  class PTerm(SQLModel, PkMixin, IdMixin, table=True):
31
44
  __tablename__ = "pterms"
32
45
  specs: dict = Field(sa_column=sa.Column(JSON))
33
46
  kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
34
47
  collection_pk: int | None = Field(default=None, foreign_key="collections.pk")
35
48
  collection: Collection = Relationship(back_populates="terms")
36
- __table_args__ = (sa.Index(
37
- "drs_name_index", specs.sa_column["drs_name"]
49
+ __table_args__ = (sa.Index("drs_name_index", specs.sa_column["drs_name"]), ) # type: ignore
50
+
51
+
52
+ # Well, the following instructions are not data duplication. It is more building an index.
53
+ # Read: https://sqlite.org/fts5.html
54
+ class PTermFTS5(SQLModel, PkMixin, IdMixin, table=True):
55
+ __tablename__ = "pterms_fts5"
56
+ specs: dict = Field(sa_column=sa.Column(JSON))
57
+ kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
58
+ collection_pk: int | None = Field(default=None, foreign_key="collections.pk")
38
59
 
39
- ),)
40
60
 
41
61
  def project_create_db(db_file_path: Path):
42
62
  try:
43
63
  connection = db.DBConnection(db_file_path)
44
64
  except Exception as e:
45
- msg = f'Unable to create SQlite file at {db_file_path}. Abort.'
65
+ msg = f'unable to create SQlite file at {db_file_path}'
46
66
  _LOGGER.fatal(msg)
47
- raise RuntimeError(msg) from e
67
+ raise EsgvocDbError(msg) from e
48
68
  try:
69
+ # Do not include pterms_fts5 table: it is build from a raw SQL query.
49
70
  tables_to_be_created = [SQLModel.metadata.tables['projects'],
50
71
  SQLModel.metadata.tables['collections'],
51
72
  SQLModel.metadata.tables['pterms']]
52
73
  SQLModel.metadata.create_all(connection.get_engine(), tables=tables_to_be_created)
53
74
  except Exception as e:
54
- msg = f'Unable to create tables in SQLite database at {db_file_path}. Abort.'
75
+ msg = f'unable to create tables in SQLite database at {db_file_path}'
76
+ _LOGGER.fatal(msg)
77
+ raise EsgvocDbError(msg) from e
78
+ try:
79
+ with connection.create_session() as session:
80
+ sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS pterms_fts5 USING ' + \
81
+ 'fts5(pk, id, specs, kind, collection_pk, content=pterms, content_rowid=pk);'
82
+ session.exec(text(sql_query)) # type: ignore
83
+ session.commit()
84
+ except Exception as e:
85
+ msg = f'unable to create table pterms_fts5 for {db_file_path}'
86
+ _LOGGER.fatal(msg)
87
+ raise EsgvocDbError(msg) from e
88
+ try:
89
+ with connection.create_session() as session:
90
+ sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS pcollections_fts5 USING ' + \
91
+ 'fts5(pk, id, data_descriptor_id, context, project_pk, ' + \
92
+ 'term_kind, content=collections, content_rowid=pk);'
93
+ session.exec(text(sql_query)) # type: ignore
94
+ session.commit()
95
+ except Exception as e:
96
+ msg = f'unable to create table pcollections_fts5 for {db_file_path}'
55
97
  _LOGGER.fatal(msg)
56
- raise RuntimeError(msg) from e
98
+ raise EsgvocDbError(msg) from e
57
99
 
58
100
 
59
101
  if __name__ == "__main__":
60
- pass
102
+ pass
esgvoc/core/db/models/universe.py CHANGED
@@ -2,11 +2,13 @@ import logging
2
2
  from pathlib import Path
3
3
 
4
4
  import sqlalchemy as sa
5
+ from sqlalchemy import text
5
6
  from sqlalchemy.dialects.sqlite import JSON
6
7
  from sqlmodel import Column, Field, Relationship, SQLModel
7
8
 
8
9
  import esgvoc.core.db.connection as db
9
10
  from esgvoc.core.db.models.mixins import IdMixin, PkMixin, TermKind
11
+ from esgvoc.core.exceptions import EsgvocDbError
10
12
 
11
13
  _LOGGER = logging.getLogger("universe_db_creation")
12
14
 
@@ -14,11 +16,11 @@ _LOGGER = logging.getLogger("universe_db_creation")
14
16
  class Universe(SQLModel, PkMixin, table=True):
15
17
  __tablename__ = "universes"
16
18
  git_hash: str
17
- data_descriptors: list["DataDescriptor"] = Relationship(back_populates="universe")
19
+ data_descriptors: list["UDataDescriptor"] = Relationship(back_populates="universe")
18
20
 
19
21
 
20
- class DataDescriptor(SQLModel, PkMixin, IdMixin, table=True):
21
- __tablename__ = "data_descriptors"
22
+ class UDataDescriptor(SQLModel, PkMixin, IdMixin, table=True):
23
+ __tablename__ = "udata_descriptors"
22
24
  context: dict = Field(sa_column=sa.Column(JSON))
23
25
  universe_pk: int | None = Field(default=None, foreign_key="universes.pk")
24
26
  universe: Universe = Relationship(back_populates="data_descriptors")
@@ -26,33 +28,70 @@ class DataDescriptor(SQLModel, PkMixin, IdMixin, table=True):
26
28
  term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
27
29
 
28
30
 
31
+ # Well, the following instructions are not data duplication. It is more building an index.
32
+ # Read: https://sqlite.org/fts5.html
33
+ class UDataDescriptorFTS5(SQLModel, PkMixin, IdMixin, table=True):
34
+ __tablename__ = "udata_descriptors_fts5"
35
+ context: dict = Field(sa_column=sa.Column(JSON))
36
+ universe_pk: int | None = Field(default=None, foreign_key="universes.pk")
37
+ term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
38
+
39
+
29
40
  class UTerm(SQLModel, PkMixin, IdMixin, table=True):
30
41
  __tablename__ = "uterms"
31
42
  specs: dict = Field(sa_column=sa.Column(JSON))
32
43
  kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
33
- data_descriptor_pk: int | None = Field(
34
- default=None, foreign_key="data_descriptors.pk"
35
- )
36
- data_descriptor: DataDescriptor = Relationship(back_populates="terms")
44
+ data_descriptor_pk: int | None = Field(default=None, foreign_key="udata_descriptors.pk")
45
+ data_descriptor: UDataDescriptor = Relationship(back_populates="terms")
46
+
47
+
48
+ # Well, the following instructions are not data duplication. It is more building an index.
49
+ # Read: https://sqlite.org/fts5.html
50
+ class UTermFTS5(SQLModel, PkMixin, IdMixin, table=True):
51
+ __tablename__ = "uterms_fts5"
52
+ specs: dict = Field(sa_column=sa.Column(JSON))
53
+ kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
54
+ data_descriptor_pk: int | None = Field(default=None, foreign_key="udata_descriptors.pk")
37
55
 
38
56
 
39
57
  def universe_create_db(db_file_path: Path) -> None:
40
58
  try:
41
59
  connection = db.DBConnection(db_file_path)
42
60
  except Exception as e:
43
- msg = f'Unable to create SQLite file at {db_file_path}. Abort.'
61
+ msg = f'unable to create SQLite file at {db_file_path}'
44
62
  _LOGGER.fatal(msg)
45
- raise RuntimeError(msg) from e
63
+ raise EsgvocDbError(msg) from e
46
64
  try:
47
65
  # Avoid creating project tables.
48
66
  tables_to_be_created = [SQLModel.metadata.tables['uterms'],
49
- SQLModel.metadata.tables['data_descriptors'],
67
+ SQLModel.metadata.tables['udata_descriptors'],
50
68
  SQLModel.metadata.tables['universes']]
51
69
  SQLModel.metadata.create_all(connection.get_engine(), tables=tables_to_be_created)
52
70
  except Exception as e:
53
- msg = f'Unable to create tables in SQLite database at {db_file_path}. Abort.'
71
+ msg = f'unable to create tables in SQLite database at {db_file_path}'
72
+ _LOGGER.fatal(msg)
73
+ raise EsgvocDbError(msg) from e
74
+ try:
75
+ with connection.create_session() as session:
76
+ sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS uterms_fts5 USING ' + \
77
+ 'fts5(pk, id, specs, kind, data_descriptor_pk, content=uterms, content_rowid=pk);'
78
+ session.exec(text(sql_query)) # type: ignore
79
+ session.commit()
80
+ except Exception as e:
81
+ msg = f'unable to create table uterms_fts5 for {db_file_path}'
82
+ _LOGGER.fatal(msg)
83
+ raise EsgvocDbError(msg) from e
84
+ try:
85
+ with connection.create_session() as session:
86
+ sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS udata_descriptors_fts5 USING ' + \
87
+ 'fts5(pk, id, universe_pk, context, ' + \
88
+ 'term_kind, content=udata_descriptors, content_rowid=pk);'
89
+ session.exec(text(sql_query)) # type: ignore
90
+ session.commit()
91
+ except Exception as e:
92
+ msg = f'unable to create table udata_descriptors_fts5 for {db_file_path}'
54
93
  _LOGGER.fatal(msg)
55
- raise RuntimeError(msg) from e
94
+ raise EsgvocDbError(msg) from e
56
95
 
57
96
 
58
97
  if __name__ == "__main__":