esgvoc 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic.
- esgvoc/__init__.py +1 -1
- esgvoc/api/__init__.py +95 -60
- esgvoc/api/data_descriptors/__init__.py +50 -28
- esgvoc/api/data_descriptors/activity.py +3 -3
- esgvoc/api/data_descriptors/area_label.py +16 -1
- esgvoc/api/data_descriptors/branded_suffix.py +20 -0
- esgvoc/api/data_descriptors/branded_variable.py +12 -0
- esgvoc/api/data_descriptors/consortium.py +14 -13
- esgvoc/api/data_descriptors/contact.py +5 -0
- esgvoc/api/data_descriptors/conventions.py +6 -0
- esgvoc/api/data_descriptors/creation_date.py +5 -0
- esgvoc/api/data_descriptors/data_descriptor.py +14 -9
- esgvoc/api/data_descriptors/data_specs_version.py +5 -0
- esgvoc/api/data_descriptors/date.py +1 -1
- esgvoc/api/data_descriptors/directory_date.py +1 -1
- esgvoc/api/data_descriptors/experiment.py +13 -11
- esgvoc/api/data_descriptors/forcing_index.py +1 -1
- esgvoc/api/data_descriptors/frequency.py +3 -3
- esgvoc/api/data_descriptors/further_info_url.py +5 -0
- esgvoc/api/data_descriptors/grid_label.py +2 -2
- esgvoc/api/data_descriptors/horizontal_label.py +15 -1
- esgvoc/api/data_descriptors/initialisation_index.py +1 -1
- esgvoc/api/data_descriptors/institution.py +8 -5
- esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
- esgvoc/api/data_descriptors/license.py +3 -3
- esgvoc/api/data_descriptors/mip_era.py +1 -1
- esgvoc/api/data_descriptors/model_component.py +1 -1
- esgvoc/api/data_descriptors/obs_type.py +5 -0
- esgvoc/api/data_descriptors/organisation.py +1 -1
- esgvoc/api/data_descriptors/physic_index.py +1 -1
- esgvoc/api/data_descriptors/product.py +2 -2
- esgvoc/api/data_descriptors/publication_status.py +5 -0
- esgvoc/api/data_descriptors/realisation_index.py +1 -1
- esgvoc/api/data_descriptors/realm.py +1 -1
- esgvoc/api/data_descriptors/region.py +5 -0
- esgvoc/api/data_descriptors/resolution.py +3 -3
- esgvoc/api/data_descriptors/source.py +9 -5
- esgvoc/api/data_descriptors/source_type.py +1 -1
- esgvoc/api/data_descriptors/table.py +3 -2
- esgvoc/api/data_descriptors/temporal_label.py +15 -1
- esgvoc/api/data_descriptors/time_range.py +4 -3
- esgvoc/api/data_descriptors/title.py +5 -0
- esgvoc/api/data_descriptors/tracking_id.py +5 -0
- esgvoc/api/data_descriptors/variable.py +25 -12
- esgvoc/api/data_descriptors/variant_label.py +3 -3
- esgvoc/api/data_descriptors/vertical_label.py +14 -0
- esgvoc/api/project_specs.py +120 -4
- esgvoc/api/projects.py +733 -505
- esgvoc/api/py.typed +0 -0
- esgvoc/api/report.py +12 -8
- esgvoc/api/search.py +168 -98
- esgvoc/api/universe.py +368 -157
- esgvoc/apps/drs/constants.py +1 -1
- esgvoc/apps/drs/generator.py +51 -69
- esgvoc/apps/drs/report.py +60 -15
- esgvoc/apps/drs/validator.py +60 -71
- esgvoc/apps/jsg/cmip6_template.json +74 -0
- esgvoc/apps/jsg/cmip6plus_template.json +74 -0
- esgvoc/apps/jsg/json_schema_generator.py +185 -0
- esgvoc/apps/py.typed +0 -0
- esgvoc/cli/config.py +500 -0
- esgvoc/cli/drs.py +3 -2
- esgvoc/cli/find.py +138 -0
- esgvoc/cli/get.py +46 -38
- esgvoc/cli/main.py +10 -3
- esgvoc/cli/status.py +27 -18
- esgvoc/cli/valid.py +10 -15
- esgvoc/core/constants.py +1 -1
- esgvoc/core/db/__init__.py +2 -4
- esgvoc/core/db/connection.py +5 -3
- esgvoc/core/db/models/project.py +57 -15
- esgvoc/core/db/models/universe.py +49 -10
- esgvoc/core/db/project_ingestion.py +79 -65
- esgvoc/core/db/universe_ingestion.py +71 -40
- esgvoc/core/exceptions.py +33 -0
- esgvoc/core/logging_handler.py +24 -2
- esgvoc/core/repo_fetcher.py +61 -59
- esgvoc/core/service/data_merger.py +47 -34
- esgvoc/core/service/state.py +107 -83
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/METADATA +7 -20
- esgvoc-1.0.0.dist-info/RECORD +95 -0
- esgvoc/api/_utils.py +0 -53
- esgvoc/core/logging.conf +0 -21
- esgvoc-0.3.0.dist-info/RECORD +0 -78
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/WHEEL +0 -0
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/entry_points.txt +0 -0
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/cli/get.py
CHANGED
@@ -1,63 +1,70 @@
+import logging
+import re
+from typing import Any, List, Optional
 
-from typing import Any
-from esgvoc.api.projects import find_terms_in_collection, find_terms_in_project, get_all_collections_in_project, get_all_projects, get_all_terms_in_collection
-from esgvoc.api.universe import find_terms_in_data_descriptor, find_terms_in_universe, get_all_data_descriptors_in_universe, get_all_terms_in_data_descriptor
-from pydantic import BaseModel
-from requests import logging
-from rich.table import Table
 import typer
-import
-from rich.json import JSON
+from pydantic import BaseModel
 from rich.console import Console
+from rich.json import JSON
+from rich.table import Table
+
+from esgvoc.api.projects import (get_all_collections_in_project,
+                                 get_all_projects, get_all_terms_in_collection,
+                                 get_term_in_collection, get_term_in_project)
+from esgvoc.api.universe import (find_terms_in_data_descriptor,
+                                 find_terms_in_universe,
+                                 get_all_data_descriptors_in_universe,
+                                 get_all_terms_in_data_descriptor,
+                                 get_term_in_data_descriptor,
+                                 get_term_in_universe)
 
 app = typer.Typer()
 console = Console()
 
 _LOGGER = logging.getLogger(__name__)
 
+
 def validate_key_format(key: str):
     """
     Validate if the key matches the XXXX:YYYY:ZZZZ format.
     """
-    if not re.match(r"^[a-zA-Z0-9\/_]*:[a-zA-Z0-9\/_]*:[a-zA-Z0-9\/_
+    if not re.match(r"^[a-zA-Z0-9\/_-]*:[a-zA-Z0-9\/_-]*:[a-zA-Z0-9\/_.-]*$", key):
         raise typer.BadParameter(f"Invalid key format: {key}. Must be XXXX:YYYY:ZZZZ.")
     return key.split(":")
 
 
-def handle_universe(data_descriptor_id:str|None,term_id:str|None, options=None):
-    _LOGGER.debug(f"Handling universe with data_descriptor_id={data_descriptor_id}, term_id={term_id}")
-
+def handle_universe(data_descriptor_id: str | None, term_id: str | None, options=None):
+    _LOGGER.debug(f"Handling universe with data_descriptor_id={data_descriptor_id}, term_id={term_id}")
     if data_descriptor_id and term_id:
-        return
+        return get_term_in_data_descriptor(data_descriptor_id, term_id, options)
         # BaseModel|dict[str: BaseModel]|None:
 
     elif term_id:
-        return
+        return get_term_in_universe(term_id, options)
         # dict[str, BaseModel] | dict[str, dict[str, BaseModel]] | None:
 
-
     elif data_descriptor_id:
-        return get_all_terms_in_data_descriptor(data_descriptor_id)
+        return get_all_terms_in_data_descriptor(data_descriptor_id, options)
         # dict[str, BaseModel]|None:
 
     else:
         return get_all_data_descriptors_in_universe()
         # dict[str, dict]:
 
-
+
+def handle_project(project_id: str, collection_id: str | None, term_id: str | None, options=None):
     _LOGGER.debug(f"Handling project {project_id} with Y={collection_id}, Z={term_id}, options = {options}")
-
+
     if project_id and collection_id and term_id:
-        return
+        return get_term_in_collection(project_id, collection_id, term_id, options)
         # BaseModel|dict[str: BaseModel]|None:
 
     elif term_id:
-        return
+        return get_term_in_project(project_id, term_id, options)
         # dict[str, BaseModel] | dict[str, dict[str, BaseModel]] | None:
 
-
     elif collection_id:
-        return get_all_terms_in_collection(project_id, collection_id)
+        return get_all_terms_in_collection(project_id, collection_id, options)
         # dict[str, BaseModel]|None:
 
     else:
@@ -69,12 +76,11 @@ def handle_project(project_id:str,collection_id:str|None,term_id:str|None,option
         # dict[str, dict]:
 
 
-def handle_unknown(x:str|None,y:str|None,z:str|None):
+def handle_unknown(x: str | None, y: str | None, z: str | None):
     print(f"Something wrong in X,Y or Z : X={x}, Y={y}, Z={z}")
 
 
-def display(data:Any):
-
+def display(data: Any):
     if isinstance(data, BaseModel):
         # Pydantic Model
         console.print(JSON.from_data(data.model_dump()))
@@ -93,8 +99,12 @@ def display(data:Any):
         # Fallback to simple print
         console.print(data)
 
+
 @app.command()
-def get(
+def get(
+    keys: List[str] = typer.Argument(..., help="List of keys in XXXX:YYYY:ZZZZ format"),
+    select: Optional[List[str]] = typer.Option(None, "--select", help="keys selected for the result"),
+):
     """
     Retrieve a specific value from the database system.\n
     This command allows you to fetch a value by specifying the universe/project, data_descriptor/collection,
@@ -110,7 +120,7 @@ def get(keys: list[str] = typer.Argument(..., help="List of keys in XXXX:YYYY:ZZ
     <term>\t\tThe term id within the specified collection.\n
     \n
     Example:
-    To retrieve the value from the "cmip6plus" project, under the "institution_id" column, the term with the identifier "ipsl", you would use: \n
+    To retrieve the value from the "cmip6plus" project, under the "institution_id" column, the term with the identifier "ipsl", you would use: \n
     `get cmip6plus:institution_id:ipsl`\n
     The default project is the universe CV : the argument would be like `universe:institution:ipsl` or `:institution:ipsl` \n
     - to get list of available term from universe institution `:institution:` \n
@@ -121,23 +131,21 @@ def get(keys: list[str] = typer.Argument(..., help="List of keys in XXXX:YYYY:ZZ
     - Use a colon (`:`) to separate the parts of the argument. \n
     - if more than one argument is given i.e get X:Y:Z A:B:C the 2 results are appended. \n
     \n
-    """
+    """
     known_projects = get_all_projects()
 
     # Validate and process each key
     for key in keys:
         validated_key = validate_key_format(key)
         _LOGGER.debug(f"Processed key: {validated_key}")
-        where,what,who = validated_key
-        what = what if what!="" else None
-        who = who if who!="" else None
-        if where == "" or where=="universe":
-            res = handle_universe(what,who)
+        where, what, who = validated_key
+        what = what if what != "" else None
+        who = who if who != "" else None
+        if where == "" or where == "universe":
+            res = handle_universe(what, who, select)
         elif where in known_projects:
-            res = handle_project(where,what,who,
+            res = handle_project(where, what, who, select)
         else:
-            res = handle_unknown(where,what,who)
-
-        display(res)
-
+            res = handle_unknown(where, what, who)
 
+        display(res)
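The rewritten `get` command now resolves each `XXXX:YYYY:ZZZZ` key through the new `get_term_in_*` API helpers instead of returning nothing. Below is a minimal sketch of the equivalent API call for the docstring example, assuming esgvoc 1.0.0 is installed and the controlled vocabularies have already been fetched into the local cache; it is an illustration, not code from the package.

from pydantic import BaseModel

from esgvoc.api.projects import get_term_in_collection

# Same lookup as the docstring example `get cmip6plus:institution_id:ipsl`;
# the last argument corresponds to the CLI's --select option (left as None here).
term = get_term_in_collection("cmip6plus", "institution_id", "ipsl", None)
if isinstance(term, BaseModel):
    print(term.model_dump())  # display() above renders this as JSON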
esgvoc/cli/main.py
CHANGED
@@ -1,9 +1,13 @@
 import typer
+
+from esgvoc.cli.config import app as config_app
+from esgvoc.cli.drs import app as drs_app
+from esgvoc.cli.find import app as find_app
 from esgvoc.cli.get import app as get_app
+from esgvoc.cli.install import app as install_app
 from esgvoc.cli.status import app as status_app
 from esgvoc.cli.valid import app as valid_app
-
-from esgvoc.cli.drs import app as drs_app
+
 app = typer.Typer()
 
 # Register the subcommands
@@ -12,10 +16,13 @@ app.add_typer(status_app)
 app.add_typer(valid_app)
 app.add_typer(install_app)
 app.add_typer(drs_app)
+app.add_typer(config_app, name="config")
+app.add_typer(find_app)
+
 
 def main():
     app()
-
+
 
 if __name__ == "__main__":
     main()
esgvoc/cli/status.py
CHANGED
@@ -1,38 +1,47 @@
-from esgvoc.core import service
-from rich.table import Table
 import typer
 from rich.console import Console
+from rich.table import Table
+
+from esgvoc.core import service
 
 app = typer.Typer()
 console = Console()
 
 
 def display(table):
-    console = Console(record=True,width=200)
+    console = Console(record=True, width=200)
     console.print(table)
 
 
-
 @app.command()
 def status():
     """
-    Command to display status
-    i.e summary of version of usable ressources (between remote/cached)
-
+    Command to display status
+    i.e summary of version of usable ressources (between remote/cached)
+
     """
-    assert
+    assert service.current_state is not None
     service.current_state.get_state_summary()
-    #display(service.state_service.table())
-
+    # display(service.state_service.table())
 
     table = Table(show_header=False, show_lines=True)
 
-    table.add_row("","Remote github repo","Local repository","Cache Database", style
-    table.add_row(
-
-
-
-
+    table.add_row("", "Remote github repo", "Local repository", "Cache Database", style="bright_green")
+    table.add_row(
+        "Universe path",
+        service.current_state.universe.github_repo,
+        service.current_state.universe.local_path,
+        service.current_state.universe.db_path,
+        style="white",
+    )
+    table.add_row(
+        "Version",
+        service.current_state.universe.github_version,
+        service.current_state.universe.local_version,
+        service.current_state.universe.db_version,
+        style="bright_blue",
+    )
+    for proj_name, proj in service.current_state.projects.items():
+        table.add_row(f"{proj_name} path", proj.github_repo, proj.local_path, proj.db_path, style="white")
+        table.add_row("Version", proj.github_version, proj.local_version, proj.db_version, style="bright_blue")
     display(table)
-
-
esgvoc/cli/valid.py
CHANGED
@@ -1,26 +1,21 @@
 
+import re
 from typing import List
-
-    valid_term,
-    valid_term_in_collection,
-    valid_term_in_project,
-    valid_term_in_all_projects
-)
-from requests import logging
-from rich.table import Table
+
 import typer
-import re
 from rich.console import Console
+from rich.table import Table
+
+from esgvoc.api.projects import valid_term, valid_term_in_all_projects, valid_term_in_collection, valid_term_in_project
 
 app = typer.Typer()
 console = Console()
 
-_LOGGER = logging.getLogger(__name__)
 
 @app.command()
 def valid(
     strings_targets: List[str] = typer.Argument(
-        ...,
+        ...,
         help=(
             "Pairs of strings to validate against a key in the form '<StringToValidate> <Project:Collection:Term>'.\n"
            "Multiple pairs can be provided. The key '<Project:Collection:Term>' consists of three parts:\n"
@@ -51,7 +46,7 @@ def valid(
     \t\t- A string to validate.\n
     \t\t- A key in the form '<Project:Collection:Term>'.\n
     Usage :\n
-    \tValid one:\n
+    \tValid one:\n
     \tesgvocab valid IPSL cmip6plus:institution_id:ipsl\n
     \tesgvocab valid IPSL cmip6plus:institution_id:\n
     \tesgvocab valid IPSL cmip6plus::\n
@@ -67,7 +62,7 @@ def valid(
     \tesgvocab valid IPSL :: IPS :: \n
     \t\tresult will be [True, False]\n
     \n
-    \tesgvocab valid --verbose IPS :: IPSL ::\n
+    \tesgvocab valid --verbose IPS :: IPSL ::\n
     \tresult will be \n
     \t\t┏━━━━━━━━┳━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n
     \t\t┃ String ┃ Key ┃ Result ┃ Errors ┃\n
@@ -83,7 +78,7 @@ def valid(
 
     # Combine string and target into pairs
     pairs = [strings_targets[i] + " " + strings_targets[i + 1] for i in range(0, len(strings_targets), 2)]
-
+
     # Validate each string against each target
     for validation in pairs:
         match = re.match(r"(.+)\s+([^:]*):([^:]*):([^:]*)", validation)
@@ -109,7 +104,7 @@ def valid(
         except Exception as e:
             validation_result=False
             exception_message = repr(e)
-
+
         # Handle validation result
 
         if validation_result:
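For reference, the pair-parsing regex retained in `valid` splits each `<string> <project>:<collection>:<term>` argument into its four parts. A standalone illustration using the docstring example (standard library only):

import re

pair = "IPSL cmip6plus:institution_id:ipsl"
# Same pattern as in `valid` above.
match = re.match(r"(.+)\s+([^:]*):([^:]*):([^:]*)", pair)
if match:
    string_to_validate, project, collection, term = match.groups()
    print(string_to_validate, project, collection, term)
    # -> IPSL cmip6plus institution_id ipsl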
esgvoc/core/constants.py
CHANGED
esgvoc/core/db/__init__.py
CHANGED
@@ -1,5 +1,3 @@
-from esgvoc.core.db.connection import read_json_file
-from esgvoc.core.db.connection import DBConnection
+from esgvoc.core.db.connection import DBConnection, read_json_file
 
-
-__all__ = ["DBConnection", "read_json_file"]
+__all__ = ["DBConnection", "read_json_file"]
esgvoc/core/db/connection.py
CHANGED
@@ -1,11 +1,13 @@
-from pathlib import Path
 import json
+from pathlib import Path
+
 from sqlalchemy import Engine
 from sqlmodel import Session, create_engine
 
 
 class DBConnection:
     SQLITE_URL_PREFIX = 'sqlite://'
+
     def __init__(self, db_file_path: Path, echo: bool = False) -> None:
         self.engine = create_engine(f'{DBConnection.SQLITE_URL_PREFIX}/{db_file_path}', echo=echo)
         self.name = db_file_path.stem
@@ -20,7 +22,7 @@ class DBConnection:
     def create_session(self) -> Session:
         return Session(self.engine)
 
-    def get_name(self) -> str|None:
+    def get_name(self) -> str | None:
         return self.name
 
     def get_file_path(self) -> Path:
@@ -28,4 +30,4 @@ class DBConnection:
 
 
 def read_json_file(json_file_path: Path) -> dict:
-    return json.loads(json_file_path.read_text())
+    return json.loads(json_file_path.read_text())
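A short usage sketch of `DBConnection` as defined above; the file name below is an assumption made for illustration (the real database paths are resolved by the service state shown by the status command).

from pathlib import Path

from esgvoc.core.db.connection import DBConnection

connection = DBConnection(Path("universe.sqlite"))  # illustrative file name
with connection.create_session() as session:
    pass  # run SQLModel/SQLAlchemy queries here
print(connection.get_name())  # -> "universe" (the file stem)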
esgvoc/core/db/models/project.py
CHANGED
@@ -2,23 +2,26 @@ import logging
 from pathlib import Path
 
 import sqlalchemy as sa
+from sqlalchemy import text
 from sqlalchemy.dialects.sqlite import JSON
 from sqlmodel import Column, Field, Relationship, SQLModel
 
 import esgvoc.core.db.connection as db
 from esgvoc.core.db.models.mixins import IdMixin, PkMixin, TermKind
-
+from esgvoc.core.exceptions import EsgvocDbError
+
+_LOGGER = logging.getLogger(__name__)
 
 
 class Project(SQLModel, PkMixin, IdMixin, table=True):
     __tablename__ = "projects"
     specs: dict = Field(sa_column=sa.Column(JSON))
     git_hash: str
-    collections: list["
+    collections: list["PCollection"] = Relationship(back_populates="project")
 
 
-class
-    __tablename__ = "
+class PCollection(SQLModel, PkMixin, IdMixin, table=True):
+    __tablename__ = "pcollections"
     data_descriptor_id: str = Field(index=True)
     context: dict = Field(sa_column=sa.Column(JSON))
     project_pk: int | None = Field(default=None, foreign_key="projects.pk")
@@ -27,34 +30,73 @@ class Collection(SQLModel, PkMixin, IdMixin, table=True):
     term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
 
 
+# Well, the following instructions are not data duplication. It is more building an index.
+# Read: https://sqlite.org/fts5.html
+class PCollectionFTS5(SQLModel, PkMixin, IdMixin, table=True):
+    __tablename__ = "pcollections_fts5"
+    data_descriptor_id: str
+    context: dict = Field(sa_column=sa.Column(JSON))
+    project_pk: int | None = Field(default=None, foreign_key="projects.pk")
+    term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
+
+
 class PTerm(SQLModel, PkMixin, IdMixin, table=True):
     __tablename__ = "pterms"
     specs: dict = Field(sa_column=sa.Column(JSON))
     kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
-    collection_pk: int | None = Field(default=None, foreign_key="
-    collection:
-    __table_args__ = (sa.Index(
-
+    collection_pk: int | None = Field(default=None, foreign_key="pcollections.pk")
+    collection: PCollection = Relationship(back_populates="terms")
+    __table_args__ = (sa.Index("drs_name_index", specs.sa_column["drs_name"]), ) # type: ignore
+
+
+# Well, the following instructions are not data duplication. It is more building an index.
+# Read: https://sqlite.org/fts5.html
+class PTermFTS5(SQLModel, PkMixin, IdMixin, table=True):
+    __tablename__ = "pterms_fts5"
+    specs: dict = Field(sa_column=sa.Column(JSON))
+    kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
+    collection_pk: int | None = Field(default=None, foreign_key="pcollections.pk")
 
-    ),)
 
 def project_create_db(db_file_path: Path):
     try:
         connection = db.DBConnection(db_file_path)
     except Exception as e:
-        msg = f'
+        msg = f'unable to create SQlite file at {db_file_path}'
         _LOGGER.fatal(msg)
-        raise
+        raise EsgvocDbError(msg) from e
     try:
+        # Do not include pterms_fts5 table: it is build from a raw SQL query.
         tables_to_be_created = [SQLModel.metadata.tables['projects'],
-                                SQLModel.metadata.tables['
+                                SQLModel.metadata.tables['pcollections'],
                                 SQLModel.metadata.tables['pterms']]
         SQLModel.metadata.create_all(connection.get_engine(), tables=tables_to_be_created)
     except Exception as e:
-        msg = f'
+        msg = f'unable to create tables in SQLite database at {db_file_path}'
+        _LOGGER.fatal(msg)
+        raise EsgvocDbError(msg) from e
+    try:
+        with connection.create_session() as session:
+            sql_query = "CREATE VIRTUAL TABLE IF NOT EXISTS pterms_fts5 USING " + \
+                        "fts5(pk, id, specs, kind, collection_pk, content=pterms, content_rowid=pk, prefix=3);"
+            session.exec(text(sql_query)) # type: ignore
+            session.commit()
+    except Exception as e:
+        msg = f'unable to create table pterms_fts5 for {db_file_path}'
+        _LOGGER.fatal(msg)
+        raise EsgvocDbError(msg) from e
+    try:
+        with connection.create_session() as session:
+            sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS pcollections_fts5 USING ' + \
+                        'fts5(pk, id, data_descriptor_id, context, project_pk, ' + \
+                        'term_kind, content=pcollections, content_rowid=pk, prefix=3);'
+            session.exec(text(sql_query)) # type: ignore
+            session.commit()
+    except Exception as e:
+        msg = f'unable to create table pcollections_fts5 for {db_file_path}'
         _LOGGER.fatal(msg)
-        raise
+        raise EsgvocDbError(msg) from e
 
 
 if __name__ == "__main__":
-    pass
+    pass
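The new `*_fts5` tables are SQLite FTS5 external-content indexes over `pterms` and `pcollections` (see https://sqlite.org/fts5.html), created here and presumably populated by the ingestion modules listed above. As a hypothetical illustration only (not code from the package, assuming the index is populated and the local SQLite build ships FTS5), such a table answers standard MATCH queries:

import sqlite3

conn = sqlite3.connect("project.sqlite")  # placeholder path, not the package's real file name
rows = conn.execute(
    "SELECT id FROM pterms_fts5 WHERE pterms_fts5 MATCH ?", ("ipsl",)
).fetchall()
print(rows)
conn.close()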
esgvoc/core/db/models/universe.py
CHANGED
@@ -2,13 +2,15 @@ import logging
 from pathlib import Path
 
 import sqlalchemy as sa
+from sqlalchemy import text
 from sqlalchemy.dialects.sqlite import JSON
 from sqlmodel import Column, Field, Relationship, SQLModel
 
 import esgvoc.core.db.connection as db
 from esgvoc.core.db.models.mixins import IdMixin, PkMixin, TermKind
+from esgvoc.core.exceptions import EsgvocDbError
 
-_LOGGER = logging.getLogger(
+_LOGGER = logging.getLogger(__name__)
 
 
 class Universe(SQLModel, PkMixin, table=True):
@@ -18,7 +20,7 @@ class Universe(SQLModel, PkMixin, table=True):
 
 
 class UDataDescriptor(SQLModel, PkMixin, IdMixin, table=True):
-    __tablename__ = "
+    __tablename__ = "udata_descriptors"
     context: dict = Field(sa_column=sa.Column(JSON))
     universe_pk: int | None = Field(default=None, foreign_key="universes.pk")
     universe: Universe = Relationship(back_populates="data_descriptors")
@@ -26,33 +28,70 @@ class UDataDescriptor(SQLModel, PkMixin, IdMixin, table=True):
     term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
 
 
+# Well, the following instructions are not data duplication. It is more building an index.
+# Read: https://sqlite.org/fts5.html
+class UDataDescriptorFTS5(SQLModel, PkMixin, IdMixin, table=True):
+    __tablename__ = "udata_descriptors_fts5"
+    context: dict = Field(sa_column=sa.Column(JSON))
+    universe_pk: int | None = Field(default=None, foreign_key="universes.pk")
+    term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
+
+
 class UTerm(SQLModel, PkMixin, IdMixin, table=True):
     __tablename__ = "uterms"
     specs: dict = Field(sa_column=sa.Column(JSON))
     kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
-    data_descriptor_pk: int | None = Field(
-        default=None, foreign_key="data_descriptors.pk"
-    )
+    data_descriptor_pk: int | None = Field(default=None, foreign_key="udata_descriptors.pk")
     data_descriptor: UDataDescriptor = Relationship(back_populates="terms")
 
 
+# Well, the following instructions are not data duplication. It is more building an index.
+# Read: https://sqlite.org/fts5.html
+class UTermFTS5(SQLModel, PkMixin, IdMixin, table=True):
+    __tablename__ = "uterms_fts5"
+    specs: dict = Field(sa_column=sa.Column(JSON))
+    kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
+    data_descriptor_pk: int | None = Field(default=None, foreign_key="udata_descriptors.pk")
+
+
 def universe_create_db(db_file_path: Path) -> None:
     try:
         connection = db.DBConnection(db_file_path)
     except Exception as e:
-        msg = f'
+        msg = f'unable to create SQLite file at {db_file_path}'
         _LOGGER.fatal(msg)
-        raise
+        raise EsgvocDbError(msg) from e
    try:
         # Avoid creating project tables.
         tables_to_be_created = [SQLModel.metadata.tables['uterms'],
-                                SQLModel.metadata.tables['
+                                SQLModel.metadata.tables['udata_descriptors'],
                                 SQLModel.metadata.tables['universes']]
         SQLModel.metadata.create_all(connection.get_engine(), tables=tables_to_be_created)
     except Exception as e:
-        msg = f'
+        msg = f'unable to create tables in SQLite database at {db_file_path}'
+        _LOGGER.fatal(msg)
+        raise EsgvocDbError(msg) from e
+    try:
+        with connection.create_session() as session:
+            sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS uterms_fts5 USING ' + \
+                        'fts5(pk, id, specs, kind, data_descriptor_pk, content=uterms, content_rowid=pk, prefix=3);'
+            session.exec(text(sql_query)) # type: ignore
+            session.commit()
+    except Exception as e:
+        msg = f'unable to create table uterms_fts5 for {db_file_path}'
+        _LOGGER.fatal(msg)
+        raise EsgvocDbError(msg) from e
+    try:
+        with connection.create_session() as session:
+            sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS udata_descriptors_fts5 USING ' + \
+                        'fts5(pk, id, universe_pk, context, ' + \
+                        'term_kind, content=udata_descriptors, content_rowid=pk, prefix=3);'
+            session.exec(text(sql_query)) # type: ignore
+            session.commit()
+    except Exception as e:
+        msg = f'unable to create table udata_descriptors_fts5 for {db_file_path}'
         _LOGGER.fatal(msg)
-        raise
+        raise EsgvocDbError(msg) from e
 
 
 if __name__ == "__main__":
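Because these virtual tables are declared with `content=uterms` / `content=udata_descriptors`, they index rows stored in the base tables rather than duplicating them; per the FTS5 documentation, such an external-content index can be (re)built from its base table with the special 'rebuild' command. A sketch, with the file name as a placeholder and assuming the base tables have already been ingested:

import sqlite3

conn = sqlite3.connect("universe.sqlite")  # placeholder path
conn.execute("INSERT INTO uterms_fts5(uterms_fts5) VALUES('rebuild');")
conn.commit()
conn.close()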