esgvoc 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +1 -1
- esgvoc/api/__init__.py +95 -60
- esgvoc/api/project_specs.py +3 -2
- esgvoc/api/projects.py +671 -406
- esgvoc/api/py.typed +0 -0
- esgvoc/api/report.py +12 -8
- esgvoc/api/search.py +141 -98
- esgvoc/api/universe.py +353 -157
- esgvoc/apps/drs/constants.py +1 -1
- esgvoc/apps/drs/generator.py +51 -69
- esgvoc/apps/drs/report.py +60 -15
- esgvoc/apps/drs/validator.py +60 -71
- esgvoc/apps/py.typed +0 -0
- esgvoc/cli/drs.py +3 -2
- esgvoc/cli/get.py +9 -6
- esgvoc/core/constants.py +1 -1
- esgvoc/core/db/__init__.py +2 -4
- esgvoc/core/db/connection.py +5 -3
- esgvoc/core/db/models/project.py +50 -8
- esgvoc/core/db/models/universe.py +48 -9
- esgvoc/core/db/project_ingestion.py +60 -46
- esgvoc/core/db/universe_ingestion.py +55 -27
- esgvoc/core/exceptions.py +33 -0
- {esgvoc-0.3.0.dist-info → esgvoc-0.4.0.dist-info}/METADATA +1 -1
- {esgvoc-0.3.0.dist-info → esgvoc-0.4.0.dist-info}/RECORD +28 -26
- esgvoc/api/_utils.py +0 -53
- {esgvoc-0.3.0.dist-info → esgvoc-0.4.0.dist-info}/WHEEL +0 -0
- {esgvoc-0.3.0.dist-info → esgvoc-0.4.0.dist-info}/entry_points.txt +0 -0
- {esgvoc-0.3.0.dist-info → esgvoc-0.4.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/apps/drs/validator.py
CHANGED
|
@@ -2,17 +2,33 @@ from typing import cast
|
|
|
2
2
|
|
|
3
3
|
import esgvoc.api.projects as projects
|
|
4
4
|
import esgvoc.apps.drs.constants as constants
|
|
5
|
-
from esgvoc.api import
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
5
|
+
from esgvoc.api.project_specs import (
|
|
6
|
+
DrsCollection,
|
|
7
|
+
DrsConstant,
|
|
8
|
+
DrsPart,
|
|
9
|
+
DrsPartKind,
|
|
10
|
+
DrsSpecification,
|
|
11
|
+
DrsType,
|
|
12
|
+
ProjectSpecs,
|
|
13
|
+
)
|
|
14
|
+
from esgvoc.apps.drs.report import (
|
|
15
|
+
BlankTerm,
|
|
16
|
+
ComplianceIssue,
|
|
17
|
+
DrsIssue,
|
|
18
|
+
DrsValidationReport,
|
|
19
|
+
ExtraChar,
|
|
20
|
+
ExtraSeparator,
|
|
21
|
+
ExtraTerm,
|
|
22
|
+
FileNameExtensionIssue,
|
|
23
|
+
InvalidTerm,
|
|
24
|
+
MissingTerm,
|
|
25
|
+
ParsingIssue,
|
|
26
|
+
Space,
|
|
27
|
+
Unparsable,
|
|
28
|
+
ValidationError,
|
|
29
|
+
ValidationWarning,
|
|
30
|
+
)
|
|
31
|
+
from esgvoc.core.exceptions import EsgvocDbError, EsgvocNotFoundError
|
|
16
32
|
|
|
17
33
|
|
|
18
34
|
class DrsApplication:
|
|
@@ -25,9 +41,9 @@ class DrsApplication:
|
|
|
25
41
|
"""The project id."""
|
|
26
42
|
self.pedantic: bool = pedantic
|
|
27
43
|
"""Same as the option of GCC: turn warnings into errors. Default False."""
|
|
28
|
-
project_specs: ProjectSpecs|None = projects.
|
|
44
|
+
project_specs: ProjectSpecs | None = projects.get_project(project_id)
|
|
29
45
|
if not project_specs:
|
|
30
|
-
raise
|
|
46
|
+
raise EsgvocNotFoundError(f"unable to find project '{project_id}'")
|
|
31
47
|
for specs in project_specs.drs_specs:
|
|
32
48
|
match specs.type:
|
|
33
49
|
case DrsType.DIRECTORY:
|
|
@@ -40,7 +56,7 @@ class DrsApplication:
|
|
|
40
56
|
self.dataset_id_specs: DrsSpecification = specs
|
|
41
57
|
"""The DRS dataset id specs of the project."""
|
|
42
58
|
case _:
|
|
43
|
-
raise
|
|
59
|
+
raise EsgvocDbError(f"unsupported DRS specs type '{specs.type}'")
|
|
44
60
|
|
|
45
61
|
def _get_full_file_name_extension(self) -> str:
|
|
46
62
|
"""
|
|
@@ -55,8 +71,8 @@ class DrsApplication:
|
|
|
55
71
|
full_extension = specs.properties[constants.FILE_NAME_EXTENSION_SEPARATOR_KEY] + \
|
|
56
72
|
specs.properties[constants.FILE_NAME_EXTENSION_KEY]
|
|
57
73
|
else:
|
|
58
|
-
raise
|
|
59
|
-
|
|
74
|
+
raise EsgvocDbError('missing properties in the DRS file name specifications of the ' +
|
|
75
|
+
f"project '{self.project_id}'")
|
|
60
76
|
return full_extension
|
|
61
77
|
|
|
62
78
|
|
|
@@ -66,7 +82,7 @@ class DrsValidator(DrsApplication):
|
|
|
66
82
|
"""
|
|
67
83
|
|
|
68
84
|
def validate_directory(self, drs_expression: str,
|
|
69
|
-
prefix: str|None = None) -> DrsValidationReport:
|
|
85
|
+
prefix: str | None = None) -> DrsValidationReport:
|
|
70
86
|
"""
|
|
71
87
|
Validate a DRS directory expression.
|
|
72
88
|
|
|
@@ -112,7 +128,7 @@ class DrsValidator(DrsApplication):
|
|
|
112
128
|
[issue], [])
|
|
113
129
|
return result
|
|
114
130
|
|
|
115
|
-
def validate(self, drs_expression: str, drs_type: DrsType|str) -> DrsValidationReport:
|
|
131
|
+
def validate(self, drs_expression: str, drs_type: DrsType | str) -> DrsValidationReport:
|
|
116
132
|
"""
|
|
117
133
|
Validate a DRS expression.
|
|
118
134
|
|
|
@@ -131,14 +147,14 @@ class DrsValidator(DrsApplication):
|
|
|
131
147
|
case DrsType.DATASET_ID:
|
|
132
148
|
return self.validate_dataset_id(drs_expression=drs_expression)
|
|
133
149
|
case _:
|
|
134
|
-
raise
|
|
150
|
+
raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
|
|
135
151
|
|
|
136
152
|
def _parse(self,
|
|
137
153
|
drs_expression: str,
|
|
138
154
|
separator: str,
|
|
139
|
-
drs_type: DrsType) -> tuple[list[str]|None, # terms
|
|
155
|
+
drs_type: DrsType) -> tuple[list[str] | None, # terms
|
|
140
156
|
list[DrsIssue], # Errors
|
|
141
|
-
list[DrsIssue]]:
|
|
157
|
+
list[DrsIssue]]: # Warnings
|
|
142
158
|
errors: list[DrsIssue] = list()
|
|
143
159
|
warnings: list[DrsIssue] = list()
|
|
144
160
|
cursor_offset = 0
|
|
@@ -160,7 +176,7 @@ class DrsValidator(DrsApplication):
|
|
|
160
176
|
terms = drs_expression.split(separator)
|
|
161
177
|
if len(terms) < 2:
|
|
162
178
|
errors.append(Unparsable(expected_drs_type=drs_type))
|
|
163
|
-
return None, errors, warnings
|
|
179
|
+
return None, errors, warnings # Early exit
|
|
164
180
|
max_term_index = len(terms)
|
|
165
181
|
cursor_position = initial_cursor_position = len(drs_expression) + 1
|
|
166
182
|
has_white_term = False
|
|
@@ -178,7 +194,10 @@ class DrsValidator(DrsApplication):
|
|
|
178
194
|
column = cursor_position+cursor_offset
|
|
179
195
|
if (drs_type == DrsType.DIRECTORY) and (not has_white_term):
|
|
180
196
|
issue = ExtraSeparator(column=column)
|
|
181
|
-
|
|
197
|
+
if self.pedantic:
|
|
198
|
+
errors.append(issue)
|
|
199
|
+
else:
|
|
200
|
+
warnings.append(issue)
|
|
182
201
|
else:
|
|
183
202
|
issue = ExtraChar(column=column)
|
|
184
203
|
errors.append(issue)
|
|
@@ -188,7 +207,7 @@ class DrsValidator(DrsApplication):
|
|
|
188
207
|
if not term:
|
|
189
208
|
column = cursor_position + cursor_offset
|
|
190
209
|
issue = ExtraSeparator(column=column)
|
|
191
|
-
if
|
|
210
|
+
if self.pedantic or drs_type != DrsType.DIRECTORY or index == 0:
|
|
192
211
|
errors.append(issue)
|
|
193
212
|
else:
|
|
194
213
|
warnings.append(issue)
|
|
@@ -200,10 +219,10 @@ class DrsValidator(DrsApplication):
|
|
|
200
219
|
del terms[index]
|
|
201
220
|
cursor_position -= len_term + 1
|
|
202
221
|
|
|
203
|
-
#
|
|
204
|
-
sorted_errors = DrsValidator._sort_parser_issues(errors)
|
|
205
|
-
sorted_warnings = DrsValidator._sort_parser_issues(warnings)
|
|
206
|
-
return terms, sorted_errors, sorted_warnings
|
|
222
|
+
# Mypy doesn't understand that ParsingIssues are DrsIssues...
|
|
223
|
+
sorted_errors = DrsValidator._sort_parser_issues(errors) # type: ignore
|
|
224
|
+
sorted_warnings = DrsValidator._sort_parser_issues(warnings) # type: ignore
|
|
225
|
+
return terms, sorted_errors, sorted_warnings # type: ignore
|
|
207
226
|
|
|
208
227
|
@staticmethod
|
|
209
228
|
def _sort_parser_issues(issues: list[ParsingIssue]) -> list[ParsingIssue]:
|
|
@@ -213,13 +232,9 @@ class DrsValidator(DrsApplication):
|
|
|
213
232
|
match part.kind:
|
|
214
233
|
case DrsPartKind.COLLECTION:
|
|
215
234
|
casted_part: DrsCollection = cast(DrsCollection, part)
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
casted_part.collection_id)
|
|
220
|
-
except Exception as e:
|
|
221
|
-
msg = f'problem while validating term: {e}.Abort.'
|
|
222
|
-
raise APIException(msg) from e
|
|
235
|
+
matching_terms = projects.valid_term_in_collection(term,
|
|
236
|
+
self.project_id,
|
|
237
|
+
casted_part.collection_id)
|
|
223
238
|
if len(matching_terms) > 0:
|
|
224
239
|
return True
|
|
225
240
|
else:
|
|
@@ -228,7 +243,7 @@ class DrsValidator(DrsApplication):
|
|
|
228
243
|
part_casted: DrsConstant = cast(DrsConstant, part)
|
|
229
244
|
return part_casted.value != term
|
|
230
245
|
case _:
|
|
231
|
-
raise
|
|
246
|
+
raise EsgvocDbError(f"unsupported DRS specs part type '{part.kind}'")
|
|
232
247
|
|
|
233
248
|
def _create_report(self,
|
|
234
249
|
type: DrsType,
|
|
@@ -245,7 +260,7 @@ class DrsValidator(DrsApplication):
|
|
|
245
260
|
specs: DrsSpecification) -> DrsValidationReport:
|
|
246
261
|
terms, errors, warnings = self._parse(drs_expression, specs.separator, specs.type)
|
|
247
262
|
if not terms:
|
|
248
|
-
return self._create_report(specs.type, drs_expression, errors, warnings)
|
|
263
|
+
return self._create_report(specs.type, drs_expression, errors, warnings) # Early exit.
|
|
249
264
|
term_index = 0
|
|
250
265
|
term_max_index = len(terms)
|
|
251
266
|
part_index = 0
|
|
@@ -259,27 +274,27 @@ class DrsValidator(DrsApplication):
|
|
|
259
274
|
part_index += 1
|
|
260
275
|
matching_code_mapping[part.__str__()] = 0
|
|
261
276
|
elif part.kind == DrsPartKind.CONSTANT or \
|
|
262
|
-
cast(DrsCollection, part).is_required:
|
|
277
|
+
cast(DrsCollection, part).is_required: # noqa E127
|
|
263
278
|
issue: ComplianceIssue = InvalidTerm(term=term,
|
|
264
|
-
|
|
265
|
-
|
|
279
|
+
term_position=term_index+1,
|
|
280
|
+
collection_id_or_constant_value=str(part))
|
|
266
281
|
errors.append(issue)
|
|
267
282
|
matching_code_mapping[part.__str__()] = 1
|
|
268
283
|
term_index += 1
|
|
269
284
|
part_index += 1
|
|
270
|
-
else:
|
|
285
|
+
else: # The part is not required so try to match the term with the next part.
|
|
271
286
|
part_index += 1
|
|
272
287
|
matching_code_mapping[part.__str__()] = -1
|
|
273
288
|
if term_index == term_max_index:
|
|
274
289
|
break
|
|
275
290
|
# Cases:
|
|
276
291
|
# - All terms and collections have been processed.
|
|
277
|
-
#
|
|
292
|
+
# - Not enough terms to process all collections.
|
|
278
293
|
# - Extra terms left whereas all collections have been processed:
|
|
279
294
|
# + The last collections are required => report extra terms.
|
|
280
295
|
# + The last collections are not required and these terms were not validated by them.
|
|
281
296
|
# => Should report error even if the collections are not required.
|
|
282
|
-
if part_index < part_max_index:
|
|
297
|
+
if part_index < part_max_index: # Missing terms.
|
|
283
298
|
for index in range(part_index, part_max_index):
|
|
284
299
|
part = specs.parts[index]
|
|
285
300
|
issue = MissingTerm(collection_id=str(part), collection_position=index+1)
|
|
@@ -288,43 +303,17 @@ class DrsValidator(DrsApplication):
|
|
|
288
303
|
errors.append(issue)
|
|
289
304
|
else:
|
|
290
305
|
warnings.append(issue)
|
|
291
|
-
elif term_index < term_max_index:
|
|
306
|
+
elif term_index < term_max_index: # Extra terms.
|
|
292
307
|
part_index -= term_max_index - term_index
|
|
293
308
|
for index in range(term_index, term_max_index):
|
|
294
309
|
term = terms[index]
|
|
295
310
|
part = specs.parts[part_index]
|
|
296
311
|
if part.kind != DrsPartKind.CONSTANT and \
|
|
297
312
|
(not cast(DrsCollection, part).is_required) and \
|
|
298
|
-
matching_code_mapping[part.__str__()] < 0:
|
|
313
|
+
matching_code_mapping[part.__str__()] < 0: # noqa E125
|
|
299
314
|
issue = ExtraTerm(term=term, term_position=index, collection_id=str(part))
|
|
300
315
|
else:
|
|
301
316
|
issue = ExtraTerm(term=term, term_position=index, collection_id=None)
|
|
302
317
|
errors.append(issue)
|
|
303
318
|
part_index += 1
|
|
304
319
|
return self._create_report(specs.type, drs_expression, errors, warnings)
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
if __name__ == "__main__":
|
|
308
|
-
project_id = 'cmip6plus'
|
|
309
|
-
validator = DrsValidator(project_id)
|
|
310
|
-
drs_expressions = [
|
|
311
|
-
".CMIP6Plus.CMIP.IPSL. .MIROC6.amip..r2i2p1f2.ACmon.od550aer. ..gn",
|
|
312
|
-
]
|
|
313
|
-
import time
|
|
314
|
-
for drs_expression in drs_expressions:
|
|
315
|
-
start_time = time.perf_counter_ns()
|
|
316
|
-
report = validator.validate_dataset_id(drs_expression)
|
|
317
|
-
stop_time = time.perf_counter_ns()
|
|
318
|
-
print(f'elapsed time: {(stop_time-start_time)/1000000} ms')
|
|
319
|
-
if report.nb_errors > 0:
|
|
320
|
-
print(f'error(s): {report.nb_errors}')
|
|
321
|
-
for error in report.errors:
|
|
322
|
-
print(error)
|
|
323
|
-
else:
|
|
324
|
-
print('error(s): 0')
|
|
325
|
-
if report.nb_warnings > 0:
|
|
326
|
-
print(f'warning(s): {report.nb_warnings}')
|
|
327
|
-
for warning in report.warnings:
|
|
328
|
-
print(warning)
|
|
329
|
-
else:
|
|
330
|
-
print('warning(s): 0')
|
esgvoc/apps/py.typed
ADDED
|
File without changes
|
esgvoc/cli/drs.py
CHANGED
|
@@ -10,6 +10,7 @@ import esgvoc.api as ev
|
|
|
10
10
|
from esgvoc.apps.drs.generator import DrsGenerator
|
|
11
11
|
from esgvoc.apps.drs.report import DrsGenerationReport, DrsValidationReport
|
|
12
12
|
from esgvoc.apps.drs.validator import DrsValidator
|
|
13
|
+
from esgvoc.core.exceptions import EsgvocValueError
|
|
13
14
|
|
|
14
15
|
app = typer.Typer()
|
|
15
16
|
console = Console()
|
|
@@ -108,7 +109,7 @@ def drsvalid(
|
|
|
108
109
|
case "dataset":
|
|
109
110
|
report = validator.validate_dataset_id(string)
|
|
110
111
|
case _:
|
|
111
|
-
raise
|
|
112
|
+
raise EsgvocValueError(f"unsupported drs type '{current_drs_type}'")
|
|
112
113
|
reports.append(report)
|
|
113
114
|
|
|
114
115
|
if verbose:
|
|
@@ -213,7 +214,7 @@ def drsgen(
|
|
|
213
214
|
case "dataset":
|
|
214
215
|
report = generator.generate_dataset_id_from_bag_of_terms(bag_of_terms)
|
|
215
216
|
case _:
|
|
216
|
-
raise
|
|
217
|
+
raise EsgvocValueError(f"unsupported drs type '{current_drs_type}'")
|
|
217
218
|
generated_reports.append(report)
|
|
218
219
|
|
|
219
220
|
if verbose:
|
esgvoc/cli/get.py
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
|
|
2
2
|
from typing import Any
|
|
3
|
-
from esgvoc.api.projects import
|
|
4
|
-
|
|
3
|
+
from esgvoc.api.projects import get_all_collections_in_project, get_all_projects, \
|
|
4
|
+
get_all_terms_in_collection, get_term_in_project, get_term_in_collection
|
|
5
|
+
from esgvoc.api.universe import find_terms_in_data_descriptor, find_terms_in_universe, \
|
|
6
|
+
get_all_data_descriptors_in_universe, get_all_terms_in_data_descriptor, get_term_in_data_descriptor, \
|
|
7
|
+
get_term_in_universe
|
|
5
8
|
from pydantic import BaseModel
|
|
6
9
|
from requests import logging
|
|
7
10
|
from rich.table import Table
|
|
@@ -28,11 +31,11 @@ def handle_universe(data_descriptor_id:str|None,term_id:str|None, options=None):
|
|
|
28
31
|
_LOGGER.debug(f"Handling universe with data_descriptor_id={data_descriptor_id}, term_id={term_id}")
|
|
29
32
|
|
|
30
33
|
if data_descriptor_id and term_id:
|
|
31
|
-
return
|
|
34
|
+
return get_term_in_data_descriptor(data_descriptor_id, term_id, options)
|
|
32
35
|
# BaseModel|dict[str: BaseModel]|None:
|
|
33
36
|
|
|
34
37
|
elif term_id:
|
|
35
|
-
return
|
|
38
|
+
return get_term_in_universe(term_id, options)
|
|
36
39
|
# dict[str, BaseModel] | dict[str, dict[str, BaseModel]] | None:
|
|
37
40
|
|
|
38
41
|
|
|
@@ -48,11 +51,11 @@ def handle_project(project_id:str,collection_id:str|None,term_id:str|None,option
|
|
|
48
51
|
_LOGGER.debug(f"Handling project {project_id} with Y={collection_id}, Z={term_id}, options = {options}")
|
|
49
52
|
|
|
50
53
|
if project_id and collection_id and term_id:
|
|
51
|
-
return
|
|
54
|
+
return get_term_in_collection(project_id, collection_id, term_id, options)
|
|
52
55
|
# BaseModel|dict[str: BaseModel]|None:
|
|
53
56
|
|
|
54
57
|
elif term_id:
|
|
55
|
-
return
|
|
58
|
+
return get_term_in_project(project_id, term_id,options)
|
|
56
59
|
# dict[str, BaseModel] | dict[str, dict[str, BaseModel]] | None:
|
|
57
60
|
|
|
58
61
|
|
esgvoc/core/constants.py
CHANGED
esgvoc/core/db/__init__.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from esgvoc.core.db.connection import read_json_file
|
|
2
|
-
from esgvoc.core.db.connection import DBConnection
|
|
1
|
+
from esgvoc.core.db.connection import DBConnection, read_json_file
|
|
3
2
|
|
|
4
|
-
|
|
5
|
-
__all__ = ["DBConnection", "read_json_file"]
|
|
3
|
+
__all__ = ["DBConnection", "read_json_file"]
|
esgvoc/core/db/connection.py
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
1
|
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
3
4
|
from sqlalchemy import Engine
|
|
4
5
|
from sqlmodel import Session, create_engine
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
class DBConnection:
|
|
8
9
|
SQLITE_URL_PREFIX = 'sqlite://'
|
|
10
|
+
|
|
9
11
|
def __init__(self, db_file_path: Path, echo: bool = False) -> None:
|
|
10
12
|
self.engine = create_engine(f'{DBConnection.SQLITE_URL_PREFIX}/{db_file_path}', echo=echo)
|
|
11
13
|
self.name = db_file_path.stem
|
|
@@ -20,7 +22,7 @@ class DBConnection:
|
|
|
20
22
|
def create_session(self) -> Session:
|
|
21
23
|
return Session(self.engine)
|
|
22
24
|
|
|
23
|
-
def get_name(self) -> str|None:
|
|
25
|
+
def get_name(self) -> str | None:
|
|
24
26
|
return self.name
|
|
25
27
|
|
|
26
28
|
def get_file_path(self) -> Path:
|
|
@@ -28,4 +30,4 @@ class DBConnection:
|
|
|
28
30
|
|
|
29
31
|
|
|
30
32
|
def read_json_file(json_file_path: Path) -> dict:
|
|
31
|
-
return json.loads(json_file_path.read_text())
|
|
33
|
+
return json.loads(json_file_path.read_text())
|
esgvoc/core/db/models/project.py
CHANGED
|
@@ -2,11 +2,14 @@ import logging
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
4
|
import sqlalchemy as sa
|
|
5
|
+
from sqlalchemy import text
|
|
5
6
|
from sqlalchemy.dialects.sqlite import JSON
|
|
6
7
|
from sqlmodel import Column, Field, Relationship, SQLModel
|
|
7
8
|
|
|
8
9
|
import esgvoc.core.db.connection as db
|
|
9
10
|
from esgvoc.core.db.models.mixins import IdMixin, PkMixin, TermKind
|
|
11
|
+
from esgvoc.core.exceptions import EsgvocDbError
|
|
12
|
+
|
|
10
13
|
_LOGGER = logging.getLogger("project_db_creation")
|
|
11
14
|
|
|
12
15
|
|
|
@@ -27,34 +30,73 @@ class Collection(SQLModel, PkMixin, IdMixin, table=True):
|
|
|
27
30
|
term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
|
|
28
31
|
|
|
29
32
|
|
|
33
|
+
# Note: the following table is not data duplication; it is closer to building an index.
|
|
34
|
+
# Read: https://sqlite.org/fts5.html
|
|
35
|
+
class PCollectionFTS5(SQLModel, PkMixin, IdMixin, table=True):
|
|
36
|
+
__tablename__ = "pcollections_fts5"
|
|
37
|
+
data_descriptor_id: str
|
|
38
|
+
context: dict = Field(sa_column=sa.Column(JSON))
|
|
39
|
+
project_pk: int | None = Field(default=None, foreign_key="projects.pk")
|
|
40
|
+
term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
|
|
41
|
+
|
|
42
|
+
|
|
30
43
|
class PTerm(SQLModel, PkMixin, IdMixin, table=True):
|
|
31
44
|
__tablename__ = "pterms"
|
|
32
45
|
specs: dict = Field(sa_column=sa.Column(JSON))
|
|
33
46
|
kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
|
|
34
47
|
collection_pk: int | None = Field(default=None, foreign_key="collections.pk")
|
|
35
48
|
collection: Collection = Relationship(back_populates="terms")
|
|
36
|
-
__table_args__ = (sa.Index(
|
|
37
|
-
|
|
49
|
+
__table_args__ = (sa.Index("drs_name_index", specs.sa_column["drs_name"]), ) # type: ignore
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# Note: the following table is not data duplication; it is closer to building an index.
|
|
53
|
+
# Read: https://sqlite.org/fts5.html
|
|
54
|
+
class PTermFTS5(SQLModel, PkMixin, IdMixin, table=True):
|
|
55
|
+
__tablename__ = "pterms_fts5"
|
|
56
|
+
specs: dict = Field(sa_column=sa.Column(JSON))
|
|
57
|
+
kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
|
|
58
|
+
collection_pk: int | None = Field(default=None, foreign_key="collections.pk")
|
|
38
59
|
|
|
39
|
-
),)
|
|
40
60
|
|
|
41
61
|
def project_create_db(db_file_path: Path):
|
|
42
62
|
try:
|
|
43
63
|
connection = db.DBConnection(db_file_path)
|
|
44
64
|
except Exception as e:
|
|
45
|
-
msg = f'
|
|
65
|
+
msg = f'unable to create SQlite file at {db_file_path}'
|
|
46
66
|
_LOGGER.fatal(msg)
|
|
47
|
-
raise
|
|
67
|
+
raise EsgvocDbError(msg) from e
|
|
48
68
|
try:
|
|
69
|
+
# Do not include the pterms_fts5 table: it is built from a raw SQL query.
|
|
49
70
|
tables_to_be_created = [SQLModel.metadata.tables['projects'],
|
|
50
71
|
SQLModel.metadata.tables['collections'],
|
|
51
72
|
SQLModel.metadata.tables['pterms']]
|
|
52
73
|
SQLModel.metadata.create_all(connection.get_engine(), tables=tables_to_be_created)
|
|
53
74
|
except Exception as e:
|
|
54
|
-
msg = f'
|
|
75
|
+
msg = f'unable to create tables in SQLite database at {db_file_path}'
|
|
76
|
+
_LOGGER.fatal(msg)
|
|
77
|
+
raise EsgvocDbError(msg) from e
|
|
78
|
+
try:
|
|
79
|
+
with connection.create_session() as session:
|
|
80
|
+
sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS pterms_fts5 USING ' + \
|
|
81
|
+
'fts5(pk, id, specs, kind, collection_pk, content=pterms, content_rowid=pk);'
|
|
82
|
+
session.exec(text(sql_query)) # type: ignore
|
|
83
|
+
session.commit()
|
|
84
|
+
except Exception as e:
|
|
85
|
+
msg = f'unable to create table pterms_fts5 for {db_file_path}'
|
|
86
|
+
_LOGGER.fatal(msg)
|
|
87
|
+
raise EsgvocDbError(msg) from e
|
|
88
|
+
try:
|
|
89
|
+
with connection.create_session() as session:
|
|
90
|
+
sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS pcollections_fts5 USING ' + \
|
|
91
|
+
'fts5(pk, id, data_descriptor_id, context, project_pk, ' + \
|
|
92
|
+
'term_kind, content=collections, content_rowid=pk);'
|
|
93
|
+
session.exec(text(sql_query)) # type: ignore
|
|
94
|
+
session.commit()
|
|
95
|
+
except Exception as e:
|
|
96
|
+
msg = f'unable to create table pcollections_fts5 for {db_file_path}'
|
|
55
97
|
_LOGGER.fatal(msg)
|
|
56
|
-
raise
|
|
98
|
+
raise EsgvocDbError(msg) from e
|
|
57
99
|
|
|
58
100
|
|
|
59
101
|
if __name__ == "__main__":
|
|
60
|
-
pass
|
|
102
|
+
pass
|
|
@@ -2,11 +2,13 @@ import logging
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
4
|
import sqlalchemy as sa
|
|
5
|
+
from sqlalchemy import text
|
|
5
6
|
from sqlalchemy.dialects.sqlite import JSON
|
|
6
7
|
from sqlmodel import Column, Field, Relationship, SQLModel
|
|
7
8
|
|
|
8
9
|
import esgvoc.core.db.connection as db
|
|
9
10
|
from esgvoc.core.db.models.mixins import IdMixin, PkMixin, TermKind
|
|
11
|
+
from esgvoc.core.exceptions import EsgvocDbError
|
|
10
12
|
|
|
11
13
|
_LOGGER = logging.getLogger("universe_db_creation")
|
|
12
14
|
|
|
@@ -18,7 +20,7 @@ class Universe(SQLModel, PkMixin, table=True):
|
|
|
18
20
|
|
|
19
21
|
|
|
20
22
|
class UDataDescriptor(SQLModel, PkMixin, IdMixin, table=True):
|
|
21
|
-
__tablename__ = "
|
|
23
|
+
__tablename__ = "udata_descriptors"
|
|
22
24
|
context: dict = Field(sa_column=sa.Column(JSON))
|
|
23
25
|
universe_pk: int | None = Field(default=None, foreign_key="universes.pk")
|
|
24
26
|
universe: Universe = Relationship(back_populates="data_descriptors")
|
|
@@ -26,33 +28,70 @@ class UDataDescriptor(SQLModel, PkMixin, IdMixin, table=True):
|
|
|
26
28
|
term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
|
|
27
29
|
|
|
28
30
|
|
|
31
|
+
# Note: the following table is not data duplication; it is closer to building an index.
|
|
32
|
+
# Read: https://sqlite.org/fts5.html
|
|
33
|
+
class UDataDescriptorFTS5(SQLModel, PkMixin, IdMixin, table=True):
|
|
34
|
+
__tablename__ = "udata_descriptors_fts5"
|
|
35
|
+
context: dict = Field(sa_column=sa.Column(JSON))
|
|
36
|
+
universe_pk: int | None = Field(default=None, foreign_key="universes.pk")
|
|
37
|
+
term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
|
|
38
|
+
|
|
39
|
+
|
|
29
40
|
class UTerm(SQLModel, PkMixin, IdMixin, table=True):
|
|
30
41
|
__tablename__ = "uterms"
|
|
31
42
|
specs: dict = Field(sa_column=sa.Column(JSON))
|
|
32
43
|
kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
|
|
33
|
-
data_descriptor_pk: int | None = Field(
|
|
34
|
-
default=None, foreign_key="data_descriptors.pk"
|
|
35
|
-
)
|
|
44
|
+
data_descriptor_pk: int | None = Field(default=None, foreign_key="udata_descriptors.pk")
|
|
36
45
|
data_descriptor: UDataDescriptor = Relationship(back_populates="terms")
|
|
37
46
|
|
|
38
47
|
|
|
48
|
+
# Note: the following table is not data duplication; it is closer to building an index.
|
|
49
|
+
# Read: https://sqlite.org/fts5.html
|
|
50
|
+
class UTermFTS5(SQLModel, PkMixin, IdMixin, table=True):
|
|
51
|
+
__tablename__ = "uterms_fts5"
|
|
52
|
+
specs: dict = Field(sa_column=sa.Column(JSON))
|
|
53
|
+
kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
|
|
54
|
+
data_descriptor_pk: int | None = Field(default=None, foreign_key="udata_descriptors.pk")
|
|
55
|
+
|
|
56
|
+
|
|
39
57
|
def universe_create_db(db_file_path: Path) -> None:
|
|
40
58
|
try:
|
|
41
59
|
connection = db.DBConnection(db_file_path)
|
|
42
60
|
except Exception as e:
|
|
43
|
-
msg = f'
|
|
61
|
+
msg = f'unable to create SQLite file at {db_file_path}'
|
|
44
62
|
_LOGGER.fatal(msg)
|
|
45
|
-
raise
|
|
63
|
+
raise EsgvocDbError(msg) from e
|
|
46
64
|
try:
|
|
47
65
|
# Avoid creating project tables.
|
|
48
66
|
tables_to_be_created = [SQLModel.metadata.tables['uterms'],
|
|
49
|
-
SQLModel.metadata.tables['
|
|
67
|
+
SQLModel.metadata.tables['udata_descriptors'],
|
|
50
68
|
SQLModel.metadata.tables['universes']]
|
|
51
69
|
SQLModel.metadata.create_all(connection.get_engine(), tables=tables_to_be_created)
|
|
52
70
|
except Exception as e:
|
|
53
|
-
msg = f'
|
|
71
|
+
msg = f'unable to create tables in SQLite database at {db_file_path}'
|
|
72
|
+
_LOGGER.fatal(msg)
|
|
73
|
+
raise EsgvocDbError(msg) from e
|
|
74
|
+
try:
|
|
75
|
+
with connection.create_session() as session:
|
|
76
|
+
sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS uterms_fts5 USING ' + \
|
|
77
|
+
'fts5(pk, id, specs, kind, data_descriptor_pk, content=uterms, content_rowid=pk);'
|
|
78
|
+
session.exec(text(sql_query)) # type: ignore
|
|
79
|
+
session.commit()
|
|
80
|
+
except Exception as e:
|
|
81
|
+
msg = f'unable to create table uterms_fts5 for {db_file_path}'
|
|
82
|
+
_LOGGER.fatal(msg)
|
|
83
|
+
raise EsgvocDbError(msg) from e
|
|
84
|
+
try:
|
|
85
|
+
with connection.create_session() as session:
|
|
86
|
+
sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS udata_descriptors_fts5 USING ' + \
|
|
87
|
+
'fts5(pk, id, universe_pk, context, ' + \
|
|
88
|
+
'term_kind, content=udata_descriptors, content_rowid=pk);'
|
|
89
|
+
session.exec(text(sql_query)) # type: ignore
|
|
90
|
+
session.commit()
|
|
91
|
+
except Exception as e:
|
|
92
|
+
msg = f'unable to create table udata_descriptors_fts5 for {db_file_path}'
|
|
54
93
|
_LOGGER.fatal(msg)
|
|
55
|
-
raise
|
|
94
|
+
raise EsgvocDbError(msg) from e
|
|
56
95
|
|
|
57
96
|
|
|
58
97
|
if __name__ == "__main__":
|