esgvoc 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +3 -1
- esgvoc/api/__init__.py +96 -72
- esgvoc/api/data_descriptors/__init__.py +18 -12
- esgvoc/api/data_descriptors/activity.py +8 -45
- esgvoc/api/data_descriptors/area_label.py +6 -0
- esgvoc/api/data_descriptors/branded_suffix.py +5 -0
- esgvoc/api/data_descriptors/branded_variable.py +5 -0
- esgvoc/api/data_descriptors/consortium.py +16 -56
- esgvoc/api/data_descriptors/data_descriptor.py +106 -0
- esgvoc/api/data_descriptors/date.py +3 -46
- esgvoc/api/data_descriptors/directory_date.py +3 -46
- esgvoc/api/data_descriptors/experiment.py +19 -54
- esgvoc/api/data_descriptors/forcing_index.py +3 -45
- esgvoc/api/data_descriptors/frequency.py +6 -43
- esgvoc/api/data_descriptors/grid_label.py +6 -44
- esgvoc/api/data_descriptors/horizontal_label.py +6 -0
- esgvoc/api/data_descriptors/initialisation_index.py +3 -44
- esgvoc/api/data_descriptors/institution.py +11 -54
- esgvoc/api/data_descriptors/license.py +4 -44
- esgvoc/api/data_descriptors/mip_era.py +6 -44
- esgvoc/api/data_descriptors/model_component.py +7 -45
- esgvoc/api/data_descriptors/organisation.py +3 -40
- esgvoc/api/data_descriptors/physic_index.py +3 -45
- esgvoc/api/data_descriptors/product.py +4 -43
- esgvoc/api/data_descriptors/realisation_index.py +3 -44
- esgvoc/api/data_descriptors/realm.py +4 -42
- esgvoc/api/data_descriptors/resolution.py +6 -44
- esgvoc/api/data_descriptors/source.py +18 -53
- esgvoc/api/data_descriptors/source_type.py +3 -41
- esgvoc/api/data_descriptors/sub_experiment.py +3 -41
- esgvoc/api/data_descriptors/table.py +6 -48
- esgvoc/api/data_descriptors/temporal_label.py +6 -0
- esgvoc/api/data_descriptors/time_range.py +3 -27
- esgvoc/api/data_descriptors/variable.py +13 -71
- esgvoc/api/data_descriptors/variant_label.py +3 -47
- esgvoc/api/data_descriptors/vertical_label.py +5 -0
- esgvoc/api/project_specs.py +3 -2
- esgvoc/api/projects.py +727 -446
- esgvoc/api/py.typed +0 -0
- esgvoc/api/report.py +29 -16
- esgvoc/api/search.py +140 -95
- esgvoc/api/universe.py +362 -156
- esgvoc/apps/__init__.py +3 -4
- esgvoc/apps/drs/constants.py +1 -1
- esgvoc/apps/drs/generator.py +185 -198
- esgvoc/apps/drs/report.py +272 -136
- esgvoc/apps/drs/validator.py +132 -145
- esgvoc/apps/py.typed +0 -0
- esgvoc/cli/drs.py +32 -21
- esgvoc/cli/get.py +35 -31
- esgvoc/cli/install.py +11 -8
- esgvoc/cli/main.py +0 -2
- esgvoc/cli/status.py +5 -5
- esgvoc/cli/valid.py +40 -40
- esgvoc/core/constants.py +1 -1
- esgvoc/core/db/__init__.py +2 -4
- esgvoc/core/db/connection.py +5 -3
- esgvoc/core/db/models/project.py +50 -8
- esgvoc/core/db/models/universe.py +51 -12
- esgvoc/core/db/project_ingestion.py +60 -46
- esgvoc/core/db/universe_ingestion.py +58 -29
- esgvoc/core/exceptions.py +33 -0
- esgvoc/core/logging_handler.py +1 -1
- esgvoc/core/repo_fetcher.py +4 -3
- esgvoc/core/service/__init__.py +37 -5
- esgvoc/core/service/configuration/config_manager.py +188 -0
- esgvoc/core/service/configuration/setting.py +88 -0
- esgvoc/core/service/state.py +49 -32
- {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/METADATA +34 -3
- esgvoc-0.4.0.dist-info/RECORD +80 -0
- esgvoc/api/_utils.py +0 -39
- esgvoc/cli/config.py +0 -82
- esgvoc/core/service/settings.py +0 -73
- esgvoc/core/service/settings.toml +0 -17
- esgvoc/core/service/settings_default.toml +0 -17
- esgvoc-0.2.1.dist-info/RECORD +0 -73
- {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/WHEEL +0 -0
- {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/entry_points.txt +0 -0
- {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/api/projects.py
CHANGED
|
@@ -1,91 +1,95 @@
|
|
|
1
1
|
import re
|
|
2
|
-
from typing import Sequence
|
|
2
|
+
from typing import Iterable, Sequence
|
|
3
|
+
|
|
4
|
+
from sqlalchemy import text
|
|
5
|
+
from sqlmodel import Session, and_, col, select
|
|
3
6
|
|
|
4
7
|
import esgvoc.api.universe as universe
|
|
5
|
-
import esgvoc.core.constants
|
|
8
|
+
import esgvoc.core.constants as constants
|
|
6
9
|
import esgvoc.core.service as service
|
|
7
|
-
from esgvoc.api.
|
|
8
|
-
instantiate_pydantic_terms)
|
|
9
|
-
from esgvoc.api.report import (ProjectTermError, UniverseTermError,
|
|
10
|
-
ValidationError, ValidationReport)
|
|
11
|
-
from esgvoc.api.search import MatchingTerm, SearchSettings, _create_str_comparison_expression
|
|
10
|
+
from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor
|
|
12
11
|
from esgvoc.api.project_specs import ProjectSpecs
|
|
12
|
+
from esgvoc.api.report import ProjectTermError, UniverseTermError, ValidationReport
|
|
13
|
+
from esgvoc.api.search import (
|
|
14
|
+
Item,
|
|
15
|
+
MatchingTerm,
|
|
16
|
+
execute_find_item_statements,
|
|
17
|
+
execute_match_statement,
|
|
18
|
+
generate_matching_condition,
|
|
19
|
+
get_universe_session,
|
|
20
|
+
handle_rank_limit_offset,
|
|
21
|
+
instantiate_pydantic_term,
|
|
22
|
+
instantiate_pydantic_terms,
|
|
23
|
+
)
|
|
13
24
|
from esgvoc.core.db.connection import DBConnection
|
|
14
25
|
from esgvoc.core.db.models.mixins import TermKind
|
|
15
|
-
from esgvoc.core.db.models.project import
|
|
26
|
+
from esgvoc.core.db.models.project import (
|
|
27
|
+
Collection,
|
|
28
|
+
PCollectionFTS5,
|
|
29
|
+
Project,
|
|
30
|
+
PTerm,
|
|
31
|
+
PTermFTS5,
|
|
32
|
+
)
|
|
16
33
|
from esgvoc.core.db.models.universe import UTerm
|
|
17
|
-
from
|
|
18
|
-
from sqlmodel import Session, and_, select
|
|
19
|
-
|
|
34
|
+
from esgvoc.core.exceptions import EsgvocDbError, EsgvocNotFoundError, EsgvocNotImplementedError, EsgvocValueError
|
|
20
35
|
|
|
21
36
|
# [OPTIMIZATION]
|
|
22
37
|
_VALID_TERM_IN_COLLECTION_CACHE: dict[str, list[MatchingTerm]] = dict()
|
|
23
|
-
_VALID_VALUE_AGAINST_GIVEN_TERM_CACHE: dict[str, list[
|
|
38
|
+
_VALID_VALUE_AGAINST_GIVEN_TERM_CACHE: dict[str, list[UniverseTermError | ProjectTermError]] = dict()
|
|
24
39
|
|
|
25
40
|
|
|
26
|
-
def
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
try:
|
|
32
|
-
result = ProjectSpecs(**project_specs)
|
|
33
|
-
except Exception as e:
|
|
34
|
-
msg = f'Unable to read specs in project {project_id}'
|
|
35
|
-
raise RuntimeError(msg) from e
|
|
36
|
-
return result
|
|
37
|
-
|
|
41
|
+
def _get_project_connection(project_id: str) -> DBConnection | None:
|
|
42
|
+
if project_id in service.current_state.projects:
|
|
43
|
+
return service.current_state.projects[project_id].db_connection
|
|
44
|
+
else:
|
|
45
|
+
return None
|
|
38
46
|
|
|
39
|
-
def _get_project_connection(project_id: str) -> DBConnection|None:
|
|
40
|
-
return service.state_service.projects[project_id].db_connection
|
|
41
47
|
|
|
42
48
|
def _get_project_session_with_exception(project_id: str) -> Session:
|
|
43
|
-
if connection:=_get_project_connection(project_id):
|
|
49
|
+
if connection := _get_project_connection(project_id):
|
|
44
50
|
project_session = connection.create_session()
|
|
45
51
|
return project_session
|
|
46
52
|
else:
|
|
47
|
-
raise
|
|
48
|
-
|
|
53
|
+
raise EsgvocNotFoundError(f"unable to find project '{project_id}'")
|
|
49
54
|
|
|
50
|
-
|
|
55
|
+
|
|
56
|
+
def _resolve_term(composite_term_part: dict,
|
|
51
57
|
universe_session: Session,
|
|
52
|
-
project_session: Session) -> UTerm|PTerm:
|
|
58
|
+
project_session: Session) -> UTerm | PTerm:
|
|
53
59
|
# First find the term in the universe, then in the current project
|
|
54
|
-
term_id =
|
|
55
|
-
term_type =
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
return uterms[0]
|
|
60
|
+
term_id = composite_term_part[constants.TERM_ID_JSON_KEY]
|
|
61
|
+
term_type = composite_term_part[constants.TERM_TYPE_JSON_KEY]
|
|
62
|
+
uterm = universe._get_term_in_data_descriptor(data_descriptor_id=term_type,
|
|
63
|
+
term_id=term_id,
|
|
64
|
+
session=universe_session)
|
|
65
|
+
if uterm:
|
|
66
|
+
return uterm
|
|
62
67
|
else:
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
return pterms[0]
|
|
68
|
+
pterm = _get_term_in_collection(collection_id=term_type,
|
|
69
|
+
term_id=term_id,
|
|
70
|
+
session=project_session)
|
|
71
|
+
if pterm:
|
|
72
|
+
return pterm
|
|
69
73
|
else:
|
|
70
|
-
msg = f
|
|
71
|
-
raise
|
|
74
|
+
msg = f"unable to find the term '{term_id}' in '{term_type}'"
|
|
75
|
+
raise EsgvocNotFoundError(msg)
|
|
72
76
|
|
|
73
77
|
|
|
74
|
-
def
|
|
75
|
-
separator = term.specs[
|
|
76
|
-
parts = term.specs[
|
|
78
|
+
def _get_composite_term_separator_parts(term: UTerm | PTerm) -> tuple[str, list]:
|
|
79
|
+
separator = term.specs[constants.COMPOSITE_SEPARATOR_JSON_KEY]
|
|
80
|
+
parts = term.specs[constants.COMPOSITE_PARTS_JSON_KEY]
|
|
77
81
|
return separator, parts
|
|
78
82
|
|
|
79
83
|
|
|
80
84
|
# TODO: support optionality of parts of composite.
|
|
81
85
|
# It is backtrack possible for more than one missing parts.
|
|
82
|
-
def
|
|
83
|
-
term: UTerm|PTerm,
|
|
86
|
+
def _valid_value_composite_term_with_separator(value: str,
|
|
87
|
+
term: UTerm | PTerm,
|
|
84
88
|
universe_session: Session,
|
|
85
89
|
project_session: Session)\
|
|
86
|
-
-> list[
|
|
90
|
+
-> list[UniverseTermError | ProjectTermError]:
|
|
87
91
|
result = list()
|
|
88
|
-
separator, parts =
|
|
92
|
+
separator, parts = _get_composite_term_separator_parts(term)
|
|
89
93
|
if separator in value:
|
|
90
94
|
splits = value.split(separator)
|
|
91
95
|
if len(splits) == len(parts):
|
|
@@ -106,16 +110,20 @@ def _valid_value_term_composite_with_separator(value: str,
|
|
|
106
110
|
return result
|
|
107
111
|
|
|
108
112
|
|
|
109
|
-
def _transform_to_pattern(term: UTerm|PTerm,
|
|
113
|
+
def _transform_to_pattern(term: UTerm | PTerm,
|
|
110
114
|
universe_session: Session,
|
|
111
115
|
project_session: Session) -> str:
|
|
112
116
|
match term.kind:
|
|
113
117
|
case TermKind.PLAIN:
|
|
114
|
-
|
|
118
|
+
if constants.DRS_SPECS_JSON_KEY in term.specs:
|
|
119
|
+
result = term.specs[constants.DRS_SPECS_JSON_KEY]
|
|
120
|
+
else:
|
|
121
|
+
raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " +
|
|
122
|
+
"Can't validate it.")
|
|
115
123
|
case TermKind.PATTERN:
|
|
116
|
-
result = term.specs[
|
|
124
|
+
result = term.specs[constants.PATTERN_JSON_KEY]
|
|
117
125
|
case TermKind.COMPOSITE:
|
|
118
|
-
separator, parts =
|
|
126
|
+
separator, parts = _get_composite_term_separator_parts(term)
|
|
119
127
|
result = ""
|
|
120
128
|
for part in parts:
|
|
121
129
|
resolved_term = _resolve_term(part, universe_session, project_session)
|
|
@@ -123,22 +131,22 @@ def _transform_to_pattern(term: UTerm|PTerm,
|
|
|
123
131
|
result = f'{result}{pattern}{separator}'
|
|
124
132
|
result = result.rstrip(separator)
|
|
125
133
|
case _:
|
|
126
|
-
raise
|
|
134
|
+
raise EsgvocDbError(f"unsupported term kind '{term.kind}'")
|
|
127
135
|
return result
|
|
128
136
|
|
|
129
137
|
|
|
130
138
|
# TODO: support optionality of parts of composite.
|
|
131
139
|
# It is backtrack possible for more than one missing parts.
|
|
132
|
-
def
|
|
133
|
-
term: UTerm|PTerm,
|
|
140
|
+
def _valid_value_composite_term_separator_less(value: str,
|
|
141
|
+
term: UTerm | PTerm,
|
|
134
142
|
universe_session: Session,
|
|
135
143
|
project_session: Session)\
|
|
136
|
-
-> list[
|
|
144
|
+
-> list[UniverseTermError | ProjectTermError]:
|
|
137
145
|
result = list()
|
|
138
146
|
try:
|
|
139
147
|
pattern = _transform_to_pattern(term, universe_session, project_session)
|
|
140
148
|
try:
|
|
141
|
-
#
|
|
149
|
+
# Pattern terms are meant to be validated individually.
|
|
142
150
|
# So their regexes are defined as a whole (beginning with a ^ and ending with a $).
|
|
143
151
|
# As the pattern is a concatenation of plain values or regexes, multiple ^ and $ can exist.
|
|
144
152
|
# The latter must be removed.
|
|
@@ -146,34 +154,34 @@ def _valid_value_term_composite_separator_less(value: str,
|
|
|
146
154
|
pattern = f'^{pattern}$'
|
|
147
155
|
regex = re.compile(pattern)
|
|
148
156
|
except Exception as e:
|
|
149
|
-
msg = f
|
|
150
|
-
raise
|
|
157
|
+
msg = f"regex compilation error while processing term '{term.id}'':\n{e}"
|
|
158
|
+
raise EsgvocDbError(msg) from e
|
|
151
159
|
match = regex.match(value)
|
|
152
160
|
if match is None:
|
|
153
161
|
result.append(_create_term_error(value, term))
|
|
154
162
|
return result
|
|
155
163
|
except Exception as e:
|
|
156
|
-
msg = f
|
|
157
|
-
raise
|
|
164
|
+
msg = f"cannot validate separator less composite term '{term.id}':\n{e}"
|
|
165
|
+
raise EsgvocNotImplementedError(msg) from e
|
|
158
166
|
|
|
159
167
|
|
|
160
|
-
def
|
|
161
|
-
term: UTerm|PTerm,
|
|
168
|
+
def _valid_value_for_composite_term(value: str,
|
|
169
|
+
term: UTerm | PTerm,
|
|
162
170
|
universe_session: Session,
|
|
163
171
|
project_session: Session)\
|
|
164
|
-
-> list[
|
|
172
|
+
-> list[UniverseTermError | ProjectTermError]:
|
|
165
173
|
result = list()
|
|
166
|
-
separator, _ =
|
|
174
|
+
separator, _ = _get_composite_term_separator_parts(term)
|
|
167
175
|
if separator:
|
|
168
|
-
result =
|
|
176
|
+
result = _valid_value_composite_term_with_separator(value, term, universe_session,
|
|
169
177
|
project_session)
|
|
170
178
|
else:
|
|
171
|
-
result =
|
|
179
|
+
result = _valid_value_composite_term_separator_less(value, term, universe_session,
|
|
172
180
|
project_session)
|
|
173
181
|
return result
|
|
174
182
|
|
|
175
183
|
|
|
176
|
-
def _create_term_error(value: str, term: UTerm|PTerm) ->
|
|
184
|
+
def _create_term_error(value: str, term: UTerm | PTerm) -> UniverseTermError | ProjectTermError:
|
|
177
185
|
if isinstance(term, UTerm):
|
|
178
186
|
return UniverseTermError(value=value, term=term.specs, term_kind=term.kind,
|
|
179
187
|
data_descriptor_id=term.data_descriptor.id)
|
|
@@ -183,31 +191,35 @@ def _create_term_error(value: str, term: UTerm|PTerm) -> ValidationError:
|
|
|
183
191
|
|
|
184
192
|
|
|
185
193
|
def _valid_value(value: str,
|
|
186
|
-
term: UTerm|PTerm,
|
|
194
|
+
term: UTerm | PTerm,
|
|
187
195
|
universe_session: Session,
|
|
188
|
-
project_session: Session) -> list[
|
|
196
|
+
project_session: Session) -> list[UniverseTermError | ProjectTermError]:
|
|
189
197
|
result = list()
|
|
190
198
|
match term.kind:
|
|
191
199
|
case TermKind.PLAIN:
|
|
192
|
-
if
|
|
193
|
-
|
|
200
|
+
if constants.DRS_SPECS_JSON_KEY in term.specs:
|
|
201
|
+
if term.specs[constants.DRS_SPECS_JSON_KEY] != value:
|
|
202
|
+
result.append(_create_term_error(value, term))
|
|
203
|
+
else:
|
|
204
|
+
raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " +
|
|
205
|
+
"Can't validate it.")
|
|
194
206
|
case TermKind.PATTERN:
|
|
195
|
-
#
|
|
196
|
-
pattern_match = re.match(term.specs[
|
|
207
|
+
# TODO: Pattern can be compiled and stored for further matching.
|
|
208
|
+
pattern_match = re.match(term.specs[constants.PATTERN_JSON_KEY], value)
|
|
197
209
|
if pattern_match is None:
|
|
198
210
|
result.append(_create_term_error(value, term))
|
|
199
211
|
case TermKind.COMPOSITE:
|
|
200
|
-
result.extend(
|
|
212
|
+
result.extend(_valid_value_for_composite_term(value, term,
|
|
201
213
|
universe_session,
|
|
202
214
|
project_session))
|
|
203
215
|
case _:
|
|
204
|
-
raise
|
|
216
|
+
raise EsgvocDbError(f"unsupported term kind '{term.kind}'")
|
|
205
217
|
return result
|
|
206
218
|
|
|
207
219
|
|
|
208
220
|
def _check_value(value: str) -> str:
|
|
209
221
|
if not value or value.isspace():
|
|
210
|
-
raise
|
|
222
|
+
raise EsgvocValueError('value should be set')
|
|
211
223
|
else:
|
|
212
224
|
return value
|
|
213
225
|
|
|
@@ -215,9 +227,9 @@ def _check_value(value: str) -> str:
|
|
|
215
227
|
def _search_plain_term_and_valid_value(value: str,
|
|
216
228
|
collection_id: str,
|
|
217
229
|
project_session: Session) \
|
|
218
|
-
-> str|None:
|
|
230
|
+
-> str | None:
|
|
219
231
|
where_expression = and_(Collection.id == collection_id,
|
|
220
|
-
PTerm.specs[
|
|
232
|
+
PTerm.specs[constants.DRS_SPECS_JSON_KEY] == f'"{value}"')
|
|
221
233
|
statement = select(PTerm).join(Collection).where(where_expression)
|
|
222
234
|
term = project_session.exec(statement).one_or_none()
|
|
223
235
|
return term.id if term else None
|
|
@@ -238,7 +250,7 @@ def _valid_value_against_all_terms_of_collection(value: str,
|
|
|
238
250
|
result.append(pterm.id)
|
|
239
251
|
return result
|
|
240
252
|
else:
|
|
241
|
-
raise
|
|
253
|
+
raise EsgvocDbError(f"collection '{collection.id}' has no term")
|
|
242
254
|
|
|
243
255
|
|
|
244
256
|
def _valid_value_against_given_term(value: str,
|
|
@@ -247,27 +259,20 @@ def _valid_value_against_given_term(value: str,
|
|
|
247
259
|
term_id: str,
|
|
248
260
|
universe_session: Session,
|
|
249
261
|
project_session: Session)\
|
|
250
|
-
-> list[
|
|
251
|
-
#
|
|
262
|
+
-> list[UniverseTermError | ProjectTermError]:
|
|
263
|
+
# [OPTIMIZATION]
|
|
252
264
|
key = value + project_id + collection_id + term_id
|
|
253
265
|
if key in _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE:
|
|
254
266
|
result = _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key]
|
|
255
267
|
else:
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
else:
|
|
265
|
-
raise ValueError(f'unable to find term {term_id} ' +
|
|
266
|
-
f'in collection {collection_id}')
|
|
267
|
-
except Exception as e:
|
|
268
|
-
msg = f'unable to valid term {term_id} ' +\
|
|
269
|
-
f'in collection {collection_id}'
|
|
270
|
-
raise RuntimeError(msg) from e
|
|
268
|
+
term = _get_term_in_collection(collection_id,
|
|
269
|
+
term_id,
|
|
270
|
+
project_session)
|
|
271
|
+
if term:
|
|
272
|
+
result = _valid_value(value, term, universe_session, project_session)
|
|
273
|
+
else:
|
|
274
|
+
raise EsgvocNotFoundError(f"unable to find term '{term_id}' " +
|
|
275
|
+
f"in collection '{collection_id}'")
|
|
271
276
|
_VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key] = result
|
|
272
277
|
return result
|
|
273
278
|
|
|
@@ -280,11 +285,11 @@ def valid_term(value: str,
|
|
|
280
285
|
"""
|
|
281
286
|
Check if the given value may or may not represent the given term. The function returns
|
|
282
287
|
a report that contains the possible errors.
|
|
283
|
-
|
|
288
|
+
|
|
284
289
|
Behavior based on the nature of the term:
|
|
285
290
|
- plain term: the function tries to match the value on the drs_name field.
|
|
286
|
-
- term
|
|
287
|
-
- term
|
|
291
|
+
- pattern term: the function tries to match the value on the pattern field (regex).
|
|
292
|
+
- composite term:
|
|
288
293
|
- if the composite has got a separator, the function splits the value according to the\
|
|
289
294
|
separator of the term then it tries to match every part of the composite\
|
|
290
295
|
with every split of the value.
|
|
@@ -292,7 +297,7 @@ def valid_term(value: str,
|
|
|
292
297
|
composite so as to compare it as a regex to the value.
|
|
293
298
|
|
|
294
299
|
If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
|
|
295
|
-
the function raises a
|
|
300
|
+
the function raises an EsgvocNotFoundError.
|
|
296
301
|
|
|
297
302
|
:param value: A value to be validated
|
|
298
303
|
:type value: str
|
|
@@ -304,7 +309,7 @@ def valid_term(value: str,
|
|
|
304
309
|
:type term_id: str
|
|
305
310
|
:returns: A validation report that contains the possible errors
|
|
306
311
|
:rtype: ValidationReport
|
|
307
|
-
:raises
|
|
312
|
+
:raises EsgvocNotFoundError: If any of the provided ids is not found
|
|
308
313
|
"""
|
|
309
314
|
value = _check_value(value)
|
|
310
315
|
with get_universe_session() as universe_session, \
|
|
@@ -320,18 +325,15 @@ def _valid_term_in_collection(value: str,
|
|
|
320
325
|
universe_session: Session,
|
|
321
326
|
project_session: Session) \
|
|
322
327
|
-> list[MatchingTerm]:
|
|
323
|
-
#
|
|
328
|
+
# [OPTIMIZATION]
|
|
324
329
|
key = value + project_id + collection_id
|
|
325
330
|
if key in _VALID_TERM_IN_COLLECTION_CACHE:
|
|
326
331
|
result = _VALID_TERM_IN_COLLECTION_CACHE[key]
|
|
327
332
|
else:
|
|
328
333
|
value = _check_value(value)
|
|
329
334
|
result = list()
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
None)
|
|
333
|
-
if collections:
|
|
334
|
-
collection = collections[0]
|
|
335
|
+
collection = _get_collection_in_project(collection_id, project_session)
|
|
336
|
+
if collection:
|
|
335
337
|
match collection.term_kind:
|
|
336
338
|
case TermKind.PLAIN:
|
|
337
339
|
term_id_found = _search_plain_term_and_valid_value(value, collection_id,
|
|
@@ -349,8 +351,8 @@ def _valid_term_in_collection(value: str,
|
|
|
349
351
|
collection_id=collection_id,
|
|
350
352
|
term_id=term_id_found))
|
|
351
353
|
else:
|
|
352
|
-
msg = f
|
|
353
|
-
raise
|
|
354
|
+
msg = f"unable to find collection '{collection_id}'"
|
|
355
|
+
raise EsgvocNotFoundError(msg)
|
|
354
356
|
_VALID_TERM_IN_COLLECTION_CACHE[key] = result
|
|
355
357
|
return result
|
|
356
358
|
|
|
@@ -362,11 +364,11 @@ def valid_term_in_collection(value: str,
|
|
|
362
364
|
"""
|
|
363
365
|
Check if the given value may or may not represent a term in the given collection. The function
|
|
364
366
|
returns the terms that the value matches.
|
|
365
|
-
|
|
367
|
+
|
|
366
368
|
Behavior based on the nature of the term:
|
|
367
369
|
- plain term: the function tries to match the value on the drs_name field.
|
|
368
|
-
- term
|
|
369
|
-
- term
|
|
370
|
+
- pattern term: the function tries to match the value on the pattern field (regex).
|
|
371
|
+
- composite term:
|
|
370
372
|
- if the composite has got a separator, the function splits the value according to the \
|
|
371
373
|
separator of the term then it tries to match every part of the composite \
|
|
372
374
|
with every split of the value.
|
|
@@ -374,7 +376,7 @@ def valid_term_in_collection(value: str,
|
|
|
374
376
|
composite so as to compare it as a regex to the value.
|
|
375
377
|
|
|
376
378
|
If any of the provided ids (`project_id` or `collection_id`) is not found,
|
|
377
|
-
the function raises a
|
|
379
|
+
the function raises an EsgvocNotFoundError.
|
|
378
380
|
|
|
379
381
|
:param value: A value to be validated
|
|
380
382
|
:type value: str
|
|
@@ -384,7 +386,7 @@ def valid_term_in_collection(value: str,
|
|
|
384
386
|
:type collection_id: str
|
|
385
387
|
:returns: The list of terms that the value matches.
|
|
386
388
|
:rtype: list[MatchingTerm]
|
|
387
|
-
:raises
|
|
389
|
+
:raises EsgvocNotFoundError: If any of the provided ids is not found
|
|
388
390
|
"""
|
|
389
391
|
with get_universe_session() as universe_session, \
|
|
390
392
|
_get_project_session_with_exception(project_id) as project_session:
|
|
@@ -408,18 +410,18 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
|
|
|
408
410
|
"""
|
|
409
411
|
Check if the given value may or may not represent a term in the given project. The function
|
|
410
412
|
returns the terms that the value matches.
|
|
411
|
-
|
|
413
|
+
|
|
412
414
|
Behavior based on the nature of the term:
|
|
413
415
|
- plain term: the function tries to match the value on the drs_name field.
|
|
414
|
-
- term
|
|
415
|
-
- term
|
|
416
|
+
- pattern term: the function tries to match the value on the pattern field (regex).
|
|
417
|
+
- composite term:
|
|
416
418
|
- if the composite has got a separator, the function splits the value according to the \
|
|
417
419
|
separator of the term then it tries to match every part of the composite \
|
|
418
420
|
with every split of the value.
|
|
419
421
|
- if the composite hasn't got a separator, the function aggregates the parts of the \
|
|
420
422
|
composite so as to compare it as a regex to the value.
|
|
421
423
|
|
|
422
|
-
If the `project_id` is not found, the function raises a
|
|
424
|
+
If the `project_id` is not found, the function raises an EsgvocNotFoundError.
|
|
423
425
|
|
|
424
426
|
:param value: A value to be validated
|
|
425
427
|
:type value: str
|
|
@@ -427,7 +429,7 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
|
|
|
427
429
|
:type project_id: str
|
|
428
430
|
:returns: The list of terms that the value matches.
|
|
429
431
|
:rtype: list[MatchingTerm]
|
|
430
|
-
:raises
|
|
432
|
+
:raises EsgvocNotFoundError: If the `project_id` is not found
|
|
431
433
|
"""
|
|
432
434
|
with get_universe_session() as universe_session, \
|
|
433
435
|
_get_project_session_with_exception(project_id) as project_session:
|
|
@@ -438,11 +440,11 @@ def valid_term_in_all_projects(value: str) -> list[MatchingTerm]:
|
|
|
438
440
|
"""
|
|
439
441
|
Check if the given value may or may not represent a term in all projects. The function
|
|
440
442
|
returns the terms that the value matches.
|
|
441
|
-
|
|
443
|
+
|
|
442
444
|
Behavior based on the nature of the term:
|
|
443
445
|
- plain term: the function tries to match the value on the drs_name field.
|
|
444
|
-
- term
|
|
445
|
-
- term
|
|
446
|
+
- pattern term: the function tries to match the value on the pattern field (regex).
|
|
447
|
+
- composite term:
|
|
446
448
|
- if the composite has got a separator, the function splits the value according to the \
|
|
447
449
|
separator of the term then it tries to match every part of the composite \
|
|
448
450
|
with every split of the value.
|
|
@@ -463,422 +465,701 @@ def valid_term_in_all_projects(value: str) -> list[MatchingTerm]:
|
|
|
463
465
|
return result
|
|
464
466
|
|
|
465
467
|
|
|
466
|
-
def
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
468
|
+
def get_all_terms_in_collection(project_id: str,
|
|
469
|
+
collection_id: str,
|
|
470
|
+
selected_term_fields: Iterable[str] | None = None)\
|
|
471
|
+
-> list[DataDescriptor]:
|
|
472
|
+
"""
|
|
473
|
+
Gets all terms of the given collection of a project.
|
|
474
|
+
This function performs an exact match on the `project_id` and `collection_id`,
|
|
475
|
+
and does not search for similar or related projects and collections.
|
|
476
|
+
If any of the provided ids (`project_id` or `collection_id`) is not found, the function
|
|
477
|
+
returns an empty list.
|
|
478
|
+
|
|
479
|
+
:param project_id: A project id
|
|
480
|
+
:type project_id: str
|
|
481
|
+
:param collection_id: A collection id
|
|
482
|
+
:type collection_id: str
|
|
483
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
484
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
485
|
+
:type selected_term_fields: Iterable[str] | None
|
|
486
|
+
:returns: a list of term instances. Returns an empty list if no matches are found.
|
|
487
|
+
:rtype: list[DataDescriptor]
|
|
488
|
+
"""
|
|
489
|
+
result = list()
|
|
490
|
+
if connection := _get_project_connection(project_id):
|
|
491
|
+
with connection.create_session() as session:
|
|
492
|
+
collection = _get_collection_in_project(collection_id, session)
|
|
493
|
+
if collection:
|
|
494
|
+
result = _get_all_terms_in_collection(collection, selected_term_fields)
|
|
478
495
|
return result
|
|
479
496
|
|
|
480
497
|
|
|
481
|
-
def
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
If the
|
|
493
|
-
If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
|
|
494
|
-
the function returns an empty list.
|
|
495
|
-
|
|
496
|
-
Behavior based on search type:
|
|
497
|
-
- `EXACT` and absence of `settings`: returns zero or one Pydantic term instance in the list.
|
|
498
|
-
- `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
|
|
499
|
-
Pydantic term instances in the list.
|
|
498
|
+
def _get_all_collections_in_project(session: Session) -> list[Collection]:
|
|
499
|
+
project = session.get(Project, constants.SQLITE_FIRST_PK)
|
|
500
|
+
# Project can't be missing if session exists.
|
|
501
|
+
return project.collections # type: ignore
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
def get_all_collections_in_project(project_id: str) -> list[str]:
|
|
505
|
+
"""
|
|
506
|
+
Gets all collections of the given project.
|
|
507
|
+
This function performs an exact match on the `project_id` and
|
|
508
|
+
does not search for similar or related projects.
|
|
509
|
+
If the provided `project_id` is not found, the function returns an empty list.
|
|
500
510
|
|
|
501
511
|
:param project_id: A project id
|
|
502
512
|
:type project_id: str
|
|
503
|
-
:
|
|
504
|
-
:
|
|
505
|
-
:param term_id: A term id to be found
|
|
506
|
-
:type term_id: str
|
|
507
|
-
:param settings: The search settings
|
|
508
|
-
:type settings: SearchSettings|None
|
|
509
|
-
:returns: A list of Pydantic term instances. Returns an empty list if no matches are found.
|
|
510
|
-
:rtype: list[BaseModel]
|
|
513
|
+
:returns: A list of collection ids. Returns an empty list if no matches are found.
|
|
514
|
+
:rtype: list[str]
|
|
511
515
|
"""
|
|
512
|
-
result
|
|
513
|
-
if connection:=_get_project_connection(project_id):
|
|
516
|
+
result = list()
|
|
517
|
+
if connection := _get_project_connection(project_id):
|
|
514
518
|
with connection.create_session() as session:
|
|
515
|
-
|
|
516
|
-
|
|
519
|
+
collections = _get_all_collections_in_project(session)
|
|
520
|
+
for collection in collections:
|
|
521
|
+
result.append(collection.id)
|
|
517
522
|
return result
|
|
518
523
|
|
|
519
524
|
|
|
520
|
-
def
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
-> Sequence[PTerm]:
|
|
525
|
-
# Settings only apply on the term_id comparison.
|
|
526
|
-
where_expression = _create_str_comparison_expression(field=PTerm.id,
|
|
527
|
-
value=term_id,
|
|
528
|
-
settings=settings)
|
|
529
|
-
statement = select(PTerm).join(Collection).where(Collection.data_descriptor_id==data_descriptor_id,
|
|
530
|
-
where_expression)
|
|
531
|
-
results = session.exec(statement)
|
|
532
|
-
result = results.all()
|
|
525
|
+
def _get_all_terms_in_collection(collection: Collection,
|
|
526
|
+
selected_term_fields: Iterable[str] | None) -> list[DataDescriptor]:
|
|
527
|
+
result: list[DataDescriptor] = list()
|
|
528
|
+
instantiate_pydantic_terms(collection.terms, result, selected_term_fields)
|
|
533
529
|
return result
|
|
534
530
|
|
|
535
531
|
|
|
536
|
-
def
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
This function performs an exact match on the `project_id` and `data_descriptor_id`,
|
|
545
|
-
and does **not** search for similar or related projects and data descriptors.
|
|
546
|
-
The given `term_id` is searched according to the search type specified in the parameter `settings`,
|
|
547
|
-
which allows a flexible matching (e.g., `LIKE` may return multiple results).
|
|
548
|
-
If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
|
|
549
|
-
If any of the provided ids (`project_id`, `data_descriptor_id` or `term_id`) is not found,
|
|
550
|
-
the function returns an empty list.
|
|
551
|
-
|
|
552
|
-
Behavior based on search type:
|
|
553
|
-
- `EXACT` and absence of `settings`: returns zero or one Pydantic term instance and \
|
|
554
|
-
collection id in the list.
|
|
555
|
-
- `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
|
|
556
|
-
Pydantic term instances and collection ids in the list.
|
|
532
|
+
def get_all_terms_in_project(project_id: str,
|
|
533
|
+
selected_term_fields: Iterable[str] | None = None) -> list[DataDescriptor]:
|
|
534
|
+
"""
|
|
535
|
+
Gets all terms of the given project.
|
|
536
|
+
This function performs an exact match on the `project_id` and
|
|
537
|
+
does not search for similar or related projects.
|
|
538
|
+
Terms are unique within a collection but may have some synonyms in a project.
|
|
539
|
+
If the provided `project_id` is not found, the function returns an empty list.
|
|
557
540
|
|
|
558
541
|
:param project_id: A project id
|
|
559
542
|
:type project_id: str
|
|
560
|
-
:param
|
|
561
|
-
|
|
562
|
-
:
|
|
563
|
-
:
|
|
564
|
-
:
|
|
565
|
-
:type settings: SearchSettings|None
|
|
566
|
-
:returns: A list of tuple of Pydantic term instances and related collection ids. \
|
|
567
|
-
Returns an empty list if no matches are found.
|
|
568
|
-
:rtype: list[tuple[BaseModel, str]]
|
|
543
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
544
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
545
|
+
:type selected_term_fields: Iterable[str] | None
|
|
546
|
+
:returns: A list of term instances. Returns an empty list if no matches are found.
|
|
547
|
+
:rtype: list[DataDescriptor]
|
|
569
548
|
"""
|
|
570
549
|
result = list()
|
|
571
|
-
if connection:=_get_project_connection(project_id):
|
|
550
|
+
if connection := _get_project_connection(project_id):
|
|
572
551
|
with connection.create_session() as session:
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
for pterm in terms:
|
|
578
|
-
collection_id = pterm.collection.id
|
|
579
|
-
term = instantiate_pydantic_term(pterm)
|
|
580
|
-
result.append((term, collection_id))
|
|
552
|
+
collections = _get_all_collections_in_project(session)
|
|
553
|
+
for collection in collections:
|
|
554
|
+
# Term may have some synonyms in a project.
|
|
555
|
+
result.extend(_get_all_terms_in_collection(collection, selected_term_fields))
|
|
581
556
|
return result
|
|
582
557
|
|
|
583
558
|
|
|
584
|
-
def
|
|
585
|
-
|
|
586
|
-
settings: SearchSettings|None = None) \
|
|
587
|
-
-> list[tuple[list[tuple[BaseModel, str]], str]]:
|
|
559
|
+
def get_all_terms_in_all_projects(selected_term_fields: Iterable[str] | None = None) \
|
|
560
|
+
-> list[tuple[str, list[DataDescriptor]]]:
|
|
588
561
|
"""
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
If any of the provided ids (`data_descriptor_id` or `term_id`) is not found,
|
|
597
|
-
the function returns an empty list.
|
|
598
|
-
|
|
599
|
-
Behavior based on search type:
|
|
600
|
-
- `EXACT` and absence of `settings`: returns zero or one Pydantic term instance and \
|
|
601
|
-
collection id in the list.
|
|
602
|
-
- `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
|
|
603
|
-
Pydantic term instances and collection ids in the list.
|
|
604
|
-
|
|
605
|
-
:param data_descriptor_id: A data descriptor
|
|
606
|
-
:type data_descriptor_id: str
|
|
607
|
-
:param term_id: A term id to be found
|
|
608
|
-
:type term_id: str
|
|
609
|
-
:param settings: The search settings
|
|
610
|
-
:type settings: SearchSettings|None
|
|
611
|
-
:returns: A list of tuple of matching terms with their collection id, per project. \
|
|
612
|
-
Returns an empty list if no matches are found.
|
|
613
|
-
:rtype: list[tuple[list[tuple[BaseModel, str]], str]]
|
|
562
|
+
Gets all terms of all projects.
|
|
563
|
+
|
|
564
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
565
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
566
|
+
:type selected_term_fields: Iterable[str] | None
|
|
567
|
+
:returns: A list of tuples, each made of a project id and the term instances of that project.
|
|
568
|
+
:rtype: list[tuple[str, list[DataDescriptor]]]
|
|
614
569
|
"""
|
|
615
570
|
project_ids = get_all_projects()
|
|
616
|
-
result
|
|
571
|
+
result = list()
|
|
617
572
|
for project_id in project_ids:
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
term_id,
|
|
621
|
-
settings)
|
|
622
|
-
result.append((matching_terms, project_id))
|
|
573
|
+
terms = get_all_terms_in_project(project_id, selected_term_fields)
|
|
574
|
+
result.append((project_id, terms))
|
|
623
575
|
return result
|
|
624
576
|
|
|
625
577
|
|
|
626
|
-
def
|
|
627
|
-
session: Session,
|
|
628
|
-
settings: SearchSettings|None) -> Sequence[PTerm]:
|
|
629
|
-
where_expression = _create_str_comparison_expression(field=PTerm.id,
|
|
630
|
-
value=term_id,
|
|
631
|
-
settings=settings)
|
|
632
|
-
statement = select(PTerm).where(where_expression)
|
|
633
|
-
results = session.exec(statement).all()
|
|
634
|
-
return results
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
def find_terms_in_all_projects(term_id: str,
|
|
638
|
-
settings: SearchSettings|None = None) \
|
|
639
|
-
-> list[BaseModel]:
|
|
640
|
-
"""
|
|
641
|
-
Finds one or more terms, based on the specified search settings, in all projects.
|
|
642
|
-
The given `term_id` is searched according to the search type specified in the parameter `settings`,
|
|
643
|
-
which allows a flexible matching (e.g., `LIKE` may return multiple results).
|
|
644
|
-
If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
|
|
645
|
-
Terms are unique within a collection but may have some synonyms within a project.
|
|
646
|
-
If the provided `term_id` is not found, the function returns an empty list.
|
|
647
|
-
|
|
648
|
-
:param term_id: A term id to be found
|
|
649
|
-
:type term_id: str
|
|
650
|
-
:param settings: The search settings
|
|
651
|
-
:type settings: SearchSettings|None
|
|
652
|
-
:returns: A list of Pydantic term instances. Returns an empty list if no matches are found.
|
|
653
|
-
:rtype: list[BaseModel]
|
|
578
|
+
def get_all_projects() -> list[str]:
|
|
654
579
|
"""
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
580
|
+
Gets all projects.
|
|
581
|
+
|
|
582
|
+
:returns: A list of project ids.
|
|
583
|
+
:rtype: list[str]
|
|
584
|
+
"""
|
|
585
|
+
return list(service.current_state.projects.keys())
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
def _get_term_in_project(term_id: str, session: Session) -> PTerm | None:
|
|
589
|
+
statement = select(PTerm).where(PTerm.id == term_id)
|
|
590
|
+
results = session.exec(statement)
|
|
591
|
+
result = results.first() # Term ids are not supposed to be unique within a project.
|
|
659
592
|
return result
|
|
660
593
|
|
|
661
594
|
|
|
662
|
-
def
|
|
663
|
-
|
|
664
|
-
settings: SearchSettings|None = None) \
|
|
665
|
-
-> list[BaseModel]:
|
|
595
|
+
def get_term_in_project(project_id: str, term_id: str,
|
|
596
|
+
selected_term_fields: Iterable[str] | None = None) -> DataDescriptor | None:
|
|
666
597
|
"""
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
If the
|
|
673
|
-
|
|
674
|
-
If any of the provided ids (`project_id` or `term_id`) is not found, the function returns
|
|
675
|
-
an empty list.
|
|
598
|
+
Returns the first occurrence of the terms, in the given project, whose id corresponds exactly to
|
|
599
|
+
the given term id.
|
|
600
|
+
Terms are unique within a collection but may have some synonyms in a project.
|
|
601
|
+
This function performs an exact match on the `project_id` and `term_id`, and does not search
|
|
602
|
+
for similar or related projects and terms.
|
|
603
|
+
If any of the provided ids (`project_id` or `term_id`) is not found,
|
|
604
|
+
the function returns `None`.
|
|
676
605
|
|
|
677
|
-
:param project_id:
|
|
606
|
+
:param project_id: The id of the given project.
|
|
678
607
|
:type project_id: str
|
|
679
|
-
:param term_id:
|
|
608
|
+
:param term_id: The id of a term to be found.
|
|
680
609
|
:type term_id: str
|
|
681
|
-
:param
|
|
682
|
-
|
|
683
|
-
:
|
|
684
|
-
:
|
|
610
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
611
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
612
|
+
:type selected_term_fields: Iterable[str] | None
|
|
613
|
+
:returns: A term instance. Returns `None` if no match is found.
|
|
614
|
+
:rtype: DataDescriptor | None
|
|
685
615
|
"""
|
|
686
|
-
result:
|
|
687
|
-
if connection:=_get_project_connection(project_id):
|
|
616
|
+
result: DataDescriptor | None = None
|
|
617
|
+
if connection := _get_project_connection(project_id):
|
|
688
618
|
with connection.create_session() as session:
|
|
689
|
-
|
|
690
|
-
|
|
619
|
+
term_found = _get_term_in_project(term_id, session)
|
|
620
|
+
if term_found:
|
|
621
|
+
result = instantiate_pydantic_term(term_found, selected_term_fields)
|
|
691
622
|
return result
|
|
692
623
|
|
|
693
624
|
|
|
694
|
-
def
|
|
695
|
-
|
|
696
|
-
|
|
625
|
+
def _get_term_in_collection(collection_id: str, term_id: str, session: Session) -> PTerm | None:
|
|
626
|
+
statement = select(PTerm).join(Collection).where(Collection.id == collection_id,
|
|
627
|
+
PTerm.id == term_id)
|
|
628
|
+
results = session.exec(statement)
|
|
629
|
+
result = results.one_or_none()
|
|
630
|
+
return result
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
def get_term_in_collection(project_id: str, collection_id: str, term_id: str,
|
|
634
|
+
selected_term_fields: Iterable[str] | None = None) -> DataDescriptor | None:
|
|
697
635
|
"""
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
636
|
+
Returns the term, in the given project and collection,
|
|
637
|
+
whose id corresponds exactly to the given term id.
|
|
638
|
+
This function performs an exact match on the `project_id`, `collection_id` and `term_id`,
|
|
639
|
+
and does not search for similar or related projects, collections and terms.
|
|
640
|
+
If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
|
|
641
|
+
the function returns `None`.
|
|
703
642
|
|
|
704
|
-
:param project_id:
|
|
643
|
+
:param project_id: The id of the given project.
|
|
705
644
|
:type project_id: str
|
|
706
|
-
:param collection_id:
|
|
645
|
+
:param collection_id: The id of the given collection.
|
|
707
646
|
:type collection_id: str
|
|
708
|
-
:
|
|
709
|
-
:
|
|
647
|
+
:param term_id: The id of a term to be found.
|
|
648
|
+
:type term_id: str
|
|
649
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
650
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
651
|
+
:type selected_term_fields: Iterable[str] | None
|
|
652
|
+
:returns: A term instance. Returns `None` if no match is found.
|
|
653
|
+
:rtype: DataDescriptor | None
|
|
710
654
|
"""
|
|
711
|
-
result =
|
|
712
|
-
if connection:=_get_project_connection(project_id):
|
|
655
|
+
result: DataDescriptor | None = None
|
|
656
|
+
if connection := _get_project_connection(project_id):
|
|
713
657
|
with connection.create_session() as session:
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
if collections:
|
|
718
|
-
collection = collections[0]
|
|
719
|
-
result = _get_all_terms_in_collection(collection)
|
|
658
|
+
term_found = _get_term_in_collection(collection_id, term_id, session)
|
|
659
|
+
if term_found:
|
|
660
|
+
result = instantiate_pydantic_term(term_found, selected_term_fields)
|
|
720
661
|
return result
|
|
721
662
|
|
|
722
663
|
|
|
723
|
-
def
|
|
724
|
-
|
|
725
|
-
settings: SearchSettings|None) \
|
|
726
|
-
-> Sequence[Collection]:
|
|
727
|
-
where_exp = _create_str_comparison_expression(field=Collection.id,
|
|
728
|
-
value=collection_id,
|
|
729
|
-
settings=settings)
|
|
730
|
-
statement = select(Collection).where(where_exp)
|
|
664
|
+
def _get_collection_in_project(collection_id: str, session: Session) -> Collection | None:
|
|
665
|
+
statement = select(Collection).where(Collection.id == collection_id)
|
|
731
666
|
results = session.exec(statement)
|
|
732
|
-
result = results.
|
|
667
|
+
result = results.one_or_none()
|
|
733
668
|
return result
|
|
734
669
|
|
|
735
670
|
|
|
736
|
-
def
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
The given `collection_id` is searched according to the search type specified in
|
|
745
|
-
the parameter `settings`,
|
|
746
|
-
which allows a flexible matching (e.g., `LIKE` may return multiple results).
|
|
747
|
-
If the parameter `settings` is `None`, this function performs an exact match on the `collection_id`.
|
|
748
|
-
If any of the provided ids (`project_id` or `collection_id`) is not found, the function returns
|
|
749
|
-
an empty list.
|
|
750
|
-
|
|
751
|
-
Behavior based on search type:
|
|
752
|
-
- `EXACT` and absence of `settings`: returns zero or one collection context in the list.
|
|
753
|
-
- `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
|
|
754
|
-
collection contexts in the list.
|
|
671
|
+
def get_collection_in_project(project_id: str, collection_id: str) -> tuple[str, dict] | None:
|
|
672
|
+
"""
|
|
673
|
+
Returns the collection, in the given project, whose id corresponds exactly to
|
|
674
|
+
the given collection id.
|
|
675
|
+
This function performs an exact match on the `project_id` and `collection_id`, and does not search
|
|
676
|
+
for similar or related projects and collections.
|
|
677
|
+
If any of the provided ids (`project_id` or `collection_id`) is not found,
|
|
678
|
+
the function returns `None`.
|
|
755
679
|
|
|
756
|
-
:param project_id:
|
|
680
|
+
:param project_id: The id of the given project.
|
|
757
681
|
:type project_id: str
|
|
758
|
-
:param collection_id:
|
|
682
|
+
:param collection_id: The id of a collection to be found.
|
|
759
683
|
:type collection_id: str
|
|
760
|
-
:
|
|
761
|
-
:
|
|
762
|
-
:returns: A list of collection contexts. Returns an empty list if no matches are found.
|
|
763
|
-
:rtype: list[dict]
|
|
684
|
+
:returns: A collection id and context. Returns `None` if no match is found.
|
|
685
|
+
:rtype: tuple[str, dict] | None
|
|
764
686
|
"""
|
|
765
|
-
result =
|
|
766
|
-
if connection:=_get_project_connection(project_id):
|
|
687
|
+
result: tuple[str, dict] | None = None
|
|
688
|
+
if connection := _get_project_connection(project_id):
|
|
767
689
|
with connection.create_session() as session:
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
for collection in collections:
|
|
772
|
-
result.append(collection.context)
|
|
690
|
+
collection_found = _get_collection_in_project(collection_id, session)
|
|
691
|
+
if collection_found:
|
|
692
|
+
result = collection_found.id, collection_found.context
|
|
773
693
|
return result
|
|
774
694
|
|
|
775
695
|
|
|
776
|
-
def
|
|
777
|
-
project = session.get(Project, esgvoc.core.constants.SQLITE_FIRST_PK)
|
|
778
|
-
# Project can't be missing if session exists.
|
|
779
|
-
return project.collections # type: ignore
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
def get_all_collections_in_project(project_id: str) -> list[str]:
|
|
696
|
+
def get_project(project_id: str) -> ProjectSpecs | None:
|
|
783
697
|
"""
|
|
784
|
-
|
|
785
|
-
This function performs an exact match on the `project_id` and
|
|
786
|
-
does
|
|
787
|
-
If the provided `project_id` is not found, the function returns
|
|
698
|
+
Gets a project and returns its specifications.
|
|
699
|
+
This function performs an exact match on the `project_id` and
|
|
700
|
+
does not search for similar or related projects.
|
|
701
|
+
If the provided `project_id` is not found, the function returns `None`.
|
|
788
702
|
|
|
789
|
-
:param project_id: A project id
|
|
703
|
+
:param project_id: A project id to be found
|
|
790
704
|
:type project_id: str
|
|
791
|
-
:returns:
|
|
792
|
-
:rtype:
|
|
705
|
+
:returns: The specs of the project found. Returns `None` if no matches are found.
|
|
706
|
+
:rtype: ProjectSpecs | None
|
|
793
707
|
"""
|
|
794
|
-
result =
|
|
795
|
-
if connection:=_get_project_connection(project_id):
|
|
708
|
+
result: ProjectSpecs | None = None
|
|
709
|
+
if connection := _get_project_connection(project_id):
|
|
796
710
|
with connection.create_session() as session:
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
711
|
+
project = session.get(Project, constants.SQLITE_FIRST_PK)
|
|
712
|
+
try:
|
|
713
|
+
# Project can't be missing if session exists.
|
|
714
|
+
result = ProjectSpecs(**project.specs) # type: ignore
|
|
715
|
+
except Exception as e:
|
|
716
|
+
msg = f"unable to read specs in project '{project_id}'"
|
|
717
|
+
raise EsgvocDbError(msg) from e
|
|
800
718
|
return result
|
|
801
719
|
|
|
802
720
|
|
|
803
|
-
def
|
|
804
|
-
|
|
805
|
-
|
|
721
|
+
def _get_collection_from_data_descriptor_in_project(data_descriptor_id: str,
|
|
722
|
+
session: Session) -> Collection | None:
|
|
723
|
+
statement = select(Collection).where(Collection.data_descriptor_id == data_descriptor_id)
|
|
724
|
+
result = session.exec(statement).one_or_none()
|
|
806
725
|
return result
|
|
807
726
|
|
|
808
727
|
|
|
809
|
-
def
|
|
728
|
+
def get_collection_from_data_descriptor_in_project(project_id: str,
|
|
729
|
+
data_descriptor_id: str) \
|
|
730
|
+
-> tuple[str, dict] | None:
|
|
810
731
|
"""
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
If the provided `project_id` is not found,
|
|
816
|
-
|
|
817
|
-
|
|
732
|
+
Returns the collection, in the given project, that corresponds to the given data descriptor
|
|
733
|
+
in the universe.
|
|
734
|
+
This function performs an exact match on the `project_id` and `data_descriptor_id`,
|
|
735
|
+
and does not search for similar or related projects and data descriptors.
|
|
736
|
+
If any of the provided ids (`project_id` or `data_descriptor_id`) is not found, or if
|
|
737
|
+
there is no collection corresponding to the given data descriptor, the function returns `None`.
|
|
738
|
+
|
|
739
|
+
:param project_id: The id of the given project.
|
|
818
740
|
:type project_id: str
|
|
819
|
-
:
|
|
820
|
-
:
|
|
741
|
+
:param data_descriptor_id: The id of the given data descriptor.
|
|
742
|
+
:type data_descriptor_id: str
|
|
743
|
+
:returns: A collection id and context. Returns `None` if no matches are found.
|
|
744
|
+
:rtype: tuple[str, dict] | None
|
|
745
|
+
"""
|
|
746
|
+
result: tuple[str, dict] | None = None
|
|
747
|
+
if connection := _get_project_connection(project_id):
|
|
748
|
+
with connection.create_session() as session:
|
|
749
|
+
collection_found = _get_collection_from_data_descriptor_in_project(data_descriptor_id,
|
|
750
|
+
session)
|
|
751
|
+
if collection_found:
|
|
752
|
+
result = collection_found.id, collection_found.context
|
|
753
|
+
return result
|
|
754
|
+
|
|
755
|
+
|
|
756
|
+
def get_collection_from_data_descriptor_in_all_projects(data_descriptor_id: str) \
|
|
757
|
+
-> list[tuple[str, str, dict]]:
|
|
758
|
+
"""
|
|
759
|
+
Returns the collections, in all projects, that correspond to the given data descriptor
|
|
760
|
+
in the universe.
|
|
761
|
+
This function performs an exact match on `data_descriptor_id`,
|
|
762
|
+
and does not search for similar or related data descriptors.
|
|
763
|
+
If the provided `data_descriptor_id` is not found, or if
|
|
764
|
+
there is no collection corresponding to the given data descriptor, the function returns
|
|
765
|
+
an empty list.
|
|
766
|
+
|
|
767
|
+
:param data_descriptor_id: The id of the given data descriptor.
|
|
768
|
+
:type data_descriptor_id: str
|
|
769
|
+
:returns: A list of tuples of project id, collection id and collection context. \
|
|
770
|
+
Returns an empty list if no matches are found.
|
|
771
|
+
:rtype: list[tuple[str, str, dict]]
|
|
821
772
|
"""
|
|
822
773
|
result = list()
|
|
823
|
-
|
|
774
|
+
project_ids = get_all_projects()
|
|
775
|
+
for project_id in project_ids:
|
|
776
|
+
collection_found = get_collection_from_data_descriptor_in_project(project_id,
|
|
777
|
+
data_descriptor_id)
|
|
778
|
+
if collection_found:
|
|
779
|
+
result.append((project_id, collection_found[0], collection_found[1]))
|
|
780
|
+
return result
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
def _get_term_from_universe_term_id_in_project(data_descriptor_id: str,
|
|
784
|
+
universe_term_id: str,
|
|
785
|
+
project_session: Session) -> PTerm | None:
|
|
786
|
+
statement = select(PTerm).join(Collection).where(Collection.data_descriptor_id == data_descriptor_id,
|
|
787
|
+
PTerm.id == universe_term_id)
|
|
788
|
+
results = project_session.exec(statement)
|
|
789
|
+
result = results.one_or_none()
|
|
790
|
+
return result
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
def get_term_from_universe_term_id_in_project(project_id: str,
|
|
794
|
+
data_descriptor_id: str,
|
|
795
|
+
universe_term_id: str,
|
|
796
|
+
selected_term_fields: Iterable[str] | None = None) \
|
|
797
|
+
-> tuple[str, DataDescriptor] | None:
|
|
798
|
+
"""
|
|
799
|
+
Returns the term, in the given project, that corresponds to the given term in the universe.
|
|
800
|
+
This function performs an exact match on the `project_id`, `data_descriptor_id`
|
|
801
|
+
and `universe_term_id`, and does not search for similar or related projects, data descriptors
|
|
802
|
+
and terms. If any of the provided ids (`project_id`, `data_descriptor_id` or `universe_term_id`)
|
|
803
|
+
is not found, or if there is no project term corresponding to the given universe term,
|
|
804
|
+
the function returns `None`.
|
|
805
|
+
|
|
806
|
+
:param project_id: The id of the given project.
|
|
807
|
+
:type project_id: str
|
|
808
|
+
:param data_descriptor_id: The id of the data descriptor that contains the given universe term.
|
|
809
|
+
:type data_descriptor_id: str
|
|
810
|
+
:param universe_term_id: The id of the given universe term.
|
|
811
|
+
:type universe_term_id: str
|
|
812
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
813
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
814
|
+
:type selected_term_fields: Iterable[str] | None
|
|
815
|
+
:returns: A collection id and the project term instance. Returns `None` if no matches are found.
|
|
816
|
+
:rtype: tuple[str, DataDescriptor] | None
|
|
817
|
+
"""
|
|
818
|
+
result: tuple[str, DataDescriptor] | None = None
|
|
819
|
+
if connection := _get_project_connection(project_id):
|
|
824
820
|
with connection.create_session() as session:
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
821
|
+
term_found = _get_term_from_universe_term_id_in_project(data_descriptor_id,
|
|
822
|
+
universe_term_id,
|
|
823
|
+
session)
|
|
824
|
+
if term_found:
|
|
825
|
+
pydantic_term = instantiate_pydantic_term(term_found, selected_term_fields)
|
|
826
|
+
result = (term_found.collection.id, pydantic_term)
|
|
829
827
|
return result
|
|
830
828
|
|
|
831
829
|
|
|
832
|
-
def
|
|
830
|
+
def get_term_from_universe_term_id_in_all_projects(data_descriptor_id: str,
|
|
831
|
+
universe_term_id: str,
|
|
832
|
+
selected_term_fields: Iterable[str] | None = None) \
|
|
833
|
+
-> list[tuple[str, str, DataDescriptor]]:
|
|
833
834
|
"""
|
|
834
|
-
|
|
835
|
+
Returns the terms, in all projects, that correspond to the given term in the universe.
|
|
836
|
+
This function performs an exact match on the `data_descriptor_id`
|
|
837
|
+
and `universe_term_id`, and does not search for similar or related data descriptors
|
|
838
|
+
and terms. If any of the provided ids (`data_descriptor_id` or `universe_term_id`)
|
|
839
|
+
is not found, or if there is no project term corresponding to the given universe term,
|
|
840
|
+
the function returns an empty list.
|
|
835
841
|
|
|
836
|
-
:
|
|
837
|
-
:
|
|
842
|
+
:param data_descriptor_id: The id of the data descriptor that contains the given universe term.
|
|
843
|
+
:type data_descriptor_id: str
|
|
844
|
+
:param universe_term_id: The id of the given universe term.
|
|
845
|
+
:type universe_term_id: str
|
|
846
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
847
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
848
|
+
:type selected_term_fields: Iterable[str] | None
|
|
849
|
+
:returns: A list of tuples of project id, collection id and the project term instance. \
|
|
850
|
+
Returns an empty list if no matches are found.
|
|
851
|
+
:rtype: list[tuple[str, str, DataDescriptor]]
|
|
838
852
|
"""
|
|
853
|
+
result: list[tuple[str, str, DataDescriptor]] = list()
|
|
839
854
|
project_ids = get_all_projects()
|
|
840
|
-
result = list()
|
|
841
855
|
for project_id in project_ids:
|
|
842
|
-
|
|
856
|
+
term_found = get_term_from_universe_term_id_in_project(project_id,
|
|
857
|
+
data_descriptor_id,
|
|
858
|
+
universe_term_id,
|
|
859
|
+
selected_term_fields)
|
|
860
|
+
if term_found:
|
|
861
|
+
result.append((project_id, term_found[0], term_found[1]))
|
|
843
862
|
return result
|
|
844
863
|
|
|
845
864
|
|
|
846
|
-
def
|
|
865
|
+
def _find_collections_in_project(expression: str,
|
|
866
|
+
session: Session,
|
|
867
|
+
only_id: bool = False,
|
|
868
|
+
limit: int | None = None,
|
|
869
|
+
offset: int | None = None) -> Sequence[Collection]:
|
|
870
|
+
matching_condition = generate_matching_condition(PCollectionFTS5, expression, only_id)
|
|
871
|
+
tmp_statement = select(PCollectionFTS5).where(matching_condition)
|
|
872
|
+
statement = select(Collection).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
|
|
873
|
+
return execute_match_statement(expression, statement, session)
|
|
874
|
+
|
|
875
|
+
|
|
876
|
+
def find_collections_in_project(expression: str, project_id: str,
|
|
877
|
+
only_id: bool = False,
|
|
878
|
+
limit: int | None = None,
|
|
879
|
+
offset: int | None = None) -> list[tuple[str, dict]]:
|
|
847
880
|
"""
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
881
|
+
Find collections in the given project based on a full text search defined by the given `expression`.
|
|
882
|
+
The `expression` comes from the powerful
|
|
883
|
+
`SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
|
|
884
|
+
and corresponds to the expression of the `MATCH` operator.
|
|
885
|
+
It can be composed of one or multiple keywords combined with boolean
|
|
886
|
+
operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
|
|
887
|
+
with the wildcard `*`.
|
|
888
|
+
The function returns a list of collection ids and contexts, sorted according to the
|
|
889
|
+
bm25 ranking metric (list index `0` has the highest rank).
|
|
890
|
+
This function performs an exact match on the `project_id`,
|
|
891
|
+
and does not search for similar or related projects.
|
|
892
|
+
If the provided `expression` does not hit any collection or the given `project_id` does not
|
|
893
|
+
exactly match the id of a project, the function returns an empty list.
|
|
894
|
+
The function searches for the `expression` in the collection specifications.
|
|
895
|
+
However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
|
|
896
|
+
collections. **At the moment, `only_id` is set to `True` as the collections
|
|
897
|
+
haven't got any description.**
|
|
898
|
+
|
|
899
|
+
:param expression: The full text search expression.
|
|
900
|
+
:type expression: str
|
|
901
|
+
:param project_id: The id of the given project.
|
|
854
902
|
:type project_id: str
|
|
855
|
-
:
|
|
856
|
-
:
|
|
903
|
+
:param only_id: Performs the search only on ids, otherwise on all the specifications.
|
|
904
|
+
:type only_id: bool
|
|
905
|
+
:param limit: Limit the number of returned items found. Returns all items found if \
|
|
906
|
+
`limit` is either `None`, zero or negative.
|
|
907
|
+
:type limit: int | None
|
|
908
|
+
:param offset: Skips `offset` number of items found. Ignored if `offset` is \
|
|
909
|
+
either `None`, zero or negative.
|
|
910
|
+
:type offset: int | None
|
|
911
|
+
:returns: A list of collection ids and contexts. Returns an empty list if no matches are found.
|
|
912
|
+
:rtype: list[tuple[str, dict]]
|
|
913
|
+
:raises EsgvocValueError: If the `expression` cannot be interpreted.
|
|
857
914
|
"""
|
|
858
|
-
result =
|
|
859
|
-
if connection:=_get_project_connection(project_id):
|
|
915
|
+
result: list[tuple[str, dict]] = list()
|
|
916
|
+
if connection := _get_project_connection(project_id):
|
|
860
917
|
with connection.create_session() as session:
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
918
|
+
collections_found = _find_collections_in_project(expression, session, only_id,
|
|
919
|
+
limit, offset)
|
|
920
|
+
for collection in collections_found:
|
|
921
|
+
result.append((collection.id, collection.context))
|
|
864
922
|
return result
|
|
865
923
|
|
|
866
924
|
|
|
867
|
-
def
|
|
925
|
+
def _find_terms_in_collection(expression: str,
|
|
926
|
+
collection_id: str,
|
|
927
|
+
session: Session,
|
|
928
|
+
only_id: bool = False,
|
|
929
|
+
limit: int | None = None,
|
|
930
|
+
offset: int | None = None) -> Sequence[PTerm]:
|
|
931
|
+
matching_condition = generate_matching_condition(PTermFTS5, expression, only_id)
|
|
932
|
+
where_condition = Collection.id == collection_id, matching_condition
|
|
933
|
+
tmp_statement = select(PTermFTS5).join(Collection).where(*where_condition)
|
|
934
|
+
statement = select(PTerm).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
|
|
935
|
+
return execute_match_statement(expression, statement, session)
|
|
936
|
+
|
|
937
|
+
|
|
938
|
+
def _find_terms_in_project(expression: str,
|
|
939
|
+
session: Session,
|
|
940
|
+
only_id: bool = False,
|
|
941
|
+
limit: int | None = None,
|
|
942
|
+
offset: int | None = None) -> Sequence[PTerm]:
|
|
943
|
+
matching_condition = generate_matching_condition(PTermFTS5, expression, only_id)
|
|
944
|
+
tmp_statement = select(PTermFTS5).where(matching_condition)
|
|
945
|
+
statement = select(PTerm).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
|
|
946
|
+
return execute_match_statement(expression, statement, session)
|
|
947
|
+
|
|
948
|
+
|
|
949
|
+
def find_terms_in_collection(expression: str, project_id: str,
|
|
950
|
+
collection_id: str,
|
|
951
|
+
only_id: bool = False,
|
|
952
|
+
limit: int | None = None,
|
|
953
|
+
offset: int | None = None,
|
|
954
|
+
selected_term_fields: Iterable[str] | None = None) \
|
|
955
|
+
-> list[DataDescriptor]:
|
|
868
956
|
"""
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
957
|
+
Find terms in the given project and collection based on a full text search defined by the given
|
|
958
|
+
`expression`. The `expression` comes from the powerful
|
|
959
|
+
`SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
|
|
960
|
+
and corresponds to the expression of the `MATCH` operator.
|
|
961
|
+
It can be composed of one or multiple keywords combined with boolean
|
|
962
|
+
operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
|
|
963
|
+
with the wildcard `*`.
|
|
964
|
+
The function returns a list of term instances, sorted according to the
|
|
965
|
+
bm25 ranking metric (list index `0` has the highest rank).
|
|
966
|
+
This function performs an exact match on the `project_id` and `collection_id`,
|
|
967
|
+
and does not search for similar or related projects and collections.
|
|
968
|
+
If the provided `expression` does not hit any term or if any of the provided ids
|
|
969
|
+
(`project_id` or `collection_id`) is not found, the function returns an empty list.
|
|
970
|
+
The function searches for the `expression` in the term specifications.
|
|
971
|
+
However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
|
|
972
|
+
terms.
|
|
973
|
+
|
|
974
|
+
:param expression: The full text search expression.
|
|
975
|
+
:type expression: str
|
|
976
|
+
:param project_id: The id of the given project.
|
|
977
|
+
:type project_id: str
|
|
978
|
+
:param collection_id: The id of the given collection.
|
|
979
|
+
:type collection_id: str
|
|
980
|
+
:param only_id: Performs the search only on ids, otherwise on all the specifications.
|
|
981
|
+
:type only_id: bool
|
|
982
|
+
:param limit: Limit the number of returned items found. Returns all items found if \
|
|
983
|
+
`limit` is either `None`, zero or negative.
|
|
984
|
+
:type limit: int | None
|
|
985
|
+
:param offset: Skips `offset` number of items found. Ignored if `offset` is \
|
|
986
|
+
either `None`, zero or negative.
|
|
987
|
+
:type offset: int | None
|
|
988
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
989
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
990
|
+
:type selected_term_fields: Iterable[str] | None
|
|
991
|
+
:returns: A list of term instances. Returns an empty list if no matches are found.
|
|
992
|
+
:rtype: list[DataDescriptor]
|
|
993
|
+
:raises EsgvocValueError: If the `expression` cannot be interpreted.
|
|
873
994
|
"""
|
|
874
|
-
|
|
995
|
+
result: list[DataDescriptor] = list()
|
|
996
|
+
if connection := _get_project_connection(project_id):
|
|
997
|
+
with connection.create_session() as session:
|
|
998
|
+
pterms_found = _find_terms_in_collection(expression, collection_id, session,
|
|
999
|
+
only_id, limit, offset)
|
|
1000
|
+
instantiate_pydantic_terms(pterms_found, result, selected_term_fields)
|
|
1001
|
+
return result
|
|
875
1002
|
|
|
876
1003
|
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
1004
|
+
def find_terms_in_project(expression: str,
|
|
1005
|
+
project_id: str,
|
|
1006
|
+
only_id: bool = False,
|
|
1007
|
+
limit: int | None = None,
|
|
1008
|
+
offset: int | None = None,
|
|
1009
|
+
selected_term_fields: Iterable[str] | None = None) \
|
|
1010
|
+
-> list[DataDescriptor]:
|
|
1011
|
+
"""
|
|
1012
|
+
Find terms in the given project based on a full text search defined by the given
|
|
1013
|
+
`expression`. The `expression` comes from the powerful
|
|
1014
|
+
`SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
|
|
1015
|
+
and corresponds to the expression of the `MATCH` operator.
|
|
1016
|
+
It can be composed of one or multiple keywords combined with boolean
|
|
1017
|
+
operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
|
|
1018
|
+
with the wildcard `*`.
|
|
1019
|
+
The function returns a list of term instances, sorted according to the
|
|
1020
|
+
bm25 ranking metric (list index `0` has the highest rank).
|
|
1021
|
+
This function performs an exact match on the `project_id`,
|
|
1022
|
+
and does not search for similar or related projects.
|
|
1023
|
+
If the provided `expression` does not hit any term or if the provided `project_id` is
|
|
1024
|
+
not found, the function returns an empty list.
|
|
1025
|
+
The function searches for the `expression` in the term specifications.
|
|
1026
|
+
However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
|
|
1027
|
+
terms.
|
|
1028
|
+
|
|
1029
|
+
:param expression: The full text search expression.
|
|
1030
|
+
:type expression: str
|
|
1031
|
+
:param project_id: The id of the given project.
|
|
1032
|
+
:type project_id: str
|
|
1033
|
+
:param only_id: Performs the search only on ids, otherwise on all the specifications.
|
|
1034
|
+
:type only_id: bool
|
|
1035
|
+
:param limit: Limit the number of returned items found. Returns all items found if \
|
|
1036
|
+
`limit` is either `None`, zero or negative.
|
|
1037
|
+
:type limit: int | None
|
|
1038
|
+
:param offset: Skips `offset` number of items found. Ignored if `offset` is \
|
|
1039
|
+
either `None`, zero or negative.
|
|
1040
|
+
:type offset: int | None
|
|
1041
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
1042
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
1043
|
+
:type selected_term_fields: Iterable[str] | None
|
|
1044
|
+
:returns: A list of term instances. Returns an empty list if no matches are found.
|
|
1045
|
+
:rtype: list[DataDescriptor]
|
|
1046
|
+
:raises EsgvocValueError: If the `expression` cannot be interpreted.
|
|
1047
|
+
"""
|
|
1048
|
+
result: list[DataDescriptor] = list()
|
|
1049
|
+
if connection := _get_project_connection(project_id):
|
|
1050
|
+
with connection.create_session() as session:
|
|
1051
|
+
pterms_found = _find_terms_in_project(expression, session, only_id, limit, offset)
|
|
1052
|
+
instantiate_pydantic_terms(pterms_found, result, selected_term_fields)
|
|
1053
|
+
return result
|
|
1054
|
+
|
|
1055
|
+
|
|
1056
|
+
def find_terms_in_all_projects(expression: str,
|
|
1057
|
+
only_id: bool = False,
|
|
1058
|
+
limit: int | None = None,
|
|
1059
|
+
offset: int | None = None,
|
|
1060
|
+
selected_term_fields: Iterable[str] | None = None) \
|
|
1061
|
+
-> list[tuple[str, list[DataDescriptor]]]:
|
|
1062
|
+
"""
|
|
1063
|
+
Find terms in all projects based on a full text search defined by the given
|
|
1064
|
+
`expression`. The `expression` comes from the powerful
|
|
1065
|
+
`SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
|
|
1066
|
+
and corresponds to the expression of the `MATCH` operator.
|
|
1067
|
+
It can be composed of one or multiple keywords combined with boolean
|
|
1068
|
+
operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
|
|
1069
|
+
with the wildcard `*`.
|
|
1070
|
+
The function returns a list of project ids and term instances, sorted according to the
|
|
1071
|
+
bm25 ranking metric (list index `0` has the highest rank).
|
|
1072
|
+
If the provided `expression` does not hit any term, the function returns an empty list.
|
|
1073
|
+
The function searches for the `expression` in the term specifications.
|
|
1074
|
+
However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
|
|
1075
|
+
terms.
|
|
1076
|
+
|
|
1077
|
+
:param expression: The full text search expression.
|
|
1078
|
+
:type expression: str
|
|
1079
|
+
:param only_id: Performs the search only on ids, otherwise on all the specifications.
|
|
1080
|
+
:type only_id: bool
|
|
1081
|
+
:param limit: Limit the number of returned items found. Returns all items found if \
|
|
1082
|
+
`limit` is either `None`, zero or negative.
|
|
1083
|
+
:type limit: int | None
|
|
1084
|
+
:param offset: Skips `offset` number of items found. Ignored if `offset` is \
|
|
1085
|
+
either `None`, zero or negative.
|
|
1086
|
+
:type offset: int | None
|
|
1087
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
1088
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
1089
|
+
:type selected_term_fields: Iterable[str] | None
|
|
1090
|
+
:returns: A list of project ids and term instances. Returns an empty list if no matches are found.
|
|
1091
|
+
:rtype: list[tuple[str, list[DataDescriptor]]]
|
|
1092
|
+
:raises EsgvocValueError: If the `expression` cannot be interpreted.
|
|
1093
|
+
"""
|
|
1094
|
+
result: list[tuple[str, list[DataDescriptor]]] = list()
|
|
1095
|
+
project_ids = get_all_projects()
|
|
1096
|
+
for project_id in project_ids:
|
|
1097
|
+
terms_found = find_terms_in_project(expression, project_id, only_id,
|
|
1098
|
+
limit, offset, selected_term_fields)
|
|
1099
|
+
if terms_found:
|
|
1100
|
+
result.append((project_id, terms_found))
|
|
1101
|
+
return result
|
|
1102
|
+
|
|
1103
|
+
|
|
1104
|
+
def find_items_in_project(expression: str,
|
|
1105
|
+
project_id: str,
|
|
1106
|
+
only_id: bool = False,
|
|
1107
|
+
limit: int | None = None,
|
|
1108
|
+
offset: int | None = None) -> list[Item]:
|
|
1109
|
+
"""
|
|
1110
|
+
Find items, at the moment terms and collections, in the given project based on a full-text
|
|
1111
|
+
search defined by the given `expression`. The `expression` comes from the powerful
|
|
1112
|
+
`SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
|
|
1113
|
+
and corresponds to the expression of the `MATCH` operator.
|
|
1114
|
+
It can be composed of one or multiple keywords combined with boolean
|
|
1115
|
+
operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
|
|
1116
|
+
with the wildcard `*`.
|
|
1117
|
+
The function returns a list of item instances sorted according to the
|
|
1118
|
+
bm25 ranking metric (list index `0` has the highest rank).
|
|
1119
|
+
This function performs an exact match on the `project_id`,
|
|
1120
|
+
and does not search for similar or related projects.
|
|
1121
|
+
If the provided `expression` does not hit any item, or the provided `project_id` is not found,
|
|
1122
|
+
the function returns an empty list.
|
|
1123
|
+
The function searches for the `expression` in the term and collection specifications.
|
|
1124
|
+
However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
|
|
1125
|
+
terms and collections. **At the moment, `only_id` is set to `True` for the collections because
|
|
1126
|
+
they haven't got any description.**
|
|
1127
|
+
|
|
1128
|
+
:param expression: The full text search expression.
|
|
1129
|
+
:type expression: str
|
|
1130
|
+
:param only_id: Performs the search only on ids, otherwise on all the specifications.
|
|
1131
|
+
:type only_id: bool
|
|
1132
|
+
:param limit: Limit the number of returned items found. Returns all items found if \
|
|
1133
|
+
`limit` is either `None`, zero or negative.
|
|
1134
|
+
:type limit: int | None
|
|
1135
|
+
:param offset: Skips `offset` number of items found. Ignored if `offset` is \
|
|
1136
|
+
either `None`, zero or negative.
|
|
1137
|
+
:type offset: int | None
|
|
1138
|
+
:returns: A list of item instances. Returns an empty list if no matches are found.
|
|
1139
|
+
:rtype: list[Item]
|
|
1140
|
+
:raises EsgvocValueError: If the `expression` cannot be interpreted.
|
|
1141
|
+
"""
|
|
1142
|
+
# TODO: execute union query when it will be possible to compute parent of terms and collections.
|
|
1143
|
+
result = list()
|
|
1144
|
+
if connection := _get_project_connection(project_id):
|
|
1145
|
+
with connection.create_session() as session:
|
|
1146
|
+
if only_id:
|
|
1147
|
+
collection_column = col(PCollectionFTS5.id)
|
|
1148
|
+
term_column = col(PTermFTS5.id)
|
|
1149
|
+
else:
|
|
1150
|
+
collection_column = col(PCollectionFTS5.id) # TODO: use specs when implemented!
|
|
1151
|
+
term_column = col(PTermFTS5.specs) # type: ignore
|
|
1152
|
+
collection_where_condition = collection_column.match(expression)
|
|
1153
|
+
collection_statement = select(PCollectionFTS5.id,
|
|
1154
|
+
text("'collection' AS TYPE"),
|
|
1155
|
+
text(f"'{project_id}' AS TYPE"),
|
|
1156
|
+
text('rank')).where(collection_where_condition)
|
|
1157
|
+
term_where_condition = term_column.match(expression)
|
|
1158
|
+
term_statement = select(PTermFTS5.id,
|
|
1159
|
+
text("'term' AS TYPE"),
|
|
1160
|
+
Collection.id,
|
|
1161
|
+
text('rank')).join(Collection) \
|
|
1162
|
+
.where(term_where_condition)
|
|
1163
|
+
result = execute_find_item_statements(session, expression, collection_statement,
|
|
1164
|
+
term_statement, limit, offset)
|
|
1165
|
+
return result
|