esgvoc 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +1 -1
- esgvoc/api/__init__.py +95 -60
- esgvoc/api/project_specs.py +3 -2
- esgvoc/api/projects.py +671 -406
- esgvoc/api/py.typed +0 -0
- esgvoc/api/report.py +12 -8
- esgvoc/api/search.py +141 -98
- esgvoc/api/universe.py +353 -157
- esgvoc/apps/drs/constants.py +1 -1
- esgvoc/apps/drs/generator.py +51 -69
- esgvoc/apps/drs/report.py +60 -15
- esgvoc/apps/drs/validator.py +60 -71
- esgvoc/apps/py.typed +0 -0
- esgvoc/cli/drs.py +3 -2
- esgvoc/cli/get.py +9 -6
- esgvoc/core/constants.py +1 -1
- esgvoc/core/db/__init__.py +2 -4
- esgvoc/core/db/connection.py +5 -3
- esgvoc/core/db/models/project.py +50 -8
- esgvoc/core/db/models/universe.py +48 -9
- esgvoc/core/db/project_ingestion.py +60 -46
- esgvoc/core/db/universe_ingestion.py +55 -27
- esgvoc/core/exceptions.py +33 -0
- {esgvoc-0.3.0.dist-info → esgvoc-0.4.0.dist-info}/METADATA +1 -1
- {esgvoc-0.3.0.dist-info → esgvoc-0.4.0.dist-info}/RECORD +28 -26
- esgvoc/api/_utils.py +0 -53
- {esgvoc-0.3.0.dist-info → esgvoc-0.4.0.dist-info}/WHEEL +0 -0
- {esgvoc-0.3.0.dist-info → esgvoc-0.4.0.dist-info}/entry_points.txt +0 -0
- {esgvoc-0.3.0.dist-info → esgvoc-0.4.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/api/projects.py
CHANGED
|
@@ -1,31 +1,44 @@
|
|
|
1
1
|
import re
|
|
2
|
-
from
|
|
2
|
+
from typing import Iterable, Sequence
|
|
3
3
|
|
|
4
|
-
from
|
|
4
|
+
from sqlalchemy import text
|
|
5
|
+
from sqlmodel import Session, and_, col, select
|
|
5
6
|
|
|
6
7
|
import esgvoc.api.universe as universe
|
|
7
8
|
import esgvoc.core.constants as constants
|
|
8
9
|
import esgvoc.core.service as service
|
|
9
|
-
from esgvoc.api._utils import (APIException, get_universe_session,
|
|
10
|
-
instantiate_pydantic_term,
|
|
11
|
-
instantiate_pydantic_terms)
|
|
12
10
|
from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor
|
|
13
11
|
from esgvoc.api.project_specs import ProjectSpecs
|
|
14
|
-
from esgvoc.api.report import
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
12
|
+
from esgvoc.api.report import ProjectTermError, UniverseTermError, ValidationReport
|
|
13
|
+
from esgvoc.api.search import (
|
|
14
|
+
Item,
|
|
15
|
+
MatchingTerm,
|
|
16
|
+
execute_find_item_statements,
|
|
17
|
+
execute_match_statement,
|
|
18
|
+
generate_matching_condition,
|
|
19
|
+
get_universe_session,
|
|
20
|
+
handle_rank_limit_offset,
|
|
21
|
+
instantiate_pydantic_term,
|
|
22
|
+
instantiate_pydantic_terms,
|
|
23
|
+
)
|
|
18
24
|
from esgvoc.core.db.connection import DBConnection
|
|
19
25
|
from esgvoc.core.db.models.mixins import TermKind
|
|
20
|
-
from esgvoc.core.db.models.project import
|
|
26
|
+
from esgvoc.core.db.models.project import (
|
|
27
|
+
Collection,
|
|
28
|
+
PCollectionFTS5,
|
|
29
|
+
Project,
|
|
30
|
+
PTerm,
|
|
31
|
+
PTermFTS5,
|
|
32
|
+
)
|
|
21
33
|
from esgvoc.core.db.models.universe import UTerm
|
|
34
|
+
from esgvoc.core.exceptions import EsgvocDbError, EsgvocNotFoundError, EsgvocNotImplementedError, EsgvocValueError
|
|
22
35
|
|
|
23
36
|
# [OPTIMIZATION]
|
|
24
37
|
_VALID_TERM_IN_COLLECTION_CACHE: dict[str, list[MatchingTerm]] = dict()
|
|
25
|
-
_VALID_VALUE_AGAINST_GIVEN_TERM_CACHE: dict[str, list[UniverseTermError|ProjectTermError]] = dict()
|
|
38
|
+
_VALID_VALUE_AGAINST_GIVEN_TERM_CACHE: dict[str, list[UniverseTermError | ProjectTermError]] = dict()
|
|
26
39
|
|
|
27
40
|
|
|
28
|
-
def _get_project_connection(project_id: str) -> DBConnection|None:
|
|
41
|
+
def _get_project_connection(project_id: str) -> DBConnection | None:
|
|
29
42
|
if project_id in service.current_state.projects:
|
|
30
43
|
return service.current_state.projects[project_id].db_connection
|
|
31
44
|
else:
|
|
@@ -33,38 +46,36 @@ def _get_project_connection(project_id: str) -> DBConnection|None:
|
|
|
33
46
|
|
|
34
47
|
|
|
35
48
|
def _get_project_session_with_exception(project_id: str) -> Session:
|
|
36
|
-
if connection:=_get_project_connection(project_id):
|
|
49
|
+
if connection := _get_project_connection(project_id):
|
|
37
50
|
project_session = connection.create_session()
|
|
38
51
|
return project_session
|
|
39
52
|
else:
|
|
40
|
-
raise
|
|
53
|
+
raise EsgvocNotFoundError(f"unable to find project '{project_id}'")
|
|
41
54
|
|
|
42
55
|
|
|
43
56
|
def _resolve_term(composite_term_part: dict,
|
|
44
57
|
universe_session: Session,
|
|
45
|
-
project_session: Session) -> UTerm|PTerm:
|
|
58
|
+
project_session: Session) -> UTerm | PTerm:
|
|
46
59
|
# First find the term in the universe than in the current project
|
|
47
60
|
term_id = composite_term_part[constants.TERM_ID_JSON_KEY]
|
|
48
61
|
term_type = composite_term_part[constants.TERM_TYPE_JSON_KEY]
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
return uterms[0]
|
|
62
|
+
uterm = universe._get_term_in_data_descriptor(data_descriptor_id=term_type,
|
|
63
|
+
term_id=term_id,
|
|
64
|
+
session=universe_session)
|
|
65
|
+
if uterm:
|
|
66
|
+
return uterm
|
|
55
67
|
else:
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
return pterms[0]
|
|
68
|
+
pterm = _get_term_in_collection(collection_id=term_type,
|
|
69
|
+
term_id=term_id,
|
|
70
|
+
session=project_session)
|
|
71
|
+
if pterm:
|
|
72
|
+
return pterm
|
|
62
73
|
else:
|
|
63
|
-
msg = f
|
|
64
|
-
raise
|
|
74
|
+
msg = f"unable to find the term '{term_id}' in '{term_type}'"
|
|
75
|
+
raise EsgvocNotFoundError(msg)
|
|
65
76
|
|
|
66
77
|
|
|
67
|
-
def _get_composite_term_separator_parts(term: UTerm|PTerm) -> tuple[str, list]:
|
|
78
|
+
def _get_composite_term_separator_parts(term: UTerm | PTerm) -> tuple[str, list]:
|
|
68
79
|
separator = term.specs[constants.COMPOSITE_SEPARATOR_JSON_KEY]
|
|
69
80
|
parts = term.specs[constants.COMPOSITE_PARTS_JSON_KEY]
|
|
70
81
|
return separator, parts
|
|
@@ -73,10 +84,10 @@ def _get_composite_term_separator_parts(term: UTerm|PTerm) -> tuple[str, list]:
|
|
|
73
84
|
# TODO: support optionality of parts of composite.
|
|
74
85
|
# It is backtrack possible for more than one missing parts.
|
|
75
86
|
def _valid_value_composite_term_with_separator(value: str,
|
|
76
|
-
term: UTerm|PTerm,
|
|
87
|
+
term: UTerm | PTerm,
|
|
77
88
|
universe_session: Session,
|
|
78
89
|
project_session: Session)\
|
|
79
|
-
-> list[UniverseTermError|ProjectTermError]:
|
|
90
|
+
-> list[UniverseTermError | ProjectTermError]:
|
|
80
91
|
result = list()
|
|
81
92
|
separator, parts = _get_composite_term_separator_parts(term)
|
|
82
93
|
if separator in value:
|
|
@@ -99,7 +110,7 @@ def _valid_value_composite_term_with_separator(value: str,
|
|
|
99
110
|
return result
|
|
100
111
|
|
|
101
112
|
|
|
102
|
-
def _transform_to_pattern(term: UTerm|PTerm,
|
|
113
|
+
def _transform_to_pattern(term: UTerm | PTerm,
|
|
103
114
|
universe_session: Session,
|
|
104
115
|
project_session: Session) -> str:
|
|
105
116
|
match term.kind:
|
|
@@ -107,12 +118,12 @@ def _transform_to_pattern(term: UTerm|PTerm,
|
|
|
107
118
|
if constants.DRS_SPECS_JSON_KEY in term.specs:
|
|
108
119
|
result = term.specs[constants.DRS_SPECS_JSON_KEY]
|
|
109
120
|
else:
|
|
110
|
-
raise
|
|
111
|
-
|
|
121
|
+
raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " +
|
|
122
|
+
"Can't validate it.")
|
|
112
123
|
case TermKind.PATTERN:
|
|
113
124
|
result = term.specs[constants.PATTERN_JSON_KEY]
|
|
114
125
|
case TermKind.COMPOSITE:
|
|
115
|
-
separator, parts =
|
|
126
|
+
separator, parts = _get_composite_term_separator_parts(term)
|
|
116
127
|
result = ""
|
|
117
128
|
for part in parts:
|
|
118
129
|
resolved_term = _resolve_term(part, universe_session, project_session)
|
|
@@ -120,17 +131,17 @@ def _transform_to_pattern(term: UTerm|PTerm,
|
|
|
120
131
|
result = f'{result}{pattern}{separator}'
|
|
121
132
|
result = result.rstrip(separator)
|
|
122
133
|
case _:
|
|
123
|
-
raise
|
|
134
|
+
raise EsgvocDbError(f"unsupported term kind '{term.kind}'")
|
|
124
135
|
return result
|
|
125
136
|
|
|
126
137
|
|
|
127
138
|
# TODO: support optionality of parts of composite.
|
|
128
139
|
# It is backtrack possible for more than one missing parts.
|
|
129
140
|
def _valid_value_composite_term_separator_less(value: str,
|
|
130
|
-
term: UTerm|PTerm,
|
|
141
|
+
term: UTerm | PTerm,
|
|
131
142
|
universe_session: Session,
|
|
132
143
|
project_session: Session)\
|
|
133
|
-
-> list[UniverseTermError|ProjectTermError]:
|
|
144
|
+
-> list[UniverseTermError | ProjectTermError]:
|
|
134
145
|
result = list()
|
|
135
146
|
try:
|
|
136
147
|
pattern = _transform_to_pattern(term, universe_session, project_session)
|
|
@@ -143,22 +154,22 @@ def _valid_value_composite_term_separator_less(value: str,
|
|
|
143
154
|
pattern = f'^{pattern}$'
|
|
144
155
|
regex = re.compile(pattern)
|
|
145
156
|
except Exception as e:
|
|
146
|
-
msg = f
|
|
147
|
-
raise
|
|
157
|
+
msg = f"regex compilation error while processing term '{term.id}'':\n{e}"
|
|
158
|
+
raise EsgvocDbError(msg) from e
|
|
148
159
|
match = regex.match(value)
|
|
149
160
|
if match is None:
|
|
150
161
|
result.append(_create_term_error(value, term))
|
|
151
162
|
return result
|
|
152
163
|
except Exception as e:
|
|
153
|
-
msg = f
|
|
154
|
-
raise
|
|
164
|
+
msg = f"cannot validate separator less composite term '{term.id}':\n{e}"
|
|
165
|
+
raise EsgvocNotImplementedError(msg) from e
|
|
155
166
|
|
|
156
167
|
|
|
157
168
|
def _valid_value_for_composite_term(value: str,
|
|
158
|
-
term: UTerm|PTerm,
|
|
169
|
+
term: UTerm | PTerm,
|
|
159
170
|
universe_session: Session,
|
|
160
171
|
project_session: Session)\
|
|
161
|
-
-> list[UniverseTermError|ProjectTermError]:
|
|
172
|
+
-> list[UniverseTermError | ProjectTermError]:
|
|
162
173
|
result = list()
|
|
163
174
|
separator, _ = _get_composite_term_separator_parts(term)
|
|
164
175
|
if separator:
|
|
@@ -170,7 +181,7 @@ def _valid_value_for_composite_term(value: str,
|
|
|
170
181
|
return result
|
|
171
182
|
|
|
172
183
|
|
|
173
|
-
def _create_term_error(value: str, term: UTerm|PTerm) -> UniverseTermError|ProjectTermError:
|
|
184
|
+
def _create_term_error(value: str, term: UTerm | PTerm) -> UniverseTermError | ProjectTermError:
|
|
174
185
|
if isinstance(term, UTerm):
|
|
175
186
|
return UniverseTermError(value=value, term=term.specs, term_kind=term.kind,
|
|
176
187
|
data_descriptor_id=term.data_descriptor.id)
|
|
@@ -180,9 +191,9 @@ def _create_term_error(value: str, term: UTerm|PTerm) -> UniverseTermError|Proje
|
|
|
180
191
|
|
|
181
192
|
|
|
182
193
|
def _valid_value(value: str,
|
|
183
|
-
term: UTerm|PTerm,
|
|
194
|
+
term: UTerm | PTerm,
|
|
184
195
|
universe_session: Session,
|
|
185
|
-
project_session: Session) -> list[UniverseTermError|ProjectTermError]:
|
|
196
|
+
project_session: Session) -> list[UniverseTermError | ProjectTermError]:
|
|
186
197
|
result = list()
|
|
187
198
|
match term.kind:
|
|
188
199
|
case TermKind.PLAIN:
|
|
@@ -190,10 +201,10 @@ def _valid_value(value: str,
|
|
|
190
201
|
if term.specs[constants.DRS_SPECS_JSON_KEY] != value:
|
|
191
202
|
result.append(_create_term_error(value, term))
|
|
192
203
|
else:
|
|
193
|
-
raise
|
|
194
|
-
|
|
204
|
+
raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " +
|
|
205
|
+
"Can't validate it.")
|
|
195
206
|
case TermKind.PATTERN:
|
|
196
|
-
#
|
|
207
|
+
# TODO: Pattern can be compiled and stored for further matching.
|
|
197
208
|
pattern_match = re.match(term.specs[constants.PATTERN_JSON_KEY], value)
|
|
198
209
|
if pattern_match is None:
|
|
199
210
|
result.append(_create_term_error(value, term))
|
|
@@ -202,13 +213,13 @@ def _valid_value(value: str,
|
|
|
202
213
|
universe_session,
|
|
203
214
|
project_session))
|
|
204
215
|
case _:
|
|
205
|
-
raise
|
|
216
|
+
raise EsgvocDbError(f"unsupported term kind '{term.kind}'")
|
|
206
217
|
return result
|
|
207
218
|
|
|
208
219
|
|
|
209
220
|
def _check_value(value: str) -> str:
|
|
210
221
|
if not value or value.isspace():
|
|
211
|
-
raise
|
|
222
|
+
raise EsgvocValueError('value should be set')
|
|
212
223
|
else:
|
|
213
224
|
return value
|
|
214
225
|
|
|
@@ -216,7 +227,7 @@ def _check_value(value: str) -> str:
|
|
|
216
227
|
def _search_plain_term_and_valid_value(value: str,
|
|
217
228
|
collection_id: str,
|
|
218
229
|
project_session: Session) \
|
|
219
|
-
-> str|None:
|
|
230
|
+
-> str | None:
|
|
220
231
|
where_expression = and_(Collection.id == collection_id,
|
|
221
232
|
PTerm.specs[constants.DRS_SPECS_JSON_KEY] == f'"{value}"')
|
|
222
233
|
statement = select(PTerm).join(Collection).where(where_expression)
|
|
@@ -239,7 +250,7 @@ def _valid_value_against_all_terms_of_collection(value: str,
|
|
|
239
250
|
result.append(pterm.id)
|
|
240
251
|
return result
|
|
241
252
|
else:
|
|
242
|
-
raise
|
|
253
|
+
raise EsgvocDbError(f"collection '{collection.id}' has no term")
|
|
243
254
|
|
|
244
255
|
|
|
245
256
|
def _valid_value_against_given_term(value: str,
|
|
@@ -248,22 +259,20 @@ def _valid_value_against_given_term(value: str,
|
|
|
248
259
|
term_id: str,
|
|
249
260
|
universe_session: Session,
|
|
250
261
|
project_session: Session)\
|
|
251
|
-
-> list[UniverseTermError|ProjectTermError]:
|
|
252
|
-
#
|
|
262
|
+
-> list[UniverseTermError | ProjectTermError]:
|
|
263
|
+
# [OPTIMIZATION]
|
|
253
264
|
key = value + project_id + collection_id + term_id
|
|
254
265
|
if key in _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE:
|
|
255
266
|
result = _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key]
|
|
256
267
|
else:
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
if terms:
|
|
262
|
-
term = terms[0]
|
|
268
|
+
term = _get_term_in_collection(collection_id,
|
|
269
|
+
term_id,
|
|
270
|
+
project_session)
|
|
271
|
+
if term:
|
|
263
272
|
result = _valid_value(value, term, universe_session, project_session)
|
|
264
273
|
else:
|
|
265
|
-
raise
|
|
266
|
-
|
|
274
|
+
raise EsgvocNotFoundError(f"unable to find term '{term_id}' " +
|
|
275
|
+
f"in collection '{collection_id}'")
|
|
267
276
|
_VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key] = result
|
|
268
277
|
return result
|
|
269
278
|
|
|
@@ -288,7 +297,7 @@ def valid_term(value: str,
|
|
|
288
297
|
composite so as to compare it as a regex to the value.
|
|
289
298
|
|
|
290
299
|
If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
|
|
291
|
-
the function raises a
|
|
300
|
+
the function raises a EsgvocNotFoundError.
|
|
292
301
|
|
|
293
302
|
:param value: A value to be validated
|
|
294
303
|
:type value: str
|
|
@@ -300,7 +309,7 @@ def valid_term(value: str,
|
|
|
300
309
|
:type term_id: str
|
|
301
310
|
:returns: A validation report that contains the possible errors
|
|
302
311
|
:rtype: ValidationReport
|
|
303
|
-
:raises
|
|
312
|
+
:raises EsgvocNotFoundError: If any of the provided ids is not found
|
|
304
313
|
"""
|
|
305
314
|
value = _check_value(value)
|
|
306
315
|
with get_universe_session() as universe_session, \
|
|
@@ -316,18 +325,15 @@ def _valid_term_in_collection(value: str,
|
|
|
316
325
|
universe_session: Session,
|
|
317
326
|
project_session: Session) \
|
|
318
327
|
-> list[MatchingTerm]:
|
|
319
|
-
#
|
|
328
|
+
# [OPTIMIZATION]
|
|
320
329
|
key = value + project_id + collection_id
|
|
321
330
|
if key in _VALID_TERM_IN_COLLECTION_CACHE:
|
|
322
331
|
result = _VALID_TERM_IN_COLLECTION_CACHE[key]
|
|
323
332
|
else:
|
|
324
333
|
value = _check_value(value)
|
|
325
334
|
result = list()
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
None)
|
|
329
|
-
if collections:
|
|
330
|
-
collection = collections[0]
|
|
335
|
+
collection = _get_collection_in_project(collection_id, project_session)
|
|
336
|
+
if collection:
|
|
331
337
|
match collection.term_kind:
|
|
332
338
|
case TermKind.PLAIN:
|
|
333
339
|
term_id_found = _search_plain_term_and_valid_value(value, collection_id,
|
|
@@ -345,8 +351,8 @@ def _valid_term_in_collection(value: str,
|
|
|
345
351
|
collection_id=collection_id,
|
|
346
352
|
term_id=term_id_found))
|
|
347
353
|
else:
|
|
348
|
-
msg = f
|
|
349
|
-
raise
|
|
354
|
+
msg = f"unable to find collection '{collection_id}'"
|
|
355
|
+
raise EsgvocNotFoundError(msg)
|
|
350
356
|
_VALID_TERM_IN_COLLECTION_CACHE[key] = result
|
|
351
357
|
return result
|
|
352
358
|
|
|
@@ -370,7 +376,7 @@ def valid_term_in_collection(value: str,
|
|
|
370
376
|
composite so as to compare it as a regex to the value.
|
|
371
377
|
|
|
372
378
|
If any of the provided ids (`project_id` or `collection_id`) is not found,
|
|
373
|
-
the function raises a
|
|
379
|
+
the function raises a EsgvocNotFoundError.
|
|
374
380
|
|
|
375
381
|
:param value: A value to be validated
|
|
376
382
|
:type value: str
|
|
@@ -380,7 +386,7 @@ def valid_term_in_collection(value: str,
|
|
|
380
386
|
:type collection_id: str
|
|
381
387
|
:returns: The list of terms that the value matches.
|
|
382
388
|
:rtype: list[MatchingTerm]
|
|
383
|
-
:raises
|
|
389
|
+
:raises EsgvocNotFoundError: If any of the provided ids is not found
|
|
384
390
|
"""
|
|
385
391
|
with get_universe_session() as universe_session, \
|
|
386
392
|
_get_project_session_with_exception(project_id) as project_session:
|
|
@@ -415,7 +421,7 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
|
|
|
415
421
|
- if the composite hasn't got a separator, the function aggregates the parts of the \
|
|
416
422
|
composite so as to compare it as a regex to the value.
|
|
417
423
|
|
|
418
|
-
If the `project_id` is not found, the function raises a
|
|
424
|
+
If the `project_id` is not found, the function raises a EsgvocNotFoundError.
|
|
419
425
|
|
|
420
426
|
:param value: A value to be validated
|
|
421
427
|
:type value: str
|
|
@@ -423,7 +429,7 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
|
|
|
423
429
|
:type project_id: str
|
|
424
430
|
:returns: The list of terms that the value matches.
|
|
425
431
|
:rtype: list[MatchingTerm]
|
|
426
|
-
:raises
|
|
432
|
+
:raises EsgvocNotFoundError: If the `project_id` is not found
|
|
427
433
|
"""
|
|
428
434
|
with get_universe_session() as universe_session, \
|
|
429
435
|
_get_project_session_with_exception(project_id) as project_session:
|
|
@@ -459,246 +465,14 @@ def valid_term_in_all_projects(value: str) -> list[MatchingTerm]:
|
|
|
459
465
|
return result
|
|
460
466
|
|
|
461
467
|
|
|
462
|
-
def _find_terms_in_collection(collection_id: str,
|
|
463
|
-
term_id: str,
|
|
464
|
-
session: Session,
|
|
465
|
-
settings: SearchSettings|None = None) -> Sequence[PTerm]:
|
|
466
|
-
# Settings only apply on the term_id comparison.
|
|
467
|
-
where_expression = _create_str_comparison_expression(field=PTerm.id,
|
|
468
|
-
value=term_id,
|
|
469
|
-
settings=settings)
|
|
470
|
-
statement = select(PTerm).join(Collection).where(Collection.id==collection_id,
|
|
471
|
-
where_expression)
|
|
472
|
-
results = session.exec(statement)
|
|
473
|
-
result = results.all()
|
|
474
|
-
return result
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
def find_terms_in_collection(project_id:str,
|
|
478
|
-
collection_id: str,
|
|
479
|
-
term_id: str,
|
|
480
|
-
settings: SearchSettings|None = None) \
|
|
481
|
-
-> list[DataDescriptor]:
|
|
482
|
-
"""
|
|
483
|
-
Finds one or more terms, based on the specified search settings, in the given collection of a project.
|
|
484
|
-
This function performs an exact match on the `project_id` and `collection_id`,
|
|
485
|
-
and does **not** search for similar or related projects and collections.
|
|
486
|
-
The given `term_id` is searched according to the search type specified in the parameter `settings`,
|
|
487
|
-
which allows a flexible matching (e.g., `LIKE` may return multiple results).
|
|
488
|
-
If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
|
|
489
|
-
If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
|
|
490
|
-
the function returns an empty list.
|
|
491
|
-
|
|
492
|
-
Behavior based on search type:
|
|
493
|
-
- `EXACT` and absence of `settings`: returns zero or one term instance in the list.
|
|
494
|
-
- `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
|
|
495
|
-
term instances in the list.
|
|
496
|
-
|
|
497
|
-
:param project_id: A project id
|
|
498
|
-
:type project_id: str
|
|
499
|
-
:param collection_id: A collection
|
|
500
|
-
:type collection_id: str
|
|
501
|
-
:param term_id: A term id to be found
|
|
502
|
-
:type term_id: str
|
|
503
|
-
:param settings: The search settings
|
|
504
|
-
:type settings: SearchSettings|None
|
|
505
|
-
:returns: A list of term instances. Returns an empty list if no matches are found.
|
|
506
|
-
:rtype: list[DataDescriptor]
|
|
507
|
-
"""
|
|
508
|
-
result: list[DataDescriptor] = list()
|
|
509
|
-
if connection:=_get_project_connection(project_id):
|
|
510
|
-
with connection.create_session() as session:
|
|
511
|
-
terms = _find_terms_in_collection(collection_id, term_id, session, settings)
|
|
512
|
-
instantiate_pydantic_terms(terms, result,
|
|
513
|
-
settings.selected_term_fields if settings else None)
|
|
514
|
-
return result
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
def _find_terms_from_data_descriptor_in_project(data_descriptor_id: str,
|
|
518
|
-
term_id: str,
|
|
519
|
-
session: Session,
|
|
520
|
-
settings: SearchSettings|None = None) \
|
|
521
|
-
-> Sequence[PTerm]:
|
|
522
|
-
# Settings only apply on the term_id comparison.
|
|
523
|
-
where_expression = _create_str_comparison_expression(field=PTerm.id,
|
|
524
|
-
value=term_id,
|
|
525
|
-
settings=settings)
|
|
526
|
-
statement = select(PTerm).join(Collection).where(Collection.data_descriptor_id==data_descriptor_id,
|
|
527
|
-
where_expression)
|
|
528
|
-
results = session.exec(statement)
|
|
529
|
-
result = results.all()
|
|
530
|
-
return result
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
def find_terms_from_data_descriptor_in_project(project_id: str,
|
|
534
|
-
data_descriptor_id: str,
|
|
535
|
-
term_id: str,
|
|
536
|
-
settings: SearchSettings|None = None) \
|
|
537
|
-
-> list[tuple[DataDescriptor, str]]:
|
|
538
|
-
"""
|
|
539
|
-
Finds one or more terms in the given project which are instances of the given data descriptor
|
|
540
|
-
in the universe, based on the specified search settings, in the given collection of a project.
|
|
541
|
-
This function performs an exact match on the `project_id` and `data_descriptor_id`,
|
|
542
|
-
and does **not** search for similar or related projects and data descriptors.
|
|
543
|
-
The given `term_id` is searched according to the search type specified in the parameter `settings`,
|
|
544
|
-
which allows a flexible matching (e.g., `LIKE` may return multiple results).
|
|
545
|
-
If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
|
|
546
|
-
If any of the provided ids (`project_id`, `data_descriptor_id` or `term_id`) is not found,
|
|
547
|
-
the function returns an empty list.
|
|
548
|
-
|
|
549
|
-
Behavior based on search type:
|
|
550
|
-
- `EXACT` and absence of `settings`: returns zero or one term instance and \
|
|
551
|
-
collection id in the list.
|
|
552
|
-
- `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
|
|
553
|
-
term instances and collection ids in the list.
|
|
554
|
-
|
|
555
|
-
:param project_id: A project id
|
|
556
|
-
:type project_id: str
|
|
557
|
-
:param data_descriptor_id: A data descriptor
|
|
558
|
-
:type data_descriptor_id: str
|
|
559
|
-
:param term_id: A term id to be found
|
|
560
|
-
:type term_id: str
|
|
561
|
-
:param settings: The search settings
|
|
562
|
-
:type settings: SearchSettings|None
|
|
563
|
-
:returns: A list of tuple of term instances and related collection ids. \
|
|
564
|
-
Returns an empty list if no matches are found.
|
|
565
|
-
:rtype: list[tuple[DataDescriptor, str]]
|
|
566
|
-
"""
|
|
567
|
-
result = list()
|
|
568
|
-
if connection:=_get_project_connection(project_id):
|
|
569
|
-
with connection.create_session() as session:
|
|
570
|
-
terms = _find_terms_from_data_descriptor_in_project(data_descriptor_id,
|
|
571
|
-
term_id,
|
|
572
|
-
session,
|
|
573
|
-
settings)
|
|
574
|
-
for pterm in terms:
|
|
575
|
-
collection_id = pterm.collection.id
|
|
576
|
-
term = instantiate_pydantic_term(pterm,
|
|
577
|
-
settings.selected_term_fields if settings else None)
|
|
578
|
-
result.append((term, collection_id))
|
|
579
|
-
return result
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
def find_terms_from_data_descriptor_in_all_projects(data_descriptor_id: str,
|
|
583
|
-
term_id: str,
|
|
584
|
-
settings: SearchSettings|None = None) \
|
|
585
|
-
-> list[tuple[list[tuple[DataDescriptor, str]], str]]:
|
|
586
|
-
"""
|
|
587
|
-
Finds one or more terms in all projects which are instances of the given data descriptor
|
|
588
|
-
in the universe, based on the specified search settings, in the given collection of a project.
|
|
589
|
-
This function performs an exact match on the `data_descriptor_id`,
|
|
590
|
-
and does **not** search for similar or related data descriptors.
|
|
591
|
-
The given `term_id` is searched according to the search type specified in the parameter `settings`,
|
|
592
|
-
which allows a flexible matching (e.g., `LIKE` may return multiple results).
|
|
593
|
-
If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
|
|
594
|
-
If any of the provided ids (`data_descriptor_id` or `term_id`) is not found,
|
|
595
|
-
the function returns an empty list.
|
|
596
|
-
|
|
597
|
-
Behavior based on search type:
|
|
598
|
-
- `EXACT` and absence of `settings`: returns zero or one term instance and \
|
|
599
|
-
collection id in the list.
|
|
600
|
-
- `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
|
|
601
|
-
term instances and collection ids in the list.
|
|
602
|
-
|
|
603
|
-
:param data_descriptor_id: A data descriptor
|
|
604
|
-
:type data_descriptor_id: str
|
|
605
|
-
:param term_id: A term id to be found
|
|
606
|
-
:type term_id: str
|
|
607
|
-
:param settings: The search settings
|
|
608
|
-
:type settings: SearchSettings|None
|
|
609
|
-
:returns: A list of tuple of matching terms with their collection id, per project. \
|
|
610
|
-
Returns an empty list if no matches are found.
|
|
611
|
-
:rtype: list[tuple[list[tuple[DataDescriptor, str]], str]]
|
|
612
|
-
"""
|
|
613
|
-
project_ids = get_all_projects()
|
|
614
|
-
result: list[tuple[list[tuple[DataDescriptor, str]], str]] = list()
|
|
615
|
-
for project_id in project_ids:
|
|
616
|
-
matching_terms = find_terms_from_data_descriptor_in_project(project_id,
|
|
617
|
-
data_descriptor_id,
|
|
618
|
-
term_id,
|
|
619
|
-
settings)
|
|
620
|
-
if matching_terms:
|
|
621
|
-
result.append((matching_terms, project_id))
|
|
622
|
-
return result
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
def _find_terms_in_project(term_id: str,
|
|
626
|
-
session: Session,
|
|
627
|
-
settings: SearchSettings|None) -> Sequence[PTerm]:
|
|
628
|
-
where_expression = _create_str_comparison_expression(field=PTerm.id,
|
|
629
|
-
value=term_id,
|
|
630
|
-
settings=settings)
|
|
631
|
-
statement = select(PTerm).where(where_expression)
|
|
632
|
-
results = session.exec(statement).all()
|
|
633
|
-
return results
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
def find_terms_in_all_projects(term_id: str,
|
|
637
|
-
settings: SearchSettings|None = None) \
|
|
638
|
-
-> list[DataDescriptor]:
|
|
639
|
-
"""
|
|
640
|
-
Finds one or more terms, based on the specified search settings, in all projects.
|
|
641
|
-
The given `term_id` is searched according to the search type specified in the parameter `settings`,
|
|
642
|
-
which allows a flexible matching (e.g., `LIKE` may return multiple results).
|
|
643
|
-
If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
|
|
644
|
-
Terms are unique within a collection but may have some synonyms within a project.
|
|
645
|
-
If the provided `term_id` is not found, the function returns an empty list.
|
|
646
|
-
|
|
647
|
-
:param term_id: A term id to be found
|
|
648
|
-
:type term_id: str
|
|
649
|
-
:param settings: The search settings
|
|
650
|
-
:type settings: SearchSettings|None
|
|
651
|
-
:returns: A list of term instances. Returns an empty list if no matches are found.
|
|
652
|
-
:rtype: list[DataDescriptor]
|
|
653
|
-
"""
|
|
654
|
-
project_ids = get_all_projects()
|
|
655
|
-
result = list()
|
|
656
|
-
for project_id in project_ids:
|
|
657
|
-
result.extend(find_terms_in_project(project_id, term_id, settings))
|
|
658
|
-
return result
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
def find_terms_in_project(project_id: str,
|
|
662
|
-
term_id: str,
|
|
663
|
-
settings: SearchSettings|None = None) \
|
|
664
|
-
-> list[DataDescriptor]:
|
|
665
|
-
"""
|
|
666
|
-
Finds one or more terms, based on the specified search settings, in a project.
|
|
667
|
-
This function performs an exact match on the `project_id` and
|
|
668
|
-
does **not** search for similar or related projects.
|
|
669
|
-
The given `term_id` is searched according to the search type specified in the parameter `settings`,
|
|
670
|
-
which allows a flexible matching (e.g., `LIKE` may return multiple results).
|
|
671
|
-
If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
|
|
672
|
-
Terms are unique within a collection but may have some synonyms within a project.
|
|
673
|
-
If any of the provided ids (`project_id` or `term_id`) is not found, the function returns
|
|
674
|
-
an empty list.
|
|
675
|
-
|
|
676
|
-
:param project_id: A project id
|
|
677
|
-
:type project_id: str
|
|
678
|
-
:param term_id: A term id to be found
|
|
679
|
-
:type term_id: str
|
|
680
|
-
:param settings: The search settings
|
|
681
|
-
:type settings: SearchSettings|None
|
|
682
|
-
:returns: A list of term instances. Returns an empty list if no matches are found.
|
|
683
|
-
:rtype: list[DataDescriptor]
|
|
684
|
-
"""
|
|
685
|
-
result: list[DataDescriptor] = list()
|
|
686
|
-
if connection:=_get_project_connection(project_id):
|
|
687
|
-
with connection.create_session() as session:
|
|
688
|
-
terms = _find_terms_in_project(term_id, session, settings)
|
|
689
|
-
instantiate_pydantic_terms(terms, result,
|
|
690
|
-
settings.selected_term_fields if settings else None)
|
|
691
|
-
return result
|
|
692
|
-
|
|
693
|
-
|
|
694
468
|
def get_all_terms_in_collection(project_id: str,
|
|
695
469
|
collection_id: str,
|
|
696
|
-
selected_term_fields: Iterable[str]|None = None)\
|
|
470
|
+
selected_term_fields: Iterable[str] | None = None)\
|
|
697
471
|
-> list[DataDescriptor]:
|
|
698
472
|
"""
|
|
699
473
|
Gets all terms of the given collection of a project.
|
|
700
474
|
This function performs an exact match on the `project_id` and `collection_id`,
|
|
701
|
-
and does
|
|
475
|
+
and does not search for similar or related projects and collections.
|
|
702
476
|
If any of the provided ids (`project_id` or `collection_id`) is not found, the function
|
|
703
477
|
returns an empty list.
|
|
704
478
|
|
|
@@ -707,87 +481,31 @@ def get_all_terms_in_collection(project_id: str,
|
|
|
707
481
|
:param collection_id: A collection id
|
|
708
482
|
:type collection_id: str
|
|
709
483
|
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
710
|
-
fields of the terms are returned.
|
|
711
|
-
:type selected_term_fields: Iterable[str]|None
|
|
484
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
485
|
+
:type selected_term_fields: Iterable[str] | None
|
|
712
486
|
:returns: a list of term instances. Returns an empty list if no matches are found.
|
|
713
487
|
:rtype: list[DataDescriptor]
|
|
714
488
|
"""
|
|
715
489
|
result = list()
|
|
716
|
-
if connection:=_get_project_connection(project_id):
|
|
490
|
+
if connection := _get_project_connection(project_id):
|
|
717
491
|
with connection.create_session() as session:
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
None)
|
|
721
|
-
if collections:
|
|
722
|
-
collection = collections[0]
|
|
492
|
+
collection = _get_collection_in_project(collection_id, session)
|
|
493
|
+
if collection:
|
|
723
494
|
result = _get_all_terms_in_collection(collection, selected_term_fields)
|
|
724
495
|
return result
|
|
725
496
|
|
|
726
497
|
|
|
727
|
-
def _find_collections_in_project(collection_id: str,
|
|
728
|
-
session: Session,
|
|
729
|
-
settings: SearchSettings|None) \
|
|
730
|
-
-> Sequence[Collection]:
|
|
731
|
-
where_exp = _create_str_comparison_expression(field=Collection.id,
|
|
732
|
-
value=collection_id,
|
|
733
|
-
settings=settings)
|
|
734
|
-
statement = select(Collection).where(where_exp)
|
|
735
|
-
results = session.exec(statement)
|
|
736
|
-
result = results.all()
|
|
737
|
-
return result
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
def find_collections_in_project(project_id: str,
|
|
741
|
-
collection_id: str,
|
|
742
|
-
settings: SearchSettings|None = None) \
|
|
743
|
-
-> list[dict]:
|
|
744
|
-
"""
|
|
745
|
-
Finds one or more collections of the given project.
|
|
746
|
-
This function performs an exact match on the `project_id` and
|
|
747
|
-
does **not** search for similar or related projects.
|
|
748
|
-
The given `collection_id` is searched according to the search type specified in
|
|
749
|
-
the parameter `settings`,
|
|
750
|
-
which allows a flexible matching (e.g., `LIKE` may return multiple results).
|
|
751
|
-
If the parameter `settings` is `None`, this function performs an exact match on the `collection_id`.
|
|
752
|
-
If any of the provided ids (`project_id` or `collection_id`) is not found, the function returns
|
|
753
|
-
an empty list.
|
|
754
|
-
|
|
755
|
-
Behavior based on search type:
|
|
756
|
-
- `EXACT` and absence of `settings`: returns zero or one collection context in the list.
|
|
757
|
-
- `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
|
|
758
|
-
collection contexts in the list.
|
|
759
|
-
|
|
760
|
-
:param project_id: A project id
|
|
761
|
-
:type project_id: str
|
|
762
|
-
:param collection_id: A collection id to be found
|
|
763
|
-
:type collection_id: str
|
|
764
|
-
:param settings: The search settings
|
|
765
|
-
:type settings: SearchSettings|None
|
|
766
|
-
:returns: A list of collection contexts. Returns an empty list if no matches are found.
|
|
767
|
-
:rtype: list[dict]
|
|
768
|
-
"""
|
|
769
|
-
result = list()
|
|
770
|
-
if connection:=_get_project_connection(project_id):
|
|
771
|
-
with connection.create_session() as session:
|
|
772
|
-
collections = _find_collections_in_project(collection_id,
|
|
773
|
-
session,
|
|
774
|
-
settings)
|
|
775
|
-
for collection in collections:
|
|
776
|
-
result.append(collection.context)
|
|
777
|
-
return result
|
|
778
|
-
|
|
779
|
-
|
|
780
498
|
def _get_all_collections_in_project(session: Session) -> list[Collection]:
|
|
781
499
|
project = session.get(Project, constants.SQLITE_FIRST_PK)
|
|
782
500
|
# Project can't be missing if session exists.
|
|
783
|
-
return project.collections
|
|
501
|
+
return project.collections # type: ignore
|
|
784
502
|
|
|
785
503
|
|
|
786
504
|
def get_all_collections_in_project(project_id: str) -> list[str]:
|
|
787
505
|
"""
|
|
788
506
|
Gets all collections of the given project.
|
|
789
507
|
This function performs an exact match on the `project_id` and
|
|
790
|
-
does
|
|
508
|
+
does not search for similar or related projects.
|
|
791
509
|
If the provided `project_id` is not found, the function returns an empty list.
|
|
792
510
|
|
|
793
511
|
:param project_id: A project id
|
|
@@ -796,7 +514,7 @@ def get_all_collections_in_project(project_id: str) -> list[str]:
|
|
|
796
514
|
:rtype: list[str]
|
|
797
515
|
"""
|
|
798
516
|
result = list()
|
|
799
|
-
if connection:=_get_project_connection(project_id):
|
|
517
|
+
if connection := _get_project_connection(project_id):
|
|
800
518
|
with connection.create_session() as session:
|
|
801
519
|
collections = _get_all_collections_in_project(session)
|
|
802
520
|
for collection in collections:
|
|
@@ -805,31 +523,31 @@ def get_all_collections_in_project(project_id: str) -> list[str]:
|
|
|
805
523
|
|
|
806
524
|
|
|
807
525
|
def _get_all_terms_in_collection(collection: Collection,
|
|
808
|
-
selected_term_fields: Iterable[str]|None) -> list[DataDescriptor]:
|
|
526
|
+
selected_term_fields: Iterable[str] | None) -> list[DataDescriptor]:
|
|
809
527
|
result: list[DataDescriptor] = list()
|
|
810
528
|
instantiate_pydantic_terms(collection.terms, result, selected_term_fields)
|
|
811
529
|
return result
|
|
812
530
|
|
|
813
531
|
|
|
814
532
|
def get_all_terms_in_project(project_id: str,
|
|
815
|
-
selected_term_fields: Iterable[str]|None = None) -> list[DataDescriptor]:
|
|
533
|
+
selected_term_fields: Iterable[str] | None = None) -> list[DataDescriptor]:
|
|
816
534
|
"""
|
|
817
535
|
Gets all terms of the given project.
|
|
818
536
|
This function performs an exact match on the `project_id` and
|
|
819
|
-
does
|
|
537
|
+
does not search for similar or related projects.
|
|
820
538
|
Terms are unique within a collection but may have some synonyms in a project.
|
|
821
539
|
If the provided `project_id` is not found, the function returns an empty list.
|
|
822
540
|
|
|
823
541
|
:param project_id: A project id
|
|
824
542
|
:type project_id: str
|
|
825
543
|
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
826
|
-
fields of the terms are returned.
|
|
827
|
-
:type selected_term_fields: Iterable[str]|None
|
|
544
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
545
|
+
:type selected_term_fields: Iterable[str] | None
|
|
828
546
|
:returns: A list of term instances. Returns an empty list if no matches are found.
|
|
829
547
|
:rtype: list[DataDescriptor]
|
|
830
548
|
"""
|
|
831
549
|
result = list()
|
|
832
|
-
if connection:=_get_project_connection(project_id):
|
|
550
|
+
if connection := _get_project_connection(project_id):
|
|
833
551
|
with connection.create_session() as session:
|
|
834
552
|
collections = _get_all_collections_in_project(session)
|
|
835
553
|
for collection in collections:
|
|
@@ -838,14 +556,14 @@ def get_all_terms_in_project(project_id: str,
|
|
|
838
556
|
return result
|
|
839
557
|
|
|
840
558
|
|
|
841
|
-
def get_all_terms_in_all_projects(selected_term_fields: Iterable[str]|None = None) \
|
|
559
|
+
def get_all_terms_in_all_projects(selected_term_fields: Iterable[str] | None = None) \
|
|
842
560
|
-> list[tuple[str, list[DataDescriptor]]]:
|
|
843
561
|
"""
|
|
844
562
|
Gets all terms of all projects.
|
|
845
563
|
|
|
846
564
|
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
847
|
-
fields of the terms are returned.
|
|
848
|
-
:type selected_term_fields: Iterable[str]|None
|
|
565
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
566
|
+
:type selected_term_fields: Iterable[str] | None
|
|
849
567
|
:returns: A list of tuple project_id and term instances of that project.
|
|
850
568
|
:rtype: list[tuple[str, list[DataDescriptor]]]
|
|
851
569
|
"""
|
|
@@ -857,44 +575,591 @@ def get_all_terms_in_all_projects(selected_term_fields: Iterable[str]|None = Non
|
|
|
857
575
|
return result
|
|
858
576
|
|
|
859
577
|
|
|
860
|
-
def
|
|
578
|
+
def get_all_projects() -> list[str]:
|
|
579
|
+
"""
|
|
580
|
+
Gets all projects.
|
|
581
|
+
|
|
582
|
+
:returns: A list of project ids.
|
|
583
|
+
:rtype: list[str]
|
|
584
|
+
"""
|
|
585
|
+
return list(service.current_state.projects.keys())
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
def _get_term_in_project(term_id: str, session: Session) -> PTerm | None:
|
|
589
|
+
statement = select(PTerm).where(PTerm.id == term_id)
|
|
590
|
+
results = session.exec(statement)
|
|
591
|
+
result = results.first() # Term ids are not supposed to be unique within a project.
|
|
592
|
+
return result
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
def get_term_in_project(project_id: str, term_id: str,
|
|
596
|
+
selected_term_fields: Iterable[str] | None = None) -> DataDescriptor | None:
|
|
597
|
+
"""
|
|
598
|
+
Returns the first occurrence of the terms, in the given project, whose id corresponds exactly to
|
|
599
|
+
the given term id.
|
|
600
|
+
Terms are unique within a collection but may have some synonyms in a project.
|
|
601
|
+
This function performs an exact match on the `project_id` and `term_id`, and does not search
|
|
602
|
+
for similar or related projects and terms.
|
|
603
|
+
If any of the provided ids (`project_id` or `term_id`) is not found,
|
|
604
|
+
the function returns `None`.
|
|
605
|
+
|
|
606
|
+
:param project_id: The id of the given project.
|
|
607
|
+
:type project_id: str
|
|
608
|
+
:param term_id: The id of a term to be found.
|
|
609
|
+
:type term_id: str
|
|
610
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
611
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
612
|
+
:type selected_term_fields: Iterable[str] | None
|
|
613
|
+
:returns: A term instance. Returns `None` if no match is found.
|
|
614
|
+
:rtype: DataDescriptor | None
|
|
615
|
+
"""
|
|
616
|
+
result: DataDescriptor | None = None
|
|
617
|
+
if connection := _get_project_connection(project_id):
|
|
618
|
+
with connection.create_session() as session:
|
|
619
|
+
term_found = _get_term_in_project(term_id, session)
|
|
620
|
+
if term_found:
|
|
621
|
+
result = instantiate_pydantic_term(term_found, selected_term_fields)
|
|
622
|
+
return result
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
def _get_term_in_collection(collection_id: str, term_id: str, session: Session) -> PTerm | None:
|
|
626
|
+
statement = select(PTerm).join(Collection).where(Collection.id == collection_id,
|
|
627
|
+
PTerm.id == term_id)
|
|
628
|
+
results = session.exec(statement)
|
|
629
|
+
result = results.one_or_none()
|
|
630
|
+
return result
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
def get_term_in_collection(project_id: str, collection_id: str, term_id: str,
|
|
634
|
+
selected_term_fields: Iterable[str] | None = None) -> DataDescriptor | None:
|
|
635
|
+
"""
|
|
636
|
+
Returns the term, in the given project and collection,
|
|
637
|
+
whose id corresponds exactly to the given term id.
|
|
638
|
+
This function performs an exact match on the `project_id`, `collection_id` and `term_id`,
|
|
639
|
+
and does not search for similar or related projects, collections and terms.
|
|
640
|
+
If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
|
|
641
|
+
the function returns `None`.
|
|
642
|
+
|
|
643
|
+
:param project_id: The id of the given project.
|
|
644
|
+
:type project_id: str
|
|
645
|
+
:param collection_id: The id of the given collection.
|
|
646
|
+
:type collection_id: str
|
|
647
|
+
:param term_id: The id of a term to be found.
|
|
648
|
+
:type term_id: str
|
|
649
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
650
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
651
|
+
:type selected_term_fields: Iterable[str] | None
|
|
652
|
+
:returns: A term instance. Returns `None` if no match is found.
|
|
653
|
+
:rtype: DataDescriptor | None
|
|
654
|
+
"""
|
|
655
|
+
result: DataDescriptor | None = None
|
|
656
|
+
if connection := _get_project_connection(project_id):
|
|
657
|
+
with connection.create_session() as session:
|
|
658
|
+
term_found = _get_term_in_collection(collection_id, term_id, session)
|
|
659
|
+
if term_found:
|
|
660
|
+
result = instantiate_pydantic_term(term_found, selected_term_fields)
|
|
661
|
+
return result
|
|
662
|
+
|
|
663
|
+
|
|
664
|
+
def _get_collection_in_project(collection_id: str, session: Session) -> Collection | None:
|
|
665
|
+
statement = select(Collection).where(Collection.id == collection_id)
|
|
666
|
+
results = session.exec(statement)
|
|
667
|
+
result = results.one_or_none()
|
|
668
|
+
return result
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
def get_collection_in_project(project_id: str, collection_id: str) -> tuple[str, dict] | None:
|
|
861
672
|
"""
|
|
862
|
-
|
|
673
|
+
Returns the collection, in the given project, whose id corresponds exactly to
|
|
674
|
+
the given collection id.
|
|
675
|
+
This function performs an exact match on the `project_id` and `collection_id`, and does not search
|
|
676
|
+
for similar or related projects and collections.
|
|
677
|
+
If any of the provided ids (`project_id` or `collection_id`) is not found,
|
|
678
|
+
the function returns `None`.
|
|
679
|
+
|
|
680
|
+
:param project_id: The id of the given project.
|
|
681
|
+
:type project_id: str
|
|
682
|
+
:param collection_id: The id of a collection to be found.
|
|
683
|
+
:type collection_id: str
|
|
684
|
+
:returns: A collection id and context. Returns `None` if no match is found.
|
|
685
|
+
:rtype: tuple[str, dict] | None
|
|
686
|
+
"""
|
|
687
|
+
result: tuple[str, dict] | None = None
|
|
688
|
+
if connection := _get_project_connection(project_id):
|
|
689
|
+
with connection.create_session() as session:
|
|
690
|
+
collection_found = _get_collection_in_project(collection_id, session)
|
|
691
|
+
if collection_found:
|
|
692
|
+
result = collection_found.id, collection_found.context
|
|
693
|
+
return result
|
|
694
|
+
|
|
695
|
+
|
|
696
|
+
def get_project(project_id: str) -> ProjectSpecs | None:
|
|
697
|
+
"""
|
|
698
|
+
Gets a project and returns its specifications.
|
|
863
699
|
This function performs an exact match on the `project_id` and
|
|
864
|
-
does
|
|
700
|
+
does not search for similar or related projects.
|
|
865
701
|
If the provided `project_id` is not found, the function returns `None`.
|
|
866
702
|
|
|
867
703
|
:param project_id: A project id to be found
|
|
868
704
|
:type project_id: str
|
|
869
705
|
:returns: The specs of the project found. Returns `None` if no matches are found.
|
|
870
|
-
:rtype: ProjectSpecs|None
|
|
706
|
+
:rtype: ProjectSpecs | None
|
|
871
707
|
"""
|
|
872
|
-
result: ProjectSpecs|None = None
|
|
873
|
-
if connection:=_get_project_connection(project_id):
|
|
708
|
+
result: ProjectSpecs | None = None
|
|
709
|
+
if connection := _get_project_connection(project_id):
|
|
874
710
|
with connection.create_session() as session:
|
|
875
711
|
project = session.get(Project, constants.SQLITE_FIRST_PK)
|
|
876
712
|
try:
|
|
877
713
|
# Project can't be missing if session exists.
|
|
878
|
-
result = ProjectSpecs(**project.specs)
|
|
714
|
+
result = ProjectSpecs(**project.specs) # type: ignore
|
|
879
715
|
except Exception as e:
|
|
880
|
-
msg = f
|
|
881
|
-
raise
|
|
716
|
+
msg = f"unable to read specs in project '{project_id}'"
|
|
717
|
+
raise EsgvocDbError(msg) from e
|
|
882
718
|
return result
|
|
883
719
|
|
|
884
720
|
|
|
885
|
-
def
|
|
721
|
+
def _get_collection_from_data_descriptor_in_project(data_descriptor_id: str,
|
|
722
|
+
session: Session) -> Collection | None:
|
|
723
|
+
statement = select(Collection).where(Collection.data_descriptor_id == data_descriptor_id)
|
|
724
|
+
result = session.exec(statement).one_or_none()
|
|
725
|
+
return result
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
def get_collection_from_data_descriptor_in_project(project_id: str,
|
|
729
|
+
data_descriptor_id: str) \
|
|
730
|
+
-> tuple[str, dict] | None:
|
|
886
731
|
"""
|
|
887
|
-
|
|
732
|
+
Returns the collection, in the given project, that corresponds to the given data descriptor
|
|
733
|
+
in the universe.
|
|
734
|
+
This function performs an exact match on the `project_id` and `data_descriptor_id`,
|
|
735
|
+
and does not search for similar or related projects and data descriptors.
|
|
736
|
+
If any of the provided ids (`project_id` or `data_descriptor_id`) is not found, or if
|
|
737
|
+
there is no collection corresponding to the given data descriptor, the function returns `None`.
|
|
888
738
|
|
|
889
|
-
:
|
|
890
|
-
:
|
|
739
|
+
:param project_id: The id of the given project.
|
|
740
|
+
:type project_id: str
|
|
741
|
+
:param data_descriptor_id: The id of the given data descriptor.
|
|
742
|
+
:type data_descriptor_id: str
|
|
743
|
+
:returns: A collection id and context. Returns `None` if no matches are found.
|
|
744
|
+
:rtype: tuple[str, dict] | None
|
|
891
745
|
"""
|
|
892
|
-
|
|
746
|
+
result: tuple[str, dict] | None = None
|
|
747
|
+
if connection := _get_project_connection(project_id):
|
|
748
|
+
with connection.create_session() as session:
|
|
749
|
+
collection_found = _get_collection_from_data_descriptor_in_project(data_descriptor_id,
|
|
750
|
+
session)
|
|
751
|
+
if collection_found:
|
|
752
|
+
result = collection_found.id, collection_found.context
|
|
753
|
+
return result
|
|
754
|
+
|
|
755
|
+
|
|
756
|
+
def get_collection_from_data_descriptor_in_all_projects(data_descriptor_id: str) \
|
|
757
|
+
-> list[tuple[str, str, dict]]:
|
|
758
|
+
"""
|
|
759
|
+
Returns the collections, in all projects, that correspond to the given data descriptor
|
|
760
|
+
in the universe.
|
|
761
|
+
This function performs an exact match on `data_descriptor_id`,
|
|
762
|
+
and does not search for similar or related data descriptors.
|
|
763
|
+
If the provided `data_descriptor_id` is not found, or if
|
|
764
|
+
there is no collection corresponding to the given data descriptor, the function returns
|
|
765
|
+
an empty list.
|
|
766
|
+
|
|
767
|
+
:param data_descriptor_id: The id of the given data descriptor.
|
|
768
|
+
:type data_descriptor_id: str
|
|
769
|
+
:returns: A list of tuples of project id, collection id and collection context. \
|
|
770
|
+
Returns an empty list if no matches are found.
|
|
771
|
+
:rtype: list[tuple[str, str, dict]]
|
|
772
|
+
"""
|
|
773
|
+
result = list()
|
|
774
|
+
project_ids = get_all_projects()
|
|
775
|
+
for project_id in project_ids:
|
|
776
|
+
collection_found = get_collection_from_data_descriptor_in_project(project_id,
|
|
777
|
+
data_descriptor_id)
|
|
778
|
+
if collection_found:
|
|
779
|
+
result.append((project_id, collection_found[0], collection_found[1]))
|
|
780
|
+
return result
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
def _get_term_from_universe_term_id_in_project(data_descriptor_id: str,
|
|
784
|
+
universe_term_id: str,
|
|
785
|
+
project_session: Session) -> PTerm | None:
|
|
786
|
+
statement = select(PTerm).join(Collection).where(Collection.data_descriptor_id == data_descriptor_id,
|
|
787
|
+
PTerm.id == universe_term_id)
|
|
788
|
+
results = project_session.exec(statement)
|
|
789
|
+
result = results.one_or_none()
|
|
790
|
+
return result
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
def get_term_from_universe_term_id_in_project(project_id: str,
|
|
794
|
+
data_descriptor_id: str,
|
|
795
|
+
universe_term_id: str,
|
|
796
|
+
selected_term_fields: Iterable[str] | None = None) \
|
|
797
|
+
-> tuple[str, DataDescriptor] | None:
|
|
798
|
+
"""
|
|
799
|
+
Returns the term, in the given project, that corresponds to the given term in the universe.
|
|
800
|
+
This function performs an exact match on the `project_id`, `data_descriptor_id`
|
|
801
|
+
and `universe_term_id`, and does not search for similar or related projects, data descriptors
|
|
802
|
+
and terms. If any of the provided ids (`project_id`, `data_descriptor_id` or `universe_term_id`)
|
|
803
|
+
is not found, or if there is no project term corresponding to the given universe term
|
|
804
|
+
the function returns `None`.
|
|
805
|
+
|
|
806
|
+
:param project_id: The id of the given project.
|
|
807
|
+
:type project_id: str
|
|
808
|
+
:param data_descriptor_id: The id of the data descriptor that contains the given universe term.
|
|
809
|
+
:type data_descriptor_id: str
|
|
810
|
+
:param universe_term_id: The id of the given universe term.
|
|
811
|
+
:type universe_term_id: str
|
|
812
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
813
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
814
|
+
:type selected_term_fields: Iterable[str] | None
|
|
815
|
+
:returns: A collection id and the project term instance. Returns `None` if no matches are found.
|
|
816
|
+
:rtype: tuple[str, DataDescriptor] | None
|
|
817
|
+
"""
|
|
818
|
+
result: tuple[str, DataDescriptor] | None = None
|
|
819
|
+
if connection := _get_project_connection(project_id):
|
|
820
|
+
with connection.create_session() as session:
|
|
821
|
+
term_found = _get_term_from_universe_term_id_in_project(data_descriptor_id,
|
|
822
|
+
universe_term_id,
|
|
823
|
+
session)
|
|
824
|
+
if term_found:
|
|
825
|
+
pydantic_term = instantiate_pydantic_term(term_found, selected_term_fields)
|
|
826
|
+
result = (term_found.collection.id, pydantic_term)
|
|
827
|
+
return result
|
|
828
|
+
|
|
829
|
+
|
|
830
|
+
def get_term_from_universe_term_id_in_all_projects(data_descriptor_id: str,
|
|
831
|
+
universe_term_id: str,
|
|
832
|
+
selected_term_fields: Iterable[str] | None = None) \
|
|
833
|
+
-> list[tuple[str, str, DataDescriptor]]:
|
|
834
|
+
"""
|
|
835
|
+
Returns the terms, in all projects, that correspond to the given term in the universe.
|
|
836
|
+
This function performs an exact match on the `data_descriptor_id`
|
|
837
|
+
and `universe_term_id`, and does not search for similar or related data descriptors
|
|
838
|
+
and terms. If any of the provided ids (`data_descriptor_id` or `universe_term_id`)
|
|
839
|
+
is not found, or if there is no project term corresponding to the given universe term
|
|
840
|
+
the function returns an empty list.
|
|
841
|
+
|
|
842
|
+
:param data_descriptor_id: The id of the data descriptor that contains the given universe term.
|
|
843
|
+
:type data_descriptor_id: str
|
|
844
|
+
:param universe_term_id: The id of the given universe term.
|
|
845
|
+
:type universe_term_id: str
|
|
846
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
847
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
848
|
+
:type selected_term_fields: Iterable[str] | None
|
|
849
|
+
:returns: A list of tuples of project id, collection id and project term instance. \
|
|
850
|
+
Returns an empty list if no matches are found.
|
|
851
|
+
:rtype: list[tuple[str, str, DataDescriptor]]
|
|
852
|
+
"""
|
|
853
|
+
result: list[tuple[str, str, DataDescriptor]] = list()
|
|
854
|
+
project_ids = get_all_projects()
|
|
855
|
+
for project_id in project_ids:
|
|
856
|
+
term_found = get_term_from_universe_term_id_in_project(project_id,
|
|
857
|
+
data_descriptor_id,
|
|
858
|
+
universe_term_id,
|
|
859
|
+
selected_term_fields)
|
|
860
|
+
if term_found:
|
|
861
|
+
result.append((project_id, term_found[0], term_found[1]))
|
|
862
|
+
return result
|
|
863
|
+
|
|
864
|
+
|
|
865
|
+
def _find_collections_in_project(expression: str,
|
|
866
|
+
session: Session,
|
|
867
|
+
only_id: bool = False,
|
|
868
|
+
limit: int | None = None,
|
|
869
|
+
offset: int | None = None) -> Sequence[Collection]:
|
|
870
|
+
matching_condition = generate_matching_condition(PCollectionFTS5, expression, only_id)
|
|
871
|
+
tmp_statement = select(PCollectionFTS5).where(matching_condition)
|
|
872
|
+
statement = select(Collection).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
|
|
873
|
+
return execute_match_statement(expression, statement, session)
|
|
874
|
+
|
|
875
|
+
|
|
876
|
+
def find_collections_in_project(expression: str, project_id: str,
|
|
877
|
+
only_id: bool = False,
|
|
878
|
+
limit: int | None = None,
|
|
879
|
+
offset: int | None = None) -> list[tuple[str, dict]]:
|
|
880
|
+
"""
|
|
881
|
+
Find collections in the given project based on a full text search defined by the given `expression`.
|
|
882
|
+
The `expression` comes from the powerful
|
|
883
|
+
`SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
|
|
884
|
+
and corresponds to the expression of the `MATCH` operator.
|
|
885
|
+
It can be composed of one or multiple keywords combined with boolean
|
|
886
|
+
operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
|
|
887
|
+
with the wildcard `*`.
|
|
888
|
+
The function returns a list of collection ids and contexts, sorted according to the
|
|
889
|
+
bm25 ranking metric (list index `0` has the highest rank).
|
|
890
|
+
This function performs an exact match on the `project_id`,
|
|
891
|
+
and does not search for similar or related projects.
|
|
892
|
+
If the provided `expression` does not hit any collection or the given `project_id` does not
|
|
893
|
+
exactly match the id of a project, the function returns an empty list.
|
|
894
|
+
The function searches for the `expression` in the collection specifications.
|
|
895
|
+
However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
|
|
896
|
+
collections. **At the moment, `only_id` is set to `True` as the collections
|
|
897
|
+
haven't got any description.**
|
|
898
|
+
|
|
899
|
+
:param expression: The full text search expression.
|
|
900
|
+
:type expression: str
|
|
901
|
+
:param project_id: The id of the given project.
|
|
902
|
+
:type project_id: str
|
|
903
|
+
:param only_id: Performs the search only on ids, otherwise on all the specifications.
|
|
904
|
+
:type only_id: bool
|
|
905
|
+
:param limit: Limit the number of returned items found. Returns all items found if \
|
|
906
|
+
`limit` is either `None`, zero or negative.
|
|
907
|
+
:type limit: int | None
|
|
908
|
+
:param offset: Skips `offset` number of items found. Ignored if `offset` is \
|
|
909
|
+
either `None`, zero or negative.
|
|
910
|
+
:type offset: int | None
|
|
911
|
+
:returns: A list of collection ids and contexts. Returns an empty list if no matches are found.
|
|
912
|
+
:rtype: list[tuple[str, dict]]
|
|
913
|
+
:raises EsgvocValueError: If the `expression` cannot be interpreted.
|
|
914
|
+
"""
|
|
915
|
+
result: list[tuple[str, dict]] = list()
|
|
916
|
+
if connection := _get_project_connection(project_id):
|
|
917
|
+
with connection.create_session() as session:
|
|
918
|
+
collections_found = _find_collections_in_project(expression, session, only_id,
|
|
919
|
+
limit, offset)
|
|
920
|
+
for collection in collections_found:
|
|
921
|
+
result.append((collection.id, collection.context))
|
|
922
|
+
return result
|
|
923
|
+
|
|
924
|
+
|
|
925
|
+
def _find_terms_in_collection(expression: str,
|
|
926
|
+
collection_id: str,
|
|
927
|
+
session: Session,
|
|
928
|
+
only_id: bool = False,
|
|
929
|
+
limit: int | None = None,
|
|
930
|
+
offset: int | None = None) -> Sequence[PTerm]:
|
|
931
|
+
matching_condition = generate_matching_condition(PTermFTS5, expression, only_id)
|
|
932
|
+
where_condition = Collection.id == collection_id, matching_condition
|
|
933
|
+
tmp_statement = select(PTermFTS5).join(Collection).where(*where_condition)
|
|
934
|
+
statement = select(PTerm).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
|
|
935
|
+
return execute_match_statement(expression, statement, session)
|
|
936
|
+
|
|
937
|
+
|
|
938
|
+
def _find_terms_in_project(expression: str,
|
|
939
|
+
session: Session,
|
|
940
|
+
only_id: bool = False,
|
|
941
|
+
limit: int | None = None,
|
|
942
|
+
offset: int | None = None) -> Sequence[PTerm]:
|
|
943
|
+
matching_condition = generate_matching_condition(PTermFTS5, expression, only_id)
|
|
944
|
+
tmp_statement = select(PTermFTS5).where(matching_condition)
|
|
945
|
+
statement = select(PTerm).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
|
|
946
|
+
return execute_match_statement(expression, statement, session)
|
|
947
|
+
|
|
948
|
+
|
|
949
|
+
def find_terms_in_collection(expression: str, project_id: str,
|
|
950
|
+
collection_id: str,
|
|
951
|
+
only_id: bool = False,
|
|
952
|
+
limit: int | None = None,
|
|
953
|
+
offset: int | None = None,
|
|
954
|
+
selected_term_fields: Iterable[str] | None = None) \
|
|
955
|
+
-> list[DataDescriptor]:
|
|
956
|
+
"""
|
|
957
|
+
Find terms in the given project and collection based on a full text search defined by the given
|
|
958
|
+
`expression`. The `expression` comes from the powerful
|
|
959
|
+
`SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
|
|
960
|
+
and corresponds to the expression of the `MATCH` operator.
|
|
961
|
+
It can be composed of one or multiple keywords combined with boolean
|
|
962
|
+
operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
|
|
963
|
+
with the wildcard `*`.
|
|
964
|
+
The function returns a list of term instances, sorted according to the
|
|
965
|
+
bm25 ranking metric (list index `0` has the highest rank).
|
|
966
|
+
This function performs an exact match on the `project_id` and `collection_id`,
|
|
967
|
+
and does not search for similar or related projects and collections.
|
|
968
|
+
If the provided `expression` does not hit any term or if any of the provided ids
|
|
969
|
+
(`project_id` or `collection_id`) is not found, the function returns an empty list.
|
|
970
|
+
The function searches for the `expression` in the term specifications.
|
|
971
|
+
However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
|
|
972
|
+
terms.
|
|
973
|
+
|
|
974
|
+
:param expression: The full text search expression.
|
|
975
|
+
:type expression: str
|
|
976
|
+
:param project_id: The id of the given project.
|
|
977
|
+
:type project_id: str
|
|
978
|
+
:param collection_id: The id of the given collection.
|
|
979
|
+
:type collection_id: str
|
|
980
|
+
:param only_id: Performs the search only on ids, otherwise on all the specifications.
|
|
981
|
+
:type only_id: bool
|
|
982
|
+
:param limit: Limit the number of returned items found. Returns all items found if \
|
|
983
|
+
`limit` is either `None`, zero or negative.
|
|
984
|
+
:type limit: int | None
|
|
985
|
+
:param offset: Skips `offset` number of items found. Ignored if `offset` is \
|
|
986
|
+
either `None`, zero or negative.
|
|
987
|
+
:type offset: int | None
|
|
988
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
989
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
990
|
+
:type selected_term_fields: Iterable[str] | None
|
|
991
|
+
:returns: A list of term instances. Returns an empty list if no matches are found.
|
|
992
|
+
:rtype: list[DataDescriptor]
|
|
993
|
+
:raises EsgvocValueError: If the `expression` cannot be interpreted.
|
|
994
|
+
"""
|
|
995
|
+
result: list[DataDescriptor] = list()
|
|
996
|
+
if connection := _get_project_connection(project_id):
|
|
997
|
+
with connection.create_session() as session:
|
|
998
|
+
pterms_found = _find_terms_in_collection(expression, collection_id, session,
|
|
999
|
+
only_id, limit, offset)
|
|
1000
|
+
instantiate_pydantic_terms(pterms_found, result, selected_term_fields)
|
|
1001
|
+
return result
|
|
1002
|
+
|
|
1003
|
+
|
|
1004
|
+
def find_terms_in_project(expression: str,
                          project_id: str,
                          only_id: bool = False,
                          limit: int | None = None,
                          offset: int | None = None,
                          selected_term_fields: Iterable[str] | None = None) \
                                                                    -> list[DataDescriptor]:
    """
    Run a full text search for terms inside a single project.

    The `expression` follows the syntax of the `MATCH` operator of the
    `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_.
    It can be composed of one or multiple keywords combined with boolean
    operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or
    postfixes with the wildcard `*`.
    The terms found are returned sorted according to the bm25 ranking metric
    (list index `0` has the highest rank).
    The `project_id` is matched exactly: similar or related projects are not searched.
    If the `expression` does not hit any term, or if the provided `project_id` is not found,
    the function returns an empty list.
    By default the `expression` is searched in the term specifications. When `only_id` is
    `True` (default is `False`), the search is restricted to the id of the terms.

    :param expression: The full text search expression.
    :type expression: str
    :param project_id: The id of the given project.
    :type project_id: str
    :param only_id: Performs the search only on ids, otherwise on all the specifications.
    :type only_id: bool
    :param limit: Limit the number of returned items found. Returns all items found if
                  `limit` is either `None`, zero or negative.
    :type limit: int | None
    :param offset: Skips `offset` number of items found. Ignored if `offset` is
                   either `None`, zero or negative.
    :type offset: int | None
    :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the
                                 fields of the terms are returned. If empty, selects the id and
                                 type fields.
    :type selected_term_fields: Iterable[str] | None
    :returns: A list of term instances. Returns an empty list if no matches are found.
    :rtype: list[DataDescriptor]
    :raises EsgvocValueError: If the `expression` cannot be interpreted.
    """
    terms: list[DataDescriptor] = []
    connection = _get_project_connection(project_id)
    if not connection:
        # No connection is available for this project id: nothing to search.
        return terms
    with connection.create_session() as session:
        matches = _find_terms_in_project(expression, session, only_id, limit, offset)
        # Fills `terms` in place with the instances built from the matching rows.
        instantiate_pydantic_terms(matches, terms, selected_term_fields)
    return terms
|
|
893
1054
|
|
|
894
1055
|
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
1056
|
+
def find_terms_in_all_projects(expression: str,
                               only_id: bool = False,
                               limit: int | None = None,
                               offset: int | None = None,
                               selected_term_fields: Iterable[str] | None = None) \
                                                      -> list[tuple[str, list[DataDescriptor]]]:
    """
    Run a full text search for terms in every project.

    The `expression` follows the syntax of the `MATCH` operator of the
    `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_.
    It can be composed of one or multiple keywords combined with boolean
    operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or
    postfixes with the wildcard `*`.
    The function returns a list of pairs made of a project id and the terms found in that
    project, the terms being sorted according to the bm25 ranking metric
    (list index `0` has the highest rank).
    Projects in which the `expression` does not hit any term are omitted. If no project
    has a hit, the function returns an empty list.
    By default the `expression` is searched in the term specifications. When `only_id` is
    `True` (default is `False`), the search is restricted to the id of the terms.

    :param expression: The full text search expression.
    :type expression: str
    :param only_id: Performs the search only on ids, otherwise on all the specifications.
    :type only_id: bool
    :param limit: Limit the number of returned items found, for each project. Returns all
                  items found if `limit` is either `None`, zero or negative.
    :type limit: int | None
    :param offset: Skips `offset` number of items found, for each project. Ignored if
                   `offset` is either `None`, zero or negative.
    :type offset: int | None
    :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the
                                 fields of the terms are returned. If empty, selects the id and
                                 type fields.
    :type selected_term_fields: Iterable[str] | None
    :returns: A list of project ids and term instances. Returns an empty list if no matches
              are found.
    :rtype: list[tuple[str, list[DataDescriptor]]]
    :raises EsgvocValueError: If the `expression` cannot be interpreted.
    """
    # One search per project, in the order given by get_all_projects(); `limit` and
    # `offset` are forwarded unchanged to each per-project search.
    return [(project_id, terms)
            for project_id in get_all_projects()
            if (terms := find_terms_in_project(expression, project_id, only_id,
                                               limit, offset, selected_term_fields))]
|
|
1102
|
+
|
|
1103
|
+
|
|
1104
|
+
def find_items_in_project(expression: str,
                          project_id: str,
                          only_id: bool = False,
                          limit: int | None = None,
                          offset: int | None = None) -> list[Item]:
    """
    Find items, at the moment terms and collections, in the given project based on a full-text
    search defined by the given `expression`. The `expression` comes from the powerful
    `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
    and corresponds to the expression of the `MATCH` operator.
    It can be composed of one or multiple keywords combined with boolean
    operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
    with the wildcard `*`.
    The function returns a list of item instances sorted according to the
    bm25 ranking metric (list index `0` has the highest rank).
    This function performs an exact match on the `project_id`,
    and does not search for similar or related projects.
    If the provided `expression` does not hit any item, or the provided `project_id` is not found,
    the function returns an empty list.
    The function searches for the `expression` in the term and collection specifications.
    However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
    terms and collections. **At the moment, `only_id` is set to `True` for the collections because
    they haven't got any description.**

    :param expression: The full text search expression.
    :type expression: str
    :param project_id: The id of the given project.
    :type project_id: str
    :param only_id: Performs the search only on ids, otherwise on all the specifications.
    :type only_id: bool
    :param limit: Limit the number of returned items found. Returns all items found if
                  `limit` is either `None`, zero or negative.
    :type limit: int | None
    :param offset: Skips `offset` number of items found. Ignored if `offset` is
                   either `None`, zero or negative.
    :type offset: int | None
    :returns: A list of item instances. Returns an empty list if no matches are found.
    :rtype: list[Item]
    :raises EsgvocValueError: If the `expression` cannot be interpreted.
    """
    # Imported locally so that this function does not rely on the module-level import list.
    from sqlalchemy import literal

    # TODO: execute union query when it will be possible to compute parent of terms and collections.
    result: list[Item] = list()
    if connection := _get_project_connection(project_id):
        with connection.create_session() as session:
            # TODO: use the collection specs when implemented! Until then the collections
            # are matched on their id, whatever the value of `only_id`.
            collection_column = col(PCollectionFTS5.id)
            if only_id:
                term_column = col(PTermFTS5.id)
            else:
                term_column = col(PTermFTS5.specs)  # type: ignore
            # Both statements select four columns in the same order:
            # item id, kind of item, parent id and FTS5 rank.
            # The parent of a collection is the project: its id is passed as a bound
            # parameter (not interpolated into the SQL text) so that quotes or colons in
            # the id cannot alter the statement, and it gets its own label instead of
            # reusing the `TYPE` alias of the previous column.
            collection_statement = select(PCollectionFTS5.id,
                                          text("'collection' AS TYPE"),
                                          literal(project_id).label('parent_id'),
                                          text('rank')).where(collection_column.match(expression))
            term_statement = select(PTermFTS5.id,
                                    text("'term' AS TYPE"),
                                    Collection.id,
                                    text('rank')).join(Collection) \
                                                 .where(term_column.match(expression))
            result = execute_find_item_statements(session, expression, collection_statement,
                                                  term_statement, limit, offset)
    return result
|