esgvoc 0.1.2__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +3 -1
- esgvoc/api/__init__.py +30 -30
- esgvoc/api/_utils.py +28 -14
- esgvoc/api/data_descriptors/__init__.py +19 -10
- esgvoc/api/data_descriptors/activity.py +8 -45
- esgvoc/api/data_descriptors/area_label.py +6 -0
- esgvoc/api/data_descriptors/branded_suffix.py +5 -0
- esgvoc/api/data_descriptors/branded_variable.py +5 -0
- esgvoc/api/data_descriptors/consortium.py +16 -56
- esgvoc/api/data_descriptors/data_descriptor.py +106 -0
- esgvoc/api/data_descriptors/date.py +3 -46
- esgvoc/api/data_descriptors/directory_date.py +5 -0
- esgvoc/api/data_descriptors/experiment.py +19 -54
- esgvoc/api/data_descriptors/forcing_index.py +3 -45
- esgvoc/api/data_descriptors/frequency.py +6 -43
- esgvoc/api/data_descriptors/grid_label.py +6 -44
- esgvoc/api/data_descriptors/horizontal_label.py +6 -0
- esgvoc/api/data_descriptors/initialisation_index.py +3 -44
- esgvoc/api/data_descriptors/institution.py +11 -54
- esgvoc/api/data_descriptors/license.py +4 -44
- esgvoc/api/data_descriptors/mip_era.py +6 -44
- esgvoc/api/data_descriptors/model_component.py +7 -45
- esgvoc/api/data_descriptors/organisation.py +3 -40
- esgvoc/api/data_descriptors/physic_index.py +3 -45
- esgvoc/api/data_descriptors/product.py +4 -43
- esgvoc/api/data_descriptors/realisation_index.py +3 -44
- esgvoc/api/data_descriptors/realm.py +4 -42
- esgvoc/api/data_descriptors/resolution.py +6 -44
- esgvoc/api/data_descriptors/source.py +18 -53
- esgvoc/api/data_descriptors/source_type.py +3 -41
- esgvoc/api/data_descriptors/sub_experiment.py +3 -41
- esgvoc/api/data_descriptors/table.py +6 -48
- esgvoc/api/data_descriptors/temporal_label.py +6 -0
- esgvoc/api/data_descriptors/time_range.py +3 -27
- esgvoc/api/data_descriptors/variable.py +13 -71
- esgvoc/api/data_descriptors/variant_label.py +3 -47
- esgvoc/api/data_descriptors/vertical_label.py +5 -0
- esgvoc/api/project_specs.py +82 -0
- esgvoc/api/projects.py +284 -238
- esgvoc/api/report.py +89 -52
- esgvoc/api/search.py +31 -11
- esgvoc/api/universe.py +57 -48
- esgvoc/apps/__init__.py +6 -0
- esgvoc/apps/drs/__init__.py +0 -16
- esgvoc/apps/drs/constants.py +2 -0
- esgvoc/apps/drs/generator.py +429 -0
- esgvoc/apps/drs/report.py +492 -0
- esgvoc/apps/drs/validator.py +330 -0
- esgvoc/cli/drs.py +248 -0
- esgvoc/cli/get.py +26 -25
- esgvoc/cli/install.py +11 -8
- esgvoc/cli/main.py +4 -5
- esgvoc/cli/status.py +14 -2
- esgvoc/cli/valid.py +41 -45
- esgvoc/core/db/models/mixins.py +7 -0
- esgvoc/core/db/models/project.py +3 -8
- esgvoc/core/db/models/universe.py +3 -3
- esgvoc/core/db/project_ingestion.py +4 -1
- esgvoc/core/db/universe_ingestion.py +8 -7
- esgvoc/core/logging_handler.py +1 -1
- esgvoc/core/repo_fetcher.py +4 -3
- esgvoc/core/service/__init__.py +37 -5
- esgvoc/core/service/configuration/config_manager.py +188 -0
- esgvoc/core/service/configuration/setting.py +88 -0
- esgvoc/core/service/state.py +66 -42
- esgvoc-0.3.0.dist-info/METADATA +89 -0
- esgvoc-0.3.0.dist-info/RECORD +78 -0
- esgvoc-0.3.0.dist-info/licenses/LICENSE.txt +519 -0
- esgvoc/apps/drs/models.py +0 -43
- esgvoc/apps/drs/parser.py +0 -27
- esgvoc/cli/config.py +0 -79
- esgvoc/core/service/settings.py +0 -64
- esgvoc/core/service/settings.toml +0 -12
- esgvoc/core/service/settings_default.toml +0 -20
- esgvoc-0.1.2.dist-info/METADATA +0 -54
- esgvoc-0.1.2.dist-info/RECORD +0 -66
- {esgvoc-0.1.2.dist-info → esgvoc-0.3.0.dist-info}/WHEEL +0 -0
- {esgvoc-0.1.2.dist-info → esgvoc-0.3.0.dist-info}/entry_points.txt +0 -0
esgvoc/api/projects.py
CHANGED
|
@@ -1,39 +1,51 @@
|
|
|
1
1
|
import re
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable, Sequence
|
|
3
|
+
|
|
4
|
+
from sqlmodel import Session, and_, select
|
|
3
5
|
|
|
4
6
|
import esgvoc.api.universe as universe
|
|
5
|
-
import esgvoc.core.constants
|
|
7
|
+
import esgvoc.core.constants as constants
|
|
6
8
|
import esgvoc.core.service as service
|
|
7
|
-
from esgvoc.api._utils import (
|
|
9
|
+
from esgvoc.api._utils import (APIException, get_universe_session,
|
|
10
|
+
instantiate_pydantic_term,
|
|
8
11
|
instantiate_pydantic_terms)
|
|
12
|
+
from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor
|
|
13
|
+
from esgvoc.api.project_specs import ProjectSpecs
|
|
9
14
|
from esgvoc.api.report import (ProjectTermError, UniverseTermError,
|
|
10
|
-
|
|
11
|
-
from esgvoc.api.search import MatchingTerm, SearchSettings,
|
|
15
|
+
ValidationReport)
|
|
16
|
+
from esgvoc.api.search import (MatchingTerm, SearchSettings,
|
|
17
|
+
_create_str_comparison_expression)
|
|
12
18
|
from esgvoc.core.db.connection import DBConnection
|
|
13
19
|
from esgvoc.core.db.models.mixins import TermKind
|
|
14
20
|
from esgvoc.core.db.models.project import Collection, Project, PTerm
|
|
15
21
|
from esgvoc.core.db.models.universe import UTerm
|
|
16
|
-
|
|
17
|
-
|
|
22
|
+
|
|
23
|
+
# [OPTIMIZATION]
|
|
24
|
+
_VALID_TERM_IN_COLLECTION_CACHE: dict[str, list[MatchingTerm]] = dict()
|
|
25
|
+
_VALID_VALUE_AGAINST_GIVEN_TERM_CACHE: dict[str, list[UniverseTermError|ProjectTermError]] = dict()
|
|
18
26
|
|
|
19
27
|
|
|
20
28
|
def _get_project_connection(project_id: str) -> DBConnection|None:
|
|
21
|
-
|
|
29
|
+
if project_id in service.current_state.projects:
|
|
30
|
+
return service.current_state.projects[project_id].db_connection
|
|
31
|
+
else:
|
|
32
|
+
return None
|
|
33
|
+
|
|
22
34
|
|
|
23
35
|
def _get_project_session_with_exception(project_id: str) -> Session:
|
|
24
36
|
if connection:=_get_project_connection(project_id):
|
|
25
37
|
project_session = connection.create_session()
|
|
26
38
|
return project_session
|
|
27
39
|
else:
|
|
28
|
-
raise
|
|
29
|
-
|
|
40
|
+
raise APIException(f'unable to find project {project_id}')
|
|
41
|
+
|
|
30
42
|
|
|
31
|
-
def _resolve_term(
|
|
43
|
+
def _resolve_term(composite_term_part: dict,
|
|
32
44
|
universe_session: Session,
|
|
33
45
|
project_session: Session) -> UTerm|PTerm:
|
|
34
|
-
|
|
35
|
-
term_id =
|
|
36
|
-
term_type =
|
|
46
|
+
# First find the term in the universe, then in the current project
|
|
47
|
+
term_id = composite_term_part[constants.TERM_ID_JSON_KEY]
|
|
48
|
+
term_type = composite_term_part[constants.TERM_TYPE_JSON_KEY]
|
|
37
49
|
uterms = universe._find_terms_in_data_descriptor(data_descriptor_id=term_type,
|
|
38
50
|
term_id=term_id,
|
|
39
51
|
session=universe_session,
|
|
@@ -49,24 +61,24 @@ def _resolve_term(term_composite_part: dict,
|
|
|
49
61
|
return pterms[0]
|
|
50
62
|
else:
|
|
51
63
|
msg = f'unable to find the term {term_id} in {term_type}'
|
|
52
|
-
raise RuntimeError(msg)
|
|
64
|
+
raise RuntimeError(msg)
|
|
53
65
|
|
|
54
66
|
|
|
55
|
-
def
|
|
56
|
-
separator = term.specs[
|
|
57
|
-
parts = term.specs[
|
|
67
|
+
def _get_composite_term_separator_parts(term: UTerm|PTerm) -> tuple[str, list]:
|
|
68
|
+
separator = term.specs[constants.COMPOSITE_SEPARATOR_JSON_KEY]
|
|
69
|
+
parts = term.specs[constants.COMPOSITE_PARTS_JSON_KEY]
|
|
58
70
|
return separator, parts
|
|
59
71
|
|
|
60
72
|
|
|
61
73
|
# TODO: support optionality of parts of composite.
|
|
62
74
|
# It is backtrack possible for more than one missing parts.
|
|
63
|
-
def
|
|
75
|
+
def _valid_value_composite_term_with_separator(value: str,
|
|
64
76
|
term: UTerm|PTerm,
|
|
65
77
|
universe_session: Session,
|
|
66
78
|
project_session: Session)\
|
|
67
|
-
-> list[
|
|
79
|
+
-> list[UniverseTermError|ProjectTermError]:
|
|
68
80
|
result = list()
|
|
69
|
-
separator, parts =
|
|
81
|
+
separator, parts = _get_composite_term_separator_parts(term)
|
|
70
82
|
if separator in value:
|
|
71
83
|
splits = value.split(separator)
|
|
72
84
|
if len(splits) == len(parts):
|
|
@@ -92,11 +104,15 @@ def _transform_to_pattern(term: UTerm|PTerm,
|
|
|
92
104
|
project_session: Session) -> str:
|
|
93
105
|
match term.kind:
|
|
94
106
|
case TermKind.PLAIN:
|
|
95
|
-
|
|
107
|
+
if constants.DRS_SPECS_JSON_KEY in term.specs:
|
|
108
|
+
result = term.specs[constants.DRS_SPECS_JSON_KEY]
|
|
109
|
+
else:
|
|
110
|
+
raise APIException(f"the term {term.id} doesn't have drs name. " +
|
|
111
|
+
"Can't validate it.")
|
|
96
112
|
case TermKind.PATTERN:
|
|
97
|
-
result = term.specs[
|
|
113
|
+
result = term.specs[constants.PATTERN_JSON_KEY]
|
|
98
114
|
case TermKind.COMPOSITE:
|
|
99
|
-
separator, parts =
|
|
115
|
+
separator, parts = _get_composite_term_separator_parts(term)
|
|
100
116
|
result = ""
|
|
101
117
|
for part in parts:
|
|
102
118
|
resolved_term = _resolve_term(part, universe_session, project_session)
|
|
@@ -104,22 +120,22 @@ def _transform_to_pattern(term: UTerm|PTerm,
|
|
|
104
120
|
result = f'{result}{pattern}{separator}'
|
|
105
121
|
result = result.rstrip(separator)
|
|
106
122
|
case _:
|
|
107
|
-
raise
|
|
123
|
+
raise RuntimeError(f'unsupported term kind {term.kind}')
|
|
108
124
|
return result
|
|
109
125
|
|
|
110
126
|
|
|
111
127
|
# TODO: support optionality of parts of composite.
|
|
112
128
|
# It is backtrack possible for more than one missing parts.
|
|
113
|
-
def
|
|
129
|
+
def _valid_value_composite_term_separator_less(value: str,
|
|
114
130
|
term: UTerm|PTerm,
|
|
115
131
|
universe_session: Session,
|
|
116
132
|
project_session: Session)\
|
|
117
|
-
-> list[
|
|
133
|
+
-> list[UniverseTermError|ProjectTermError]:
|
|
118
134
|
result = list()
|
|
119
135
|
try:
|
|
120
136
|
pattern = _transform_to_pattern(term, universe_session, project_session)
|
|
121
137
|
try:
|
|
122
|
-
#
|
|
138
|
+
# Patterns terms are meant to be validated individually.
|
|
123
139
|
# So their regex are defined as a whole (begins by a ^, ends by a $).
|
|
124
140
|
# As the pattern is a concatenation of plain or regex, multiple ^ and $ can exist.
|
|
125
141
|
# The later, must be removed.
|
|
@@ -127,70 +143,74 @@ def _valid_value_term_composite_separator_less(value: str,
|
|
|
127
143
|
pattern = f'^{pattern}$'
|
|
128
144
|
regex = re.compile(pattern)
|
|
129
145
|
except Exception as e:
|
|
130
|
-
msg = f'regex compilation error:\n{e}'
|
|
131
|
-
raise
|
|
146
|
+
msg = f'regex compilation error while processing term {term.id}:\n{e}'
|
|
147
|
+
raise RuntimeError(msg) from e
|
|
132
148
|
match = regex.match(value)
|
|
133
149
|
if match is None:
|
|
134
150
|
result.append(_create_term_error(value, term))
|
|
135
151
|
return result
|
|
136
152
|
except Exception as e:
|
|
137
|
-
msg = f'cannot validate separator less composite term {term.id}:\n{e}'
|
|
153
|
+
msg = f'cannot validate separator less composite term {term.id}:\n{e}'
|
|
138
154
|
raise RuntimeError(msg) from e
|
|
139
155
|
|
|
140
156
|
|
|
141
|
-
def
|
|
157
|
+
def _valid_value_for_composite_term(value: str,
|
|
142
158
|
term: UTerm|PTerm,
|
|
143
159
|
universe_session: Session,
|
|
144
160
|
project_session: Session)\
|
|
145
|
-
-> list[
|
|
161
|
+
-> list[UniverseTermError|ProjectTermError]:
|
|
146
162
|
result = list()
|
|
147
|
-
separator, _ =
|
|
163
|
+
separator, _ = _get_composite_term_separator_parts(term)
|
|
148
164
|
if separator:
|
|
149
|
-
result =
|
|
165
|
+
result = _valid_value_composite_term_with_separator(value, term, universe_session,
|
|
150
166
|
project_session)
|
|
151
167
|
else:
|
|
152
|
-
result =
|
|
168
|
+
result = _valid_value_composite_term_separator_less(value, term, universe_session,
|
|
153
169
|
project_session)
|
|
154
170
|
return result
|
|
155
171
|
|
|
156
172
|
|
|
157
|
-
def _create_term_error(value: str, term: UTerm|PTerm) ->
|
|
173
|
+
def _create_term_error(value: str, term: UTerm|PTerm) -> UniverseTermError|ProjectTermError:
|
|
158
174
|
if isinstance(term, UTerm):
|
|
159
|
-
return UniverseTermError(value, term
|
|
175
|
+
return UniverseTermError(value=value, term=term.specs, term_kind=term.kind,
|
|
176
|
+
data_descriptor_id=term.data_descriptor.id)
|
|
160
177
|
else:
|
|
161
|
-
return ProjectTermError(value, term
|
|
178
|
+
return ProjectTermError(value=value, term=term.specs, term_kind=term.kind,
|
|
179
|
+
collection_id=term.collection.id)
|
|
162
180
|
|
|
163
181
|
|
|
164
182
|
def _valid_value(value: str,
|
|
165
183
|
term: UTerm|PTerm,
|
|
166
184
|
universe_session: Session,
|
|
167
|
-
project_session: Session) -> list[
|
|
185
|
+
project_session: Session) -> list[UniverseTermError|ProjectTermError]:
|
|
168
186
|
result = list()
|
|
169
187
|
match term.kind:
|
|
170
188
|
case TermKind.PLAIN:
|
|
171
|
-
if
|
|
172
|
-
|
|
189
|
+
if constants.DRS_SPECS_JSON_KEY in term.specs:
|
|
190
|
+
if term.specs[constants.DRS_SPECS_JSON_KEY] != value:
|
|
191
|
+
result.append(_create_term_error(value, term))
|
|
192
|
+
else:
|
|
193
|
+
raise APIException(f"the term {term.id} doesn't have drs name. " +
|
|
194
|
+
"Can't validate it.")
|
|
173
195
|
case TermKind.PATTERN:
|
|
174
196
|
# OPTIM: Pattern can be compiled and stored for further matching.
|
|
175
|
-
pattern_match = re.match(term.specs[
|
|
197
|
+
pattern_match = re.match(term.specs[constants.PATTERN_JSON_KEY], value)
|
|
176
198
|
if pattern_match is None:
|
|
177
199
|
result.append(_create_term_error(value, term))
|
|
178
200
|
case TermKind.COMPOSITE:
|
|
179
|
-
result.extend(
|
|
201
|
+
result.extend(_valid_value_for_composite_term(value, term,
|
|
180
202
|
universe_session,
|
|
181
203
|
project_session))
|
|
182
204
|
case _:
|
|
183
|
-
raise
|
|
205
|
+
raise RuntimeError(f'unsupported term kind {term.kind}')
|
|
184
206
|
return result
|
|
185
207
|
|
|
186
208
|
|
|
187
|
-
def
|
|
188
|
-
if not value:
|
|
189
|
-
raise
|
|
190
|
-
if result:= value.strip():
|
|
191
|
-
return result
|
|
209
|
+
def _check_value(value: str) -> str:
|
|
210
|
+
if not value or value.isspace():
|
|
211
|
+
raise APIException('value should be set')
|
|
192
212
|
else:
|
|
193
|
-
|
|
213
|
+
return value
|
|
194
214
|
|
|
195
215
|
|
|
196
216
|
def _search_plain_term_and_valid_value(value: str,
|
|
@@ -198,7 +218,7 @@ def _search_plain_term_and_valid_value(value: str,
|
|
|
198
218
|
project_session: Session) \
|
|
199
219
|
-> str|None:
|
|
200
220
|
where_expression = and_(Collection.id == collection_id,
|
|
201
|
-
PTerm.specs[
|
|
221
|
+
PTerm.specs[constants.DRS_SPECS_JSON_KEY] == f'"{value}"')
|
|
202
222
|
statement = select(PTerm).join(Collection).where(where_expression)
|
|
203
223
|
term = project_session.exec(statement).one_or_none()
|
|
204
224
|
return term.id if term else None
|
|
@@ -223,12 +243,17 @@ def _valid_value_against_all_terms_of_collection(value: str,
|
|
|
223
243
|
|
|
224
244
|
|
|
225
245
|
def _valid_value_against_given_term(value: str,
|
|
246
|
+
project_id: str,
|
|
226
247
|
collection_id: str,
|
|
227
248
|
term_id: str,
|
|
228
249
|
universe_session: Session,
|
|
229
250
|
project_session: Session)\
|
|
230
|
-
-> list[
|
|
231
|
-
|
|
251
|
+
-> list[UniverseTermError|ProjectTermError]:
|
|
252
|
+
# [OPTIMIZATION]
|
|
253
|
+
key = value + project_id + collection_id + term_id
|
|
254
|
+
if key in _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE:
|
|
255
|
+
result = _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key]
|
|
256
|
+
else:
|
|
232
257
|
terms = _find_terms_in_collection(collection_id,
|
|
233
258
|
term_id,
|
|
234
259
|
project_session,
|
|
@@ -237,12 +262,9 @@ def _valid_value_against_given_term(value: str,
|
|
|
237
262
|
term = terms[0]
|
|
238
263
|
result = _valid_value(value, term, universe_session, project_session)
|
|
239
264
|
else:
|
|
240
|
-
raise
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
msg = f'unable to valid term {term_id} ' +\
|
|
244
|
-
f'in collection {collection_id}'
|
|
245
|
-
raise RuntimeError(msg) from e
|
|
265
|
+
raise APIException(f'unable to find term {term_id} ' +
|
|
266
|
+
f'in collection {collection_id}')
|
|
267
|
+
_VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key] = result
|
|
246
268
|
return result
|
|
247
269
|
|
|
248
270
|
|
|
@@ -254,19 +276,19 @@ def valid_term(value: str,
|
|
|
254
276
|
"""
|
|
255
277
|
Check if the given value may or may not represent the given term. The functions returns
|
|
256
278
|
a report that contains the possible errors.
|
|
257
|
-
|
|
279
|
+
|
|
258
280
|
Behavior based on the nature of the term:
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
281
|
+
- plain term: the function tries to match the value on the drs_name field.
|
|
282
|
+
- pattern term: the function tries to match the value on the pattern field (regex).
|
|
283
|
+
- composite term:
|
|
284
|
+
- if the composite has got a separator, the function splits the value according to the\
|
|
285
|
+
separator of the term then it tries to match every part of the composite\
|
|
286
|
+
with every split of the value.
|
|
287
|
+
- if the composite hasn't got a separator, the function aggregates the parts of the \
|
|
288
|
+
composite so as to compare it as a regex to the value.
|
|
267
289
|
|
|
268
290
|
If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
|
|
269
|
-
the function raises a
|
|
291
|
+
the function raises an APIException.
|
|
270
292
|
|
|
271
293
|
:param value: A value to be validated
|
|
272
294
|
:type value: str
|
|
@@ -278,14 +300,14 @@ def valid_term(value: str,
|
|
|
278
300
|
:type term_id: str
|
|
279
301
|
:returns: A validation report that contains the possible errors
|
|
280
302
|
:rtype: ValidationReport
|
|
281
|
-
:raises
|
|
303
|
+
:raises APIException: If any of the provided ids is not found
|
|
282
304
|
"""
|
|
283
|
-
value =
|
|
305
|
+
value = _check_value(value)
|
|
284
306
|
with get_universe_session() as universe_session, \
|
|
285
307
|
_get_project_session_with_exception(project_id) as project_session:
|
|
286
|
-
errors = _valid_value_against_given_term(value, collection_id, term_id,
|
|
308
|
+
errors = _valid_value_against_given_term(value, project_id, collection_id, term_id,
|
|
287
309
|
universe_session, project_session)
|
|
288
|
-
return ValidationReport(value, errors)
|
|
310
|
+
return ValidationReport(expression=value, errors=errors)
|
|
289
311
|
|
|
290
312
|
|
|
291
313
|
def _valid_term_in_collection(value: str,
|
|
@@ -294,28 +316,38 @@ def _valid_term_in_collection(value: str,
|
|
|
294
316
|
universe_session: Session,
|
|
295
317
|
project_session: Session) \
|
|
296
318
|
-> list[MatchingTerm]:
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
None)
|
|
302
|
-
if collections:
|
|
303
|
-
collection = collections[0]
|
|
304
|
-
match collection.term_kind:
|
|
305
|
-
case TermKind.PLAIN:
|
|
306
|
-
term_id_found = _search_plain_term_and_valid_value(value, collection_id,
|
|
307
|
-
project_session)
|
|
308
|
-
if term_id_found:
|
|
309
|
-
result.append(MatchingTerm(project_id, collection_id, term_id_found))
|
|
310
|
-
case _:
|
|
311
|
-
term_ids_found = _valid_value_against_all_terms_of_collection(value, collection,
|
|
312
|
-
universe_session,
|
|
313
|
-
project_session)
|
|
314
|
-
for term_id_found in term_ids_found:
|
|
315
|
-
result.append(MatchingTerm(project_id, collection_id, term_id_found))
|
|
319
|
+
# [OPTIMIZATION]
|
|
320
|
+
key = value + project_id + collection_id
|
|
321
|
+
if key in _VALID_TERM_IN_COLLECTION_CACHE:
|
|
322
|
+
result = _VALID_TERM_IN_COLLECTION_CACHE[key]
|
|
316
323
|
else:
|
|
317
|
-
|
|
318
|
-
|
|
324
|
+
value = _check_value(value)
|
|
325
|
+
result = list()
|
|
326
|
+
collections = _find_collections_in_project(collection_id,
|
|
327
|
+
project_session,
|
|
328
|
+
None)
|
|
329
|
+
if collections:
|
|
330
|
+
collection = collections[0]
|
|
331
|
+
match collection.term_kind:
|
|
332
|
+
case TermKind.PLAIN:
|
|
333
|
+
term_id_found = _search_plain_term_and_valid_value(value, collection_id,
|
|
334
|
+
project_session)
|
|
335
|
+
if term_id_found:
|
|
336
|
+
result.append(MatchingTerm(project_id=project_id,
|
|
337
|
+
collection_id=collection_id,
|
|
338
|
+
term_id=term_id_found))
|
|
339
|
+
case _:
|
|
340
|
+
term_ids_found = _valid_value_against_all_terms_of_collection(value, collection,
|
|
341
|
+
universe_session,
|
|
342
|
+
project_session)
|
|
343
|
+
for term_id_found in term_ids_found:
|
|
344
|
+
result.append(MatchingTerm(project_id=project_id,
|
|
345
|
+
collection_id=collection_id,
|
|
346
|
+
term_id=term_id_found))
|
|
347
|
+
else:
|
|
348
|
+
msg = f'unable to find collection {collection_id}'
|
|
349
|
+
raise APIException(msg)
|
|
350
|
+
_VALID_TERM_IN_COLLECTION_CACHE[key] = result
|
|
319
351
|
return result
|
|
320
352
|
|
|
321
353
|
|
|
@@ -326,19 +358,19 @@ def valid_term_in_collection(value: str,
|
|
|
326
358
|
"""
|
|
327
359
|
Check if the given value may or may not represent a term in the given collection. The function
|
|
328
360
|
returns the terms that the value matches.
|
|
329
|
-
|
|
361
|
+
|
|
330
362
|
Behavior based on the nature of the term:
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
363
|
+
- plain term: the function tries to match the value on the drs_name field.
|
|
364
|
+
- pattern term: the function tries to match the value on the pattern field (regex).
|
|
365
|
+
- composite term:
|
|
366
|
+
- if the composite has got a separator, the function splits the value according to the \
|
|
367
|
+
separator of the term then it tries to match every part of the composite \
|
|
368
|
+
with every split of the value.
|
|
369
|
+
- if the composite hasn't got a separator, the function aggregates the parts of the \
|
|
370
|
+
composite so as to compare it as a regex to the value.
|
|
339
371
|
|
|
340
372
|
If any of the provided ids (`project_id` or `collection_id`) is not found,
|
|
341
|
-
the function raises a
|
|
373
|
+
the function raises an APIException.
|
|
342
374
|
|
|
343
375
|
:param value: A value to be validated
|
|
344
376
|
:type value: str
|
|
@@ -348,7 +380,7 @@ def valid_term_in_collection(value: str,
|
|
|
348
380
|
:type collection_id: str
|
|
349
381
|
:returns: The list of terms that the value matches.
|
|
350
382
|
:rtype: list[MatchingTerm]
|
|
351
|
-
:raises
|
|
383
|
+
:raises APIException: If any of the provided ids is not found
|
|
352
384
|
"""
|
|
353
385
|
with get_universe_session() as universe_session, \
|
|
354
386
|
_get_project_session_with_exception(project_id) as project_session:
|
|
@@ -372,18 +404,18 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
|
|
|
372
404
|
"""
|
|
373
405
|
Check if the given value may or may not represent a term in the given project. The function
|
|
374
406
|
returns the terms that the value matches.
|
|
375
|
-
|
|
407
|
+
|
|
376
408
|
Behavior based on the nature of the term:
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
409
|
+
- plain term: the function tries to match the value on the drs_name field.
|
|
410
|
+
- pattern term: the function tries to match the value on the pattern field (regex).
|
|
411
|
+
- composite term:
|
|
412
|
+
- if the composite has got a separator, the function splits the value according to the \
|
|
413
|
+
separator of the term then it tries to match every part of the composite \
|
|
414
|
+
with every split of the value.
|
|
415
|
+
- if the composite hasn't got a separator, the function aggregates the parts of the \
|
|
416
|
+
composite so as to compare it as a regex to the value.
|
|
385
417
|
|
|
386
|
-
If the `project_id` is not found, the function raises a
|
|
418
|
+
If the `project_id` is not found, the function raises an APIException.
|
|
387
419
|
|
|
388
420
|
:param value: A value to be validated
|
|
389
421
|
:type value: str
|
|
@@ -391,7 +423,7 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
|
|
|
391
423
|
:type project_id: str
|
|
392
424
|
:returns: The list of terms that the value matches.
|
|
393
425
|
:rtype: list[MatchingTerm]
|
|
394
|
-
:raises
|
|
426
|
+
:raises APIException: If the `project_id` is not found
|
|
395
427
|
"""
|
|
396
428
|
with get_universe_session() as universe_session, \
|
|
397
429
|
_get_project_session_with_exception(project_id) as project_session:
|
|
@@ -402,16 +434,16 @@ def valid_term_in_all_projects(value: str) -> list[MatchingTerm]:
|
|
|
402
434
|
"""
|
|
403
435
|
Check if the given value may or may not represent a term in all projects. The function
|
|
404
436
|
returns the terms that the value matches.
|
|
405
|
-
|
|
437
|
+
|
|
406
438
|
Behavior based on the nature of the term:
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
439
|
+
- plain term: the function tries to match the value on the drs_name field.
|
|
440
|
+
- pattern term: the function tries to match the value on the pattern field (regex).
|
|
441
|
+
- composite term:
|
|
442
|
+
- if the composite has got a separator, the function splits the value according to the \
|
|
443
|
+
separator of the term then it tries to match every part of the composite \
|
|
444
|
+
with every split of the value.
|
|
445
|
+
- if the composite hasn't got a separator, the function aggregates the parts of the \
|
|
446
|
+
composite so as to compare it as a regex to the value.
|
|
415
447
|
|
|
416
448
|
:param value: A value to be validated
|
|
417
449
|
:type value: str
|
|
@@ -431,14 +463,14 @@ def _find_terms_in_collection(collection_id: str,
|
|
|
431
463
|
term_id: str,
|
|
432
464
|
session: Session,
|
|
433
465
|
settings: SearchSettings|None = None) -> Sequence[PTerm]:
|
|
434
|
-
|
|
435
|
-
where_expression =
|
|
436
|
-
|
|
437
|
-
|
|
466
|
+
# Settings only apply on the term_id comparison.
|
|
467
|
+
where_expression = _create_str_comparison_expression(field=PTerm.id,
|
|
468
|
+
value=term_id,
|
|
469
|
+
settings=settings)
|
|
438
470
|
statement = select(PTerm).join(Collection).where(Collection.id==collection_id,
|
|
439
471
|
where_expression)
|
|
440
472
|
results = session.exec(statement)
|
|
441
|
-
result = results.all()
|
|
473
|
+
result = results.all()
|
|
442
474
|
return result
|
|
443
475
|
|
|
444
476
|
|
|
@@ -446,21 +478,21 @@ def find_terms_in_collection(project_id:str,
|
|
|
446
478
|
collection_id: str,
|
|
447
479
|
term_id: str,
|
|
448
480
|
settings: SearchSettings|None = None) \
|
|
449
|
-
-> list[
|
|
481
|
+
-> list[DataDescriptor]:
|
|
450
482
|
"""
|
|
451
483
|
Finds one or more terms, based on the specified search settings, in the given collection of a project.
|
|
452
|
-
This function performs an exact match on the `project_id` and `collection_id`,
|
|
484
|
+
This function performs an exact match on the `project_id` and `collection_id`,
|
|
453
485
|
and does **not** search for similar or related projects and collections.
|
|
454
486
|
The given `term_id` is searched according to the search type specified in the parameter `settings`,
|
|
455
487
|
which allows a flexible matching (e.g., `LIKE` may return multiple results).
|
|
456
488
|
If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
|
|
457
489
|
If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
|
|
458
490
|
the function returns an empty list.
|
|
459
|
-
|
|
491
|
+
|
|
460
492
|
Behavior based on search type:
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
493
|
+
- `EXACT` and absence of `settings`: returns zero or one term instance in the list.
|
|
494
|
+
- `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
|
|
495
|
+
term instances in the list.
|
|
464
496
|
|
|
465
497
|
:param project_id: A project id
|
|
466
498
|
:type project_id: str
|
|
@@ -470,14 +502,15 @@ def find_terms_in_collection(project_id:str,
|
|
|
470
502
|
:type term_id: str
|
|
471
503
|
:param settings: The search settings
|
|
472
504
|
:type settings: SearchSettings|None
|
|
473
|
-
:returns: A list of
|
|
474
|
-
:rtype: list[
|
|
505
|
+
:returns: A list of term instances. Returns an empty list if no matches are found.
|
|
506
|
+
:rtype: list[DataDescriptor]
|
|
475
507
|
"""
|
|
476
|
-
result: list[
|
|
508
|
+
result: list[DataDescriptor] = list()
|
|
477
509
|
if connection:=_get_project_connection(project_id):
|
|
478
510
|
with connection.create_session() as session:
|
|
479
511
|
terms = _find_terms_in_collection(collection_id, term_id, session, settings)
|
|
480
|
-
instantiate_pydantic_terms(terms, result
|
|
512
|
+
instantiate_pydantic_terms(terms, result,
|
|
513
|
+
settings.selected_term_fields if settings else None)
|
|
481
514
|
return result
|
|
482
515
|
|
|
483
516
|
|
|
@@ -486,14 +519,14 @@ def _find_terms_from_data_descriptor_in_project(data_descriptor_id: str,
|
|
|
486
519
|
session: Session,
|
|
487
520
|
settings: SearchSettings|None = None) \
|
|
488
521
|
-> Sequence[PTerm]:
|
|
489
|
-
|
|
490
|
-
where_expression =
|
|
491
|
-
|
|
492
|
-
|
|
522
|
+
# Settings only apply on the term_id comparison.
|
|
523
|
+
where_expression = _create_str_comparison_expression(field=PTerm.id,
|
|
524
|
+
value=term_id,
|
|
525
|
+
settings=settings)
|
|
493
526
|
statement = select(PTerm).join(Collection).where(Collection.data_descriptor_id==data_descriptor_id,
|
|
494
527
|
where_expression)
|
|
495
528
|
results = session.exec(statement)
|
|
496
|
-
result = results.all()
|
|
529
|
+
result = results.all()
|
|
497
530
|
return result
|
|
498
531
|
|
|
499
532
|
|
|
@@ -501,23 +534,23 @@ def find_terms_from_data_descriptor_in_project(project_id: str,
|
|
|
501
534
|
data_descriptor_id: str,
|
|
502
535
|
term_id: str,
|
|
503
536
|
settings: SearchSettings|None = None) \
|
|
504
|
-
-> list[tuple[
|
|
537
|
+
-> list[tuple[DataDescriptor, str]]:
|
|
505
538
|
"""
|
|
506
539
|
Finds one or more terms in the given project which are instances of the given data descriptor
|
|
507
540
|
in the universe, based on the specified search settings, in the given collection of a project.
|
|
508
|
-
This function performs an exact match on the `project_id` and `data_descriptor_id`,
|
|
541
|
+
This function performs an exact match on the `project_id` and `data_descriptor_id`,
|
|
509
542
|
and does **not** search for similar or related projects and data descriptors.
|
|
510
543
|
The given `term_id` is searched according to the search type specified in the parameter `settings`,
|
|
511
544
|
which allows a flexible matching (e.g., `LIKE` may return multiple results).
|
|
512
545
|
If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
|
|
513
546
|
If any of the provided ids (`project_id`, `data_descriptor_id` or `term_id`) is not found,
|
|
514
547
|
the function returns an empty list.
|
|
515
|
-
|
|
548
|
+
|
|
516
549
|
Behavior based on search type:
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
550
|
+
- `EXACT` and absence of `settings`: returns zero or one term instance and \
|
|
551
|
+
collection id in the list.
|
|
552
|
+
- `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
|
|
553
|
+
term instances and collection ids in the list.
|
|
521
554
|
|
|
522
555
|
:param project_id: A project id
|
|
523
556
|
:type project_id: str
|
|
@@ -527,9 +560,9 @@ def find_terms_from_data_descriptor_in_project(project_id: str,
|
|
|
527
560
|
:type term_id: str
|
|
528
561
|
:param settings: The search settings
|
|
529
562
|
:type settings: SearchSettings|None
|
|
530
|
-
:returns: A list of tuple of
|
|
563
|
+
:returns: A list of tuple of term instances and related collection ids. \
|
|
531
564
|
Returns an empty list if no matches are found.
|
|
532
|
-
:rtype: list[tuple[
|
|
565
|
+
:rtype: list[tuple[DataDescriptor, str]]
|
|
533
566
|
"""
|
|
534
567
|
result = list()
|
|
535
568
|
if connection:=_get_project_connection(project_id):
|
|
@@ -540,7 +573,8 @@ def find_terms_from_data_descriptor_in_project(project_id: str,
|
|
|
540
573
|
settings)
|
|
541
574
|
for pterm in terms:
|
|
542
575
|
collection_id = pterm.collection.id
|
|
543
|
-
term = instantiate_pydantic_term(pterm)
|
|
576
|
+
term = instantiate_pydantic_term(pterm,
|
|
577
|
+
settings.selected_term_fields if settings else None)
|
|
544
578
|
result.append((term, collection_id))
|
|
545
579
|
return result
|
|
546
580
|
|
|
@@ -548,23 +582,23 @@ def find_terms_from_data_descriptor_in_project(project_id: str,
|
|
|
548
582
|
def find_terms_from_data_descriptor_in_all_projects(data_descriptor_id: str,
|
|
549
583
|
term_id: str,
|
|
550
584
|
settings: SearchSettings|None = None) \
|
|
551
|
-
|
|
585
|
+
-> list[tuple[list[tuple[DataDescriptor, str]], str]]:
|
|
552
586
|
"""
|
|
553
587
|
Finds one or more terms in all projects which are instances of the given data descriptor
|
|
554
588
|
in the universe, based on the specified search settings, in the given collection of a project.
|
|
555
|
-
This function performs an exact match on the `data_descriptor_id`,
|
|
589
|
+
This function performs an exact match on the `data_descriptor_id`,
|
|
556
590
|
and does **not** search for similar or related data descriptors.
|
|
557
591
|
The given `term_id` is searched according to the search type specified in the parameter `settings`,
|
|
558
592
|
which allows a flexible matching (e.g., `LIKE` may return multiple results).
|
|
559
593
|
If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
|
|
560
594
|
If any of the provided ids (`data_descriptor_id` or `term_id`) is not found,
|
|
561
595
|
the function returns an empty list.
|
|
562
|
-
|
|
596
|
+
|
|
563
597
|
Behavior based on search type:
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
598
|
+
- `EXACT` and absence of `settings`: returns zero or one term instance and \
|
|
599
|
+
collection id in the list.
|
|
600
|
+
- `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
|
|
601
|
+
term instances and collection ids in the list.
|
|
568
602
|
|
|
569
603
|
:param data_descriptor_id: A data descriptor
|
|
570
604
|
:type data_descriptor_id: str
|
|
@@ -572,26 +606,28 @@ def find_terms_from_data_descriptor_in_all_projects(data_descriptor_id: str,
|
|
|
572
606
|
:type term_id: str
|
|
573
607
|
:param settings: The search settings
|
|
574
608
|
:type settings: SearchSettings|None
|
|
575
|
-
:returns: A list of tuple of
|
|
609
|
+
:returns: A list of tuple of matching terms with their collection id, per project. \
|
|
576
610
|
Returns an empty list if no matches are found.
|
|
577
|
-
:rtype: list[tuple[
|
|
611
|
+
:rtype: list[tuple[list[tuple[DataDescriptor, str]], str]]
|
|
578
612
|
"""
|
|
579
613
|
project_ids = get_all_projects()
|
|
580
|
-
result = list()
|
|
614
|
+
result: list[tuple[list[tuple[DataDescriptor, str]], str]] = list()
|
|
581
615
|
for project_id in project_ids:
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
616
|
+
matching_terms = find_terms_from_data_descriptor_in_project(project_id,
|
|
617
|
+
data_descriptor_id,
|
|
618
|
+
term_id,
|
|
619
|
+
settings)
|
|
620
|
+
if matching_terms:
|
|
621
|
+
result.append((matching_terms, project_id))
|
|
586
622
|
return result
|
|
587
623
|
|
|
588
624
|
|
|
589
625
|
def _find_terms_in_project(term_id: str,
|
|
590
626
|
session: Session,
|
|
591
627
|
settings: SearchSettings|None) -> Sequence[PTerm]:
|
|
592
|
-
where_expression =
|
|
593
|
-
|
|
594
|
-
|
|
628
|
+
where_expression = _create_str_comparison_expression(field=PTerm.id,
|
|
629
|
+
value=term_id,
|
|
630
|
+
settings=settings)
|
|
595
631
|
statement = select(PTerm).where(where_expression)
|
|
596
632
|
results = session.exec(statement).all()
|
|
597
633
|
return results
|
|
@@ -599,7 +635,7 @@ def _find_terms_in_project(term_id: str,
|
|
|
599
635
|
|
|
600
636
|
def find_terms_in_all_projects(term_id: str,
|
|
601
637
|
settings: SearchSettings|None = None) \
|
|
602
|
-
-> list[
|
|
638
|
+
-> list[DataDescriptor]:
|
|
603
639
|
"""
|
|
604
640
|
Finds one or more terms, based on the specified search settings, in all projects.
|
|
605
641
|
The given `term_id` is searched according to the search type specified in the parameter `settings`,
|
|
@@ -612,8 +648,8 @@ def find_terms_in_all_projects(term_id: str,
|
|
|
612
648
|
:type term_id: str
|
|
613
649
|
:param settings: The search settings
|
|
614
650
|
:type settings: SearchSettings|None
|
|
615
|
-
:returns: A list of
|
|
616
|
-
:rtype: list[
|
|
651
|
+
:returns: A list of term instances. Returns an empty list if no matches are found.
|
|
652
|
+
:rtype: list[DataDescriptor]
|
|
617
653
|
"""
|
|
618
654
|
project_ids = get_all_projects()
|
|
619
655
|
result = list()
|
|
@@ -625,10 +661,10 @@ def find_terms_in_all_projects(term_id: str,
|
|
|
625
661
|
def find_terms_in_project(project_id: str,
|
|
626
662
|
term_id: str,
|
|
627
663
|
settings: SearchSettings|None = None) \
|
|
628
|
-
-> list[
|
|
664
|
+
-> list[DataDescriptor]:
|
|
629
665
|
"""
|
|
630
666
|
Finds one or more terms, based on the specified search settings, in a project.
|
|
631
|
-
This function performs an exact match on the `project_id` and
|
|
667
|
+
This function performs an exact match on the `project_id` and
|
|
632
668
|
does **not** search for similar or related projects.
|
|
633
669
|
The given `term_id` is searched according to the search type specified in the parameter `settings`,
|
|
634
670
|
which allows a flexible matching (e.g., `LIKE` may return multiple results).
|
|
@@ -643,20 +679,22 @@ def find_terms_in_project(project_id: str,
|
|
|
643
679
|
:type term_id: str
|
|
644
680
|
:param settings: The search settings
|
|
645
681
|
:type settings: SearchSettings|None
|
|
646
|
-
:returns: A list of
|
|
647
|
-
:rtype: list[
|
|
682
|
+
:returns: A list of term instances. Returns an empty list if no matches are found.
|
|
683
|
+
:rtype: list[DataDescriptor]
|
|
648
684
|
"""
|
|
649
|
-
result: list[
|
|
685
|
+
result: list[DataDescriptor] = list()
|
|
650
686
|
if connection:=_get_project_connection(project_id):
|
|
651
687
|
with connection.create_session() as session:
|
|
652
688
|
terms = _find_terms_in_project(term_id, session, settings)
|
|
653
|
-
instantiate_pydantic_terms(terms, result)
|
|
689
|
+
instantiate_pydantic_terms(terms, result,
|
|
690
|
+
settings.selected_term_fields if settings else None)
|
|
654
691
|
return result
|
|
655
692
|
|
|
656
693
|
|
|
657
694
|
def get_all_terms_in_collection(project_id: str,
|
|
658
|
-
collection_id: str
|
|
659
|
-
|
|
695
|
+
collection_id: str,
|
|
696
|
+
selected_term_fields: Iterable[str]|None = None)\
|
|
697
|
+
-> list[DataDescriptor]:
|
|
660
698
|
"""
|
|
661
699
|
Gets all terms of the given collection of a project.
|
|
662
700
|
This function performs an exact match on the `project_id` and `collection_id`,
|
|
@@ -668,9 +706,11 @@ def get_all_terms_in_collection(project_id: str,
|
|
|
668
706
|
:type project_id: str
|
|
669
707
|
:param collection_id: A collection id
|
|
670
708
|
:type collection_id: str
|
|
671
|
-
:
|
|
672
|
-
|
|
673
|
-
:
|
|
709
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
710
|
+
fields of the terms are returned.
|
|
711
|
+
:type selected_term_fields: Iterable[str]|None
|
|
712
|
+
:returns: a list of term instances. Returns an empty list if no matches are found.
|
|
713
|
+
:rtype: list[DataDescriptor]
|
|
674
714
|
"""
|
|
675
715
|
result = list()
|
|
676
716
|
if connection:=_get_project_connection(project_id):
|
|
@@ -680,7 +720,7 @@ def get_all_terms_in_collection(project_id: str,
|
|
|
680
720
|
None)
|
|
681
721
|
if collections:
|
|
682
722
|
collection = collections[0]
|
|
683
|
-
result = _get_all_terms_in_collection(collection)
|
|
723
|
+
result = _get_all_terms_in_collection(collection, selected_term_fields)
|
|
684
724
|
return result
|
|
685
725
|
|
|
686
726
|
|
|
@@ -688,9 +728,9 @@ def _find_collections_in_project(collection_id: str,
|
|
|
688
728
|
session: Session,
|
|
689
729
|
settings: SearchSettings|None) \
|
|
690
730
|
-> Sequence[Collection]:
|
|
691
|
-
where_exp =
|
|
692
|
-
|
|
693
|
-
|
|
731
|
+
where_exp = _create_str_comparison_expression(field=Collection.id,
|
|
732
|
+
value=collection_id,
|
|
733
|
+
settings=settings)
|
|
694
734
|
statement = select(Collection).where(where_exp)
|
|
695
735
|
results = session.exec(statement)
|
|
696
736
|
result = results.all()
|
|
@@ -703,19 +743,19 @@ def find_collections_in_project(project_id: str,
|
|
|
703
743
|
-> list[dict]:
|
|
704
744
|
"""
|
|
705
745
|
Finds one or more collections of the given project.
|
|
706
|
-
This function performs an exact match on the `project_id` and
|
|
746
|
+
This function performs an exact match on the `project_id` and
|
|
707
747
|
does **not** search for similar or related projects.
|
|
708
|
-
The given `collection_id` is searched according to the search type specified in
|
|
748
|
+
The given `collection_id` is searched according to the search type specified in
|
|
709
749
|
the parameter `settings`,
|
|
710
750
|
which allows a flexible matching (e.g., `LIKE` may return multiple results).
|
|
711
751
|
If the parameter `settings` is `None`, this function performs an exact match on the `collection_id`.
|
|
712
752
|
If any of the provided ids (`project_id` or `collection_id`) is not found, the function returns
|
|
713
753
|
an empty list.
|
|
714
|
-
|
|
754
|
+
|
|
715
755
|
Behavior based on search type:
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
756
|
+
- `EXACT` and absence of `settings`: returns zero or one collection context in the list.
|
|
757
|
+
- `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
|
|
758
|
+
collection contexts in the list.
|
|
719
759
|
|
|
720
760
|
:param project_id: A project id
|
|
721
761
|
:type project_id: str
|
|
@@ -723,8 +763,7 @@ def find_collections_in_project(project_id: str,
|
|
|
723
763
|
:type collection_id: str
|
|
724
764
|
:param settings: The search settings
|
|
725
765
|
:type settings: SearchSettings|None
|
|
726
|
-
:returns: A list of collection contexts.
|
|
727
|
-
Returns an empty list if no matches are found.
|
|
766
|
+
:returns: A list of collection contexts. Returns an empty list if no matches are found.
|
|
728
767
|
:rtype: list[dict]
|
|
729
768
|
"""
|
|
730
769
|
result = list()
|
|
@@ -739,7 +778,7 @@ def find_collections_in_project(project_id: str,
|
|
|
739
778
|
|
|
740
779
|
|
|
741
780
|
def _get_all_collections_in_project(session: Session) -> list[Collection]:
|
|
742
|
-
project = session.get(Project,
|
|
781
|
+
project = session.get(Project, constants.SQLITE_FIRST_PK)
|
|
743
782
|
# Project can't be missing if session exists.
|
|
744
783
|
return project.collections # type: ignore
|
|
745
784
|
|
|
@@ -747,14 +786,13 @@ def _get_all_collections_in_project(session: Session) -> list[Collection]:
|
|
|
747
786
|
def get_all_collections_in_project(project_id: str) -> list[str]:
|
|
748
787
|
"""
|
|
749
788
|
Gets all collections of the given project.
|
|
750
|
-
This function performs an exact match on the `project_id` and
|
|
789
|
+
This function performs an exact match on the `project_id` and
|
|
751
790
|
does **not** search for similar or related projects.
|
|
752
791
|
If the provided `project_id` is not found, the function returns an empty list.
|
|
753
792
|
|
|
754
793
|
:param project_id: A project id
|
|
755
794
|
:type project_id: str
|
|
756
|
-
:returns: A list of collection ids.
|
|
757
|
-
Returns an empty list if no matches are found.
|
|
795
|
+
:returns: A list of collection ids. Returns an empty list if no matches are found.
|
|
758
796
|
:rtype: list[str]
|
|
759
797
|
"""
|
|
760
798
|
result = list()
|
|
@@ -766,25 +804,29 @@ def get_all_collections_in_project(project_id: str) -> list[str]:
|
|
|
766
804
|
return result
|
|
767
805
|
|
|
768
806
|
|
|
769
|
-
def _get_all_terms_in_collection(collection: Collection
|
|
770
|
-
|
|
771
|
-
|
|
807
|
+
def _get_all_terms_in_collection(collection: Collection,
|
|
808
|
+
selected_term_fields: Iterable[str]|None) -> list[DataDescriptor]:
|
|
809
|
+
result: list[DataDescriptor] = list()
|
|
810
|
+
instantiate_pydantic_terms(collection.terms, result, selected_term_fields)
|
|
772
811
|
return result
|
|
773
812
|
|
|
774
813
|
|
|
775
|
-
def get_all_terms_in_project(project_id: str) -> list[BaseModel]:
|
|
814
|
+
def get_all_terms_in_project(project_id: str,
|
|
815
|
+
selected_term_fields: Iterable[str]|None = None) -> list[DataDescriptor]:
|
|
776
816
|
"""
|
|
777
817
|
Gets all terms of the given project.
|
|
778
|
-
This function performs an exact match on the `project_id` and
|
|
818
|
+
This function performs an exact match on the `project_id` and
|
|
779
819
|
does **not** search for similar or related projects.
|
|
780
820
|
Terms are unique within a collection but may have some synonyms in a project.
|
|
781
821
|
If the provided `project_id` is not found, the function returns an empty list.
|
|
782
822
|
|
|
783
823
|
:param project_id: A project id
|
|
784
824
|
:type project_id: str
|
|
785
|
-
:
|
|
786
|
-
|
|
787
|
-
:
|
|
825
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
826
|
+
fields of the terms are returned.
|
|
827
|
+
:type selected_term_fields: Iterable[str]|None
|
|
828
|
+
:returns: A list of term instances. Returns an empty list if no matches are found.
|
|
829
|
+
:rtype: list[DataDescriptor]
|
|
788
830
|
"""
|
|
789
831
|
result = list()
|
|
790
832
|
if connection:=_get_project_connection(project_id):
|
|
@@ -792,63 +834,67 @@ def get_all_terms_in_project(project_id: str) -> list[BaseModel]:
|
|
|
792
834
|
collections = _get_all_collections_in_project(session)
|
|
793
835
|
for collection in collections:
|
|
794
836
|
# Term may have some synonyms in a project.
|
|
795
|
-
result.extend(_get_all_terms_in_collection(collection))
|
|
837
|
+
result.extend(_get_all_terms_in_collection(collection, selected_term_fields))
|
|
796
838
|
return result
|
|
797
839
|
|
|
798
840
|
|
|
799
|
-
def get_all_terms_in_all_projects(
|
|
841
|
+
def get_all_terms_in_all_projects(selected_term_fields: Iterable[str]|None = None) \
|
|
842
|
+
-> list[tuple[str, list[DataDescriptor]]]:
|
|
800
843
|
"""
|
|
801
844
|
Gets all terms of all projects.
|
|
802
845
|
|
|
803
|
-
:
|
|
804
|
-
|
|
846
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
847
|
+
fields of the terms are returned.
|
|
848
|
+
:type selected_term_fields: Iterable[str]|None
|
|
849
|
+
:returns: A list of tuple project_id and term instances of that project.
|
|
850
|
+
:rtype: list[tuple[str, list[DataDescriptor]]]
|
|
805
851
|
"""
|
|
806
852
|
project_ids = get_all_projects()
|
|
807
853
|
result = list()
|
|
808
854
|
for project_id in project_ids:
|
|
809
|
-
|
|
855
|
+
terms = get_all_terms_in_project(project_id, selected_term_fields)
|
|
856
|
+
result.append((project_id, terms))
|
|
810
857
|
return result
|
|
811
858
|
|
|
812
859
|
|
|
813
|
-
def find_project(project_id: str) -> dict|None:
|
|
860
|
+
def find_project(project_id: str) -> ProjectSpecs|None:
|
|
814
861
|
"""
|
|
815
|
-
Finds a project.
|
|
816
|
-
This function performs an exact match on the `project_id` and
|
|
862
|
+
Finds a project and returns its specifications.
|
|
863
|
+
This function performs an exact match on the `project_id` and
|
|
817
864
|
does **not** search for similar or related projects.
|
|
818
865
|
If the provided `project_id` is not found, the function returns `None`.
|
|
819
|
-
|
|
866
|
+
|
|
820
867
|
:param project_id: A project id to be found
|
|
821
868
|
:type project_id: str
|
|
822
|
-
:returns: The specs of the project found.
|
|
823
|
-
|
|
824
|
-
:rtype: dict|None
|
|
869
|
+
:returns: The specs of the project found. Returns `None` if no matches are found.
|
|
870
|
+
:rtype: ProjectSpecs|None
|
|
825
871
|
"""
|
|
826
|
-
result = None
|
|
872
|
+
result: ProjectSpecs|None = None
|
|
827
873
|
if connection:=_get_project_connection(project_id):
|
|
828
874
|
with connection.create_session() as session:
|
|
829
|
-
project = session.get(Project,
|
|
830
|
-
|
|
831
|
-
|
|
875
|
+
project = session.get(Project, constants.SQLITE_FIRST_PK)
|
|
876
|
+
try:
|
|
877
|
+
# Project can't be missing if session exists.
|
|
878
|
+
result = ProjectSpecs(**project.specs) # type: ignore
|
|
879
|
+
except Exception as e:
|
|
880
|
+
msg = f'Unable to read specs in project {project_id}'
|
|
881
|
+
raise RuntimeError(msg) from e
|
|
832
882
|
return result
|
|
833
883
|
|
|
834
884
|
|
|
835
885
|
def get_all_projects() -> list[str]:
|
|
836
886
|
"""
|
|
837
887
|
Gets all projects.
|
|
838
|
-
|
|
888
|
+
|
|
839
889
|
:returns: A list of project ids.
|
|
840
890
|
:rtype: list[str]
|
|
841
891
|
"""
|
|
842
|
-
return list(service.
|
|
892
|
+
return list(service.current_state.projects.keys())
|
|
843
893
|
|
|
844
894
|
|
|
845
895
|
if __name__ == "__main__":
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
from esgvoc.api import BasicValidationErrorVisitor
|
|
852
|
-
visitor = BasicValidationErrorVisitor()
|
|
853
|
-
for error in vr.errors:
|
|
854
|
-
print(error.accept(visitor))
|
|
896
|
+
settings = SearchSettings()
|
|
897
|
+
settings.selected_term_fields = ('id', 'drs_name')
|
|
898
|
+
settings.case_sensitive = False
|
|
899
|
+
matching_terms = find_terms_from_data_descriptor_in_all_projects('organisation', 'IpsL', settings)
|
|
900
|
+
print(matching_terms)
|