esgvoc 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +1 -1
- esgvoc/api/__init__.py +95 -60
- esgvoc/api/data_descriptors/__init__.py +50 -28
- esgvoc/api/data_descriptors/activity.py +3 -3
- esgvoc/api/data_descriptors/area_label.py +16 -1
- esgvoc/api/data_descriptors/branded_suffix.py +20 -0
- esgvoc/api/data_descriptors/branded_variable.py +12 -0
- esgvoc/api/data_descriptors/consortium.py +14 -13
- esgvoc/api/data_descriptors/contact.py +5 -0
- esgvoc/api/data_descriptors/conventions.py +6 -0
- esgvoc/api/data_descriptors/creation_date.py +5 -0
- esgvoc/api/data_descriptors/data_descriptor.py +14 -9
- esgvoc/api/data_descriptors/data_specs_version.py +5 -0
- esgvoc/api/data_descriptors/date.py +1 -1
- esgvoc/api/data_descriptors/directory_date.py +1 -1
- esgvoc/api/data_descriptors/experiment.py +13 -11
- esgvoc/api/data_descriptors/forcing_index.py +1 -1
- esgvoc/api/data_descriptors/frequency.py +3 -3
- esgvoc/api/data_descriptors/further_info_url.py +5 -0
- esgvoc/api/data_descriptors/grid_label.py +2 -2
- esgvoc/api/data_descriptors/horizontal_label.py +15 -1
- esgvoc/api/data_descriptors/initialisation_index.py +1 -1
- esgvoc/api/data_descriptors/institution.py +8 -5
- esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
- esgvoc/api/data_descriptors/license.py +3 -3
- esgvoc/api/data_descriptors/mip_era.py +1 -1
- esgvoc/api/data_descriptors/model_component.py +1 -1
- esgvoc/api/data_descriptors/obs_type.py +5 -0
- esgvoc/api/data_descriptors/organisation.py +1 -1
- esgvoc/api/data_descriptors/physic_index.py +1 -1
- esgvoc/api/data_descriptors/product.py +2 -2
- esgvoc/api/data_descriptors/publication_status.py +5 -0
- esgvoc/api/data_descriptors/realisation_index.py +1 -1
- esgvoc/api/data_descriptors/realm.py +1 -1
- esgvoc/api/data_descriptors/region.py +5 -0
- esgvoc/api/data_descriptors/resolution.py +3 -3
- esgvoc/api/data_descriptors/source.py +9 -5
- esgvoc/api/data_descriptors/source_type.py +1 -1
- esgvoc/api/data_descriptors/table.py +3 -2
- esgvoc/api/data_descriptors/temporal_label.py +15 -1
- esgvoc/api/data_descriptors/time_range.py +4 -3
- esgvoc/api/data_descriptors/title.py +5 -0
- esgvoc/api/data_descriptors/tracking_id.py +5 -0
- esgvoc/api/data_descriptors/variable.py +25 -12
- esgvoc/api/data_descriptors/variant_label.py +3 -3
- esgvoc/api/data_descriptors/vertical_label.py +14 -0
- esgvoc/api/project_specs.py +120 -4
- esgvoc/api/projects.py +733 -505
- esgvoc/api/py.typed +0 -0
- esgvoc/api/report.py +12 -8
- esgvoc/api/search.py +168 -98
- esgvoc/api/universe.py +368 -157
- esgvoc/apps/drs/constants.py +1 -1
- esgvoc/apps/drs/generator.py +51 -69
- esgvoc/apps/drs/report.py +60 -15
- esgvoc/apps/drs/validator.py +60 -71
- esgvoc/apps/jsg/cmip6_template.json +74 -0
- esgvoc/apps/jsg/cmip6plus_template.json +74 -0
- esgvoc/apps/jsg/json_schema_generator.py +185 -0
- esgvoc/apps/py.typed +0 -0
- esgvoc/cli/config.py +500 -0
- esgvoc/cli/drs.py +3 -2
- esgvoc/cli/find.py +138 -0
- esgvoc/cli/get.py +46 -38
- esgvoc/cli/main.py +10 -3
- esgvoc/cli/status.py +27 -18
- esgvoc/cli/valid.py +10 -15
- esgvoc/core/constants.py +1 -1
- esgvoc/core/db/__init__.py +2 -4
- esgvoc/core/db/connection.py +5 -3
- esgvoc/core/db/models/project.py +57 -15
- esgvoc/core/db/models/universe.py +49 -10
- esgvoc/core/db/project_ingestion.py +79 -65
- esgvoc/core/db/universe_ingestion.py +71 -40
- esgvoc/core/exceptions.py +33 -0
- esgvoc/core/logging_handler.py +24 -2
- esgvoc/core/repo_fetcher.py +61 -59
- esgvoc/core/service/data_merger.py +47 -34
- esgvoc/core/service/state.py +107 -83
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/METADATA +7 -20
- esgvoc-1.0.0.dist-info/RECORD +95 -0
- esgvoc/api/_utils.py +0 -53
- esgvoc/core/logging.conf +0 -21
- esgvoc-0.3.0.dist-info/RECORD +0 -78
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/WHEEL +0 -0
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/entry_points.txt +0 -0
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/api/projects.py
CHANGED
|
@@ -1,31 +1,39 @@
|
|
|
1
1
|
import re
|
|
2
|
-
from
|
|
2
|
+
from typing import Iterable, Sequence
|
|
3
3
|
|
|
4
|
-
from
|
|
4
|
+
from sqlalchemy import text
|
|
5
|
+
from sqlmodel import Session, and_, col, select
|
|
5
6
|
|
|
6
7
|
import esgvoc.api.universe as universe
|
|
7
8
|
import esgvoc.core.constants as constants
|
|
8
9
|
import esgvoc.core.service as service
|
|
9
|
-
from esgvoc.api._utils import (APIException, get_universe_session,
|
|
10
|
-
instantiate_pydantic_term,
|
|
11
|
-
instantiate_pydantic_terms)
|
|
12
10
|
from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor
|
|
13
11
|
from esgvoc.api.project_specs import ProjectSpecs
|
|
14
|
-
from esgvoc.api.report import
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
12
|
+
from esgvoc.api.report import ProjectTermError, UniverseTermError, ValidationReport
|
|
13
|
+
from esgvoc.api.search import (
|
|
14
|
+
Item,
|
|
15
|
+
MatchingTerm,
|
|
16
|
+
execute_find_item_statements,
|
|
17
|
+
execute_match_statement,
|
|
18
|
+
generate_matching_condition,
|
|
19
|
+
get_universe_session,
|
|
20
|
+
handle_rank_limit_offset,
|
|
21
|
+
instantiate_pydantic_term,
|
|
22
|
+
instantiate_pydantic_terms,
|
|
23
|
+
process_expression,
|
|
24
|
+
)
|
|
18
25
|
from esgvoc.core.db.connection import DBConnection
|
|
19
26
|
from esgvoc.core.db.models.mixins import TermKind
|
|
20
|
-
from esgvoc.core.db.models.project import
|
|
27
|
+
from esgvoc.core.db.models.project import PCollection, PCollectionFTS5, Project, PTerm, PTermFTS5
|
|
21
28
|
from esgvoc.core.db.models.universe import UTerm
|
|
29
|
+
from esgvoc.core.exceptions import EsgvocDbError, EsgvocNotFoundError, EsgvocNotImplementedError, EsgvocValueError
|
|
22
30
|
|
|
23
31
|
# [OPTIMIZATION]
|
|
24
32
|
_VALID_TERM_IN_COLLECTION_CACHE: dict[str, list[MatchingTerm]] = dict()
|
|
25
|
-
_VALID_VALUE_AGAINST_GIVEN_TERM_CACHE: dict[str, list[UniverseTermError|ProjectTermError]] = dict()
|
|
33
|
+
_VALID_VALUE_AGAINST_GIVEN_TERM_CACHE: dict[str, list[UniverseTermError | ProjectTermError]] = dict()
|
|
26
34
|
|
|
27
35
|
|
|
28
|
-
def _get_project_connection(project_id: str) -> DBConnection|None:
|
|
36
|
+
def _get_project_connection(project_id: str) -> DBConnection | None:
|
|
29
37
|
if project_id in service.current_state.projects:
|
|
30
38
|
return service.current_state.projects[project_id].db_connection
|
|
31
39
|
else:
|
|
@@ -33,38 +41,32 @@ def _get_project_connection(project_id: str) -> DBConnection|None:
|
|
|
33
41
|
|
|
34
42
|
|
|
35
43
|
def _get_project_session_with_exception(project_id: str) -> Session:
|
|
36
|
-
if connection:=_get_project_connection(project_id):
|
|
44
|
+
if connection := _get_project_connection(project_id):
|
|
37
45
|
project_session = connection.create_session()
|
|
38
46
|
return project_session
|
|
39
47
|
else:
|
|
40
|
-
raise
|
|
48
|
+
raise EsgvocNotFoundError(f"unable to find project '{project_id}'")
|
|
41
49
|
|
|
42
50
|
|
|
43
|
-
def _resolve_term(composite_term_part: dict,
|
|
44
|
-
universe_session: Session,
|
|
45
|
-
project_session: Session) -> UTerm|PTerm:
|
|
51
|
+
def _resolve_term(composite_term_part: dict, universe_session: Session, project_session: Session) -> UTerm | PTerm:
|
|
46
52
|
# First find the term in the universe, then in the current project
|
|
47
53
|
term_id = composite_term_part[constants.TERM_ID_JSON_KEY]
|
|
48
54
|
term_type = composite_term_part[constants.TERM_TYPE_JSON_KEY]
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
return uterms[0]
|
|
55
|
+
uterm = universe._get_term_in_data_descriptor(
|
|
56
|
+
data_descriptor_id=term_type, term_id=term_id, session=universe_session
|
|
57
|
+
)
|
|
58
|
+
if uterm:
|
|
59
|
+
return uterm
|
|
55
60
|
else:
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
settings=None)
|
|
60
|
-
if pterms:
|
|
61
|
-
return pterms[0]
|
|
61
|
+
pterm = _get_term_in_collection(collection_id=term_type, term_id=term_id, session=project_session)
|
|
62
|
+
if pterm:
|
|
63
|
+
return pterm
|
|
62
64
|
else:
|
|
63
|
-
msg = f
|
|
64
|
-
raise
|
|
65
|
+
msg = f"unable to find the term '{term_id}' in '{term_type}'"
|
|
66
|
+
raise EsgvocNotFoundError(msg)
|
|
65
67
|
|
|
66
68
|
|
|
67
|
-
def _get_composite_term_separator_parts(term: UTerm|PTerm) -> tuple[str, list]:
|
|
69
|
+
def _get_composite_term_separator_parts(term: UTerm | PTerm) -> tuple[str, list]:
|
|
68
70
|
separator = term.specs[constants.COMPOSITE_SEPARATOR_JSON_KEY]
|
|
69
71
|
parts = term.specs[constants.COMPOSITE_PARTS_JSON_KEY]
|
|
70
72
|
return separator, parts
|
|
@@ -72,11 +74,9 @@ def _get_composite_term_separator_parts(term: UTerm|PTerm) -> tuple[str, list]:
|
|
|
72
74
|
|
|
73
75
|
# TODO: support optionality of parts of composite.
|
|
74
76
|
# It is backtrack possible for more than one missing parts.
|
|
75
|
-
def _valid_value_composite_term_with_separator(
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
project_session: Session)\
|
|
79
|
-
-> list[UniverseTermError|ProjectTermError]:
|
|
77
|
+
def _valid_value_composite_term_with_separator(
|
|
78
|
+
value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
|
|
79
|
+
) -> list[UniverseTermError | ProjectTermError]:
|
|
80
80
|
result = list()
|
|
81
81
|
separator, parts = _get_composite_term_separator_parts(term)
|
|
82
82
|
if separator in value:
|
|
@@ -84,14 +84,25 @@ def _valid_value_composite_term_with_separator(value: str,
|
|
|
84
84
|
if len(splits) == len(parts):
|
|
85
85
|
for index in range(0, len(splits)):
|
|
86
86
|
given_value = splits[index]
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
87
|
+
if "id" not in parts[index].keys():
|
|
88
|
+
terms = universe.get_all_terms_in_data_descriptor(parts[index]["type"], None)
|
|
89
|
+
parts[index]["id"] = [term.id for term in terms]
|
|
90
|
+
if type(parts[index]["id"]) is str:
|
|
91
|
+
parts[index]["id"] = [parts[index]["id"]]
|
|
92
|
+
|
|
93
|
+
errors_list = list()
|
|
94
|
+
for id in parts[index]["id"]:
|
|
95
|
+
part_parts = dict(parts[index])
|
|
96
|
+
part_parts["id"] = id
|
|
97
|
+
resolved_term = _resolve_term(part_parts, universe_session, project_session)
|
|
98
|
+
errors = _valid_value(given_value, resolved_term, universe_session, project_session)
|
|
99
|
+
if len(errors) == 0:
|
|
100
|
+
errors_list = errors
|
|
101
|
+
break
|
|
102
|
+
else:
|
|
103
|
+
errors_list.extend(errors)
|
|
104
|
+
else:
|
|
105
|
+
result.append(_create_term_error(value, term))
|
|
95
106
|
else:
|
|
96
107
|
result.append(_create_term_error(value, term))
|
|
97
108
|
else:
|
|
@@ -99,38 +110,33 @@ def _valid_value_composite_term_with_separator(value: str,
|
|
|
99
110
|
return result
|
|
100
111
|
|
|
101
112
|
|
|
102
|
-
def _transform_to_pattern(term: UTerm|PTerm,
|
|
103
|
-
universe_session: Session,
|
|
104
|
-
project_session: Session) -> str:
|
|
113
|
+
def _transform_to_pattern(term: UTerm | PTerm, universe_session: Session, project_session: Session) -> str:
|
|
105
114
|
match term.kind:
|
|
106
115
|
case TermKind.PLAIN:
|
|
107
116
|
if constants.DRS_SPECS_JSON_KEY in term.specs:
|
|
108
117
|
result = term.specs[constants.DRS_SPECS_JSON_KEY]
|
|
109
118
|
else:
|
|
110
|
-
raise
|
|
111
|
-
"Can't validate it.")
|
|
119
|
+
raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " + "Can't validate it.")
|
|
112
120
|
case TermKind.PATTERN:
|
|
113
121
|
result = term.specs[constants.PATTERN_JSON_KEY]
|
|
114
122
|
case TermKind.COMPOSITE:
|
|
115
|
-
separator, parts =
|
|
123
|
+
separator, parts = _get_composite_term_separator_parts(term)
|
|
116
124
|
result = ""
|
|
117
125
|
for part in parts:
|
|
118
126
|
resolved_term = _resolve_term(part, universe_session, project_session)
|
|
119
127
|
pattern = _transform_to_pattern(resolved_term, universe_session, project_session)
|
|
120
|
-
result = f
|
|
128
|
+
result = f"{result}{pattern}{separator}"
|
|
121
129
|
result = result.rstrip(separator)
|
|
122
130
|
case _:
|
|
123
|
-
raise
|
|
131
|
+
raise EsgvocDbError(f"unsupported term kind '{term.kind}'")
|
|
124
132
|
return result
|
|
125
133
|
|
|
126
134
|
|
|
127
135
|
# TODO: support optionality of parts of composite.
|
|
128
136
|
# It is backtrack possible for more than one missing parts.
|
|
129
|
-
def _valid_value_composite_term_separator_less(
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
project_session: Session)\
|
|
133
|
-
-> list[UniverseTermError|ProjectTermError]:
|
|
137
|
+
def _valid_value_composite_term_separator_less(
|
|
138
|
+
value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
|
|
139
|
+
) -> list[UniverseTermError | ProjectTermError]:
|
|
134
140
|
result = list()
|
|
135
141
|
try:
|
|
136
142
|
pattern = _transform_to_pattern(term, universe_session, project_session)
|
|
@@ -139,50 +145,45 @@ def _valid_value_composite_term_separator_less(value: str,
|
|
|
139
145
|
# So their regexes are defined as a whole (beginning with a ^, ending with a $).
|
|
140
146
|
# As the pattern is a concatenation of plain or regex, multiple ^ and $ can exist.
|
|
141
147
|
# The latter must be removed.
|
|
142
|
-
pattern = pattern.replace(
|
|
143
|
-
pattern = f
|
|
148
|
+
pattern = pattern.replace("^", "").replace("$", "")
|
|
149
|
+
pattern = f"^{pattern}$"
|
|
144
150
|
regex = re.compile(pattern)
|
|
145
151
|
except Exception as e:
|
|
146
|
-
msg = f
|
|
147
|
-
raise
|
|
152
|
+
msg = f"regex compilation error while processing term '{term.id}'':\n{e}"
|
|
153
|
+
raise EsgvocDbError(msg) from e
|
|
148
154
|
match = regex.match(value)
|
|
149
155
|
if match is None:
|
|
150
156
|
result.append(_create_term_error(value, term))
|
|
151
157
|
return result
|
|
152
158
|
except Exception as e:
|
|
153
|
-
msg = f
|
|
154
|
-
raise
|
|
159
|
+
msg = f"cannot validate separator less composite term '{term.id}':\n{e}"
|
|
160
|
+
raise EsgvocNotImplementedError(msg) from e
|
|
155
161
|
|
|
156
162
|
|
|
157
|
-
def _valid_value_for_composite_term(
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
project_session: Session)\
|
|
161
|
-
-> list[UniverseTermError|ProjectTermError]:
|
|
163
|
+
def _valid_value_for_composite_term(
|
|
164
|
+
value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
|
|
165
|
+
) -> list[UniverseTermError | ProjectTermError]:
|
|
162
166
|
result = list()
|
|
163
167
|
separator, _ = _get_composite_term_separator_parts(term)
|
|
164
168
|
if separator:
|
|
165
|
-
result = _valid_value_composite_term_with_separator(value, term, universe_session,
|
|
166
|
-
project_session)
|
|
169
|
+
result = _valid_value_composite_term_with_separator(value, term, universe_session, project_session)
|
|
167
170
|
else:
|
|
168
|
-
result = _valid_value_composite_term_separator_less(value, term, universe_session,
|
|
169
|
-
project_session)
|
|
171
|
+
result = _valid_value_composite_term_separator_less(value, term, universe_session, project_session)
|
|
170
172
|
return result
|
|
171
173
|
|
|
172
174
|
|
|
173
|
-
def _create_term_error(value: str, term: UTerm|PTerm) -> UniverseTermError|ProjectTermError:
|
|
175
|
+
def _create_term_error(value: str, term: UTerm | PTerm) -> UniverseTermError | ProjectTermError:
|
|
174
176
|
if isinstance(term, UTerm):
|
|
175
|
-
return UniverseTermError(
|
|
176
|
-
|
|
177
|
+
return UniverseTermError(
|
|
178
|
+
value=value, term=term.specs, term_kind=term.kind, data_descriptor_id=term.data_descriptor.id
|
|
179
|
+
)
|
|
177
180
|
else:
|
|
178
|
-
return ProjectTermError(value=value, term=term.specs, term_kind=term.kind,
|
|
179
|
-
collection_id=term.collection.id)
|
|
181
|
+
return ProjectTermError(value=value, term=term.specs, term_kind=term.kind, collection_id=term.collection.id)
|
|
180
182
|
|
|
181
183
|
|
|
182
|
-
def _valid_value(
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
project_session: Session) -> list[UniverseTermError|ProjectTermError]:
|
|
184
|
+
def _valid_value(
|
|
185
|
+
value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
|
|
186
|
+
) -> list[UniverseTermError | ProjectTermError]:
|
|
186
187
|
result = list()
|
|
187
188
|
match term.kind:
|
|
188
189
|
case TermKind.PLAIN:
|
|
@@ -190,89 +191,65 @@ def _valid_value(value: str,
|
|
|
190
191
|
if term.specs[constants.DRS_SPECS_JSON_KEY] != value:
|
|
191
192
|
result.append(_create_term_error(value, term))
|
|
192
193
|
else:
|
|
193
|
-
raise
|
|
194
|
-
"Can't validate it.")
|
|
194
|
+
raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " + "Can't validate it.")
|
|
195
195
|
case TermKind.PATTERN:
|
|
196
|
-
#
|
|
196
|
+
# TODO: Pattern can be compiled and stored for further matching.
|
|
197
197
|
pattern_match = re.match(term.specs[constants.PATTERN_JSON_KEY], value)
|
|
198
198
|
if pattern_match is None:
|
|
199
199
|
result.append(_create_term_error(value, term))
|
|
200
200
|
case TermKind.COMPOSITE:
|
|
201
|
-
result.extend(_valid_value_for_composite_term(value, term,
|
|
202
|
-
universe_session,
|
|
203
|
-
project_session))
|
|
201
|
+
result.extend(_valid_value_for_composite_term(value, term, universe_session, project_session))
|
|
204
202
|
case _:
|
|
205
|
-
raise
|
|
203
|
+
raise EsgvocDbError(f"unsupported term kind '{term.kind}'")
|
|
206
204
|
return result
|
|
207
205
|
|
|
208
206
|
|
|
209
207
|
def _check_value(value: str) -> str:
|
|
210
208
|
if not value or value.isspace():
|
|
211
|
-
raise
|
|
209
|
+
raise EsgvocValueError("value should be set")
|
|
212
210
|
else:
|
|
213
211
|
return value
|
|
214
212
|
|
|
215
213
|
|
|
216
|
-
def _search_plain_term_and_valid_value(value: str,
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
-> str|None:
|
|
220
|
-
where_expression = and_(Collection.id == collection_id,
|
|
221
|
-
PTerm.specs[constants.DRS_SPECS_JSON_KEY] == f'"{value}"')
|
|
222
|
-
statement = select(PTerm).join(Collection).where(where_expression)
|
|
214
|
+
def _search_plain_term_and_valid_value(value: str, collection_id: str, project_session: Session) -> str | None:
|
|
215
|
+
where_expression = and_(PCollection.id == collection_id, PTerm.specs[constants.DRS_SPECS_JSON_KEY] == f'"{value}"')
|
|
216
|
+
statement = select(PTerm).join(PCollection).where(where_expression)
|
|
223
217
|
term = project_session.exec(statement).one_or_none()
|
|
224
218
|
return term.id if term else None
|
|
225
219
|
|
|
226
220
|
|
|
227
|
-
def _valid_value_against_all_terms_of_collection(
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
project_session: Session) \
|
|
231
|
-
-> list[str]:
|
|
221
|
+
def _valid_value_against_all_terms_of_collection(
|
|
222
|
+
value: str, collection: PCollection, universe_session: Session, project_session: Session
|
|
223
|
+
) -> list[str]:
|
|
232
224
|
if collection.terms:
|
|
233
225
|
result = list()
|
|
234
226
|
for pterm in collection.terms:
|
|
235
|
-
_errors = _valid_value(value, pterm,
|
|
236
|
-
universe_session,
|
|
237
|
-
project_session)
|
|
227
|
+
_errors = _valid_value(value, pterm, universe_session, project_session)
|
|
238
228
|
if not _errors:
|
|
239
229
|
result.append(pterm.id)
|
|
240
230
|
return result
|
|
241
231
|
else:
|
|
242
|
-
raise
|
|
232
|
+
raise EsgvocDbError(f"collection '{collection.id}' has no term")
|
|
243
233
|
|
|
244
234
|
|
|
245
|
-
def _valid_value_against_given_term(
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
universe_session: Session,
|
|
250
|
-
project_session: Session)\
|
|
251
|
-
-> list[UniverseTermError|ProjectTermError]:
|
|
252
|
-
# [OPTIMIZATION]
|
|
235
|
+
def _valid_value_against_given_term(
|
|
236
|
+
value: str, project_id: str, collection_id: str, term_id: str, universe_session: Session, project_session: Session
|
|
237
|
+
) -> list[UniverseTermError | ProjectTermError]:
|
|
238
|
+
# [OPTIMIZATION]
|
|
253
239
|
key = value + project_id + collection_id + term_id
|
|
254
240
|
if key in _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE:
|
|
255
241
|
result = _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key]
|
|
256
242
|
else:
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
project_session,
|
|
260
|
-
None)
|
|
261
|
-
if terms:
|
|
262
|
-
term = terms[0]
|
|
243
|
+
term = _get_term_in_collection(collection_id, term_id, project_session)
|
|
244
|
+
if term:
|
|
263
245
|
result = _valid_value(value, term, universe_session, project_session)
|
|
264
246
|
else:
|
|
265
|
-
raise
|
|
266
|
-
f'in collection {collection_id}')
|
|
247
|
+
raise EsgvocNotFoundError(f"unable to find term '{term_id}' " + f"in collection '{collection_id}'")
|
|
267
248
|
_VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key] = result
|
|
268
249
|
return result
|
|
269
250
|
|
|
270
251
|
|
|
271
|
-
def valid_term(value: str,
|
|
272
|
-
project_id: str,
|
|
273
|
-
collection_id: str,
|
|
274
|
-
term_id: str) \
|
|
275
|
-
-> ValidationReport:
|
|
252
|
+
def valid_term(value: str, project_id: str, collection_id: str, term_id: str) -> ValidationReport:
|
|
276
253
|
"""
|
|
277
254
|
Check if the given value may or may not represent the given term. The function returns
|
|
278
255
|
a report that contains the possible errors.
|
|
@@ -288,7 +265,7 @@ def valid_term(value: str,
|
|
|
288
265
|
composite so as to compare it as a regex to the value.
|
|
289
266
|
|
|
290
267
|
If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
|
|
291
|
-
the function raises a
|
|
268
|
+
the function raises an EsgvocNotFoundError.
|
|
292
269
|
|
|
293
270
|
:param value: A value to be validated
|
|
294
271
|
:type value: str
|
|
@@ -300,61 +277,51 @@ def valid_term(value: str,
|
|
|
300
277
|
:type term_id: str
|
|
301
278
|
:returns: A validation report that contains the possible errors
|
|
302
279
|
:rtype: ValidationReport
|
|
303
|
-
:raises
|
|
280
|
+
:raises EsgvocNotFoundError: If any of the provided ids is not found
|
|
304
281
|
"""
|
|
305
282
|
value = _check_value(value)
|
|
306
|
-
with get_universe_session() as universe_session,
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
283
|
+
with get_universe_session() as universe_session, _get_project_session_with_exception(project_id) as project_session:
|
|
284
|
+
errors = _valid_value_against_given_term(
|
|
285
|
+
value, project_id, collection_id, term_id, universe_session, project_session
|
|
286
|
+
)
|
|
310
287
|
return ValidationReport(expression=value, errors=errors)
|
|
311
288
|
|
|
312
289
|
|
|
313
|
-
def _valid_term_in_collection(
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
project_session: Session) \
|
|
318
|
-
-> list[MatchingTerm]:
|
|
319
|
-
# [OPTIMIZATION]
|
|
290
|
+
def _valid_term_in_collection(
|
|
291
|
+
value: str, project_id: str, collection_id: str, universe_session: Session, project_session: Session
|
|
292
|
+
) -> list[MatchingTerm]:
|
|
293
|
+
# [OPTIMIZATION]
|
|
320
294
|
key = value + project_id + collection_id
|
|
321
295
|
if key in _VALID_TERM_IN_COLLECTION_CACHE:
|
|
322
296
|
result = _VALID_TERM_IN_COLLECTION_CACHE[key]
|
|
323
297
|
else:
|
|
324
298
|
value = _check_value(value)
|
|
325
299
|
result = list()
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
None)
|
|
329
|
-
if collections:
|
|
330
|
-
collection = collections[0]
|
|
300
|
+
collection = _get_collection_in_project(collection_id, project_session)
|
|
301
|
+
if collection:
|
|
331
302
|
match collection.term_kind:
|
|
332
303
|
case TermKind.PLAIN:
|
|
333
|
-
term_id_found = _search_plain_term_and_valid_value(value, collection_id,
|
|
334
|
-
project_session)
|
|
304
|
+
term_id_found = _search_plain_term_and_valid_value(value, collection_id, project_session)
|
|
335
305
|
if term_id_found:
|
|
336
|
-
result.append(
|
|
337
|
-
|
|
338
|
-
|
|
306
|
+
result.append(
|
|
307
|
+
MatchingTerm(project_id=project_id, collection_id=collection_id, term_id=term_id_found)
|
|
308
|
+
)
|
|
339
309
|
case _:
|
|
340
|
-
term_ids_found = _valid_value_against_all_terms_of_collection(
|
|
341
|
-
|
|
342
|
-
|
|
310
|
+
term_ids_found = _valid_value_against_all_terms_of_collection(
|
|
311
|
+
value, collection, universe_session, project_session
|
|
312
|
+
)
|
|
343
313
|
for term_id_found in term_ids_found:
|
|
344
|
-
result.append(
|
|
345
|
-
|
|
346
|
-
|
|
314
|
+
result.append(
|
|
315
|
+
MatchingTerm(project_id=project_id, collection_id=collection_id, term_id=term_id_found)
|
|
316
|
+
)
|
|
347
317
|
else:
|
|
348
|
-
msg = f
|
|
349
|
-
raise
|
|
318
|
+
msg = f"unable to find collection '{collection_id}'"
|
|
319
|
+
raise EsgvocNotFoundError(msg)
|
|
350
320
|
_VALID_TERM_IN_COLLECTION_CACHE[key] = result
|
|
351
321
|
return result
|
|
352
322
|
|
|
353
323
|
|
|
354
|
-
def valid_term_in_collection(value: str,
|
|
355
|
-
project_id: str,
|
|
356
|
-
collection_id: str) \
|
|
357
|
-
-> list[MatchingTerm]:
|
|
324
|
+
def valid_term_in_collection(value: str, project_id: str, collection_id: str) -> list[MatchingTerm]:
|
|
358
325
|
"""
|
|
359
326
|
Check if the given value may or may not represent a term in the given collection. The function
|
|
360
327
|
returns the terms that the value matches.
|
|
@@ -370,7 +337,7 @@ def valid_term_in_collection(value: str,
|
|
|
370
337
|
composite so as to compare it as a regex to the value.
|
|
371
338
|
|
|
372
339
|
If any of the provided ids (`project_id` or `collection_id`) is not found,
|
|
373
|
-
the function raises a
|
|
340
|
+
the function raises an EsgvocNotFoundError.
|
|
374
341
|
|
|
375
342
|
:param value: A value to be validated
|
|
376
343
|
:type value: str
|
|
@@ -380,23 +347,19 @@ def valid_term_in_collection(value: str,
|
|
|
380
347
|
:type collection_id: str
|
|
381
348
|
:returns: The list of terms that the value matches.
|
|
382
349
|
:rtype: list[MatchingTerm]
|
|
383
|
-
:raises
|
|
350
|
+
:raises EsgvocNotFoundError: If any of the provided ids is not found
|
|
384
351
|
"""
|
|
385
|
-
with get_universe_session() as universe_session,
|
|
386
|
-
|
|
387
|
-
return _valid_term_in_collection(value, project_id, collection_id,
|
|
388
|
-
universe_session, project_session)
|
|
352
|
+
with get_universe_session() as universe_session, _get_project_session_with_exception(project_id) as project_session:
|
|
353
|
+
return _valid_term_in_collection(value, project_id, collection_id, universe_session, project_session)
|
|
389
354
|
|
|
390
355
|
|
|
391
|
-
def _valid_term_in_project(
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
project_session: Session) -> list[MatchingTerm]:
|
|
356
|
+
def _valid_term_in_project(
|
|
357
|
+
value: str, project_id: str, universe_session: Session, project_session: Session
|
|
358
|
+
) -> list[MatchingTerm]:
|
|
395
359
|
result = list()
|
|
396
360
|
collections = _get_all_collections_in_project(project_session)
|
|
397
361
|
for collection in collections:
|
|
398
|
-
result.extend(_valid_term_in_collection(value, project_id, collection.id,
|
|
399
|
-
universe_session, project_session))
|
|
362
|
+
result.extend(_valid_term_in_collection(value, project_id, collection.id, universe_session, project_session))
|
|
400
363
|
return result
|
|
401
364
|
|
|
402
365
|
|
|
@@ -415,7 +378,7 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
|
|
|
415
378
|
- if the composite hasn't got a separator, the function aggregates the parts of the \
|
|
416
379
|
composite so as to compare it as a regex to the value.
|
|
417
380
|
|
|
418
|
-
If the `project_id` is not found, the function raises a
|
|
381
|
+
If the `project_id` is not found, the function raises an EsgvocNotFoundError.
|
|
419
382
|
|
|
420
383
|
:param value: A value to be validated
|
|
421
384
|
:type value: str
|
|
@@ -423,10 +386,9 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
|
|
|
423
386
|
:type project_id: str
|
|
424
387
|
:returns: The list of terms that the value matches.
|
|
425
388
|
:rtype: list[MatchingTerm]
|
|
426
|
-
:raises
|
|
389
|
+
:raises EsgvocNotFoundError: If the `project_id` is not found
|
|
427
390
|
"""
|
|
428
|
-
with get_universe_session() as universe_session,
|
|
429
|
-
_get_project_session_with_exception(project_id) as project_session:
|
|
391
|
+
with get_universe_session() as universe_session, _get_project_session_with_exception(project_id) as project_session:
|
|
430
392
|
return _valid_term_in_project(value, project_id, universe_session, project_session)
|
|
431
393
|
|
|
432
394
|
|
|
@@ -454,447 +416,713 @@ def valid_term_in_all_projects(value: str) -> list[MatchingTerm]:
|
|
|
454
416
|
with get_universe_session() as universe_session:
|
|
455
417
|
for project_id in get_all_projects():
|
|
456
418
|
with _get_project_session_with_exception(project_id) as project_session:
|
|
457
|
-
result.extend(_valid_term_in_project(value, project_id,
|
|
458
|
-
universe_session, project_session))
|
|
459
|
-
return result
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
def _find_terms_in_collection(collection_id: str,
|
|
463
|
-
term_id: str,
|
|
464
|
-
session: Session,
|
|
465
|
-
settings: SearchSettings|None = None) -> Sequence[PTerm]:
|
|
466
|
-
# Settings only apply on the term_id comparison.
|
|
467
|
-
where_expression = _create_str_comparison_expression(field=PTerm.id,
|
|
468
|
-
value=term_id,
|
|
469
|
-
settings=settings)
|
|
470
|
-
statement = select(PTerm).join(Collection).where(Collection.id==collection_id,
|
|
471
|
-
where_expression)
|
|
472
|
-
results = session.exec(statement)
|
|
473
|
-
result = results.all()
|
|
419
|
+
result.extend(_valid_term_in_project(value, project_id, universe_session, project_session))
|
|
474
420
|
return result
|
|
475
421
|
|
|
476
422
|
|
|
477
|
-
def
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
settings: SearchSettings|None = None) \
|
|
481
|
-
-> list[DataDescriptor]:
|
|
423
|
+
def get_all_terms_in_collection(
|
|
424
|
+
project_id: str, collection_id: str, selected_term_fields: Iterable[str] | None = None
|
|
425
|
+
) -> list[DataDescriptor]:
|
|
482
426
|
"""
|
|
483
|
-
|
|
427
|
+
Gets all terms of the given collection of a project.
|
|
484
428
|
This function performs an exact match on the `project_id` and `collection_id`,
|
|
485
|
-
and does
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
|
|
489
|
-
If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
|
|
490
|
-
the function returns an empty list.
|
|
491
|
-
|
|
492
|
-
Behavior based on search type:
|
|
493
|
-
- `EXACT` and absence of `settings`: returns zero or one term instance in the list.
|
|
494
|
-
- `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
|
|
495
|
-
term instances in the list.
|
|
429
|
+
and does not search for similar or related projects and collections.
|
|
430
|
+
If any of the provided ids (`project_id` or `collection_id`) is not found, the function
|
|
431
|
+
returns an empty list.
|
|
496
432
|
|
|
497
433
|
:param project_id: A project id
|
|
498
434
|
:type project_id: str
|
|
499
|
-
:param collection_id: A collection
|
|
435
|
+
:param collection_id: A collection id
|
|
500
436
|
:type collection_id: str
|
|
501
|
-
:param
|
|
502
|
-
|
|
503
|
-
:
|
|
504
|
-
:
|
|
505
|
-
:returns: A list of term instances. Returns an empty list if no matches are found.
|
|
437
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
438
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
439
|
+
:type selected_term_fields: Iterable[str] | None
|
|
440
|
+
:returns: a list of term instances. Returns an empty list if no matches are found.
|
|
506
441
|
:rtype: list[DataDescriptor]
|
|
507
442
|
"""
|
|
508
|
-
result
|
|
509
|
-
if connection:=_get_project_connection(project_id):
|
|
443
|
+
result = list()
|
|
444
|
+
if connection := _get_project_connection(project_id):
|
|
510
445
|
with connection.create_session() as session:
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
446
|
+
collection = _get_collection_in_project(collection_id, session)
|
|
447
|
+
if collection:
|
|
448
|
+
result = _get_all_terms_in_collection(collection, selected_term_fields)
|
|
514
449
|
return result
|
|
515
450
|
|
|
516
451
|
|
|
517
|
-
def
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
-> Sequence[PTerm]:
|
|
522
|
-
# Settings only apply on the term_id comparison.
|
|
523
|
-
where_expression = _create_str_comparison_expression(field=PTerm.id,
|
|
524
|
-
value=term_id,
|
|
525
|
-
settings=settings)
|
|
526
|
-
statement = select(PTerm).join(Collection).where(Collection.data_descriptor_id==data_descriptor_id,
|
|
527
|
-
where_expression)
|
|
528
|
-
results = session.exec(statement)
|
|
529
|
-
result = results.all()
|
|
530
|
-
return result
|
|
452
|
+
def _get_all_collections_in_project(session: Session) -> list[PCollection]:
|
|
453
|
+
project = session.get(Project, constants.SQLITE_FIRST_PK)
|
|
454
|
+
# Project can't be missing if session exists.
|
|
455
|
+
return project.collections # type: ignore
|
|
531
456
|
|
|
532
457
|
|
|
533
|
-
def
|
|
534
|
-
data_descriptor_id: str,
|
|
535
|
-
term_id: str,
|
|
536
|
-
settings: SearchSettings|None = None) \
|
|
537
|
-
-> list[tuple[DataDescriptor, str]]:
|
|
458
|
+
def get_all_collections_in_project(project_id: str) -> list[str]:
|
|
538
459
|
"""
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
The given `term_id` is searched according to the search type specified in the parameter `settings`,
|
|
544
|
-
which allows a flexible matching (e.g., `LIKE` may return multiple results).
|
|
545
|
-
If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
|
|
546
|
-
If any of the provided ids (`project_id`, `data_descriptor_id` or `term_id`) is not found,
|
|
547
|
-
the function returns an empty list.
|
|
548
|
-
|
|
549
|
-
Behavior based on search type:
|
|
550
|
-
- `EXACT` and absence of `settings`: returns zero or one term instance and \
|
|
551
|
-
collection id in the list.
|
|
552
|
-
- `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
|
|
553
|
-
term instances and collection ids in the list.
|
|
460
|
+
Gets all collections of the given project.
|
|
461
|
+
This function performs an exact match on the `project_id` and
|
|
462
|
+
does not search for similar or related projects.
|
|
463
|
+
If the provided `project_id` is not found, the function returns an empty list.
|
|
554
464
|
|
|
555
465
|
:param project_id: A project id
|
|
556
466
|
:type project_id: str
|
|
557
|
-
:
|
|
558
|
-
:
|
|
559
|
-
:param term_id: A term id to be found
|
|
560
|
-
:type term_id: str
|
|
561
|
-
:param settings: The search settings
|
|
562
|
-
:type settings: SearchSettings|None
|
|
563
|
-
:returns: A list of tuple of term instances and related collection ids. \
|
|
564
|
-
Returns an empty list if no matches are found.
|
|
565
|
-
:rtype: list[tuple[DataDescriptor, str]]
|
|
467
|
+
:returns: A list of collection ids. Returns an empty list if no matches are found.
|
|
468
|
+
:rtype: list[str]
|
|
566
469
|
"""
|
|
567
470
|
result = list()
|
|
568
|
-
if connection:=_get_project_connection(project_id):
|
|
471
|
+
if connection := _get_project_connection(project_id):
|
|
569
472
|
with connection.create_session() as session:
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
settings)
|
|
574
|
-
for pterm in terms:
|
|
575
|
-
collection_id = pterm.collection.id
|
|
576
|
-
term = instantiate_pydantic_term(pterm,
|
|
577
|
-
settings.selected_term_fields if settings else None)
|
|
578
|
-
result.append((term, collection_id))
|
|
473
|
+
collections = _get_all_collections_in_project(session)
|
|
474
|
+
for collection in collections:
|
|
475
|
+
result.append(collection.id)
|
|
579
476
|
return result
|
|
580
477
|
|
|
581
478
|
|
|
582
|
-
def
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
in the universe, based on the specified search settings, in the given collection of a project.
|
|
589
|
-
This function performs an exact match on the `data_descriptor_id`,
|
|
590
|
-
and does **not** search for similar or related data descriptors.
|
|
591
|
-
The given `term_id` is searched according to the search type specified in the parameter `settings`,
|
|
592
|
-
which allows a flexible matching (e.g., `LIKE` may return multiple results).
|
|
593
|
-
If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
|
|
594
|
-
If any of the provided ids (`data_descriptor_id` or `term_id`) is not found,
|
|
595
|
-
the function returns an empty list.
|
|
479
|
+
def _get_all_terms_in_collection(
|
|
480
|
+
collection: PCollection, selected_term_fields: Iterable[str] | None
|
|
481
|
+
) -> list[DataDescriptor]:
|
|
482
|
+
result: list[DataDescriptor] = list()
|
|
483
|
+
instantiate_pydantic_terms(collection.terms, result, selected_term_fields)
|
|
484
|
+
return result
|
|
596
485
|
|
|
597
|
-
Behavior based on search type:
|
|
598
|
-
- `EXACT` and absence of `settings`: returns zero or one term instance and \
|
|
599
|
-
collection id in the list.
|
|
600
|
-
- `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
|
|
601
|
-
term instances and collection ids in the list.
|
|
602
486
|
|
|
603
|
-
|
|
604
|
-
:
|
|
605
|
-
|
|
606
|
-
:type term_id: str
|
|
607
|
-
:param settings: The search settings
|
|
608
|
-
:type settings: SearchSettings|None
|
|
609
|
-
:returns: A list of tuple of matching terms with their collection id, per project. \
|
|
610
|
-
Returns an empty list if no matches are found.
|
|
611
|
-
:rtype: list[tuple[list[tuple[DataDescriptor, str]], str]]
|
|
487
|
+
def get_all_terms_in_project(
|
|
488
|
+
project_id: str, selected_term_fields: Iterable[str] | None = None
|
|
489
|
+
) -> list[DataDescriptor]:
|
|
612
490
|
"""
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
for
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
term_id,
|
|
619
|
-
settings)
|
|
620
|
-
if matching_terms:
|
|
621
|
-
result.append((matching_terms, project_id))
|
|
622
|
-
return result
|
|
623
|
-
|
|
491
|
+
Gets all terms of the given project.
|
|
492
|
+
This function performs an exact match on the `project_id` and
|
|
493
|
+
does not search for similar or related projects.
|
|
494
|
+
Terms are unique within a collection but may have some synonyms in a project.
|
|
495
|
+
If the provided `project_id` is not found, the function returns an empty list.
|
|
624
496
|
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
497
|
+
:param project_id: A project id
|
|
498
|
+
:type project_id: str
|
|
499
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
500
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
501
|
+
:type selected_term_fields: Iterable[str] | None
|
|
502
|
+
:returns: A list of term instances. Returns an empty list if no matches are found.
|
|
503
|
+
:rtype: list[DataDescriptor]
|
|
504
|
+
"""
|
|
505
|
+
result = list()
|
|
506
|
+
if connection := _get_project_connection(project_id):
|
|
507
|
+
with connection.create_session() as session:
|
|
508
|
+
collections = _get_all_collections_in_project(session)
|
|
509
|
+
for collection in collections:
|
|
510
|
+
# Term may have some synonyms in a project.
|
|
511
|
+
result.extend(_get_all_terms_in_collection(collection, selected_term_fields))
|
|
512
|
+
return result
|
|
634
513
|
|
|
635
514
|
|
|
636
|
-
def
|
|
637
|
-
|
|
638
|
-
|
|
515
|
+
def get_all_terms_in_all_projects(
|
|
516
|
+
selected_term_fields: Iterable[str] | None = None,
|
|
517
|
+
) -> list[tuple[str, list[DataDescriptor]]]:
|
|
639
518
|
"""
|
|
640
|
-
|
|
641
|
-
The given `term_id` is searched according to the search type specified in the parameter `settings`,
|
|
642
|
-
which allows a flexible matching (e.g., `LIKE` may return multiple results).
|
|
643
|
-
If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
|
|
644
|
-
Terms are unique within a collection but may have some synonyms within a project.
|
|
645
|
-
If the provided `term_id` is not found, the function returns an empty list.
|
|
519
|
+
Gets all terms of all projects.
|
|
646
520
|
|
|
647
|
-
:param
|
|
648
|
-
|
|
649
|
-
:
|
|
650
|
-
:
|
|
651
|
-
:
|
|
652
|
-
:rtype: list[DataDescriptor]
|
|
521
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
522
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
523
|
+
:type selected_term_fields: Iterable[str] | None
|
|
524
|
+
:returns: A list of tuple project_id and term instances of that project.
|
|
525
|
+
:rtype: list[tuple[str, list[DataDescriptor]]]
|
|
653
526
|
"""
|
|
654
527
|
project_ids = get_all_projects()
|
|
655
528
|
result = list()
|
|
656
529
|
for project_id in project_ids:
|
|
657
|
-
|
|
530
|
+
terms = get_all_terms_in_project(project_id, selected_term_fields)
|
|
531
|
+
result.append((project_id, terms))
|
|
658
532
|
return result
|
|
659
533
|
|
|
660
534
|
|
|
661
|
-
def
|
|
662
|
-
term_id: str,
|
|
663
|
-
settings: SearchSettings|None = None) \
|
|
664
|
-
-> list[DataDescriptor]:
|
|
535
|
+
def get_all_projects() -> list[str]:
|
|
665
536
|
"""
|
|
666
|
-
|
|
667
|
-
This function performs an exact match on the `project_id` and
|
|
668
|
-
does **not** search for similar or related projects.
|
|
669
|
-
The given `term_id` is searched according to the search type specified in the parameter `settings`,
|
|
670
|
-
which allows a flexible matching (e.g., `LIKE` may return multiple results).
|
|
671
|
-
If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
|
|
672
|
-
Terms are unique within a collection but may have some synonyms within a project.
|
|
673
|
-
If any of the provided ids (`project_id` or `term_id`) is not found, the function returns
|
|
674
|
-
an empty list.
|
|
537
|
+
Gets all projects.
|
|
675
538
|
|
|
676
|
-
:
|
|
539
|
+
:returns: A list of project ids.
|
|
540
|
+
:rtype: list[str]
|
|
541
|
+
"""
|
|
542
|
+
return list(service.current_state.projects.keys())
|
|
543
|
+
|
|
544
|
+
|
|
545
|
+
def _get_term_in_project(term_id: str, session: Session) -> PTerm | None:
|
|
546
|
+
statement = select(PTerm).where(PTerm.id == term_id)
|
|
547
|
+
results = session.exec(statement)
|
|
548
|
+
result = results.first() # Term ids are not supposed to be unique within a project.
|
|
549
|
+
return result
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
def get_term_in_project(
|
|
553
|
+
project_id: str, term_id: str, selected_term_fields: Iterable[str] | None = None
|
|
554
|
+
) -> DataDescriptor | None:
|
|
555
|
+
"""
|
|
556
|
+
Returns the first occurrence of the terms, in the given project, whose id corresponds exactly to
|
|
557
|
+
the given term id.
|
|
558
|
+
Terms are unique within a collection but may have some synonyms in a project.
|
|
559
|
+
This function performs an exact match on the `project_id` and `term_id`, and does not search
|
|
560
|
+
for similar or related projects and terms.
|
|
561
|
+
If any of the provided ids (`project_id` or `term_id`) is not found,
|
|
562
|
+
the function returns `None`.
|
|
563
|
+
|
|
564
|
+
:param project_id: The id of the given project.
|
|
677
565
|
:type project_id: str
|
|
678
|
-
:param term_id:
|
|
566
|
+
:param term_id: The id of a term to be found.
|
|
679
567
|
:type term_id: str
|
|
680
|
-
:param
|
|
681
|
-
|
|
682
|
-
:
|
|
683
|
-
:
|
|
568
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
569
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
570
|
+
:type selected_term_fields: Iterable[str] | None
|
|
571
|
+
:returns: A term instance. Returns `None` if no match is found.
|
|
572
|
+
:rtype: DataDescriptor | None
|
|
684
573
|
"""
|
|
685
|
-
result:
|
|
686
|
-
if connection:=_get_project_connection(project_id):
|
|
574
|
+
result: DataDescriptor | None = None
|
|
575
|
+
if connection := _get_project_connection(project_id):
|
|
687
576
|
with connection.create_session() as session:
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
577
|
+
term_found = _get_term_in_project(term_id, session)
|
|
578
|
+
if term_found:
|
|
579
|
+
result = instantiate_pydantic_term(term_found, selected_term_fields)
|
|
691
580
|
return result
|
|
692
581
|
|
|
693
582
|
|
|
694
|
-
def
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
583
|
+
def _get_term_in_collection(collection_id: str, term_id: str, session: Session) -> PTerm | None:
|
|
584
|
+
statement = select(PTerm).join(PCollection).where(PCollection.id == collection_id, PTerm.id == term_id)
|
|
585
|
+
results = session.exec(statement)
|
|
586
|
+
result = results.one_or_none()
|
|
587
|
+
return result
|
|
588
|
+
|
|
589
|
+
|
|
590
|
+
def get_term_in_collection(
|
|
591
|
+
project_id: str, collection_id: str, term_id: str, selected_term_fields: Iterable[str] | None = None
|
|
592
|
+
) -> DataDescriptor | None:
|
|
698
593
|
"""
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
594
|
+
Returns the term, in the given project and collection,
|
|
595
|
+
whose id corresponds exactly to the given term id.
|
|
596
|
+
This function performs an exact match on the `project_id`, `collection_id` and `term_id`,
|
|
597
|
+
and does not search for similar or related projects, collections and terms.
|
|
598
|
+
If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
|
|
599
|
+
the function returns `None`.
|
|
704
600
|
|
|
705
|
-
:param project_id:
|
|
601
|
+
:param project_id: The id of the given project.
|
|
706
602
|
:type project_id: str
|
|
707
|
-
:param collection_id:
|
|
603
|
+
:param collection_id: The id of the given collection.
|
|
708
604
|
:type collection_id: str
|
|
605
|
+
:param term_id: The id of a term to be found.
|
|
606
|
+
:type term_id: str
|
|
709
607
|
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
710
|
-
fields of the terms are returned.
|
|
711
|
-
:type selected_term_fields: Iterable[str]|None
|
|
712
|
-
:returns:
|
|
713
|
-
:rtype:
|
|
608
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
609
|
+
:type selected_term_fields: Iterable[str] | None
|
|
610
|
+
:returns: A term instance. Returns `None` if no match is found.
|
|
611
|
+
:rtype: DataDescriptor | None
|
|
714
612
|
"""
|
|
715
|
-
result =
|
|
716
|
-
if connection:=_get_project_connection(project_id):
|
|
613
|
+
result: DataDescriptor | None = None
|
|
614
|
+
if connection := _get_project_connection(project_id):
|
|
717
615
|
with connection.create_session() as session:
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
if collections:
|
|
722
|
-
collection = collections[0]
|
|
723
|
-
result = _get_all_terms_in_collection(collection, selected_term_fields)
|
|
616
|
+
term_found = _get_term_in_collection(collection_id, term_id, session)
|
|
617
|
+
if term_found:
|
|
618
|
+
result = instantiate_pydantic_term(term_found, selected_term_fields)
|
|
724
619
|
return result
|
|
725
620
|
|
|
726
621
|
|
|
727
|
-
def
|
|
728
|
-
|
|
729
|
-
settings: SearchSettings|None) \
|
|
730
|
-
-> Sequence[Collection]:
|
|
731
|
-
where_exp = _create_str_comparison_expression(field=Collection.id,
|
|
732
|
-
value=collection_id,
|
|
733
|
-
settings=settings)
|
|
734
|
-
statement = select(Collection).where(where_exp)
|
|
622
|
+
def _get_collection_in_project(collection_id: str, session: Session) -> PCollection | None:
|
|
623
|
+
statement = select(PCollection).where(PCollection.id == collection_id)
|
|
735
624
|
results = session.exec(statement)
|
|
736
|
-
result = results.
|
|
625
|
+
result = results.one_or_none()
|
|
737
626
|
return result
|
|
738
627
|
|
|
739
628
|
|
|
740
|
-
def
|
|
741
|
-
collection_id: str,
|
|
742
|
-
settings: SearchSettings|None = None) \
|
|
743
|
-
-> list[dict]:
|
|
629
|
+
def get_collection_in_project(project_id: str, collection_id: str) -> tuple[str, dict] | None:
|
|
744
630
|
"""
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
the
|
|
750
|
-
|
|
751
|
-
If the parameter `settings` is `None`, this function performs an exact match on the `collection_id`.
|
|
752
|
-
If any of the provided ids (`project_id` or `collection_id`) is not found, the function returns
|
|
753
|
-
an empty list.
|
|
754
|
-
|
|
755
|
-
Behavior based on search type:
|
|
756
|
-
- `EXACT` and absence of `settings`: returns zero or one collection context in the list.
|
|
757
|
-
- `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
|
|
758
|
-
collection contexts in the list.
|
|
631
|
+
Returns the collection, in the given project, whose id corresponds exactly to
|
|
632
|
+
the given collection id.
|
|
633
|
+
This function performs an exact match on the `project_id` and `collection_id`, and does not search
|
|
634
|
+
for similar or related projects and collections.
|
|
635
|
+
If any of the provided ids (`project_id` or `collection_id`) is not found,
|
|
636
|
+
the function returns `None`.
|
|
759
637
|
|
|
760
|
-
:param project_id:
|
|
638
|
+
:param project_id: The id of the given project.
|
|
761
639
|
:type project_id: str
|
|
762
|
-
:param collection_id:
|
|
640
|
+
:param collection_id: The id of a collection to be found.
|
|
763
641
|
:type collection_id: str
|
|
764
|
-
:
|
|
765
|
-
:
|
|
766
|
-
:returns: A list of collection contexts. Returns an empty list if no matches are found.
|
|
767
|
-
:rtype: list[dict]
|
|
642
|
+
:returns: A collection id and context. Returns `None` if no match is found.
|
|
643
|
+
:rtype: tuple[str, dict] | None
|
|
768
644
|
"""
|
|
769
|
-
result =
|
|
770
|
-
if connection:=_get_project_connection(project_id):
|
|
645
|
+
result: tuple[str, dict] | None = None
|
|
646
|
+
if connection := _get_project_connection(project_id):
|
|
771
647
|
with connection.create_session() as session:
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
for collection in collections:
|
|
776
|
-
result.append(collection.context)
|
|
648
|
+
collection_found = _get_collection_in_project(collection_id, session)
|
|
649
|
+
if collection_found:
|
|
650
|
+
result = collection_found.id, collection_found.context
|
|
777
651
|
return result
|
|
778
652
|
|
|
779
653
|
|
|
780
|
-
def
|
|
781
|
-
project = session.get(Project, constants.SQLITE_FIRST_PK)
|
|
782
|
-
# Project can't be missing if session exists.
|
|
783
|
-
return project.collections # type: ignore
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
def get_all_collections_in_project(project_id: str) -> list[str]:
|
|
654
|
+
def get_project(project_id: str) -> ProjectSpecs | None:
|
|
787
655
|
"""
|
|
788
|
-
|
|
656
|
+
Gets a project and returns its specifications.
|
|
789
657
|
This function performs an exact match on the `project_id` and
|
|
790
|
-
does
|
|
791
|
-
If the provided `project_id` is not found, the function returns
|
|
658
|
+
does not search for similar or related projects.
|
|
659
|
+
If the provided `project_id` is not found, the function returns `None`.
|
|
792
660
|
|
|
793
|
-
:param project_id: A project id
|
|
661
|
+
:param project_id: A project id to be found
|
|
794
662
|
:type project_id: str
|
|
795
|
-
:returns:
|
|
796
|
-
:rtype:
|
|
663
|
+
:returns: The specs of the project found. Returns `None` if no matches are found.
|
|
664
|
+
:rtype: ProjectSpecs | None
|
|
797
665
|
"""
|
|
798
|
-
result =
|
|
799
|
-
if connection:=_get_project_connection(project_id):
|
|
666
|
+
result: ProjectSpecs | None = None
|
|
667
|
+
if connection := _get_project_connection(project_id):
|
|
800
668
|
with connection.create_session() as session:
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
669
|
+
project = session.get(Project, constants.SQLITE_FIRST_PK)
|
|
670
|
+
try:
|
|
671
|
+
# Project can't be missing if session exists.
|
|
672
|
+
result = ProjectSpecs(**project.specs) # type: ignore
|
|
673
|
+
except Exception as e:
|
|
674
|
+
msg = f"unable to read specs in project '{project_id}'"
|
|
675
|
+
raise EsgvocDbError(msg) from e
|
|
804
676
|
return result
|
|
805
677
|
|
|
806
678
|
|
|
807
|
-
def
|
|
808
|
-
|
|
809
|
-
result
|
|
810
|
-
instantiate_pydantic_terms(collection.terms, result, selected_term_fields)
|
|
679
|
+
def _get_collection_from_data_descriptor_in_project(data_descriptor_id: str, session: Session) -> PCollection | None:
|
|
680
|
+
statement = select(PCollection).where(PCollection.data_descriptor_id == data_descriptor_id)
|
|
681
|
+
result = session.exec(statement).one_or_none()
|
|
811
682
|
return result
|
|
812
683
|
|
|
813
684
|
|
|
814
|
-
def
|
|
815
|
-
selected_term_fields: Iterable[str]|None = None) -> list[DataDescriptor]:
|
|
685
|
+
def get_collection_from_data_descriptor_in_project(project_id: str, data_descriptor_id: str) -> tuple[str, dict] | None:
|
|
816
686
|
"""
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
If the provided `project_id` is not found,
|
|
687
|
+
Returns the collection, in the given project, that corresponds to the given data descriptor
|
|
688
|
+
in the universe.
|
|
689
|
+
This function performs an exact match on the `project_id` and `data_descriptor_id`,
|
|
690
|
+
and does not search for similar or related projects and data descriptors.
|
|
691
|
+
If any of the provided ids (`project_id` or `data_descriptor_id`) is not found, or if
|
|
692
|
+
there is no collection corresponding to the given data descriptor, the function returns `None`.
|
|
822
693
|
|
|
823
|
-
:param project_id:
|
|
694
|
+
:param project_id: The id of the given project.
|
|
824
695
|
:type project_id: str
|
|
825
|
-
:param
|
|
826
|
-
|
|
827
|
-
:
|
|
828
|
-
:
|
|
829
|
-
|
|
696
|
+
:param data_descriptor_id: The id of the given data descriptor.
|
|
697
|
+
:type data_descriptor_id: str
|
|
698
|
+
:returns: A collection id and context. Returns `None` if no matches are found.
|
|
699
|
+
:rtype: tuple[str, dict] | None
|
|
700
|
+
"""
|
|
701
|
+
result: tuple[str, dict] | None = None
|
|
702
|
+
if connection := _get_project_connection(project_id):
|
|
703
|
+
with connection.create_session() as session:
|
|
704
|
+
collection_found = _get_collection_from_data_descriptor_in_project(data_descriptor_id, session)
|
|
705
|
+
if collection_found:
|
|
706
|
+
result = collection_found.id, collection_found.context
|
|
707
|
+
return result
|
|
708
|
+
|
|
709
|
+
|
|
710
|
+
def get_collection_from_data_descriptor_in_all_projects(data_descriptor_id: str) -> list[tuple[str, str, dict]]:
|
|
711
|
+
"""
|
|
712
|
+
Returns the collections, in all projects, that correspond to the given data descriptor
|
|
713
|
+
in the universe.
|
|
714
|
+
This function performs an exact match on `data_descriptor_id`,
|
|
715
|
+
and does not search for similar or related data descriptors.
|
|
716
|
+
If the provided `data_descriptor_id` is not found, or if
|
|
717
|
+
there is no collection corresponding to the given data descriptor, the function returns
|
|
718
|
+
an empty list.
|
|
719
|
+
|
|
720
|
+
:param data_descriptor_id: The id of the given data descriptor.
|
|
721
|
+
:type data_descriptor_id: str
|
|
722
|
+
:returns: A list of tuples of project id, collection id and collection context. \
|
|
723
|
+
Returns an empty list if no matches are found.
|
|
724
|
+
:rtype: list[tuple[str, str, dict]]
|
|
830
725
|
"""
|
|
831
726
|
result = list()
|
|
832
|
-
|
|
727
|
+
project_ids = get_all_projects()
|
|
728
|
+
for project_id in project_ids:
|
|
729
|
+
collection_found = get_collection_from_data_descriptor_in_project(project_id, data_descriptor_id)
|
|
730
|
+
if collection_found:
|
|
731
|
+
result.append((project_id, collection_found[0], collection_found[1]))
|
|
732
|
+
return result
|
|
733
|
+
|
|
734
|
+
|
|
735
|
+
def _get_term_from_universe_term_id_in_project(
|
|
736
|
+
data_descriptor_id: str, universe_term_id: str, project_session: Session
|
|
737
|
+
) -> PTerm | None:
|
|
738
|
+
statement = (
|
|
739
|
+
select(PTerm)
|
|
740
|
+
.join(PCollection)
|
|
741
|
+
.where(PCollection.data_descriptor_id == data_descriptor_id, PTerm.id == universe_term_id)
|
|
742
|
+
)
|
|
743
|
+
results = project_session.exec(statement)
|
|
744
|
+
result = results.one_or_none()
|
|
745
|
+
return result
|
|
746
|
+
|
|
747
|
+
|
|
748
|
+
def get_term_from_universe_term_id_in_project(
|
|
749
|
+
project_id: str, data_descriptor_id: str, universe_term_id: str, selected_term_fields: Iterable[str] | None = None
|
|
750
|
+
) -> tuple[str, DataDescriptor] | None:
|
|
751
|
+
"""
|
|
752
|
+
Returns the term, in the given project, that corresponds to the given term in the universe.
|
|
753
|
+
This function performs an exact match on the `project_id`, `data_descriptor_id`
|
|
754
|
+
and `universe_term_id`, and does not search for similar or related projects, data descriptors
|
|
755
|
+
and terms. If any of the provided ids (`project_id`, `data_descriptor_id` or `universe_term_id`)
|
|
756
|
+
is not found, or if there is no project term corresponding to the given universe term,
|
|
757
|
+
the function returns `None`.
|
|
758
|
+
|
|
759
|
+
:param project_id: The id of the given project.
|
|
760
|
+
:type project_id: str
|
|
761
|
+
:param data_descriptor_id: The id of the data descriptor that contains the given universe term.
|
|
762
|
+
:type data_descriptor_id: str
|
|
763
|
+
:param universe_term_id: The id of the given universe term.
|
|
764
|
+
:type universe_term_id: str
|
|
765
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
766
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
767
|
+
:type selected_term_fields: Iterable[str] | None
|
|
768
|
+
:returns: A collection id and the project term instance. Returns `None` if no matches are found.
|
|
769
|
+
:rtype: tuple[str, DataDescriptor] | None
|
|
770
|
+
"""
|
|
771
|
+
result: tuple[str, DataDescriptor] | None = None
|
|
772
|
+
if connection := _get_project_connection(project_id):
|
|
833
773
|
with connection.create_session() as session:
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
result.
|
|
774
|
+
term_found = _get_term_from_universe_term_id_in_project(data_descriptor_id, universe_term_id, session)
|
|
775
|
+
if term_found:
|
|
776
|
+
pydantic_term = instantiate_pydantic_term(term_found, selected_term_fields)
|
|
777
|
+
result = (term_found.collection.id, pydantic_term)
|
|
838
778
|
return result
|
|
839
779
|
|
|
840
780
|
|
|
841
|
-
def
|
|
842
|
-
|
|
781
|
+
def get_term_from_universe_term_id_in_all_projects(
|
|
782
|
+
data_descriptor_id: str, universe_term_id: str, selected_term_fields: Iterable[str] | None = None
|
|
783
|
+
) -> list[tuple[str, str, DataDescriptor]]:
|
|
843
784
|
"""
|
|
844
|
-
|
|
785
|
+
Returns the terms, in all projects, that correspond to the given term in the universe.
|
|
786
|
+
This function performs an exact match on the `data_descriptor_id`
|
|
787
|
+
and `universe_term_id`, and does not search for similar or related data descriptors
|
|
788
|
+
and terms. If any of the provided ids (`data_descriptor_id` or `universe_term_id`)
|
|
789
|
+
is not found, or if there is no project term corresponding to the given universe term,
|
|
790
|
+
the function returns an empty list.
|
|
845
791
|
|
|
792
|
+
:param data_descriptor_id: The id of the data descriptor that contains the given universe term.
|
|
793
|
+
:type data_descriptor_id: str
|
|
794
|
+
:param universe_term_id: The id of the given universe term.
|
|
795
|
+
:type universe_term_id: str
|
|
846
796
|
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
847
|
-
fields of the terms are returned.
|
|
848
|
-
:type selected_term_fields: Iterable[str]|None
|
|
849
|
-
:returns: A
|
|
850
|
-
|
|
797
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
798
|
+
:type selected_term_fields: Iterable[str] | None
|
|
799
|
+
:returns: A list of tuples of project id, collection id and project term instance. \
|
|
800
|
+
Returns an empty list if no matches are found.
|
|
801
|
+
:rtype: list[tuple[str, str, DataDescriptor]]
|
|
851
802
|
"""
|
|
803
|
+
result: list[tuple[str, str, DataDescriptor]] = list()
|
|
852
804
|
project_ids = get_all_projects()
|
|
853
|
-
result = list()
|
|
854
805
|
for project_id in project_ids:
|
|
855
|
-
|
|
856
|
-
|
|
806
|
+
term_found = get_term_from_universe_term_id_in_project(
|
|
807
|
+
project_id, data_descriptor_id, universe_term_id, selected_term_fields
|
|
808
|
+
)
|
|
809
|
+
if term_found:
|
|
810
|
+
result.append((project_id, term_found[0], term_found[1]))
|
|
857
811
|
return result
|
|
858
812
|
|
|
859
813
|
|
|
860
|
-
def
|
|
814
|
+
def _find_collections_in_project(
|
|
815
|
+
expression: str, session: Session, only_id: bool = False, limit: int | None = None, offset: int | None = None
|
|
816
|
+
) -> Sequence[PCollection]:
|
|
817
|
+
matching_condition = generate_matching_condition(PCollectionFTS5, expression, only_id)
|
|
818
|
+
tmp_statement = select(PCollectionFTS5).where(matching_condition)
|
|
819
|
+
statement = select(PCollection).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
|
|
820
|
+
return execute_match_statement(expression, statement, session)
|
|
821
|
+
|
|
822
|
+
|
|
823
|
+
def find_collections_in_project(
|
|
824
|
+
expression: str, project_id: str, only_id: bool = False, limit: int | None = None, offset: int | None = None
|
|
825
|
+
) -> list[tuple[str, dict]]:
|
|
861
826
|
"""
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
827
|
+
Find collections in the given project based on a full text search defined by the given `expression`.
|
|
828
|
+
The `expression` can be composed of one or multiple keywords.
|
|
829
|
+
The keywords can be combined with boolean operators: `AND`,
|
|
830
|
+
`OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
|
|
831
|
+
if no boolean operator is provided, whitespaces are handled as if there were
|
|
832
|
+
an implicit AND operator between each pair of keywords. Note that this
|
|
833
|
+
function does not provide any priority operator (parenthesis).
|
|
834
|
+
Keywords can define prefixes when adding a `*` at the end of them.
|
|
835
|
+
If the expression is composed of only one keyword, the function
|
|
836
|
+
automatically defines it as a prefix.
|
|
837
|
+
The function returns a list of collection ids and contexts, sorted according to the
|
|
838
|
+
bm25 ranking metric (list index `0` has the highest rank).
|
|
839
|
+
This function performs an exact match on the `project_id`,
|
|
840
|
+
and does not search for similar or related projects.
|
|
841
|
+
If the provided `expression` does not hit any collection or the given `project_id` does not
|
|
842
|
+
exactly match the id of a project, the function returns an empty list.
|
|
843
|
+
The function searches for the `expression` in the collection specifications.
|
|
844
|
+
However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
|
|
845
|
+
collections. **At the moment, `only_id` is set to `True` as the collections
|
|
846
|
+
haven't got any description.**
|
|
847
|
+
|
|
848
|
+
:param expression: The full text search expression.
|
|
849
|
+
:type expression: str
|
|
850
|
+
:param project_id: The id of the given project.
|
|
851
|
+
:type project_id: str
|
|
852
|
+
:param only_id: Performs the search only on ids, otherwise on all the specifications.
|
|
853
|
+
:type only_id: bool
|
|
854
|
+
:param limit: Limit the number of returned items found. Returns all items found if \
|
|
855
|
+
`limit` is either `None`, zero or negative.
|
|
856
|
+
:type limit: int | None
|
|
857
|
+
:param offset: Skips `offset` number of items found. Ignored if `offset` is \
|
|
858
|
+
either `None`, zero or negative.
|
|
859
|
+
:type offset: int | None
|
|
860
|
+
:returns: A list of collection ids and contexts. Returns an empty list if no matches are found.
|
|
861
|
+
:rtype: list[tuple[str, dict]]
|
|
862
|
+
:raises EsgvocValueError: If the `expression` cannot be interpreted.
|
|
863
|
+
"""
|
|
864
|
+
result: list[tuple[str, dict]] = list()
|
|
865
|
+
if connection := _get_project_connection(project_id):
|
|
866
|
+
with connection.create_session() as session:
|
|
867
|
+
collections_found = _find_collections_in_project(expression, session, only_id, limit, offset)
|
|
868
|
+
for collection in collections_found:
|
|
869
|
+
result.append((collection.id, collection.context))
|
|
870
|
+
return result
|
|
866
871
|
|
|
867
|
-
|
|
872
|
+
|
|
873
|
+
def _find_terms_in_collection(
|
|
874
|
+
expression: str,
|
|
875
|
+
collection_id: str,
|
|
876
|
+
session: Session,
|
|
877
|
+
only_id: bool = False,
|
|
878
|
+
limit: int | None = None,
|
|
879
|
+
offset: int | None = None,
|
|
880
|
+
) -> Sequence[PTerm]:
|
|
881
|
+
matching_condition = generate_matching_condition(PTermFTS5, expression, only_id)
|
|
882
|
+
where_condition = PCollection.id == collection_id, matching_condition
|
|
883
|
+
tmp_statement = select(PTermFTS5).join(PCollection).where(*where_condition)
|
|
884
|
+
statement = select(PTerm).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
|
|
885
|
+
return execute_match_statement(expression, statement, session)
|
|
886
|
+
|
|
887
|
+
|
|
888
|
+
def _find_terms_in_project(
|
|
889
|
+
expression: str, session: Session, only_id: bool = False, limit: int | None = None, offset: int | None = None
|
|
890
|
+
) -> Sequence[PTerm]:
|
|
891
|
+
matching_condition = generate_matching_condition(PTermFTS5, expression, only_id)
|
|
892
|
+
tmp_statement = select(PTermFTS5).where(matching_condition)
|
|
893
|
+
statement = select(PTerm).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
|
|
894
|
+
return execute_match_statement(expression, statement, session)
|
|
895
|
+
|
|
896
|
+
|
|
897
|
+
def find_terms_in_collection(
|
|
898
|
+
expression: str,
|
|
899
|
+
project_id: str,
|
|
900
|
+
collection_id: str,
|
|
901
|
+
only_id: bool = False,
|
|
902
|
+
limit: int | None = None,
|
|
903
|
+
offset: int | None = None,
|
|
904
|
+
selected_term_fields: Iterable[str] | None = None,
|
|
905
|
+
) -> list[DataDescriptor]:
|
|
906
|
+
"""
|
|
907
|
+
Find terms in the given project and collection based on a full text search defined by the given
|
|
908
|
+
`expression`.
|
|
909
|
+
The `expression` can be composed of one or multiple keywords.
|
|
910
|
+
The keywords can be combined with boolean operators: `AND`,
|
|
911
|
+
`OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
|
|
912
|
+
if no boolean operator is provided, whitespaces are handled as if there were
|
|
913
|
+
an implicit AND operator between each pair of keywords. Note that this
|
|
914
|
+
function does not provide any priority operator (parenthesis).
|
|
915
|
+
Keywords can define prefixes when adding a `*` at the end of them.
|
|
916
|
+
If the expression is composed of only one keyword, the function
|
|
917
|
+
automatically defines it as a prefix.
|
|
918
|
+
The function returns a list of term instances, sorted according to the
|
|
919
|
+
bm25 ranking metric (list index `0` has the highest rank).
|
|
920
|
+
This function performs an exact match on the `project_id` and `collection_id`,
|
|
921
|
+
and does not search for similar or related projects and collections.
|
|
922
|
+
If the provided `expression` does not hit any term or if any of the provided ids
|
|
923
|
+
(`project_id` or `collection_id`) is not found, the function returns an empty list.
|
|
924
|
+
The function searches for the `expression` in the term specifications.
|
|
925
|
+
However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
|
|
926
|
+
terms.
|
|
927
|
+
|
|
928
|
+
:param expression: The full text search expression.
|
|
929
|
+
:type expression: str
|
|
930
|
+
:param project_id: The id of the given project.
|
|
868
931
|
:type project_id: str
|
|
869
|
-
:
|
|
870
|
-
:
|
|
932
|
+
:param collection_id: The id of the given collection.
|
|
933
|
+
:type collection_id: str
|
|
934
|
+
:param only_id: Performs the search only on ids, otherwise on all the specifications.
|
|
935
|
+
:type only_id: bool
|
|
936
|
+
:param limit: Limit the number of returned items found. Returns all items found if \
|
|
937
|
+
`limit` is either `None`, zero or negative.
|
|
938
|
+
:type limit: int | None
|
|
939
|
+
:param offset: Skips `offset` number of items found. Ignored if `offset` is \
|
|
940
|
+
either `None`, zero or negative.
|
|
941
|
+
:type offset: int | None
|
|
942
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
943
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
944
|
+
:type selected_term_fields: Iterable[str] | None
|
|
945
|
+
:returns: A list of term instances. Returns an empty list if no matches are found.
|
|
946
|
+
:rtype: list[DataDescriptor]
|
|
947
|
+
:raises EsgvocValueError: If the `expression` cannot be interpreted.
|
|
871
948
|
"""
|
|
872
|
-
result:
|
|
873
|
-
if connection:=_get_project_connection(project_id):
|
|
949
|
+
result: list[DataDescriptor] = list()
|
|
950
|
+
if connection := _get_project_connection(project_id):
|
|
874
951
|
with connection.create_session() as session:
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
# Project can't be missing if session exists.
|
|
878
|
-
result = ProjectSpecs(**project.specs) # type: ignore
|
|
879
|
-
except Exception as e:
|
|
880
|
-
msg = f'Unable to read specs in project {project_id}'
|
|
881
|
-
raise RuntimeError(msg) from e
|
|
952
|
+
pterms_found = _find_terms_in_collection(expression, collection_id, session, only_id, limit, offset)
|
|
953
|
+
instantiate_pydantic_terms(pterms_found, result, selected_term_fields)
|
|
882
954
|
return result
|
|
883
955
|
|
|
884
956
|
|
|
885
|
-
def
|
|
957
|
+
def find_terms_in_project(
|
|
958
|
+
expression: str,
|
|
959
|
+
project_id: str,
|
|
960
|
+
only_id: bool = False,
|
|
961
|
+
limit: int | None = None,
|
|
962
|
+
offset: int | None = None,
|
|
963
|
+
selected_term_fields: Iterable[str] | None = None,
|
|
964
|
+
) -> list[DataDescriptor]:
|
|
886
965
|
"""
|
|
887
|
-
|
|
966
|
+
Find terms in the given project based on a full text search defined by the given `expression`.
|
|
967
|
+
The `expression` can be composed of one or multiple keywords.
|
|
968
|
+
The keywords can be combined with boolean operators: `AND`,
|
|
969
|
+
`OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
|
|
970
|
+
if no boolean operator is provided, whitespaces are handled as if there were
|
|
971
|
+
an implicit AND operator between each pair of keywords. Note that this
|
|
972
|
+
function does not provide any priority operator (parenthesis).
|
|
973
|
+
Keywords can define prefixes when adding a `*` at the end of them.
|
|
974
|
+
If the expression is composed of only one keyword, the function
|
|
975
|
+
automatically defines it as a prefix.
|
|
976
|
+
The function returns a list of term instances, sorted according to the
|
|
977
|
+
bm25 ranking metric (list index `0` has the highest rank).
|
|
978
|
+
This function performs an exact match on the `project_id`,
|
|
979
|
+
and does not search for similar or related projects.
|
|
980
|
+
If the provided `expression` does not hit any term or if the provided `project_id` is
|
|
981
|
+
not found, the function returns an empty list.
|
|
982
|
+
The function searches for the `expression` in the term specifications.
|
|
983
|
+
However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
|
|
984
|
+
terms.
|
|
985
|
+
|
|
986
|
+
:param expression: The full text search expression.
|
|
987
|
+
:type expression: str
|
|
988
|
+
:param project_id: The id of the given project.
|
|
989
|
+
:type project_id: str
|
|
990
|
+
:param only_id: Performs the search only on ids, otherwise on all the specifications.
|
|
991
|
+
:type only_id: bool
|
|
992
|
+
:param limit: Limit the number of returned items found. Returns all items found if \
|
|
993
|
+
`limit` is either `None`, zero or negative.
|
|
994
|
+
:type limit: int | None
|
|
995
|
+
:param offset: Skips `offset` number of items found. Ignored if `offset` is \
|
|
996
|
+
either `None`, zero or negative.
|
|
997
|
+
:type offset: int | None
|
|
998
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
999
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
1000
|
+
:type selected_term_fields: Iterable[str] | None
|
|
1001
|
+
:returns: A list of term instances. Returns an empty list if no matches are found.
|
|
1002
|
+
:rtype: list[DataDescriptor]
|
|
1003
|
+
:raises EsgvocValueError: If the `expression` cannot be interpreted.
|
|
1004
|
+
"""
|
|
1005
|
+
result: list[DataDescriptor] = list()
|
|
1006
|
+
if connection := _get_project_connection(project_id):
|
|
1007
|
+
with connection.create_session() as session:
|
|
1008
|
+
pterms_found = _find_terms_in_project(expression, session, only_id, limit, offset)
|
|
1009
|
+
instantiate_pydantic_terms(pterms_found, result, selected_term_fields)
|
|
1010
|
+
return result
|
|
888
1011
|
|
|
889
|
-
|
|
890
|
-
|
|
1012
|
+
|
|
1013
|
+
def find_terms_in_all_projects(
|
|
1014
|
+
expression: str,
|
|
1015
|
+
only_id: bool = False,
|
|
1016
|
+
limit: int | None = None,
|
|
1017
|
+
offset: int | None = None,
|
|
1018
|
+
selected_term_fields: Iterable[str] | None = None,
|
|
1019
|
+
) -> list[tuple[str, list[DataDescriptor]]]:
|
|
891
1020
|
"""
|
|
892
|
-
|
|
1021
|
+
Find terms in all projects based on a full text search defined by the given `expression`.
|
|
1022
|
+
The `expression` can be composed of one or multiple keywords.
|
|
1023
|
+
The keywords can be combined with boolean operators: `AND`,
|
|
1024
|
+
`OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
|
|
1025
|
+
if no boolean operator is provided, whitespaces are handled as if there were
|
|
1026
|
+
an implicit AND operator between each pair of keywords. Note that this
|
|
1027
|
+
function does not provide any priority operator (parenthesis).
|
|
1028
|
+
Keywords can define prefixes when adding a `*` at the end of them.
|
|
1029
|
+
If the expression is composed of only one keyword, the function
|
|
1030
|
+
automatically defines it as a prefix.
|
|
1031
|
+
The function returns a list of project ids and term instances, sorted according to the
|
|
1032
|
+
bm25 ranking metric (list index `0` has the highest rank).
|
|
1033
|
+
If the provided `expression` does not hit any term, the function returns an empty list.
|
|
1034
|
+
The function searches for the `expression` in the term specifications.
|
|
1035
|
+
However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
|
|
1036
|
+
terms.
|
|
1037
|
+
|
|
1038
|
+
:param expression: The full text search expression.
|
|
1039
|
+
:type expression: str
|
|
1040
|
+
:param only_id: Performs the search only on ids, otherwise on all the specifications.
|
|
1041
|
+
:type only_id: bool
|
|
1042
|
+
:param limit: Limit the number of returned items found. Returns all items found if \
|
|
1043
|
+
`limit` is either `None`, zero or negative.
|
|
1044
|
+
:type limit: int | None
|
|
1045
|
+
:param offset: Skips `offset` number of items found. Ignored if `offset` is \
|
|
1046
|
+
either `None`, zero or negative.
|
|
1047
|
+
:type offset: int | None
|
|
1048
|
+
:param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
|
|
1049
|
+
fields of the terms are returned. If empty, selects the id and type fields.
|
|
1050
|
+
:type selected_term_fields: Iterable[str] | None
|
|
1051
|
+
:returns: A list of project ids and term instances. Returns an empty list if no matches are found.
|
|
1052
|
+
:rtype: list[tuple[str, list[DataDescriptor]]]
|
|
1053
|
+
:raises EsgvocValueError: If the `expression` cannot be interpreted.
|
|
1054
|
+
"""
|
|
1055
|
+
result: list[tuple[str, list[DataDescriptor]]] = list()
|
|
1056
|
+
project_ids = get_all_projects()
|
|
1057
|
+
for project_id in project_ids:
|
|
1058
|
+
terms_found = find_terms_in_project(expression, project_id, only_id, limit, offset, selected_term_fields)
|
|
1059
|
+
if terms_found:
|
|
1060
|
+
result.append((project_id, terms_found))
|
|
1061
|
+
return result
|
|
893
1062
|
|
|
894
1063
|
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
1064
|
+
def find_items_in_project(
|
|
1065
|
+
expression: str, project_id: str, only_id: bool = False, limit: int | None = None, offset: int | None = None
|
|
1066
|
+
) -> list[Item]:
|
|
1067
|
+
"""
|
|
1068
|
+
Find items, at the moment terms and collections, in the given project based on a full-text
|
|
1069
|
+
search defined by the given `expression`.
|
|
1070
|
+
The `expression` can be composed of one or multiple keywords.
|
|
1071
|
+
The keywords can be combined with boolean operators: `AND`,
|
|
1072
|
+
`OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
|
|
1073
|
+
if no boolean operator is provided, whitespaces are handled as if there were
|
|
1074
|
+
an implicit AND operator between each pair of keywords. Note that this
|
|
1075
|
+
function does not provide any priority operator (parenthesis).
|
|
1076
|
+
Keywords can define prefixes when adding a `*` at the end of them.
|
|
1077
|
+
If the expression is composed of only one keyword, the function
|
|
1078
|
+
automatically defines it as a prefix.
|
|
1079
|
+
The function returns a list of item instances sorted according to the
|
|
1080
|
+
bm25 ranking metric (list index `0` has the highest rank).
|
|
1081
|
+
This function performs an exact match on the `project_id`,
|
|
1082
|
+
and does not search for similar or related projects.
|
|
1083
|
+
If the provided `expression` does not hit any item, or the provided `project_id` is not found,
|
|
1084
|
+
the function returns an empty list.
|
|
1085
|
+
The function searches for the `expression` in the term and collection specifications.
|
|
1086
|
+
However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
|
|
1087
|
+
terms and collections. **At the moment, `only_id` is set to `True` for the collections because
|
|
1088
|
+
they haven't got any description.**
|
|
1089
|
+
|
|
1090
|
+
:param expression: The full text search expression.
|
|
1091
|
+
:type expression: str
|
|
1092
|
+
:param only_id: Performs the search only on ids, otherwise on all the specifications.
|
|
1093
|
+
:type only_id: bool
|
|
1094
|
+
:param limit: Limit the number of returned items found. Returns all items found if \
|
|
1095
|
+
`limit` is either `None`, zero or negative.
|
|
1096
|
+
:type limit: int | None
|
|
1097
|
+
:param offset: Skips `offset` number of items found. Ignored if `offset` is \
|
|
1098
|
+
either `None`, zero or negative.
|
|
1099
|
+
:type offset: int | None
|
|
1100
|
+
:returns: A list of item instances. Returns an empty list if no matches are found.
|
|
1101
|
+
:rtype: list[Item]
|
|
1102
|
+
:raises EsgvocValueError: If the `expression` cannot be interpreted.
|
|
1103
|
+
"""
|
|
1104
|
+
# TODO: execute a union query once it is possible to compute the parent of terms and collections.
|
|
1105
|
+
result = list()
|
|
1106
|
+
if connection := _get_project_connection(project_id):
|
|
1107
|
+
with connection.create_session() as session:
|
|
1108
|
+
processed_expression = process_expression(expression)
|
|
1109
|
+
if only_id:
|
|
1110
|
+
collection_column = col(PCollectionFTS5.id)
|
|
1111
|
+
term_column = col(PTermFTS5.id)
|
|
1112
|
+
else:
|
|
1113
|
+
collection_column = col(PCollectionFTS5.id) # TODO: use specs when implemented!
|
|
1114
|
+
term_column = col(PTermFTS5.specs) # type: ignore
|
|
1115
|
+
collection_where_condition = collection_column.match(processed_expression)
|
|
1116
|
+
collection_statement = select(PCollectionFTS5.id,
|
|
1117
|
+
text("'collection' AS TYPE"),
|
|
1118
|
+
text(f"'{project_id}' AS TYPE"),
|
|
1119
|
+
text('rank')).where(collection_where_condition)
|
|
1120
|
+
term_where_condition = term_column.match(processed_expression)
|
|
1121
|
+
term_statement = select(PTermFTS5.id,
|
|
1122
|
+
text("'term' AS TYPE"),
|
|
1123
|
+
PCollection.id,
|
|
1124
|
+
text('rank')).join(PCollection) \
|
|
1125
|
+
.where(term_where_condition)
|
|
1126
|
+
result = execute_find_item_statements(session, processed_expression, collection_statement,
|
|
1127
|
+
term_statement, limit, offset)
|
|
1128
|
+
return result
|