esgvoc 0.4.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +1 -1
- esgvoc/api/data_descriptors/__init__.py +50 -28
- esgvoc/api/data_descriptors/activity.py +3 -3
- esgvoc/api/data_descriptors/area_label.py +16 -1
- esgvoc/api/data_descriptors/branded_suffix.py +20 -0
- esgvoc/api/data_descriptors/branded_variable.py +12 -0
- esgvoc/api/data_descriptors/consortium.py +14 -13
- esgvoc/api/data_descriptors/contact.py +5 -0
- esgvoc/api/data_descriptors/conventions.py +6 -0
- esgvoc/api/data_descriptors/creation_date.py +5 -0
- esgvoc/api/data_descriptors/data_descriptor.py +14 -9
- esgvoc/api/data_descriptors/data_specs_version.py +5 -0
- esgvoc/api/data_descriptors/date.py +1 -1
- esgvoc/api/data_descriptors/directory_date.py +1 -1
- esgvoc/api/data_descriptors/experiment.py +13 -11
- esgvoc/api/data_descriptors/forcing_index.py +1 -1
- esgvoc/api/data_descriptors/frequency.py +3 -3
- esgvoc/api/data_descriptors/further_info_url.py +5 -0
- esgvoc/api/data_descriptors/grid_label.py +2 -2
- esgvoc/api/data_descriptors/horizontal_label.py +15 -1
- esgvoc/api/data_descriptors/initialisation_index.py +1 -1
- esgvoc/api/data_descriptors/institution.py +8 -5
- esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
- esgvoc/api/data_descriptors/license.py +3 -3
- esgvoc/api/data_descriptors/mip_era.py +1 -1
- esgvoc/api/data_descriptors/model_component.py +1 -1
- esgvoc/api/data_descriptors/obs_type.py +5 -0
- esgvoc/api/data_descriptors/organisation.py +1 -1
- esgvoc/api/data_descriptors/physic_index.py +1 -1
- esgvoc/api/data_descriptors/product.py +2 -2
- esgvoc/api/data_descriptors/publication_status.py +5 -0
- esgvoc/api/data_descriptors/realisation_index.py +1 -1
- esgvoc/api/data_descriptors/realm.py +1 -1
- esgvoc/api/data_descriptors/region.py +5 -0
- esgvoc/api/data_descriptors/resolution.py +3 -3
- esgvoc/api/data_descriptors/source.py +9 -5
- esgvoc/api/data_descriptors/source_type.py +1 -1
- esgvoc/api/data_descriptors/table.py +3 -2
- esgvoc/api/data_descriptors/temporal_label.py +15 -1
- esgvoc/api/data_descriptors/time_range.py +4 -3
- esgvoc/api/data_descriptors/title.py +5 -0
- esgvoc/api/data_descriptors/tracking_id.py +5 -0
- esgvoc/api/data_descriptors/variable.py +25 -12
- esgvoc/api/data_descriptors/variant_label.py +3 -3
- esgvoc/api/data_descriptors/vertical_label.py +14 -0
- esgvoc/api/project_specs.py +117 -2
- esgvoc/api/projects.py +242 -279
- esgvoc/api/search.py +30 -3
- esgvoc/api/universe.py +42 -27
- esgvoc/apps/jsg/cmip6_template.json +74 -0
- esgvoc/apps/jsg/cmip6plus_template.json +74 -0
- esgvoc/apps/jsg/json_schema_generator.py +185 -0
- esgvoc/cli/config.py +500 -0
- esgvoc/cli/find.py +138 -0
- esgvoc/cli/get.py +43 -38
- esgvoc/cli/main.py +10 -3
- esgvoc/cli/status.py +27 -18
- esgvoc/cli/valid.py +10 -15
- esgvoc/core/db/models/project.py +11 -11
- esgvoc/core/db/models/universe.py +3 -3
- esgvoc/core/db/project_ingestion.py +40 -40
- esgvoc/core/db/universe_ingestion.py +36 -33
- esgvoc/core/logging_handler.py +24 -2
- esgvoc/core/repo_fetcher.py +61 -59
- esgvoc/core/service/data_merger.py +47 -34
- esgvoc/core/service/state.py +107 -83
- {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/METADATA +7 -20
- esgvoc-1.0.0.dist-info/RECORD +95 -0
- esgvoc/core/logging.conf +0 -21
- esgvoc-0.4.0.dist-info/RECORD +0 -80
- {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/WHEEL +0 -0
- {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/entry_points.txt +0 -0
- {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/api/projects.py
CHANGED
|
@@ -20,16 +20,11 @@ from esgvoc.api.search import (
|
|
|
20
20
|
handle_rank_limit_offset,
|
|
21
21
|
instantiate_pydantic_term,
|
|
22
22
|
instantiate_pydantic_terms,
|
|
23
|
+
process_expression,
|
|
23
24
|
)
|
|
24
25
|
from esgvoc.core.db.connection import DBConnection
|
|
25
26
|
from esgvoc.core.db.models.mixins import TermKind
|
|
26
|
-
from esgvoc.core.db.models.project import
|
|
27
|
-
Collection,
|
|
28
|
-
PCollectionFTS5,
|
|
29
|
-
Project,
|
|
30
|
-
PTerm,
|
|
31
|
-
PTermFTS5,
|
|
32
|
-
)
|
|
27
|
+
from esgvoc.core.db.models.project import PCollection, PCollectionFTS5, Project, PTerm, PTermFTS5
|
|
33
28
|
from esgvoc.core.db.models.universe import UTerm
|
|
34
29
|
from esgvoc.core.exceptions import EsgvocDbError, EsgvocNotFoundError, EsgvocNotImplementedError, EsgvocValueError
|
|
35
30
|
|
|
@@ -53,21 +48,17 @@ def _get_project_session_with_exception(project_id: str) -> Session:
|
|
|
53
48
|
raise EsgvocNotFoundError(f"unable to find project '{project_id}'")
|
|
54
49
|
|
|
55
50
|
|
|
56
|
-
def _resolve_term(composite_term_part: dict,
|
|
57
|
-
universe_session: Session,
|
|
58
|
-
project_session: Session) -> UTerm | PTerm:
|
|
51
|
+
def _resolve_term(composite_term_part: dict, universe_session: Session, project_session: Session) -> UTerm | PTerm:
|
|
59
52
|
# First find the term in the universe than in the current project
|
|
60
53
|
term_id = composite_term_part[constants.TERM_ID_JSON_KEY]
|
|
61
54
|
term_type = composite_term_part[constants.TERM_TYPE_JSON_KEY]
|
|
62
|
-
uterm = universe._get_term_in_data_descriptor(
|
|
63
|
-
|
|
64
|
-
|
|
55
|
+
uterm = universe._get_term_in_data_descriptor(
|
|
56
|
+
data_descriptor_id=term_type, term_id=term_id, session=universe_session
|
|
57
|
+
)
|
|
65
58
|
if uterm:
|
|
66
59
|
return uterm
|
|
67
60
|
else:
|
|
68
|
-
pterm = _get_term_in_collection(collection_id=term_type,
|
|
69
|
-
term_id=term_id,
|
|
70
|
-
session=project_session)
|
|
61
|
+
pterm = _get_term_in_collection(collection_id=term_type, term_id=term_id, session=project_session)
|
|
71
62
|
if pterm:
|
|
72
63
|
return pterm
|
|
73
64
|
else:
|
|
@@ -83,11 +74,9 @@ def _get_composite_term_separator_parts(term: UTerm | PTerm) -> tuple[str, list]
|
|
|
83
74
|
|
|
84
75
|
# TODO: support optionality of parts of composite.
|
|
85
76
|
# It is backtrack possible for more than one missing parts.
|
|
86
|
-
def _valid_value_composite_term_with_separator(
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
project_session: Session)\
|
|
90
|
-
-> list[UniverseTermError | ProjectTermError]:
|
|
77
|
+
def _valid_value_composite_term_with_separator(
|
|
78
|
+
value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
|
|
79
|
+
) -> list[UniverseTermError | ProjectTermError]:
|
|
91
80
|
result = list()
|
|
92
81
|
separator, parts = _get_composite_term_separator_parts(term)
|
|
93
82
|
if separator in value:
|
|
@@ -95,14 +84,25 @@ def _valid_value_composite_term_with_separator(value: str,
|
|
|
95
84
|
if len(splits) == len(parts):
|
|
96
85
|
for index in range(0, len(splits)):
|
|
97
86
|
given_value = splits[index]
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
87
|
+
if "id" not in parts[index].keys():
|
|
88
|
+
terms = universe.get_all_terms_in_data_descriptor(parts[index]["type"], None)
|
|
89
|
+
parts[index]["id"] = [term.id for term in terms]
|
|
90
|
+
if type(parts[index]["id"]) is str:
|
|
91
|
+
parts[index]["id"] = [parts[index]["id"]]
|
|
92
|
+
|
|
93
|
+
errors_list = list()
|
|
94
|
+
for id in parts[index]["id"]:
|
|
95
|
+
part_parts = dict(parts[index])
|
|
96
|
+
part_parts["id"] = id
|
|
97
|
+
resolved_term = _resolve_term(part_parts, universe_session, project_session)
|
|
98
|
+
errors = _valid_value(given_value, resolved_term, universe_session, project_session)
|
|
99
|
+
if len(errors) == 0:
|
|
100
|
+
errors_list = errors
|
|
101
|
+
break
|
|
102
|
+
else:
|
|
103
|
+
errors_list.extend(errors)
|
|
104
|
+
else:
|
|
105
|
+
result.append(_create_term_error(value, term))
|
|
106
106
|
else:
|
|
107
107
|
result.append(_create_term_error(value, term))
|
|
108
108
|
else:
|
|
@@ -110,16 +110,13 @@ def _valid_value_composite_term_with_separator(value: str,
|
|
|
110
110
|
return result
|
|
111
111
|
|
|
112
112
|
|
|
113
|
-
def _transform_to_pattern(term: UTerm | PTerm,
|
|
114
|
-
universe_session: Session,
|
|
115
|
-
project_session: Session) -> str:
|
|
113
|
+
def _transform_to_pattern(term: UTerm | PTerm, universe_session: Session, project_session: Session) -> str:
|
|
116
114
|
match term.kind:
|
|
117
115
|
case TermKind.PLAIN:
|
|
118
116
|
if constants.DRS_SPECS_JSON_KEY in term.specs:
|
|
119
117
|
result = term.specs[constants.DRS_SPECS_JSON_KEY]
|
|
120
118
|
else:
|
|
121
|
-
raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " +
|
|
122
|
-
"Can't validate it.")
|
|
119
|
+
raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " + "Can't validate it.")
|
|
123
120
|
case TermKind.PATTERN:
|
|
124
121
|
result = term.specs[constants.PATTERN_JSON_KEY]
|
|
125
122
|
case TermKind.COMPOSITE:
|
|
@@ -128,7 +125,7 @@ def _transform_to_pattern(term: UTerm | PTerm,
|
|
|
128
125
|
for part in parts:
|
|
129
126
|
resolved_term = _resolve_term(part, universe_session, project_session)
|
|
130
127
|
pattern = _transform_to_pattern(resolved_term, universe_session, project_session)
|
|
131
|
-
result = f
|
|
128
|
+
result = f"{result}{pattern}{separator}"
|
|
132
129
|
result = result.rstrip(separator)
|
|
133
130
|
case _:
|
|
134
131
|
raise EsgvocDbError(f"unsupported term kind '{term.kind}'")
|
|
@@ -137,11 +134,9 @@ def _transform_to_pattern(term: UTerm | PTerm,
|
|
|
137
134
|
|
|
138
135
|
# TODO: support optionality of parts of composite.
|
|
139
136
|
# It is backtrack possible for more than one missing parts.
|
|
140
|
-
def _valid_value_composite_term_separator_less(
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
project_session: Session)\
|
|
144
|
-
-> list[UniverseTermError | ProjectTermError]:
|
|
137
|
+
def _valid_value_composite_term_separator_less(
|
|
138
|
+
value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
|
|
139
|
+
) -> list[UniverseTermError | ProjectTermError]:
|
|
145
140
|
result = list()
|
|
146
141
|
try:
|
|
147
142
|
pattern = _transform_to_pattern(term, universe_session, project_session)
|
|
@@ -150,8 +145,8 @@ def _valid_value_composite_term_separator_less(value: str,
|
|
|
150
145
|
# So their regex are defined as a whole (begins by a ^, ends by a $).
|
|
151
146
|
# As the pattern is a concatenation of plain or regex, multiple ^ and $ can exist.
|
|
152
147
|
# The later, must be removed.
|
|
153
|
-
pattern = pattern.replace(
|
|
154
|
-
pattern = f
|
|
148
|
+
pattern = pattern.replace("^", "").replace("$", "")
|
|
149
|
+
pattern = f"^{pattern}$"
|
|
155
150
|
regex = re.compile(pattern)
|
|
156
151
|
except Exception as e:
|
|
157
152
|
msg = f"regex compilation error while processing term '{term.id}'':\n{e}"
|
|
@@ -165,35 +160,30 @@ def _valid_value_composite_term_separator_less(value: str,
|
|
|
165
160
|
raise EsgvocNotImplementedError(msg) from e
|
|
166
161
|
|
|
167
162
|
|
|
168
|
-
def _valid_value_for_composite_term(
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
project_session: Session)\
|
|
172
|
-
-> list[UniverseTermError | ProjectTermError]:
|
|
163
|
+
def _valid_value_for_composite_term(
|
|
164
|
+
value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
|
|
165
|
+
) -> list[UniverseTermError | ProjectTermError]:
|
|
173
166
|
result = list()
|
|
174
167
|
separator, _ = _get_composite_term_separator_parts(term)
|
|
175
168
|
if separator:
|
|
176
|
-
result = _valid_value_composite_term_with_separator(value, term, universe_session,
|
|
177
|
-
project_session)
|
|
169
|
+
result = _valid_value_composite_term_with_separator(value, term, universe_session, project_session)
|
|
178
170
|
else:
|
|
179
|
-
result = _valid_value_composite_term_separator_less(value, term, universe_session,
|
|
180
|
-
project_session)
|
|
171
|
+
result = _valid_value_composite_term_separator_less(value, term, universe_session, project_session)
|
|
181
172
|
return result
|
|
182
173
|
|
|
183
174
|
|
|
184
175
|
def _create_term_error(value: str, term: UTerm | PTerm) -> UniverseTermError | ProjectTermError:
|
|
185
176
|
if isinstance(term, UTerm):
|
|
186
|
-
return UniverseTermError(
|
|
187
|
-
|
|
177
|
+
return UniverseTermError(
|
|
178
|
+
value=value, term=term.specs, term_kind=term.kind, data_descriptor_id=term.data_descriptor.id
|
|
179
|
+
)
|
|
188
180
|
else:
|
|
189
|
-
return ProjectTermError(value=value, term=term.specs, term_kind=term.kind,
|
|
190
|
-
collection_id=term.collection.id)
|
|
181
|
+
return ProjectTermError(value=value, term=term.specs, term_kind=term.kind, collection_id=term.collection.id)
|
|
191
182
|
|
|
192
183
|
|
|
193
|
-
def _valid_value(
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
project_session: Session) -> list[UniverseTermError | ProjectTermError]:
|
|
184
|
+
def _valid_value(
|
|
185
|
+
value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
|
|
186
|
+
) -> list[UniverseTermError | ProjectTermError]:
|
|
197
187
|
result = list()
|
|
198
188
|
match term.kind:
|
|
199
189
|
case TermKind.PLAIN:
|
|
@@ -201,17 +191,14 @@ def _valid_value(value: str,
|
|
|
201
191
|
if term.specs[constants.DRS_SPECS_JSON_KEY] != value:
|
|
202
192
|
result.append(_create_term_error(value, term))
|
|
203
193
|
else:
|
|
204
|
-
raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " +
|
|
205
|
-
"Can't validate it.")
|
|
194
|
+
raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " + "Can't validate it.")
|
|
206
195
|
case TermKind.PATTERN:
|
|
207
196
|
# TODO: Pattern can be compiled and stored for further matching.
|
|
208
197
|
pattern_match = re.match(term.specs[constants.PATTERN_JSON_KEY], value)
|
|
209
198
|
if pattern_match is None:
|
|
210
199
|
result.append(_create_term_error(value, term))
|
|
211
200
|
case TermKind.COMPOSITE:
|
|
212
|
-
result.extend(_valid_value_for_composite_term(value, term,
|
|
213
|
-
universe_session,
|
|
214
|
-
project_session))
|
|
201
|
+
result.extend(_valid_value_for_composite_term(value, term, universe_session, project_session))
|
|
215
202
|
case _:
|
|
216
203
|
raise EsgvocDbError(f"unsupported term kind '{term.kind}'")
|
|
217
204
|
return result
|
|
@@ -219,33 +206,25 @@ def _valid_value(value: str,
|
|
|
219
206
|
|
|
220
207
|
def _check_value(value: str) -> str:
|
|
221
208
|
if not value or value.isspace():
|
|
222
|
-
raise EsgvocValueError(
|
|
209
|
+
raise EsgvocValueError("value should be set")
|
|
223
210
|
else:
|
|
224
211
|
return value
|
|
225
212
|
|
|
226
213
|
|
|
227
|
-
def _search_plain_term_and_valid_value(value: str,
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
-> str | None:
|
|
231
|
-
where_expression = and_(Collection.id == collection_id,
|
|
232
|
-
PTerm.specs[constants.DRS_SPECS_JSON_KEY] == f'"{value}"')
|
|
233
|
-
statement = select(PTerm).join(Collection).where(where_expression)
|
|
214
|
+
def _search_plain_term_and_valid_value(value: str, collection_id: str, project_session: Session) -> str | None:
|
|
215
|
+
where_expression = and_(PCollection.id == collection_id, PTerm.specs[constants.DRS_SPECS_JSON_KEY] == f'"{value}"')
|
|
216
|
+
statement = select(PTerm).join(PCollection).where(where_expression)
|
|
234
217
|
term = project_session.exec(statement).one_or_none()
|
|
235
218
|
return term.id if term else None
|
|
236
219
|
|
|
237
220
|
|
|
238
|
-
def _valid_value_against_all_terms_of_collection(
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
project_session: Session) \
|
|
242
|
-
-> list[str]:
|
|
221
|
+
def _valid_value_against_all_terms_of_collection(
|
|
222
|
+
value: str, collection: PCollection, universe_session: Session, project_session: Session
|
|
223
|
+
) -> list[str]:
|
|
243
224
|
if collection.terms:
|
|
244
225
|
result = list()
|
|
245
226
|
for pterm in collection.terms:
|
|
246
|
-
_errors = _valid_value(value, pterm,
|
|
247
|
-
universe_session,
|
|
248
|
-
project_session)
|
|
227
|
+
_errors = _valid_value(value, pterm, universe_session, project_session)
|
|
249
228
|
if not _errors:
|
|
250
229
|
result.append(pterm.id)
|
|
251
230
|
return result
|
|
@@ -253,35 +232,24 @@ def _valid_value_against_all_terms_of_collection(value: str,
|
|
|
253
232
|
raise EsgvocDbError(f"collection '{collection.id}' has no term")
|
|
254
233
|
|
|
255
234
|
|
|
256
|
-
def _valid_value_against_given_term(
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
term_id: str,
|
|
260
|
-
universe_session: Session,
|
|
261
|
-
project_session: Session)\
|
|
262
|
-
-> list[UniverseTermError | ProjectTermError]:
|
|
235
|
+
def _valid_value_against_given_term(
|
|
236
|
+
value: str, project_id: str, collection_id: str, term_id: str, universe_session: Session, project_session: Session
|
|
237
|
+
) -> list[UniverseTermError | ProjectTermError]:
|
|
263
238
|
# [OPTIMIZATION]
|
|
264
239
|
key = value + project_id + collection_id + term_id
|
|
265
240
|
if key in _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE:
|
|
266
241
|
result = _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key]
|
|
267
242
|
else:
|
|
268
|
-
term = _get_term_in_collection(collection_id,
|
|
269
|
-
term_id,
|
|
270
|
-
project_session)
|
|
243
|
+
term = _get_term_in_collection(collection_id, term_id, project_session)
|
|
271
244
|
if term:
|
|
272
245
|
result = _valid_value(value, term, universe_session, project_session)
|
|
273
246
|
else:
|
|
274
|
-
raise EsgvocNotFoundError(f"unable to find term '{term_id}' " +
|
|
275
|
-
f"in collection '{collection_id}'")
|
|
247
|
+
raise EsgvocNotFoundError(f"unable to find term '{term_id}' " + f"in collection '{collection_id}'")
|
|
276
248
|
_VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key] = result
|
|
277
249
|
return result
|
|
278
250
|
|
|
279
251
|
|
|
280
|
-
def valid_term(value: str,
|
|
281
|
-
project_id: str,
|
|
282
|
-
collection_id: str,
|
|
283
|
-
term_id: str) \
|
|
284
|
-
-> ValidationReport:
|
|
252
|
+
def valid_term(value: str, project_id: str, collection_id: str, term_id: str) -> ValidationReport:
|
|
285
253
|
"""
|
|
286
254
|
Check if the given value may or may not represent the given term. The functions returns
|
|
287
255
|
a report that contains the possible errors.
|
|
@@ -312,19 +280,16 @@ def valid_term(value: str,
|
|
|
312
280
|
:raises EsgvocNotFoundError: If any of the provided ids is not found
|
|
313
281
|
"""
|
|
314
282
|
value = _check_value(value)
|
|
315
|
-
with get_universe_session() as universe_session,
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
283
|
+
with get_universe_session() as universe_session, _get_project_session_with_exception(project_id) as project_session:
|
|
284
|
+
errors = _valid_value_against_given_term(
|
|
285
|
+
value, project_id, collection_id, term_id, universe_session, project_session
|
|
286
|
+
)
|
|
319
287
|
return ValidationReport(expression=value, errors=errors)
|
|
320
288
|
|
|
321
289
|
|
|
322
|
-
def _valid_term_in_collection(
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
universe_session: Session,
|
|
326
|
-
project_session: Session) \
|
|
327
|
-
-> list[MatchingTerm]:
|
|
290
|
+
def _valid_term_in_collection(
|
|
291
|
+
value: str, project_id: str, collection_id: str, universe_session: Session, project_session: Session
|
|
292
|
+
) -> list[MatchingTerm]:
|
|
328
293
|
# [OPTIMIZATION]
|
|
329
294
|
key = value + project_id + collection_id
|
|
330
295
|
if key in _VALID_TERM_IN_COLLECTION_CACHE:
|
|
@@ -336,20 +301,19 @@ def _valid_term_in_collection(value: str,
|
|
|
336
301
|
if collection:
|
|
337
302
|
match collection.term_kind:
|
|
338
303
|
case TermKind.PLAIN:
|
|
339
|
-
term_id_found = _search_plain_term_and_valid_value(value, collection_id,
|
|
340
|
-
project_session)
|
|
304
|
+
term_id_found = _search_plain_term_and_valid_value(value, collection_id, project_session)
|
|
341
305
|
if term_id_found:
|
|
342
|
-
result.append(
|
|
343
|
-
|
|
344
|
-
|
|
306
|
+
result.append(
|
|
307
|
+
MatchingTerm(project_id=project_id, collection_id=collection_id, term_id=term_id_found)
|
|
308
|
+
)
|
|
345
309
|
case _:
|
|
346
|
-
term_ids_found = _valid_value_against_all_terms_of_collection(
|
|
347
|
-
|
|
348
|
-
|
|
310
|
+
term_ids_found = _valid_value_against_all_terms_of_collection(
|
|
311
|
+
value, collection, universe_session, project_session
|
|
312
|
+
)
|
|
349
313
|
for term_id_found in term_ids_found:
|
|
350
|
-
result.append(
|
|
351
|
-
|
|
352
|
-
|
|
314
|
+
result.append(
|
|
315
|
+
MatchingTerm(project_id=project_id, collection_id=collection_id, term_id=term_id_found)
|
|
316
|
+
)
|
|
353
317
|
else:
|
|
354
318
|
msg = f"unable to find collection '{collection_id}'"
|
|
355
319
|
raise EsgvocNotFoundError(msg)
|
|
@@ -357,10 +321,7 @@ def _valid_term_in_collection(value: str,
|
|
|
357
321
|
return result
|
|
358
322
|
|
|
359
323
|
|
|
360
|
-
def valid_term_in_collection(value: str,
|
|
361
|
-
project_id: str,
|
|
362
|
-
collection_id: str) \
|
|
363
|
-
-> list[MatchingTerm]:
|
|
324
|
+
def valid_term_in_collection(value: str, project_id: str, collection_id: str) -> list[MatchingTerm]:
|
|
364
325
|
"""
|
|
365
326
|
Check if the given value may or may not represent a term in the given collection. The function
|
|
366
327
|
returns the terms that the value matches.
|
|
@@ -388,21 +349,17 @@ def valid_term_in_collection(value: str,
|
|
|
388
349
|
:rtype: list[MatchingTerm]
|
|
389
350
|
:raises EsgvocNotFoundError: If any of the provided ids is not found
|
|
390
351
|
"""
|
|
391
|
-
with get_universe_session() as universe_session,
|
|
392
|
-
|
|
393
|
-
return _valid_term_in_collection(value, project_id, collection_id,
|
|
394
|
-
universe_session, project_session)
|
|
352
|
+
with get_universe_session() as universe_session, _get_project_session_with_exception(project_id) as project_session:
|
|
353
|
+
return _valid_term_in_collection(value, project_id, collection_id, universe_session, project_session)
|
|
395
354
|
|
|
396
355
|
|
|
397
|
-
def _valid_term_in_project(
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
project_session: Session) -> list[MatchingTerm]:
|
|
356
|
+
def _valid_term_in_project(
|
|
357
|
+
value: str, project_id: str, universe_session: Session, project_session: Session
|
|
358
|
+
) -> list[MatchingTerm]:
|
|
401
359
|
result = list()
|
|
402
360
|
collections = _get_all_collections_in_project(project_session)
|
|
403
361
|
for collection in collections:
|
|
404
|
-
result.extend(_valid_term_in_collection(value, project_id, collection.id,
|
|
405
|
-
universe_session, project_session))
|
|
362
|
+
result.extend(_valid_term_in_collection(value, project_id, collection.id, universe_session, project_session))
|
|
406
363
|
return result
|
|
407
364
|
|
|
408
365
|
|
|
@@ -431,8 +388,7 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
|
|
|
431
388
|
:rtype: list[MatchingTerm]
|
|
432
389
|
:raises EsgvocNotFoundError: If the `project_id` is not found
|
|
433
390
|
"""
|
|
434
|
-
with get_universe_session() as universe_session,
|
|
435
|
-
_get_project_session_with_exception(project_id) as project_session:
|
|
391
|
+
with get_universe_session() as universe_session, _get_project_session_with_exception(project_id) as project_session:
|
|
436
392
|
return _valid_term_in_project(value, project_id, universe_session, project_session)
|
|
437
393
|
|
|
438
394
|
|
|
@@ -460,15 +416,13 @@ def valid_term_in_all_projects(value: str) -> list[MatchingTerm]:
|
|
|
460
416
|
with get_universe_session() as universe_session:
|
|
461
417
|
for project_id in get_all_projects():
|
|
462
418
|
with _get_project_session_with_exception(project_id) as project_session:
|
|
463
|
-
result.extend(_valid_term_in_project(value, project_id,
|
|
464
|
-
universe_session, project_session))
|
|
419
|
+
result.extend(_valid_term_in_project(value, project_id, universe_session, project_session))
|
|
465
420
|
return result
|
|
466
421
|
|
|
467
422
|
|
|
468
|
-
def get_all_terms_in_collection(
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
-> list[DataDescriptor]:
|
|
423
|
+
def get_all_terms_in_collection(
|
|
424
|
+
project_id: str, collection_id: str, selected_term_fields: Iterable[str] | None = None
|
|
425
|
+
) -> list[DataDescriptor]:
|
|
472
426
|
"""
|
|
473
427
|
Gets all terms of the given collection of a project.
|
|
474
428
|
This function performs an exact match on the `project_id` and `collection_id`,
|
|
@@ -495,7 +449,7 @@ def get_all_terms_in_collection(project_id: str,
|
|
|
495
449
|
return result
|
|
496
450
|
|
|
497
451
|
|
|
498
|
-
def _get_all_collections_in_project(session: Session) -> list[
|
|
452
|
+
def _get_all_collections_in_project(session: Session) -> list[PCollection]:
|
|
499
453
|
project = session.get(Project, constants.SQLITE_FIRST_PK)
|
|
500
454
|
# Project can't be missing if session exists.
|
|
501
455
|
return project.collections # type: ignore
|
|
@@ -522,15 +476,17 @@ def get_all_collections_in_project(project_id: str) -> list[str]:
|
|
|
522
476
|
return result
|
|
523
477
|
|
|
524
478
|
|
|
525
|
-
def _get_all_terms_in_collection(
|
|
526
|
-
|
|
479
|
+
def _get_all_terms_in_collection(
|
|
480
|
+
collection: PCollection, selected_term_fields: Iterable[str] | None
|
|
481
|
+
) -> list[DataDescriptor]:
|
|
527
482
|
result: list[DataDescriptor] = list()
|
|
528
483
|
instantiate_pydantic_terms(collection.terms, result, selected_term_fields)
|
|
529
484
|
return result
|
|
530
485
|
|
|
531
486
|
|
|
532
|
-
def get_all_terms_in_project(
|
|
533
|
-
|
|
487
|
+
def get_all_terms_in_project(
|
|
488
|
+
project_id: str, selected_term_fields: Iterable[str] | None = None
|
|
489
|
+
) -> list[DataDescriptor]:
|
|
534
490
|
"""
|
|
535
491
|
Gets all terms of the given project.
|
|
536
492
|
This function performs an exact match on the `project_id` and
|
|
@@ -556,8 +512,9 @@ def get_all_terms_in_project(project_id: str,
|
|
|
556
512
|
return result
|
|
557
513
|
|
|
558
514
|
|
|
559
|
-
def get_all_terms_in_all_projects(
|
|
560
|
-
|
|
515
|
+
def get_all_terms_in_all_projects(
|
|
516
|
+
selected_term_fields: Iterable[str] | None = None,
|
|
517
|
+
) -> list[tuple[str, list[DataDescriptor]]]:
|
|
561
518
|
"""
|
|
562
519
|
Gets all terms of all projects.
|
|
563
520
|
|
|
@@ -592,8 +549,9 @@ def _get_term_in_project(term_id: str, session: Session) -> PTerm | None:
|
|
|
592
549
|
return result
|
|
593
550
|
|
|
594
551
|
|
|
595
|
-
def get_term_in_project(
|
|
596
|
-
|
|
552
|
+
def get_term_in_project(
|
|
553
|
+
project_id: str, term_id: str, selected_term_fields: Iterable[str] | None = None
|
|
554
|
+
) -> DataDescriptor | None:
|
|
597
555
|
"""
|
|
598
556
|
Returns the first occurrence of the terms, in the given project, whose id corresponds exactly to
|
|
599
557
|
the given term id.
|
|
@@ -623,15 +581,15 @@ def get_term_in_project(project_id: str, term_id: str,
|
|
|
623
581
|
|
|
624
582
|
|
|
625
583
|
def _get_term_in_collection(collection_id: str, term_id: str, session: Session) -> PTerm | None:
|
|
626
|
-
statement = select(PTerm).join(
|
|
627
|
-
PTerm.id == term_id)
|
|
584
|
+
statement = select(PTerm).join(PCollection).where(PCollection.id == collection_id, PTerm.id == term_id)
|
|
628
585
|
results = session.exec(statement)
|
|
629
586
|
result = results.one_or_none()
|
|
630
587
|
return result
|
|
631
588
|
|
|
632
589
|
|
|
633
|
-
def get_term_in_collection(
|
|
634
|
-
|
|
590
|
+
def get_term_in_collection(
|
|
591
|
+
project_id: str, collection_id: str, term_id: str, selected_term_fields: Iterable[str] | None = None
|
|
592
|
+
) -> DataDescriptor | None:
|
|
635
593
|
"""
|
|
636
594
|
Returns the term, in the given project and collection,
|
|
637
595
|
whose id corresponds exactly to the given term id.
|
|
@@ -661,8 +619,8 @@ def get_term_in_collection(project_id: str, collection_id: str, term_id: str,
|
|
|
661
619
|
return result
|
|
662
620
|
|
|
663
621
|
|
|
664
|
-
def _get_collection_in_project(collection_id: str, session: Session) ->
|
|
665
|
-
statement = select(
|
|
622
|
+
def _get_collection_in_project(collection_id: str, session: Session) -> PCollection | None:
|
|
623
|
+
statement = select(PCollection).where(PCollection.id == collection_id)
|
|
666
624
|
results = session.exec(statement)
|
|
667
625
|
result = results.one_or_none()
|
|
668
626
|
return result
|
|
@@ -718,16 +676,13 @@ def get_project(project_id: str) -> ProjectSpecs | None:
|
|
|
718
676
|
return result
|
|
719
677
|
|
|
720
678
|
|
|
721
|
-
def _get_collection_from_data_descriptor_in_project(data_descriptor_id: str,
|
|
722
|
-
|
|
723
|
-
statement = select(Collection).where(Collection.data_descriptor_id == data_descriptor_id)
|
|
679
|
+
def _get_collection_from_data_descriptor_in_project(data_descriptor_id: str, session: Session) -> PCollection | None:
|
|
680
|
+
statement = select(PCollection).where(PCollection.data_descriptor_id == data_descriptor_id)
|
|
724
681
|
result = session.exec(statement).one_or_none()
|
|
725
682
|
return result
|
|
726
683
|
|
|
727
684
|
|
|
728
|
-
def get_collection_from_data_descriptor_in_project(project_id: str,
|
|
729
|
-
data_descriptor_id: str) \
|
|
730
|
-
-> tuple[str, dict] | None:
|
|
685
|
+
def get_collection_from_data_descriptor_in_project(project_id: str, data_descriptor_id: str) -> tuple[str, dict] | None:
|
|
731
686
|
"""
|
|
732
687
|
Returns the collection, in the given project, that corresponds to the given data descriptor
|
|
733
688
|
in the universe.
|
|
@@ -746,15 +701,13 @@ def get_collection_from_data_descriptor_in_project(project_id: str,
|
|
|
746
701
|
result: tuple[str, dict] | None = None
|
|
747
702
|
if connection := _get_project_connection(project_id):
|
|
748
703
|
with connection.create_session() as session:
|
|
749
|
-
collection_found = _get_collection_from_data_descriptor_in_project(data_descriptor_id,
|
|
750
|
-
session)
|
|
704
|
+
collection_found = _get_collection_from_data_descriptor_in_project(data_descriptor_id, session)
|
|
751
705
|
if collection_found:
|
|
752
706
|
result = collection_found.id, collection_found.context
|
|
753
707
|
return result
|
|
754
708
|
|
|
755
709
|
|
|
756
|
-
def get_collection_from_data_descriptor_in_all_projects(data_descriptor_id: str)
|
|
757
|
-
-> list[tuple[str, str, dict]]:
|
|
710
|
+
def get_collection_from_data_descriptor_in_all_projects(data_descriptor_id: str) -> list[tuple[str, str, dict]]:
|
|
758
711
|
"""
|
|
759
712
|
Returns the collections, in all projects, that correspond to the given data descriptor
|
|
760
713
|
in the universe.
|
|
@@ -773,28 +726,28 @@ def get_collection_from_data_descriptor_in_all_projects(data_descriptor_id: str)
|
|
|
773
726
|
result = list()
|
|
774
727
|
project_ids = get_all_projects()
|
|
775
728
|
for project_id in project_ids:
|
|
776
|
-
collection_found = get_collection_from_data_descriptor_in_project(project_id,
|
|
777
|
-
data_descriptor_id)
|
|
729
|
+
collection_found = get_collection_from_data_descriptor_in_project(project_id, data_descriptor_id)
|
|
778
730
|
if collection_found:
|
|
779
731
|
result.append((project_id, collection_found[0], collection_found[1]))
|
|
780
732
|
return result
|
|
781
733
|
|
|
782
734
|
|
|
783
|
-
def _get_term_from_universe_term_id_in_project(
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
statement =
|
|
787
|
-
|
|
735
|
+
def _get_term_from_universe_term_id_in_project(
|
|
736
|
+
data_descriptor_id: str, universe_term_id: str, project_session: Session
|
|
737
|
+
) -> PTerm | None:
|
|
738
|
+
statement = (
|
|
739
|
+
select(PTerm)
|
|
740
|
+
.join(PCollection)
|
|
741
|
+
.where(PCollection.data_descriptor_id == data_descriptor_id, PTerm.id == universe_term_id)
|
|
742
|
+
)
|
|
788
743
|
results = project_session.exec(statement)
|
|
789
744
|
result = results.one_or_none()
|
|
790
745
|
return result
|
|
791
746
|
|
|
792
747
|
|
|
793
|
-
def get_term_from_universe_term_id_in_project(
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
selected_term_fields: Iterable[str] | None = None) \
|
|
797
|
-
-> tuple[str, DataDescriptor] | None:
|
|
748
|
+
def get_term_from_universe_term_id_in_project(
|
|
749
|
+
project_id: str, data_descriptor_id: str, universe_term_id: str, selected_term_fields: Iterable[str] | None = None
|
|
750
|
+
) -> tuple[str, DataDescriptor] | None:
|
|
798
751
|
"""
|
|
799
752
|
Returns the term, in the given project, that corresponds to the given term in the universe.
|
|
800
753
|
This function performs an exact match on the `project_id`, `data_descriptor_id`
|
|
@@ -818,19 +771,16 @@ def get_term_from_universe_term_id_in_project(project_id: str,
|
|
|
818
771
|
result: tuple[str, DataDescriptor] | None = None
|
|
819
772
|
if connection := _get_project_connection(project_id):
|
|
820
773
|
with connection.create_session() as session:
|
|
821
|
-
term_found = _get_term_from_universe_term_id_in_project(data_descriptor_id,
|
|
822
|
-
universe_term_id,
|
|
823
|
-
session)
|
|
774
|
+
term_found = _get_term_from_universe_term_id_in_project(data_descriptor_id, universe_term_id, session)
|
|
824
775
|
if term_found:
|
|
825
776
|
pydantic_term = instantiate_pydantic_term(term_found, selected_term_fields)
|
|
826
777
|
result = (term_found.collection.id, pydantic_term)
|
|
827
778
|
return result
|
|
828
779
|
|
|
829
780
|
|
|
830
|
-
def get_term_from_universe_term_id_in_all_projects(
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
-> list[tuple[str, str, DataDescriptor]]:
|
|
781
|
+
def get_term_from_universe_term_id_in_all_projects(
|
|
782
|
+
data_descriptor_id: str, universe_term_id: str, selected_term_fields: Iterable[str] | None = None
|
|
783
|
+
) -> list[tuple[str, str, DataDescriptor]]:
|
|
834
784
|
"""
|
|
835
785
|
Returns the terms, in all projects, that correspond to the given term in the universe.
|
|
836
786
|
This function performs an exact match on the `data_descriptor_id`
|
|
@@ -853,38 +803,37 @@ def get_term_from_universe_term_id_in_all_projects(data_descriptor_id: str,
|
|
|
853
803
|
result: list[tuple[str, str, DataDescriptor]] = list()
|
|
854
804
|
project_ids = get_all_projects()
|
|
855
805
|
for project_id in project_ids:
|
|
856
|
-
term_found = get_term_from_universe_term_id_in_project(
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
selected_term_fields)
|
|
806
|
+
term_found = get_term_from_universe_term_id_in_project(
|
|
807
|
+
project_id, data_descriptor_id, universe_term_id, selected_term_fields
|
|
808
|
+
)
|
|
860
809
|
if term_found:
|
|
861
810
|
result.append((project_id, term_found[0], term_found[1]))
|
|
862
811
|
return result
|
|
863
812
|
|
|
864
813
|
|
|
865
|
-
def _find_collections_in_project(
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
limit: int | None = None,
|
|
869
|
-
offset: int | None = None) -> Sequence[Collection]:
|
|
814
|
+
def _find_collections_in_project(
|
|
815
|
+
expression: str, session: Session, only_id: bool = False, limit: int | None = None, offset: int | None = None
|
|
816
|
+
) -> Sequence[PCollection]:
|
|
870
817
|
matching_condition = generate_matching_condition(PCollectionFTS5, expression, only_id)
|
|
871
818
|
tmp_statement = select(PCollectionFTS5).where(matching_condition)
|
|
872
|
-
statement = select(
|
|
819
|
+
statement = select(PCollection).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
|
|
873
820
|
return execute_match_statement(expression, statement, session)
|
|
874
821
|
|
|
875
822
|
|
|
876
|
-
def find_collections_in_project(
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
offset: int | None = None) -> list[tuple[str, dict]]:
|
|
823
|
+
def find_collections_in_project(
|
|
824
|
+
expression: str, project_id: str, only_id: bool = False, limit: int | None = None, offset: int | None = None
|
|
825
|
+
) -> list[tuple[str, dict]]:
|
|
880
826
|
"""
|
|
881
827
|
Find collections in the given project based on a full text search defined by the given `expression`.
|
|
882
|
-
The `expression`
|
|
883
|
-
|
|
884
|
-
and
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
828
|
+
The `expression` can be composed of one or multiple keywords.
|
|
829
|
+
The keywords can be combined with boolean operators: `AND`,
|
|
830
|
+
`OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
|
|
831
|
+
if no boolean operator is provided, whitespaces are handled as if there were
|
|
832
|
+
an implicit AND operator between each pair of keywords. Note that this
|
|
833
|
+
function does not provide any priority operator (parenthesis).
|
|
834
|
+
Keywords can define prefixes when adding a `*` at the end of them.
|
|
835
|
+
If the expression is composed of only one keyword, the function
|
|
836
|
+
automatically defines it as a prefix.
|
|
888
837
|
The function returns a list of collection ids and contexts, sorted according to the
|
|
889
838
|
bm25 ranking metric (list index `0` has the highest rank).
|
|
890
839
|
This function performs an exact match on the `project_id`,
|
|
@@ -915,52 +864,57 @@ def find_collections_in_project(expression: str, project_id: str,
|
|
|
915
864
|
result: list[tuple[str, dict]] = list()
|
|
916
865
|
if connection := _get_project_connection(project_id):
|
|
917
866
|
with connection.create_session() as session:
|
|
918
|
-
collections_found = _find_collections_in_project(expression, session, only_id,
|
|
919
|
-
limit, offset)
|
|
867
|
+
collections_found = _find_collections_in_project(expression, session, only_id, limit, offset)
|
|
920
868
|
for collection in collections_found:
|
|
921
869
|
result.append((collection.id, collection.context))
|
|
922
870
|
return result
|
|
923
871
|
|
|
924
872
|
|
|
925
|
-
def _find_terms_in_collection(
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
873
|
+
def _find_terms_in_collection(
|
|
874
|
+
expression: str,
|
|
875
|
+
collection_id: str,
|
|
876
|
+
session: Session,
|
|
877
|
+
only_id: bool = False,
|
|
878
|
+
limit: int | None = None,
|
|
879
|
+
offset: int | None = None,
|
|
880
|
+
) -> Sequence[PTerm]:
|
|
931
881
|
matching_condition = generate_matching_condition(PTermFTS5, expression, only_id)
|
|
932
|
-
where_condition =
|
|
933
|
-
tmp_statement = select(PTermFTS5).join(
|
|
882
|
+
where_condition = PCollection.id == collection_id, matching_condition
|
|
883
|
+
tmp_statement = select(PTermFTS5).join(PCollection).where(*where_condition)
|
|
934
884
|
statement = select(PTerm).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
|
|
935
885
|
return execute_match_statement(expression, statement, session)
|
|
936
886
|
|
|
937
887
|
|
|
938
|
-
def _find_terms_in_project(
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
limit: int | None = None,
|
|
942
|
-
offset: int | None = None) -> Sequence[PTerm]:
|
|
888
|
+
def _find_terms_in_project(
|
|
889
|
+
expression: str, session: Session, only_id: bool = False, limit: int | None = None, offset: int | None = None
|
|
890
|
+
) -> Sequence[PTerm]:
|
|
943
891
|
matching_condition = generate_matching_condition(PTermFTS5, expression, only_id)
|
|
944
892
|
tmp_statement = select(PTermFTS5).where(matching_condition)
|
|
945
893
|
statement = select(PTerm).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
|
|
946
894
|
return execute_match_statement(expression, statement, session)
|
|
947
895
|
|
|
948
896
|
|
|
949
|
-
def find_terms_in_collection(
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
897
|
+
def find_terms_in_collection(
|
|
898
|
+
expression: str,
|
|
899
|
+
project_id: str,
|
|
900
|
+
collection_id: str,
|
|
901
|
+
only_id: bool = False,
|
|
902
|
+
limit: int | None = None,
|
|
903
|
+
offset: int | None = None,
|
|
904
|
+
selected_term_fields: Iterable[str] | None = None,
|
|
905
|
+
) -> list[DataDescriptor]:
|
|
956
906
|
"""
|
|
957
907
|
Find terms in the given project and collection based on a full text search defined by the given
|
|
958
|
-
`expression`.
|
|
959
|
-
`
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
908
|
+
`expression`.
|
|
909
|
+
The `expression` can be composed of one or multiple keywords.
|
|
910
|
+
The keywords can be combined with boolean operators: `AND`,
|
|
911
|
+
`OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
|
|
912
|
+
if no boolean operator is provided, whitespaces are handled as if there were
|
|
913
|
+
an implicit AND operator between each pair of keywords. Note that this
|
|
914
|
+
function does not provide any priority operator (parenthesis).
|
|
915
|
+
Keywords can define prefixes when adding a `*` at the end of them.
|
|
916
|
+
If the expression is composed of only one keyword, the function
|
|
917
|
+
automatically defines it as a prefix.
|
|
964
918
|
The function returns a list of term instances, sorted according to the
|
|
965
919
|
bm25 ranking metric (list index `0` has the highest rank).
|
|
966
920
|
This function performs an exact match on the `project_id` and `collection_id`,
|
|
@@ -995,27 +949,30 @@ def find_terms_in_collection(expression: str, project_id: str,
|
|
|
995
949
|
result: list[DataDescriptor] = list()
|
|
996
950
|
if connection := _get_project_connection(project_id):
|
|
997
951
|
with connection.create_session() as session:
|
|
998
|
-
pterms_found = _find_terms_in_collection(expression, collection_id, session,
|
|
999
|
-
only_id, limit, offset)
|
|
952
|
+
pterms_found = _find_terms_in_collection(expression, collection_id, session, only_id, limit, offset)
|
|
1000
953
|
instantiate_pydantic_terms(pterms_found, result, selected_term_fields)
|
|
1001
954
|
return result
|
|
1002
955
|
|
|
1003
956
|
|
|
1004
|
-
def find_terms_in_project(
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
957
|
+
def find_terms_in_project(
|
|
958
|
+
expression: str,
|
|
959
|
+
project_id: str,
|
|
960
|
+
only_id: bool = False,
|
|
961
|
+
limit: int | None = None,
|
|
962
|
+
offset: int | None = None,
|
|
963
|
+
selected_term_fields: Iterable[str] | None = None,
|
|
964
|
+
) -> list[DataDescriptor]:
|
|
1011
965
|
"""
|
|
1012
|
-
Find terms in the given project on a full text search defined by the given
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
and
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
966
|
+
Find terms in the given project based on a full text search defined by the given `expression`.
|
|
967
|
+
The `expression` can be composed of one or multiple keywords.
|
|
968
|
+
The keywords can be combined with boolean operators: `AND`,
|
|
969
|
+
`OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
|
|
970
|
+
if no boolean operator is provided, whitespaces are handled as if there were
|
|
971
|
+
an implicit AND operator between each pair of keywords. Note that this
|
|
972
|
+
function does not provide any priority operator (parenthesis).
|
|
973
|
+
Keywords can define prefixes when adding a `*` at the end of them.
|
|
974
|
+
If the expression is composed of only one keyword, the function
|
|
975
|
+
automatically defines it as a prefix.
|
|
1019
976
|
The function returns a list of term instances, sorted according to the
|
|
1020
977
|
bm25 ranking metric (list index `0` has the highest rank).
|
|
1021
978
|
This function performs an exact match on the `project_id`,
|
|
@@ -1053,20 +1010,24 @@ def find_terms_in_project(expression: str,
|
|
|
1053
1010
|
return result
|
|
1054
1011
|
|
|
1055
1012
|
|
|
1056
|
-
def find_terms_in_all_projects(
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1013
|
+
def find_terms_in_all_projects(
|
|
1014
|
+
expression: str,
|
|
1015
|
+
only_id: bool = False,
|
|
1016
|
+
limit: int | None = None,
|
|
1017
|
+
offset: int | None = None,
|
|
1018
|
+
selected_term_fields: Iterable[str] | None = None,
|
|
1019
|
+
) -> list[tuple[str, list[DataDescriptor]]]:
|
|
1062
1020
|
"""
|
|
1063
|
-
Find terms in
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
and
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1021
|
+
Find terms in all projects based on a full text search defined by the given `expression`.
|
|
1022
|
+
The `expression` can be composed of one or multiple keywords.
|
|
1023
|
+
The keywords can be combined with boolean operators: `AND`,
|
|
1024
|
+
`OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
|
|
1025
|
+
if no boolean operator is provided, whitespaces are handled as if there were
|
|
1026
|
+
an implicit AND operator between each pair of keywords. Note that this
|
|
1027
|
+
function does not provide any priority operator (parenthesis).
|
|
1028
|
+
Keywords can define prefixes when adding a `*` at the end of them.
|
|
1029
|
+
If the expression is composed of only one keyword, the function
|
|
1030
|
+
automatically defines it as a prefix.
|
|
1070
1031
|
The function returns a list of project ids and term instances, sorted according to the
|
|
1071
1032
|
bm25 ranking metric (list index `0` has the highest rank).
|
|
1072
1033
|
If the provided `expression` does not hit any term, the function returns an empty list.
|
|
@@ -1094,26 +1055,27 @@ def find_terms_in_all_projects(expression: str,
|
|
|
1094
1055
|
result: list[tuple[str, list[DataDescriptor]]] = list()
|
|
1095
1056
|
project_ids = get_all_projects()
|
|
1096
1057
|
for project_id in project_ids:
|
|
1097
|
-
terms_found = find_terms_in_project(expression, project_id, only_id,
|
|
1098
|
-
limit, offset, selected_term_fields)
|
|
1058
|
+
terms_found = find_terms_in_project(expression, project_id, only_id, limit, offset, selected_term_fields)
|
|
1099
1059
|
if terms_found:
|
|
1100
1060
|
result.append((project_id, terms_found))
|
|
1101
1061
|
return result
|
|
1102
1062
|
|
|
1103
1063
|
|
|
1104
|
-
def find_items_in_project(
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
limit: int | None = None,
|
|
1108
|
-
offset: int | None = None) -> list[Item]:
|
|
1064
|
+
def find_items_in_project(
|
|
1065
|
+
expression: str, project_id: str, only_id: bool = False, limit: int | None = None, offset: int | None = None
|
|
1066
|
+
) -> list[Item]:
|
|
1109
1067
|
"""
|
|
1110
1068
|
Find items, at the moment terms and collections, in the given project based on a full-text
|
|
1111
|
-
search defined by the given `expression`.
|
|
1112
|
-
`
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1069
|
+
search defined by the given `expression`.
|
|
1070
|
+
The `expression` can be composed of one or multiple keywords.
|
|
1071
|
+
The keywords can be combined with boolean operators: `AND`,
|
|
1072
|
+
`OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
|
|
1073
|
+
if no boolean operator is provided, whitespaces are handled as if there were
|
|
1074
|
+
an implicit AND operator between each pair of keywords. Note that this
|
|
1075
|
+
function does not provide any priority operator (parenthesis).
|
|
1076
|
+
Keywords can define prefixes when adding a `*` at the end of them.
|
|
1077
|
+
If the expression is composed of only one keyword, the function
|
|
1078
|
+
automatically defines it as a prefix.
|
|
1117
1079
|
The function returns a list of item instances sorted according to the
|
|
1118
1080
|
bm25 ranking metric (list index `0` has the highest rank).
|
|
1119
1081
|
This function performs an exact match on the `project_id`,
|
|
@@ -1143,23 +1105,24 @@ def find_items_in_project(expression: str,
|
|
|
1143
1105
|
result = list()
|
|
1144
1106
|
if connection := _get_project_connection(project_id):
|
|
1145
1107
|
with connection.create_session() as session:
|
|
1108
|
+
processed_expression = process_expression(expression)
|
|
1146
1109
|
if only_id:
|
|
1147
1110
|
collection_column = col(PCollectionFTS5.id)
|
|
1148
1111
|
term_column = col(PTermFTS5.id)
|
|
1149
1112
|
else:
|
|
1150
1113
|
collection_column = col(PCollectionFTS5.id) # TODO: use specs when implemented!
|
|
1151
1114
|
term_column = col(PTermFTS5.specs) # type: ignore
|
|
1152
|
-
collection_where_condition = collection_column.match(
|
|
1115
|
+
collection_where_condition = collection_column.match(processed_expression)
|
|
1153
1116
|
collection_statement = select(PCollectionFTS5.id,
|
|
1154
1117
|
text("'collection' AS TYPE"),
|
|
1155
1118
|
text(f"'{project_id}' AS TYPE"),
|
|
1156
1119
|
text('rank')).where(collection_where_condition)
|
|
1157
|
-
term_where_condition = term_column.match(
|
|
1120
|
+
term_where_condition = term_column.match(processed_expression)
|
|
1158
1121
|
term_statement = select(PTermFTS5.id,
|
|
1159
1122
|
text("'term' AS TYPE"),
|
|
1160
|
-
|
|
1161
|
-
text('rank')).join(
|
|
1123
|
+
PCollection.id,
|
|
1124
|
+
text('rank')).join(PCollection) \
|
|
1162
1125
|
.where(term_where_condition)
|
|
1163
|
-
result = execute_find_item_statements(session,
|
|
1126
|
+
result = execute_find_item_statements(session, processed_expression, collection_statement,
|
|
1164
1127
|
term_statement, limit, offset)
|
|
1165
1128
|
return result
|