esgvoc 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic. Click here for more details.

Files changed (79)
  1. esgvoc/__init__.py +3 -1
  2. esgvoc/api/__init__.py +96 -72
  3. esgvoc/api/data_descriptors/__init__.py +18 -12
  4. esgvoc/api/data_descriptors/activity.py +8 -45
  5. esgvoc/api/data_descriptors/area_label.py +6 -0
  6. esgvoc/api/data_descriptors/branded_suffix.py +5 -0
  7. esgvoc/api/data_descriptors/branded_variable.py +5 -0
  8. esgvoc/api/data_descriptors/consortium.py +16 -56
  9. esgvoc/api/data_descriptors/data_descriptor.py +106 -0
  10. esgvoc/api/data_descriptors/date.py +3 -46
  11. esgvoc/api/data_descriptors/directory_date.py +3 -46
  12. esgvoc/api/data_descriptors/experiment.py +19 -54
  13. esgvoc/api/data_descriptors/forcing_index.py +3 -45
  14. esgvoc/api/data_descriptors/frequency.py +6 -43
  15. esgvoc/api/data_descriptors/grid_label.py +6 -44
  16. esgvoc/api/data_descriptors/horizontal_label.py +6 -0
  17. esgvoc/api/data_descriptors/initialisation_index.py +3 -44
  18. esgvoc/api/data_descriptors/institution.py +11 -54
  19. esgvoc/api/data_descriptors/license.py +4 -44
  20. esgvoc/api/data_descriptors/mip_era.py +6 -44
  21. esgvoc/api/data_descriptors/model_component.py +7 -45
  22. esgvoc/api/data_descriptors/organisation.py +3 -40
  23. esgvoc/api/data_descriptors/physic_index.py +3 -45
  24. esgvoc/api/data_descriptors/product.py +4 -43
  25. esgvoc/api/data_descriptors/realisation_index.py +3 -44
  26. esgvoc/api/data_descriptors/realm.py +4 -42
  27. esgvoc/api/data_descriptors/resolution.py +6 -44
  28. esgvoc/api/data_descriptors/source.py +18 -53
  29. esgvoc/api/data_descriptors/source_type.py +3 -41
  30. esgvoc/api/data_descriptors/sub_experiment.py +3 -41
  31. esgvoc/api/data_descriptors/table.py +6 -48
  32. esgvoc/api/data_descriptors/temporal_label.py +6 -0
  33. esgvoc/api/data_descriptors/time_range.py +3 -27
  34. esgvoc/api/data_descriptors/variable.py +13 -71
  35. esgvoc/api/data_descriptors/variant_label.py +3 -47
  36. esgvoc/api/data_descriptors/vertical_label.py +5 -0
  37. esgvoc/api/project_specs.py +3 -2
  38. esgvoc/api/projects.py +727 -446
  39. esgvoc/api/py.typed +0 -0
  40. esgvoc/api/report.py +29 -16
  41. esgvoc/api/search.py +140 -95
  42. esgvoc/api/universe.py +362 -156
  43. esgvoc/apps/__init__.py +3 -4
  44. esgvoc/apps/drs/constants.py +1 -1
  45. esgvoc/apps/drs/generator.py +185 -198
  46. esgvoc/apps/drs/report.py +272 -136
  47. esgvoc/apps/drs/validator.py +132 -145
  48. esgvoc/apps/py.typed +0 -0
  49. esgvoc/cli/drs.py +32 -21
  50. esgvoc/cli/get.py +35 -31
  51. esgvoc/cli/install.py +11 -8
  52. esgvoc/cli/main.py +0 -2
  53. esgvoc/cli/status.py +5 -5
  54. esgvoc/cli/valid.py +40 -40
  55. esgvoc/core/constants.py +1 -1
  56. esgvoc/core/db/__init__.py +2 -4
  57. esgvoc/core/db/connection.py +5 -3
  58. esgvoc/core/db/models/project.py +50 -8
  59. esgvoc/core/db/models/universe.py +51 -12
  60. esgvoc/core/db/project_ingestion.py +60 -46
  61. esgvoc/core/db/universe_ingestion.py +58 -29
  62. esgvoc/core/exceptions.py +33 -0
  63. esgvoc/core/logging_handler.py +1 -1
  64. esgvoc/core/repo_fetcher.py +4 -3
  65. esgvoc/core/service/__init__.py +37 -5
  66. esgvoc/core/service/configuration/config_manager.py +188 -0
  67. esgvoc/core/service/configuration/setting.py +88 -0
  68. esgvoc/core/service/state.py +49 -32
  69. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/METADATA +34 -3
  70. esgvoc-0.4.0.dist-info/RECORD +80 -0
  71. esgvoc/api/_utils.py +0 -39
  72. esgvoc/cli/config.py +0 -82
  73. esgvoc/core/service/settings.py +0 -73
  74. esgvoc/core/service/settings.toml +0 -17
  75. esgvoc/core/service/settings_default.toml +0 -17
  76. esgvoc-0.2.1.dist-info/RECORD +0 -73
  77. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/WHEEL +0 -0
  78. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/entry_points.txt +0 -0
  79. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/api/projects.py CHANGED
@@ -1,91 +1,95 @@
1
1
  import re
2
- from typing import Sequence
2
+ from typing import Iterable, Sequence
3
+
4
+ from sqlalchemy import text
5
+ from sqlmodel import Session, and_, col, select
3
6
 
4
7
  import esgvoc.api.universe as universe
5
- import esgvoc.core.constants
8
+ import esgvoc.core.constants as constants
6
9
  import esgvoc.core.service as service
7
- from esgvoc.api._utils import (get_universe_session, instantiate_pydantic_term,
8
- instantiate_pydantic_terms)
9
- from esgvoc.api.report import (ProjectTermError, UniverseTermError,
10
- ValidationError, ValidationReport)
11
- from esgvoc.api.search import MatchingTerm, SearchSettings, _create_str_comparison_expression
10
+ from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor
12
11
  from esgvoc.api.project_specs import ProjectSpecs
12
+ from esgvoc.api.report import ProjectTermError, UniverseTermError, ValidationReport
13
+ from esgvoc.api.search import (
14
+ Item,
15
+ MatchingTerm,
16
+ execute_find_item_statements,
17
+ execute_match_statement,
18
+ generate_matching_condition,
19
+ get_universe_session,
20
+ handle_rank_limit_offset,
21
+ instantiate_pydantic_term,
22
+ instantiate_pydantic_terms,
23
+ )
13
24
  from esgvoc.core.db.connection import DBConnection
14
25
  from esgvoc.core.db.models.mixins import TermKind
15
- from esgvoc.core.db.models.project import Collection, Project, PTerm
26
+ from esgvoc.core.db.models.project import (
27
+ Collection,
28
+ PCollectionFTS5,
29
+ Project,
30
+ PTerm,
31
+ PTermFTS5,
32
+ )
16
33
  from esgvoc.core.db.models.universe import UTerm
17
- from pydantic import BaseModel
18
- from sqlmodel import Session, and_, select
19
-
34
+ from esgvoc.core.exceptions import EsgvocDbError, EsgvocNotFoundError, EsgvocNotImplementedError, EsgvocValueError
20
35
 
21
36
  # [OPTIMIZATION]
22
37
  _VALID_TERM_IN_COLLECTION_CACHE: dict[str, list[MatchingTerm]] = dict()
23
- _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE: dict[str, list[ValidationError]] = dict()
38
+ _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE: dict[str, list[UniverseTermError | ProjectTermError]] = dict()
24
39
 
25
40
 
26
- def get_project_specs(project_id: str) -> ProjectSpecs:
27
- project_specs = find_project(project_id)
28
- if not project_specs:
29
- msg = f'Unable to find project {project_id}'
30
- raise ValueError(msg)
31
- try:
32
- result = ProjectSpecs(**project_specs)
33
- except Exception as e:
34
- msg = f'Unable to read specs in project {project_id}'
35
- raise RuntimeError(msg) from e
36
- return result
37
-
41
+ def _get_project_connection(project_id: str) -> DBConnection | None:
42
+ if project_id in service.current_state.projects:
43
+ return service.current_state.projects[project_id].db_connection
44
+ else:
45
+ return None
38
46
 
39
- def _get_project_connection(project_id: str) -> DBConnection|None:
40
- return service.state_service.projects[project_id].db_connection
41
47
 
42
48
  def _get_project_session_with_exception(project_id: str) -> Session:
43
- if connection:=_get_project_connection(project_id):
49
+ if connection := _get_project_connection(project_id):
44
50
  project_session = connection.create_session()
45
51
  return project_session
46
52
  else:
47
- raise ValueError(f'unable to find project {project_id}')
48
-
53
+ raise EsgvocNotFoundError(f"unable to find project '{project_id}'")
49
54
 
50
- def _resolve_term(term_composite_part: dict,
55
+
56
+ def _resolve_term(composite_term_part: dict,
51
57
  universe_session: Session,
52
- project_session: Session) -> UTerm|PTerm:
58
+ project_session: Session) -> UTerm | PTerm:
53
59
  # First find the term in the universe, then in the current project
54
- term_id = term_composite_part[esgvoc.core.constants.TERM_ID_JSON_KEY]
55
- term_type = term_composite_part[esgvoc.core.constants.TERM_TYPE_JSON_KEY]
56
- uterms = universe._find_terms_in_data_descriptor(data_descriptor_id=term_type,
57
- term_id=term_id,
58
- session=universe_session,
59
- settings=None)
60
- if uterms:
61
- return uterms[0]
60
+ term_id = composite_term_part[constants.TERM_ID_JSON_KEY]
61
+ term_type = composite_term_part[constants.TERM_TYPE_JSON_KEY]
62
+ uterm = universe._get_term_in_data_descriptor(data_descriptor_id=term_type,
63
+ term_id=term_id,
64
+ session=universe_session)
65
+ if uterm:
66
+ return uterm
62
67
  else:
63
- pterms = _find_terms_in_collection(collection_id=term_type,
64
- term_id=term_id,
65
- session=project_session,
66
- settings=None)
67
- if pterms:
68
- return pterms[0]
68
+ pterm = _get_term_in_collection(collection_id=term_type,
69
+ term_id=term_id,
70
+ session=project_session)
71
+ if pterm:
72
+ return pterm
69
73
  else:
70
- msg = f'unable to find the term {term_id} in {term_type}'
71
- raise RuntimeError(msg)
74
+ msg = f"unable to find the term '{term_id}' in '{term_type}'"
75
+ raise EsgvocNotFoundError(msg)
72
76
 
73
77
 
74
- def _get_term_composite_separator_parts(term: UTerm|PTerm) -> tuple[str, list]:
75
- separator = term.specs[esgvoc.core.constants.COMPOSITE_SEPARATOR_JSON_KEY]
76
- parts = term.specs[esgvoc.core.constants.COMPOSITE_PARTS_JSON_KEY]
78
+ def _get_composite_term_separator_parts(term: UTerm | PTerm) -> tuple[str, list]:
79
+ separator = term.specs[constants.COMPOSITE_SEPARATOR_JSON_KEY]
80
+ parts = term.specs[constants.COMPOSITE_PARTS_JSON_KEY]
77
81
  return separator, parts
78
82
 
79
83
 
80
84
  # TODO: support optionality of parts of composite.
81
85
  # It is backtrack possible for more than one missing parts.
82
- def _valid_value_term_composite_with_separator(value: str,
83
- term: UTerm|PTerm,
86
+ def _valid_value_composite_term_with_separator(value: str,
87
+ term: UTerm | PTerm,
84
88
  universe_session: Session,
85
89
  project_session: Session)\
86
- -> list[ValidationError]:
90
+ -> list[UniverseTermError | ProjectTermError]:
87
91
  result = list()
88
- separator, parts = _get_term_composite_separator_parts(term)
92
+ separator, parts = _get_composite_term_separator_parts(term)
89
93
  if separator in value:
90
94
  splits = value.split(separator)
91
95
  if len(splits) == len(parts):
@@ -106,16 +110,20 @@ def _valid_value_term_composite_with_separator(value: str,
106
110
  return result
107
111
 
108
112
 
109
- def _transform_to_pattern(term: UTerm|PTerm,
113
+ def _transform_to_pattern(term: UTerm | PTerm,
110
114
  universe_session: Session,
111
115
  project_session: Session) -> str:
112
116
  match term.kind:
113
117
  case TermKind.PLAIN:
114
- result = term.specs[esgvoc.core.constants.DRS_SPECS_JSON_KEY]
118
+ if constants.DRS_SPECS_JSON_KEY in term.specs:
119
+ result = term.specs[constants.DRS_SPECS_JSON_KEY]
120
+ else:
121
+ raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " +
122
+ "Can't validate it.")
115
123
  case TermKind.PATTERN:
116
- result = term.specs[esgvoc.core.constants.PATTERN_JSON_KEY]
124
+ result = term.specs[constants.PATTERN_JSON_KEY]
117
125
  case TermKind.COMPOSITE:
118
- separator, parts = _get_term_composite_separator_parts(term)
126
+ separator, parts = _get_composite_term_separator_parts(term)
119
127
  result = ""
120
128
  for part in parts:
121
129
  resolved_term = _resolve_term(part, universe_session, project_session)
@@ -123,22 +131,22 @@ def _transform_to_pattern(term: UTerm|PTerm,
123
131
  result = f'{result}{pattern}{separator}'
124
132
  result = result.rstrip(separator)
125
133
  case _:
126
- raise NotImplementedError(f'unsupported term kind {term.kind}')
134
+ raise EsgvocDbError(f"unsupported term kind '{term.kind}'")
127
135
  return result
128
136
 
129
137
 
130
138
  # TODO: support optionality of parts of composite.
131
139
  # It is backtrack possible for more than one missing parts.
132
- def _valid_value_term_composite_separator_less(value: str,
133
- term: UTerm|PTerm,
140
+ def _valid_value_composite_term_separator_less(value: str,
141
+ term: UTerm | PTerm,
134
142
  universe_session: Session,
135
143
  project_session: Session)\
136
- -> list[ValidationError]:
144
+ -> list[UniverseTermError | ProjectTermError]:
137
145
  result = list()
138
146
  try:
139
147
  pattern = _transform_to_pattern(term, universe_session, project_session)
140
148
  try:
141
- # Term patterns are meant to be validated individually.
149
+ # Pattern terms are meant to be validated individually.
142
150
  # So their regex are defined as a whole (begins by a ^, ends by a $).
143
151
  # As the pattern is a concatenation of plain or regex, multiple ^ and $ can exist.
144
152
  # The latter must be removed.
@@ -146,34 +154,34 @@ def _valid_value_term_composite_separator_less(value: str,
146
154
  pattern = f'^{pattern}$'
147
155
  regex = re.compile(pattern)
148
156
  except Exception as e:
149
- msg = f'regex compilation error:\n{e}'
150
- raise ValueError(msg) from e
157
+ msg = f"regex compilation error while processing term '{term.id}'':\n{e}"
158
+ raise EsgvocDbError(msg) from e
151
159
  match = regex.match(value)
152
160
  if match is None:
153
161
  result.append(_create_term_error(value, term))
154
162
  return result
155
163
  except Exception as e:
156
- msg = f'cannot validate separator less composite term {term.id}:\n{e}'
157
- raise RuntimeError(msg) from e
164
+ msg = f"cannot validate separator less composite term '{term.id}':\n{e}"
165
+ raise EsgvocNotImplementedError(msg) from e
158
166
 
159
167
 
160
- def _valid_value_for_term_composite(value: str,
161
- term: UTerm|PTerm,
168
+ def _valid_value_for_composite_term(value: str,
169
+ term: UTerm | PTerm,
162
170
  universe_session: Session,
163
171
  project_session: Session)\
164
- -> list[ValidationError]:
172
+ -> list[UniverseTermError | ProjectTermError]:
165
173
  result = list()
166
- separator, _ = _get_term_composite_separator_parts(term)
174
+ separator, _ = _get_composite_term_separator_parts(term)
167
175
  if separator:
168
- result = _valid_value_term_composite_with_separator(value, term, universe_session,
176
+ result = _valid_value_composite_term_with_separator(value, term, universe_session,
169
177
  project_session)
170
178
  else:
171
- result = _valid_value_term_composite_separator_less(value, term, universe_session,
179
+ result = _valid_value_composite_term_separator_less(value, term, universe_session,
172
180
  project_session)
173
181
  return result
174
182
 
175
183
 
176
- def _create_term_error(value: str, term: UTerm|PTerm) -> ValidationError:
184
+ def _create_term_error(value: str, term: UTerm | PTerm) -> UniverseTermError | ProjectTermError:
177
185
  if isinstance(term, UTerm):
178
186
  return UniverseTermError(value=value, term=term.specs, term_kind=term.kind,
179
187
  data_descriptor_id=term.data_descriptor.id)
@@ -183,31 +191,35 @@ def _create_term_error(value: str, term: UTerm|PTerm) -> ValidationError:
183
191
 
184
192
 
185
193
  def _valid_value(value: str,
186
- term: UTerm|PTerm,
194
+ term: UTerm | PTerm,
187
195
  universe_session: Session,
188
- project_session: Session) -> list[ValidationError]:
196
+ project_session: Session) -> list[UniverseTermError | ProjectTermError]:
189
197
  result = list()
190
198
  match term.kind:
191
199
  case TermKind.PLAIN:
192
- if term.specs[esgvoc.core.constants.DRS_SPECS_JSON_KEY] != value:
193
- result.append(_create_term_error(value, term))
200
+ if constants.DRS_SPECS_JSON_KEY in term.specs:
201
+ if term.specs[constants.DRS_SPECS_JSON_KEY] != value:
202
+ result.append(_create_term_error(value, term))
203
+ else:
204
+ raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " +
205
+ "Can't validate it.")
194
206
  case TermKind.PATTERN:
195
- # OPTIM: Pattern can be compiled and stored for further matching.
196
- pattern_match = re.match(term.specs[esgvoc.core.constants.PATTERN_JSON_KEY], value)
207
+ # TODO: Pattern can be compiled and stored for further matching.
208
+ pattern_match = re.match(term.specs[constants.PATTERN_JSON_KEY], value)
197
209
  if pattern_match is None:
198
210
  result.append(_create_term_error(value, term))
199
211
  case TermKind.COMPOSITE:
200
- result.extend(_valid_value_for_term_composite(value, term,
212
+ result.extend(_valid_value_for_composite_term(value, term,
201
213
  universe_session,
202
214
  project_session))
203
215
  case _:
204
- raise NotImplementedError(f'unsupported term kind {term.kind}')
216
+ raise EsgvocDbError(f"unsupported term kind '{term.kind}'")
205
217
  return result
206
218
 
207
219
 
208
220
  def _check_value(value: str) -> str:
209
221
  if not value or value.isspace():
210
- raise ValueError('value should be set')
222
+ raise EsgvocValueError('value should be set')
211
223
  else:
212
224
  return value
213
225
 
@@ -215,9 +227,9 @@ def _check_value(value: str) -> str:
215
227
  def _search_plain_term_and_valid_value(value: str,
216
228
  collection_id: str,
217
229
  project_session: Session) \
218
- -> str|None:
230
+ -> str | None:
219
231
  where_expression = and_(Collection.id == collection_id,
220
- PTerm.specs[esgvoc.core.constants.DRS_SPECS_JSON_KEY] == f'"{value}"')
232
+ PTerm.specs[constants.DRS_SPECS_JSON_KEY] == f'"{value}"')
221
233
  statement = select(PTerm).join(Collection).where(where_expression)
222
234
  term = project_session.exec(statement).one_or_none()
223
235
  return term.id if term else None
@@ -238,7 +250,7 @@ def _valid_value_against_all_terms_of_collection(value: str,
238
250
  result.append(pterm.id)
239
251
  return result
240
252
  else:
241
- raise RuntimeError(f'collection {collection.id} has no term')
253
+ raise EsgvocDbError(f"collection '{collection.id}' has no term")
242
254
 
243
255
 
244
256
  def _valid_value_against_given_term(value: str,
@@ -247,27 +259,20 @@ def _valid_value_against_given_term(value: str,
247
259
  term_id: str,
248
260
  universe_session: Session,
249
261
  project_session: Session)\
250
- -> list[ValidationError]:
251
- # [OPTIMIZATION]
262
+ -> list[UniverseTermError | ProjectTermError]:
263
+ # [OPTIMIZATION]
252
264
  key = value + project_id + collection_id + term_id
253
265
  if key in _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE:
254
266
  result = _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key]
255
267
  else:
256
- try:
257
- terms = _find_terms_in_collection(collection_id,
258
- term_id,
259
- project_session,
260
- None)
261
- if terms:
262
- term = terms[0]
263
- result = _valid_value(value, term, universe_session, project_session)
264
- else:
265
- raise ValueError(f'unable to find term {term_id} ' +
266
- f'in collection {collection_id}')
267
- except Exception as e:
268
- msg = f'unable to valid term {term_id} ' +\
269
- f'in collection {collection_id}'
270
- raise RuntimeError(msg) from e
268
+ term = _get_term_in_collection(collection_id,
269
+ term_id,
270
+ project_session)
271
+ if term:
272
+ result = _valid_value(value, term, universe_session, project_session)
273
+ else:
274
+ raise EsgvocNotFoundError(f"unable to find term '{term_id}' " +
275
+ f"in collection '{collection_id}'")
271
276
  _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key] = result
272
277
  return result
273
278
 
@@ -280,11 +285,11 @@ def valid_term(value: str,
280
285
  """
281
286
  Check if the given value may or may not represent the given term. The functions returns
282
287
  a report that contains the possible errors.
283
-
288
+
284
289
  Behavior based on the nature of the term:
285
290
  - plain term: the function try to match the value on the drs_name field.
286
- - term pattern: the function try to match the value on the pattern field (regex).
287
- - term composite:
291
+ - pattern term: the function try to match the value on the pattern field (regex).
292
+ - composite term:
288
293
  - if the composite has got a separator, the function splits the value according to the\
289
294
  separator of the term then it try to match every part of the composite\
290
295
  with every split of the value.
@@ -292,7 +297,7 @@ def valid_term(value: str,
292
297
  composite so as to compare it as a regex to the value.
293
298
 
294
299
  If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
295
- the function raises a ValueError.
300
+ the function raises an EsgvocNotFoundError.
296
301
 
297
302
  :param value: A value to be validated
298
303
  :type value: str
@@ -304,7 +309,7 @@ def valid_term(value: str,
304
309
  :type term_id: str
305
310
  :returns: A validation report that contains the possible errors
306
311
  :rtype: ValidationReport
307
- :raises ValueError: If any of the provided ids is not found
312
+ :raises EsgvocNotFoundError: If any of the provided ids is not found
308
313
  """
309
314
  value = _check_value(value)
310
315
  with get_universe_session() as universe_session, \
@@ -320,18 +325,15 @@ def _valid_term_in_collection(value: str,
320
325
  universe_session: Session,
321
326
  project_session: Session) \
322
327
  -> list[MatchingTerm]:
323
- # [OPTIMIZATION]
328
+ # [OPTIMIZATION]
324
329
  key = value + project_id + collection_id
325
330
  if key in _VALID_TERM_IN_COLLECTION_CACHE:
326
331
  result = _VALID_TERM_IN_COLLECTION_CACHE[key]
327
332
  else:
328
333
  value = _check_value(value)
329
334
  result = list()
330
- collections = _find_collections_in_project(collection_id,
331
- project_session,
332
- None)
333
- if collections:
334
- collection = collections[0]
335
+ collection = _get_collection_in_project(collection_id, project_session)
336
+ if collection:
335
337
  match collection.term_kind:
336
338
  case TermKind.PLAIN:
337
339
  term_id_found = _search_plain_term_and_valid_value(value, collection_id,
@@ -349,8 +351,8 @@ def _valid_term_in_collection(value: str,
349
351
  collection_id=collection_id,
350
352
  term_id=term_id_found))
351
353
  else:
352
- msg = f'unable to find collection {collection_id}'
353
- raise ValueError(msg)
354
+ msg = f"unable to find collection '{collection_id}'"
355
+ raise EsgvocNotFoundError(msg)
354
356
  _VALID_TERM_IN_COLLECTION_CACHE[key] = result
355
357
  return result
356
358
 
@@ -362,11 +364,11 @@ def valid_term_in_collection(value: str,
362
364
  """
363
365
  Check if the given value may or may not represent a term in the given collection. The function
364
366
  returns the terms that the value matches.
365
-
367
+
366
368
  Behavior based on the nature of the term:
367
369
  - plain term: the function try to match the value on the drs_name field.
368
- - term pattern: the function try to match the value on the pattern field (regex).
369
- - term composite:
370
+ - pattern term: the function try to match the value on the pattern field (regex).
371
+ - composite term:
370
372
  - if the composite has got a separator, the function splits the value according to the \
371
373
  separator of the term then it try to match every part of the composite \
372
374
  with every split of the value.
@@ -374,7 +376,7 @@ def valid_term_in_collection(value: str,
374
376
  composite so as to compare it as a regex to the value.
375
377
 
376
378
  If any of the provided ids (`project_id` or `collection_id`) is not found,
377
- the function raises a ValueError.
379
+ the function raises an EsgvocNotFoundError.
378
380
 
379
381
  :param value: A value to be validated
380
382
  :type value: str
@@ -384,7 +386,7 @@ def valid_term_in_collection(value: str,
384
386
  :type collection_id: str
385
387
  :returns: The list of terms that the value matches.
386
388
  :rtype: list[MatchingTerm]
387
- :raises ValueError: If any of the provided ids is not found
389
+ :raises EsgvocNotFoundError: If any of the provided ids is not found
388
390
  """
389
391
  with get_universe_session() as universe_session, \
390
392
  _get_project_session_with_exception(project_id) as project_session:
@@ -408,18 +410,18 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
408
410
  """
409
411
  Check if the given value may or may not represent a term in the given project. The function
410
412
  returns the terms that the value matches.
411
-
413
+
412
414
  Behavior based on the nature of the term:
413
415
  - plain term: the function try to match the value on the drs_name field.
414
- - term pattern: the function try to match the value on the pattern field (regex).
415
- - term composite:
416
+ - pattern term: the function try to match the value on the pattern field (regex).
417
+ - composite term:
416
418
  - if the composite has got a separator, the function splits the value according to the \
417
419
  separator of the term then it try to match every part of the composite \
418
420
  with every split of the value.
419
421
  - if the composite hasn't got a separator, the function aggregates the parts of the \
420
422
  composite so as to compare it as a regex to the value.
421
423
 
422
- If the `project_id` is not found, the function raises a ValueError.
424
+ If the `project_id` is not found, the function raises an EsgvocNotFoundError.
423
425
 
424
426
  :param value: A value to be validated
425
427
  :type value: str
@@ -427,7 +429,7 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
427
429
  :type project_id: str
428
430
  :returns: The list of terms that the value matches.
429
431
  :rtype: list[MatchingTerm]
430
- :raises ValueError: If the `project_id` is not found
432
+ :raises EsgvocNotFoundError: If the `project_id` is not found
431
433
  """
432
434
  with get_universe_session() as universe_session, \
433
435
  _get_project_session_with_exception(project_id) as project_session:
@@ -438,11 +440,11 @@ def valid_term_in_all_projects(value: str) -> list[MatchingTerm]:
438
440
  """
439
441
  Check if the given value may or may not represent a term in all projects. The function
440
442
  returns the terms that the value matches.
441
-
443
+
442
444
  Behavior based on the nature of the term:
443
445
  - plain term: the function try to match the value on the drs_name field.
444
- - term pattern: the function try to match the value on the pattern field (regex).
445
- - term composite:
446
+ - pattern term: the function try to match the value on the pattern field (regex).
447
+ - composite term:
446
448
  - if the composite has got a separator, the function splits the value according to the \
447
449
  separator of the term then it try to match every part of the composite \
448
450
  with every split of the value.
@@ -463,422 +465,701 @@ def valid_term_in_all_projects(value: str) -> list[MatchingTerm]:
463
465
  return result
464
466
 
465
467
 
466
- def _find_terms_in_collection(collection_id: str,
467
- term_id: str,
468
- session: Session,
469
- settings: SearchSettings|None = None) -> Sequence[PTerm]:
470
- # Settings only apply on the term_id comparison.
471
- where_expression = _create_str_comparison_expression(field=PTerm.id,
472
- value=term_id,
473
- settings=settings)
474
- statement = select(PTerm).join(Collection).where(Collection.id==collection_id,
475
- where_expression)
476
- results = session.exec(statement)
477
- result = results.all()
468
+ def get_all_terms_in_collection(project_id: str,
469
+ collection_id: str,
470
+ selected_term_fields: Iterable[str] | None = None)\
471
+ -> list[DataDescriptor]:
472
+ """
473
+ Gets all terms of the given collection of a project.
474
+ This function performs an exact match on the `project_id` and `collection_id`,
475
+ and does not search for similar or related projects and collections.
476
+ If any of the provided ids (`project_id` or `collection_id`) is not found, the function
477
+ returns an empty list.
478
+
479
+ :param project_id: A project id
480
+ :type project_id: str
481
+ :param collection_id: A collection id
482
+ :type collection_id: str
483
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
484
+ fields of the terms are returned. If empty, selects the id and type fields.
485
+ :type selected_term_fields: Iterable[str] | None
486
+ :returns: a list of term instances. Returns an empty list if no matches are found.
487
+ :rtype: list[DataDescriptor]
488
+ """
489
+ result = list()
490
+ if connection := _get_project_connection(project_id):
491
+ with connection.create_session() as session:
492
+ collection = _get_collection_in_project(collection_id, session)
493
+ if collection:
494
+ result = _get_all_terms_in_collection(collection, selected_term_fields)
478
495
  return result
479
496
 
480
497
 
481
- def find_terms_in_collection(project_id:str,
482
- collection_id: str,
483
- term_id: str,
484
- settings: SearchSettings|None = None) \
485
- -> list[BaseModel]:
486
- """
487
- Finds one or more terms, based on the specified search settings, in the given collection of a project.
488
- This function performs an exact match on the `project_id` and `collection_id`,
489
- and does **not** search for similar or related projects and collections.
490
- The given `term_id` is searched according to the search type specified in the parameter `settings`,
491
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
492
- If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
493
- If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
494
- the function returns an empty list.
495
-
496
- Behavior based on search type:
497
- - `EXACT` and absence of `settings`: returns zero or one Pydantic term instance in the list.
498
- - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
499
- Pydantic term instances in the list.
498
+ def _get_all_collections_in_project(session: Session) -> list[Collection]:
499
+ project = session.get(Project, constants.SQLITE_FIRST_PK)
500
+ # Project can't be missing if session exists.
501
+ return project.collections # type: ignore
502
+
503
+
504
+ def get_all_collections_in_project(project_id: str) -> list[str]:
505
+ """
506
+ Gets all collections of the given project.
507
+ This function performs an exact match on the `project_id` and
508
+ does not search for similar or related projects.
509
+ If the provided `project_id` is not found, the function returns an empty list.
500
510
 
501
511
  :param project_id: A project id
502
512
  :type project_id: str
503
- :param collection_id: A collection
504
- :type collection_id: str
505
- :param term_id: A term id to be found
506
- :type term_id: str
507
- :param settings: The search settings
508
- :type settings: SearchSettings|None
509
- :returns: A list of Pydantic term instances. Returns an empty list if no matches are found.
510
- :rtype: list[BaseModel]
513
+ :returns: A list of collection ids. Returns an empty list if no matches are found.
514
+ :rtype: list[str]
511
515
  """
512
- result: list[BaseModel] = list()
513
- if connection:=_get_project_connection(project_id):
516
+ result = list()
517
+ if connection := _get_project_connection(project_id):
514
518
  with connection.create_session() as session:
515
- terms = _find_terms_in_collection(collection_id, term_id, session, settings)
516
- instantiate_pydantic_terms(terms, result)
519
+ collections = _get_all_collections_in_project(session)
520
+ for collection in collections:
521
+ result.append(collection.id)
517
522
  return result
518
523
 
519
524
 
520
- def _find_terms_from_data_descriptor_in_project(data_descriptor_id: str,
521
- term_id: str,
522
- session: Session,
523
- settings: SearchSettings|None = None) \
524
- -> Sequence[PTerm]:
525
- # Settings only apply on the term_id comparison.
526
- where_expression = _create_str_comparison_expression(field=PTerm.id,
527
- value=term_id,
528
- settings=settings)
529
- statement = select(PTerm).join(Collection).where(Collection.data_descriptor_id==data_descriptor_id,
530
- where_expression)
531
- results = session.exec(statement)
532
- result = results.all()
525
+ def _get_all_terms_in_collection(collection: Collection,
526
+ selected_term_fields: Iterable[str] | None) -> list[DataDescriptor]:
527
+ result: list[DataDescriptor] = list()
528
+ instantiate_pydantic_terms(collection.terms, result, selected_term_fields)
533
529
  return result
534
530
 
535
531
 
536
- def find_terms_from_data_descriptor_in_project(project_id: str,
537
- data_descriptor_id: str,
538
- term_id: str,
539
- settings: SearchSettings|None = None) \
540
- -> list[tuple[BaseModel, str]]:
541
- """
542
- Finds one or more terms in the given project which are instances of the given data descriptor
543
- in the universe, based on the specified search settings, in the given collection of a project.
544
- This function performs an exact match on the `project_id` and `data_descriptor_id`,
545
- and does **not** search for similar or related projects and data descriptors.
546
- The given `term_id` is searched according to the search type specified in the parameter `settings`,
547
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
548
- If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
549
- If any of the provided ids (`project_id`, `data_descriptor_id` or `term_id`) is not found,
550
- the function returns an empty list.
551
-
552
- Behavior based on search type:
553
- - `EXACT` and absence of `settings`: returns zero or one Pydantic term instance and \
554
- collection id in the list.
555
- - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
556
- Pydantic term instances and collection ids in the list.
532
+ def get_all_terms_in_project(project_id: str,
533
+ selected_term_fields: Iterable[str] | None = None) -> list[DataDescriptor]:
534
+ """
535
+ Gets all terms of the given project.
536
+ This function performs an exact match on the `project_id` and
537
+ does not search for similar or related projects.
538
+ Terms are unique within a collection but may have some synonyms in a project.
539
+ If the provided `project_id` is not found, the function returns an empty list.
557
540
 
558
541
  :param project_id: A project id
559
542
  :type project_id: str
560
- :param data_descriptor_id: A data descriptor
561
- :type data_descriptor_id: str
562
- :param term_id: A term id to be found
563
- :type term_id: str
564
- :param settings: The search settings
565
- :type settings: SearchSettings|None
566
- :returns: A list of tuple of Pydantic term instances and related collection ids. \
567
- Returns an empty list if no matches are found.
568
- :rtype: list[tuple[BaseModel, str]]
543
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
544
+ fields of the terms are returned. If empty, selects the id and type fields.
545
+ :type selected_term_fields: Iterable[str] | None
546
+ :returns: A list of term instances. Returns an empty list if no matches are found.
547
+ :rtype: list[DataDescriptor]
569
548
  """
570
549
  result = list()
571
- if connection:=_get_project_connection(project_id):
550
+ if connection := _get_project_connection(project_id):
572
551
  with connection.create_session() as session:
573
- terms = _find_terms_from_data_descriptor_in_project(data_descriptor_id,
574
- term_id,
575
- session,
576
- settings)
577
- for pterm in terms:
578
- collection_id = pterm.collection.id
579
- term = instantiate_pydantic_term(pterm)
580
- result.append((term, collection_id))
552
+ collections = _get_all_collections_in_project(session)
553
+ for collection in collections:
554
+ # Term may have some synonyms in a project.
555
+ result.extend(_get_all_terms_in_collection(collection, selected_term_fields))
581
556
  return result
582
557
 
583
558
 
584
- def find_terms_from_data_descriptor_in_all_projects(data_descriptor_id: str,
585
- term_id: str,
586
- settings: SearchSettings|None = None) \
587
- -> list[tuple[list[tuple[BaseModel, str]], str]]:
559
+ def get_all_terms_in_all_projects(selected_term_fields: Iterable[str] | None = None) \
560
+ -> list[tuple[str, list[DataDescriptor]]]:
588
561
  """
589
- Finds one or more terms in all projects which are instances of the given data descriptor
590
- in the universe, based on the specified search settings, in the given collection of a project.
591
- This function performs an exact match on the `data_descriptor_id`,
592
- and does **not** search for similar or related data descriptors.
593
- The given `term_id` is searched according to the search type specified in the parameter `settings`,
594
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
595
- If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
596
- If any of the provided ids (`data_descriptor_id` or `term_id`) is not found,
597
- the function returns an empty list.
598
-
599
- Behavior based on search type:
600
- - `EXACT` and absence of `settings`: returns zero or one Pydantic term instance and \
601
- collection id in the list.
602
- - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
603
- Pydantic term instances and collection ids in the list.
604
-
605
- :param data_descriptor_id: A data descriptor
606
- :type data_descriptor_id: str
607
- :param term_id: A term id to be found
608
- :type term_id: str
609
- :param settings: The search settings
610
- :type settings: SearchSettings|None
611
- :returns: A list of tuple of matching terms with their collection id, per project. \
612
- Returns an empty list if no matches are found.
613
- :rtype: list[tuple[list[tuple[BaseModel, str]], str]]
562
+ Gets all terms of all projects.
563
+
564
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
565
+ fields of the terms are returned. If empty, selects the id and type fields.
566
+ :type selected_term_fields: Iterable[str] | None
567
+ :returns: A list of tuples, each made of a project_id and the term instances of that project.
568
+ :rtype: list[tuple[str, list[DataDescriptor]]]
614
569
  """
615
570
  project_ids = get_all_projects()
616
- result: list[tuple[list[tuple[BaseModel, str]], str]] = list()
571
+ result = list()
617
572
  for project_id in project_ids:
618
- matching_terms = find_terms_from_data_descriptor_in_project(project_id,
619
- data_descriptor_id,
620
- term_id,
621
- settings)
622
- result.append((matching_terms, project_id))
573
+ terms = get_all_terms_in_project(project_id, selected_term_fields)
574
+ result.append((project_id, terms))
623
575
  return result
624
576
 
625
577
 
626
- def _find_terms_in_project(term_id: str,
627
- session: Session,
628
- settings: SearchSettings|None) -> Sequence[PTerm]:
629
- where_expression = _create_str_comparison_expression(field=PTerm.id,
630
- value=term_id,
631
- settings=settings)
632
- statement = select(PTerm).where(where_expression)
633
- results = session.exec(statement).all()
634
- return results
635
-
636
-
637
- def find_terms_in_all_projects(term_id: str,
638
- settings: SearchSettings|None = None) \
639
- -> list[BaseModel]:
640
- """
641
- Finds one or more terms, based on the specified search settings, in all projects.
642
- The given `term_id` is searched according to the search type specified in the parameter `settings`,
643
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
644
- If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
645
- Terms are unique within a collection but may have some synonyms within a project.
646
- If the provided `term_id` is not found, the function returns an empty list.
647
-
648
- :param term_id: A term id to be found
649
- :type term_id: str
650
- :param settings: The search settings
651
- :type settings: SearchSettings|None
652
- :returns: A list of Pydantic term instances. Returns an empty list if no matches are found.
653
- :rtype: list[BaseModel]
578
+ def get_all_projects() -> list[str]:
654
579
  """
655
- project_ids = get_all_projects()
656
- result = list()
657
- for project_id in project_ids:
658
- result.extend(find_terms_in_project(project_id, term_id, settings))
580
+ Gets all projects.
581
+
582
+ :returns: A list of project ids.
583
+ :rtype: list[str]
584
+ """
585
+ return list(service.current_state.projects.keys())
586
+
587
+
588
+ def _get_term_in_project(term_id: str, session: Session) -> PTerm | None:
589
+ statement = select(PTerm).where(PTerm.id == term_id)
590
+ results = session.exec(statement)
591
+ result = results.first() # Term ids are not supposed to be unique within a project.
659
592
  return result
660
593
 
661
594
 
662
- def find_terms_in_project(project_id: str,
663
- term_id: str,
664
- settings: SearchSettings|None = None) \
665
- -> list[BaseModel]:
595
+ def get_term_in_project(project_id: str, term_id: str,
596
+ selected_term_fields: Iterable[str] | None = None) -> DataDescriptor | None:
666
597
  """
667
- Finds one or more terms, based on the specified search settings, in a project.
668
- This function performs an exact match on the `project_id` and
669
- does **not** search for similar or related projects.
670
- The given `term_id` is searched according to the search type specified in the parameter `settings`,
671
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
672
- If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
673
- Terms are unique within a collection but may have some synonyms within a project.
674
- If any of the provided ids (`project_id` or `term_id`) is not found, the function returns
675
- an empty list.
598
+ Returns the first occurrence of the terms, in the given project, whose id corresponds exactly to
599
+ the given term id.
600
+ Terms are unique within a collection but may have some synonyms in a project.
601
+ This function performs an exact match on the `project_id` and `term_id`, and does not search
602
+ for similar or related projects and terms.
603
+ If any of the provided ids (`project_id` or `term_id`) is not found,
604
+ the function returns `None`.
676
605
 
677
- :param project_id: A project id
606
+ :param project_id: The id of the given project.
678
607
  :type project_id: str
679
- :param term_id: A term id to be found
608
+ :param term_id: The id of a term to be found.
680
609
  :type term_id: str
681
- :param settings: The search settings
682
- :type settings: SearchSettings|None
683
- :returns: A list of Pydantic term instances. Returns an empty list if no matches are found.
684
- :rtype: list[BaseModel]
610
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
611
+ fields of the terms are returned. If empty, selects the id and type fields.
612
+ :type selected_term_fields: Iterable[str] | None
613
+ :returns: A term instance. Returns `None` if no match is found.
614
+ :rtype: DataDescriptor | None
685
615
  """
686
- result: list[BaseModel] = list()
687
- if connection:=_get_project_connection(project_id):
616
+ result: DataDescriptor | None = None
617
+ if connection := _get_project_connection(project_id):
688
618
  with connection.create_session() as session:
689
- terms = _find_terms_in_project(term_id, session, settings)
690
- instantiate_pydantic_terms(terms, result)
619
+ term_found = _get_term_in_project(term_id, session)
620
+ if term_found:
621
+ result = instantiate_pydantic_term(term_found, selected_term_fields)
691
622
  return result
692
623
 
693
624
 
694
- def get_all_terms_in_collection(project_id: str,
695
- collection_id: str)\
696
- -> list[BaseModel]:
625
+ def _get_term_in_collection(collection_id: str, term_id: str, session: Session) -> PTerm | None:
626
+ statement = select(PTerm).join(Collection).where(Collection.id == collection_id,
627
+ PTerm.id == term_id)
628
+ results = session.exec(statement)
629
+ result = results.one_or_none()
630
+ return result
631
+
632
+
633
+ def get_term_in_collection(project_id: str, collection_id: str, term_id: str,
634
+ selected_term_fields: Iterable[str] | None = None) -> DataDescriptor | None:
697
635
  """
698
- Gets all terms of the given collection of a project.
699
- This function performs an exact match on the `project_id` and `collection_id`,
700
- and does **not** search for similar or related projects and collections.
701
- If any of the provided ids (`project_id` or `collection_id`) is not found, the function
702
- returns an empty list.
636
+ Returns the term, in the given project and collection,
637
+ whose id corresponds exactly to the given term id.
638
+ This function performs an exact match on the `project_id`, `collection_id` and `term_id`,
639
+ and does not search for similar or related projects, collections and terms.
640
+ If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
641
+ the function returns `None`.
703
642
 
704
- :param project_id: A project id
643
+ :param project_id: The id of the given project.
705
644
  :type project_id: str
706
- :param collection_id: A collection id
645
+ :param collection_id: The id of the given collection.
707
646
  :type collection_id: str
708
- :returns: a list of Pydantic term instances. Returns an empty list if no matches are found.
709
- :rtype: list[BaseModel]
647
+ :param term_id: The id of a term to be found.
648
+ :type term_id: str
649
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
650
+ fields of the terms are returned. If empty, selects the id and type fields.
651
+ :type selected_term_fields: Iterable[str] | None
652
+ :returns: A term instance. Returns `None` if no match is found.
653
+ :rtype: DataDescriptor | None
710
654
  """
711
- result = list()
712
- if connection:=_get_project_connection(project_id):
655
+ result: DataDescriptor | None = None
656
+ if connection := _get_project_connection(project_id):
713
657
  with connection.create_session() as session:
714
- collections = _find_collections_in_project(collection_id,
715
- session,
716
- None)
717
- if collections:
718
- collection = collections[0]
719
- result = _get_all_terms_in_collection(collection)
658
+ term_found = _get_term_in_collection(collection_id, term_id, session)
659
+ if term_found:
660
+ result = instantiate_pydantic_term(term_found, selected_term_fields)
720
661
  return result
721
662
 
722
663
 
723
- def _find_collections_in_project(collection_id: str,
724
- session: Session,
725
- settings: SearchSettings|None) \
726
- -> Sequence[Collection]:
727
- where_exp = _create_str_comparison_expression(field=Collection.id,
728
- value=collection_id,
729
- settings=settings)
730
- statement = select(Collection).where(where_exp)
664
+ def _get_collection_in_project(collection_id: str, session: Session) -> Collection | None:
665
+ statement = select(Collection).where(Collection.id == collection_id)
731
666
  results = session.exec(statement)
732
- result = results.all()
667
+ result = results.one_or_none()
733
668
  return result
734
669
 
735
670
 
736
- def find_collections_in_project(project_id: str,
737
- collection_id: str,
738
- settings: SearchSettings|None = None) \
739
- -> list[dict]:
740
- """
741
- Finds one or more collections of the given project.
742
- This function performs an exact match on the `project_id` and
743
- does **not** search for similar or related projects.
744
- The given `collection_id` is searched according to the search type specified in
745
- the parameter `settings`,
746
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
747
- If the parameter `settings` is `None`, this function performs an exact match on the `collection_id`.
748
- If any of the provided ids (`project_id` or `collection_id`) is not found, the function returns
749
- an empty list.
750
-
751
- Behavior based on search type:
752
- - `EXACT` and absence of `settings`: returns zero or one collection context in the list.
753
- - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
754
- collection contexts in the list.
671
+ def get_collection_in_project(project_id: str, collection_id: str) -> tuple[str, dict] | None:
672
+ """
673
+ Returns the collection, in the given project, whose id corresponds exactly to
674
+ the given collection id.
675
+ This function performs an exact match on the `project_id` and `collection_id`, and does not search
676
+ for similar or related projects and collections.
677
+ If any of the provided ids (`project_id` or `collection_id`) is not found,
678
+ the function returns `None`.
755
679
 
756
- :param project_id: A project id
680
+ :param project_id: The id of the given project.
757
681
  :type project_id: str
758
- :param collection_id: A collection id to be found
682
+ :param collection_id: The id of a collection to be found.
759
683
  :type collection_id: str
760
- :param settings: The search settings
761
- :type settings: SearchSettings|None
762
- :returns: A list of collection contexts. Returns an empty list if no matches are found.
763
- :rtype: list[dict]
684
+ :returns: A collection id and context. Returns `None` if no match is found.
685
+ :rtype: tuple[str, dict] | None
764
686
  """
765
- result = list()
766
- if connection:=_get_project_connection(project_id):
687
+ result: tuple[str, dict] | None = None
688
+ if connection := _get_project_connection(project_id):
767
689
  with connection.create_session() as session:
768
- collections = _find_collections_in_project(collection_id,
769
- session,
770
- settings)
771
- for collection in collections:
772
- result.append(collection.context)
690
+ collection_found = _get_collection_in_project(collection_id, session)
691
+ if collection_found:
692
+ result = collection_found.id, collection_found.context
773
693
  return result
774
694
 
775
695
 
776
- def _get_all_collections_in_project(session: Session) -> list[Collection]:
777
- project = session.get(Project, esgvoc.core.constants.SQLITE_FIRST_PK)
778
- # Project can't be missing if session exists.
779
- return project.collections # type: ignore
780
-
781
-
782
- def get_all_collections_in_project(project_id: str) -> list[str]:
696
+ def get_project(project_id: str) -> ProjectSpecs | None:
783
697
  """
784
- Gets all collections of the given project.
785
- This function performs an exact match on the `project_id` and
786
- does **not** search for similar or related projects.
787
- If the provided `project_id` is not found, the function returns an empty list.
698
+ Gets a project and returns its specifications.
699
+ This function performs an exact match on the `project_id` and
700
+ does not search for similar or related projects.
701
+ If the provided `project_id` is not found, the function returns `None`.
788
702
 
789
- :param project_id: A project id
703
+ :param project_id: A project id to be found
790
704
  :type project_id: str
791
- :returns: A list of collection ids. Returns an empty list if no matches are found.
792
- :rtype: list[str]
705
+ :returns: The specs of the project found. Returns `None` if no matches are found.
706
+ :rtype: ProjectSpecs | None
793
707
  """
794
- result = list()
795
- if connection:=_get_project_connection(project_id):
708
+ result: ProjectSpecs | None = None
709
+ if connection := _get_project_connection(project_id):
796
710
  with connection.create_session() as session:
797
- collections = _get_all_collections_in_project(session)
798
- for collection in collections:
799
- result.append(collection.id)
711
+ project = session.get(Project, constants.SQLITE_FIRST_PK)
712
+ try:
713
+ # Project can't be missing if session exists.
714
+ result = ProjectSpecs(**project.specs) # type: ignore
715
+ except Exception as e:
716
+ msg = f"unable to read specs in project '{project_id}'"
717
+ raise EsgvocDbError(msg) from e
800
718
  return result
801
719
 
802
720
 
803
- def _get_all_terms_in_collection(collection: Collection) -> list[BaseModel]:
804
- result: list[BaseModel] = list()
805
- instantiate_pydantic_terms(collection.terms, result)
721
+ def _get_collection_from_data_descriptor_in_project(data_descriptor_id: str,
722
+ session: Session) -> Collection | None:
723
+ statement = select(Collection).where(Collection.data_descriptor_id == data_descriptor_id)
724
+ result = session.exec(statement).one_or_none()
806
725
  return result
807
726
 
808
727
 
809
- def get_all_terms_in_project(project_id: str) -> list[BaseModel]:
728
+ def get_collection_from_data_descriptor_in_project(project_id: str,
729
+ data_descriptor_id: str) \
730
+ -> tuple[str, dict] | None:
810
731
  """
811
- Gets all terms of the given project.
812
- This function performs an exact match on the `project_id` and
813
- does **not** search for similar or related projects.
814
- Terms are unique within a collection but may have some synonyms in a project.
815
- If the provided `project_id` is not found, the function returns an empty list.
816
-
817
- :param project_id: A project id
732
+ Returns the collection, in the given project, that corresponds to the given data descriptor
733
+ in the universe.
734
+ This function performs an exact match on the `project_id` and `data_descriptor_id`,
735
+ and does not search for similar or related projects and data descriptors.
736
+ If any of the provided ids (`project_id` or `data_descriptor_id`) is not found, or if
737
+ there is no collection corresponding to the given data descriptor, the function returns `None`.
738
+
739
+ :param project_id: The id of the given project.
818
740
  :type project_id: str
819
- :returns: A list of Pydantic term instances. Returns an empty list if no matches are found.
820
- :rtype: list[BaseModel]
741
+ :param data_descriptor_id: The id of the given data descriptor.
742
+ :type data_descriptor_id: str
743
+ :returns: A collection id and context. Returns `None` if no matches are found.
744
+ :rtype: tuple[str, dict] | None
745
+ """
746
+ result: tuple[str, dict] | None = None
747
+ if connection := _get_project_connection(project_id):
748
+ with connection.create_session() as session:
749
+ collection_found = _get_collection_from_data_descriptor_in_project(data_descriptor_id,
750
+ session)
751
+ if collection_found:
752
+ result = collection_found.id, collection_found.context
753
+ return result
754
+
755
+
756
+ def get_collection_from_data_descriptor_in_all_projects(data_descriptor_id: str) \
757
+ -> list[tuple[str, str, dict]]:
758
+ """
759
+ Returns the collections, in all projects, that correspond to the given data descriptor
760
+ in the universe.
761
+ This function performs an exact match on `data_descriptor_id`,
762
+ and does not search for similar or related data descriptors.
763
+ If the provided `data_descriptor_id` is not found, or if
764
+ there is no collection corresponding to the given data descriptor, the function returns
765
+ an empty list.
766
+
767
+ :param data_descriptor_id: The id of the given data descriptor.
768
+ :type data_descriptor_id: str
769
+ :returns: A list of tuples of project id, collection id and collection context. \
770
+ Returns an empty list if no matches are found.
771
+ :rtype: list[tuple[str, str, dict]]
821
772
  """
822
773
  result = list()
823
- if connection:=_get_project_connection(project_id):
774
+ project_ids = get_all_projects()
775
+ for project_id in project_ids:
776
+ collection_found = get_collection_from_data_descriptor_in_project(project_id,
777
+ data_descriptor_id)
778
+ if collection_found:
779
+ result.append((project_id, collection_found[0], collection_found[1]))
780
+ return result
781
+
782
+
783
+ def _get_term_from_universe_term_id_in_project(data_descriptor_id: str,
784
+ universe_term_id: str,
785
+ project_session: Session) -> PTerm | None:
786
+ statement = select(PTerm).join(Collection).where(Collection.data_descriptor_id == data_descriptor_id,
787
+ PTerm.id == universe_term_id)
788
+ results = project_session.exec(statement)
789
+ result = results.one_or_none()
790
+ return result
791
+
792
+
793
+ def get_term_from_universe_term_id_in_project(project_id: str,
794
+ data_descriptor_id: str,
795
+ universe_term_id: str,
796
+ selected_term_fields: Iterable[str] | None = None) \
797
+ -> tuple[str, DataDescriptor] | None:
798
+ """
799
+ Returns the term, in the given project, that corresponds to the given term in the universe.
800
+ This function performs an exact match on the `project_id`, `data_descriptor_id`
801
+ and `universe_term_id`, and does not search for similar or related projects, data descriptors
802
+ and terms. If any of the provided ids (`project_id`, `data_descriptor_id` or `universe_term_id`)
803
+ is not found, or if there is no project term corresponding to the given universe term
804
+ the function returns `None`.
805
+
806
+ :param project_id: The id of the given project.
807
+ :type project_id: str
808
+ :param data_descriptor_id: The id of the data descriptor that contains the given universe term.
809
+ :type data_descriptor_id: str
810
+ :param universe_term_id: The id of the given universe term.
811
+ :type universe_term_id: str
812
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
813
+ fields of the terms are returned. If empty, selects the id and type fields.
814
+ :type selected_term_fields: Iterable[str] | None
815
+ :returns: A collection id and the project term instance. Returns `None` if no matches are found.
816
+ :rtype: tuple[str, DataDescriptor] | None
817
+ """
818
+ result: tuple[str, DataDescriptor] | None = None
819
+ if connection := _get_project_connection(project_id):
824
820
  with connection.create_session() as session:
825
- collections = _get_all_collections_in_project(session)
826
- for collection in collections:
827
- # Term may have some synonyms in a project.
828
- result.extend(_get_all_terms_in_collection(collection))
821
+ term_found = _get_term_from_universe_term_id_in_project(data_descriptor_id,
822
+ universe_term_id,
823
+ session)
824
+ if term_found:
825
+ pydantic_term = instantiate_pydantic_term(term_found, selected_term_fields)
826
+ result = (term_found.collection.id, pydantic_term)
829
827
  return result
830
828
 
831
829
 
832
- def get_all_terms_in_all_projects() -> list[BaseModel]:
830
+ def get_term_from_universe_term_id_in_all_projects(data_descriptor_id: str,
831
+ universe_term_id: str,
832
+ selected_term_fields: Iterable[str] | None = None) \
833
+ -> list[tuple[str, str, DataDescriptor]]:
833
834
  """
834
- Gets all terms of all projects.
835
+ Returns the terms, in all projects, that correspond to the given term in the universe.
836
+ This function performs an exact match on the `data_descriptor_id`
837
+ and `universe_term_id`, and does not search for similar or related data descriptors
838
+ and terms. If any of the provided ids (`data_descriptor_id` or `universe_term_id`)
839
+ is not found, or if there is no project term corresponding to the given universe term,
840
+ the function returns an empty list.
835
841
 
836
- :returns: A list of Pydantic term instances.
837
- :rtype: list[BaseModel]
842
+ :param data_descriptor_id: The id of the data descriptor that contains the given universe term.
843
+ :type data_descriptor_id: str
844
+ :param universe_term_id: The id of the given universe term.
845
+ :type universe_term_id: str
846
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
847
+ fields of the terms are returned. If empty, selects the id and type fields.
848
+ :type selected_term_fields: Iterable[str] | None
849
+ :returns: A list of tuples of project id, collection id and project term instance. \
850
+ Returns an empty list if no matches are found.
851
+ :rtype: list[tuple[str, str, DataDescriptor]]
838
852
  """
853
+ result: list[tuple[str, str, DataDescriptor]] = list()
839
854
  project_ids = get_all_projects()
840
- result = list()
841
855
  for project_id in project_ids:
842
- result.extend(get_all_terms_in_project(project_id))
856
+ term_found = get_term_from_universe_term_id_in_project(project_id,
857
+ data_descriptor_id,
858
+ universe_term_id,
859
+ selected_term_fields)
860
+ if term_found:
861
+ result.append((project_id, term_found[0], term_found[1]))
843
862
  return result
844
863
 
845
864
 
846
- def find_project(project_id: str) -> dict|None:
865
+ def _find_collections_in_project(expression: str,
866
+ session: Session,
867
+ only_id: bool = False,
868
+ limit: int | None = None,
869
+ offset: int | None = None) -> Sequence[Collection]:
870
+ matching_condition = generate_matching_condition(PCollectionFTS5, expression, only_id)
871
+ tmp_statement = select(PCollectionFTS5).where(matching_condition)
872
+ statement = select(Collection).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
873
+ return execute_match_statement(expression, statement, session)
874
+
875
+
876
+ def find_collections_in_project(expression: str, project_id: str,
877
+ only_id: bool = False,
878
+ limit: int | None = None,
879
+ offset: int | None = None) -> list[tuple[str, dict]]:
847
880
  """
848
- Finds a project.
849
- This function performs an exact match on the `project_id` and
850
- does **not** search for similar or related projects.
851
- If the provided `project_id` is not found, the function returns `None`.
852
-
853
- :param project_id: A project id to be found
881
+ Find collections in the given project based on a full text search defined by the given `expression`.
882
+ The `expression` comes from the powerful
883
+ `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
884
+ and corresponds to the expression of the `MATCH` operator.
885
+ It can be composed of one or multiple keywords combined with boolean
886
+ operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
887
+ with the wildcard `*`.
888
+ The function returns a list of collection ids and contexts, sorted according to the
889
+ bm25 ranking metric (list index `0` has the highest rank).
890
+ This function performs an exact match on the `project_id`,
891
+ and does not search for similar or related projects.
892
+ If the provided `expression` does not hit any collection or the given `project_id` does not
893
+ exactly match the id of a project, the function returns an empty list.
894
+ The function searches for the `expression` in the collection specifications.
895
+ However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
896
+ collections. **At the moment, `only_id` is set to `True` as the collections
897
+ haven't got any description.**
898
+
899
+ :param expression: The full text search expression.
900
+ :type expression: str
901
+ :param project_id: The id of the given project.
854
902
  :type project_id: str
855
- :returns: The specs of the project found. Returns `None` if no matches are found.
856
- :rtype: dict|None
903
+ :param only_id: Performs the search only on ids, otherwise on all the specifications.
904
+ :type only_id: bool
905
+ :param limit: Limit the number of returned items found. Returns all items found if \
906
+ `limit` is either `None`, zero or negative.
907
+ :type limit: int | None
908
+ :param offset: Skips `offset` number of items found. Ignored if `offset` is \
909
+ either `None`, zero or negative.
910
+ :type offset: int | None
911
+ :returns: A list of collection ids and contexts. Returns an empty list if no matches are found.
912
+ :rtype: list[tuple[str, dict]]
913
+ :raises EsgvocValueError: If the `expression` cannot be interpreted.
857
914
  """
858
- result = None
859
- if connection:=_get_project_connection(project_id):
915
+ result: list[tuple[str, dict]] = list()
916
+ if connection := _get_project_connection(project_id):
860
917
  with connection.create_session() as session:
861
- project = session.get(Project, esgvoc.core.constants.SQLITE_FIRST_PK)
862
- # Project can't be missing if session exists.
863
- result = project.specs # type: ignore
918
+ collections_found = _find_collections_in_project(expression, session, only_id,
919
+ limit, offset)
920
+ for collection in collections_found:
921
+ result.append((collection.id, collection.context))
864
922
  return result
865
923
 
866
924
 
867
- def get_all_projects() -> list[str]:
925
+ def _find_terms_in_collection(expression: str,
926
+ collection_id: str,
927
+ session: Session,
928
+ only_id: bool = False,
929
+ limit: int | None = None,
930
+ offset: int | None = None) -> Sequence[PTerm]:
931
+ matching_condition = generate_matching_condition(PTermFTS5, expression, only_id)
932
+ where_condition = Collection.id == collection_id, matching_condition
933
+ tmp_statement = select(PTermFTS5).join(Collection).where(*where_condition)
934
+ statement = select(PTerm).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
935
+ return execute_match_statement(expression, statement, session)
936
+
937
+
938
+ def _find_terms_in_project(expression: str,
939
+ session: Session,
940
+ only_id: bool = False,
941
+ limit: int | None = None,
942
+ offset: int | None = None) -> Sequence[PTerm]:
943
+ matching_condition = generate_matching_condition(PTermFTS5, expression, only_id)
944
+ tmp_statement = select(PTermFTS5).where(matching_condition)
945
+ statement = select(PTerm).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
946
+ return execute_match_statement(expression, statement, session)
947
+
948
+
949
+ def find_terms_in_collection(expression: str, project_id: str,
950
+ collection_id: str,
951
+ only_id: bool = False,
952
+ limit: int | None = None,
953
+ offset: int | None = None,
954
+ selected_term_fields: Iterable[str] | None = None) \
955
+ -> list[DataDescriptor]:
868
956
  """
869
- Gets all projects.
870
-
871
- :returns: A list of project ids.
872
- :rtype: list[str]
957
+ Find terms in the given project and collection based on a full text search defined by the given
958
+ `expression`. The `expression` comes from the powerful
959
+ `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
960
+ and corresponds to the expression of the `MATCH` operator.
961
+ It can be composed of one or multiple keywords combined with boolean
962
+ operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
963
+ with the wildcard `*`.
964
+ The function returns a list of term instances, sorted according to the
965
+ bm25 ranking metric (list index `0` has the highest rank).
966
+ This function performs an exact match on the `project_id` and `collection_id`,
967
+ and does not search for similar or related projects and collections.
968
+ If the provided `expression` does not hit any term or if any of the provided ids
969
+ (`project_id` or `collection_id`) is not found, the function returns an empty list.
970
+ The function searches for the `expression` in the term specifications.
971
+ However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
972
+ terms.
973
+
974
+ :param expression: The full text search expression.
975
+ :type expression: str
976
+ :param project_id: The id of the given project.
977
+ :type project_id: str
978
+ :param collection_id: The id of the given collection.
979
+ :type collection_id: str
980
+ :param only_id: Performs the search only on ids, otherwise on all the specifications.
981
+ :type only_id: bool
982
+ :param limit: Limit the number of returned items found. Returns all items found if \
983
+ `limit` is either `None`, zero or negative.
984
+ :type limit: int | None
985
+ :param offset: Skips `offset` number of items found. Ignored if `offset` is \
986
+ either `None`, zero or negative.
987
+ :type offset: int | None
988
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
989
+ fields of the terms are returned. If empty, selects the id and type fields.
990
+ :type selected_term_fields: Iterable[str] | None
991
+ :returns: A list of term instances. Returns an empty list if no matches are found.
992
+ :rtype: list[DataDescriptor]
993
+ :raises EsgvocValueError: If the `expression` cannot be interpreted.
873
994
  """
874
- return list(service.state_service.projects.keys())
995
+ result: list[DataDescriptor] = list()
996
+ if connection := _get_project_connection(project_id):
997
+ with connection.create_session() as session:
998
+ pterms_found = _find_terms_in_collection(expression, collection_id, session,
999
+ only_id, limit, offset)
1000
+ instantiate_pydantic_terms(pterms_found, result, selected_term_fields)
1001
+ return result
875
1002
 
876
1003
 
877
- if __name__ == "__main__":
878
- vr = valid_term('r1i1p1f111', 'cmip6plus', 'member_id', 'ripf')
879
- if vr:
880
- print('OK')
881
- else:
882
- print(vr)
883
- for error in vr.errors:
884
- print(error)
1004
+ def find_terms_in_project(expression: str,
1005
+ project_id: str,
1006
+ only_id: bool = False,
1007
+ limit: int | None = None,
1008
+ offset: int | None = None,
1009
+ selected_term_fields: Iterable[str] | None = None) \
1010
+ -> list[DataDescriptor]:
1011
+ """
1012
+ Find terms in the given project based on a full text search defined by the given
1013
+ `expression`. The `expression` comes from the powerful
1014
+ `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
1015
+ and corresponds to the expression of the `MATCH` operator.
1016
+ It can be composed of one or multiple keywords combined with boolean
1017
+ operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
1018
+ with the wildcard `*`.
1019
+ The function returns a list of term instances, sorted according to the
1020
+ bm25 ranking metric (list index `0` has the highest rank).
1021
+ This function performs an exact match on the `project_id`,
1022
+ and does not search for similar or related projects.
1023
+ If the provided `expression` does not hit any term or if the provided `project_id` is
1024
+ not found, the function returns an empty list.
1025
+ The function searches for the `expression` in the term specifications.
1026
+ However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
1027
+ terms.
1028
+
1029
+ :param expression: The full text search expression.
1030
+ :type expression: str
1031
+ :param project_id: The id of the given project.
1032
+ :type project_id: str
1033
+ :param only_id: Performs the search only on ids, otherwise on all the specifications.
1034
+ :type only_id: bool
1035
+ :param limit: Limit the number of returned items found. Returns all items found if \
1036
+ `limit` is either `None`, zero or negative.
1037
+ :type limit: int | None
1038
+ :param offset: Skips `offset` number of items found. Ignored if `offset` is \
1039
+ either `None`, zero or negative.
1040
+ :type offset: int | None
1041
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
1042
+ fields of the terms are returned. If empty, selects the id and type fields.
1043
+ :type selected_term_fields: Iterable[str] | None
1044
+ :returns: A list of term instances. Returns an empty list if no matches are found.
1045
+ :rtype: list[DataDescriptor]
1046
+ :raises EsgvocValueError: If the `expression` cannot be interpreted.
1047
+ """
1048
+ result: list[DataDescriptor] = list()
1049
+ if connection := _get_project_connection(project_id):
1050
+ with connection.create_session() as session:
1051
+ pterms_found = _find_terms_in_project(expression, session, only_id, limit, offset)
1052
+ instantiate_pydantic_terms(pterms_found, result, selected_term_fields)
1053
+ return result
1054
+
1055
+
1056
+ def find_terms_in_all_projects(expression: str,
1057
+ only_id: bool = False,
1058
+ limit: int | None = None,
1059
+ offset: int | None = None,
1060
+ selected_term_fields: Iterable[str] | None = None) \
1061
+ -> list[tuple[str, list[DataDescriptor]]]:
1062
+ """
1063
+ Find terms in all projects based on a full text search defined by the given
1064
+ `expression`. The `expression` comes from the powerful
1065
+ `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
1066
+ and corresponds to the expression of the `MATCH` operator.
1067
+ It can be composed of one or multiple keywords combined with boolean
1068
+ operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
1069
+ with the wildcard `*`.
1070
+ The function returns a list of project ids and term instances, sorted according to the
1071
+ bm25 ranking metric (list index `0` has the highest rank).
1072
+ If the provided `expression` does not hit any term, the function returns an empty list.
1073
+ The function searches for the `expression` in the term specifications.
1074
+ However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
1075
+ terms.
1076
+
1077
+ :param expression: The full text search expression.
1078
+ :type expression: str
1079
+ :param only_id: Performs the search only on ids, otherwise on all the specifications.
1080
+ :type only_id: bool
1081
+ :param limit: Limit the number of returned items found. Returns all items found if \
1082
+ `limit` is either `None`, zero or negative.
1083
+ :type limit: int | None
1084
+ :param offset: Skips `offset` number of items found. Ignored if `offset` is \
1085
+ either `None`, zero or negative.
1086
+ :type offset: int | None
1087
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
1088
+ fields of the terms are returned. If empty, selects the id and type fields.
1089
+ :type selected_term_fields: Iterable[str] | None
1090
+ :returns: A list of project ids and term instances. Returns an empty list if no matches are found.
1091
+ :rtype: list[tuple[str, list[DataDescriptor]]]
1092
+ :raises EsgvocValueError: If the `expression` cannot be interpreted.
1093
+ """
1094
+ result: list[tuple[str, list[DataDescriptor]]] = list()
1095
+ project_ids = get_all_projects()
1096
+ for project_id in project_ids:
1097
+ terms_found = find_terms_in_project(expression, project_id, only_id,
1098
+ limit, offset, selected_term_fields)
1099
+ if terms_found:
1100
+ result.append((project_id, terms_found))
1101
+ return result
1102
+
1103
+
1104
+ def find_items_in_project(expression: str,
1105
+ project_id: str,
1106
+ only_id: bool = False,
1107
+ limit: int | None = None,
1108
+ offset: int | None = None) -> list[Item]:
1109
+ """
1110
+ Find items, at the moment terms and collections, in the given project based on a full-text
1111
+ search defined by the given `expression`. The `expression` comes from the powerful
1112
+ `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
1113
+ and corresponds to the expression of the `MATCH` operator.
1114
+ It can be composed of one or multiple keywords combined with boolean
1115
+ operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
1116
+ with the wildcard `*`.
1117
+ The function returns a list of item instances sorted according to the
1118
+ bm25 ranking metric (list index `0` has the highest rank).
1119
+ This function performs an exact match on the `project_id`,
1120
+ and does not search for similar or related projects.
1121
+ If the provided `expression` does not hit any item, or the provided `project_id` is not found,
1122
+ the function returns an empty list.
1123
+ The function searches for the `expression` in the term and collection specifications.
1124
+ However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
1125
+ terms and collections. **At the moment, `only_id` is set to `True` for the collections because
1126
+ they haven't got any description.**
1127
+
1128
+ :param expression: The full text search expression.
1129
+ :type expression: str
1130
+ :param only_id: Performs the search only on ids, otherwise on all the specifications.
1131
+ :type only_id: bool
1132
+ :param limit: Limit the number of returned items found. Returns all items found if \
1133
+ `limit` is either `None`, zero or negative.
1134
+ :type limit: int | None
1135
+ :param offset: Skips `offset` number of items found. Ignored if `offset` is \
1136
+ either `None`, zero or negative.
1137
+ :type offset: int | None
1138
+ :returns: A list of item instances. Returns an empty list if no matches are found.
1139
+ :rtype: list[Item]
1140
+ :raises EsgvocValueError: If the `expression` cannot be interpreted.
1141
+ """
1142
+ # TODO: execute union query when it will be possible to compute parent of terms and collections.
1143
+ result = list()
1144
+ if connection := _get_project_connection(project_id):
1145
+ with connection.create_session() as session:
1146
+ if only_id:
1147
+ collection_column = col(PCollectionFTS5.id)
1148
+ term_column = col(PTermFTS5.id)
1149
+ else:
1150
+ collection_column = col(PCollectionFTS5.id) # TODO: use specs when implemented!
1151
+ term_column = col(PTermFTS5.specs) # type: ignore
1152
+ collection_where_condition = collection_column.match(expression)
1153
+ collection_statement = select(PCollectionFTS5.id,
1154
+ text("'collection' AS TYPE"),
1155
+ text(f"'{project_id}' AS TYPE"),
1156
+ text('rank')).where(collection_where_condition)
1157
+ term_where_condition = term_column.match(expression)
1158
+ term_statement = select(PTermFTS5.id,
1159
+ text("'term' AS TYPE"),
1160
+ Collection.id,
1161
+ text('rank')).join(Collection) \
1162
+ .where(term_where_condition)
1163
+ result = execute_find_item_statements(session, expression, collection_statement,
1164
+ term_statement, limit, offset)
1165
+ return result