esgvoc 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic. Click here for more details.

Files changed (87)
  1. esgvoc/__init__.py +1 -1
  2. esgvoc/api/__init__.py +95 -60
  3. esgvoc/api/data_descriptors/__init__.py +50 -28
  4. esgvoc/api/data_descriptors/activity.py +3 -3
  5. esgvoc/api/data_descriptors/area_label.py +16 -1
  6. esgvoc/api/data_descriptors/branded_suffix.py +20 -0
  7. esgvoc/api/data_descriptors/branded_variable.py +12 -0
  8. esgvoc/api/data_descriptors/consortium.py +14 -13
  9. esgvoc/api/data_descriptors/contact.py +5 -0
  10. esgvoc/api/data_descriptors/conventions.py +6 -0
  11. esgvoc/api/data_descriptors/creation_date.py +5 -0
  12. esgvoc/api/data_descriptors/data_descriptor.py +14 -9
  13. esgvoc/api/data_descriptors/data_specs_version.py +5 -0
  14. esgvoc/api/data_descriptors/date.py +1 -1
  15. esgvoc/api/data_descriptors/directory_date.py +1 -1
  16. esgvoc/api/data_descriptors/experiment.py +13 -11
  17. esgvoc/api/data_descriptors/forcing_index.py +1 -1
  18. esgvoc/api/data_descriptors/frequency.py +3 -3
  19. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  20. esgvoc/api/data_descriptors/grid_label.py +2 -2
  21. esgvoc/api/data_descriptors/horizontal_label.py +15 -1
  22. esgvoc/api/data_descriptors/initialisation_index.py +1 -1
  23. esgvoc/api/data_descriptors/institution.py +8 -5
  24. esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
  25. esgvoc/api/data_descriptors/license.py +3 -3
  26. esgvoc/api/data_descriptors/mip_era.py +1 -1
  27. esgvoc/api/data_descriptors/model_component.py +1 -1
  28. esgvoc/api/data_descriptors/obs_type.py +5 -0
  29. esgvoc/api/data_descriptors/organisation.py +1 -1
  30. esgvoc/api/data_descriptors/physic_index.py +1 -1
  31. esgvoc/api/data_descriptors/product.py +2 -2
  32. esgvoc/api/data_descriptors/publication_status.py +5 -0
  33. esgvoc/api/data_descriptors/realisation_index.py +1 -1
  34. esgvoc/api/data_descriptors/realm.py +1 -1
  35. esgvoc/api/data_descriptors/region.py +5 -0
  36. esgvoc/api/data_descriptors/resolution.py +3 -3
  37. esgvoc/api/data_descriptors/source.py +9 -5
  38. esgvoc/api/data_descriptors/source_type.py +1 -1
  39. esgvoc/api/data_descriptors/table.py +3 -2
  40. esgvoc/api/data_descriptors/temporal_label.py +15 -1
  41. esgvoc/api/data_descriptors/time_range.py +4 -3
  42. esgvoc/api/data_descriptors/title.py +5 -0
  43. esgvoc/api/data_descriptors/tracking_id.py +5 -0
  44. esgvoc/api/data_descriptors/variable.py +25 -12
  45. esgvoc/api/data_descriptors/variant_label.py +3 -3
  46. esgvoc/api/data_descriptors/vertical_label.py +14 -0
  47. esgvoc/api/project_specs.py +120 -4
  48. esgvoc/api/projects.py +733 -505
  49. esgvoc/api/py.typed +0 -0
  50. esgvoc/api/report.py +12 -8
  51. esgvoc/api/search.py +168 -98
  52. esgvoc/api/universe.py +368 -157
  53. esgvoc/apps/drs/constants.py +1 -1
  54. esgvoc/apps/drs/generator.py +51 -69
  55. esgvoc/apps/drs/report.py +60 -15
  56. esgvoc/apps/drs/validator.py +60 -71
  57. esgvoc/apps/jsg/cmip6_template.json +74 -0
  58. esgvoc/apps/jsg/cmip6plus_template.json +74 -0
  59. esgvoc/apps/jsg/json_schema_generator.py +185 -0
  60. esgvoc/apps/py.typed +0 -0
  61. esgvoc/cli/config.py +500 -0
  62. esgvoc/cli/drs.py +3 -2
  63. esgvoc/cli/find.py +138 -0
  64. esgvoc/cli/get.py +46 -38
  65. esgvoc/cli/main.py +10 -3
  66. esgvoc/cli/status.py +27 -18
  67. esgvoc/cli/valid.py +10 -15
  68. esgvoc/core/constants.py +1 -1
  69. esgvoc/core/db/__init__.py +2 -4
  70. esgvoc/core/db/connection.py +5 -3
  71. esgvoc/core/db/models/project.py +57 -15
  72. esgvoc/core/db/models/universe.py +49 -10
  73. esgvoc/core/db/project_ingestion.py +79 -65
  74. esgvoc/core/db/universe_ingestion.py +71 -40
  75. esgvoc/core/exceptions.py +33 -0
  76. esgvoc/core/logging_handler.py +24 -2
  77. esgvoc/core/repo_fetcher.py +61 -59
  78. esgvoc/core/service/data_merger.py +47 -34
  79. esgvoc/core/service/state.py +107 -83
  80. {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/METADATA +7 -20
  81. esgvoc-1.0.0.dist-info/RECORD +95 -0
  82. esgvoc/api/_utils.py +0 -53
  83. esgvoc/core/logging.conf +0 -21
  84. esgvoc-0.3.0.dist-info/RECORD +0 -78
  85. {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/WHEEL +0 -0
  86. {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/entry_points.txt +0 -0
  87. {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/api/projects.py CHANGED
@@ -1,31 +1,39 @@
1
1
  import re
2
- from collections.abc import Iterable, Sequence
2
+ from typing import Iterable, Sequence
3
3
 
4
- from sqlmodel import Session, and_, select
4
+ from sqlalchemy import text
5
+ from sqlmodel import Session, and_, col, select
5
6
 
6
7
  import esgvoc.api.universe as universe
7
8
  import esgvoc.core.constants as constants
8
9
  import esgvoc.core.service as service
9
- from esgvoc.api._utils import (APIException, get_universe_session,
10
- instantiate_pydantic_term,
11
- instantiate_pydantic_terms)
12
10
  from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor
13
11
  from esgvoc.api.project_specs import ProjectSpecs
14
- from esgvoc.api.report import (ProjectTermError, UniverseTermError,
15
- ValidationReport)
16
- from esgvoc.api.search import (MatchingTerm, SearchSettings,
17
- _create_str_comparison_expression)
12
+ from esgvoc.api.report import ProjectTermError, UniverseTermError, ValidationReport
13
+ from esgvoc.api.search import (
14
+ Item,
15
+ MatchingTerm,
16
+ execute_find_item_statements,
17
+ execute_match_statement,
18
+ generate_matching_condition,
19
+ get_universe_session,
20
+ handle_rank_limit_offset,
21
+ instantiate_pydantic_term,
22
+ instantiate_pydantic_terms,
23
+ process_expression,
24
+ )
18
25
  from esgvoc.core.db.connection import DBConnection
19
26
  from esgvoc.core.db.models.mixins import TermKind
20
- from esgvoc.core.db.models.project import Collection, Project, PTerm
27
+ from esgvoc.core.db.models.project import PCollection, PCollectionFTS5, Project, PTerm, PTermFTS5
21
28
  from esgvoc.core.db.models.universe import UTerm
29
+ from esgvoc.core.exceptions import EsgvocDbError, EsgvocNotFoundError, EsgvocNotImplementedError, EsgvocValueError
22
30
 
23
31
  # [OPTIMIZATION]
24
32
  _VALID_TERM_IN_COLLECTION_CACHE: dict[str, list[MatchingTerm]] = dict()
25
- _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE: dict[str, list[UniverseTermError|ProjectTermError]] = dict()
33
+ _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE: dict[str, list[UniverseTermError | ProjectTermError]] = dict()
26
34
 
27
35
 
28
- def _get_project_connection(project_id: str) -> DBConnection|None:
36
+ def _get_project_connection(project_id: str) -> DBConnection | None:
29
37
  if project_id in service.current_state.projects:
30
38
  return service.current_state.projects[project_id].db_connection
31
39
  else:
@@ -33,38 +41,32 @@ def _get_project_connection(project_id: str) -> DBConnection|None:
33
41
 
34
42
 
35
43
  def _get_project_session_with_exception(project_id: str) -> Session:
36
- if connection:=_get_project_connection(project_id):
44
+ if connection := _get_project_connection(project_id):
37
45
  project_session = connection.create_session()
38
46
  return project_session
39
47
  else:
40
- raise APIException(f'unable to find project {project_id}')
48
+ raise EsgvocNotFoundError(f"unable to find project '{project_id}'")
41
49
 
42
50
 
43
- def _resolve_term(composite_term_part: dict,
44
- universe_session: Session,
45
- project_session: Session) -> UTerm|PTerm:
51
+ def _resolve_term(composite_term_part: dict, universe_session: Session, project_session: Session) -> UTerm | PTerm:
46
52
  # First find the term in the universe than in the current project
47
53
  term_id = composite_term_part[constants.TERM_ID_JSON_KEY]
48
54
  term_type = composite_term_part[constants.TERM_TYPE_JSON_KEY]
49
- uterms = universe._find_terms_in_data_descriptor(data_descriptor_id=term_type,
50
- term_id=term_id,
51
- session=universe_session,
52
- settings=None)
53
- if uterms:
54
- return uterms[0]
55
+ uterm = universe._get_term_in_data_descriptor(
56
+ data_descriptor_id=term_type, term_id=term_id, session=universe_session
57
+ )
58
+ if uterm:
59
+ return uterm
55
60
  else:
56
- pterms = _find_terms_in_collection(collection_id=term_type,
57
- term_id=term_id,
58
- session=project_session,
59
- settings=None)
60
- if pterms:
61
- return pterms[0]
61
+ pterm = _get_term_in_collection(collection_id=term_type, term_id=term_id, session=project_session)
62
+ if pterm:
63
+ return pterm
62
64
  else:
63
- msg = f'unable to find the term {term_id} in {term_type}'
64
- raise RuntimeError(msg)
65
+ msg = f"unable to find the term '{term_id}' in '{term_type}'"
66
+ raise EsgvocNotFoundError(msg)
65
67
 
66
68
 
67
- def _get_composite_term_separator_parts(term: UTerm|PTerm) -> tuple[str, list]:
69
+ def _get_composite_term_separator_parts(term: UTerm | PTerm) -> tuple[str, list]:
68
70
  separator = term.specs[constants.COMPOSITE_SEPARATOR_JSON_KEY]
69
71
  parts = term.specs[constants.COMPOSITE_PARTS_JSON_KEY]
70
72
  return separator, parts
@@ -72,11 +74,9 @@ def _get_composite_term_separator_parts(term: UTerm|PTerm) -> tuple[str, list]:
72
74
 
73
75
  # TODO: support optionality of parts of composite.
74
76
  # It is backtrack possible for more than one missing parts.
75
- def _valid_value_composite_term_with_separator(value: str,
76
- term: UTerm|PTerm,
77
- universe_session: Session,
78
- project_session: Session)\
79
- -> list[UniverseTermError|ProjectTermError]:
77
+ def _valid_value_composite_term_with_separator(
78
+ value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
79
+ ) -> list[UniverseTermError | ProjectTermError]:
80
80
  result = list()
81
81
  separator, parts = _get_composite_term_separator_parts(term)
82
82
  if separator in value:
@@ -84,14 +84,25 @@ def _valid_value_composite_term_with_separator(value: str,
84
84
  if len(splits) == len(parts):
85
85
  for index in range(0, len(splits)):
86
86
  given_value = splits[index]
87
- resolved_term = _resolve_term(parts[index],
88
- universe_session,
89
- project_session)
90
- errors = _valid_value(given_value,
91
- resolved_term,
92
- universe_session,
93
- project_session)
94
- result.extend(errors)
87
+ if "id" not in parts[index].keys():
88
+ terms = universe.get_all_terms_in_data_descriptor(parts[index]["type"], None)
89
+ parts[index]["id"] = [term.id for term in terms]
90
+ if type(parts[index]["id"]) is str:
91
+ parts[index]["id"] = [parts[index]["id"]]
92
+
93
+ errors_list = list()
94
+ for id in parts[index]["id"]:
95
+ part_parts = dict(parts[index])
96
+ part_parts["id"] = id
97
+ resolved_term = _resolve_term(part_parts, universe_session, project_session)
98
+ errors = _valid_value(given_value, resolved_term, universe_session, project_session)
99
+ if len(errors) == 0:
100
+ errors_list = errors
101
+ break
102
+ else:
103
+ errors_list.extend(errors)
104
+ else:
105
+ result.append(_create_term_error(value, term))
95
106
  else:
96
107
  result.append(_create_term_error(value, term))
97
108
  else:
@@ -99,38 +110,33 @@ def _valid_value_composite_term_with_separator(value: str,
99
110
  return result
100
111
 
101
112
 
102
- def _transform_to_pattern(term: UTerm|PTerm,
103
- universe_session: Session,
104
- project_session: Session) -> str:
113
+ def _transform_to_pattern(term: UTerm | PTerm, universe_session: Session, project_session: Session) -> str:
105
114
  match term.kind:
106
115
  case TermKind.PLAIN:
107
116
  if constants.DRS_SPECS_JSON_KEY in term.specs:
108
117
  result = term.specs[constants.DRS_SPECS_JSON_KEY]
109
118
  else:
110
- raise APIException(f"the term {term.id} doesn't have drs name. " +
111
- "Can't validate it.")
119
+ raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " + "Can't validate it.")
112
120
  case TermKind.PATTERN:
113
121
  result = term.specs[constants.PATTERN_JSON_KEY]
114
122
  case TermKind.COMPOSITE:
115
- separator, parts = _get_composite_term_separator_parts(term)
123
+ separator, parts = _get_composite_term_separator_parts(term)
116
124
  result = ""
117
125
  for part in parts:
118
126
  resolved_term = _resolve_term(part, universe_session, project_session)
119
127
  pattern = _transform_to_pattern(resolved_term, universe_session, project_session)
120
- result = f'{result}{pattern}{separator}'
128
+ result = f"{result}{pattern}{separator}"
121
129
  result = result.rstrip(separator)
122
130
  case _:
123
- raise RuntimeError(f'unsupported term kind {term.kind}')
131
+ raise EsgvocDbError(f"unsupported term kind '{term.kind}'")
124
132
  return result
125
133
 
126
134
 
127
135
  # TODO: support optionality of parts of composite.
128
136
  # It is backtrack possible for more than one missing parts.
129
- def _valid_value_composite_term_separator_less(value: str,
130
- term: UTerm|PTerm,
131
- universe_session: Session,
132
- project_session: Session)\
133
- -> list[UniverseTermError|ProjectTermError]:
137
+ def _valid_value_composite_term_separator_less(
138
+ value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
139
+ ) -> list[UniverseTermError | ProjectTermError]:
134
140
  result = list()
135
141
  try:
136
142
  pattern = _transform_to_pattern(term, universe_session, project_session)
@@ -139,50 +145,45 @@ def _valid_value_composite_term_separator_less(value: str,
139
145
  # So their regex are defined as a whole (begins by a ^, ends by a $).
140
146
  # As the pattern is a concatenation of plain or regex, multiple ^ and $ can exist.
141
147
  # The later, must be removed.
142
- pattern = pattern.replace('^', '').replace('$', '')
143
- pattern = f'^{pattern}$'
148
+ pattern = pattern.replace("^", "").replace("$", "")
149
+ pattern = f"^{pattern}$"
144
150
  regex = re.compile(pattern)
145
151
  except Exception as e:
146
- msg = f'regex compilation error while processing term {term.id}:\n{e}'
147
- raise RuntimeError(msg) from e
152
+ msg = f"regex compilation error while processing term '{term.id}'':\n{e}"
153
+ raise EsgvocDbError(msg) from e
148
154
  match = regex.match(value)
149
155
  if match is None:
150
156
  result.append(_create_term_error(value, term))
151
157
  return result
152
158
  except Exception as e:
153
- msg = f'cannot validate separator less composite term {term.id}:\n{e}'
154
- raise RuntimeError(msg) from e
159
+ msg = f"cannot validate separator less composite term '{term.id}':\n{e}"
160
+ raise EsgvocNotImplementedError(msg) from e
155
161
 
156
162
 
157
- def _valid_value_for_composite_term(value: str,
158
- term: UTerm|PTerm,
159
- universe_session: Session,
160
- project_session: Session)\
161
- -> list[UniverseTermError|ProjectTermError]:
163
+ def _valid_value_for_composite_term(
164
+ value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
165
+ ) -> list[UniverseTermError | ProjectTermError]:
162
166
  result = list()
163
167
  separator, _ = _get_composite_term_separator_parts(term)
164
168
  if separator:
165
- result = _valid_value_composite_term_with_separator(value, term, universe_session,
166
- project_session)
169
+ result = _valid_value_composite_term_with_separator(value, term, universe_session, project_session)
167
170
  else:
168
- result = _valid_value_composite_term_separator_less(value, term, universe_session,
169
- project_session)
171
+ result = _valid_value_composite_term_separator_less(value, term, universe_session, project_session)
170
172
  return result
171
173
 
172
174
 
173
- def _create_term_error(value: str, term: UTerm|PTerm) -> UniverseTermError|ProjectTermError:
175
+ def _create_term_error(value: str, term: UTerm | PTerm) -> UniverseTermError | ProjectTermError:
174
176
  if isinstance(term, UTerm):
175
- return UniverseTermError(value=value, term=term.specs, term_kind=term.kind,
176
- data_descriptor_id=term.data_descriptor.id)
177
+ return UniverseTermError(
178
+ value=value, term=term.specs, term_kind=term.kind, data_descriptor_id=term.data_descriptor.id
179
+ )
177
180
  else:
178
- return ProjectTermError(value=value, term=term.specs, term_kind=term.kind,
179
- collection_id=term.collection.id)
181
+ return ProjectTermError(value=value, term=term.specs, term_kind=term.kind, collection_id=term.collection.id)
180
182
 
181
183
 
182
- def _valid_value(value: str,
183
- term: UTerm|PTerm,
184
- universe_session: Session,
185
- project_session: Session) -> list[UniverseTermError|ProjectTermError]:
184
+ def _valid_value(
185
+ value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
186
+ ) -> list[UniverseTermError | ProjectTermError]:
186
187
  result = list()
187
188
  match term.kind:
188
189
  case TermKind.PLAIN:
@@ -190,89 +191,65 @@ def _valid_value(value: str,
190
191
  if term.specs[constants.DRS_SPECS_JSON_KEY] != value:
191
192
  result.append(_create_term_error(value, term))
192
193
  else:
193
- raise APIException(f"the term {term.id} doesn't have drs name. " +
194
- "Can't validate it.")
194
+ raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " + "Can't validate it.")
195
195
  case TermKind.PATTERN:
196
- # OPTIM: Pattern can be compiled and stored for further matching.
196
+ # TODO: Pattern can be compiled and stored for further matching.
197
197
  pattern_match = re.match(term.specs[constants.PATTERN_JSON_KEY], value)
198
198
  if pattern_match is None:
199
199
  result.append(_create_term_error(value, term))
200
200
  case TermKind.COMPOSITE:
201
- result.extend(_valid_value_for_composite_term(value, term,
202
- universe_session,
203
- project_session))
201
+ result.extend(_valid_value_for_composite_term(value, term, universe_session, project_session))
204
202
  case _:
205
- raise RuntimeError(f'unsupported term kind {term.kind}')
203
+ raise EsgvocDbError(f"unsupported term kind '{term.kind}'")
206
204
  return result
207
205
 
208
206
 
209
207
  def _check_value(value: str) -> str:
210
208
  if not value or value.isspace():
211
- raise APIException('value should be set')
209
+ raise EsgvocValueError("value should be set")
212
210
  else:
213
211
  return value
214
212
 
215
213
 
216
- def _search_plain_term_and_valid_value(value: str,
217
- collection_id: str,
218
- project_session: Session) \
219
- -> str|None:
220
- where_expression = and_(Collection.id == collection_id,
221
- PTerm.specs[constants.DRS_SPECS_JSON_KEY] == f'"{value}"')
222
- statement = select(PTerm).join(Collection).where(where_expression)
214
+ def _search_plain_term_and_valid_value(value: str, collection_id: str, project_session: Session) -> str | None:
215
+ where_expression = and_(PCollection.id == collection_id, PTerm.specs[constants.DRS_SPECS_JSON_KEY] == f'"{value}"')
216
+ statement = select(PTerm).join(PCollection).where(where_expression)
223
217
  term = project_session.exec(statement).one_or_none()
224
218
  return term.id if term else None
225
219
 
226
220
 
227
- def _valid_value_against_all_terms_of_collection(value: str,
228
- collection: Collection,
229
- universe_session: Session,
230
- project_session: Session) \
231
- -> list[str]:
221
+ def _valid_value_against_all_terms_of_collection(
222
+ value: str, collection: PCollection, universe_session: Session, project_session: Session
223
+ ) -> list[str]:
232
224
  if collection.terms:
233
225
  result = list()
234
226
  for pterm in collection.terms:
235
- _errors = _valid_value(value, pterm,
236
- universe_session,
237
- project_session)
227
+ _errors = _valid_value(value, pterm, universe_session, project_session)
238
228
  if not _errors:
239
229
  result.append(pterm.id)
240
230
  return result
241
231
  else:
242
- raise RuntimeError(f'collection {collection.id} has no term')
232
+ raise EsgvocDbError(f"collection '{collection.id}' has no term")
243
233
 
244
234
 
245
- def _valid_value_against_given_term(value: str,
246
- project_id: str,
247
- collection_id: str,
248
- term_id: str,
249
- universe_session: Session,
250
- project_session: Session)\
251
- -> list[UniverseTermError|ProjectTermError]:
252
- # [OPTIMIZATION]
235
+ def _valid_value_against_given_term(
236
+ value: str, project_id: str, collection_id: str, term_id: str, universe_session: Session, project_session: Session
237
+ ) -> list[UniverseTermError | ProjectTermError]:
238
+ # [OPTIMIZATION]
253
239
  key = value + project_id + collection_id + term_id
254
240
  if key in _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE:
255
241
  result = _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key]
256
242
  else:
257
- terms = _find_terms_in_collection(collection_id,
258
- term_id,
259
- project_session,
260
- None)
261
- if terms:
262
- term = terms[0]
243
+ term = _get_term_in_collection(collection_id, term_id, project_session)
244
+ if term:
263
245
  result = _valid_value(value, term, universe_session, project_session)
264
246
  else:
265
- raise APIException(f'unable to find term {term_id} ' +
266
- f'in collection {collection_id}')
247
+ raise EsgvocNotFoundError(f"unable to find term '{term_id}' " + f"in collection '{collection_id}'")
267
248
  _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key] = result
268
249
  return result
269
250
 
270
251
 
271
- def valid_term(value: str,
272
- project_id: str,
273
- collection_id: str,
274
- term_id: str) \
275
- -> ValidationReport:
252
+ def valid_term(value: str, project_id: str, collection_id: str, term_id: str) -> ValidationReport:
276
253
  """
277
254
  Check if the given value may or may not represent the given term. The functions returns
278
255
  a report that contains the possible errors.
@@ -288,7 +265,7 @@ def valid_term(value: str,
288
265
  composite so as to compare it as a regex to the value.
289
266
 
290
267
  If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
291
- the function raises a APIException.
268
+ the function raises a EsgvocNotFoundError.
292
269
 
293
270
  :param value: A value to be validated
294
271
  :type value: str
@@ -300,61 +277,51 @@ def valid_term(value: str,
300
277
  :type term_id: str
301
278
  :returns: A validation report that contains the possible errors
302
279
  :rtype: ValidationReport
303
- :raises APIException: If any of the provided ids is not found
280
+ :raises EsgvocNotFoundError: If any of the provided ids is not found
304
281
  """
305
282
  value = _check_value(value)
306
- with get_universe_session() as universe_session, \
307
- _get_project_session_with_exception(project_id) as project_session:
308
- errors = _valid_value_against_given_term(value, project_id, collection_id, term_id,
309
- universe_session, project_session)
283
+ with get_universe_session() as universe_session, _get_project_session_with_exception(project_id) as project_session:
284
+ errors = _valid_value_against_given_term(
285
+ value, project_id, collection_id, term_id, universe_session, project_session
286
+ )
310
287
  return ValidationReport(expression=value, errors=errors)
311
288
 
312
289
 
313
- def _valid_term_in_collection(value: str,
314
- project_id: str,
315
- collection_id: str,
316
- universe_session: Session,
317
- project_session: Session) \
318
- -> list[MatchingTerm]:
319
- # [OPTIMIZATION]
290
+ def _valid_term_in_collection(
291
+ value: str, project_id: str, collection_id: str, universe_session: Session, project_session: Session
292
+ ) -> list[MatchingTerm]:
293
+ # [OPTIMIZATION]
320
294
  key = value + project_id + collection_id
321
295
  if key in _VALID_TERM_IN_COLLECTION_CACHE:
322
296
  result = _VALID_TERM_IN_COLLECTION_CACHE[key]
323
297
  else:
324
298
  value = _check_value(value)
325
299
  result = list()
326
- collections = _find_collections_in_project(collection_id,
327
- project_session,
328
- None)
329
- if collections:
330
- collection = collections[0]
300
+ collection = _get_collection_in_project(collection_id, project_session)
301
+ if collection:
331
302
  match collection.term_kind:
332
303
  case TermKind.PLAIN:
333
- term_id_found = _search_plain_term_and_valid_value(value, collection_id,
334
- project_session)
304
+ term_id_found = _search_plain_term_and_valid_value(value, collection_id, project_session)
335
305
  if term_id_found:
336
- result.append(MatchingTerm(project_id=project_id,
337
- collection_id=collection_id,
338
- term_id=term_id_found))
306
+ result.append(
307
+ MatchingTerm(project_id=project_id, collection_id=collection_id, term_id=term_id_found)
308
+ )
339
309
  case _:
340
- term_ids_found = _valid_value_against_all_terms_of_collection(value, collection,
341
- universe_session,
342
- project_session)
310
+ term_ids_found = _valid_value_against_all_terms_of_collection(
311
+ value, collection, universe_session, project_session
312
+ )
343
313
  for term_id_found in term_ids_found:
344
- result.append(MatchingTerm(project_id=project_id,
345
- collection_id=collection_id,
346
- term_id=term_id_found))
314
+ result.append(
315
+ MatchingTerm(project_id=project_id, collection_id=collection_id, term_id=term_id_found)
316
+ )
347
317
  else:
348
- msg = f'unable to find collection {collection_id}'
349
- raise APIException(msg)
318
+ msg = f"unable to find collection '{collection_id}'"
319
+ raise EsgvocNotFoundError(msg)
350
320
  _VALID_TERM_IN_COLLECTION_CACHE[key] = result
351
321
  return result
352
322
 
353
323
 
354
- def valid_term_in_collection(value: str,
355
- project_id: str,
356
- collection_id: str) \
357
- -> list[MatchingTerm]:
324
+ def valid_term_in_collection(value: str, project_id: str, collection_id: str) -> list[MatchingTerm]:
358
325
  """
359
326
  Check if the given value may or may not represent a term in the given collection. The function
360
327
  returns the terms that the value matches.
@@ -370,7 +337,7 @@ def valid_term_in_collection(value: str,
370
337
  composite so as to compare it as a regex to the value.
371
338
 
372
339
  If any of the provided ids (`project_id` or `collection_id`) is not found,
373
- the function raises a APIException.
340
+ the function raises a EsgvocNotFoundError.
374
341
 
375
342
  :param value: A value to be validated
376
343
  :type value: str
@@ -380,23 +347,19 @@ def valid_term_in_collection(value: str,
380
347
  :type collection_id: str
381
348
  :returns: The list of terms that the value matches.
382
349
  :rtype: list[MatchingTerm]
383
- :raises APIException: If any of the provided ids is not found
350
+ :raises EsgvocNotFoundError: If any of the provided ids is not found
384
351
  """
385
- with get_universe_session() as universe_session, \
386
- _get_project_session_with_exception(project_id) as project_session:
387
- return _valid_term_in_collection(value, project_id, collection_id,
388
- universe_session, project_session)
352
+ with get_universe_session() as universe_session, _get_project_session_with_exception(project_id) as project_session:
353
+ return _valid_term_in_collection(value, project_id, collection_id, universe_session, project_session)
389
354
 
390
355
 
391
- def _valid_term_in_project(value: str,
392
- project_id: str,
393
- universe_session: Session,
394
- project_session: Session) -> list[MatchingTerm]:
356
+ def _valid_term_in_project(
357
+ value: str, project_id: str, universe_session: Session, project_session: Session
358
+ ) -> list[MatchingTerm]:
395
359
  result = list()
396
360
  collections = _get_all_collections_in_project(project_session)
397
361
  for collection in collections:
398
- result.extend(_valid_term_in_collection(value, project_id, collection.id,
399
- universe_session, project_session))
362
+ result.extend(_valid_term_in_collection(value, project_id, collection.id, universe_session, project_session))
400
363
  return result
401
364
 
402
365
 
@@ -415,7 +378,7 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
415
378
  - if the composite hasn't got a separator, the function aggregates the parts of the \
416
379
  composite so as to compare it as a regex to the value.
417
380
 
418
- If the `project_id` is not found, the function raises a APIException.
381
+ If the `project_id` is not found, the function raises a EsgvocNotFoundError.
419
382
 
420
383
  :param value: A value to be validated
421
384
  :type value: str
@@ -423,10 +386,9 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
423
386
  :type project_id: str
424
387
  :returns: The list of terms that the value matches.
425
388
  :rtype: list[MatchingTerm]
426
- :raises APIException: If the `project_id` is not found
389
+ :raises EsgvocNotFoundError: If the `project_id` is not found
427
390
  """
428
- with get_universe_session() as universe_session, \
429
- _get_project_session_with_exception(project_id) as project_session:
391
+ with get_universe_session() as universe_session, _get_project_session_with_exception(project_id) as project_session:
430
392
  return _valid_term_in_project(value, project_id, universe_session, project_session)
431
393
 
432
394
 
@@ -454,447 +416,713 @@ def valid_term_in_all_projects(value: str) -> list[MatchingTerm]:
454
416
  with get_universe_session() as universe_session:
455
417
  for project_id in get_all_projects():
456
418
  with _get_project_session_with_exception(project_id) as project_session:
457
- result.extend(_valid_term_in_project(value, project_id,
458
- universe_session, project_session))
459
- return result
460
-
461
-
462
- def _find_terms_in_collection(collection_id: str,
463
- term_id: str,
464
- session: Session,
465
- settings: SearchSettings|None = None) -> Sequence[PTerm]:
466
- # Settings only apply on the term_id comparison.
467
- where_expression = _create_str_comparison_expression(field=PTerm.id,
468
- value=term_id,
469
- settings=settings)
470
- statement = select(PTerm).join(Collection).where(Collection.id==collection_id,
471
- where_expression)
472
- results = session.exec(statement)
473
- result = results.all()
419
+ result.extend(_valid_term_in_project(value, project_id, universe_session, project_session))
474
420
  return result
475
421
 
476
422
 
477
- def find_terms_in_collection(project_id:str,
478
- collection_id: str,
479
- term_id: str,
480
- settings: SearchSettings|None = None) \
481
- -> list[DataDescriptor]:
423
+ def get_all_terms_in_collection(
424
+ project_id: str, collection_id: str, selected_term_fields: Iterable[str] | None = None
425
+ ) -> list[DataDescriptor]:
482
426
  """
483
- Finds one or more terms, based on the specified search settings, in the given collection of a project.
427
+ Gets all terms of the given collection of a project.
484
428
  This function performs an exact match on the `project_id` and `collection_id`,
485
- and does **not** search for similar or related projects and collections.
486
- The given `term_id` is searched according to the search type specified in the parameter `settings`,
487
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
488
- If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
489
- If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
490
- the function returns an empty list.
491
-
492
- Behavior based on search type:
493
- - `EXACT` and absence of `settings`: returns zero or one term instance in the list.
494
- - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
495
- term instances in the list.
429
+ and does not search for similar or related projects and collections.
430
+ If any of the provided ids (`project_id` or `collection_id`) is not found, the function
431
+ returns an empty list.
496
432
 
497
433
  :param project_id: A project id
498
434
  :type project_id: str
499
- :param collection_id: A collection
435
+ :param collection_id: A collection id
500
436
  :type collection_id: str
501
- :param term_id: A term id to be found
502
- :type term_id: str
503
- :param settings: The search settings
504
- :type settings: SearchSettings|None
505
- :returns: A list of term instances. Returns an empty list if no matches are found.
437
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
438
+ fields of the terms are returned. If empty, selects the id and type fields.
439
+ :type selected_term_fields: Iterable[str] | None
440
+ :returns: a list of term instances. Returns an empty list if no matches are found.
506
441
  :rtype: list[DataDescriptor]
507
442
  """
508
- result: list[DataDescriptor] = list()
509
- if connection:=_get_project_connection(project_id):
443
+ result = list()
444
+ if connection := _get_project_connection(project_id):
510
445
  with connection.create_session() as session:
511
- terms = _find_terms_in_collection(collection_id, term_id, session, settings)
512
- instantiate_pydantic_terms(terms, result,
513
- settings.selected_term_fields if settings else None)
446
+ collection = _get_collection_in_project(collection_id, session)
447
+ if collection:
448
+ result = _get_all_terms_in_collection(collection, selected_term_fields)
514
449
  return result
515
450
 
516
451
 
517
- def _find_terms_from_data_descriptor_in_project(data_descriptor_id: str,
518
- term_id: str,
519
- session: Session,
520
- settings: SearchSettings|None = None) \
521
- -> Sequence[PTerm]:
522
- # Settings only apply on the term_id comparison.
523
- where_expression = _create_str_comparison_expression(field=PTerm.id,
524
- value=term_id,
525
- settings=settings)
526
- statement = select(PTerm).join(Collection).where(Collection.data_descriptor_id==data_descriptor_id,
527
- where_expression)
528
- results = session.exec(statement)
529
- result = results.all()
530
- return result
452
+ def _get_all_collections_in_project(session: Session) -> list[PCollection]:
453
+ project = session.get(Project, constants.SQLITE_FIRST_PK)
454
+ # Project can't be missing if session exists.
455
+ return project.collections # type: ignore
531
456
 
532
457
 
533
- def find_terms_from_data_descriptor_in_project(project_id: str,
534
- data_descriptor_id: str,
535
- term_id: str,
536
- settings: SearchSettings|None = None) \
537
- -> list[tuple[DataDescriptor, str]]:
458
+ def get_all_collections_in_project(project_id: str) -> list[str]:
538
459
  """
539
- Finds one or more terms in the given project which are instances of the given data descriptor
540
- in the universe, based on the specified search settings, in the given collection of a project.
541
- This function performs an exact match on the `project_id` and `data_descriptor_id`,
542
- and does **not** search for similar or related projects and data descriptors.
543
- The given `term_id` is searched according to the search type specified in the parameter `settings`,
544
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
545
- If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
546
- If any of the provided ids (`project_id`, `data_descriptor_id` or `term_id`) is not found,
547
- the function returns an empty list.
548
-
549
- Behavior based on search type:
550
- - `EXACT` and absence of `settings`: returns zero or one term instance and \
551
- collection id in the list.
552
- - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
553
- term instances and collection ids in the list.
460
+ Gets all collections of the given project.
461
+ This function performs an exact match on the `project_id` and
462
+ does not search for similar or related projects.
463
+ If the provided `project_id` is not found, the function returns an empty list.
554
464
 
555
465
  :param project_id: A project id
556
466
  :type project_id: str
557
- :param data_descriptor_id: A data descriptor
558
- :type data_descriptor_id: str
559
- :param term_id: A term id to be found
560
- :type term_id: str
561
- :param settings: The search settings
562
- :type settings: SearchSettings|None
563
- :returns: A list of tuple of term instances and related collection ids. \
564
- Returns an empty list if no matches are found.
565
- :rtype: list[tuple[DataDescriptor, str]]
467
+ :returns: A list of collection ids. Returns an empty list if no matches are found.
468
+ :rtype: list[str]
566
469
  """
567
470
  result = list()
568
- if connection:=_get_project_connection(project_id):
471
+ if connection := _get_project_connection(project_id):
569
472
  with connection.create_session() as session:
570
- terms = _find_terms_from_data_descriptor_in_project(data_descriptor_id,
571
- term_id,
572
- session,
573
- settings)
574
- for pterm in terms:
575
- collection_id = pterm.collection.id
576
- term = instantiate_pydantic_term(pterm,
577
- settings.selected_term_fields if settings else None)
578
- result.append((term, collection_id))
473
+ collections = _get_all_collections_in_project(session)
474
+ for collection in collections:
475
+ result.append(collection.id)
579
476
  return result
580
477
 
581
478
 
582
- def find_terms_from_data_descriptor_in_all_projects(data_descriptor_id: str,
583
- term_id: str,
584
- settings: SearchSettings|None = None) \
585
- -> list[tuple[list[tuple[DataDescriptor, str]], str]]:
586
- """
587
- Finds one or more terms in all projects which are instances of the given data descriptor
588
- in the universe, based on the specified search settings, in the given collection of a project.
589
- This function performs an exact match on the `data_descriptor_id`,
590
- and does **not** search for similar or related data descriptors.
591
- The given `term_id` is searched according to the search type specified in the parameter `settings`,
592
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
593
- If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
594
- If any of the provided ids (`data_descriptor_id` or `term_id`) is not found,
595
- the function returns an empty list.
479
+ def _get_all_terms_in_collection(
480
+ collection: PCollection, selected_term_fields: Iterable[str] | None
481
+ ) -> list[DataDescriptor]:
482
+ result: list[DataDescriptor] = list()
483
+ instantiate_pydantic_terms(collection.terms, result, selected_term_fields)
484
+ return result
596
485
 
597
- Behavior based on search type:
598
- - `EXACT` and absence of `settings`: returns zero or one term instance and \
599
- collection id in the list.
600
- - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
601
- term instances and collection ids in the list.
602
486
 
603
- :param data_descriptor_id: A data descriptor
604
- :type data_descriptor_id: str
605
- :param term_id: A term id to be found
606
- :type term_id: str
607
- :param settings: The search settings
608
- :type settings: SearchSettings|None
609
- :returns: A list of tuple of matching terms with their collection id, per project. \
610
- Returns an empty list if no matches are found.
611
- :rtype: list[tuple[list[tuple[DataDescriptor, str]], str]]
487
+ def get_all_terms_in_project(
488
+ project_id: str, selected_term_fields: Iterable[str] | None = None
489
+ ) -> list[DataDescriptor]:
612
490
  """
613
- project_ids = get_all_projects()
614
- result: list[tuple[list[tuple[DataDescriptor, str]], str]] = list()
615
- for project_id in project_ids:
616
- matching_terms = find_terms_from_data_descriptor_in_project(project_id,
617
- data_descriptor_id,
618
- term_id,
619
- settings)
620
- if matching_terms:
621
- result.append((matching_terms, project_id))
622
- return result
623
-
491
+ Gets all terms of the given project.
492
+ This function performs an exact match on the `project_id` and
493
+ does not search for similar or related projects.
494
+ Terms are unique within a collection but may have some synonyms in a project.
495
+ If the provided `project_id` is not found, the function returns an empty list.
624
496
 
625
- def _find_terms_in_project(term_id: str,
626
- session: Session,
627
- settings: SearchSettings|None) -> Sequence[PTerm]:
628
- where_expression = _create_str_comparison_expression(field=PTerm.id,
629
- value=term_id,
630
- settings=settings)
631
- statement = select(PTerm).where(where_expression)
632
- results = session.exec(statement).all()
633
- return results
497
+ :param project_id: A project id
498
+ :type project_id: str
499
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
500
+ fields of the terms are returned. If empty, selects the id and type fields.
501
+ :type selected_term_fields: Iterable[str] | None
502
+ :returns: A list of term instances. Returns an empty list if no matches are found.
503
+ :rtype: list[DataDescriptor]
504
+ """
505
+ result = list()
506
+ if connection := _get_project_connection(project_id):
507
+ with connection.create_session() as session:
508
+ collections = _get_all_collections_in_project(session)
509
+ for collection in collections:
510
+ # Term may have some synonyms in a project.
511
+ result.extend(_get_all_terms_in_collection(collection, selected_term_fields))
512
+ return result
634
513
 
635
514
 
636
- def find_terms_in_all_projects(term_id: str,
637
- settings: SearchSettings|None = None) \
638
- -> list[DataDescriptor]:
515
+ def get_all_terms_in_all_projects(
516
+ selected_term_fields: Iterable[str] | None = None,
517
+ ) -> list[tuple[str, list[DataDescriptor]]]:
639
518
  """
640
- Finds one or more terms, based on the specified search settings, in all projects.
641
- The given `term_id` is searched according to the search type specified in the parameter `settings`,
642
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
643
- If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
644
- Terms are unique within a collection but may have some synonyms within a project.
645
- If the provided `term_id` is not found, the function returns an empty list.
519
+ Gets all terms of all projects.
646
520
 
647
- :param term_id: A term id to be found
648
- :type term_id: str
649
- :param settings: The search settings
650
- :type settings: SearchSettings|None
651
- :returns: A list of term instances. Returns an empty list if no matches are found.
652
- :rtype: list[DataDescriptor]
521
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
522
+ fields of the terms are returned. If empty, selects the id and type fields.
523
+ :type selected_term_fields: Iterable[str] | None
524
+ :returns: A list of tuples, each made of a project id and the term instances of that project.
525
+ :rtype: list[tuple[str, list[DataDescriptor]]]
653
526
  """
654
527
  project_ids = get_all_projects()
655
528
  result = list()
656
529
  for project_id in project_ids:
657
- result.extend(find_terms_in_project(project_id, term_id, settings))
530
+ terms = get_all_terms_in_project(project_id, selected_term_fields)
531
+ result.append((project_id, terms))
658
532
  return result
659
533
 
660
534
 
661
- def find_terms_in_project(project_id: str,
662
- term_id: str,
663
- settings: SearchSettings|None = None) \
664
- -> list[DataDescriptor]:
535
+ def get_all_projects() -> list[str]:
665
536
  """
666
- Finds one or more terms, based on the specified search settings, in a project.
667
- This function performs an exact match on the `project_id` and
668
- does **not** search for similar or related projects.
669
- The given `term_id` is searched according to the search type specified in the parameter `settings`,
670
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
671
- If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
672
- Terms are unique within a collection but may have some synonyms within a project.
673
- If any of the provided ids (`project_id` or `term_id`) is not found, the function returns
674
- an empty list.
537
+ Gets all projects.
675
538
 
676
- :param project_id: A project id
539
+ :returns: A list of project ids.
540
+ :rtype: list[str]
541
+ """
542
+ return list(service.current_state.projects.keys())
543
+
544
+
545
+ def _get_term_in_project(term_id: str, session: Session) -> PTerm | None:
546
+ statement = select(PTerm).where(PTerm.id == term_id)
547
+ results = session.exec(statement)
548
+ result = results.first() # Term ids are not supposed to be unique within a project.
549
+ return result
550
+
551
+
552
+ def get_term_in_project(
553
+ project_id: str, term_id: str, selected_term_fields: Iterable[str] | None = None
554
+ ) -> DataDescriptor | None:
555
+ """
556
+ Returns the first occurrence of the terms, in the given project, whose id corresponds exactly to
557
+ the given term id.
558
+ Terms are unique within a collection but may have some synonyms in a project.
559
+ This function performs an exact match on the `project_id` and `term_id`, and does not search
560
+ for similar or related projects and terms.
561
+ If any of the provided ids (`project_id` or `term_id`) is not found,
562
+ the function returns `None`.
563
+
564
+ :param project_id: The id of the given project.
677
565
  :type project_id: str
678
- :param term_id: A term id to be found
566
+ :param term_id: The id of a term to be found.
679
567
  :type term_id: str
680
- :param settings: The search settings
681
- :type settings: SearchSettings|None
682
- :returns: A list of term instances. Returns an empty list if no matches are found.
683
- :rtype: list[DataDescriptor]
568
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
569
+ fields of the terms are returned. If empty, selects the id and type fields.
570
+ :type selected_term_fields: Iterable[str] | None
571
+ :returns: A term instance. Returns `None` if no match is found.
572
+ :rtype: DataDescriptor | None
684
573
  """
685
- result: list[DataDescriptor] = list()
686
- if connection:=_get_project_connection(project_id):
574
+ result: DataDescriptor | None = None
575
+ if connection := _get_project_connection(project_id):
687
576
  with connection.create_session() as session:
688
- terms = _find_terms_in_project(term_id, session, settings)
689
- instantiate_pydantic_terms(terms, result,
690
- settings.selected_term_fields if settings else None)
577
+ term_found = _get_term_in_project(term_id, session)
578
+ if term_found:
579
+ result = instantiate_pydantic_term(term_found, selected_term_fields)
691
580
  return result
692
581
 
693
582
 
694
- def get_all_terms_in_collection(project_id: str,
695
- collection_id: str,
696
- selected_term_fields: Iterable[str]|None = None)\
697
- -> list[DataDescriptor]:
583
+ def _get_term_in_collection(collection_id: str, term_id: str, session: Session) -> PTerm | None:
584
+ statement = select(PTerm).join(PCollection).where(PCollection.id == collection_id, PTerm.id == term_id)
585
+ results = session.exec(statement)
586
+ result = results.one_or_none()
587
+ return result
588
+
589
+
590
+ def get_term_in_collection(
591
+ project_id: str, collection_id: str, term_id: str, selected_term_fields: Iterable[str] | None = None
592
+ ) -> DataDescriptor | None:
698
593
  """
699
- Gets all terms of the given collection of a project.
700
- This function performs an exact match on the `project_id` and `collection_id`,
701
- and does **not** search for similar or related projects and collections.
702
- If any of the provided ids (`project_id` or `collection_id`) is not found, the function
703
- returns an empty list.
594
+ Returns the term, in the given project and collection,
595
+ whose id corresponds exactly to the given term id.
596
+ This function performs an exact match on the `project_id`, `collection_id` and `term_id`,
597
+ and does not search for similar or related projects, collections and terms.
598
+ If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
599
+ the function returns `None`.
704
600
 
705
- :param project_id: A project id
601
+ :param project_id: The id of the given project.
706
602
  :type project_id: str
707
- :param collection_id: A collection id
603
+ :param collection_id: The id of the given collection.
708
604
  :type collection_id: str
605
+ :param term_id: The id of a term to be found.
606
+ :type term_id: str
709
607
  :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
710
- fields of the terms are returned.
711
- :type selected_term_fields: Iterable[str]|None
712
- :returns: a list of term instances. Returns an empty list if no matches are found.
713
- :rtype: list[DataDescriptor]
608
+ fields of the terms are returned. If empty, selects the id and type fields.
609
+ :type selected_term_fields: Iterable[str] | None
610
+ :returns: A term instance. Returns `None` if no match is found.
611
+ :rtype: DataDescriptor | None
714
612
  """
715
- result = list()
716
- if connection:=_get_project_connection(project_id):
613
+ result: DataDescriptor | None = None
614
+ if connection := _get_project_connection(project_id):
717
615
  with connection.create_session() as session:
718
- collections = _find_collections_in_project(collection_id,
719
- session,
720
- None)
721
- if collections:
722
- collection = collections[0]
723
- result = _get_all_terms_in_collection(collection, selected_term_fields)
616
+ term_found = _get_term_in_collection(collection_id, term_id, session)
617
+ if term_found:
618
+ result = instantiate_pydantic_term(term_found, selected_term_fields)
724
619
  return result
725
620
 
726
621
 
727
- def _find_collections_in_project(collection_id: str,
728
- session: Session,
729
- settings: SearchSettings|None) \
730
- -> Sequence[Collection]:
731
- where_exp = _create_str_comparison_expression(field=Collection.id,
732
- value=collection_id,
733
- settings=settings)
734
- statement = select(Collection).where(where_exp)
622
+ def _get_collection_in_project(collection_id: str, session: Session) -> PCollection | None:
623
+ statement = select(PCollection).where(PCollection.id == collection_id)
735
624
  results = session.exec(statement)
736
- result = results.all()
625
+ result = results.one_or_none()
737
626
  return result
738
627
 
739
628
 
740
- def find_collections_in_project(project_id: str,
741
- collection_id: str,
742
- settings: SearchSettings|None = None) \
743
- -> list[dict]:
629
+ def get_collection_in_project(project_id: str, collection_id: str) -> tuple[str, dict] | None:
744
630
  """
745
- Finds one or more collections of the given project.
746
- This function performs an exact match on the `project_id` and
747
- does **not** search for similar or related projects.
748
- The given `collection_id` is searched according to the search type specified in
749
- the parameter `settings`,
750
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
751
- If the parameter `settings` is `None`, this function performs an exact match on the `collection_id`.
752
- If any of the provided ids (`project_id` or `collection_id`) is not found, the function returns
753
- an empty list.
754
-
755
- Behavior based on search type:
756
- - `EXACT` and absence of `settings`: returns zero or one collection context in the list.
757
- - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
758
- collection contexts in the list.
631
+ Returns the collection, in the given project, whose id corresponds exactly to
632
+ the given collection id.
633
+ This function performs an exact match on the `project_id` and `collection_id`, and does not search
634
+ for similar or related projects and collections.
635
+ If any of the provided ids (`project_id` or `collection_id`) is not found,
636
+ the function returns `None`.
759
637
 
760
- :param project_id: A project id
638
+ :param project_id: The id of the given project.
761
639
  :type project_id: str
762
- :param collection_id: A collection id to be found
640
+ :param collection_id: The id of a collection to be found.
763
641
  :type collection_id: str
764
- :param settings: The search settings
765
- :type settings: SearchSettings|None
766
- :returns: A list of collection contexts. Returns an empty list if no matches are found.
767
- :rtype: list[dict]
642
+ :returns: A collection id and context. Returns `None` if no match is found.
643
+ :rtype: tuple[str, dict] | None
768
644
  """
769
- result = list()
770
- if connection:=_get_project_connection(project_id):
645
+ result: tuple[str, dict] | None = None
646
+ if connection := _get_project_connection(project_id):
771
647
  with connection.create_session() as session:
772
- collections = _find_collections_in_project(collection_id,
773
- session,
774
- settings)
775
- for collection in collections:
776
- result.append(collection.context)
648
+ collection_found = _get_collection_in_project(collection_id, session)
649
+ if collection_found:
650
+ result = collection_found.id, collection_found.context
777
651
  return result
778
652
 
779
653
 
780
- def _get_all_collections_in_project(session: Session) -> list[Collection]:
781
- project = session.get(Project, constants.SQLITE_FIRST_PK)
782
- # Project can't be missing if session exists.
783
- return project.collections # type: ignore
784
-
785
-
786
- def get_all_collections_in_project(project_id: str) -> list[str]:
654
+ def get_project(project_id: str) -> ProjectSpecs | None:
787
655
  """
788
- Gets all collections of the given project.
656
+ Gets a project and returns its specifications.
789
657
  This function performs an exact match on the `project_id` and
790
- does **not** search for similar or related projects.
791
- If the provided `project_id` is not found, the function returns an empty list.
658
+ does not search for similar or related projects.
659
+ If the provided `project_id` is not found, the function returns `None`.
792
660
 
793
- :param project_id: A project id
661
+ :param project_id: A project id to be found
794
662
  :type project_id: str
795
- :returns: A list of collection ids. Returns an empty list if no matches are found.
796
- :rtype: list[str]
663
+ :returns: The specs of the project found. Returns `None` if no matches are found.
664
+ :rtype: ProjectSpecs | None
797
665
  """
798
- result = list()
799
- if connection:=_get_project_connection(project_id):
666
+ result: ProjectSpecs | None = None
667
+ if connection := _get_project_connection(project_id):
800
668
  with connection.create_session() as session:
801
- collections = _get_all_collections_in_project(session)
802
- for collection in collections:
803
- result.append(collection.id)
669
+ project = session.get(Project, constants.SQLITE_FIRST_PK)
670
+ try:
671
+ # Project can't be missing if session exists.
672
+ result = ProjectSpecs(**project.specs) # type: ignore
673
+ except Exception as e:
674
+ msg = f"unable to read specs in project '{project_id}'"
675
+ raise EsgvocDbError(msg) from e
804
676
  return result
805
677
 
806
678
 
807
- def _get_all_terms_in_collection(collection: Collection,
808
- selected_term_fields: Iterable[str]|None) -> list[DataDescriptor]:
809
- result: list[DataDescriptor] = list()
810
- instantiate_pydantic_terms(collection.terms, result, selected_term_fields)
679
+ def _get_collection_from_data_descriptor_in_project(data_descriptor_id: str, session: Session) -> PCollection | None:
680
+ statement = select(PCollection).where(PCollection.data_descriptor_id == data_descriptor_id)
681
+ result = session.exec(statement).one_or_none()
811
682
  return result
812
683
 
813
684
 
814
- def get_all_terms_in_project(project_id: str,
815
- selected_term_fields: Iterable[str]|None = None) -> list[DataDescriptor]:
685
+ def get_collection_from_data_descriptor_in_project(project_id: str, data_descriptor_id: str) -> tuple[str, dict] | None:
816
686
  """
817
- Gets all terms of the given project.
818
- This function performs an exact match on the `project_id` and
819
- does **not** search for similar or related projects.
820
- Terms are unique within a collection but may have some synonyms in a project.
821
- If the provided `project_id` is not found, the function returns an empty list.
687
+ Returns the collection, in the given project, that corresponds to the given data descriptor
688
+ in the universe.
689
+ This function performs an exact match on the `project_id` and `data_descriptor_id`,
690
+ and does not search for similar or related projects and data descriptors.
691
+ If any of the provided ids (`project_id` or `data_descriptor_id`) is not found, or if
692
+ there is no collection corresponding to the given data descriptor, the function returns `None`.
822
693
 
823
- :param project_id: A project id
694
+ :param project_id: The id of the given project.
824
695
  :type project_id: str
825
- :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
826
- fields of the terms are returned.
827
- :type selected_term_fields: Iterable[str]|None
828
- :returns: A list of term instances. Returns an empty list if no matches are found.
829
- :rtype: list[DataDescriptor]
696
+ :param data_descriptor_id: The id of the given data descriptor.
697
+ :type data_descriptor_id: str
698
+ :returns: A collection id and context. Returns `None` if no matches are found.
699
+ :rtype: tuple[str, dict] | None
700
+ """
701
+ result: tuple[str, dict] | None = None
702
+ if connection := _get_project_connection(project_id):
703
+ with connection.create_session() as session:
704
+ collection_found = _get_collection_from_data_descriptor_in_project(data_descriptor_id, session)
705
+ if collection_found:
706
+ result = collection_found.id, collection_found.context
707
+ return result
708
+
709
+
710
+ def get_collection_from_data_descriptor_in_all_projects(data_descriptor_id: str) -> list[tuple[str, str, dict]]:
711
+ """
712
+ Returns the collections, in all projects, that correspond to the given data descriptor
713
+ in the universe.
714
+ This function performs an exact match on `data_descriptor_id`,
715
+ and does not search for similar or related data descriptors.
716
+ If the provided `data_descriptor_id` is not found, or if
717
+ there is no collection corresponding to the given data descriptor, the function returns
718
+ an empty list.
719
+
720
+ :param data_descriptor_id: The id of the given data descriptor.
721
+ :type data_descriptor_id: str
722
+ :returns: A list of tuples made of a project id, a collection id and the collection context. \
723
+ Returns an empty list if no matches are found.
724
+ :rtype: list[tuple[str, str, dict]]
830
725
  """
831
726
  result = list()
832
- if connection:=_get_project_connection(project_id):
727
+ project_ids = get_all_projects()
728
+ for project_id in project_ids:
729
+ collection_found = get_collection_from_data_descriptor_in_project(project_id, data_descriptor_id)
730
+ if collection_found:
731
+ result.append((project_id, collection_found[0], collection_found[1]))
732
+ return result
733
+
734
+
735
+ def _get_term_from_universe_term_id_in_project(
736
+ data_descriptor_id: str, universe_term_id: str, project_session: Session
737
+ ) -> PTerm | None:
738
+ statement = (
739
+ select(PTerm)
740
+ .join(PCollection)
741
+ .where(PCollection.data_descriptor_id == data_descriptor_id, PTerm.id == universe_term_id)
742
+ )
743
+ results = project_session.exec(statement)
744
+ result = results.one_or_none()
745
+ return result
746
+
747
+
748
+ def get_term_from_universe_term_id_in_project(
749
+ project_id: str, data_descriptor_id: str, universe_term_id: str, selected_term_fields: Iterable[str] | None = None
750
+ ) -> tuple[str, DataDescriptor] | None:
751
+ """
752
+ Returns the term, in the given project, that corresponds to the given term in the universe.
753
+ This function performs an exact match on the `project_id`, `data_descriptor_id`
754
+ and `universe_term_id`, and does not search for similar or related projects, data descriptors
755
+ and terms. If any of the provided ids (`project_id`, `data_descriptor_id` or `universe_term_id`)
756
+ is not found, or if there is no project term corresponding to the given universe term,
757
+ the function returns `None`.
758
+
759
+ :param project_id: The id of the given project.
760
+ :type project_id: str
761
+ :param data_descriptor_id: The id of the data descriptor that contains the given universe term.
762
+ :type data_descriptor_id: str
763
+ :param universe_term_id: The id of the given universe term.
764
+ :type universe_term_id: str
765
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
766
+ fields of the terms are returned. If empty, selects the id and type fields.
767
+ :type selected_term_fields: Iterable[str] | None
768
+ :returns: A collection id and the project term instance. Returns `None` if no matches are found.
769
+ :rtype: tuple[str, DataDescriptor] | None
770
+ """
771
+ result: tuple[str, DataDescriptor] | None = None
772
+ if connection := _get_project_connection(project_id):
833
773
  with connection.create_session() as session:
834
- collections = _get_all_collections_in_project(session)
835
- for collection in collections:
836
- # Term may have some synonyms in a project.
837
- result.extend(_get_all_terms_in_collection(collection, selected_term_fields))
774
+ term_found = _get_term_from_universe_term_id_in_project(data_descriptor_id, universe_term_id, session)
775
+ if term_found:
776
+ pydantic_term = instantiate_pydantic_term(term_found, selected_term_fields)
777
+ result = (term_found.collection.id, pydantic_term)
838
778
  return result
839
779
 
840
780
 
841
- def get_all_terms_in_all_projects(selected_term_fields: Iterable[str]|None = None) \
842
- -> list[tuple[str, list[DataDescriptor]]]:
781
+ def get_term_from_universe_term_id_in_all_projects(
782
+ data_descriptor_id: str, universe_term_id: str, selected_term_fields: Iterable[str] | None = None
783
+ ) -> list[tuple[str, str, DataDescriptor]]:
843
784
  """
844
- Gets all terms of all projects.
785
+ Returns the terms, in all projects, that correspond to the given term in the universe.
786
+ This function performs an exact match on the `data_descriptor_id`
787
+ and `universe_term_id`, and does not search for similar or related data descriptors
788
+ and terms. If any of the provided ids (`data_descriptor_id` or `universe_term_id`)
789
+ is not found, or if there is no project term corresponding to the given universe term
790
+ the function returns an empty list.
845
791
 
792
+ :param data_descriptor_id: The id of the data descriptor that contains the given universe term.
793
+ :type data_descriptor_id: str
794
+ :param universe_term_id: The id of the given universe term.
795
+ :type universe_term_id: str
846
796
  :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
847
- fields of the terms are returned.
848
- :type selected_term_fields: Iterable[str]|None
849
- :returns: A list of tuple project_id and term instances of that project.
850
- :rtype: list[tuple[str, list[DataDescriptor]]]
797
+ fields of the terms are returned. If empty, selects the id and type fields.
798
+ :type selected_term_fields: Iterable[str] | None
799
+ :returns: A list of tuples, each holding a project id, a collection id and the project term instance. \
800
+ Returns an empty list if no matches are found.
801
+ :rtype: list[tuple[str, str, DataDescriptor]]
851
802
  """
803
+ result: list[tuple[str, str, DataDescriptor]] = list()
852
804
  project_ids = get_all_projects()
853
- result = list()
854
805
  for project_id in project_ids:
855
- terms = get_all_terms_in_project(project_id, selected_term_fields)
856
- result.append((project_id, terms))
806
+ term_found = get_term_from_universe_term_id_in_project(
807
+ project_id, data_descriptor_id, universe_term_id, selected_term_fields
808
+ )
809
+ if term_found:
810
+ result.append((project_id, term_found[0], term_found[1]))
857
811
  return result
858
812
 
859
813
 
860
- def find_project(project_id: str) -> ProjectSpecs|None:
814
+ def _find_collections_in_project(
815
+ expression: str, session: Session, only_id: bool = False, limit: int | None = None, offset: int | None = None
816
+ ) -> Sequence[PCollection]:
817
+ matching_condition = generate_matching_condition(PCollectionFTS5, expression, only_id)
818
+ tmp_statement = select(PCollectionFTS5).where(matching_condition)
819
+ statement = select(PCollection).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
820
+ return execute_match_statement(expression, statement, session)
821
+
822
+
823
+ def find_collections_in_project(
824
+ expression: str, project_id: str, only_id: bool = False, limit: int | None = None, offset: int | None = None
825
+ ) -> list[tuple[str, dict]]:
861
826
  """
862
- Finds a project and returns its specifications.
863
- This function performs an exact match on the `project_id` and
864
- does **not** search for similar or related projects.
865
- If the provided `project_id` is not found, the function returns `None`.
827
+ Find collections in the given project based on a full text search defined by the given `expression`.
828
+ The `expression` can be composed of one or multiple keywords.
829
+ The keywords can be combined with boolean operators: `AND`,
830
+ `OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
831
+ if no boolean operator is provided, whitespaces are handled as if there were
832
+ an implicit AND operator between each pair of keywords. Note that this
833
+ function does not provide any priority operator (parenthesis).
834
+ Keywords can define prefixes when adding a `*` at the end of them.
835
+ If the expression is composed of only one keyword, the function
836
+ automatically defines it as a prefix.
837
+ The function returns a list of collection ids and contexts, sorted according to the
838
+ bm25 ranking metric (list index `0` has the highest rank).
839
+ This function performs an exact match on the `project_id`,
840
+ and does not search for similar or related projects.
841
+ If the provided `expression` does not hit any collection or the given `project_id` does not
842
+ exactly match the id of a project, the function returns an empty list.
843
+ The function searches for the `expression` in the collection specifications.
844
+ However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
845
+ collections. **At the moment, `only_id` is set to `True` as the collections
846
+ haven't got any description.**
847
+
848
+ :param expression: The full text search expression.
849
+ :type expression: str
850
+ :param project_id: The id of the given project.
851
+ :type project_id: str
852
+ :param only_id: Performs the search only on ids, otherwise on all the specifications.
853
+ :type only_id: bool
854
+ :param limit: Limit the number of returned items found. Returns all items found if \
855
+ `limit` is either `None`, zero or negative.
856
+ :type limit: int | None
857
+ :param offset: Skips `offset` number of items found. Ignored if `offset` is \
858
+ either `None`, zero or negative.
859
+ :type offset: int | None
860
+ :returns: A list of collection ids and contexts. Returns an empty list if no matches are found.
861
+ :rtype: list[tuple[str, dict]]
862
+ :raises EsgvocValueError: If the `expression` cannot be interpreted.
863
+ """
864
+ result: list[tuple[str, dict]] = list()
865
+ if connection := _get_project_connection(project_id):
866
+ with connection.create_session() as session:
867
+ collections_found = _find_collections_in_project(expression, session, only_id, limit, offset)
868
+ for collection in collections_found:
869
+ result.append((collection.id, collection.context))
870
+ return result
866
871
 
867
- :param project_id: A project id to be found
872
+
873
+ def _find_terms_in_collection(
874
+ expression: str,
875
+ collection_id: str,
876
+ session: Session,
877
+ only_id: bool = False,
878
+ limit: int | None = None,
879
+ offset: int | None = None,
880
+ ) -> Sequence[PTerm]:
881
+ matching_condition = generate_matching_condition(PTermFTS5, expression, only_id)
882
+ where_condition = PCollection.id == collection_id, matching_condition
883
+ tmp_statement = select(PTermFTS5).join(PCollection).where(*where_condition)
884
+ statement = select(PTerm).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
885
+ return execute_match_statement(expression, statement, session)
886
+
887
+
888
+ def _find_terms_in_project(
889
+ expression: str, session: Session, only_id: bool = False, limit: int | None = None, offset: int | None = None
890
+ ) -> Sequence[PTerm]:
891
+ matching_condition = generate_matching_condition(PTermFTS5, expression, only_id)
892
+ tmp_statement = select(PTermFTS5).where(matching_condition)
893
+ statement = select(PTerm).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
894
+ return execute_match_statement(expression, statement, session)
895
+
896
+
897
+ def find_terms_in_collection(
898
+ expression: str,
899
+ project_id: str,
900
+ collection_id: str,
901
+ only_id: bool = False,
902
+ limit: int | None = None,
903
+ offset: int | None = None,
904
+ selected_term_fields: Iterable[str] | None = None,
905
+ ) -> list[DataDescriptor]:
906
+ """
907
+ Find terms in the given project and collection based on a full text search defined by the given
908
+ `expression`.
909
+ The `expression` can be composed of one or multiple keywords.
910
+ The keywords can be combined with boolean operators: `AND`,
911
+ `OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
912
+ if no boolean operator is provided, whitespaces are handled as if there were
913
+ an implicit AND operator between each pair of keywords. Note that this
914
+ function does not provide any priority operator (parenthesis).
915
+ Keywords can define prefixes when adding a `*` at the end of them.
916
+ If the expression is composed of only one keyword, the function
917
+ automatically defines it as a prefix.
918
+ The function returns a list of term instances, sorted according to the
919
+ bm25 ranking metric (list index `0` has the highest rank).
920
+ This function performs an exact match on the `project_id` and `collection_id`,
921
+ and does not search for similar or related projects and collections.
922
+ If the provided `expression` does not hit any term or if any of the provided ids
923
+ (`project_id` or `collection_id`) is not found, the function returns an empty list.
924
+ The function searches for the `expression` in the term specifications.
925
+ However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
926
+ terms.
927
+
928
+ :param expression: The full text search expression.
929
+ :type expression: str
930
+ :param project_id: The id of the given project.
868
931
  :type project_id: str
869
- :returns: The specs of the project found. Returns `None` if no matches are found.
870
- :rtype: ProjectSpecs|None
932
+ :param collection_id: The id of the given collection.
933
+ :type collection_id: str
934
+ :param only_id: Performs the search only on ids, otherwise on all the specifications.
935
+ :type only_id: bool
936
+ :param limit: Limit the number of returned items found. Returns all items found if \
937
+ `limit` is either `None`, zero or negative.
938
+ :type limit: int | None
939
+ :param offset: Skips `offset` number of items found. Ignored if `offset` is \
940
+ either `None`, zero or negative.
941
+ :type offset: int | None
942
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
943
+ fields of the terms are returned. If empty, selects the id and type fields.
944
+ :type selected_term_fields: Iterable[str] | None
945
+ :returns: A list of term instances. Returns an empty list if no matches are found.
946
+ :rtype: list[DataDescriptor]
947
+ :raises EsgvocValueError: If the `expression` cannot be interpreted.
871
948
  """
872
- result: ProjectSpecs|None = None
873
- if connection:=_get_project_connection(project_id):
949
+ result: list[DataDescriptor] = list()
950
+ if connection := _get_project_connection(project_id):
874
951
  with connection.create_session() as session:
875
- project = session.get(Project, constants.SQLITE_FIRST_PK)
876
- try:
877
- # Project can't be missing if session exists.
878
- result = ProjectSpecs(**project.specs) # type: ignore
879
- except Exception as e:
880
- msg = f'Unable to read specs in project {project_id}'
881
- raise RuntimeError(msg) from e
952
+ pterms_found = _find_terms_in_collection(expression, collection_id, session, only_id, limit, offset)
953
+ instantiate_pydantic_terms(pterms_found, result, selected_term_fields)
882
954
  return result
883
955
 
884
956
 
885
- def get_all_projects() -> list[str]:
957
+ def find_terms_in_project(
958
+ expression: str,
959
+ project_id: str,
960
+ only_id: bool = False,
961
+ limit: int | None = None,
962
+ offset: int | None = None,
963
+ selected_term_fields: Iterable[str] | None = None,
964
+ ) -> list[DataDescriptor]:
886
965
  """
887
- Gets all projects.
966
+ Find terms in the given project based on a full text search defined by the given `expression`.
967
+ The `expression` can be composed of one or multiple keywords.
968
+ The keywords can be combined with boolean operators: `AND`,
969
+ `OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
970
+ if no boolean operator is provided, whitespaces are handled as if there were
971
+ an implicit AND operator between each pair of keywords. Note that this
972
+ function does not provide any priority operator (parenthesis).
973
+ Keywords can define prefixes when adding a `*` at the end of them.
974
+ If the expression is composed of only one keyword, the function
975
+ automatically defines it as a prefix.
976
+ The function returns a list of term instances, sorted according to the
977
+ bm25 ranking metric (list index `0` has the highest rank).
978
+ This function performs an exact match on the `project_id`,
979
+ and does not search for similar or related projects.
980
+ If the provided `expression` does not hit any term or if the provided `project_id` is
981
+ not found, the function returns an empty list.
982
+ The function searches for the `expression` in the term specifications.
983
+ However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
984
+ terms.
985
+
986
+ :param expression: The full text search expression.
987
+ :type expression: str
988
+ :param project_id: The id of the given project.
989
+ :type project_id: str
990
+ :param only_id: Performs the search only on ids, otherwise on all the specifications.
991
+ :type only_id: bool
992
+ :param limit: Limit the number of returned items found. Returns all items found if \
993
+ `limit` is either `None`, zero or negative.
994
+ :type limit: int | None
995
+ :param offset: Skips `offset` number of items found. Ignored if `offset` is \
996
+ either `None`, zero or negative.
997
+ :type offset: int | None
998
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
999
+ fields of the terms are returned. If empty, selects the id and type fields.
1000
+ :type selected_term_fields: Iterable[str] | None
1001
+ :returns: A list of term instances. Returns an empty list if no matches are found.
1002
+ :rtype: list[DataDescriptor]
1003
+ :raises EsgvocValueError: If the `expression` cannot be interpreted.
1004
+ """
1005
+ result: list[DataDescriptor] = list()
1006
+ if connection := _get_project_connection(project_id):
1007
+ with connection.create_session() as session:
1008
+ pterms_found = _find_terms_in_project(expression, session, only_id, limit, offset)
1009
+ instantiate_pydantic_terms(pterms_found, result, selected_term_fields)
1010
+ return result
888
1011
 
889
- :returns: A list of project ids.
890
- :rtype: list[str]
1012
+
1013
+ def find_terms_in_all_projects(
1014
+ expression: str,
1015
+ only_id: bool = False,
1016
+ limit: int | None = None,
1017
+ offset: int | None = None,
1018
+ selected_term_fields: Iterable[str] | None = None,
1019
+ ) -> list[tuple[str, list[DataDescriptor]]]:
891
1020
  """
892
- return list(service.current_state.projects.keys())
1021
+ Find terms in all projects based on a full text search defined by the given `expression`.
1022
+ The `expression` can be composed of one or multiple keywords.
1023
+ The keywords can be combined with boolean operators: `AND`,
1024
+ `OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
1025
+ if no boolean operator is provided, whitespaces are handled as if there were
1026
+ an implicit AND operator between each pair of keywords. Note that this
1027
+ function does not provide any priority operator (parenthesis).
1028
+ Keywords can define prefixes when adding a `*` at the end of them.
1029
+ If the expression is composed of only one keyword, the function
1030
+ automatically defines it as a prefix.
1031
+ The function returns a list of project ids and term instances, sorted according to the
1032
+ bm25 ranking metric (list index `0` has the highest rank).
1033
+ If the provided `expression` does not hit any term, the function returns an empty list.
1034
+ The function searches for the `expression` in the term specifications.
1035
+ However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
1036
+ terms.
1037
+
1038
+ :param expression: The full text search expression.
1039
+ :type expression: str
1040
+ :param only_id: Performs the search only on ids, otherwise on all the specifications.
1041
+ :type only_id: bool
1042
+ :param limit: Limit the number of returned items found. Returns all items found if \
1043
+ `limit` is either `None`, zero or negative.
1044
+ :type limit: int | None
1045
+ :param offset: Skips `offset` number of items found. Ignored if `offset` is \
1046
+ either `None`, zero or negative.
1047
+ :type offset: int | None
1048
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
1049
+ fields of the terms are returned. If empty, selects the id and type fields.
1050
+ :type selected_term_fields: Iterable[str] | None
1051
+ :returns: A list of project ids and term instances. Returns an empty list if no matches are found.
1052
+ :rtype: list[tuple[str, list[DataDescriptor]]]
1053
+ :raises EsgvocValueError: If the `expression` cannot be interpreted.
1054
+ """
1055
+ result: list[tuple[str, list[DataDescriptor]]] = list()
1056
+ project_ids = get_all_projects()
1057
+ for project_id in project_ids:
1058
+ terms_found = find_terms_in_project(expression, project_id, only_id, limit, offset, selected_term_fields)
1059
+ if terms_found:
1060
+ result.append((project_id, terms_found))
1061
+ return result
893
1062
 
894
1063
 
895
- if __name__ == "__main__":
896
- settings = SearchSettings()
897
- settings.selected_term_fields = ('id', 'drs_name')
898
- settings.case_sensitive = False
899
- matching_terms = find_terms_from_data_descriptor_in_all_projects('organisation', 'IpsL', settings)
900
- print(matching_terms)
1064
+ def find_items_in_project(
1065
+ expression: str, project_id: str, only_id: bool = False, limit: int | None = None, offset: int | None = None
1066
+ ) -> list[Item]:
1067
+ """
1068
+ Find items, at the moment terms and collections, in the given project based on a full-text
1069
+ search defined by the given `expression`.
1070
+ The `expression` can be composed of one or multiple keywords.
1071
+ The keywords can be combined with boolean operators: `AND`,
1072
+ `OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
1073
+ if no boolean operator is provided, whitespaces are handled as if there were
1074
+ an implicit AND operator between each pair of keywords. Note that this
1075
+ function does not provide any priority operator (parenthesis).
1076
+ Keywords can define prefixes when adding a `*` at the end of them.
1077
+ If the expression is composed of only one keyword, the function
1078
+ automatically defines it as a prefix.
1079
+ The function returns a list of item instances sorted according to the
1080
+ bm25 ranking metric (list index `0` has the highest rank).
1081
+ This function performs an exact match on the `project_id`,
1082
+ and does not search for similar or related projects.
1083
+ If the provided `expression` does not hit any item, or the provided `project_id` is not found,
1084
+ the function returns an empty list.
1085
+ The function searches for the `expression` in the term and collection specifications.
1086
+ However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
1087
+ terms and collections. **At the moment, `only_id` is set to `True` for the collections because
1088
+ they haven't got any description.**
1089
+
1090
+ :param expression: The full text search expression.
1091
+ :type expression: str
1092
+ :param only_id: Performs the search only on ids, otherwise on all the specifications.
1093
+ :type only_id: bool
1094
+ :param limit: Limit the number of returned items found. Returns all items found if \
1095
+ `limit` is either `None`, zero or negative.
1096
+ :type limit: int | None
1097
+ :param offset: Skips `offset` number of items found. Ignored if `offset` is \
1098
+ either `None`, zero or negative.
1099
+ :type offset: int | None
1100
+ :returns: A list of item instances. Returns an empty list if no matches are found.
1101
+ :rtype: list[Item]
1102
+ :raises EsgvocValueError: If the `expression` cannot be interpreted.
1103
+ """
1104
+ # TODO: execute union query when it will be possible to compute parent of terms and collections.
1105
+ result = list()
1106
+ if connection := _get_project_connection(project_id):
1107
+ with connection.create_session() as session:
1108
+ processed_expression = process_expression(expression)
1109
+ if only_id:
1110
+ collection_column = col(PCollectionFTS5.id)
1111
+ term_column = col(PTermFTS5.id)
1112
+ else:
1113
+ collection_column = col(PCollectionFTS5.id) # TODO: use specs when implemented!
1114
+ term_column = col(PTermFTS5.specs) # type: ignore
1115
+ collection_where_condition = collection_column.match(processed_expression)
1116
+ collection_statement = select(PCollectionFTS5.id,
1117
+ text("'collection' AS TYPE"),
1118
+ text(f"'{project_id}' AS TYPE"),
1119
+ text('rank')).where(collection_where_condition)
1120
+ term_where_condition = term_column.match(processed_expression)
1121
+ term_statement = select(PTermFTS5.id,
1122
+ text("'term' AS TYPE"),
1123
+ PCollection.id,
1124
+ text('rank')).join(PCollection) \
1125
+ .where(term_where_condition)
1126
+ result = execute_find_item_statements(session, processed_expression, collection_statement,
1127
+ term_statement, limit, offset)
1128
+ return result