esgvoc 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic. Click here for more details.

esgvoc/api/projects.py CHANGED
@@ -1,31 +1,44 @@
1
1
  import re
2
- from collections.abc import Iterable, Sequence
2
+ from typing import Iterable, Sequence
3
3
 
4
- from sqlmodel import Session, and_, select
4
+ from sqlalchemy import text
5
+ from sqlmodel import Session, and_, col, select
5
6
 
6
7
  import esgvoc.api.universe as universe
7
8
  import esgvoc.core.constants as constants
8
9
  import esgvoc.core.service as service
9
- from esgvoc.api._utils import (APIException, get_universe_session,
10
- instantiate_pydantic_term,
11
- instantiate_pydantic_terms)
12
10
  from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor
13
11
  from esgvoc.api.project_specs import ProjectSpecs
14
- from esgvoc.api.report import (ProjectTermError, UniverseTermError,
15
- ValidationReport)
16
- from esgvoc.api.search import (MatchingTerm, SearchSettings,
17
- _create_str_comparison_expression)
12
+ from esgvoc.api.report import ProjectTermError, UniverseTermError, ValidationReport
13
+ from esgvoc.api.search import (
14
+ Item,
15
+ MatchingTerm,
16
+ execute_find_item_statements,
17
+ execute_match_statement,
18
+ generate_matching_condition,
19
+ get_universe_session,
20
+ handle_rank_limit_offset,
21
+ instantiate_pydantic_term,
22
+ instantiate_pydantic_terms,
23
+ )
18
24
  from esgvoc.core.db.connection import DBConnection
19
25
  from esgvoc.core.db.models.mixins import TermKind
20
- from esgvoc.core.db.models.project import Collection, Project, PTerm
26
+ from esgvoc.core.db.models.project import (
27
+ Collection,
28
+ PCollectionFTS5,
29
+ Project,
30
+ PTerm,
31
+ PTermFTS5,
32
+ )
21
33
  from esgvoc.core.db.models.universe import UTerm
34
+ from esgvoc.core.exceptions import EsgvocDbError, EsgvocNotFoundError, EsgvocNotImplementedError, EsgvocValueError
22
35
 
23
36
  # [OPTIMIZATION]
24
37
  _VALID_TERM_IN_COLLECTION_CACHE: dict[str, list[MatchingTerm]] = dict()
25
- _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE: dict[str, list[UniverseTermError|ProjectTermError]] = dict()
38
+ _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE: dict[str, list[UniverseTermError | ProjectTermError]] = dict()
26
39
 
27
40
 
28
- def _get_project_connection(project_id: str) -> DBConnection|None:
41
+ def _get_project_connection(project_id: str) -> DBConnection | None:
29
42
  if project_id in service.current_state.projects:
30
43
  return service.current_state.projects[project_id].db_connection
31
44
  else:
@@ -33,38 +46,36 @@ def _get_project_connection(project_id: str) -> DBConnection|None:
33
46
 
34
47
 
35
48
  def _get_project_session_with_exception(project_id: str) -> Session:
36
- if connection:=_get_project_connection(project_id):
49
+ if connection := _get_project_connection(project_id):
37
50
  project_session = connection.create_session()
38
51
  return project_session
39
52
  else:
40
- raise APIException(f'unable to find project {project_id}')
53
+ raise EsgvocNotFoundError(f"unable to find project '{project_id}'")
41
54
 
42
55
 
43
56
  def _resolve_term(composite_term_part: dict,
44
57
  universe_session: Session,
45
- project_session: Session) -> UTerm|PTerm:
58
+ project_session: Session) -> UTerm | PTerm:
46
59
  # First find the term in the universe, then in the current project
47
60
  term_id = composite_term_part[constants.TERM_ID_JSON_KEY]
48
61
  term_type = composite_term_part[constants.TERM_TYPE_JSON_KEY]
49
- uterms = universe._find_terms_in_data_descriptor(data_descriptor_id=term_type,
50
- term_id=term_id,
51
- session=universe_session,
52
- settings=None)
53
- if uterms:
54
- return uterms[0]
62
+ uterm = universe._get_term_in_data_descriptor(data_descriptor_id=term_type,
63
+ term_id=term_id,
64
+ session=universe_session)
65
+ if uterm:
66
+ return uterm
55
67
  else:
56
- pterms = _find_terms_in_collection(collection_id=term_type,
57
- term_id=term_id,
58
- session=project_session,
59
- settings=None)
60
- if pterms:
61
- return pterms[0]
68
+ pterm = _get_term_in_collection(collection_id=term_type,
69
+ term_id=term_id,
70
+ session=project_session)
71
+ if pterm:
72
+ return pterm
62
73
  else:
63
- msg = f'unable to find the term {term_id} in {term_type}'
64
- raise RuntimeError(msg)
74
+ msg = f"unable to find the term '{term_id}' in '{term_type}'"
75
+ raise EsgvocNotFoundError(msg)
65
76
 
66
77
 
67
- def _get_composite_term_separator_parts(term: UTerm|PTerm) -> tuple[str, list]:
78
+ def _get_composite_term_separator_parts(term: UTerm | PTerm) -> tuple[str, list]:
68
79
  separator = term.specs[constants.COMPOSITE_SEPARATOR_JSON_KEY]
69
80
  parts = term.specs[constants.COMPOSITE_PARTS_JSON_KEY]
70
81
  return separator, parts
@@ -73,10 +84,10 @@ def _get_composite_term_separator_parts(term: UTerm|PTerm) -> tuple[str, list]:
73
84
  # TODO: support optionality of parts of composite.
74
85
  # Backtracking is possible when more than one part is missing.
75
86
  def _valid_value_composite_term_with_separator(value: str,
76
- term: UTerm|PTerm,
87
+ term: UTerm | PTerm,
77
88
  universe_session: Session,
78
89
  project_session: Session)\
79
- -> list[UniverseTermError|ProjectTermError]:
90
+ -> list[UniverseTermError | ProjectTermError]:
80
91
  result = list()
81
92
  separator, parts = _get_composite_term_separator_parts(term)
82
93
  if separator in value:
@@ -99,7 +110,7 @@ def _valid_value_composite_term_with_separator(value: str,
99
110
  return result
100
111
 
101
112
 
102
- def _transform_to_pattern(term: UTerm|PTerm,
113
+ def _transform_to_pattern(term: UTerm | PTerm,
103
114
  universe_session: Session,
104
115
  project_session: Session) -> str:
105
116
  match term.kind:
@@ -107,12 +118,12 @@ def _transform_to_pattern(term: UTerm|PTerm,
107
118
  if constants.DRS_SPECS_JSON_KEY in term.specs:
108
119
  result = term.specs[constants.DRS_SPECS_JSON_KEY]
109
120
  else:
110
- raise APIException(f"the term {term.id} doesn't have drs name. " +
111
- "Can't validate it.")
121
+ raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " +
122
+ "Can't validate it.")
112
123
  case TermKind.PATTERN:
113
124
  result = term.specs[constants.PATTERN_JSON_KEY]
114
125
  case TermKind.COMPOSITE:
115
- separator, parts = _get_composite_term_separator_parts(term)
126
+ separator, parts = _get_composite_term_separator_parts(term)
116
127
  result = ""
117
128
  for part in parts:
118
129
  resolved_term = _resolve_term(part, universe_session, project_session)
@@ -120,17 +131,17 @@ def _transform_to_pattern(term: UTerm|PTerm,
120
131
  result = f'{result}{pattern}{separator}'
121
132
  result = result.rstrip(separator)
122
133
  case _:
123
- raise RuntimeError(f'unsupported term kind {term.kind}')
134
+ raise EsgvocDbError(f"unsupported term kind '{term.kind}'")
124
135
  return result
125
136
 
126
137
 
127
138
  # TODO: support optionality of parts of composite.
128
139
  # Backtracking is possible when more than one part is missing.
129
140
  def _valid_value_composite_term_separator_less(value: str,
130
- term: UTerm|PTerm,
141
+ term: UTerm | PTerm,
131
142
  universe_session: Session,
132
143
  project_session: Session)\
133
- -> list[UniverseTermError|ProjectTermError]:
144
+ -> list[UniverseTermError | ProjectTermError]:
134
145
  result = list()
135
146
  try:
136
147
  pattern = _transform_to_pattern(term, universe_session, project_session)
@@ -143,22 +154,22 @@ def _valid_value_composite_term_separator_less(value: str,
143
154
  pattern = f'^{pattern}$'
144
155
  regex = re.compile(pattern)
145
156
  except Exception as e:
146
- msg = f'regex compilation error while processing term {term.id}:\n{e}'
147
- raise RuntimeError(msg) from e
157
+ msg = f"regex compilation error while processing term '{term.id}'':\n{e}"
158
+ raise EsgvocDbError(msg) from e
148
159
  match = regex.match(value)
149
160
  if match is None:
150
161
  result.append(_create_term_error(value, term))
151
162
  return result
152
163
  except Exception as e:
153
- msg = f'cannot validate separator less composite term {term.id}:\n{e}'
154
- raise RuntimeError(msg) from e
164
+ msg = f"cannot validate separator less composite term '{term.id}':\n{e}"
165
+ raise EsgvocNotImplementedError(msg) from e
155
166
 
156
167
 
157
168
  def _valid_value_for_composite_term(value: str,
158
- term: UTerm|PTerm,
169
+ term: UTerm | PTerm,
159
170
  universe_session: Session,
160
171
  project_session: Session)\
161
- -> list[UniverseTermError|ProjectTermError]:
172
+ -> list[UniverseTermError | ProjectTermError]:
162
173
  result = list()
163
174
  separator, _ = _get_composite_term_separator_parts(term)
164
175
  if separator:
@@ -170,7 +181,7 @@ def _valid_value_for_composite_term(value: str,
170
181
  return result
171
182
 
172
183
 
173
- def _create_term_error(value: str, term: UTerm|PTerm) -> UniverseTermError|ProjectTermError:
184
+ def _create_term_error(value: str, term: UTerm | PTerm) -> UniverseTermError | ProjectTermError:
174
185
  if isinstance(term, UTerm):
175
186
  return UniverseTermError(value=value, term=term.specs, term_kind=term.kind,
176
187
  data_descriptor_id=term.data_descriptor.id)
@@ -180,9 +191,9 @@ def _create_term_error(value: str, term: UTerm|PTerm) -> UniverseTermError|Proje
180
191
 
181
192
 
182
193
  def _valid_value(value: str,
183
- term: UTerm|PTerm,
194
+ term: UTerm | PTerm,
184
195
  universe_session: Session,
185
- project_session: Session) -> list[UniverseTermError|ProjectTermError]:
196
+ project_session: Session) -> list[UniverseTermError | ProjectTermError]:
186
197
  result = list()
187
198
  match term.kind:
188
199
  case TermKind.PLAIN:
@@ -190,10 +201,10 @@ def _valid_value(value: str,
190
201
  if term.specs[constants.DRS_SPECS_JSON_KEY] != value:
191
202
  result.append(_create_term_error(value, term))
192
203
  else:
193
- raise APIException(f"the term {term.id} doesn't have drs name. " +
194
- "Can't validate it.")
204
+ raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " +
205
+ "Can't validate it.")
195
206
  case TermKind.PATTERN:
196
- # OPTIM: Pattern can be compiled and stored for further matching.
207
+ # TODO: Pattern can be compiled and stored for further matching.
197
208
  pattern_match = re.match(term.specs[constants.PATTERN_JSON_KEY], value)
198
209
  if pattern_match is None:
199
210
  result.append(_create_term_error(value, term))
@@ -202,13 +213,13 @@ def _valid_value(value: str,
202
213
  universe_session,
203
214
  project_session))
204
215
  case _:
205
- raise RuntimeError(f'unsupported term kind {term.kind}')
216
+ raise EsgvocDbError(f"unsupported term kind '{term.kind}'")
206
217
  return result
207
218
 
208
219
 
209
220
  def _check_value(value: str) -> str:
210
221
  if not value or value.isspace():
211
- raise APIException('value should be set')
222
+ raise EsgvocValueError('value should be set')
212
223
  else:
213
224
  return value
214
225
 
@@ -216,7 +227,7 @@ def _check_value(value: str) -> str:
216
227
  def _search_plain_term_and_valid_value(value: str,
217
228
  collection_id: str,
218
229
  project_session: Session) \
219
- -> str|None:
230
+ -> str | None:
220
231
  where_expression = and_(Collection.id == collection_id,
221
232
  PTerm.specs[constants.DRS_SPECS_JSON_KEY] == f'"{value}"')
222
233
  statement = select(PTerm).join(Collection).where(where_expression)
@@ -239,7 +250,7 @@ def _valid_value_against_all_terms_of_collection(value: str,
239
250
  result.append(pterm.id)
240
251
  return result
241
252
  else:
242
- raise RuntimeError(f'collection {collection.id} has no term')
253
+ raise EsgvocDbError(f"collection '{collection.id}' has no term")
243
254
 
244
255
 
245
256
  def _valid_value_against_given_term(value: str,
@@ -248,22 +259,20 @@ def _valid_value_against_given_term(value: str,
248
259
  term_id: str,
249
260
  universe_session: Session,
250
261
  project_session: Session)\
251
- -> list[UniverseTermError|ProjectTermError]:
252
- # [OPTIMIZATION]
262
+ -> list[UniverseTermError | ProjectTermError]:
263
+ # [OPTIMIZATION]
253
264
  key = value + project_id + collection_id + term_id
254
265
  if key in _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE:
255
266
  result = _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key]
256
267
  else:
257
- terms = _find_terms_in_collection(collection_id,
258
- term_id,
259
- project_session,
260
- None)
261
- if terms:
262
- term = terms[0]
268
+ term = _get_term_in_collection(collection_id,
269
+ term_id,
270
+ project_session)
271
+ if term:
263
272
  result = _valid_value(value, term, universe_session, project_session)
264
273
  else:
265
- raise APIException(f'unable to find term {term_id} ' +
266
- f'in collection {collection_id}')
274
+ raise EsgvocNotFoundError(f"unable to find term '{term_id}' " +
275
+ f"in collection '{collection_id}'")
267
276
  _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key] = result
268
277
  return result
269
278
 
@@ -288,7 +297,7 @@ def valid_term(value: str,
288
297
  composite so as to compare it as a regex to the value.
289
298
 
290
299
  If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
291
- the function raises a APIException.
300
+ the function raises an EsgvocNotFoundError.
292
301
 
293
302
  :param value: A value to be validated
294
303
  :type value: str
@@ -300,7 +309,7 @@ def valid_term(value: str,
300
309
  :type term_id: str
301
310
  :returns: A validation report that contains the possible errors
302
311
  :rtype: ValidationReport
303
- :raises APIException: If any of the provided ids is not found
312
+ :raises EsgvocNotFoundError: If any of the provided ids is not found
304
313
  """
305
314
  value = _check_value(value)
306
315
  with get_universe_session() as universe_session, \
@@ -316,18 +325,15 @@ def _valid_term_in_collection(value: str,
316
325
  universe_session: Session,
317
326
  project_session: Session) \
318
327
  -> list[MatchingTerm]:
319
- # [OPTIMIZATION]
328
+ # [OPTIMIZATION]
320
329
  key = value + project_id + collection_id
321
330
  if key in _VALID_TERM_IN_COLLECTION_CACHE:
322
331
  result = _VALID_TERM_IN_COLLECTION_CACHE[key]
323
332
  else:
324
333
  value = _check_value(value)
325
334
  result = list()
326
- collections = _find_collections_in_project(collection_id,
327
- project_session,
328
- None)
329
- if collections:
330
- collection = collections[0]
335
+ collection = _get_collection_in_project(collection_id, project_session)
336
+ if collection:
331
337
  match collection.term_kind:
332
338
  case TermKind.PLAIN:
333
339
  term_id_found = _search_plain_term_and_valid_value(value, collection_id,
@@ -345,8 +351,8 @@ def _valid_term_in_collection(value: str,
345
351
  collection_id=collection_id,
346
352
  term_id=term_id_found))
347
353
  else:
348
- msg = f'unable to find collection {collection_id}'
349
- raise APIException(msg)
354
+ msg = f"unable to find collection '{collection_id}'"
355
+ raise EsgvocNotFoundError(msg)
350
356
  _VALID_TERM_IN_COLLECTION_CACHE[key] = result
351
357
  return result
352
358
 
@@ -370,7 +376,7 @@ def valid_term_in_collection(value: str,
370
376
  composite so as to compare it as a regex to the value.
371
377
 
372
378
  If any of the provided ids (`project_id` or `collection_id`) is not found,
373
- the function raises a APIException.
379
+ the function raises an EsgvocNotFoundError.
374
380
 
375
381
  :param value: A value to be validated
376
382
  :type value: str
@@ -380,7 +386,7 @@ def valid_term_in_collection(value: str,
380
386
  :type collection_id: str
381
387
  :returns: The list of terms that the value matches.
382
388
  :rtype: list[MatchingTerm]
383
- :raises APIException: If any of the provided ids is not found
389
+ :raises EsgvocNotFoundError: If any of the provided ids is not found
384
390
  """
385
391
  with get_universe_session() as universe_session, \
386
392
  _get_project_session_with_exception(project_id) as project_session:
@@ -415,7 +421,7 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
415
421
  - if the composite hasn't got a separator, the function aggregates the parts of the \
416
422
  composite so as to compare it as a regex to the value.
417
423
 
418
- If the `project_id` is not found, the function raises a APIException.
424
+ If the `project_id` is not found, the function raises an EsgvocNotFoundError.
419
425
 
420
426
  :param value: A value to be validated
421
427
  :type value: str
@@ -423,7 +429,7 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
423
429
  :type project_id: str
424
430
  :returns: The list of terms that the value matches.
425
431
  :rtype: list[MatchingTerm]
426
- :raises APIException: If the `project_id` is not found
432
+ :raises EsgvocNotFoundError: If the `project_id` is not found
427
433
  """
428
434
  with get_universe_session() as universe_session, \
429
435
  _get_project_session_with_exception(project_id) as project_session:
@@ -459,246 +465,14 @@ def valid_term_in_all_projects(value: str) -> list[MatchingTerm]:
459
465
  return result
460
466
 
461
467
 
462
- def _find_terms_in_collection(collection_id: str,
463
- term_id: str,
464
- session: Session,
465
- settings: SearchSettings|None = None) -> Sequence[PTerm]:
466
- # Settings only apply on the term_id comparison.
467
- where_expression = _create_str_comparison_expression(field=PTerm.id,
468
- value=term_id,
469
- settings=settings)
470
- statement = select(PTerm).join(Collection).where(Collection.id==collection_id,
471
- where_expression)
472
- results = session.exec(statement)
473
- result = results.all()
474
- return result
475
-
476
-
477
- def find_terms_in_collection(project_id:str,
478
- collection_id: str,
479
- term_id: str,
480
- settings: SearchSettings|None = None) \
481
- -> list[DataDescriptor]:
482
- """
483
- Finds one or more terms, based on the specified search settings, in the given collection of a project.
484
- This function performs an exact match on the `project_id` and `collection_id`,
485
- and does **not** search for similar or related projects and collections.
486
- The given `term_id` is searched according to the search type specified in the parameter `settings`,
487
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
488
- If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
489
- If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
490
- the function returns an empty list.
491
-
492
- Behavior based on search type:
493
- - `EXACT` and absence of `settings`: returns zero or one term instance in the list.
494
- - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
495
- term instances in the list.
496
-
497
- :param project_id: A project id
498
- :type project_id: str
499
- :param collection_id: A collection
500
- :type collection_id: str
501
- :param term_id: A term id to be found
502
- :type term_id: str
503
- :param settings: The search settings
504
- :type settings: SearchSettings|None
505
- :returns: A list of term instances. Returns an empty list if no matches are found.
506
- :rtype: list[DataDescriptor]
507
- """
508
- result: list[DataDescriptor] = list()
509
- if connection:=_get_project_connection(project_id):
510
- with connection.create_session() as session:
511
- terms = _find_terms_in_collection(collection_id, term_id, session, settings)
512
- instantiate_pydantic_terms(terms, result,
513
- settings.selected_term_fields if settings else None)
514
- return result
515
-
516
-
517
- def _find_terms_from_data_descriptor_in_project(data_descriptor_id: str,
518
- term_id: str,
519
- session: Session,
520
- settings: SearchSettings|None = None) \
521
- -> Sequence[PTerm]:
522
- # Settings only apply on the term_id comparison.
523
- where_expression = _create_str_comparison_expression(field=PTerm.id,
524
- value=term_id,
525
- settings=settings)
526
- statement = select(PTerm).join(Collection).where(Collection.data_descriptor_id==data_descriptor_id,
527
- where_expression)
528
- results = session.exec(statement)
529
- result = results.all()
530
- return result
531
-
532
-
533
- def find_terms_from_data_descriptor_in_project(project_id: str,
534
- data_descriptor_id: str,
535
- term_id: str,
536
- settings: SearchSettings|None = None) \
537
- -> list[tuple[DataDescriptor, str]]:
538
- """
539
- Finds one or more terms in the given project which are instances of the given data descriptor
540
- in the universe, based on the specified search settings, in the given collection of a project.
541
- This function performs an exact match on the `project_id` and `data_descriptor_id`,
542
- and does **not** search for similar or related projects and data descriptors.
543
- The given `term_id` is searched according to the search type specified in the parameter `settings`,
544
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
545
- If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
546
- If any of the provided ids (`project_id`, `data_descriptor_id` or `term_id`) is not found,
547
- the function returns an empty list.
548
-
549
- Behavior based on search type:
550
- - `EXACT` and absence of `settings`: returns zero or one term instance and \
551
- collection id in the list.
552
- - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
553
- term instances and collection ids in the list.
554
-
555
- :param project_id: A project id
556
- :type project_id: str
557
- :param data_descriptor_id: A data descriptor
558
- :type data_descriptor_id: str
559
- :param term_id: A term id to be found
560
- :type term_id: str
561
- :param settings: The search settings
562
- :type settings: SearchSettings|None
563
- :returns: A list of tuple of term instances and related collection ids. \
564
- Returns an empty list if no matches are found.
565
- :rtype: list[tuple[DataDescriptor, str]]
566
- """
567
- result = list()
568
- if connection:=_get_project_connection(project_id):
569
- with connection.create_session() as session:
570
- terms = _find_terms_from_data_descriptor_in_project(data_descriptor_id,
571
- term_id,
572
- session,
573
- settings)
574
- for pterm in terms:
575
- collection_id = pterm.collection.id
576
- term = instantiate_pydantic_term(pterm,
577
- settings.selected_term_fields if settings else None)
578
- result.append((term, collection_id))
579
- return result
580
-
581
-
582
- def find_terms_from_data_descriptor_in_all_projects(data_descriptor_id: str,
583
- term_id: str,
584
- settings: SearchSettings|None = None) \
585
- -> list[tuple[list[tuple[DataDescriptor, str]], str]]:
586
- """
587
- Finds one or more terms in all projects which are instances of the given data descriptor
588
- in the universe, based on the specified search settings, in the given collection of a project.
589
- This function performs an exact match on the `data_descriptor_id`,
590
- and does **not** search for similar or related data descriptors.
591
- The given `term_id` is searched according to the search type specified in the parameter `settings`,
592
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
593
- If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
594
- If any of the provided ids (`data_descriptor_id` or `term_id`) is not found,
595
- the function returns an empty list.
596
-
597
- Behavior based on search type:
598
- - `EXACT` and absence of `settings`: returns zero or one term instance and \
599
- collection id in the list.
600
- - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
601
- term instances and collection ids in the list.
602
-
603
- :param data_descriptor_id: A data descriptor
604
- :type data_descriptor_id: str
605
- :param term_id: A term id to be found
606
- :type term_id: str
607
- :param settings: The search settings
608
- :type settings: SearchSettings|None
609
- :returns: A list of tuple of matching terms with their collection id, per project. \
610
- Returns an empty list if no matches are found.
611
- :rtype: list[tuple[list[tuple[DataDescriptor, str]], str]]
612
- """
613
- project_ids = get_all_projects()
614
- result: list[tuple[list[tuple[DataDescriptor, str]], str]] = list()
615
- for project_id in project_ids:
616
- matching_terms = find_terms_from_data_descriptor_in_project(project_id,
617
- data_descriptor_id,
618
- term_id,
619
- settings)
620
- if matching_terms:
621
- result.append((matching_terms, project_id))
622
- return result
623
-
624
-
625
- def _find_terms_in_project(term_id: str,
626
- session: Session,
627
- settings: SearchSettings|None) -> Sequence[PTerm]:
628
- where_expression = _create_str_comparison_expression(field=PTerm.id,
629
- value=term_id,
630
- settings=settings)
631
- statement = select(PTerm).where(where_expression)
632
- results = session.exec(statement).all()
633
- return results
634
-
635
-
636
- def find_terms_in_all_projects(term_id: str,
637
- settings: SearchSettings|None = None) \
638
- -> list[DataDescriptor]:
639
- """
640
- Finds one or more terms, based on the specified search settings, in all projects.
641
- The given `term_id` is searched according to the search type specified in the parameter `settings`,
642
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
643
- If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
644
- Terms are unique within a collection but may have some synonyms within a project.
645
- If the provided `term_id` is not found, the function returns an empty list.
646
-
647
- :param term_id: A term id to be found
648
- :type term_id: str
649
- :param settings: The search settings
650
- :type settings: SearchSettings|None
651
- :returns: A list of term instances. Returns an empty list if no matches are found.
652
- :rtype: list[DataDescriptor]
653
- """
654
- project_ids = get_all_projects()
655
- result = list()
656
- for project_id in project_ids:
657
- result.extend(find_terms_in_project(project_id, term_id, settings))
658
- return result
659
-
660
-
661
- def find_terms_in_project(project_id: str,
662
- term_id: str,
663
- settings: SearchSettings|None = None) \
664
- -> list[DataDescriptor]:
665
- """
666
- Finds one or more terms, based on the specified search settings, in a project.
667
- This function performs an exact match on the `project_id` and
668
- does **not** search for similar or related projects.
669
- The given `term_id` is searched according to the search type specified in the parameter `settings`,
670
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
671
- If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
672
- Terms are unique within a collection but may have some synonyms within a project.
673
- If any of the provided ids (`project_id` or `term_id`) is not found, the function returns
674
- an empty list.
675
-
676
- :param project_id: A project id
677
- :type project_id: str
678
- :param term_id: A term id to be found
679
- :type term_id: str
680
- :param settings: The search settings
681
- :type settings: SearchSettings|None
682
- :returns: A list of term instances. Returns an empty list if no matches are found.
683
- :rtype: list[DataDescriptor]
684
- """
685
- result: list[DataDescriptor] = list()
686
- if connection:=_get_project_connection(project_id):
687
- with connection.create_session() as session:
688
- terms = _find_terms_in_project(term_id, session, settings)
689
- instantiate_pydantic_terms(terms, result,
690
- settings.selected_term_fields if settings else None)
691
- return result
692
-
693
-
694
468
  def get_all_terms_in_collection(project_id: str,
695
469
  collection_id: str,
696
- selected_term_fields: Iterable[str]|None = None)\
470
+ selected_term_fields: Iterable[str] | None = None)\
697
471
  -> list[DataDescriptor]:
698
472
  """
699
473
  Gets all terms of the given collection of a project.
700
474
  This function performs an exact match on the `project_id` and `collection_id`,
701
- and does **not** search for similar or related projects and collections.
475
+ and does not search for similar or related projects and collections.
702
476
  If any of the provided ids (`project_id` or `collection_id`) is not found, the function
703
477
  returns an empty list.
704
478
 
@@ -707,87 +481,31 @@ def get_all_terms_in_collection(project_id: str,
707
481
  :param collection_id: A collection id
708
482
  :type collection_id: str
709
483
  :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
710
- fields of the terms are returned.
711
- :type selected_term_fields: Iterable[str]|None
484
+ fields of the terms are returned. If empty, selects the id and type fields.
485
+ :type selected_term_fields: Iterable[str] | None
712
486
  :returns: a list of term instances. Returns an empty list if no matches are found.
713
487
  :rtype: list[DataDescriptor]
714
488
  """
715
489
  result = list()
716
- if connection:=_get_project_connection(project_id):
490
+ if connection := _get_project_connection(project_id):
717
491
  with connection.create_session() as session:
718
- collections = _find_collections_in_project(collection_id,
719
- session,
720
- None)
721
- if collections:
722
- collection = collections[0]
492
+ collection = _get_collection_in_project(collection_id, session)
493
+ if collection:
723
494
  result = _get_all_terms_in_collection(collection, selected_term_fields)
724
495
  return result
725
496
 
726
497
 
727
- def _find_collections_in_project(collection_id: str,
728
- session: Session,
729
- settings: SearchSettings|None) \
730
- -> Sequence[Collection]:
731
- where_exp = _create_str_comparison_expression(field=Collection.id,
732
- value=collection_id,
733
- settings=settings)
734
- statement = select(Collection).where(where_exp)
735
- results = session.exec(statement)
736
- result = results.all()
737
- return result
738
-
739
-
740
- def find_collections_in_project(project_id: str,
741
- collection_id: str,
742
- settings: SearchSettings|None = None) \
743
- -> list[dict]:
744
- """
745
- Finds one or more collections of the given project.
746
- This function performs an exact match on the `project_id` and
747
- does **not** search for similar or related projects.
748
- The given `collection_id` is searched according to the search type specified in
749
- the parameter `settings`,
750
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
751
- If the parameter `settings` is `None`, this function performs an exact match on the `collection_id`.
752
- If any of the provided ids (`project_id` or `collection_id`) is not found, the function returns
753
- an empty list.
754
-
755
- Behavior based on search type:
756
- - `EXACT` and absence of `settings`: returns zero or one collection context in the list.
757
- - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
758
- collection contexts in the list.
759
-
760
- :param project_id: A project id
761
- :type project_id: str
762
- :param collection_id: A collection id to be found
763
- :type collection_id: str
764
- :param settings: The search settings
765
- :type settings: SearchSettings|None
766
- :returns: A list of collection contexts. Returns an empty list if no matches are found.
767
- :rtype: list[dict]
768
- """
769
- result = list()
770
- if connection:=_get_project_connection(project_id):
771
- with connection.create_session() as session:
772
- collections = _find_collections_in_project(collection_id,
773
- session,
774
- settings)
775
- for collection in collections:
776
- result.append(collection.context)
777
- return result
778
-
779
-
780
498
  def _get_all_collections_in_project(session: Session) -> list[Collection]:
781
499
  project = session.get(Project, constants.SQLITE_FIRST_PK)
782
500
  # Project can't be missing if session exists.
783
- return project.collections # type: ignore
501
+ return project.collections # type: ignore
784
502
 
785
503
 
786
504
  def get_all_collections_in_project(project_id: str) -> list[str]:
787
505
  """
788
506
  Gets all collections of the given project.
789
507
  This function performs an exact match on the `project_id` and
790
- does **not** search for similar or related projects.
508
+ does not search for similar or related projects.
791
509
  If the provided `project_id` is not found, the function returns an empty list.
792
510
 
793
511
  :param project_id: A project id
@@ -796,7 +514,7 @@ def get_all_collections_in_project(project_id: str) -> list[str]:
796
514
  :rtype: list[str]
797
515
  """
798
516
  result = list()
799
- if connection:=_get_project_connection(project_id):
517
+ if connection := _get_project_connection(project_id):
800
518
  with connection.create_session() as session:
801
519
  collections = _get_all_collections_in_project(session)
802
520
  for collection in collections:
@@ -805,31 +523,31 @@ def get_all_collections_in_project(project_id: str) -> list[str]:
805
523
 
806
524
 
807
525
  def _get_all_terms_in_collection(collection: Collection,
808
- selected_term_fields: Iterable[str]|None) -> list[DataDescriptor]:
526
+ selected_term_fields: Iterable[str] | None) -> list[DataDescriptor]:
809
527
  result: list[DataDescriptor] = list()
810
528
  instantiate_pydantic_terms(collection.terms, result, selected_term_fields)
811
529
  return result
812
530
 
813
531
 
814
532
  def get_all_terms_in_project(project_id: str,
815
- selected_term_fields: Iterable[str]|None = None) -> list[DataDescriptor]:
533
+ selected_term_fields: Iterable[str] | None = None) -> list[DataDescriptor]:
816
534
  """
817
535
  Gets all terms of the given project.
818
536
  This function performs an exact match on the `project_id` and
819
- does **not** search for similar or related projects.
537
+ does not search for similar or related projects.
820
538
  Terms are unique within a collection but may have some synonyms in a project.
821
539
  If the provided `project_id` is not found, the function returns an empty list.
822
540
 
823
541
  :param project_id: A project id
824
542
  :type project_id: str
825
543
  :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
826
- fields of the terms are returned.
827
- :type selected_term_fields: Iterable[str]|None
544
+ fields of the terms are returned. If empty, selects the id and type fields.
545
+ :type selected_term_fields: Iterable[str] | None
828
546
  :returns: A list of term instances. Returns an empty list if no matches are found.
829
547
  :rtype: list[DataDescriptor]
830
548
  """
831
549
  result = list()
832
- if connection:=_get_project_connection(project_id):
550
+ if connection := _get_project_connection(project_id):
833
551
  with connection.create_session() as session:
834
552
  collections = _get_all_collections_in_project(session)
835
553
  for collection in collections:
@@ -838,14 +556,14 @@ def get_all_terms_in_project(project_id: str,
838
556
  return result
839
557
 
840
558
 
841
- def get_all_terms_in_all_projects(selected_term_fields: Iterable[str]|None = None) \
559
+ def get_all_terms_in_all_projects(selected_term_fields: Iterable[str] | None = None) \
842
560
  -> list[tuple[str, list[DataDescriptor]]]:
843
561
  """
844
562
  Gets all terms of all projects.
845
563
 
846
564
  :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
847
- fields of the terms are returned.
848
- :type selected_term_fields: Iterable[str]|None
565
+ fields of the terms are returned. If empty, selects the id and type fields.
566
+ :type selected_term_fields: Iterable[str] | None
849
567
  :returns: A list of tuple project_id and term instances of that project.
850
568
  :rtype: list[tuple[str, list[DataDescriptor]]]
851
569
  """
@@ -857,44 +575,591 @@ def get_all_terms_in_all_projects(selected_term_fields: Iterable[str]|None = Non
857
575
  return result
858
576
 
859
577
 
860
- def find_project(project_id: str) -> ProjectSpecs|None:
578
+ def get_all_projects() -> list[str]:
579
+ """
580
+ Gets all projects.
581
+
582
+ :returns: A list of project ids.
583
+ :rtype: list[str]
584
+ """
585
+ return list(service.current_state.projects.keys())
586
+
587
+
588
+ def _get_term_in_project(term_id: str, session: Session) -> PTerm | None:
589
+ statement = select(PTerm).where(PTerm.id == term_id)
590
+ results = session.exec(statement)
591
+ result = results.first() # Term ids are not supposed to be unique within a project.
592
+ return result
593
+
594
+
595
+ def get_term_in_project(project_id: str, term_id: str,
596
+ selected_term_fields: Iterable[str] | None = None) -> DataDescriptor | None:
597
+ """
598
+ Returns the first occurrence of the terms, in the given project, whose id corresponds exactly to
599
+ the given term id.
600
+ Terms are unique within a collection but may have some synonyms in a project.
601
+ This function performs an exact match on the `project_id` and `term_id`, and does not search
602
+ for similar or related projects and terms.
603
+ If any of the provided ids (`project_id` or `term_id`) is not found,
604
+ the function returns `None`.
605
+
606
+ :param project_id: The id of the given project.
607
+ :type project_id: str
608
+ :param term_id: The id of a term to be found.
609
+ :type term_id: str
610
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
611
+ fields of the terms are returned. If empty, selects the id and type fields.
612
+ :type selected_term_fields: Iterable[str] | None
613
+ :returns: A term instance. Returns `None` if no match is found.
614
+ :rtype: DataDescriptor | None
615
+ """
616
+ result: DataDescriptor | None = None
617
+ if connection := _get_project_connection(project_id):
618
+ with connection.create_session() as session:
619
+ term_found = _get_term_in_project(term_id, session)
620
+ if term_found:
621
+ result = instantiate_pydantic_term(term_found, selected_term_fields)
622
+ return result
623
+
624
+
625
+ def _get_term_in_collection(collection_id: str, term_id: str, session: Session) -> PTerm | None:
626
+ statement = select(PTerm).join(Collection).where(Collection.id == collection_id,
627
+ PTerm.id == term_id)
628
+ results = session.exec(statement)
629
+ result = results.one_or_none()
630
+ return result
631
+
632
+
633
+ def get_term_in_collection(project_id: str, collection_id: str, term_id: str,
634
+ selected_term_fields: Iterable[str] | None = None) -> DataDescriptor | None:
635
+ """
636
+ Returns the term, in the given project and collection,
637
+ whose id corresponds exactly to the given term id.
638
+ This function performs an exact match on the `project_id`, `collection_id` and `term_id`,
639
+ and does not search for similar or related projects, collections and terms.
640
+ If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
641
+ the function returns `None`.
642
+
643
+ :param project_id: The id of the given project.
644
+ :type project_id: str
645
+ :param collection_id: The id of the given collection.
646
+ :type collection_id: str
647
+ :param term_id: The id of a term to be found.
648
+ :type term_id: str
649
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
650
+ fields of the terms are returned. If empty, selects the id and type fields.
651
+ :type selected_term_fields: Iterable[str] | None
652
+ :returns: A term instance. Returns `None` if no match is found.
653
+ :rtype: DataDescriptor | None
654
+ """
655
+ result: DataDescriptor | None = None
656
+ if connection := _get_project_connection(project_id):
657
+ with connection.create_session() as session:
658
+ term_found = _get_term_in_collection(collection_id, term_id, session)
659
+ if term_found:
660
+ result = instantiate_pydantic_term(term_found, selected_term_fields)
661
+ return result
662
+
663
+
664
+ def _get_collection_in_project(collection_id: str, session: Session) -> Collection | None:
665
+ statement = select(Collection).where(Collection.id == collection_id)
666
+ results = session.exec(statement)
667
+ result = results.one_or_none()
668
+ return result
669
+
670
+
671
+ def get_collection_in_project(project_id: str, collection_id: str) -> tuple[str, dict] | None:
861
672
  """
862
- Finds a project and returns its specifications.
673
+ Returns the collection, in the given project, whose id corresponds exactly to
674
+ the given collection id.
675
+ This function performs an exact match on the `project_id` and `collection_id`, and does not search
676
+ for similar or related projects and collections.
677
+ If any of the provided ids (`project_id` or `collection_id`) is not found,
678
+ the function returns `None`.
679
+
680
+ :param project_id: The id of the given project.
681
+ :type project_id: str
682
+ :param collection_id: The id of a collection to be found.
683
+ :type collection_id: str
684
+ :returns: A collection id and context. Returns `None` if no match is found.
685
+ :rtype: tuple[str, dict] | None
686
+ """
687
+ result: tuple[str, dict] | None = None
688
+ if connection := _get_project_connection(project_id):
689
+ with connection.create_session() as session:
690
+ collection_found = _get_collection_in_project(collection_id, session)
691
+ if collection_found:
692
+ result = collection_found.id, collection_found.context
693
+ return result
694
+
695
+
696
+ def get_project(project_id: str) -> ProjectSpecs | None:
697
+ """
698
+ Gets a project and returns its specifications.
863
699
  This function performs an exact match on the `project_id` and
864
- does **not** search for similar or related projects.
700
+ does not search for similar or related projects.
865
701
  If the provided `project_id` is not found, the function returns `None`.
866
702
 
867
703
  :param project_id: A project id to be found
868
704
  :type project_id: str
869
705
  :returns: The specs of the project found. Returns `None` if no matches are found.
870
- :rtype: ProjectSpecs|None
706
+ :rtype: ProjectSpecs | None
871
707
  """
872
- result: ProjectSpecs|None = None
873
- if connection:=_get_project_connection(project_id):
708
+ result: ProjectSpecs | None = None
709
+ if connection := _get_project_connection(project_id):
874
710
  with connection.create_session() as session:
875
711
  project = session.get(Project, constants.SQLITE_FIRST_PK)
876
712
  try:
877
713
  # Project can't be missing if session exists.
878
- result = ProjectSpecs(**project.specs) # type: ignore
714
+ result = ProjectSpecs(**project.specs) # type: ignore
879
715
  except Exception as e:
880
- msg = f'Unable to read specs in project {project_id}'
881
- raise RuntimeError(msg) from e
716
+ msg = f"unable to read specs in project '{project_id}'"
717
+ raise EsgvocDbError(msg) from e
882
718
  return result
883
719
 
884
720
 
885
- def get_all_projects() -> list[str]:
721
+ def _get_collection_from_data_descriptor_in_project(data_descriptor_id: str,
722
+ session: Session) -> Collection | None:
723
+ statement = select(Collection).where(Collection.data_descriptor_id == data_descriptor_id)
724
+ result = session.exec(statement).one_or_none()
725
+ return result
726
+
727
+
728
+ def get_collection_from_data_descriptor_in_project(project_id: str,
729
+ data_descriptor_id: str) \
730
+ -> tuple[str, dict] | None:
886
731
  """
887
- Gets all projects.
732
+ Returns the collection, in the given project, that corresponds to the given data descriptor
733
+ in the universe.
734
+ This function performs an exact match on the `project_id` and `data_descriptor_id`,
735
+ and does not search for similar or related projects and data descriptors.
736
+ If any of the provided ids (`project_id` or `data_descriptor_id`) is not found, or if
737
+ there is no collection corresponding to the given data descriptor, the function returns `None`.
888
738
 
889
- :returns: A list of project ids.
890
- :rtype: list[str]
739
+ :param project_id: The id of the given project.
740
+ :type project_id: str
741
+ :param data_descriptor_id: The id of the given data descriptor.
742
+ :type data_descriptor_id: str
743
+ :returns: A collection id and context. Returns `None` if no matches are found.
744
+ :rtype: tuple[str, dict] | None
891
745
  """
892
- return list(service.current_state.projects.keys())
746
+ result: tuple[str, dict] | None = None
747
+ if connection := _get_project_connection(project_id):
748
+ with connection.create_session() as session:
749
+ collection_found = _get_collection_from_data_descriptor_in_project(data_descriptor_id,
750
+ session)
751
+ if collection_found:
752
+ result = collection_found.id, collection_found.context
753
+ return result
754
+
755
+
756
+ def get_collection_from_data_descriptor_in_all_projects(data_descriptor_id: str) \
757
+ -> list[tuple[str, str, dict]]:
758
+ """
759
+ Returns the collections, in all projects, that correspond to the given data descriptor
760
+ in the universe.
761
+ This function performs an exact match on `data_descriptor_id`,
762
+ and does not search for similar or related data descriptors.
763
+ If the provided `data_descriptor_id` is not found, or if
764
+ there is no collection corresponding to the given data descriptor, the function returns
765
+ an empty list.
766
+
767
+ :param data_descriptor_id: The id of the given data descriptor.
768
+ :type data_descriptor_id: str
769
+ :returns: A list of tuples of project id, collection id and collection context. \
770
+ Returns an empty list if no matches are found.
771
+ :rtype: list[tuple[str, str, dict]]
772
+ """
773
+ result = list()
774
+ project_ids = get_all_projects()
775
+ for project_id in project_ids:
776
+ collection_found = get_collection_from_data_descriptor_in_project(project_id,
777
+ data_descriptor_id)
778
+ if collection_found:
779
+ result.append((project_id, collection_found[0], collection_found[1]))
780
+ return result
781
+
782
+
783
+ def _get_term_from_universe_term_id_in_project(data_descriptor_id: str,
784
+ universe_term_id: str,
785
+ project_session: Session) -> PTerm | None:
786
+ statement = select(PTerm).join(Collection).where(Collection.data_descriptor_id == data_descriptor_id,
787
+ PTerm.id == universe_term_id)
788
+ results = project_session.exec(statement)
789
+ result = results.one_or_none()
790
+ return result
791
+
792
+
793
+ def get_term_from_universe_term_id_in_project(project_id: str,
794
+ data_descriptor_id: str,
795
+ universe_term_id: str,
796
+ selected_term_fields: Iterable[str] | None = None) \
797
+ -> tuple[str, DataDescriptor] | None:
798
+ """
799
+ Returns the term, in the given project, that corresponds to the given term in the universe.
800
+ This function performs an exact match on the `project_id`, `data_descriptor_id`
801
+ and `universe_term_id`, and does not search for similar or related projects, data descriptors
802
+ and terms. If any of the provided ids (`project_id`, `data_descriptor_id` or `universe_term_id`)
803
+ is not found, or if there is no project term corresponding to the given universe term
804
+ the function returns `None`.
805
+
806
+ :param project_id: The id of the given project.
807
+ :type project_id: str
808
+ :param data_descriptor_id: The id of the data descriptor that contains the given universe term.
809
+ :type data_descriptor_id: str
810
+ :param universe_term_id: The id of the given universe term.
811
+ :type universe_term_id: str
812
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
813
+ fields of the terms are returned. If empty, selects the id and type fields.
814
+ :type selected_term_fields: Iterable[str] | None
815
+ :returns: A collection id and the project term instance. Returns `None` if no matches are found.
816
+ :rtype: tuple[str, DataDescriptor] | None
817
+ """
818
+ result: tuple[str, DataDescriptor] | None = None
819
+ if connection := _get_project_connection(project_id):
820
+ with connection.create_session() as session:
821
+ term_found = _get_term_from_universe_term_id_in_project(data_descriptor_id,
822
+ universe_term_id,
823
+ session)
824
+ if term_found:
825
+ pydantic_term = instantiate_pydantic_term(term_found, selected_term_fields)
826
+ result = (term_found.collection.id, pydantic_term)
827
+ return result
828
+
829
+
830
+ def get_term_from_universe_term_id_in_all_projects(data_descriptor_id: str,
831
+ universe_term_id: str,
832
+ selected_term_fields: Iterable[str] | None = None) \
833
+ -> list[tuple[str, str, DataDescriptor]]:
834
+ """
835
+ Returns the terms, in all projects, that correspond to the given term in the universe.
836
+ This function performs an exact match on the `data_descriptor_id`
837
+ and `universe_term_id`, and does not search for similar or related data descriptors
838
+ and terms. If any of the provided ids (`data_descriptor_id` or `universe_term_id`)
839
+ is not found, or if there is no project term corresponding to the given universe term
840
+ the function returns an empty list.
841
+
842
+ :param data_descriptor_id: The id of the data descriptor that contains the given universe term.
843
+ :type data_descriptor_id: str
844
+ :param universe_term_id: The id of the given universe term.
845
+ :type universe_term_id: str
846
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
847
+ fields of the terms are returned. If empty, selects the id and type fields.
848
+ :type selected_term_fields: Iterable[str] | None
849
+ :returns: A list of tuples of project id, collection id and project term instance. \
850
+ Returns an empty list if no matches are found.
851
+ :rtype: list[tuple[str, str, DataDescriptor]]
852
+ """
853
+ result: list[tuple[str, str, DataDescriptor]] = list()
854
+ project_ids = get_all_projects()
855
+ for project_id in project_ids:
856
+ term_found = get_term_from_universe_term_id_in_project(project_id,
857
+ data_descriptor_id,
858
+ universe_term_id,
859
+ selected_term_fields)
860
+ if term_found:
861
+ result.append((project_id, term_found[0], term_found[1]))
862
+ return result
863
+
864
+
865
+ def _find_collections_in_project(expression: str,
866
+ session: Session,
867
+ only_id: bool = False,
868
+ limit: int | None = None,
869
+ offset: int | None = None) -> Sequence[Collection]:
870
+ matching_condition = generate_matching_condition(PCollectionFTS5, expression, only_id)
871
+ tmp_statement = select(PCollectionFTS5).where(matching_condition)
872
+ statement = select(Collection).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
873
+ return execute_match_statement(expression, statement, session)
874
+
875
+
876
+ def find_collections_in_project(expression: str, project_id: str,
877
+ only_id: bool = False,
878
+ limit: int | None = None,
879
+ offset: int | None = None) -> list[tuple[str, dict]]:
880
+ """
881
+ Find collections in the given project based on a full text search defined by the given `expression`.
882
+ The `expression` comes from the powerful
883
+ `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
884
+ and corresponds to the expression of the `MATCH` operator.
885
+ It can be composed of one or multiple keywords combined with boolean
886
+ operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
887
+ with the wildcard `*`.
888
+ The function returns a list of collection ids and contexts, sorted according to the
889
+ bm25 ranking metric (list index `0` has the highest rank).
890
+ This function performs an exact match on the `project_id`,
891
+ and does not search for similar or related projects.
892
+ If the provided `expression` does not hit any collection or the given `project_id` does not
893
+ match exactly to an id of a project, the function returns an empty list.
894
+ The function searches for the `expression` in the collection specifications.
895
+ However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
896
+ collections. **At the moment, `only_id` is set to `True` as the collections
897
+ haven't got any description.**
898
+
899
+ :param expression: The full text search expression.
900
+ :type expression: str
901
+ :param project_id: The id of the given project.
902
+ :type project_id: str
903
+ :param only_id: Performs the search only on ids, otherwise on all the specifications.
904
+ :type only_id: bool
905
+ :param limit: Limit the number of returned items found. Returns all items found if \
906
+ `limit` is either `None`, zero or negative.
907
+ :type limit: int | None
908
+ :param offset: Skips `offset` number of items found. Ignored if `offset` is \
909
+ either `None`, zero or negative.
910
+ :type offset: int | None
911
+ :returns: A list of collection ids and contexts. Returns an empty list if no matches are found.
912
+ :rtype: list[tuple[str, dict]]
913
+ :raises EsgvocValueError: If the `expression` cannot be interpreted.
914
+ """
915
+ result: list[tuple[str, dict]] = list()
916
+ if connection := _get_project_connection(project_id):
917
+ with connection.create_session() as session:
918
+ collections_found = _find_collections_in_project(expression, session, only_id,
919
+ limit, offset)
920
+ for collection in collections_found:
921
+ result.append((collection.id, collection.context))
922
+ return result
923
+
924
+
925
+ def _find_terms_in_collection(expression: str,
926
+ collection_id: str,
927
+ session: Session,
928
+ only_id: bool = False,
929
+ limit: int | None = None,
930
+ offset: int | None = None) -> Sequence[PTerm]:
931
+ matching_condition = generate_matching_condition(PTermFTS5, expression, only_id)
932
+ where_condition = Collection.id == collection_id, matching_condition
933
+ tmp_statement = select(PTermFTS5).join(Collection).where(*where_condition)
934
+ statement = select(PTerm).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
935
+ return execute_match_statement(expression, statement, session)
936
+
937
+
938
+ def _find_terms_in_project(expression: str,
939
+ session: Session,
940
+ only_id: bool = False,
941
+ limit: int | None = None,
942
+ offset: int | None = None) -> Sequence[PTerm]:
943
+ matching_condition = generate_matching_condition(PTermFTS5, expression, only_id)
944
+ tmp_statement = select(PTermFTS5).where(matching_condition)
945
+ statement = select(PTerm).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
946
+ return execute_match_statement(expression, statement, session)
947
+
948
+
949
+ def find_terms_in_collection(expression: str, project_id: str,
950
+ collection_id: str,
951
+ only_id: bool = False,
952
+ limit: int | None = None,
953
+ offset: int | None = None,
954
+ selected_term_fields: Iterable[str] | None = None) \
955
+ -> list[DataDescriptor]:
956
+ """
957
+ Find terms in the given project and collection based on a full text search defined by the given
958
+ `expression`. The `expression` comes from the powerful
959
+ `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
960
+ and corresponds to the expression of the `MATCH` operator.
961
+ It can be composed of one or multiple keywords combined with boolean
962
+ operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
963
+ with the wildcard `*`.
964
+ The function returns a list of term instances, sorted according to the
965
+ bm25 ranking metric (list index `0` has the highest rank).
966
+ This function performs an exact match on the `project_id` and `collection_id`,
967
+ and does not search for similar or related projects and collections.
968
+ If the provided `expression` does not hit any term or if any of the provided ids
969
+ (`project_id` or `collection_id`) is not found, the function returns an empty list.
970
+ The function searches for the `expression` in the term specifications.
971
+ However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
972
+ terms.
973
+
974
+ :param expression: The full text search expression.
975
+ :type expression: str
976
+ :param project_id: The id of the given project.
977
+ :type project_id: str
978
+ :param collection_id: The id of the given collection.
979
+ :type collection_id: str
980
+ :param only_id: Performs the search only on ids, otherwise on all the specifications.
981
+ :type only_id: bool
982
+ :param limit: Limit the number of returned items found. Returns all items found if \
983
+ `limit` is either `None`, zero or negative.
984
+ :type limit: int | None
985
+ :param offset: Skips `offset` number of items found. Ignored if `offset` is \
986
+ either `None`, zero or negative.
987
+ :type offset: int | None
988
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
989
+ fields of the terms are returned. If empty, selects the id and type fields.
990
+ :type selected_term_fields: Iterable[str] | None
991
+ :returns: A list of term instances. Returns an empty list if no matches are found.
992
+ :rtype: list[DataDescriptor]
993
+ :raises EsgvocValueError: If the `expression` cannot be interpreted.
994
+ """
995
+ result: list[DataDescriptor] = list()
996
+ if connection := _get_project_connection(project_id):
997
+ with connection.create_session() as session:
998
+ pterms_found = _find_terms_in_collection(expression, collection_id, session,
999
+ only_id, limit, offset)
1000
+ instantiate_pydantic_terms(pterms_found, result, selected_term_fields)
1001
+ return result
1002
+
1003
+
1004
+ def find_terms_in_project(expression: str,
1005
+ project_id: str,
1006
+ only_id: bool = False,
1007
+ limit: int | None = None,
1008
+ offset: int | None = None,
1009
+ selected_term_fields: Iterable[str] | None = None) \
1010
+ -> list[DataDescriptor]:
1011
+ """
1012
+ Find terms in the given project based on a full text search defined by the given
1013
+ `expression`. The `expression` comes from the powerful
1014
+ `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
1015
+ and corresponds to the expression of the `MATCH` operator.
1016
+ It can be composed of one or multiple keywords combined with boolean
1017
+ operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
1018
+ with the wildcard `*`.
1019
+ The function returns a list of term instances, sorted according to the
1020
+ bm25 ranking metric (list index `0` has the highest rank).
1021
+ This function performs an exact match on the `project_id`,
1022
+ and does not search for similar or related projects.
1023
+ If the provided `expression` does not hit any term or if the provided `project_id` is
1024
+ not found, the function returns an empty list.
1025
+ The function searches for the `expression` in the term specifications.
1026
+ However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
1027
+ terms.
1028
+
1029
+ :param expression: The full text search expression.
1030
+ :type expression: str
1031
+ :param project_id: The id of the given project.
1032
+ :type project_id: str
1033
+ :param only_id: Performs the search only on ids, otherwise on all the specifications.
1034
+ :type only_id: bool
1035
+ :param limit: Limit the number of returned items found. Returns all items found if \
1036
+ `limit` is either `None`, zero or negative.
1037
+ :type limit: int | None
1038
+ :param offset: Skips `offset` number of items found. Ignored if `offset` is \
1039
+ either `None`, zero or negative.
1040
+ :type offset: int | None
1041
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
1042
+ fields of the terms are returned. If empty, selects the id and type fields.
1043
+ :type selected_term_fields: Iterable[str] | None
1044
+ :returns: A list of term instances. Returns an empty list if no matches are found.
1045
+ :rtype: list[DataDescriptor]
1046
+ :raises EsgvocValueError: If the `expression` cannot be interpreted.
1047
+ """
1048
+ result: list[DataDescriptor] = list()
1049
+ if connection := _get_project_connection(project_id):
1050
+ with connection.create_session() as session:
1051
+ pterms_found = _find_terms_in_project(expression, session, only_id, limit, offset)
1052
+ instantiate_pydantic_terms(pterms_found, result, selected_term_fields)
1053
+ return result
893
1054
 
894
1055
 
895
- if __name__ == "__main__":
896
- settings = SearchSettings()
897
- settings.selected_term_fields = ('id', 'drs_name')
898
- settings.case_sensitive = False
899
- matching_terms = find_terms_from_data_descriptor_in_all_projects('organisation', 'IpsL', settings)
900
- print(matching_terms)
1056
+ def find_terms_in_all_projects(expression: str,
1057
+ only_id: bool = False,
1058
+ limit: int | None = None,
1059
+ offset: int | None = None,
1060
+ selected_term_fields: Iterable[str] | None = None) \
1061
+ -> list[tuple[str, list[DataDescriptor]]]:
1062
+ """
1063
+ Find terms in all projects based on a full text search defined by the given
1064
+ `expression`. The `expression` comes from the powerful
1065
+ `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
1066
+ and corresponds to the expression of the `MATCH` operator.
1067
+ It can be composed of one or multiple keywords combined with boolean
1068
+ operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
1069
+ with the wildcard `*`.
1070
+ The function returns a list of project ids and term instances, sorted according to the
1071
+ bm25 ranking metric (list index `0` has the highest rank).
1072
+ If the provided `expression` does not hit any term, the function returns an empty list.
1073
+ The function searches for the `expression` in the term specifications.
1074
+ However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
1075
+ terms.
1076
+
1077
+ :param expression: The full text search expression.
1078
+ :type expression: str
1079
+ :param only_id: Performs the search only on ids, otherwise on all the specifications.
1080
+ :type only_id: bool
1081
+ :param limit: Limit the number of returned items found. Returns all items found if \
1082
+ `limit` is either `None`, zero or negative.
1083
+ :type limit: int | None
1084
+ :param offset: Skips `offset` number of items found. Ignored if `offset` is \
1085
+ either `None`, zero or negative.
1086
+ :type offset: int | None
1087
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
1088
+ fields of the terms are returned. If empty, selects the id and type fields.
1089
+ :type selected_term_fields: Iterable[str] | None
1090
+ :returns: A list of project ids and term instances. Returns an empty list if no matches are found.
1091
+ :rtype: list[tuple[str, list[DataDescriptor]]]
1092
+ :raises EsgvocValueError: If the `expression` cannot be interpreted.
1093
+ """
1094
+ result: list[tuple[str, list[DataDescriptor]]] = list()
1095
+ project_ids = get_all_projects()
1096
+ for project_id in project_ids:
1097
+ terms_found = find_terms_in_project(expression, project_id, only_id,
1098
+ limit, offset, selected_term_fields)
1099
+ if terms_found:
1100
+ result.append((project_id, terms_found))
1101
+ return result
1102
+
1103
+
1104
def find_items_in_project(expression: str,
                          project_id: str,
                          only_id: bool = False,
                          limit: int | None = None,
                          offset: int | None = None) -> list[Item]:
    """
    Find items, at the moment terms and collections, in the given project based on a full-text
    search defined by the given `expression`. The `expression` comes from the powerful
    `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
    and corresponds to the expression of the `MATCH` operator.
    It can be composed of one or multiple keywords combined with boolean
    operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
    with the wildcard `*`.
    The function returns a list of item instances sorted according to the
    bm25 ranking metric (list index `0` has the highest rank).
    This function performs an exact match on the `project_id`,
    and does not search for similar or related projects.
    If the provided `expression` does not hit any item, or the provided `project_id` is not found,
    the function returns an empty list.
    The function searches for the `expression` in the term and collection specifications.
    However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
    terms and collections. **At the moment, `only_id` is set to `True` for the collections because
    they haven't got any description.**

    :param expression: The full text search expression.
    :type expression: str
    :param project_id: The id of the project to search in (exact match).
    :type project_id: str
    :param only_id: Performs the search only on ids, otherwise on all the specifications.
    :type only_id: bool
    :param limit: Limit the number of returned items found. Returns all items found if
                  `limit` is either `None`, zero or negative.
    :type limit: int | None
    :param offset: Skips `offset` number of items found. Ignored if `offset` is
                   either `None`, zero or negative.
    :type offset: int | None
    :returns: A list of item instances. Returns an empty list if no matches are found.
    :rtype: list[Item]
    :raises EsgvocValueError: If the `expression` cannot be interpreted.
    """
    # TODO: execute union query when it will be possible to compute parent of terms and collections.
    result: list[Item] = list()
    connection = _get_project_connection(project_id)
    if not connection:
        # No connection for this project id: nothing to search.
        return result

    with connection.create_session() as session:
        # Collections are always matched on their id for now (see the docstring).
        collection_column = col(PCollectionFTS5.id)  # TODO: use specs when implemented!
        if only_id:
            term_column = col(PTermFTS5.id)
        else:
            term_column = col(PTermFTS5.specs)  # type: ignore

        # The project id is sent as a bound parameter instead of being spliced into the SQL
        # text, so that quotes or ':name' sequences in the id cannot alter the statement.
        # NOTE(review): the 'TYPE' alias duplicates the one of the previous column; it is kept
        # unchanged because how execute_find_item_statements reads the rows is not visible here.
        project_id_column = text(":project_id AS TYPE").bindparams(project_id=project_id)

        # Collection rows: collection id, the constant 'collection', the project id, FTS rank.
        collection_statement = select(PCollectionFTS5.id,
                                      text("'collection' AS TYPE"),
                                      project_id_column,
                                      text('rank')).where(collection_column.match(expression))

        # Term rows: term id, the constant 'term', the id of the joined collection, FTS rank.
        term_statement = select(PTermFTS5.id,
                                text("'term' AS TYPE"),
                                Collection.id,
                                text('rank')).join(Collection) \
                                             .where(term_column.match(expression))

        result = execute_find_item_statements(session, expression, collection_statement,
                                              term_statement, limit, offset)
    return result