esgvoc 0.4.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic. Click here for more details.

Files changed (73)
  1. esgvoc/__init__.py +1 -1
  2. esgvoc/api/data_descriptors/__init__.py +50 -28
  3. esgvoc/api/data_descriptors/activity.py +3 -3
  4. esgvoc/api/data_descriptors/area_label.py +16 -1
  5. esgvoc/api/data_descriptors/branded_suffix.py +20 -0
  6. esgvoc/api/data_descriptors/branded_variable.py +12 -0
  7. esgvoc/api/data_descriptors/consortium.py +14 -13
  8. esgvoc/api/data_descriptors/contact.py +5 -0
  9. esgvoc/api/data_descriptors/conventions.py +6 -0
  10. esgvoc/api/data_descriptors/creation_date.py +5 -0
  11. esgvoc/api/data_descriptors/data_descriptor.py +14 -9
  12. esgvoc/api/data_descriptors/data_specs_version.py +5 -0
  13. esgvoc/api/data_descriptors/date.py +1 -1
  14. esgvoc/api/data_descriptors/directory_date.py +1 -1
  15. esgvoc/api/data_descriptors/experiment.py +13 -11
  16. esgvoc/api/data_descriptors/forcing_index.py +1 -1
  17. esgvoc/api/data_descriptors/frequency.py +3 -3
  18. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  19. esgvoc/api/data_descriptors/grid_label.py +2 -2
  20. esgvoc/api/data_descriptors/horizontal_label.py +15 -1
  21. esgvoc/api/data_descriptors/initialisation_index.py +1 -1
  22. esgvoc/api/data_descriptors/institution.py +8 -5
  23. esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
  24. esgvoc/api/data_descriptors/license.py +3 -3
  25. esgvoc/api/data_descriptors/mip_era.py +1 -1
  26. esgvoc/api/data_descriptors/model_component.py +1 -1
  27. esgvoc/api/data_descriptors/obs_type.py +5 -0
  28. esgvoc/api/data_descriptors/organisation.py +1 -1
  29. esgvoc/api/data_descriptors/physic_index.py +1 -1
  30. esgvoc/api/data_descriptors/product.py +2 -2
  31. esgvoc/api/data_descriptors/publication_status.py +5 -0
  32. esgvoc/api/data_descriptors/realisation_index.py +1 -1
  33. esgvoc/api/data_descriptors/realm.py +1 -1
  34. esgvoc/api/data_descriptors/region.py +5 -0
  35. esgvoc/api/data_descriptors/resolution.py +3 -3
  36. esgvoc/api/data_descriptors/source.py +9 -5
  37. esgvoc/api/data_descriptors/source_type.py +1 -1
  38. esgvoc/api/data_descriptors/table.py +3 -2
  39. esgvoc/api/data_descriptors/temporal_label.py +15 -1
  40. esgvoc/api/data_descriptors/time_range.py +4 -3
  41. esgvoc/api/data_descriptors/title.py +5 -0
  42. esgvoc/api/data_descriptors/tracking_id.py +5 -0
  43. esgvoc/api/data_descriptors/variable.py +25 -12
  44. esgvoc/api/data_descriptors/variant_label.py +3 -3
  45. esgvoc/api/data_descriptors/vertical_label.py +14 -0
  46. esgvoc/api/project_specs.py +117 -2
  47. esgvoc/api/projects.py +242 -279
  48. esgvoc/api/search.py +30 -3
  49. esgvoc/api/universe.py +42 -27
  50. esgvoc/apps/jsg/cmip6_template.json +74 -0
  51. esgvoc/apps/jsg/cmip6plus_template.json +74 -0
  52. esgvoc/apps/jsg/json_schema_generator.py +185 -0
  53. esgvoc/cli/config.py +500 -0
  54. esgvoc/cli/find.py +138 -0
  55. esgvoc/cli/get.py +43 -38
  56. esgvoc/cli/main.py +10 -3
  57. esgvoc/cli/status.py +27 -18
  58. esgvoc/cli/valid.py +10 -15
  59. esgvoc/core/db/models/project.py +11 -11
  60. esgvoc/core/db/models/universe.py +3 -3
  61. esgvoc/core/db/project_ingestion.py +40 -40
  62. esgvoc/core/db/universe_ingestion.py +36 -33
  63. esgvoc/core/logging_handler.py +24 -2
  64. esgvoc/core/repo_fetcher.py +61 -59
  65. esgvoc/core/service/data_merger.py +47 -34
  66. esgvoc/core/service/state.py +107 -83
  67. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/METADATA +7 -20
  68. esgvoc-1.0.0.dist-info/RECORD +95 -0
  69. esgvoc/core/logging.conf +0 -21
  70. esgvoc-0.4.0.dist-info/RECORD +0 -80
  71. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/WHEEL +0 -0
  72. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/entry_points.txt +0 -0
  73. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/api/projects.py CHANGED
@@ -20,16 +20,11 @@ from esgvoc.api.search import (
20
20
  handle_rank_limit_offset,
21
21
  instantiate_pydantic_term,
22
22
  instantiate_pydantic_terms,
23
+ process_expression,
23
24
  )
24
25
  from esgvoc.core.db.connection import DBConnection
25
26
  from esgvoc.core.db.models.mixins import TermKind
26
- from esgvoc.core.db.models.project import (
27
- Collection,
28
- PCollectionFTS5,
29
- Project,
30
- PTerm,
31
- PTermFTS5,
32
- )
27
+ from esgvoc.core.db.models.project import PCollection, PCollectionFTS5, Project, PTerm, PTermFTS5
33
28
  from esgvoc.core.db.models.universe import UTerm
34
29
  from esgvoc.core.exceptions import EsgvocDbError, EsgvocNotFoundError, EsgvocNotImplementedError, EsgvocValueError
35
30
 
@@ -53,21 +48,17 @@ def _get_project_session_with_exception(project_id: str) -> Session:
53
48
  raise EsgvocNotFoundError(f"unable to find project '{project_id}'")
54
49
 
55
50
 
56
- def _resolve_term(composite_term_part: dict,
57
- universe_session: Session,
58
- project_session: Session) -> UTerm | PTerm:
51
+ def _resolve_term(composite_term_part: dict, universe_session: Session, project_session: Session) -> UTerm | PTerm:
59
52
  # First find the term in the universe, then in the current project
60
53
  term_id = composite_term_part[constants.TERM_ID_JSON_KEY]
61
54
  term_type = composite_term_part[constants.TERM_TYPE_JSON_KEY]
62
- uterm = universe._get_term_in_data_descriptor(data_descriptor_id=term_type,
63
- term_id=term_id,
64
- session=universe_session)
55
+ uterm = universe._get_term_in_data_descriptor(
56
+ data_descriptor_id=term_type, term_id=term_id, session=universe_session
57
+ )
65
58
  if uterm:
66
59
  return uterm
67
60
  else:
68
- pterm = _get_term_in_collection(collection_id=term_type,
69
- term_id=term_id,
70
- session=project_session)
61
+ pterm = _get_term_in_collection(collection_id=term_type, term_id=term_id, session=project_session)
71
62
  if pterm:
72
63
  return pterm
73
64
  else:
@@ -83,11 +74,9 @@ def _get_composite_term_separator_parts(term: UTerm | PTerm) -> tuple[str, list]
83
74
 
84
75
  # TODO: support optionality of parts of composite.
85
76
  # Handling more than one missing part may require backtracking.
86
- def _valid_value_composite_term_with_separator(value: str,
87
- term: UTerm | PTerm,
88
- universe_session: Session,
89
- project_session: Session)\
90
- -> list[UniverseTermError | ProjectTermError]:
77
+ def _valid_value_composite_term_with_separator(
78
+ value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
79
+ ) -> list[UniverseTermError | ProjectTermError]:
91
80
  result = list()
92
81
  separator, parts = _get_composite_term_separator_parts(term)
93
82
  if separator in value:
@@ -95,14 +84,25 @@ def _valid_value_composite_term_with_separator(value: str,
95
84
  if len(splits) == len(parts):
96
85
  for index in range(0, len(splits)):
97
86
  given_value = splits[index]
98
- resolved_term = _resolve_term(parts[index],
99
- universe_session,
100
- project_session)
101
- errors = _valid_value(given_value,
102
- resolved_term,
103
- universe_session,
104
- project_session)
105
- result.extend(errors)
87
+ if "id" not in parts[index].keys():
88
+ terms = universe.get_all_terms_in_data_descriptor(parts[index]["type"], None)
89
+ parts[index]["id"] = [term.id for term in terms]
90
+ if type(parts[index]["id"]) is str:
91
+ parts[index]["id"] = [parts[index]["id"]]
92
+
93
+ errors_list = list()
94
+ for id in parts[index]["id"]:
95
+ part_parts = dict(parts[index])
96
+ part_parts["id"] = id
97
+ resolved_term = _resolve_term(part_parts, universe_session, project_session)
98
+ errors = _valid_value(given_value, resolved_term, universe_session, project_session)
99
+ if len(errors) == 0:
100
+ errors_list = errors
101
+ break
102
+ else:
103
+ errors_list.extend(errors)
104
+ else:
105
+ result.append(_create_term_error(value, term))
106
106
  else:
107
107
  result.append(_create_term_error(value, term))
108
108
  else:
@@ -110,16 +110,13 @@ def _valid_value_composite_term_with_separator(value: str,
110
110
  return result
111
111
 
112
112
 
113
- def _transform_to_pattern(term: UTerm | PTerm,
114
- universe_session: Session,
115
- project_session: Session) -> str:
113
+ def _transform_to_pattern(term: UTerm | PTerm, universe_session: Session, project_session: Session) -> str:
116
114
  match term.kind:
117
115
  case TermKind.PLAIN:
118
116
  if constants.DRS_SPECS_JSON_KEY in term.specs:
119
117
  result = term.specs[constants.DRS_SPECS_JSON_KEY]
120
118
  else:
121
- raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " +
122
- "Can't validate it.")
119
+ raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " + "Can't validate it.")
123
120
  case TermKind.PATTERN:
124
121
  result = term.specs[constants.PATTERN_JSON_KEY]
125
122
  case TermKind.COMPOSITE:
@@ -128,7 +125,7 @@ def _transform_to_pattern(term: UTerm | PTerm,
128
125
  for part in parts:
129
126
  resolved_term = _resolve_term(part, universe_session, project_session)
130
127
  pattern = _transform_to_pattern(resolved_term, universe_session, project_session)
131
- result = f'{result}{pattern}{separator}'
128
+ result = f"{result}{pattern}{separator}"
132
129
  result = result.rstrip(separator)
133
130
  case _:
134
131
  raise EsgvocDbError(f"unsupported term kind '{term.kind}'")
@@ -137,11 +134,9 @@ def _transform_to_pattern(term: UTerm | PTerm,
137
134
 
138
135
  # TODO: support optionality of parts of composite.
139
136
  # Handling more than one missing part may require backtracking.
140
- def _valid_value_composite_term_separator_less(value: str,
141
- term: UTerm | PTerm,
142
- universe_session: Session,
143
- project_session: Session)\
144
- -> list[UniverseTermError | ProjectTermError]:
137
+ def _valid_value_composite_term_separator_less(
138
+ value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
139
+ ) -> list[UniverseTermError | ProjectTermError]:
145
140
  result = list()
146
141
  try:
147
142
  pattern = _transform_to_pattern(term, universe_session, project_session)
@@ -150,8 +145,8 @@ def _valid_value_composite_term_separator_less(value: str,
150
145
  # So their regex are defined as a whole (begins by a ^, ends by a $).
151
146
  # As the pattern is a concatenation of plain or regex, multiple ^ and $ can exist.
152
147
  # The latter must be removed.
153
- pattern = pattern.replace('^', '').replace('$', '')
154
- pattern = f'^{pattern}$'
148
+ pattern = pattern.replace("^", "").replace("$", "")
149
+ pattern = f"^{pattern}$"
155
150
  regex = re.compile(pattern)
156
151
  except Exception as e:
157
152
  msg = f"regex compilation error while processing term '{term.id}'':\n{e}"
@@ -165,35 +160,30 @@ def _valid_value_composite_term_separator_less(value: str,
165
160
  raise EsgvocNotImplementedError(msg) from e
166
161
 
167
162
 
168
- def _valid_value_for_composite_term(value: str,
169
- term: UTerm | PTerm,
170
- universe_session: Session,
171
- project_session: Session)\
172
- -> list[UniverseTermError | ProjectTermError]:
163
+ def _valid_value_for_composite_term(
164
+ value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
165
+ ) -> list[UniverseTermError | ProjectTermError]:
173
166
  result = list()
174
167
  separator, _ = _get_composite_term_separator_parts(term)
175
168
  if separator:
176
- result = _valid_value_composite_term_with_separator(value, term, universe_session,
177
- project_session)
169
+ result = _valid_value_composite_term_with_separator(value, term, universe_session, project_session)
178
170
  else:
179
- result = _valid_value_composite_term_separator_less(value, term, universe_session,
180
- project_session)
171
+ result = _valid_value_composite_term_separator_less(value, term, universe_session, project_session)
181
172
  return result
182
173
 
183
174
 
184
175
  def _create_term_error(value: str, term: UTerm | PTerm) -> UniverseTermError | ProjectTermError:
185
176
  if isinstance(term, UTerm):
186
- return UniverseTermError(value=value, term=term.specs, term_kind=term.kind,
187
- data_descriptor_id=term.data_descriptor.id)
177
+ return UniverseTermError(
178
+ value=value, term=term.specs, term_kind=term.kind, data_descriptor_id=term.data_descriptor.id
179
+ )
188
180
  else:
189
- return ProjectTermError(value=value, term=term.specs, term_kind=term.kind,
190
- collection_id=term.collection.id)
181
+ return ProjectTermError(value=value, term=term.specs, term_kind=term.kind, collection_id=term.collection.id)
191
182
 
192
183
 
193
- def _valid_value(value: str,
194
- term: UTerm | PTerm,
195
- universe_session: Session,
196
- project_session: Session) -> list[UniverseTermError | ProjectTermError]:
184
+ def _valid_value(
185
+ value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
186
+ ) -> list[UniverseTermError | ProjectTermError]:
197
187
  result = list()
198
188
  match term.kind:
199
189
  case TermKind.PLAIN:
@@ -201,17 +191,14 @@ def _valid_value(value: str,
201
191
  if term.specs[constants.DRS_SPECS_JSON_KEY] != value:
202
192
  result.append(_create_term_error(value, term))
203
193
  else:
204
- raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " +
205
- "Can't validate it.")
194
+ raise EsgvocValueError(f"the term '{term.id}' doesn't have drs name. " + "Can't validate it.")
206
195
  case TermKind.PATTERN:
207
196
  # TODO: Pattern can be compiled and stored for further matching.
208
197
  pattern_match = re.match(term.specs[constants.PATTERN_JSON_KEY], value)
209
198
  if pattern_match is None:
210
199
  result.append(_create_term_error(value, term))
211
200
  case TermKind.COMPOSITE:
212
- result.extend(_valid_value_for_composite_term(value, term,
213
- universe_session,
214
- project_session))
201
+ result.extend(_valid_value_for_composite_term(value, term, universe_session, project_session))
215
202
  case _:
216
203
  raise EsgvocDbError(f"unsupported term kind '{term.kind}'")
217
204
  return result
@@ -219,33 +206,25 @@ def _valid_value(value: str,
219
206
 
220
207
  def _check_value(value: str) -> str:
221
208
  if not value or value.isspace():
222
- raise EsgvocValueError('value should be set')
209
+ raise EsgvocValueError("value should be set")
223
210
  else:
224
211
  return value
225
212
 
226
213
 
227
- def _search_plain_term_and_valid_value(value: str,
228
- collection_id: str,
229
- project_session: Session) \
230
- -> str | None:
231
- where_expression = and_(Collection.id == collection_id,
232
- PTerm.specs[constants.DRS_SPECS_JSON_KEY] == f'"{value}"')
233
- statement = select(PTerm).join(Collection).where(where_expression)
214
+ def _search_plain_term_and_valid_value(value: str, collection_id: str, project_session: Session) -> str | None:
215
+ where_expression = and_(PCollection.id == collection_id, PTerm.specs[constants.DRS_SPECS_JSON_KEY] == f'"{value}"')
216
+ statement = select(PTerm).join(PCollection).where(where_expression)
234
217
  term = project_session.exec(statement).one_or_none()
235
218
  return term.id if term else None
236
219
 
237
220
 
238
- def _valid_value_against_all_terms_of_collection(value: str,
239
- collection: Collection,
240
- universe_session: Session,
241
- project_session: Session) \
242
- -> list[str]:
221
+ def _valid_value_against_all_terms_of_collection(
222
+ value: str, collection: PCollection, universe_session: Session, project_session: Session
223
+ ) -> list[str]:
243
224
  if collection.terms:
244
225
  result = list()
245
226
  for pterm in collection.terms:
246
- _errors = _valid_value(value, pterm,
247
- universe_session,
248
- project_session)
227
+ _errors = _valid_value(value, pterm, universe_session, project_session)
249
228
  if not _errors:
250
229
  result.append(pterm.id)
251
230
  return result
@@ -253,35 +232,24 @@ def _valid_value_against_all_terms_of_collection(value: str,
253
232
  raise EsgvocDbError(f"collection '{collection.id}' has no term")
254
233
 
255
234
 
256
- def _valid_value_against_given_term(value: str,
257
- project_id: str,
258
- collection_id: str,
259
- term_id: str,
260
- universe_session: Session,
261
- project_session: Session)\
262
- -> list[UniverseTermError | ProjectTermError]:
235
+ def _valid_value_against_given_term(
236
+ value: str, project_id: str, collection_id: str, term_id: str, universe_session: Session, project_session: Session
237
+ ) -> list[UniverseTermError | ProjectTermError]:
263
238
  # [OPTIMIZATION]
264
239
  key = value + project_id + collection_id + term_id
265
240
  if key in _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE:
266
241
  result = _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key]
267
242
  else:
268
- term = _get_term_in_collection(collection_id,
269
- term_id,
270
- project_session)
243
+ term = _get_term_in_collection(collection_id, term_id, project_session)
271
244
  if term:
272
245
  result = _valid_value(value, term, universe_session, project_session)
273
246
  else:
274
- raise EsgvocNotFoundError(f"unable to find term '{term_id}' " +
275
- f"in collection '{collection_id}'")
247
+ raise EsgvocNotFoundError(f"unable to find term '{term_id}' " + f"in collection '{collection_id}'")
276
248
  _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key] = result
277
249
  return result
278
250
 
279
251
 
280
- def valid_term(value: str,
281
- project_id: str,
282
- collection_id: str,
283
- term_id: str) \
284
- -> ValidationReport:
252
+ def valid_term(value: str, project_id: str, collection_id: str, term_id: str) -> ValidationReport:
285
253
  """
286
254
  Check if the given value may or may not represent the given term. The function returns
287
255
  a report that contains the possible errors.
@@ -312,19 +280,16 @@ def valid_term(value: str,
312
280
  :raises EsgvocNotFoundError: If any of the provided ids is not found
313
281
  """
314
282
  value = _check_value(value)
315
- with get_universe_session() as universe_session, \
316
- _get_project_session_with_exception(project_id) as project_session:
317
- errors = _valid_value_against_given_term(value, project_id, collection_id, term_id,
318
- universe_session, project_session)
283
+ with get_universe_session() as universe_session, _get_project_session_with_exception(project_id) as project_session:
284
+ errors = _valid_value_against_given_term(
285
+ value, project_id, collection_id, term_id, universe_session, project_session
286
+ )
319
287
  return ValidationReport(expression=value, errors=errors)
320
288
 
321
289
 
322
- def _valid_term_in_collection(value: str,
323
- project_id: str,
324
- collection_id: str,
325
- universe_session: Session,
326
- project_session: Session) \
327
- -> list[MatchingTerm]:
290
+ def _valid_term_in_collection(
291
+ value: str, project_id: str, collection_id: str, universe_session: Session, project_session: Session
292
+ ) -> list[MatchingTerm]:
328
293
  # [OPTIMIZATION]
329
294
  key = value + project_id + collection_id
330
295
  if key in _VALID_TERM_IN_COLLECTION_CACHE:
@@ -336,20 +301,19 @@ def _valid_term_in_collection(value: str,
336
301
  if collection:
337
302
  match collection.term_kind:
338
303
  case TermKind.PLAIN:
339
- term_id_found = _search_plain_term_and_valid_value(value, collection_id,
340
- project_session)
304
+ term_id_found = _search_plain_term_and_valid_value(value, collection_id, project_session)
341
305
  if term_id_found:
342
- result.append(MatchingTerm(project_id=project_id,
343
- collection_id=collection_id,
344
- term_id=term_id_found))
306
+ result.append(
307
+ MatchingTerm(project_id=project_id, collection_id=collection_id, term_id=term_id_found)
308
+ )
345
309
  case _:
346
- term_ids_found = _valid_value_against_all_terms_of_collection(value, collection,
347
- universe_session,
348
- project_session)
310
+ term_ids_found = _valid_value_against_all_terms_of_collection(
311
+ value, collection, universe_session, project_session
312
+ )
349
313
  for term_id_found in term_ids_found:
350
- result.append(MatchingTerm(project_id=project_id,
351
- collection_id=collection_id,
352
- term_id=term_id_found))
314
+ result.append(
315
+ MatchingTerm(project_id=project_id, collection_id=collection_id, term_id=term_id_found)
316
+ )
353
317
  else:
354
318
  msg = f"unable to find collection '{collection_id}'"
355
319
  raise EsgvocNotFoundError(msg)
@@ -357,10 +321,7 @@ def _valid_term_in_collection(value: str,
357
321
  return result
358
322
 
359
323
 
360
- def valid_term_in_collection(value: str,
361
- project_id: str,
362
- collection_id: str) \
363
- -> list[MatchingTerm]:
324
+ def valid_term_in_collection(value: str, project_id: str, collection_id: str) -> list[MatchingTerm]:
364
325
  """
365
326
  Check if the given value may or may not represent a term in the given collection. The function
366
327
  returns the terms that the value matches.
@@ -388,21 +349,17 @@ def valid_term_in_collection(value: str,
388
349
  :rtype: list[MatchingTerm]
389
350
  :raises EsgvocNotFoundError: If any of the provided ids is not found
390
351
  """
391
- with get_universe_session() as universe_session, \
392
- _get_project_session_with_exception(project_id) as project_session:
393
- return _valid_term_in_collection(value, project_id, collection_id,
394
- universe_session, project_session)
352
+ with get_universe_session() as universe_session, _get_project_session_with_exception(project_id) as project_session:
353
+ return _valid_term_in_collection(value, project_id, collection_id, universe_session, project_session)
395
354
 
396
355
 
397
- def _valid_term_in_project(value: str,
398
- project_id: str,
399
- universe_session: Session,
400
- project_session: Session) -> list[MatchingTerm]:
356
+ def _valid_term_in_project(
357
+ value: str, project_id: str, universe_session: Session, project_session: Session
358
+ ) -> list[MatchingTerm]:
401
359
  result = list()
402
360
  collections = _get_all_collections_in_project(project_session)
403
361
  for collection in collections:
404
- result.extend(_valid_term_in_collection(value, project_id, collection.id,
405
- universe_session, project_session))
362
+ result.extend(_valid_term_in_collection(value, project_id, collection.id, universe_session, project_session))
406
363
  return result
407
364
 
408
365
 
@@ -431,8 +388,7 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
431
388
  :rtype: list[MatchingTerm]
432
389
  :raises EsgvocNotFoundError: If the `project_id` is not found
433
390
  """
434
- with get_universe_session() as universe_session, \
435
- _get_project_session_with_exception(project_id) as project_session:
391
+ with get_universe_session() as universe_session, _get_project_session_with_exception(project_id) as project_session:
436
392
  return _valid_term_in_project(value, project_id, universe_session, project_session)
437
393
 
438
394
 
@@ -460,15 +416,13 @@ def valid_term_in_all_projects(value: str) -> list[MatchingTerm]:
460
416
  with get_universe_session() as universe_session:
461
417
  for project_id in get_all_projects():
462
418
  with _get_project_session_with_exception(project_id) as project_session:
463
- result.extend(_valid_term_in_project(value, project_id,
464
- universe_session, project_session))
419
+ result.extend(_valid_term_in_project(value, project_id, universe_session, project_session))
465
420
  return result
466
421
 
467
422
 
468
- def get_all_terms_in_collection(project_id: str,
469
- collection_id: str,
470
- selected_term_fields: Iterable[str] | None = None)\
471
- -> list[DataDescriptor]:
423
+ def get_all_terms_in_collection(
424
+ project_id: str, collection_id: str, selected_term_fields: Iterable[str] | None = None
425
+ ) -> list[DataDescriptor]:
472
426
  """
473
427
  Gets all terms of the given collection of a project.
474
428
  This function performs an exact match on the `project_id` and `collection_id`,
@@ -495,7 +449,7 @@ def get_all_terms_in_collection(project_id: str,
495
449
  return result
496
450
 
497
451
 
498
- def _get_all_collections_in_project(session: Session) -> list[Collection]:
452
+ def _get_all_collections_in_project(session: Session) -> list[PCollection]:
499
453
  project = session.get(Project, constants.SQLITE_FIRST_PK)
500
454
  # Project can't be missing if session exists.
501
455
  return project.collections # type: ignore
@@ -522,15 +476,17 @@ def get_all_collections_in_project(project_id: str) -> list[str]:
522
476
  return result
523
477
 
524
478
 
525
- def _get_all_terms_in_collection(collection: Collection,
526
- selected_term_fields: Iterable[str] | None) -> list[DataDescriptor]:
479
+ def _get_all_terms_in_collection(
480
+ collection: PCollection, selected_term_fields: Iterable[str] | None
481
+ ) -> list[DataDescriptor]:
527
482
  result: list[DataDescriptor] = list()
528
483
  instantiate_pydantic_terms(collection.terms, result, selected_term_fields)
529
484
  return result
530
485
 
531
486
 
532
- def get_all_terms_in_project(project_id: str,
533
- selected_term_fields: Iterable[str] | None = None) -> list[DataDescriptor]:
487
+ def get_all_terms_in_project(
488
+ project_id: str, selected_term_fields: Iterable[str] | None = None
489
+ ) -> list[DataDescriptor]:
534
490
  """
535
491
  Gets all terms of the given project.
536
492
  This function performs an exact match on the `project_id` and
@@ -556,8 +512,9 @@ def get_all_terms_in_project(project_id: str,
556
512
  return result
557
513
 
558
514
 
559
- def get_all_terms_in_all_projects(selected_term_fields: Iterable[str] | None = None) \
560
- -> list[tuple[str, list[DataDescriptor]]]:
515
+ def get_all_terms_in_all_projects(
516
+ selected_term_fields: Iterable[str] | None = None,
517
+ ) -> list[tuple[str, list[DataDescriptor]]]:
561
518
  """
562
519
  Gets all terms of all projects.
563
520
 
@@ -592,8 +549,9 @@ def _get_term_in_project(term_id: str, session: Session) -> PTerm | None:
592
549
  return result
593
550
 
594
551
 
595
- def get_term_in_project(project_id: str, term_id: str,
596
- selected_term_fields: Iterable[str] | None = None) -> DataDescriptor | None:
552
+ def get_term_in_project(
553
+ project_id: str, term_id: str, selected_term_fields: Iterable[str] | None = None
554
+ ) -> DataDescriptor | None:
597
555
  """
598
556
  Returns the first occurrence of the terms, in the given project, whose id corresponds exactly to
599
557
  the given term id.
@@ -623,15 +581,15 @@ def get_term_in_project(project_id: str, term_id: str,
623
581
 
624
582
 
625
583
  def _get_term_in_collection(collection_id: str, term_id: str, session: Session) -> PTerm | None:
626
- statement = select(PTerm).join(Collection).where(Collection.id == collection_id,
627
- PTerm.id == term_id)
584
+ statement = select(PTerm).join(PCollection).where(PCollection.id == collection_id, PTerm.id == term_id)
628
585
  results = session.exec(statement)
629
586
  result = results.one_or_none()
630
587
  return result
631
588
 
632
589
 
633
- def get_term_in_collection(project_id: str, collection_id: str, term_id: str,
634
- selected_term_fields: Iterable[str] | None = None) -> DataDescriptor | None:
590
+ def get_term_in_collection(
591
+ project_id: str, collection_id: str, term_id: str, selected_term_fields: Iterable[str] | None = None
592
+ ) -> DataDescriptor | None:
635
593
  """
636
594
  Returns the term, in the given project and collection,
637
595
  whose id corresponds exactly to the given term id.
@@ -661,8 +619,8 @@ def get_term_in_collection(project_id: str, collection_id: str, term_id: str,
661
619
  return result
662
620
 
663
621
 
664
- def _get_collection_in_project(collection_id: str, session: Session) -> Collection | None:
665
- statement = select(Collection).where(Collection.id == collection_id)
622
+ def _get_collection_in_project(collection_id: str, session: Session) -> PCollection | None:
623
+ statement = select(PCollection).where(PCollection.id == collection_id)
666
624
  results = session.exec(statement)
667
625
  result = results.one_or_none()
668
626
  return result
@@ -718,16 +676,13 @@ def get_project(project_id: str) -> ProjectSpecs | None:
718
676
  return result
719
677
 
720
678
 
721
- def _get_collection_from_data_descriptor_in_project(data_descriptor_id: str,
722
- session: Session) -> Collection | None:
723
- statement = select(Collection).where(Collection.data_descriptor_id == data_descriptor_id)
679
+ def _get_collection_from_data_descriptor_in_project(data_descriptor_id: str, session: Session) -> PCollection | None:
680
+ statement = select(PCollection).where(PCollection.data_descriptor_id == data_descriptor_id)
724
681
  result = session.exec(statement).one_or_none()
725
682
  return result
726
683
 
727
684
 
728
- def get_collection_from_data_descriptor_in_project(project_id: str,
729
- data_descriptor_id: str) \
730
- -> tuple[str, dict] | None:
685
+ def get_collection_from_data_descriptor_in_project(project_id: str, data_descriptor_id: str) -> tuple[str, dict] | None:
731
686
  """
732
687
  Returns the collection, in the given project, that corresponds to the given data descriptor
733
688
  in the universe.
@@ -746,15 +701,13 @@ def get_collection_from_data_descriptor_in_project(project_id: str,
746
701
  result: tuple[str, dict] | None = None
747
702
  if connection := _get_project_connection(project_id):
748
703
  with connection.create_session() as session:
749
- collection_found = _get_collection_from_data_descriptor_in_project(data_descriptor_id,
750
- session)
704
+ collection_found = _get_collection_from_data_descriptor_in_project(data_descriptor_id, session)
751
705
  if collection_found:
752
706
  result = collection_found.id, collection_found.context
753
707
  return result
754
708
 
755
709
 
756
- def get_collection_from_data_descriptor_in_all_projects(data_descriptor_id: str) \
757
- -> list[tuple[str, str, dict]]:
710
+ def get_collection_from_data_descriptor_in_all_projects(data_descriptor_id: str) -> list[tuple[str, str, dict]]:
758
711
  """
759
712
  Returns the collections, in all projects, that correspond to the given data descriptor
760
713
  in the universe.
@@ -773,28 +726,28 @@ def get_collection_from_data_descriptor_in_all_projects(data_descriptor_id: str)
773
726
  result = list()
774
727
  project_ids = get_all_projects()
775
728
  for project_id in project_ids:
776
- collection_found = get_collection_from_data_descriptor_in_project(project_id,
777
- data_descriptor_id)
729
+ collection_found = get_collection_from_data_descriptor_in_project(project_id, data_descriptor_id)
778
730
  if collection_found:
779
731
  result.append((project_id, collection_found[0], collection_found[1]))
780
732
  return result
781
733
 
782
734
 
783
- def _get_term_from_universe_term_id_in_project(data_descriptor_id: str,
784
- universe_term_id: str,
785
- project_session: Session) -> PTerm | None:
786
- statement = select(PTerm).join(Collection).where(Collection.data_descriptor_id == data_descriptor_id,
787
- PTerm.id == universe_term_id)
735
+ def _get_term_from_universe_term_id_in_project(
736
+ data_descriptor_id: str, universe_term_id: str, project_session: Session
737
+ ) -> PTerm | None:
738
+ statement = (
739
+ select(PTerm)
740
+ .join(PCollection)
741
+ .where(PCollection.data_descriptor_id == data_descriptor_id, PTerm.id == universe_term_id)
742
+ )
788
743
  results = project_session.exec(statement)
789
744
  result = results.one_or_none()
790
745
  return result
791
746
 
792
747
 
793
- def get_term_from_universe_term_id_in_project(project_id: str,
794
- data_descriptor_id: str,
795
- universe_term_id: str,
796
- selected_term_fields: Iterable[str] | None = None) \
797
- -> tuple[str, DataDescriptor] | None:
748
+ def get_term_from_universe_term_id_in_project(
749
+ project_id: str, data_descriptor_id: str, universe_term_id: str, selected_term_fields: Iterable[str] | None = None
750
+ ) -> tuple[str, DataDescriptor] | None:
798
751
  """
799
752
  Returns the term, in the given project, that corresponds to the given term in the universe.
800
753
  This function performs an exact match on the `project_id`, `data_descriptor_id`
@@ -818,19 +771,16 @@ def get_term_from_universe_term_id_in_project(project_id: str,
818
771
  result: tuple[str, DataDescriptor] | None = None
819
772
  if connection := _get_project_connection(project_id):
820
773
  with connection.create_session() as session:
821
- term_found = _get_term_from_universe_term_id_in_project(data_descriptor_id,
822
- universe_term_id,
823
- session)
774
+ term_found = _get_term_from_universe_term_id_in_project(data_descriptor_id, universe_term_id, session)
824
775
  if term_found:
825
776
  pydantic_term = instantiate_pydantic_term(term_found, selected_term_fields)
826
777
  result = (term_found.collection.id, pydantic_term)
827
778
  return result
828
779
 
829
780
 
830
- def get_term_from_universe_term_id_in_all_projects(data_descriptor_id: str,
831
- universe_term_id: str,
832
- selected_term_fields: Iterable[str] | None = None) \
833
- -> list[tuple[str, str, DataDescriptor]]:
781
+ def get_term_from_universe_term_id_in_all_projects(
782
+ data_descriptor_id: str, universe_term_id: str, selected_term_fields: Iterable[str] | None = None
783
+ ) -> list[tuple[str, str, DataDescriptor]]:
834
784
  """
835
785
  Returns the terms, in all projects, that correspond to the given term in the universe.
836
786
  This function performs an exact match on the `data_descriptor_id`
@@ -853,38 +803,37 @@ def get_term_from_universe_term_id_in_all_projects(data_descriptor_id: str,
853
803
  result: list[tuple[str, str, DataDescriptor]] = list()
854
804
  project_ids = get_all_projects()
855
805
  for project_id in project_ids:
856
- term_found = get_term_from_universe_term_id_in_project(project_id,
857
- data_descriptor_id,
858
- universe_term_id,
859
- selected_term_fields)
806
+ term_found = get_term_from_universe_term_id_in_project(
807
+ project_id, data_descriptor_id, universe_term_id, selected_term_fields
808
+ )
860
809
  if term_found:
861
810
  result.append((project_id, term_found[0], term_found[1]))
862
811
  return result
863
812
 
864
813
 
865
- def _find_collections_in_project(expression: str,
866
- session: Session,
867
- only_id: bool = False,
868
- limit: int | None = None,
869
- offset: int | None = None) -> Sequence[Collection]:
814
+ def _find_collections_in_project(
815
+ expression: str, session: Session, only_id: bool = False, limit: int | None = None, offset: int | None = None
816
+ ) -> Sequence[PCollection]:
870
817
  matching_condition = generate_matching_condition(PCollectionFTS5, expression, only_id)
871
818
  tmp_statement = select(PCollectionFTS5).where(matching_condition)
872
- statement = select(Collection).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
819
+ statement = select(PCollection).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
873
820
  return execute_match_statement(expression, statement, session)
874
821
 
875
822
 
876
- def find_collections_in_project(expression: str, project_id: str,
877
- only_id: bool = False,
878
- limit: int | None = None,
879
- offset: int | None = None) -> list[tuple[str, dict]]:
823
+ def find_collections_in_project(
824
+ expression: str, project_id: str, only_id: bool = False, limit: int | None = None, offset: int | None = None
825
+ ) -> list[tuple[str, dict]]:
880
826
  """
881
827
  Find collections in the given project based on a full text search defined by the given `expression`.
882
- The `expression` comes from the powerful
883
- `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
884
- and corresponds to the expression of the `MATCH` operator.
885
- It can be composed of one or multiple keywords combined with boolean
886
- operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
887
- with the wildcard `*`.
828
+ The `expression` can be composed of one or multiple keywords.
829
+ The keywords can be combined with boolean operators: `AND`,
830
+ `OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
831
+ if no boolean operator is provided, whitespaces are handled as if there were
832
+ an implicit AND operator between each pair of keywords. Note that this
833
+ function does not provide any priority operator (parenthesis).
834
+ Keywords can define prefixes when adding a `*` at the end of them.
835
+ If the expression is composed of only one keyword, the function
836
+ automatically defines it as a prefix.
888
837
  The function returns a list of collection ids and contexts, sorted according to the
889
838
  bm25 ranking metric (list index `0` has the highest rank).
890
839
  This function performs an exact match on the `project_id`,
@@ -915,52 +864,57 @@ def find_collections_in_project(expression: str, project_id: str,
915
864
  result: list[tuple[str, dict]] = list()
916
865
  if connection := _get_project_connection(project_id):
917
866
  with connection.create_session() as session:
918
- collections_found = _find_collections_in_project(expression, session, only_id,
919
- limit, offset)
867
+ collections_found = _find_collections_in_project(expression, session, only_id, limit, offset)
920
868
  for collection in collections_found:
921
869
  result.append((collection.id, collection.context))
922
870
  return result
923
871
 
924
872
 
925
- def _find_terms_in_collection(expression: str,
926
- collection_id: str,
927
- session: Session,
928
- only_id: bool = False,
929
- limit: int | None = None,
930
- offset: int | None = None) -> Sequence[PTerm]:
873
+ def _find_terms_in_collection(
874
+ expression: str,
875
+ collection_id: str,
876
+ session: Session,
877
+ only_id: bool = False,
878
+ limit: int | None = None,
879
+ offset: int | None = None,
880
+ ) -> Sequence[PTerm]:
931
881
  matching_condition = generate_matching_condition(PTermFTS5, expression, only_id)
932
- where_condition = Collection.id == collection_id, matching_condition
933
- tmp_statement = select(PTermFTS5).join(Collection).where(*where_condition)
882
+ where_condition = PCollection.id == collection_id, matching_condition
883
+ tmp_statement = select(PTermFTS5).join(PCollection).where(*where_condition)
934
884
  statement = select(PTerm).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
935
885
  return execute_match_statement(expression, statement, session)
936
886
 
937
887
 
938
- def _find_terms_in_project(expression: str,
939
- session: Session,
940
- only_id: bool = False,
941
- limit: int | None = None,
942
- offset: int | None = None) -> Sequence[PTerm]:
888
+ def _find_terms_in_project(
889
+ expression: str, session: Session, only_id: bool = False, limit: int | None = None, offset: int | None = None
890
+ ) -> Sequence[PTerm]:
943
891
  matching_condition = generate_matching_condition(PTermFTS5, expression, only_id)
944
892
  tmp_statement = select(PTermFTS5).where(matching_condition)
945
893
  statement = select(PTerm).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
946
894
  return execute_match_statement(expression, statement, session)
947
895
 
948
896
 
949
- def find_terms_in_collection(expression: str, project_id: str,
950
- collection_id: str,
951
- only_id: bool = False,
952
- limit: int | None = None,
953
- offset: int | None = None,
954
- selected_term_fields: Iterable[str] | None = None) \
955
- -> list[DataDescriptor]:
897
+ def find_terms_in_collection(
898
+ expression: str,
899
+ project_id: str,
900
+ collection_id: str,
901
+ only_id: bool = False,
902
+ limit: int | None = None,
903
+ offset: int | None = None,
904
+ selected_term_fields: Iterable[str] | None = None,
905
+ ) -> list[DataDescriptor]:
956
906
  """
957
907
  Find terms in the given project and collection based on a full text search defined by the given
958
- `expression`. The `expression` comes from the powerful
959
- `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
960
- and corresponds to the expression of the `MATCH` operator.
961
- It can be composed of one or multiple keywords combined with boolean
962
- operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
963
- with the wildcard `*`.
908
+ `expression`.
909
+ The `expression` can be composed of one or multiple keywords.
910
+ The keywords can be combined with boolean operators: `AND`,
911
+ `OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
912
+ if no boolean operator is provided, whitespaces are handled as if there were
913
+ an implicit AND operator between each pair of keywords. Note that this
914
+ function does not provide any priority operator (parenthesis).
915
+ Keywords can define prefixes when adding a `*` at the end of them.
916
+ If the expression is composed of only one keyword, the function
917
+ automatically defines it as a prefix.
964
918
  The function returns a list of term instances, sorted according to the
965
919
  bm25 ranking metric (list index `0` has the highest rank).
966
920
  This function performs an exact match on the `project_id` and `collection_id`,
@@ -995,27 +949,30 @@ def find_terms_in_collection(expression: str, project_id: str,
995
949
  result: list[DataDescriptor] = list()
996
950
  if connection := _get_project_connection(project_id):
997
951
  with connection.create_session() as session:
998
- pterms_found = _find_terms_in_collection(expression, collection_id, session,
999
- only_id, limit, offset)
952
+ pterms_found = _find_terms_in_collection(expression, collection_id, session, only_id, limit, offset)
1000
953
  instantiate_pydantic_terms(pterms_found, result, selected_term_fields)
1001
954
  return result
1002
955
 
1003
956
 
1004
- def find_terms_in_project(expression: str,
1005
- project_id: str,
1006
- only_id: bool = False,
1007
- limit: int | None = None,
1008
- offset: int | None = None,
1009
- selected_term_fields: Iterable[str] | None = None) \
1010
- -> list[DataDescriptor]:
957
+ def find_terms_in_project(
958
+ expression: str,
959
+ project_id: str,
960
+ only_id: bool = False,
961
+ limit: int | None = None,
962
+ offset: int | None = None,
963
+ selected_term_fields: Iterable[str] | None = None,
964
+ ) -> list[DataDescriptor]:
1011
965
  """
1012
- Find terms in the given project on a full text search defined by the given
1013
- `expression`. The `expression` comes from the powerful
1014
- `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
1015
- and corresponds to the expression of the `MATCH` operator.
1016
- It can be composed of one or multiple keywords combined with boolean
1017
- operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
1018
- with the wildcard `*`.
966
+ Find terms in the given project based on a full text search defined by the given `expression`.
967
+ The `expression` can be composed of one or multiple keywords.
968
+ The keywords can be combined with boolean operators: `AND`,
969
+ `OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
970
+ if no boolean operator is provided, whitespaces are handled as if there were
971
+ an implicit AND operator between each pair of keywords. Note that this
972
+ function does not provide any priority operator (parenthesis).
973
+ Keywords can define prefixes when adding a `*` at the end of them.
974
+ If the expression is composed of only one keyword, the function
975
+ automatically defines it as a prefix.
1019
976
  The function returns a list of term instances, sorted according to the
1020
977
  bm25 ranking metric (list index `0` has the highest rank).
1021
978
  This function performs an exact match on the `project_id`,
@@ -1053,20 +1010,24 @@ def find_terms_in_project(expression: str,
1053
1010
  return result
1054
1011
 
1055
1012
 
1056
- def find_terms_in_all_projects(expression: str,
1057
- only_id: bool = False,
1058
- limit: int | None = None,
1059
- offset: int | None = None,
1060
- selected_term_fields: Iterable[str] | None = None) \
1061
- -> list[tuple[str, list[DataDescriptor]]]:
1013
+ def find_terms_in_all_projects(
1014
+ expression: str,
1015
+ only_id: bool = False,
1016
+ limit: int | None = None,
1017
+ offset: int | None = None,
1018
+ selected_term_fields: Iterable[str] | None = None,
1019
+ ) -> list[tuple[str, list[DataDescriptor]]]:
1062
1020
  """
1063
- Find terms in the all projects on a full text search defined by the given
1064
- `expression`. The `expression` comes from the powerful
1065
- `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
1066
- and corresponds to the expression of the `MATCH` operator.
1067
- It can be composed of one or multiple keywords combined with boolean
1068
- operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
1069
- with the wildcard `*`.
1021
+ Find terms in all projects based on a full text search defined by the given `expression`.
1022
+ The `expression` can be composed of one or multiple keywords.
1023
+ The keywords can be combined with boolean operators: `AND`,
1024
+ `OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
1025
+ if no boolean operator is provided, whitespaces are handled as if there were
1026
+ an implicit AND operator between each pair of keywords. Note that this
1027
+ function does not provide any priority operator (parenthesis).
1028
+ Keywords can define prefixes when adding a `*` at the end of them.
1029
+ If the expression is composed of only one keyword, the function
1030
+ automatically defines it as a prefix.
1070
1031
  The function returns a list of project ids and term instances, sorted according to the
1071
1032
  bm25 ranking metric (list index `0` has the highest rank).
1072
1033
  If the provided `expression` does not hit any term, the function returns an empty list.
@@ -1094,26 +1055,27 @@ def find_terms_in_all_projects(expression: str,
1094
1055
  result: list[tuple[str, list[DataDescriptor]]] = list()
1095
1056
  project_ids = get_all_projects()
1096
1057
  for project_id in project_ids:
1097
- terms_found = find_terms_in_project(expression, project_id, only_id,
1098
- limit, offset, selected_term_fields)
1058
+ terms_found = find_terms_in_project(expression, project_id, only_id, limit, offset, selected_term_fields)
1099
1059
  if terms_found:
1100
1060
  result.append((project_id, terms_found))
1101
1061
  return result
1102
1062
 
1103
1063
 
1104
- def find_items_in_project(expression: str,
1105
- project_id: str,
1106
- only_id: bool = False,
1107
- limit: int | None = None,
1108
- offset: int | None = None) -> list[Item]:
1064
+ def find_items_in_project(
1065
+ expression: str, project_id: str, only_id: bool = False, limit: int | None = None, offset: int | None = None
1066
+ ) -> list[Item]:
1109
1067
  """
1110
1068
  Find items, at the moment terms and collections, in the given project based on a full-text
1111
- search defined by the given `expression`. The `expression` comes from the powerful
1112
- `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
1113
- and corresponds to the expression of the `MATCH` operator.
1114
- It can be composed of one or multiple keywords combined with boolean
1115
- operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
1116
- with the wildcard `*`.
1069
+ search defined by the given `expression`.
1070
+ The `expression` can be composed of one or multiple keywords.
1071
+ The keywords can be combined with boolean operators: `AND`,
1072
+ `OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
1073
+ if no boolean operator is provided, whitespaces are handled as if there were
1074
+ an implicit AND operator between each pair of keywords. Note that this
1075
+ function does not provide any priority operator (parenthesis).
1076
+ Keywords can define prefixes when adding a `*` at the end of them.
1077
+ If the expression is composed of only one keyword, the function
1078
+ automatically defines it as a prefix.
1117
1079
  The function returns a list of item instances sorted according to the
1118
1080
  bm25 ranking metric (list index `0` has the highest rank).
1119
1081
  This function performs an exact match on the `project_id`,
@@ -1143,23 +1105,24 @@ def find_items_in_project(expression: str,
1143
1105
  result = list()
1144
1106
  if connection := _get_project_connection(project_id):
1145
1107
  with connection.create_session() as session:
1108
+ processed_expression = process_expression(expression)
1146
1109
  if only_id:
1147
1110
  collection_column = col(PCollectionFTS5.id)
1148
1111
  term_column = col(PTermFTS5.id)
1149
1112
  else:
1150
1113
  collection_column = col(PCollectionFTS5.id) # TODO: use specs when implemented!
1151
1114
  term_column = col(PTermFTS5.specs) # type: ignore
1152
- collection_where_condition = collection_column.match(expression)
1115
+ collection_where_condition = collection_column.match(processed_expression)
1153
1116
  collection_statement = select(PCollectionFTS5.id,
1154
1117
  text("'collection' AS TYPE"),
1155
1118
  text(f"'{project_id}' AS TYPE"),
1156
1119
  text('rank')).where(collection_where_condition)
1157
- term_where_condition = term_column.match(expression)
1120
+ term_where_condition = term_column.match(processed_expression)
1158
1121
  term_statement = select(PTermFTS5.id,
1159
1122
  text("'term' AS TYPE"),
1160
- Collection.id,
1161
- text('rank')).join(Collection) \
1123
+ PCollection.id,
1124
+ text('rank')).join(PCollection) \
1162
1125
  .where(term_where_condition)
1163
- result = execute_find_item_statements(session, expression, collection_statement,
1126
+ result = execute_find_item_statements(session, processed_expression, collection_statement,
1164
1127
  term_statement, limit, offset)
1165
1128
  return result