esgvoc: esgvoc-0.2.1-py3-none-any.whl → esgvoc-0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic. Click here for more details.

Files changed (70):
  1. esgvoc/__init__.py +3 -1
  2. esgvoc/api/__init__.py +23 -34
  3. esgvoc/api/_utils.py +28 -14
  4. esgvoc/api/data_descriptors/__init__.py +18 -12
  5. esgvoc/api/data_descriptors/activity.py +8 -45
  6. esgvoc/api/data_descriptors/area_label.py +6 -0
  7. esgvoc/api/data_descriptors/branded_suffix.py +5 -0
  8. esgvoc/api/data_descriptors/branded_variable.py +5 -0
  9. esgvoc/api/data_descriptors/consortium.py +16 -56
  10. esgvoc/api/data_descriptors/data_descriptor.py +106 -0
  11. esgvoc/api/data_descriptors/date.py +3 -46
  12. esgvoc/api/data_descriptors/directory_date.py +3 -46
  13. esgvoc/api/data_descriptors/experiment.py +19 -54
  14. esgvoc/api/data_descriptors/forcing_index.py +3 -45
  15. esgvoc/api/data_descriptors/frequency.py +6 -43
  16. esgvoc/api/data_descriptors/grid_label.py +6 -44
  17. esgvoc/api/data_descriptors/horizontal_label.py +6 -0
  18. esgvoc/api/data_descriptors/initialisation_index.py +3 -44
  19. esgvoc/api/data_descriptors/institution.py +11 -54
  20. esgvoc/api/data_descriptors/license.py +4 -44
  21. esgvoc/api/data_descriptors/mip_era.py +6 -44
  22. esgvoc/api/data_descriptors/model_component.py +7 -45
  23. esgvoc/api/data_descriptors/organisation.py +3 -40
  24. esgvoc/api/data_descriptors/physic_index.py +3 -45
  25. esgvoc/api/data_descriptors/product.py +4 -43
  26. esgvoc/api/data_descriptors/realisation_index.py +3 -44
  27. esgvoc/api/data_descriptors/realm.py +4 -42
  28. esgvoc/api/data_descriptors/resolution.py +6 -44
  29. esgvoc/api/data_descriptors/source.py +18 -53
  30. esgvoc/api/data_descriptors/source_type.py +3 -41
  31. esgvoc/api/data_descriptors/sub_experiment.py +3 -41
  32. esgvoc/api/data_descriptors/table.py +6 -48
  33. esgvoc/api/data_descriptors/temporal_label.py +6 -0
  34. esgvoc/api/data_descriptors/time_range.py +3 -27
  35. esgvoc/api/data_descriptors/variable.py +13 -71
  36. esgvoc/api/data_descriptors/variant_label.py +3 -47
  37. esgvoc/api/data_descriptors/vertical_label.py +5 -0
  38. esgvoc/api/projects.py +187 -171
  39. esgvoc/api/report.py +21 -12
  40. esgvoc/api/search.py +3 -1
  41. esgvoc/api/universe.py +44 -34
  42. esgvoc/apps/__init__.py +3 -4
  43. esgvoc/apps/drs/generator.py +166 -161
  44. esgvoc/apps/drs/report.py +222 -131
  45. esgvoc/apps/drs/validator.py +103 -105
  46. esgvoc/cli/drs.py +29 -19
  47. esgvoc/cli/get.py +26 -25
  48. esgvoc/cli/install.py +11 -8
  49. esgvoc/cli/main.py +0 -2
  50. esgvoc/cli/status.py +5 -5
  51. esgvoc/cli/valid.py +40 -40
  52. esgvoc/core/db/models/universe.py +3 -3
  53. esgvoc/core/db/project_ingestion.py +1 -1
  54. esgvoc/core/db/universe_ingestion.py +6 -5
  55. esgvoc/core/logging_handler.py +1 -1
  56. esgvoc/core/repo_fetcher.py +4 -3
  57. esgvoc/core/service/__init__.py +37 -5
  58. esgvoc/core/service/configuration/config_manager.py +188 -0
  59. esgvoc/core/service/configuration/setting.py +88 -0
  60. esgvoc/core/service/state.py +49 -32
  61. {esgvoc-0.2.1.dist-info → esgvoc-0.3.0.dist-info}/METADATA +34 -3
  62. esgvoc-0.3.0.dist-info/RECORD +78 -0
  63. esgvoc/cli/config.py +0 -82
  64. esgvoc/core/service/settings.py +0 -73
  65. esgvoc/core/service/settings.toml +0 -17
  66. esgvoc/core/service/settings_default.toml +0 -17
  67. esgvoc-0.2.1.dist-info/RECORD +0 -73
  68. {esgvoc-0.2.1.dist-info → esgvoc-0.3.0.dist-info}/WHEEL +0 -0
  69. {esgvoc-0.2.1.dist-info → esgvoc-0.3.0.dist-info}/entry_points.txt +0 -0
  70. {esgvoc-0.2.1.dist-info → esgvoc-0.3.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/api/projects.py CHANGED
@@ -1,58 +1,51 @@
1
1
  import re
2
- from typing import Sequence
2
+ from collections.abc import Iterable, Sequence
3
+
4
+ from sqlmodel import Session, and_, select
3
5
 
4
6
  import esgvoc.api.universe as universe
5
- import esgvoc.core.constants
7
+ import esgvoc.core.constants as constants
6
8
  import esgvoc.core.service as service
7
- from esgvoc.api._utils import (get_universe_session, instantiate_pydantic_term,
9
+ from esgvoc.api._utils import (APIException, get_universe_session,
10
+ instantiate_pydantic_term,
8
11
  instantiate_pydantic_terms)
9
- from esgvoc.api.report import (ProjectTermError, UniverseTermError,
10
- ValidationError, ValidationReport)
11
- from esgvoc.api.search import MatchingTerm, SearchSettings, _create_str_comparison_expression
12
+ from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor
12
13
  from esgvoc.api.project_specs import ProjectSpecs
14
+ from esgvoc.api.report import (ProjectTermError, UniverseTermError,
15
+ ValidationReport)
16
+ from esgvoc.api.search import (MatchingTerm, SearchSettings,
17
+ _create_str_comparison_expression)
13
18
  from esgvoc.core.db.connection import DBConnection
14
19
  from esgvoc.core.db.models.mixins import TermKind
15
20
  from esgvoc.core.db.models.project import Collection, Project, PTerm
16
21
  from esgvoc.core.db.models.universe import UTerm
17
- from pydantic import BaseModel
18
- from sqlmodel import Session, and_, select
19
-
20
22
 
21
23
  # [OPTIMIZATION]
22
24
  _VALID_TERM_IN_COLLECTION_CACHE: dict[str, list[MatchingTerm]] = dict()
23
- _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE: dict[str, list[ValidationError]] = dict()
24
-
25
-
26
- def get_project_specs(project_id: str) -> ProjectSpecs:
27
- project_specs = find_project(project_id)
28
- if not project_specs:
29
- msg = f'Unable to find project {project_id}'
30
- raise ValueError(msg)
31
- try:
32
- result = ProjectSpecs(**project_specs)
33
- except Exception as e:
34
- msg = f'Unable to read specs in project {project_id}'
35
- raise RuntimeError(msg) from e
36
- return result
25
+ _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE: dict[str, list[UniverseTermError|ProjectTermError]] = dict()
37
26
 
38
27
 
39
28
  def _get_project_connection(project_id: str) -> DBConnection|None:
40
- return service.state_service.projects[project_id].db_connection
29
+ if project_id in service.current_state.projects:
30
+ return service.current_state.projects[project_id].db_connection
31
+ else:
32
+ return None
33
+
41
34
 
42
35
  def _get_project_session_with_exception(project_id: str) -> Session:
43
36
  if connection:=_get_project_connection(project_id):
44
37
  project_session = connection.create_session()
45
38
  return project_session
46
39
  else:
47
- raise ValueError(f'unable to find project {project_id}')
48
-
40
+ raise APIException(f'unable to find project {project_id}')
41
+
49
42
 
50
- def _resolve_term(term_composite_part: dict,
43
+ def _resolve_term(composite_term_part: dict,
51
44
  universe_session: Session,
52
45
  project_session: Session) -> UTerm|PTerm:
53
46
  # First find the term in the universe than in the current project
54
- term_id = term_composite_part[esgvoc.core.constants.TERM_ID_JSON_KEY]
55
- term_type = term_composite_part[esgvoc.core.constants.TERM_TYPE_JSON_KEY]
47
+ term_id = composite_term_part[constants.TERM_ID_JSON_KEY]
48
+ term_type = composite_term_part[constants.TERM_TYPE_JSON_KEY]
56
49
  uterms = universe._find_terms_in_data_descriptor(data_descriptor_id=term_type,
57
50
  term_id=term_id,
58
51
  session=universe_session,
@@ -68,24 +61,24 @@ def _resolve_term(term_composite_part: dict,
68
61
  return pterms[0]
69
62
  else:
70
63
  msg = f'unable to find the term {term_id} in {term_type}'
71
- raise RuntimeError(msg)
64
+ raise RuntimeError(msg)
72
65
 
73
66
 
74
- def _get_term_composite_separator_parts(term: UTerm|PTerm) -> tuple[str, list]:
75
- separator = term.specs[esgvoc.core.constants.COMPOSITE_SEPARATOR_JSON_KEY]
76
- parts = term.specs[esgvoc.core.constants.COMPOSITE_PARTS_JSON_KEY]
67
+ def _get_composite_term_separator_parts(term: UTerm|PTerm) -> tuple[str, list]:
68
+ separator = term.specs[constants.COMPOSITE_SEPARATOR_JSON_KEY]
69
+ parts = term.specs[constants.COMPOSITE_PARTS_JSON_KEY]
77
70
  return separator, parts
78
71
 
79
72
 
80
73
  # TODO: support optionality of parts of composite.
81
74
  # It is backtrack possible for more than one missing parts.
82
- def _valid_value_term_composite_with_separator(value: str,
75
+ def _valid_value_composite_term_with_separator(value: str,
83
76
  term: UTerm|PTerm,
84
77
  universe_session: Session,
85
78
  project_session: Session)\
86
- -> list[ValidationError]:
79
+ -> list[UniverseTermError|ProjectTermError]:
87
80
  result = list()
88
- separator, parts = _get_term_composite_separator_parts(term)
81
+ separator, parts = _get_composite_term_separator_parts(term)
89
82
  if separator in value:
90
83
  splits = value.split(separator)
91
84
  if len(splits) == len(parts):
@@ -111,11 +104,15 @@ def _transform_to_pattern(term: UTerm|PTerm,
111
104
  project_session: Session) -> str:
112
105
  match term.kind:
113
106
  case TermKind.PLAIN:
114
- result = term.specs[esgvoc.core.constants.DRS_SPECS_JSON_KEY]
107
+ if constants.DRS_SPECS_JSON_KEY in term.specs:
108
+ result = term.specs[constants.DRS_SPECS_JSON_KEY]
109
+ else:
110
+ raise APIException(f"the term {term.id} doesn't have drs name. " +
111
+ "Can't validate it.")
115
112
  case TermKind.PATTERN:
116
- result = term.specs[esgvoc.core.constants.PATTERN_JSON_KEY]
113
+ result = term.specs[constants.PATTERN_JSON_KEY]
117
114
  case TermKind.COMPOSITE:
118
- separator, parts = _get_term_composite_separator_parts(term)
115
+ separator, parts = _get_composite_term_separator_parts(term)
119
116
  result = ""
120
117
  for part in parts:
121
118
  resolved_term = _resolve_term(part, universe_session, project_session)
@@ -123,22 +120,22 @@ def _transform_to_pattern(term: UTerm|PTerm,
123
120
  result = f'{result}{pattern}{separator}'
124
121
  result = result.rstrip(separator)
125
122
  case _:
126
- raise NotImplementedError(f'unsupported term kind {term.kind}')
123
+ raise RuntimeError(f'unsupported term kind {term.kind}')
127
124
  return result
128
125
 
129
126
 
130
127
  # TODO: support optionality of parts of composite.
131
128
  # It is backtrack possible for more than one missing parts.
132
- def _valid_value_term_composite_separator_less(value: str,
129
+ def _valid_value_composite_term_separator_less(value: str,
133
130
  term: UTerm|PTerm,
134
131
  universe_session: Session,
135
132
  project_session: Session)\
136
- -> list[ValidationError]:
133
+ -> list[UniverseTermError|ProjectTermError]:
137
134
  result = list()
138
135
  try:
139
136
  pattern = _transform_to_pattern(term, universe_session, project_session)
140
137
  try:
141
- # Term patterns are meant to be validated individually.
138
+ # Patterns terms are meant to be validated individually.
142
139
  # So their regex are defined as a whole (begins by a ^, ends by a $).
143
140
  # As the pattern is a concatenation of plain or regex, multiple ^ and $ can exist.
144
141
  # The later, must be removed.
@@ -146,34 +143,34 @@ def _valid_value_term_composite_separator_less(value: str,
146
143
  pattern = f'^{pattern}$'
147
144
  regex = re.compile(pattern)
148
145
  except Exception as e:
149
- msg = f'regex compilation error:\n{e}'
150
- raise ValueError(msg) from e
146
+ msg = f'regex compilation error while processing term {term.id}:\n{e}'
147
+ raise RuntimeError(msg) from e
151
148
  match = regex.match(value)
152
149
  if match is None:
153
150
  result.append(_create_term_error(value, term))
154
151
  return result
155
152
  except Exception as e:
156
- msg = f'cannot validate separator less composite term {term.id}:\n{e}'
153
+ msg = f'cannot validate separator less composite term {term.id}:\n{e}'
157
154
  raise RuntimeError(msg) from e
158
155
 
159
156
 
160
- def _valid_value_for_term_composite(value: str,
157
+ def _valid_value_for_composite_term(value: str,
161
158
  term: UTerm|PTerm,
162
159
  universe_session: Session,
163
160
  project_session: Session)\
164
- -> list[ValidationError]:
161
+ -> list[UniverseTermError|ProjectTermError]:
165
162
  result = list()
166
- separator, _ = _get_term_composite_separator_parts(term)
163
+ separator, _ = _get_composite_term_separator_parts(term)
167
164
  if separator:
168
- result = _valid_value_term_composite_with_separator(value, term, universe_session,
165
+ result = _valid_value_composite_term_with_separator(value, term, universe_session,
169
166
  project_session)
170
167
  else:
171
- result = _valid_value_term_composite_separator_less(value, term, universe_session,
168
+ result = _valid_value_composite_term_separator_less(value, term, universe_session,
172
169
  project_session)
173
170
  return result
174
171
 
175
172
 
176
- def _create_term_error(value: str, term: UTerm|PTerm) -> ValidationError:
173
+ def _create_term_error(value: str, term: UTerm|PTerm) -> UniverseTermError|ProjectTermError:
177
174
  if isinstance(term, UTerm):
178
175
  return UniverseTermError(value=value, term=term.specs, term_kind=term.kind,
179
176
  data_descriptor_id=term.data_descriptor.id)
@@ -185,29 +182,33 @@ def _create_term_error(value: str, term: UTerm|PTerm) -> ValidationError:
185
182
  def _valid_value(value: str,
186
183
  term: UTerm|PTerm,
187
184
  universe_session: Session,
188
- project_session: Session) -> list[ValidationError]:
185
+ project_session: Session) -> list[UniverseTermError|ProjectTermError]:
189
186
  result = list()
190
187
  match term.kind:
191
188
  case TermKind.PLAIN:
192
- if term.specs[esgvoc.core.constants.DRS_SPECS_JSON_KEY] != value:
193
- result.append(_create_term_error(value, term))
189
+ if constants.DRS_SPECS_JSON_KEY in term.specs:
190
+ if term.specs[constants.DRS_SPECS_JSON_KEY] != value:
191
+ result.append(_create_term_error(value, term))
192
+ else:
193
+ raise APIException(f"the term {term.id} doesn't have drs name. " +
194
+ "Can't validate it.")
194
195
  case TermKind.PATTERN:
195
196
  # OPTIM: Pattern can be compiled and stored for further matching.
196
- pattern_match = re.match(term.specs[esgvoc.core.constants.PATTERN_JSON_KEY], value)
197
+ pattern_match = re.match(term.specs[constants.PATTERN_JSON_KEY], value)
197
198
  if pattern_match is None:
198
199
  result.append(_create_term_error(value, term))
199
200
  case TermKind.COMPOSITE:
200
- result.extend(_valid_value_for_term_composite(value, term,
201
+ result.extend(_valid_value_for_composite_term(value, term,
201
202
  universe_session,
202
203
  project_session))
203
204
  case _:
204
- raise NotImplementedError(f'unsupported term kind {term.kind}')
205
+ raise RuntimeError(f'unsupported term kind {term.kind}')
205
206
  return result
206
207
 
207
208
 
208
209
  def _check_value(value: str) -> str:
209
210
  if not value or value.isspace():
210
- raise ValueError('value should be set')
211
+ raise APIException('value should be set')
211
212
  else:
212
213
  return value
213
214
 
@@ -217,7 +218,7 @@ def _search_plain_term_and_valid_value(value: str,
217
218
  project_session: Session) \
218
219
  -> str|None:
219
220
  where_expression = and_(Collection.id == collection_id,
220
- PTerm.specs[esgvoc.core.constants.DRS_SPECS_JSON_KEY] == f'"{value}"')
221
+ PTerm.specs[constants.DRS_SPECS_JSON_KEY] == f'"{value}"')
221
222
  statement = select(PTerm).join(Collection).where(where_expression)
222
223
  term = project_session.exec(statement).one_or_none()
223
224
  return term.id if term else None
@@ -247,27 +248,22 @@ def _valid_value_against_given_term(value: str,
247
248
  term_id: str,
248
249
  universe_session: Session,
249
250
  project_session: Session)\
250
- -> list[ValidationError]:
251
+ -> list[UniverseTermError|ProjectTermError]:
251
252
  # [OPTIMIZATION]
252
253
  key = value + project_id + collection_id + term_id
253
254
  if key in _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE:
254
255
  result = _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key]
255
256
  else:
256
- try:
257
- terms = _find_terms_in_collection(collection_id,
258
- term_id,
259
- project_session,
260
- None)
261
- if terms:
262
- term = terms[0]
263
- result = _valid_value(value, term, universe_session, project_session)
264
- else:
265
- raise ValueError(f'unable to find term {term_id} ' +
266
- f'in collection {collection_id}')
267
- except Exception as e:
268
- msg = f'unable to valid term {term_id} ' +\
269
- f'in collection {collection_id}'
270
- raise RuntimeError(msg) from e
257
+ terms = _find_terms_in_collection(collection_id,
258
+ term_id,
259
+ project_session,
260
+ None)
261
+ if terms:
262
+ term = terms[0]
263
+ result = _valid_value(value, term, universe_session, project_session)
264
+ else:
265
+ raise APIException(f'unable to find term {term_id} ' +
266
+ f'in collection {collection_id}')
271
267
  _VALID_VALUE_AGAINST_GIVEN_TERM_CACHE[key] = result
272
268
  return result
273
269
 
@@ -280,11 +276,11 @@ def valid_term(value: str,
280
276
  """
281
277
  Check if the given value may or may not represent the given term. The functions returns
282
278
  a report that contains the possible errors.
283
-
279
+
284
280
  Behavior based on the nature of the term:
285
281
  - plain term: the function try to match the value on the drs_name field.
286
- - term pattern: the function try to match the value on the pattern field (regex).
287
- - term composite:
282
+ - pattern term: the function try to match the value on the pattern field (regex).
283
+ - composite term:
288
284
  - if the composite has got a separator, the function splits the value according to the\
289
285
  separator of the term then it try to match every part of the composite\
290
286
  with every split of the value.
@@ -292,7 +288,7 @@ def valid_term(value: str,
292
288
  composite so as to compare it as a regex to the value.
293
289
 
294
290
  If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
295
- the function raises a ValueError.
291
+ the function raises a APIException.
296
292
 
297
293
  :param value: A value to be validated
298
294
  :type value: str
@@ -304,7 +300,7 @@ def valid_term(value: str,
304
300
  :type term_id: str
305
301
  :returns: A validation report that contains the possible errors
306
302
  :rtype: ValidationReport
307
- :raises ValueError: If any of the provided ids is not found
303
+ :raises APIException: If any of the provided ids is not found
308
304
  """
309
305
  value = _check_value(value)
310
306
  with get_universe_session() as universe_session, \
@@ -350,7 +346,7 @@ def _valid_term_in_collection(value: str,
350
346
  term_id=term_id_found))
351
347
  else:
352
348
  msg = f'unable to find collection {collection_id}'
353
- raise ValueError(msg)
349
+ raise APIException(msg)
354
350
  _VALID_TERM_IN_COLLECTION_CACHE[key] = result
355
351
  return result
356
352
 
@@ -362,11 +358,11 @@ def valid_term_in_collection(value: str,
362
358
  """
363
359
  Check if the given value may or may not represent a term in the given collection. The function
364
360
  returns the terms that the value matches.
365
-
361
+
366
362
  Behavior based on the nature of the term:
367
363
  - plain term: the function try to match the value on the drs_name field.
368
- - term pattern: the function try to match the value on the pattern field (regex).
369
- - term composite:
364
+ - pattern term: the function try to match the value on the pattern field (regex).
365
+ - composite term:
370
366
  - if the composite has got a separator, the function splits the value according to the \
371
367
  separator of the term then it try to match every part of the composite \
372
368
  with every split of the value.
@@ -374,7 +370,7 @@ def valid_term_in_collection(value: str,
374
370
  composite so as to compare it as a regex to the value.
375
371
 
376
372
  If any of the provided ids (`project_id` or `collection_id`) is not found,
377
- the function raises a ValueError.
373
+ the function raises a APIException.
378
374
 
379
375
  :param value: A value to be validated
380
376
  :type value: str
@@ -384,7 +380,7 @@ def valid_term_in_collection(value: str,
384
380
  :type collection_id: str
385
381
  :returns: The list of terms that the value matches.
386
382
  :rtype: list[MatchingTerm]
387
- :raises ValueError: If any of the provided ids is not found
383
+ :raises APIException: If any of the provided ids is not found
388
384
  """
389
385
  with get_universe_session() as universe_session, \
390
386
  _get_project_session_with_exception(project_id) as project_session:
@@ -408,18 +404,18 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
408
404
  """
409
405
  Check if the given value may or may not represent a term in the given project. The function
410
406
  returns the terms that the value matches.
411
-
407
+
412
408
  Behavior based on the nature of the term:
413
409
  - plain term: the function try to match the value on the drs_name field.
414
- - term pattern: the function try to match the value on the pattern field (regex).
415
- - term composite:
410
+ - pattern term: the function try to match the value on the pattern field (regex).
411
+ - composite term:
416
412
  - if the composite has got a separator, the function splits the value according to the \
417
413
  separator of the term then it try to match every part of the composite \
418
414
  with every split of the value.
419
415
  - if the composite hasn't got a separator, the function aggregates the parts of the \
420
416
  composite so as to compare it as a regex to the value.
421
417
 
422
- If the `project_id` is not found, the function raises a ValueError.
418
+ If the `project_id` is not found, the function raises a APIException.
423
419
 
424
420
  :param value: A value to be validated
425
421
  :type value: str
@@ -427,7 +423,7 @@ def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
427
423
  :type project_id: str
428
424
  :returns: The list of terms that the value matches.
429
425
  :rtype: list[MatchingTerm]
430
- :raises ValueError: If the `project_id` is not found
426
+ :raises APIException: If the `project_id` is not found
431
427
  """
432
428
  with get_universe_session() as universe_session, \
433
429
  _get_project_session_with_exception(project_id) as project_session:
@@ -438,11 +434,11 @@ def valid_term_in_all_projects(value: str) -> list[MatchingTerm]:
438
434
  """
439
435
  Check if the given value may or may not represent a term in all projects. The function
440
436
  returns the terms that the value matches.
441
-
437
+
442
438
  Behavior based on the nature of the term:
443
439
  - plain term: the function try to match the value on the drs_name field.
444
- - term pattern: the function try to match the value on the pattern field (regex).
445
- - term composite:
440
+ - pattern term: the function try to match the value on the pattern field (regex).
441
+ - composite term:
446
442
  - if the composite has got a separator, the function splits the value according to the \
447
443
  separator of the term then it try to match every part of the composite \
448
444
  with every split of the value.
@@ -474,7 +470,7 @@ def _find_terms_in_collection(collection_id: str,
474
470
  statement = select(PTerm).join(Collection).where(Collection.id==collection_id,
475
471
  where_expression)
476
472
  results = session.exec(statement)
477
- result = results.all()
473
+ result = results.all()
478
474
  return result
479
475
 
480
476
 
@@ -482,21 +478,21 @@ def find_terms_in_collection(project_id:str,
482
478
  collection_id: str,
483
479
  term_id: str,
484
480
  settings: SearchSettings|None = None) \
485
- -> list[BaseModel]:
481
+ -> list[DataDescriptor]:
486
482
  """
487
483
  Finds one or more terms, based on the specified search settings, in the given collection of a project.
488
- This function performs an exact match on the `project_id` and `collection_id`,
484
+ This function performs an exact match on the `project_id` and `collection_id`,
489
485
  and does **not** search for similar or related projects and collections.
490
486
  The given `term_id` is searched according to the search type specified in the parameter `settings`,
491
487
  which allows a flexible matching (e.g., `LIKE` may return multiple results).
492
488
  If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
493
489
  If any of the provided ids (`project_id`, `collection_id` or `term_id`) is not found,
494
490
  the function returns an empty list.
495
-
491
+
496
492
  Behavior based on search type:
497
- - `EXACT` and absence of `settings`: returns zero or one Pydantic term instance in the list.
493
+ - `EXACT` and absence of `settings`: returns zero or one term instance in the list.
498
494
  - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
499
- Pydantic term instances in the list.
495
+ term instances in the list.
500
496
 
501
497
  :param project_id: A project id
502
498
  :type project_id: str
@@ -506,14 +502,15 @@ def find_terms_in_collection(project_id:str,
506
502
  :type term_id: str
507
503
  :param settings: The search settings
508
504
  :type settings: SearchSettings|None
509
- :returns: A list of Pydantic term instances. Returns an empty list if no matches are found.
510
- :rtype: list[BaseModel]
505
+ :returns: A list of term instances. Returns an empty list if no matches are found.
506
+ :rtype: list[DataDescriptor]
511
507
  """
512
- result: list[BaseModel] = list()
508
+ result: list[DataDescriptor] = list()
513
509
  if connection:=_get_project_connection(project_id):
514
510
  with connection.create_session() as session:
515
511
  terms = _find_terms_in_collection(collection_id, term_id, session, settings)
516
- instantiate_pydantic_terms(terms, result)
512
+ instantiate_pydantic_terms(terms, result,
513
+ settings.selected_term_fields if settings else None)
517
514
  return result
518
515
 
519
516
 
@@ -529,7 +526,7 @@ def _find_terms_from_data_descriptor_in_project(data_descriptor_id: str,
529
526
  statement = select(PTerm).join(Collection).where(Collection.data_descriptor_id==data_descriptor_id,
530
527
  where_expression)
531
528
  results = session.exec(statement)
532
- result = results.all()
529
+ result = results.all()
533
530
  return result
534
531
 
535
532
 
@@ -537,23 +534,23 @@ def find_terms_from_data_descriptor_in_project(project_id: str,
537
534
  data_descriptor_id: str,
538
535
  term_id: str,
539
536
  settings: SearchSettings|None = None) \
540
- -> list[tuple[BaseModel, str]]:
537
+ -> list[tuple[DataDescriptor, str]]:
541
538
  """
542
539
  Finds one or more terms in the given project which are instances of the given data descriptor
543
540
  in the universe, based on the specified search settings, in the given collection of a project.
544
- This function performs an exact match on the `project_id` and `data_descriptor_id`,
541
+ This function performs an exact match on the `project_id` and `data_descriptor_id`,
545
542
  and does **not** search for similar or related projects and data descriptors.
546
543
  The given `term_id` is searched according to the search type specified in the parameter `settings`,
547
544
  which allows a flexible matching (e.g., `LIKE` may return multiple results).
548
545
  If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
549
546
  If any of the provided ids (`project_id`, `data_descriptor_id` or `term_id`) is not found,
550
547
  the function returns an empty list.
551
-
548
+
552
549
  Behavior based on search type:
553
- - `EXACT` and absence of `settings`: returns zero or one Pydantic term instance and \
550
+ - `EXACT` and absence of `settings`: returns zero or one term instance and \
554
551
  collection id in the list.
555
552
  - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
556
- Pydantic term instances and collection ids in the list.
553
+ term instances and collection ids in the list.
557
554
 
558
555
  :param project_id: A project id
559
556
  :type project_id: str
@@ -563,9 +560,9 @@ def find_terms_from_data_descriptor_in_project(project_id: str,
563
560
  :type term_id: str
564
561
  :param settings: The search settings
565
562
  :type settings: SearchSettings|None
566
- :returns: A list of tuple of Pydantic term instances and related collection ids. \
563
+ :returns: A list of tuple of term instances and related collection ids. \
567
564
  Returns an empty list if no matches are found.
568
- :rtype: list[tuple[BaseModel, str]]
565
+ :rtype: list[tuple[DataDescriptor, str]]
569
566
  """
570
567
  result = list()
571
568
  if connection:=_get_project_connection(project_id):
@@ -576,7 +573,8 @@ def find_terms_from_data_descriptor_in_project(project_id: str,
576
573
  settings)
577
574
  for pterm in terms:
578
575
  collection_id = pterm.collection.id
579
- term = instantiate_pydantic_term(pterm)
576
+ term = instantiate_pydantic_term(pterm,
577
+ settings.selected_term_fields if settings else None)
580
578
  result.append((term, collection_id))
581
579
  return result
582
580
 
@@ -584,23 +582,23 @@ def find_terms_from_data_descriptor_in_project(project_id: str,
584
582
  def find_terms_from_data_descriptor_in_all_projects(data_descriptor_id: str,
585
583
  term_id: str,
586
584
  settings: SearchSettings|None = None) \
587
- -> list[tuple[list[tuple[BaseModel, str]], str]]:
585
+ -> list[tuple[list[tuple[DataDescriptor, str]], str]]:
588
586
  """
589
587
  Finds one or more terms in all projects which are instances of the given data descriptor
590
588
  in the universe, based on the specified search settings, in the given collection of a project.
591
- This function performs an exact match on the `data_descriptor_id`,
589
+ This function performs an exact match on the `data_descriptor_id`,
592
590
  and does **not** search for similar or related data descriptors.
593
591
  The given `term_id` is searched according to the search type specified in the parameter `settings`,
594
592
  which allows a flexible matching (e.g., `LIKE` may return multiple results).
595
593
  If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
596
594
  If any of the provided ids (`data_descriptor_id` or `term_id`) is not found,
597
595
  the function returns an empty list.
598
-
596
+
599
597
  Behavior based on search type:
600
- - `EXACT` and absence of `settings`: returns zero or one Pydantic term instance and \
598
+ - `EXACT` and absence of `settings`: returns zero or one term instance and \
601
599
  collection id in the list.
602
600
  - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
603
- Pydantic term instances and collection ids in the list.
601
+ term instances and collection ids in the list.
604
602
 
605
603
  :param data_descriptor_id: A data descriptor
606
604
  :type data_descriptor_id: str
@@ -610,16 +608,17 @@ def find_terms_from_data_descriptor_in_all_projects(data_descriptor_id: str,
610
608
  :type settings: SearchSettings|None
611
609
  :returns: A list of tuple of matching terms with their collection id, per project. \
612
610
  Returns an empty list if no matches are found.
613
- :rtype: list[tuple[list[tuple[BaseModel, str]], str]]
611
+ :rtype: list[tuple[list[tuple[DataDescriptor, str]], str]]
614
612
  """
615
613
  project_ids = get_all_projects()
616
- result: list[tuple[list[tuple[BaseModel, str]], str]] = list()
614
+ result: list[tuple[list[tuple[DataDescriptor, str]], str]] = list()
617
615
  for project_id in project_ids:
618
616
  matching_terms = find_terms_from_data_descriptor_in_project(project_id,
619
617
  data_descriptor_id,
620
618
  term_id,
621
619
  settings)
622
- result.append((matching_terms, project_id))
620
+ if matching_terms:
621
+ result.append((matching_terms, project_id))
623
622
  return result
624
623
 
625
624
 
@@ -636,7 +635,7 @@ def _find_terms_in_project(term_id: str,
636
635
 
637
636
  def find_terms_in_all_projects(term_id: str,
638
637
  settings: SearchSettings|None = None) \
639
- -> list[BaseModel]:
638
+ -> list[DataDescriptor]:
640
639
  """
641
640
  Finds one or more terms, based on the specified search settings, in all projects.
642
641
  The given `term_id` is searched according to the search type specified in the parameter `settings`,
@@ -649,8 +648,8 @@ def find_terms_in_all_projects(term_id: str,
649
648
  :type term_id: str
650
649
  :param settings: The search settings
651
650
  :type settings: SearchSettings|None
652
- :returns: A list of Pydantic term instances. Returns an empty list if no matches are found.
653
- :rtype: list[BaseModel]
651
+ :returns: A list of term instances. Returns an empty list if no matches are found.
652
+ :rtype: list[DataDescriptor]
654
653
  """
655
654
  project_ids = get_all_projects()
656
655
  result = list()
@@ -662,10 +661,10 @@ def find_terms_in_all_projects(term_id: str,
662
661
  def find_terms_in_project(project_id: str,
663
662
  term_id: str,
664
663
  settings: SearchSettings|None = None) \
665
- -> list[BaseModel]:
664
+ -> list[DataDescriptor]:
666
665
  """
667
666
  Finds one or more terms, based on the specified search settings, in a project.
668
- This function performs an exact match on the `project_id` and
667
+ This function performs an exact match on the `project_id` and
669
668
  does **not** search for similar or related projects.
670
669
  The given `term_id` is searched according to the search type specified in the parameter `settings`,
671
670
  which allows a flexible matching (e.g., `LIKE` may return multiple results).
@@ -680,20 +679,22 @@ def find_terms_in_project(project_id: str,
680
679
  :type term_id: str
681
680
  :param settings: The search settings
682
681
  :type settings: SearchSettings|None
683
- :returns: A list of Pydantic term instances. Returns an empty list if no matches are found.
684
- :rtype: list[BaseModel]
682
+ :returns: A list of term instances. Returns an empty list if no matches are found.
683
+ :rtype: list[DataDescriptor]
685
684
  """
686
- result: list[BaseModel] = list()
685
+ result: list[DataDescriptor] = list()
687
686
  if connection:=_get_project_connection(project_id):
688
687
  with connection.create_session() as session:
689
688
  terms = _find_terms_in_project(term_id, session, settings)
690
- instantiate_pydantic_terms(terms, result)
689
+ instantiate_pydantic_terms(terms, result,
690
+ settings.selected_term_fields if settings else None)
691
691
  return result
692
692
 
693
693
 
694
694
  def get_all_terms_in_collection(project_id: str,
695
- collection_id: str)\
696
- -> list[BaseModel]:
695
+ collection_id: str,
696
+ selected_term_fields: Iterable[str]|None = None)\
697
+ -> list[DataDescriptor]:
697
698
  """
698
699
  Gets all terms of the given collection of a project.
699
700
  This function performs an exact match on the `project_id` and `collection_id`,
@@ -705,8 +706,11 @@ def get_all_terms_in_collection(project_id: str,
705
706
  :type project_id: str
706
707
  :param collection_id: A collection id
707
708
  :type collection_id: str
708
- :returns: a list of Pydantic term instances. Returns an empty list if no matches are found.
709
- :rtype: list[BaseModel]
709
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
710
+ fields of the terms are returned.
711
+ :type selected_term_fields: Iterable[str]|None
712
+ :returns: a list of term instances. Returns an empty list if no matches are found.
713
+ :rtype: list[DataDescriptor]
710
714
  """
711
715
  result = list()
712
716
  if connection:=_get_project_connection(project_id):
@@ -716,7 +720,7 @@ def get_all_terms_in_collection(project_id: str,
716
720
  None)
717
721
  if collections:
718
722
  collection = collections[0]
719
- result = _get_all_terms_in_collection(collection)
723
+ result = _get_all_terms_in_collection(collection, selected_term_fields)
720
724
  return result
721
725
 
722
726
 
@@ -739,15 +743,15 @@ def find_collections_in_project(project_id: str,
739
743
  -> list[dict]:
740
744
  """
741
745
  Finds one or more collections of the given project.
742
- This function performs an exact match on the `project_id` and
746
+ This function performs an exact match on the `project_id` and
743
747
  does **not** search for similar or related projects.
744
- The given `collection_id` is searched according to the search type specified in
748
+ The given `collection_id` is searched according to the search type specified in
745
749
  the parameter `settings`,
746
750
  which allows a flexible matching (e.g., `LIKE` may return multiple results).
747
751
  If the parameter `settings` is `None`, this function performs an exact match on the `collection_id`.
748
752
  If any of the provided ids (`project_id` or `collection_id`) is not found, the function returns
749
753
  an empty list.
750
-
754
+
751
755
  Behavior based on search type:
752
756
  - `EXACT` and absence of `settings`: returns zero or one collection context in the list.
753
757
  - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
@@ -774,7 +778,7 @@ def find_collections_in_project(project_id: str,
774
778
 
775
779
 
776
780
  def _get_all_collections_in_project(session: Session) -> list[Collection]:
777
- project = session.get(Project, esgvoc.core.constants.SQLITE_FIRST_PK)
781
+ project = session.get(Project, constants.SQLITE_FIRST_PK)
778
782
  # Project can't be missing if session exists.
779
783
  return project.collections # type: ignore
780
784
 
@@ -782,7 +786,7 @@ def _get_all_collections_in_project(session: Session) -> list[Collection]:
782
786
  def get_all_collections_in_project(project_id: str) -> list[str]:
783
787
  """
784
788
  Gets all collections of the given project.
785
- This function performs an exact match on the `project_id` and
789
+ This function performs an exact match on the `project_id` and
786
790
  does **not** search for similar or related projects.
787
791
  If the provided `project_id` is not found, the function returns an empty list.
788
792
 
@@ -800,24 +804,29 @@ def get_all_collections_in_project(project_id: str) -> list[str]:
800
804
  return result
801
805
 
802
806
 
803
- def _get_all_terms_in_collection(collection: Collection) -> list[BaseModel]:
804
- result: list[BaseModel] = list()
805
- instantiate_pydantic_terms(collection.terms, result)
807
+ def _get_all_terms_in_collection(collection: Collection,
808
+ selected_term_fields: Iterable[str]|None) -> list[DataDescriptor]:
809
+ result: list[DataDescriptor] = list()
810
+ instantiate_pydantic_terms(collection.terms, result, selected_term_fields)
806
811
  return result
807
812
 
808
813
 
809
- def get_all_terms_in_project(project_id: str) -> list[BaseModel]:
814
+ def get_all_terms_in_project(project_id: str,
815
+ selected_term_fields: Iterable[str]|None = None) -> list[DataDescriptor]:
810
816
  """
811
817
  Gets all terms of the given project.
812
- This function performs an exact match on the `project_id` and
818
+ This function performs an exact match on the `project_id` and
813
819
  does **not** search for similar or related projects.
814
820
  Terms are unique within a collection but may have some synonyms in a project.
815
821
  If the provided `project_id` is not found, the function returns an empty list.
816
822
 
817
823
  :param project_id: A project id
818
824
  :type project_id: str
819
- :returns: A list of Pydantic term instances. Returns an empty list if no matches are found.
820
- :rtype: list[BaseModel]
825
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
826
+ fields of the terms are returned.
827
+ :type selected_term_fields: Iterable[str]|None
828
+ :returns: A list of term instances. Returns an empty list if no matches are found.
829
+ :rtype: list[DataDescriptor]
821
830
  """
822
831
  result = list()
823
832
  if connection:=_get_project_connection(project_id):
@@ -825,60 +834,67 @@ def get_all_terms_in_project(project_id: str) -> list[BaseModel]:
825
834
  collections = _get_all_collections_in_project(session)
826
835
  for collection in collections:
827
836
  # Term may have some synonyms in a project.
828
- result.extend(_get_all_terms_in_collection(collection))
837
+ result.extend(_get_all_terms_in_collection(collection, selected_term_fields))
829
838
  return result
830
839
 
831
840
 
832
- def get_all_terms_in_all_projects() -> list[BaseModel]:
841
+ def get_all_terms_in_all_projects(selected_term_fields: Iterable[str]|None = None) \
842
+ -> list[tuple[str, list[DataDescriptor]]]:
833
843
  """
834
844
  Gets all terms of all projects.
835
845
 
836
- :returns: A list of Pydantic term instances.
837
- :rtype: list[BaseModel]
846
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
847
+ fields of the terms are returned.
848
+ :type selected_term_fields: Iterable[str]|None
849
+ :returns: A list of tuple project_id and term instances of that project.
850
+ :rtype: list[tuple[str, list[DataDescriptor]]]
838
851
  """
839
852
  project_ids = get_all_projects()
840
853
  result = list()
841
854
  for project_id in project_ids:
842
- result.extend(get_all_terms_in_project(project_id))
855
+ terms = get_all_terms_in_project(project_id, selected_term_fields)
856
+ result.append((project_id, terms))
843
857
  return result
844
858
 
845
859
 
846
- def find_project(project_id: str) -> dict|None:
860
+ def find_project(project_id: str) -> ProjectSpecs|None:
847
861
  """
848
- Finds a project.
849
- This function performs an exact match on the `project_id` and
862
+ Finds a project and returns its specifications.
863
+ This function performs an exact match on the `project_id` and
850
864
  does **not** search for similar or related projects.
851
865
  If the provided `project_id` is not found, the function returns `None`.
852
-
866
+
853
867
  :param project_id: A project id to be found
854
868
  :type project_id: str
855
869
  :returns: The specs of the project found. Returns `None` if no matches are found.
856
- :rtype: dict|None
870
+ :rtype: ProjectSpecs|None
857
871
  """
858
- result = None
872
+ result: ProjectSpecs|None = None
859
873
  if connection:=_get_project_connection(project_id):
860
874
  with connection.create_session() as session:
861
- project = session.get(Project, esgvoc.core.constants.SQLITE_FIRST_PK)
862
- # Project can't be missing if session exists.
863
- result = project.specs # type: ignore
875
+ project = session.get(Project, constants.SQLITE_FIRST_PK)
876
+ try:
877
+ # Project can't be missing if session exists.
878
+ result = ProjectSpecs(**project.specs) # type: ignore
879
+ except Exception as e:
880
+ msg = f'Unable to read specs in project {project_id}'
881
+ raise RuntimeError(msg) from e
864
882
  return result
865
883
 
866
884
 
867
885
  def get_all_projects() -> list[str]:
868
886
  """
869
887
  Gets all projects.
870
-
888
+
871
889
  :returns: A list of project ids.
872
890
  :rtype: list[str]
873
891
  """
874
- return list(service.state_service.projects.keys())
892
+ return list(service.current_state.projects.keys())
875
893
 
876
894
 
877
895
  if __name__ == "__main__":
878
- vr = valid_term('r1i1p1f111', 'cmip6plus', 'member_id', 'ripf')
879
- if vr:
880
- print('OK')
881
- else:
882
- print(vr)
883
- for error in vr.errors:
884
- print(error)
896
+ settings = SearchSettings()
897
+ settings.selected_term_fields = ('id', 'drs_name')
898
+ settings.case_sensitive = False
899
+ matching_terms = find_terms_from_data_descriptor_in_all_projects('organisation', 'IpsL', settings)
900
+ print(matching_terms)