esgvoc 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic. Click here for more details.

Files changed (66) hide show
  1. esgvoc/__init__.py +1 -0
  2. esgvoc/api/__init__.py +62 -0
  3. esgvoc/api/_utils.py +39 -0
  4. esgvoc/api/data_descriptors/__init__.py +60 -0
  5. esgvoc/api/data_descriptors/activity.py +51 -0
  6. esgvoc/api/data_descriptors/consortium.py +66 -0
  7. esgvoc/api/data_descriptors/date.py +48 -0
  8. esgvoc/api/data_descriptors/experiment.py +60 -0
  9. esgvoc/api/data_descriptors/forcing_index.py +47 -0
  10. esgvoc/api/data_descriptors/frequency.py +45 -0
  11. esgvoc/api/data_descriptors/grid_label.py +46 -0
  12. esgvoc/api/data_descriptors/initialisation_index.py +46 -0
  13. esgvoc/api/data_descriptors/institution.py +58 -0
  14. esgvoc/api/data_descriptors/license.py +47 -0
  15. esgvoc/api/data_descriptors/mip_era.py +46 -0
  16. esgvoc/api/data_descriptors/model_component.py +47 -0
  17. esgvoc/api/data_descriptors/organisation.py +42 -0
  18. esgvoc/api/data_descriptors/physic_index.py +47 -0
  19. esgvoc/api/data_descriptors/product.py +45 -0
  20. esgvoc/api/data_descriptors/realisation_index.py +46 -0
  21. esgvoc/api/data_descriptors/realm.py +44 -0
  22. esgvoc/api/data_descriptors/resolution.py +46 -0
  23. esgvoc/api/data_descriptors/source.py +57 -0
  24. esgvoc/api/data_descriptors/source_type.py +43 -0
  25. esgvoc/api/data_descriptors/sub_experiment.py +43 -0
  26. esgvoc/api/data_descriptors/table.py +50 -0
  27. esgvoc/api/data_descriptors/time_range.py +28 -0
  28. esgvoc/api/data_descriptors/variable.py +77 -0
  29. esgvoc/api/data_descriptors/variant_label.py +49 -0
  30. esgvoc/api/projects.py +854 -0
  31. esgvoc/api/report.py +86 -0
  32. esgvoc/api/search.py +92 -0
  33. esgvoc/api/universe.py +218 -0
  34. esgvoc/apps/drs/__init__.py +16 -0
  35. esgvoc/apps/drs/models.py +43 -0
  36. esgvoc/apps/drs/parser.py +27 -0
  37. esgvoc/cli/config.py +79 -0
  38. esgvoc/cli/get.py +142 -0
  39. esgvoc/cli/install.py +14 -0
  40. esgvoc/cli/main.py +22 -0
  41. esgvoc/cli/status.py +26 -0
  42. esgvoc/cli/valid.py +156 -0
  43. esgvoc/core/constants.py +13 -0
  44. esgvoc/core/convert.py +0 -0
  45. esgvoc/core/data_handler.py +133 -0
  46. esgvoc/core/db/__init__.py +5 -0
  47. esgvoc/core/db/connection.py +31 -0
  48. esgvoc/core/db/models/mixins.py +18 -0
  49. esgvoc/core/db/models/project.py +65 -0
  50. esgvoc/core/db/models/universe.py +59 -0
  51. esgvoc/core/db/project_ingestion.py +152 -0
  52. esgvoc/core/db/universe_ingestion.py +120 -0
  53. esgvoc/core/logging.conf +21 -0
  54. esgvoc/core/logging_handler.py +4 -0
  55. esgvoc/core/repo_fetcher.py +259 -0
  56. esgvoc/core/service/__init__.py +8 -0
  57. esgvoc/core/service/data_merger.py +83 -0
  58. esgvoc/core/service/esg_voc.py +79 -0
  59. esgvoc/core/service/settings.py +64 -0
  60. esgvoc/core/service/settings.toml +12 -0
  61. esgvoc/core/service/settings_default.toml +20 -0
  62. esgvoc/core/service/state.py +222 -0
  63. esgvoc-0.1.2.dist-info/METADATA +54 -0
  64. esgvoc-0.1.2.dist-info/RECORD +66 -0
  65. esgvoc-0.1.2.dist-info/WHEEL +4 -0
  66. esgvoc-0.1.2.dist-info/entry_points.txt +2 -0
esgvoc/api/projects.py ADDED
@@ -0,0 +1,854 @@
1
+ import re
2
+ from typing import Sequence
3
+
4
+ import esgvoc.api.universe as universe
5
+ import esgvoc.core.constants
6
+ import esgvoc.core.service as service
7
+ from esgvoc.api._utils import (get_universe_session, instantiate_pydantic_term,
8
+ instantiate_pydantic_terms)
9
+ from esgvoc.api.report import (ProjectTermError, UniverseTermError,
10
+ ValidationError, ValidationReport)
11
+ from esgvoc.api.search import MatchingTerm, SearchSettings, create_str_comparison_expression
12
+ from esgvoc.core.db.connection import DBConnection
13
+ from esgvoc.core.db.models.mixins import TermKind
14
+ from esgvoc.core.db.models.project import Collection, Project, PTerm
15
+ from esgvoc.core.db.models.universe import UTerm
16
+ from pydantic import BaseModel
17
+ from sqlmodel import Session, and_, select
18
+
19
+
20
def _get_project_connection(project_id: str) -> DBConnection|None:
    """
    Return the database connection registered for the given project.

    :param project_id: A project id
    :type project_id: str
    :returns: The `db_connection` attribute of the project entry held by the
        state service, or `None` if the project id is not registered.
    :rtype: DBConnection|None
    """
    try:
        project = service.state_service.projects[project_id]
    except KeyError:
        # Unknown project id: honour the `DBConnection|None` contract so that
        # callers can take their "not found" branch instead of crashing.
        # NOTE(review): assumes `projects` is a dict-like mapping that raises
        # KeyError on a missing key -- confirm against the state service.
        return None
    return project.db_connection
22
+
23
def _get_project_session_with_exception(project_id: str) -> Session:
    """
    Create a new session on the database of the given project.

    :param project_id: A project id
    :type project_id: str
    :returns: A session created from the connection of the project
    :rtype: Session
    :raises ValueError: If no connection is available for the project
    """
    connection = _get_project_connection(project_id)
    if not connection:
        raise ValueError(f'unable to find project {project_id}')
    return connection.create_session()
29
+
30
+
31
def _resolve_term(term_composite_part: dict,
                  universe_session: Session,
                  project_session: Session) -> UTerm|PTerm:
    """
    Resolve a part of a composite term: look it up in the universe first,
    then in the current project.

    :param term_composite_part: A mapping that holds the id and the type of the term
    :param universe_session: A session on the universe database
    :param project_session: A session on the project database
    :returns: The first matching term (universe terms take precedence)
    :raises RuntimeError: If the term is found neither in the universe nor in the project
    """
    constants = esgvoc.core.constants
    term_id = term_composite_part[constants.TERM_ID_JSON_KEY]
    term_type = term_composite_part[constants.TERM_TYPE_JSON_KEY]

    universe_matches = universe._find_terms_in_data_descriptor(data_descriptor_id=term_type,
                                                               term_id=term_id,
                                                               session=universe_session,
                                                               settings=None)
    if universe_matches:
        return universe_matches[0]

    # Not in the universe: the term type is then used as a collection id.
    project_matches = _find_terms_in_collection(collection_id=term_type,
                                                term_id=term_id,
                                                session=project_session,
                                                settings=None)
    if project_matches:
        return project_matches[0]

    raise RuntimeError(f'unable to find the term {term_id} in {term_type}')
53
+
54
+
55
def _get_term_composite_separator_parts(term: UTerm|PTerm) -> tuple[str, list]:
    """
    Read the separator and the parts of a composite term from its specs.

    :param term: A composite term
    :returns: The tuple (separator, parts)
    """
    specs = term.specs
    return (specs[esgvoc.core.constants.COMPOSITE_SEPARATOR_JSON_KEY],
            specs[esgvoc.core.constants.COMPOSITE_PARTS_JSON_KEY])
59
+
60
+
61
+ # TODO: support optional parts in composite terms.
62
+ # Backtracking could handle more than one missing part.
63
def _valid_value_term_composite_with_separator(value: str,
                                               term: UTerm|PTerm,
                                               universe_session: Session,
                                               project_session: Session)\
                                                   -> list[ValidationError]:
    """
    Validate a value against a composite term that has a separator.

    The value is split on the separator and each split is validated against
    the part of the composite at the same position.

    :param value: The value to be validated
    :param term: A composite term with a separator
    :param universe_session: A session on the universe database
    :param project_session: A session on the project database
    :returns: The validation errors (empty if the value is valid)
    """
    separator, parts = _get_term_composite_separator_parts(term)
    if separator not in value:
        return [_create_term_error(value, term)]

    splits = value.split(separator)
    # Every part is mandatory: the number of splits must match exactly.
    if len(splits) != len(parts):
        return [_create_term_error(value, term)]

    errors = list()
    for given_value, part in zip(splits, parts):
        resolved_term = _resolve_term(part, universe_session, project_session)
        errors.extend(_valid_value(given_value, resolved_term,
                                   universe_session, project_session))
    return errors
88
+
89
+
90
+ def _transform_to_pattern(term: UTerm|PTerm,
91
+ universe_session: Session,
92
+ project_session: Session) -> str:
93
+ match term.kind:
94
+ case TermKind.PLAIN:
95
+ result = term.specs[esgvoc.core.constants.DRS_SPECS_JSON_KEY]
96
+ case TermKind.PATTERN:
97
+ result = term.specs[esgvoc.core.constants.PATTERN_JSON_KEY]
98
+ case TermKind.COMPOSITE:
99
+ separator, parts = _get_term_composite_separator_parts(term)
100
+ result = ""
101
+ for part in parts:
102
+ resolved_term = _resolve_term(part, universe_session, project_session)
103
+ pattern = _transform_to_pattern(resolved_term, universe_session, project_session)
104
+ result = f'{result}{pattern}{separator}'
105
+ result = result.rstrip(separator)
106
+ case _:
107
+ raise NotImplementedError(f'unsupported term kind {term.kind}')
108
+ return result
109
+
110
+
111
+ # TODO: support optional parts in composite terms.
112
+ # Backtracking could handle more than one missing part.
113
def _valid_value_term_composite_separator_less(value: str,
                                               term: UTerm|PTerm,
                                               universe_session: Session,
                                               project_session: Session)\
                                                   -> list[ValidationError]:
    """
    Validate a value against a composite term that has no separator.

    The parts of the composite are aggregated into a single regex which is
    matched against the whole value.

    :param value: The value to be validated
    :param term: A composite term without separator
    :param universe_session: A session on the universe database
    :param project_session: A session on the project database
    :returns: The validation errors (empty if the value is valid)
    :raises RuntimeError: If the pattern cannot be built or compiled, or if
        the matching fails for any other reason
    """
    try:
        raw_pattern = _transform_to_pattern(term, universe_session, project_session)
        try:
            # Each term pattern is written to be matched on its own, so it
            # starts with ^ and ends with $. Once concatenated, several ^
            # and $ may appear inside the pattern: they are all removed and
            # a single pair of anchors is put around the whole expression.
            # Every ^ and $ is removed, wherever it appears in the pattern.
            unanchored = raw_pattern.replace('^', '').replace('$', '')
            regex = re.compile('^' + unanchored + '$')
        except Exception as e:
            msg = f'regex compilation error:\n{e}'
            raise ValueError(msg) from e
        if regex.match(value) is None:
            return [_create_term_error(value, term)]
        return list()
    except Exception as e:
        msg = f'cannot validate separator less composite term {term.id}:\n{e}'
        raise RuntimeError(msg) from e
139
+
140
+
141
def _valid_value_for_term_composite(value: str,
                                    term: UTerm|PTerm,
                                    universe_session: Session,
                                    project_session: Session)\
                                        -> list[ValidationError]:
    """
    Validate a value against a composite term, with or without separator.

    :param value: The value to be validated
    :param term: A composite term
    :param universe_session: A session on the universe database
    :param project_session: A session on the project database
    :returns: The validation errors (empty if the value is valid)
    """
    separator, _ = _get_term_composite_separator_parts(term)
    # A falsy separator means that the parts are simply concatenated.
    validate = _valid_value_term_composite_with_separator if separator \
               else _valid_value_term_composite_separator_less
    return validate(value, term, universe_session, project_session)
155
+
156
+
157
def _create_term_error(value: str, term: UTerm|PTerm) -> ValidationError:
    """
    Create the validation error that suits the origin of the term.

    :param value: The value that failed the validation
    :param term: The term the value has been validated against
    :returns: A `UniverseTermError` for a universe term, a `ProjectTermError` otherwise
    """
    error_class = UniverseTermError if isinstance(term, UTerm) else ProjectTermError
    return error_class(value, term)
162
+
163
+
164
def _valid_value(value: str,
                 term: UTerm|PTerm,
                 universe_session: Session,
                 project_session: Session) -> list[ValidationError]:
    """
    Validate a value against a term, according to the kind of the term.

    - plain term: the value must be equal to the value stored under the DRS specs key.
    - term pattern: the value must match the regex stored under the pattern key.
    - term composite: the validation is delegated to the composite validation.

    :param value: The value to be validated
    :param term: The term the value is validated against
    :param universe_session: A session on the universe database
    :param project_session: A session on the project database
    :returns: The validation errors (empty if the value is valid)
    :raises NotImplementedError: If the kind of the term is not supported
    """
    kind = term.kind
    if kind == TermKind.PLAIN:
        has_error = term.specs[esgvoc.core.constants.DRS_SPECS_JSON_KEY] != value
    elif kind == TermKind.PATTERN:
        # OPTIM: Pattern can be compiled and stored for further matching.
        has_error = re.match(term.specs[esgvoc.core.constants.PATTERN_JSON_KEY],
                             value) is None
    elif kind == TermKind.COMPOSITE:
        return list(_valid_value_for_term_composite(value, term,
                                                    universe_session,
                                                    project_session))
    else:
        raise NotImplementedError(f'unsupported term kind {term.kind}')
    return [_create_term_error(value, term)] if has_error else list()
185
+
186
+
187
+ def _check_and_strip_value(value: str) -> str:
188
+ if not value:
189
+ raise ValueError('value should be set')
190
+ if result:= value.strip():
191
+ return result
192
+ else:
193
+ raise ValueError('value should not be empty')
194
+
195
+
196
def _search_plain_term_and_valid_value(value: str,
                                       collection_id: str,
                                       project_session: Session) \
                                           -> str|None:
    """
    Search, in the given collection, the plain term whose DRS specs entry
    equals the given value.

    :param value: The value to be validated
    :param collection_id: A collection id
    :param project_session: A session on the project database
    :returns: The id of the matching term, or `None` if there is no match
    """
    drs_specs_field = PTerm.specs[esgvoc.core.constants.DRS_SPECS_JSON_KEY]
    # The value is wrapped in double quotes before the comparison --
    # presumably to match the JSON-encoded string stored in specs; verify.
    criteria = and_(Collection.id == collection_id,
                    drs_specs_field == f'"{value}"')
    query = select(PTerm).join(Collection).where(criteria)
    # one_or_none(): at most one row is expected for a given value.
    matching_term = project_session.exec(query).one_or_none()
    if not matching_term:
        return None
    return matching_term.id
205
+
206
+
207
def _valid_value_against_all_terms_of_collection(value: str,
                                                 collection: Collection,
                                                 universe_session: Session,
                                                 project_session: Session) \
                                                     -> list[str]:
    """
    Validate a value against every term of the given collection.

    :param value: The value to be validated
    :param collection: The collection whose terms are tried one by one
    :param universe_session: A session on the universe database
    :param project_session: A session on the project database
    :returns: The ids of the terms that the value matches
    :raises RuntimeError: If the collection has no term
    """
    if not collection.terms:
        raise RuntimeError(f'collection {collection.id} has no term')
    # A term matches when its validation reports no error.
    return [pterm.id
            for pterm in collection.terms
            if not _valid_value(value, pterm, universe_session, project_session)]
223
+
224
+
225
def _valid_value_against_given_term(value: str,
                                    collection_id: str,
                                    term_id: str,
                                    universe_session: Session,
                                    project_session: Session)\
                                        -> list[ValidationError]:
    """
    Validate a value against the given term of the given collection.

    :param value: The value to be validated
    :param collection_id: A collection id
    :param term_id: A term id (exact match)
    :param universe_session: A session on the universe database
    :param project_session: A session on the project database
    :returns: The validation errors (empty if the value is valid)
    :raises RuntimeError: If the term cannot be found in the collection or if
        the validation fails; the original exception is chained as the cause
    """
    try:
        candidates = _find_terms_in_collection(collection_id,
                                               term_id,
                                               project_session,
                                               None)
        if not candidates:
            # Caught just below and re-raised as a RuntimeError.
            raise ValueError(f'unable to find term {term_id} ' +
                             f'in collection {collection_id}')
        return _valid_value(value, candidates[0], universe_session, project_session)
    except Exception as e:
        msg = f'unable to valid term {term_id} ' +\
              f'in collection {collection_id}'
        raise RuntimeError(msg) from e
247
+
248
+
249
def valid_term(value: str,
               project_id: str,
               collection_id: str,
               term_id: str) \
                  -> ValidationReport:
    """
    Check if the given value may or may not represent the given term. The function returns
    a report that contains the possible errors.

    Behavior based on the nature of the term:
        - plain term: the function tries to match the value on the drs_name field.
        - term pattern: the function tries to match the value on the pattern field (regex).
        - term composite:
            - if the composite has got a separator, the function splits the value according to the
              separator of the term, then it tries to match every part of the composite
              with the split of the value at the same position.
            - if the composite hasn't got a separator, the function aggregates the parts of the
              composite so as to compare it as a regex to the value.

    The value is stripped of its surrounding whitespace before validation.

    :param value: A value to be validated
    :type value: str
    :param project_id: A project id
    :type project_id: str
    :param collection_id: A collection id
    :type collection_id: str
    :param term_id: A term id
    :type term_id: str
    :returns: A validation report that contains the possible errors
    :rtype: ValidationReport
    :raises ValueError: If the value is empty or blank, or if no connection is
        available for the project
    :raises RuntimeError: If the term cannot be found in the collection (this
        includes an unknown `collection_id`) or if the validation itself fails;
        the original exception is chained as the cause
    """
    value = _check_and_strip_value(value)
    with get_universe_session() as universe_session, \
         _get_project_session_with_exception(project_id) as project_session:
        errors = _valid_value_against_given_term(value, collection_id, term_id,
                                                 universe_session, project_session)
        return ValidationReport(value, errors)
289
+
290
+
291
def _valid_term_in_collection(value: str,
                              project_id: str,
                              collection_id: str,
                              universe_session: Session,
                              project_session: Session) \
                                -> list[MatchingTerm]:
    """
    Find the terms of the given collection that the value matches.

    :param value: The value to be validated (stripped before validation)
    :param project_id: A project id, only used to build the matching terms
    :param collection_id: A collection id (exact match)
    :param universe_session: A session on the universe database
    :param project_session: A session on the project database
    :returns: The list of terms that the value matches
    :raises ValueError: If the value is empty or blank, or if the collection is not found
    """
    value = _check_and_strip_value(value)
    collections = _find_collections_in_project(collection_id,
                                               project_session,
                                               None)
    if not collections:
        msg = f'unable to find collection {collection_id}'
        raise ValueError(msg)

    collection = collections[0]
    if collection.term_kind == TermKind.PLAIN:
        # Plain terms are looked up by a single query on the database.
        term_id_found = _search_plain_term_and_valid_value(value, collection_id,
                                                           project_session)
        matching_ids = [term_id_found] if term_id_found else []
    else:
        # Other kinds require the validation of the value term by term.
        matching_ids = _valid_value_against_all_terms_of_collection(value, collection,
                                                                    universe_session,
                                                                    project_session)
    return [MatchingTerm(project_id, collection_id, matching_id)
            for matching_id in matching_ids]
320
+
321
+
322
def valid_term_in_collection(value: str,
                             project_id: str,
                             collection_id: str) \
                               -> list[MatchingTerm]:
    """
    Check if the given value may or may not represent a term in the given collection.
    The function returns the terms that the value matches.

    Behavior based on the nature of the term:
        - plain term: the function tries to match the value on the drs_name field.
        - term pattern: the function tries to match the value on the pattern field (regex).
        - term composite:
            - if the composite has got a separator, the function splits the value according to the
              separator of the term, then it tries to match every part of the composite
              with the split of the value at the same position.
            - if the composite hasn't got a separator, the function aggregates the parts of the
              composite so as to compare it as a regex to the value.

    :param value: A value to be validated
    :type value: str
    :param project_id: A project id
    :type project_id: str
    :param collection_id: A collection id
    :type collection_id: str
    :returns: The list of terms that the value matches.
    :rtype: list[MatchingTerm]
    :raises ValueError: If the value is empty or blank, if no connection is
        available for the project, or if the collection is not found
    """
    universe_context = get_universe_session()
    project_context = _get_project_session_with_exception(project_id)
    with universe_context as universe_session, project_context as project_session:
        return _valid_term_in_collection(value, project_id, collection_id,
                                         universe_session, project_session)
357
+
358
+
359
def _valid_term_in_project(value: str,
                           project_id: str,
                           universe_session: Session,
                           project_session: Session) -> list[MatchingTerm]:
    """
    Find the terms that the value matches, across all the collections of a project.

    :param value: The value to be validated
    :param project_id: A project id, only used to build the matching terms
    :param universe_session: A session on the universe database
    :param project_session: A session on the project database
    :returns: The matching terms, collection after collection
    """
    return [matching_term
            for collection in _get_all_collections_in_project(project_session)
            for matching_term in _valid_term_in_collection(value, project_id,
                                                           collection.id,
                                                           universe_session,
                                                           project_session)]
369
+
370
+
371
def valid_term_in_project(value: str, project_id: str) -> list[MatchingTerm]:
    """
    Check if the given value may or may not represent a term in the given project.
    The function returns the terms that the value matches.

    Behavior based on the nature of the term:
        - plain term: the function tries to match the value on the drs_name field.
        - term pattern: the function tries to match the value on the pattern field (regex).
        - term composite:
            - if the composite has got a separator, the function splits the value according to the
              separator of the term, then it tries to match every part of the composite
              with the split of the value at the same position.
            - if the composite hasn't got a separator, the function aggregates the parts of the
              composite so as to compare it as a regex to the value.

    :param value: A value to be validated
    :type value: str
    :param project_id: A project id
    :type project_id: str
    :returns: The list of terms that the value matches.
    :rtype: list[MatchingTerm]
    :raises ValueError: If no connection is available for the project, or if
        the value is found empty or blank when checked against a collection
    """
    universe_context = get_universe_session()
    project_context = _get_project_session_with_exception(project_id)
    with universe_context as universe_session, project_context as project_session:
        return _valid_term_in_project(value, project_id, universe_session, project_session)
399
+
400
+
401
def valid_term_in_all_projects(value: str) -> list[MatchingTerm]:
    """
    Check if the given value may or may not represent a term in all projects.
    The function returns the terms that the value matches.

    Behavior based on the nature of the term:
        - plain term: the function tries to match the value on the drs_name field.
        - term pattern: the function tries to match the value on the pattern field (regex).
        - term composite:
            - if the composite has got a separator, the function splits the value according to the
              separator of the term, then it tries to match every part of the composite
              with the split of the value at the same position.
            - if the composite hasn't got a separator, the function aggregates the parts of the
              composite so as to compare it as a regex to the value.

    :param value: A value to be validated
    :type value: str
    :returns: The list of terms that the value matches.
    :rtype: list[MatchingTerm]
    """
    matching_terms = list()
    # The universe session is shared by the validations of all the projects.
    with get_universe_session() as universe_session:
        for project_id in get_all_projects():
            with _get_project_session_with_exception(project_id) as project_session:
                matching_terms += _valid_term_in_project(value, project_id,
                                                         universe_session,
                                                         project_session)
    return matching_terms
428
+
429
+
430
def _find_terms_in_collection(collection_id: str,
                              term_id: str,
                              session: Session,
                              settings: SearchSettings|None = None) -> Sequence[PTerm]:
    """
    Query the terms of the given collection whose id matches `term_id`.
    Settings only apply on the term_id comparison; the collection id is
    always compared with an equality.
    """
    term_id_criterion = create_str_comparison_expression(field=PTerm.id,
                                                         value=term_id,
                                                         settings=settings)
    query = select(PTerm).join(Collection).where(Collection.id==collection_id,
                                                 term_id_criterion)
    return session.exec(query).all()
443
+
444
+
445
def find_terms_in_collection(project_id:str,
                             collection_id: str,
                             term_id: str,
                             settings: SearchSettings|None = None) \
                                -> list[BaseModel]:
    """
    Finds one or more terms, based on the specified search settings, in the given
    collection of a project.
    The `project_id` and the `collection_id` are matched exactly: similar or related
    projects and collections are **not** searched.
    The `term_id` is compared according to the search type given by the parameter `settings`,
    which allows a flexible matching (e.g., `LIKE` may return multiple results).
    When `settings` is `None`, the `term_id` is matched exactly.
    The function returns an empty list when no connection is available for the project
    or when nothing matches the `collection_id` and the `term_id`.

    Behavior based on search type:
        - `EXACT` and absence of `settings`: returns zero or one Pydantic term instance in the list.
        - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more
          Pydantic term instances in the list.

    :param project_id: A project id
    :type project_id: str
    :param collection_id: A collection
    :type collection_id: str
    :param term_id: A term id to be found
    :type term_id: str
    :param settings: The search settings
    :type settings: SearchSettings|None
    :returns: A list of Pydantic term instances. Returns an empty list if no matches are found.
    :rtype: list[BaseModel]
    """
    result: list[BaseModel] = list()
    connection = _get_project_connection(project_id)
    if not connection:
        return result
    with connection.create_session() as session:
        found_terms = _find_terms_in_collection(collection_id, term_id, session, settings)
        # The Pydantic instances are appended to `result` by the helper.
        instantiate_pydantic_terms(found_terms, result)
    return result
482
+
483
+
484
def _find_terms_from_data_descriptor_in_project(data_descriptor_id: str,
                                                term_id: str,
                                                session: Session,
                                                settings: SearchSettings|None = None) \
                                                   -> Sequence[PTerm]:
    """
    Query the terms whose id matches `term_id` and whose collection refers to
    the given data descriptor.
    Settings only apply on the term_id comparison; the data descriptor id is
    always compared with an equality.
    """
    term_id_criterion = create_str_comparison_expression(field=PTerm.id,
                                                         value=term_id,
                                                         settings=settings)
    query = select(PTerm).join(Collection).where(
        Collection.data_descriptor_id==data_descriptor_id,
        term_id_criterion)
    return session.exec(query).all()
498
+
499
+
500
def find_terms_from_data_descriptor_in_project(project_id: str,
                                               data_descriptor_id: str,
                                               term_id: str,
                                               settings: SearchSettings|None = None) \
                                                  -> list[tuple[BaseModel, str]]:
    """
    Finds one or more terms in the given project which are instances of the given
    data descriptor in the universe, based on the specified search settings.
    The `project_id` and the `data_descriptor_id` are matched exactly: similar or related
    projects and data descriptors are **not** searched.
    The `term_id` is compared according to the search type given by the parameter `settings`,
    which allows a flexible matching (e.g., `LIKE` may return multiple results).
    When `settings` is `None`, the `term_id` is matched exactly.
    The function returns an empty list when no connection is available for the project
    or when nothing matches the `data_descriptor_id` and the `term_id`.

    Behavior based on search type:
        - `EXACT` and absence of `settings`: returns zero or one Pydantic term instance and
          collection id in the list.
        - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more
          Pydantic term instances and collection ids in the list.

    :param project_id: A project id
    :type project_id: str
    :param data_descriptor_id: A data descriptor
    :type data_descriptor_id: str
    :param term_id: A term id to be found
    :type term_id: str
    :param settings: The search settings
    :type settings: SearchSettings|None
    :returns: A list of tuple of Pydantic term instances and related collection ids.
      Returns an empty list if no matches are found.
    :rtype: list[tuple[BaseModel, str]]
    """
    pairs = list()
    connection = _get_project_connection(project_id)
    if not connection:
        return pairs
    with connection.create_session() as session:
        found_terms = _find_terms_from_data_descriptor_in_project(data_descriptor_id,
                                                                  term_id,
                                                                  session,
                                                                  settings)
        for pterm in found_terms:
            # The collection is read from the term while the session is open.
            collection_id = pterm.collection.id
            pairs.append((instantiate_pydantic_term(pterm), collection_id))
    return pairs
546
+
547
+
548
def find_terms_from_data_descriptor_in_all_projects(data_descriptor_id: str,
                                                    term_id: str,
                                                    settings: SearchSettings|None = None) \
                                                      -> list[tuple[BaseModel, str]]:
    """
    Finds one or more terms in all projects which are instances of the given
    data descriptor in the universe, based on the specified search settings.
    The `data_descriptor_id` is matched exactly: similar or related data descriptors
    are **not** searched.
    The `term_id` is compared according to the search type given by the parameter `settings`,
    which allows a flexible matching (e.g., `LIKE` may return multiple results).
    When `settings` is `None`, the `term_id` is matched exactly.
    The function returns an empty list when nothing matches the `data_descriptor_id`
    and the `term_id`.

    The result is the concatenation of the results of
    `find_terms_from_data_descriptor_in_project` for every project, so with the search
    types below the counts apply per project:
        - `EXACT` and absence of `settings`: zero or one Pydantic term instance and
          collection id.
        - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: zero, one or more
          Pydantic term instances and collection ids.

    :param data_descriptor_id: A data descriptor
    :type data_descriptor_id: str
    :param term_id: A term id to be found
    :type term_id: str
    :param settings: The search settings
    :type settings: SearchSettings|None
    :returns: A list of tuple of Pydantic term instances and related collection ids.
      Returns an empty list if no matches are found.
    :rtype: list[tuple[BaseModel, str]]
    """
    pairs = list()
    for project_id in get_all_projects():
        pairs += find_terms_from_data_descriptor_in_project(project_id,
                                                            data_descriptor_id,
                                                            term_id,
                                                            settings)
    return pairs
587
+
588
+
589
def _find_terms_in_project(term_id: str,
                           session: Session,
                           settings: SearchSettings|None) -> Sequence[PTerm]:
    """
    Query the terms reachable through the given session whose id matches
    `term_id`, according to the search settings.
    """
    term_id_criterion = create_str_comparison_expression(field=PTerm.id,
                                                         value=term_id,
                                                         settings=settings)
    query = select(PTerm).where(term_id_criterion)
    return session.exec(query).all()
598
+
599
+
600
def find_terms_in_all_projects(term_id: str,
                               settings: SearchSettings|None = None) \
                                  -> list[BaseModel]:
    """
    Finds one or more terms, based on the specified search settings, in all projects.
    The `term_id` is compared according to the search type given by the parameter `settings`,
    which allows a flexible matching (e.g., `LIKE` may return multiple results).
    When `settings` is `None`, the `term_id` is matched exactly.
    Terms are unique within a collection but may have some synonyms within a project.
    The function returns an empty list when the `term_id` is not found.

    :param term_id: A term id to be found
    :type term_id: str
    :param settings: The search settings
    :type settings: SearchSettings|None
    :returns: A list of Pydantic term instances. Returns an empty list if no matches are found.
    :rtype: list[BaseModel]
    """
    found_terms = list()
    for project_id in get_all_projects():
        found_terms += find_terms_in_project(project_id, term_id, settings)
    return found_terms
623
+
624
+
625
def find_terms_in_project(project_id: str,
                          term_id: str,
                          settings: SearchSettings|None = None) \
                            -> list[BaseModel]:
    """
    Finds one or more terms, based on the specified search settings, in a project.
    The `project_id` is matched exactly: similar or related projects are **not** searched.
    The `term_id` is compared according to the search type given by the parameter `settings`,
    which allows a flexible matching (e.g., `LIKE` may return multiple results).
    When `settings` is `None`, the `term_id` is matched exactly.
    Terms are unique within a collection but may have some synonyms within a project.
    The function returns an empty list when no connection is available for the project
    or when the `term_id` is not found.

    :param project_id: A project id
    :type project_id: str
    :param term_id: A term id to be found
    :type term_id: str
    :param settings: The search settings
    :type settings: SearchSettings|None
    :returns: A list of Pydantic term instances. Returns an empty list if no matches are found.
    :rtype: list[BaseModel]
    """
    result: list[BaseModel] = list()
    connection = _get_project_connection(project_id)
    if not connection:
        return result
    with connection.create_session() as session:
        found_terms = _find_terms_in_project(term_id, session, settings)
        # The Pydantic instances are appended to `result` by the helper.
        instantiate_pydantic_terms(found_terms, result)
    return result
655
+
656
+
657
def get_all_terms_in_collection(project_id: str,
                                collection_id: str)\
                                   -> list[BaseModel]:
    """
    Gets all terms of the given collection of a project.
    The `project_id` and the `collection_id` are matched exactly: similar or related
    projects and collections are **not** searched.
    The function returns an empty list when no connection is available for the project
    or when the collection is not found.

    :param project_id: A project id
    :type project_id: str
    :param collection_id: A collection id
    :type collection_id: str
    :returns: a list of Pydantic term instances.
      Returns an empty list if no matches are found.
    :rtype: list[BaseModel]
    """
    connection = _get_project_connection(project_id)
    if not connection:
        return list()
    with connection.create_session() as session:
        collections = _find_collections_in_project(collection_id,
                                                   session,
                                                   None)
        if collections:
            # Exact match on the id: only the first collection is used.
            return _get_all_terms_in_collection(collections[0])
    return list()
685
+
686
+
687
def _find_collections_in_project(collection_id: str,
                                 session: Session,
                                 settings: SearchSettings|None) \
                                    -> Sequence[Collection]:
    """
    Query the collections reachable through the given session whose id
    matches `collection_id`, according to the search settings.
    """
    collection_id_criterion = create_str_comparison_expression(field=Collection.id,
                                                               value=collection_id,
                                                               settings=settings)
    query = select(Collection).where(collection_id_criterion)
    return session.exec(query).all()
698
+
699
+
700
def find_collections_in_project(project_id: str,
                                collection_id: str,
                                settings: SearchSettings|None = None) \
                                   -> list[dict]:
    """
    Looks up one or more collections inside the given project.
    The `project_id` is matched exactly: similar or related projects are **not** searched.
    The `collection_id` is matched according to the search type carried by the
    parameter `settings`, which allows a flexible matching
    (e.g., `LIKE` may return multiple results).
    When `settings` is `None`, the `collection_id` is matched exactly.
    An empty list is returned as soon as one of the ids (`project_id` or
    `collection_id`) is not found.

    Behavior based on search type:
    - `EXACT` and absence of `settings`: returns zero or one collection context in the list.
    - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more
      collection contexts in the list.

    :param project_id: A project id
    :type project_id: str
    :param collection_id: A collection id to be found
    :type collection_id: str
    :param settings: The search settings
    :type settings: SearchSettings|None
    :returns: A list of collection contexts.
              Returns an empty list if no matches are found.
    :rtype: list[dict]
    """
    connection = _get_project_connection(project_id)
    if not connection:
        return list()
    with connection.create_session() as session:
        found = _find_collections_in_project(collection_id, session, settings)
        # Contexts are read while the session is still open.
        contexts = [item.context for item in found]
    return contexts
739
+
740
+
741
def _get_all_collections_in_project(session: Session) -> list[Collection]:
    """
    Returns the collections of the project reachable through the given session.
    The project is fetched by the primary key `SQLITE_FIRST_PK`.

    :param session: The session used to fetch the project
    :type session: Session
    :returns: The collections of the project.
    :rtype: list[Collection]
    """
    project_pk = esgvoc.core.constants.SQLITE_FIRST_PK
    project = session.get(Project, project_pk)
    # Project can't be missing if session exists.
    return project.collections  # type: ignore
745
+
746
+
747
def get_all_collections_in_project(project_id: str) -> list[str]:
    """
    Lists the ids of every collection of the given project.
    The `project_id` is matched exactly: similar or related projects are **not** searched.
    An empty list is returned if the provided `project_id` is not found.

    :param project_id: A project id
    :type project_id: str
    :returns: A list of collection ids.
              Returns an empty list if no matches are found.
    :rtype: list[str]
    """
    connection = _get_project_connection(project_id)
    if not connection:
        return list()
    with connection.create_session() as session:
        # Ids are read while the session is still open.
        collection_ids = [item.id for item in _get_all_collections_in_project(session)]
    return collection_ids
767
+
768
+
769
def _get_all_terms_in_collection(collection: Collection) -> list[BaseModel]:
    """
    Returns the terms of the given collection as Pydantic instances.
    The conversion is delegated to `instantiate_pydantic_terms`, which receives the
    collection terms and the list to be returned.

    :param collection: The collection whose terms are requested
    :type collection: Collection
    :returns: A list of Pydantic term instances.
    :rtype: list[BaseModel]
    """
    pydantic_terms: list[BaseModel] = []
    instantiate_pydantic_terms(collection.terms, pydantic_terms)
    return pydantic_terms
773
+
774
+
775
def get_all_terms_in_project(project_id: str) -> list[BaseModel]:
    """
    Returns every term of the given project, collection after collection.
    The `project_id` is matched exactly: similar or related projects are **not** searched.
    Terms are unique within a collection but may have some synonyms in a project.
    An empty list is returned if the provided `project_id` is not found.

    :param project_id: A project id
    :type project_id: str
    :returns: A list of Pydantic term instances.
              Returns an empty list if no matches are found.
    :rtype: list[BaseModel]
    """
    connection = _get_project_connection(project_id)
    if not connection:
        return list()
    all_terms: list = []
    with connection.create_session() as session:
        for collection in _get_all_collections_in_project(session):
            # No de-duplication: a term may have some synonyms in a project.
            all_terms += _get_all_terms_in_collection(collection)
    return all_terms
797
+
798
+
799
def get_all_terms_in_all_projects() -> list[BaseModel]:
    """
    Returns the terms of every project, concatenated in the order given
    by `get_all_projects`.

    :returns: A list of Pydantic term instances.
    :rtype: list[BaseModel]
    """
    return [term
            for project_id in get_all_projects()
            for term in get_all_terms_in_project(project_id)]
811
+
812
+
813
def find_project(project_id: str) -> dict|None:
    """
    Looks up a project and returns its specs.
    The `project_id` is matched exactly: similar or related projects are **not** searched.
    `None` is returned if the provided `project_id` is not found.

    :param project_id: A project id to be found
    :type project_id: str
    :returns: The specs of the project found.
              Returns `None` if no matches are found.
    :rtype: dict|None
    """
    connection = _get_project_connection(project_id)
    if not connection:
        return None
    with connection.create_session() as session:
        project_pk = esgvoc.core.constants.SQLITE_FIRST_PK
        project = session.get(Project, project_pk)
        # Project can't be missing if session exists.
        specs = project.specs  # type: ignore
    return specs
833
+
834
+
835
def get_all_projects() -> list[str]:
    """
    Lists the ids of all the projects, i.e. the keys of `service.state_service.projects`.

    :returns: A list of project ids.
    :rtype: list[str]
    """
    registered_projects = service.state_service.projects
    return list(registered_projects.keys())
843
+
844
+
845
if __name__ == "__main__":
    # Manual check: validate one sample value and print either 'OK' or the
    # validation report followed by each of its errors.
    report = valid_term('r1i1p1f111', 'cmip6plus', 'member_id', 'ripf')
    if not report:
        print(report)
        # Imported here because it is only needed to render the errors.
        from esgvoc.api import BasicValidationErrorVisitor
        error_visitor = BasicValidationErrorVisitor()
        for validation_error in report.errors:
            print(validation_error.accept(error_visitor))
    else:
        print('OK')