esgvoc 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic. Click here for more details.

Files changed (79) hide show
  1. esgvoc/__init__.py +3 -1
  2. esgvoc/api/__init__.py +96 -72
  3. esgvoc/api/data_descriptors/__init__.py +18 -12
  4. esgvoc/api/data_descriptors/activity.py +8 -45
  5. esgvoc/api/data_descriptors/area_label.py +6 -0
  6. esgvoc/api/data_descriptors/branded_suffix.py +5 -0
  7. esgvoc/api/data_descriptors/branded_variable.py +5 -0
  8. esgvoc/api/data_descriptors/consortium.py +16 -56
  9. esgvoc/api/data_descriptors/data_descriptor.py +106 -0
  10. esgvoc/api/data_descriptors/date.py +3 -46
  11. esgvoc/api/data_descriptors/directory_date.py +3 -46
  12. esgvoc/api/data_descriptors/experiment.py +19 -54
  13. esgvoc/api/data_descriptors/forcing_index.py +3 -45
  14. esgvoc/api/data_descriptors/frequency.py +6 -43
  15. esgvoc/api/data_descriptors/grid_label.py +6 -44
  16. esgvoc/api/data_descriptors/horizontal_label.py +6 -0
  17. esgvoc/api/data_descriptors/initialisation_index.py +3 -44
  18. esgvoc/api/data_descriptors/institution.py +11 -54
  19. esgvoc/api/data_descriptors/license.py +4 -44
  20. esgvoc/api/data_descriptors/mip_era.py +6 -44
  21. esgvoc/api/data_descriptors/model_component.py +7 -45
  22. esgvoc/api/data_descriptors/organisation.py +3 -40
  23. esgvoc/api/data_descriptors/physic_index.py +3 -45
  24. esgvoc/api/data_descriptors/product.py +4 -43
  25. esgvoc/api/data_descriptors/realisation_index.py +3 -44
  26. esgvoc/api/data_descriptors/realm.py +4 -42
  27. esgvoc/api/data_descriptors/resolution.py +6 -44
  28. esgvoc/api/data_descriptors/source.py +18 -53
  29. esgvoc/api/data_descriptors/source_type.py +3 -41
  30. esgvoc/api/data_descriptors/sub_experiment.py +3 -41
  31. esgvoc/api/data_descriptors/table.py +6 -48
  32. esgvoc/api/data_descriptors/temporal_label.py +6 -0
  33. esgvoc/api/data_descriptors/time_range.py +3 -27
  34. esgvoc/api/data_descriptors/variable.py +13 -71
  35. esgvoc/api/data_descriptors/variant_label.py +3 -47
  36. esgvoc/api/data_descriptors/vertical_label.py +5 -0
  37. esgvoc/api/project_specs.py +3 -2
  38. esgvoc/api/projects.py +727 -446
  39. esgvoc/api/py.typed +0 -0
  40. esgvoc/api/report.py +29 -16
  41. esgvoc/api/search.py +140 -95
  42. esgvoc/api/universe.py +362 -156
  43. esgvoc/apps/__init__.py +3 -4
  44. esgvoc/apps/drs/constants.py +1 -1
  45. esgvoc/apps/drs/generator.py +185 -198
  46. esgvoc/apps/drs/report.py +272 -136
  47. esgvoc/apps/drs/validator.py +132 -145
  48. esgvoc/apps/py.typed +0 -0
  49. esgvoc/cli/drs.py +32 -21
  50. esgvoc/cli/get.py +35 -31
  51. esgvoc/cli/install.py +11 -8
  52. esgvoc/cli/main.py +0 -2
  53. esgvoc/cli/status.py +5 -5
  54. esgvoc/cli/valid.py +40 -40
  55. esgvoc/core/constants.py +1 -1
  56. esgvoc/core/db/__init__.py +2 -4
  57. esgvoc/core/db/connection.py +5 -3
  58. esgvoc/core/db/models/project.py +50 -8
  59. esgvoc/core/db/models/universe.py +51 -12
  60. esgvoc/core/db/project_ingestion.py +60 -46
  61. esgvoc/core/db/universe_ingestion.py +58 -29
  62. esgvoc/core/exceptions.py +33 -0
  63. esgvoc/core/logging_handler.py +1 -1
  64. esgvoc/core/repo_fetcher.py +4 -3
  65. esgvoc/core/service/__init__.py +37 -5
  66. esgvoc/core/service/configuration/config_manager.py +188 -0
  67. esgvoc/core/service/configuration/setting.py +88 -0
  68. esgvoc/core/service/state.py +49 -32
  69. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/METADATA +34 -3
  70. esgvoc-0.4.0.dist-info/RECORD +80 -0
  71. esgvoc/api/_utils.py +0 -39
  72. esgvoc/cli/config.py +0 -82
  73. esgvoc/core/service/settings.py +0 -73
  74. esgvoc/core/service/settings.toml +0 -17
  75. esgvoc/core/service/settings_default.toml +0 -17
  76. esgvoc-0.2.1.dist-info/RECORD +0 -73
  77. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/WHEEL +0 -0
  78. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/entry_points.txt +0 -0
  79. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/api/universe.py CHANGED
@@ -1,217 +1,423 @@
1
- from typing import Sequence
2
-
3
- from esgvoc.api._utils import (get_universe_session,
4
- instantiate_pydantic_terms)
5
- from esgvoc.api.search import SearchSettings, _create_str_comparison_expression
6
- from esgvoc.core.db.models.universe import DataDescriptor, UTerm
7
- from pydantic import BaseModel
8
- from sqlmodel import Session, select
9
-
10
-
11
- def _find_terms_in_data_descriptor(data_descriptor_id: str,
12
- term_id: str,
13
- session: Session,
14
- settings: SearchSettings|None) -> Sequence[UTerm]:
15
- """Settings only apply on the term_id comparison."""
16
- where_expression = _create_str_comparison_expression(field=UTerm.id,
17
- value=term_id,
18
- settings=settings)
19
- statement = select(UTerm).join(DataDescriptor).where(DataDescriptor.id==data_descriptor_id,
20
- where_expression)
21
- results = session.exec(statement)
22
- result = results.all()
1
+ from typing import Iterable, Sequence
2
+
3
+ from sqlalchemy import text
4
+ from sqlmodel import Session, col, select
5
+
6
+ from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor
7
+ from esgvoc.api.search import (
8
+ Item,
9
+ execute_find_item_statements,
10
+ execute_match_statement,
11
+ generate_matching_condition,
12
+ get_universe_session,
13
+ handle_rank_limit_offset,
14
+ instantiate_pydantic_term,
15
+ instantiate_pydantic_terms,
16
+ )
17
+ from esgvoc.core.db.models.universe import UDataDescriptor, UDataDescriptorFTS5, UTerm, UTermFTS5
18
+
19
+
20
+ def _get_all_terms_in_data_descriptor(data_descriptor: UDataDescriptor,
21
+ selected_term_fields: Iterable[str] | None) -> list[DataDescriptor]:
22
+ result: list[DataDescriptor] = list()
23
+ instantiate_pydantic_terms(data_descriptor.terms, result, selected_term_fields)
23
24
  return result
24
25
 
25
26
 
26
- def find_terms_in_data_descriptor(data_descriptor_id: str,
27
- term_id: str,
28
- settings: SearchSettings|None = None) \
29
- -> list[BaseModel]:
27
+ def get_all_terms_in_data_descriptor(data_descriptor_id: str,
28
+ selected_term_fields: Iterable[str] | None = None) \
29
+ -> list[DataDescriptor]:
30
30
  """
31
- Finds one or more terms in the given data descriptor based on the specified search settings.
32
- This function performs an exact match on the `data_descriptor_id` and
33
- does **not** search for similar or related descriptors.
34
- The given `term_id` is searched according to the search type specified in
35
- the parameter `settings`,
36
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
37
- If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
38
- If any of the provided ids (`data_descriptor_id` or `term_id`) is not found, the function
39
- returns an empty list.
40
-
41
- Behavior based on search type:
42
- - `EXACT` and absence of `settings`: returns zero or one Pydantic term instance in the list.
43
- - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more Pydantic term \
44
- instances in the list.
31
+ Gets all the terms of the given data descriptor.
32
+ This function performs an exact match on the `data_descriptor_id` and does not search
33
+ for similar or related descriptors.
34
+ If the provided `data_descriptor_id` is not found, the function returns an empty list.
45
35
 
46
36
  :param data_descriptor_id: A data descriptor id
47
37
  :type data_descriptor_id: str
48
- :param term_id: A term id to be found
49
- :type term_id: str
50
- :param settings: The search settings
51
- :type settings: SearchSettings|None
52
- :returns: A list of Pydantic model term instances. Returns an empty list if no matches are found.
53
- :rtype: list[BaseModel]
38
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
39
+ fields of the terms are returned. If empty, selects the id and type fields.
40
+ :type selected_term_fields: Iterable[str] | None
41
+ :returns: a list of term instances. Returns an empty list if no matches are found.
42
+ :rtype: list[DataDescriptor]
54
43
  """
55
- result: list[BaseModel] = list()
56
44
  with get_universe_session() as session:
57
- terms = _find_terms_in_data_descriptor(data_descriptor_id, term_id, session, settings)
58
- instantiate_pydantic_terms(terms, result)
45
+ data_descriptor = _get_data_descriptor_in_universe(data_descriptor_id, session)
46
+ if data_descriptor:
47
+ result = _get_all_terms_in_data_descriptor(data_descriptor, selected_term_fields)
48
+ else:
49
+ result = list()
59
50
  return result
60
51
 
61
52
 
62
- def _find_terms_in_universe(term_id: str,
63
- session: Session,
64
- settings: SearchSettings|None) -> Sequence[UTerm]:
65
- where_expression = _create_str_comparison_expression(field=UTerm.id,
66
- value=term_id,
67
- settings=settings)
68
- statement = select(UTerm).where(where_expression)
69
- results = session.exec(statement).all()
70
- return results
53
+ def _get_all_data_descriptors_in_universe(session: Session) -> Sequence[UDataDescriptor]:
54
+ statement = select(UDataDescriptor)
55
+ data_descriptors = session.exec(statement)
56
+ result = data_descriptors.all()
57
+ return result
71
58
 
72
59
 
73
- def find_terms_in_universe(term_id: str,
74
- settings: SearchSettings|None = None) \
75
- -> list[BaseModel]:
60
+ def get_all_data_descriptors_in_universe() -> list[str]:
76
61
  """
77
- Finds one or more terms of the universe.
78
- The given `term_id` is searched according to the search type specified in
79
- the parameter `settings`,
80
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
81
- If the parameter `settings` is `None`, this function performs an exact match on the `term_id`.
82
- Terms are unique within a data descriptor but may have some synonyms in the universe.
83
- If the provided `term_id` is not found, the function returns an empty list.
62
+ Gets all the data descriptors of the universe.
84
63
 
85
- :param term_id: A term id to be found
86
- :type term_id: str
87
- :param settings: The search settings
88
- :type settings: SearchSettings|None
89
- :returns: A list of Pydantic term instances. Returns an empty list if no matches are found.
90
- :rtype: list[BaseModel]
64
+ :returns: A list of data descriptor ids.
65
+ :rtype: list[str]
91
66
  """
92
- result: list[BaseModel] = list()
67
+ result = list()
93
68
  with get_universe_session() as session:
94
- terms = _find_terms_in_universe(term_id, session, settings)
95
- instantiate_pydantic_terms(terms, result)
69
+ data_descriptors = _get_all_data_descriptors_in_universe(session)
70
+ for data_descriptor in data_descriptors:
71
+ result.append(data_descriptor.id)
96
72
  return result
97
73
 
98
74
 
99
- def _get_all_terms_in_data_descriptor(data_descriptor: DataDescriptor) -> list[BaseModel]:
100
- result: list[BaseModel] = list()
101
- instantiate_pydantic_terms(data_descriptor.terms, result)
75
+ def get_all_terms_in_universe(selected_term_fields: Iterable[str] | None = None) -> list[DataDescriptor]:
76
+ """
77
+ Gets all the terms of the universe.
78
+ Terms are unique within a data descriptor but may have some synonyms in the universe.
79
+
80
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
81
+ fields of the terms are returned. If empty, selects the id and type fields.
82
+ :type selected_term_fields: Iterable[str] | None
83
+ :returns: A list of term instances.
84
+ :rtype: list[DataDescriptor]
85
+ """
86
+ result = list()
87
+ with get_universe_session() as session:
88
+ data_descriptors = _get_all_data_descriptors_in_universe(session)
89
+ for data_descriptor in data_descriptors:
90
+ # Term may have some synonyms within the whole universe.
91
+ terms = _get_all_terms_in_data_descriptor(data_descriptor, selected_term_fields)
92
+ result.extend(terms)
102
93
  return result
103
94
 
104
95
 
105
- def _find_data_descriptors_in_universe(data_descriptor_id: str,
106
- session: Session,
107
- settings: SearchSettings|None) -> Sequence[DataDescriptor]:
108
- where_expression = _create_str_comparison_expression(field=DataDescriptor.id,
109
- value=data_descriptor_id,
110
- settings=settings)
111
- statement = select(DataDescriptor).where(where_expression)
96
+ def _get_term_in_data_descriptor(data_descriptor_id: str, term_id: str, session: Session) \
97
+ -> UTerm | None:
98
+ statement = select(UTerm).join(UDataDescriptor).where(UDataDescriptor.id == data_descriptor_id,
99
+ UTerm.id == term_id)
112
100
  results = session.exec(statement)
113
- result = results.all()
101
+ result = results.one_or_none()
114
102
  return result
115
103
 
116
104
 
117
- def get_all_terms_in_data_descriptor(data_descriptor_id: str) \
118
- -> list[BaseModel]:
105
+ def get_term_in_data_descriptor(data_descriptor_id: str,
106
+ term_id: str,
107
+ selected_term_fields: Iterable[str] | None = None) \
108
+ -> DataDescriptor | None:
119
109
  """
120
- Gets all the terms of the given data descriptor.
121
- This function performs an exact match on the `data_descriptor_id` and does **not** search
122
- for similar or related descriptors.
123
- If the provided `data_descriptor_id` is not found, the function returns an empty list.
110
+ Returns the term, in the given data descriptor, whose id corresponds exactly to the given term id.
111
+ This function performs an exact match on the `term_id` and the `data_descriptor_id` and does
112
+ not search for similar or related terms and data descriptors.
113
+ If the provided `term_id` is not found, the function returns `None`.
124
114
 
125
- :param data_descriptor_id: A data descriptor id
115
+ :param data_descriptor_id: The id of the given data descriptor.
126
116
  :type data_descriptor_id: str
127
- :returns: a list of Pydantic term instances. Returns an empty list if no matches are found.
128
- :rtype: list[BaseModel]
117
+ :param term_id: The id of a term to be found.
118
+ :type term_id: str
119
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
120
+ fields of the terms are returned. If empty, selects the id and type fields.
121
+ :type selected_term_fields: Iterable[str] | None
122
+ :returns: A term instance. Returns `None` if no match is found.
123
+ :rtype: DataDescriptor | None
129
124
  """
130
125
  with get_universe_session() as session:
131
- data_descriptors = _find_data_descriptors_in_universe(data_descriptor_id,
132
- session,
133
- None)
134
- if data_descriptors:
135
- data_descriptor = data_descriptors[0]
136
- result = _get_all_terms_in_data_descriptor(data_descriptor)
126
+ term_found = _get_term_in_data_descriptor(data_descriptor_id, term_id, session)
127
+ if term_found:
128
+ result = instantiate_pydantic_term(term_found, selected_term_fields)
137
129
  else:
138
- result = list()
130
+ result = None
131
+ return result
132
+
133
+
134
+ def _get_term_in_universe(term_id: str, session: Session) -> UTerm | None:
135
+ statement = select(UTerm).where(UTerm.id == term_id)
136
+ results = session.exec(statement)
137
+ result = results.first() # Term ids are not supposed to be unique within the universe.
139
138
  return result
140
139
 
141
140
 
142
- def find_data_descriptors_in_universe(data_descriptor_id: str,
143
- settings: SearchSettings|None = None) \
144
- -> list[dict]:
141
+ def get_term_in_universe(term_id: str,
142
+ selected_term_fields: Iterable[str] | None = None) -> DataDescriptor | None:
145
143
  """
146
- Finds one or more data descriptor of the universe, based on the specified search settings.
147
- The given `data_descriptor_id` is searched according to the search type specified in
148
- the parameter `settings`,
149
- which allows a flexible matching (e.g., `LIKE` may return multiple results).
150
- If the parameter `settings` is `None`, this function performs an exact match on
151
- the `data_descriptor_id`.
152
- If the provided `data_descriptor_id` is not found, the function returns an empty list.
153
-
154
- Behavior based on search type:
155
- - `EXACT` and absence of `settings`: returns zero or one data descriptor context in the list.
156
- - `REGEX`, `LIKE`, `STARTS_WITH` and `ENDS_WITH`: returns zero, one or more \
157
- data descriptor contexts in the list.
144
+ Returns the first occurrence of the terms, in the universe, whose id corresponds exactly to
145
+ the given term id.
146
+ Terms are unique within a data descriptor but may have some synonyms in the universe.
147
+ This function performs an exact match on the `term_id` and does not search
148
+ for similar or related terms. If the provided `term_id` is not found, the function returns `None`.
158
149
 
159
- :param data_descriptor_id: A data descriptor id to be found
160
- :type data_descriptor_id: str
161
- :param settings: The search settings
162
- :type settings: SearchSettings|None
163
- :returns: A list of data descriptor contexts. Returns an empty list if no matches are found.
164
- :rtype: list[dict]
150
+ :param term_id: The id of a term to be found.
151
+ :type term_id: str
152
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
153
+ fields of the terms are returned. If empty, selects the id and type fields.
154
+ :type selected_term_fields: Iterable[str] | None
155
+ :returns: A term instance. Returns `None` if no match is found.
156
+ :rtype: DataDescriptor | None
165
157
  """
166
- result = list()
167
158
  with get_universe_session() as session:
168
- data_descriptors = _find_data_descriptors_in_universe(data_descriptor_id,
169
- session,
170
- settings)
171
- for data_descriptor in data_descriptors:
172
- result.append(data_descriptor.context)
159
+ term_found = _get_term_in_universe(term_id, session)
160
+ if term_found:
161
+ result = instantiate_pydantic_term(term_found, selected_term_fields)
162
+ else:
163
+ result = None
173
164
  return result
174
165
 
175
166
 
176
- def _get_all_data_descriptors_in_universe(session: Session) -> Sequence[DataDescriptor]:
177
- statement = select(DataDescriptor)
178
- data_descriptors = session.exec(statement)
179
- result = data_descriptors.all()
167
+ def _get_data_descriptor_in_universe(data_descriptor_id: str, session: Session) -> UDataDescriptor | None:
168
+ statement = select(UDataDescriptor).where(UDataDescriptor.id == data_descriptor_id)
169
+ results = session.exec(statement)
170
+ result = results.one_or_none()
180
171
  return result
181
172
 
182
173
 
183
- def get_all_data_descriptors_in_universe() -> list[str]:
174
+ def get_data_descriptor_in_universe(data_descriptor_id: str) -> tuple[str, dict] | None:
184
175
  """
185
- Gets all the data descriptors of the universe.
176
+ Returns the id and the context of the data descriptor, in the universe, whose id corresponds
177
+ exactly to the given data descriptor id.
178
+ This function performs an exact match on the `data_descriptor_id` and does not
179
+ search for similar or related data descriptors.
180
+ If the provided `data_descriptor_id` is not found, the function returns `None`.
186
181
 
187
- :returns: A list of data descriptor ids.
188
- :rtype: list[str]
182
+ :param data_descriptor_id: An id of a data descriptor to be found.
183
+ :type data_descriptor_id: str
184
+ :returns: The data descriptor id and context. Returns `None` if no match is found.
185
+ :rtype: tuple[str, dict] | None
189
186
  """
190
- result = list()
191
187
  with get_universe_session() as session:
192
- data_descriptors = _get_all_data_descriptors_in_universe(session)
193
- for data_descriptor in data_descriptors:
194
- result.append(data_descriptor.id)
188
+ data_descriptor_found = _get_data_descriptor_in_universe(data_descriptor_id, session)
189
+ if data_descriptor_found:
190
+ result = data_descriptor_found.id, data_descriptor_found.context
191
+ else:
192
+ result = None
195
193
  return result
196
194
 
197
195
 
198
- def get_all_terms_in_universe() -> list[BaseModel]:
196
+ def _find_data_descriptors_in_universe(expression: str,
197
+ session: Session,
198
+ only_id: bool = False,
199
+ limit: int | None = None,
200
+ offset: int | None = None) -> Sequence[UDataDescriptor]:
201
+ matching_condition = generate_matching_condition(UDataDescriptorFTS5, expression, only_id)
202
+ tmp_statement = select(UDataDescriptorFTS5).where(matching_condition)
203
+ statement = select(UDataDescriptor).from_statement(handle_rank_limit_offset(tmp_statement,
204
+ limit, offset))
205
+ return execute_match_statement(expression, statement, session)
206
+
207
+
208
+ def find_data_descriptors_in_universe(expression: str,
209
+ only_id: bool = False,
210
+ limit: int | None = None,
211
+ offset: int | None = None) -> list[tuple[str, dict]]:
199
212
  """
200
- Gets all the terms of the universe.
201
- Terms are unique within a data descriptor but may have some synonyms in the universe.
213
+ Find data descriptors in the universe based on a full text search defined by the given `expression`.
214
+ The `expression` comes from the powerful
215
+ `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
216
+ and corresponds to the expression of the `MATCH` operator.
217
+ It can be composed of one or multiple keywords combined with boolean
218
+ operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
219
+ with the wildcard `*`.
220
+ The function returns a list of data descriptor ids and contexts, sorted according to the
221
+ bm25 ranking metric (list index `0` has the highest rank).
222
+ If the provided `expression` does not hit any data descriptor, the function returns an empty list.
223
+ The function searches for the `expression` in the data descriptor specifications.
224
+ However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
225
+ data descriptors. **At the moment, `only_id` is set to `True` as the data descriptors
226
+ haven't got any description.**
227
+
228
+ :param expression: The full text search expression.
229
+ :type expression: str
230
+ :param only_id: Performs the search only on ids, otherwise on all the specifications.
231
+ :type only_id: bool
232
+ :param limit: Limit the number of returned items found. Returns all items found if \
233
+ `limit` is either `None`, zero or negative.
234
+ :type limit: int | None
235
+ :param offset: Skips `offset` number of items found. Ignored if `offset` is \
236
+ either `None`, zero or negative.
237
+ :type offset: int | None
238
+ :returns: A list of data descriptor ids and contexts. Returns an empty list if no matches are found.
239
+ :rtype: list[tuple[str, dict]]
240
+ :raises EsgvocValueError: If the `expression` cannot be interpreted.
241
+ """
242
+ result: list[tuple[str, dict]] = list()
243
+ with get_universe_session() as session:
244
+ data_descriptors_found = _find_data_descriptors_in_universe(expression, session, only_id,
245
+ limit, offset)
246
+ if data_descriptors_found:
247
+ for data_descriptor_found in data_descriptors_found:
248
+ result.append((data_descriptor_found.id, data_descriptor_found.context))
249
+ return result
250
+
202
251
 
203
- :returns: A list of Pydantic term instances.
204
- :rtype: list[BaseModel]
252
+ def _find_terms_in_universe(expression: str, session: Session,
253
+ only_id: bool = False,
254
+ limit: int | None = None,
255
+ offset: int | None = None) -> Sequence[UTerm]:
256
+ matching_condition = generate_matching_condition(UTermFTS5, expression, only_id)
257
+ tmp_statement = select(UTermFTS5).where(matching_condition)
258
+ statement = select(UTerm).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
259
+ return execute_match_statement(expression, statement, session)
260
+
261
+
262
+ def find_terms_in_universe(expression: str,
263
+ only_id: bool = False,
264
+ limit: int | None = None,
265
+ offset: int | None = None,
266
+ selected_term_fields: Iterable[str] | None = None) -> list[DataDescriptor]:
205
267
  """
206
- result = list()
268
+ Find terms in the universe based on a full-text search defined by the given `expression`.
269
+ The `expression` comes from the powerful
270
+ `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
271
+ and corresponds to the expression of the `MATCH` operator.
272
+ It can be composed of one or multiple keywords combined with boolean
273
+ operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
274
+ with the wildcard `*`.
275
+ The function returns a list of term instances sorted according to the
276
+ bm25 ranking metric (list index `0` has the highest rank).
277
+ If the provided `expression` does not hit any term, the function returns an empty list.
278
+ The function searches for the `expression` in the term specifications.
279
+ However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the terms.
280
+
281
+ :param expression: The full text search expression.
282
+ :type expression: str
283
+ :param only_id: Performs the search only on ids, otherwise on all the specifications.
284
+ :type only_id: bool
285
+ :param limit: Limit the number of returned items found. Returns all items found if \
286
+ `limit` is either `None`, zero or negative.
287
+ :type limit: int | None
288
+ :param offset: Skips `offset` number of items found. Ignored if `offset` is \
289
+ either `None`, zero or negative.
290
+ :type offset: int | None
291
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
292
+ fields of the terms are returned. If empty, selects the id and type fields.
293
+ :type selected_term_fields: Iterable[str] | None
294
+ :returns: A list of term instances. Returns an empty list if no matches are found.
295
+ :rtype: list[DataDescriptor]
296
+ :raises EsgvocValueError: If the `expression` cannot be interpreted.
297
+ """
298
+ result: list[DataDescriptor] = list()
207
299
  with get_universe_session() as session:
208
- data_descriptors = _get_all_data_descriptors_in_universe(session)
209
- for data_descriptor in data_descriptors:
210
- # Term may have some synonyms within the whole universe.
211
- terms = _get_all_terms_in_data_descriptor(data_descriptor)
212
- result.extend(terms)
300
+ uterms_found = _find_terms_in_universe(expression, session, only_id, limit, offset)
301
+ if uterms_found:
302
+ instantiate_pydantic_terms(uterms_found, result, selected_term_fields)
213
303
  return result
214
304
 
215
305
 
216
- if __name__ == "__main__":
217
- print(find_terms_in_data_descriptor('institution', 'ipsl'))
306
+ def _find_terms_in_data_descriptor(expression: str, data_descriptor_id: str,
307
+ session: Session,
308
+ only_id: bool = False,
309
+ limit: int | None = None,
310
+ offset: int | None = None) -> Sequence[UTerm]:
311
+ matching_condition = generate_matching_condition(UTermFTS5, expression, only_id)
312
+ where_condition = UDataDescriptor.id == data_descriptor_id, matching_condition
313
+ tmp_statement = select(UTermFTS5).join(UDataDescriptor).where(*where_condition)
314
+ statement = select(UTerm).from_statement(handle_rank_limit_offset(tmp_statement, limit, offset))
315
+ return execute_match_statement(expression, statement, session)
316
+
317
+
318
+ def find_terms_in_data_descriptor(expression: str, data_descriptor_id: str,
319
+ only_id: bool = False,
320
+ limit: int | None = None,
321
+ offset: int | None = None,
322
+ selected_term_fields: Iterable[str] | None = None) \
323
+ -> list[DataDescriptor]:
324
+ """
325
+ Find terms in the given data descriptor based on a full-text search defined by the given `expression`.
326
+ The `expression` comes from the powerful
327
+ `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
328
+ and corresponds to the expression of the `MATCH` operator.
329
+ It can be composed of one or multiple keywords combined with boolean
330
+ operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
331
+ with the wildcard `*`.
332
+ The function returns a list of term instances sorted according to the
333
+ bm25 ranking metric (list index `0` has the highest rank).
334
+ This function performs an exact match on the `data_descriptor_id`,
335
+ and does not search for similar or related data descriptors.
336
+ If the provided `expression` does not hit any term or the given `data_descriptor_id` does not
337
+ match exactly to an id of a data descriptor, the function returns an empty list.
338
+ The function searches for the `expression` in the term specifications.
339
+ However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the terms.
340
+
341
+ :param expression: The full text search expression.
342
+ :type expression: str
343
+ :param only_id: Performs the search only on ids, otherwise on all the specifications.
344
+ :type only_id: bool
345
+ :param limit: Limit the number of returned items found. Returns all items found if \
346
+ `limit` is either `None`, zero or negative.
347
+ :type limit: int | None
348
+ :param offset: Skips `offset` number of items found. Ignored if `offset` is \
349
+ either `None`, zero or negative.
350
+ :type offset: int | None
351
+ :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the \
352
+ fields of the terms are returned. If empty, selects the id and type fields.
353
+ :type selected_term_fields: Iterable[str] | None
354
+ :returns: A list of term instances. Returns an empty list if no matches are found.
355
+ :rtype: list[DataDescriptor]
356
+ :raises EsgvocValueError: If the `expression` cannot be interpreted.
357
+ """
358
+ result: list[DataDescriptor] = list()
359
+ with get_universe_session() as session:
360
+ uterms_found = _find_terms_in_data_descriptor(expression, data_descriptor_id,
361
+ session, only_id, limit, offset)
362
+ if uterms_found:
363
+ instantiate_pydantic_terms(uterms_found, result, selected_term_fields)
364
+ return result
365
+
366
+
367
+ def find_items_in_universe(expression: str,
368
+ only_id: bool = False,
369
+ limit: int | None = None,
370
+ offset: int | None = None) -> list[Item]:
371
+ """
372
+ Find items, at the moment terms and data descriptors, in the universe based on a full-text
373
+ search defined by the given `expression`. The `expression` comes from the powerful
374
+ `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
375
+ and corresponds to the expression of the `MATCH` operator.
376
+ It can be composed of one or multiple keywords combined with boolean
377
+ operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
378
+ with the wildcard `*`.
379
+ The function returns a list of item instances sorted according to the
380
+ bm25 ranking metric (list index `0` has the highest rank).
381
+ If the provided `expression` does not hit any item, the function returns an empty list.
382
+ The function searches for the `expression` in the term and data descriptor specifications.
383
+ However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
384
+ terms and data descriptors. **At the moment, `only_id` is set to `True` for the data descriptors
385
+ because they haven't got any description.**
386
+
387
+ :param expression: The full text search expression.
388
+ :type expression: str
389
+ :param only_id: Performs the search only on ids, otherwise on all the specifications.
390
+ :type only_id: bool
391
+ :param limit: Limit the number of returned items found. Returns all items found if \
392
+ `limit` is either `None`, zero or negative.
393
+ :type limit: int | None
394
+ :param offset: Skips `offset` number of items found. Ignored if `offset` is \
395
+ either `None`, zero or negative.
396
+ :type offset: int | None
397
+ :returns: A list of item instances. Returns an empty list if no matches are found.
398
+ :rtype: list[Item]
399
+ :raises EsgvocValueError: If the `expression` cannot be interpreted.
400
+ """
401
+ # TODO: execute union query when it will be possible to compute parent of terms and data descriptors.
402
+ result = list()
403
+ with get_universe_session() as session:
404
+ if only_id:
405
+ dd_column = col(UDataDescriptorFTS5.id)
406
+ term_column = col(UTermFTS5.id)
407
+ else:
408
+ dd_column = col(UDataDescriptorFTS5.id) # TODO: use specs when implemented!
409
+ term_column = col(UTermFTS5.specs) # type: ignore
410
+ dd_where_condition = dd_column.match(expression)
411
+ dd_statement = select(UDataDescriptorFTS5.id,
412
+ text("'data_descriptor' AS TYPE"),
413
+ text("'universe' AS TYPE"),
414
+ text('rank')).where(dd_where_condition)
415
+ term_where_condition = term_column.match(expression)
416
+ term_statement = select(UTermFTS5.id,
417
+ text("'term' AS TYPE"),
418
+ UDataDescriptor.id,
419
+ text('rank')).join(UDataDescriptor) \
420
+ .where(term_where_condition)
421
+ result = execute_find_item_statements(session, expression, dd_statement,
422
+ term_statement, limit, offset)
423
+ return result
esgvoc/apps/__init__.py CHANGED
@@ -1,7 +1,6 @@
1
1
 
2
- from esgvoc.apps.drs.validator import DrsValidator
3
- from esgvoc.apps.drs.report import DrsValidationReport
4
2
  from esgvoc.apps.drs.generator import DrsGenerator
5
- from esgvoc.apps.drs.report import DrsGeneratorReport
3
+ from esgvoc.apps.drs.report import DrsGenerationReport, DrsValidationReport
4
+ from esgvoc.apps.drs.validator import DrsValidator
6
5
 
7
- __all__ = ["DrsValidator", "DrsValidationReport", "DrsGenerator", "DrsGeneratorReport"]
6
+ __all__ = ["DrsValidator", "DrsValidationReport", "DrsGenerator", "DrsGenerationReport"]
@@ -1,2 +1,2 @@
1
1
  FILE_NAME_EXTENSION_KEY = 'extension'
2
- FILE_NAME_EXTENSION_SEPARATOR_KEY = 'extension_separator'
2
+ FILE_NAME_EXTENSION_SEPARATOR_KEY = 'extension_separator'