esgvoc 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. esgvoc/__init__.py +3 -0
  2. esgvoc/api/__init__.py +91 -0
  3. esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
  4. esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
  5. esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
  6. esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
  7. esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
  8. esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
  9. esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
  10. esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
  11. esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
  12. esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
  13. esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
  14. esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
  15. esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
  16. esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
  17. esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
  18. esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
  19. esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
  20. esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
  21. esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
  22. esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
  23. esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
  24. esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
  25. esgvoc/api/data_descriptors/__init__.py +159 -0
  26. esgvoc/api/data_descriptors/activity.py +72 -0
  27. esgvoc/api/data_descriptors/archive.py +5 -0
  28. esgvoc/api/data_descriptors/area_label.py +30 -0
  29. esgvoc/api/data_descriptors/branded_suffix.py +30 -0
  30. esgvoc/api/data_descriptors/branded_variable.py +21 -0
  31. esgvoc/api/data_descriptors/citation_url.py +5 -0
  32. esgvoc/api/data_descriptors/contact.py +5 -0
  33. esgvoc/api/data_descriptors/conventions.py +28 -0
  34. esgvoc/api/data_descriptors/creation_date.py +18 -0
  35. esgvoc/api/data_descriptors/data_descriptor.py +127 -0
  36. esgvoc/api/data_descriptors/data_specs_version.py +25 -0
  37. esgvoc/api/data_descriptors/date.py +5 -0
  38. esgvoc/api/data_descriptors/directory_date.py +22 -0
  39. esgvoc/api/data_descriptors/drs_specs.py +38 -0
  40. esgvoc/api/data_descriptors/experiment.py +215 -0
  41. esgvoc/api/data_descriptors/forcing_index.py +21 -0
  42. esgvoc/api/data_descriptors/frequency.py +48 -0
  43. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  44. esgvoc/api/data_descriptors/grid.py +43 -0
  45. esgvoc/api/data_descriptors/horizontal_label.py +20 -0
  46. esgvoc/api/data_descriptors/initialization_index.py +27 -0
  47. esgvoc/api/data_descriptors/institution.py +80 -0
  48. esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
  49. esgvoc/api/data_descriptors/license.py +31 -0
  50. esgvoc/api/data_descriptors/member_id.py +9 -0
  51. esgvoc/api/data_descriptors/mip_era.py +26 -0
  52. esgvoc/api/data_descriptors/model_component.py +32 -0
  53. esgvoc/api/data_descriptors/models_test/models.py +17 -0
  54. esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
  55. esgvoc/api/data_descriptors/obs_type.py +5 -0
  56. esgvoc/api/data_descriptors/organisation.py +22 -0
  57. esgvoc/api/data_descriptors/physics_index.py +21 -0
  58. esgvoc/api/data_descriptors/product.py +16 -0
  59. esgvoc/api/data_descriptors/publication_status.py +5 -0
  60. esgvoc/api/data_descriptors/realization_index.py +24 -0
  61. esgvoc/api/data_descriptors/realm.py +16 -0
  62. esgvoc/api/data_descriptors/regex.py +5 -0
  63. esgvoc/api/data_descriptors/region.py +35 -0
  64. esgvoc/api/data_descriptors/resolution.py +7 -0
  65. esgvoc/api/data_descriptors/source.py +120 -0
  66. esgvoc/api/data_descriptors/source_type.py +5 -0
  67. esgvoc/api/data_descriptors/sub_experiment.py +5 -0
  68. esgvoc/api/data_descriptors/table.py +28 -0
  69. esgvoc/api/data_descriptors/temporal_label.py +20 -0
  70. esgvoc/api/data_descriptors/time_range.py +17 -0
  71. esgvoc/api/data_descriptors/title.py +5 -0
  72. esgvoc/api/data_descriptors/tracking_id.py +67 -0
  73. esgvoc/api/data_descriptors/variable.py +56 -0
  74. esgvoc/api/data_descriptors/variant_label.py +25 -0
  75. esgvoc/api/data_descriptors/vertical_label.py +20 -0
  76. esgvoc/api/project_specs.py +143 -0
  77. esgvoc/api/projects.py +1253 -0
  78. esgvoc/api/py.typed +0 -0
  79. esgvoc/api/pydantic_handler.py +146 -0
  80. esgvoc/api/report.py +127 -0
  81. esgvoc/api/search.py +171 -0
  82. esgvoc/api/universe.py +434 -0
  83. esgvoc/apps/__init__.py +6 -0
  84. esgvoc/apps/cmor_tables/__init__.py +7 -0
  85. esgvoc/apps/cmor_tables/cvs_table.py +948 -0
  86. esgvoc/apps/drs/__init__.py +0 -0
  87. esgvoc/apps/drs/constants.py +2 -0
  88. esgvoc/apps/drs/generator.py +429 -0
  89. esgvoc/apps/drs/report.py +540 -0
  90. esgvoc/apps/drs/validator.py +312 -0
  91. esgvoc/apps/ga/__init__.py +104 -0
  92. esgvoc/apps/ga/example_usage.py +315 -0
  93. esgvoc/apps/ga/models/__init__.py +47 -0
  94. esgvoc/apps/ga/models/netcdf_header.py +306 -0
  95. esgvoc/apps/ga/models/validator.py +491 -0
  96. esgvoc/apps/ga/test_ga.py +161 -0
  97. esgvoc/apps/ga/validator.py +277 -0
  98. esgvoc/apps/jsg/json_schema_generator.py +341 -0
  99. esgvoc/apps/jsg/templates/template.jinja +241 -0
  100. esgvoc/apps/test_cv/README.md +214 -0
  101. esgvoc/apps/test_cv/__init__.py +0 -0
  102. esgvoc/apps/test_cv/cv_tester.py +1611 -0
  103. esgvoc/apps/test_cv/example_usage.py +216 -0
  104. esgvoc/apps/vr/__init__.py +12 -0
  105. esgvoc/apps/vr/build_variable_registry.py +71 -0
  106. esgvoc/apps/vr/example_usage.py +60 -0
  107. esgvoc/apps/vr/vr_app.py +333 -0
  108. esgvoc/cli/clean.py +304 -0
  109. esgvoc/cli/cmor.py +46 -0
  110. esgvoc/cli/config.py +1300 -0
  111. esgvoc/cli/drs.py +267 -0
  112. esgvoc/cli/find.py +138 -0
  113. esgvoc/cli/get.py +155 -0
  114. esgvoc/cli/install.py +41 -0
  115. esgvoc/cli/main.py +60 -0
  116. esgvoc/cli/offline.py +269 -0
  117. esgvoc/cli/status.py +79 -0
  118. esgvoc/cli/test_cv.py +258 -0
  119. esgvoc/cli/valid.py +147 -0
  120. esgvoc/core/constants.py +17 -0
  121. esgvoc/core/convert.py +0 -0
  122. esgvoc/core/data_handler.py +206 -0
  123. esgvoc/core/db/__init__.py +3 -0
  124. esgvoc/core/db/connection.py +40 -0
  125. esgvoc/core/db/models/mixins.py +25 -0
  126. esgvoc/core/db/models/project.py +102 -0
  127. esgvoc/core/db/models/universe.py +98 -0
  128. esgvoc/core/db/project_ingestion.py +231 -0
  129. esgvoc/core/db/universe_ingestion.py +172 -0
  130. esgvoc/core/exceptions.py +33 -0
  131. esgvoc/core/logging_handler.py +26 -0
  132. esgvoc/core/repo_fetcher.py +345 -0
  133. esgvoc/core/service/__init__.py +41 -0
  134. esgvoc/core/service/configuration/config_manager.py +196 -0
  135. esgvoc/core/service/configuration/setting.py +363 -0
  136. esgvoc/core/service/data_merger.py +634 -0
  137. esgvoc/core/service/esg_voc.py +77 -0
  138. esgvoc/core/service/resolver_config.py +56 -0
  139. esgvoc/core/service/state.py +324 -0
  140. esgvoc/core/service/string_heuristics.py +98 -0
  141. esgvoc/core/service/term_cache.py +108 -0
  142. esgvoc/core/service/uri_resolver.py +133 -0
  143. esgvoc-2.0.2.dist-info/METADATA +82 -0
  144. esgvoc-2.0.2.dist-info/RECORD +147 -0
  145. esgvoc-2.0.2.dist-info/WHEEL +4 -0
  146. esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
  147. esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0
esgvoc/api/universe.py ADDED
@@ -0,0 +1,434 @@
1
+ from typing import Iterable, Sequence
2
+
3
+ from sqlalchemy import text
4
+ from sqlmodel import Session, col, select
5
+
6
+ from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor
7
+ from esgvoc.api.pydantic_handler import instantiate_pydantic_term
8
+ from esgvoc.api.search import (
9
+ Item,
10
+ execute_find_item_statements,
11
+ execute_match_statement,
12
+ generate_matching_condition,
13
+ get_universe_session,
14
+ handle_rank_limit_offset,
15
+ instantiate_pydantic_terms,
16
+ process_expression,
17
+ )
18
+ from esgvoc.core.db.models.universe import UDataDescriptor, UDataDescriptorFTS5, UTerm, UTermFTS5
19
+
20
+
21
def _get_all_terms_in_data_descriptor(
    data_descriptor: UDataDescriptor,
    selected_term_fields: Iterable[str] | None,
) -> list[DataDescriptor]:
    """
    Builds the term instances of an already loaded data descriptor.

    :param data_descriptor: The data descriptor row whose `terms` are converted.
    :type data_descriptor: UDataDescriptor
    :param selected_term_fields: The term fields to select or `None`; forwarded as is to
        `instantiate_pydantic_terms`.
    :type selected_term_fields: Iterable[str] | None
    :returns: The list handed to `instantiate_pydantic_terms` for filling.
    :rtype: list[DataDescriptor]
    """
    # The helper receives the output list as an argument; it is returned afterwards.
    terms: list[DataDescriptor] = []
    instantiate_pydantic_terms(data_descriptor.terms, terms, selected_term_fields)
    return terms
27
+
28
+
29
def get_all_terms_in_data_descriptor(
    data_descriptor_id: str,
    selected_term_fields: Iterable[str] | None = None,
) -> list[DataDescriptor]:
    """
    Gets all the terms of the given data descriptor.
    The `data_descriptor_id` is matched exactly: similar or related descriptors are not
    searched for. If no data descriptor has this id, an empty list is returned.

    :param data_descriptor_id: A data descriptor id.
    :type data_descriptor_id: str
    :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the
        fields of the terms are returned. If empty, selects the id and type fields.
    :type selected_term_fields: Iterable[str] | None
    :returns: A list of term instances. Returns an empty list if no matches are found.
    :rtype: list[DataDescriptor]
    """
    with get_universe_session() as session:
        descriptor = _get_data_descriptor_in_universe(data_descriptor_id, session)
        # Terms are instantiated while the session is still open.
        terms = (
            _get_all_terms_in_data_descriptor(descriptor, selected_term_fields)
            if descriptor
            else list()
        )
    return terms
53
+
54
+
55
def _get_all_data_descriptors_in_universe(session: Session) -> Sequence[UDataDescriptor]:
    """
    Selects every data descriptor row through the given session.

    :param session: The session used to execute the select statement.
    :type session: Session
    :returns: All the rows returned by the unfiltered select on `UDataDescriptor`.
    :rtype: Sequence[UDataDescriptor]
    """
    return session.exec(select(UDataDescriptor)).all()
60
+
61
+
62
def get_all_data_descriptors_in_universe() -> list[str]:
    """
    Gets all the data descriptors of the universe.

    :returns: A list of data descriptor ids.
    :rtype: list[str]
    """
    ids = list()
    with get_universe_session() as session:
        # Only the ids are kept; they are read while the session is open.
        ids.extend(descriptor.id for descriptor in _get_all_data_descriptors_in_universe(session))
    return ids
75
+
76
+
77
def get_all_terms_in_universe(selected_term_fields: Iterable[str] | None = None) -> list[DataDescriptor]:
    """
    Gets all the terms of the universe.
    Terms are unique within a data descriptor but may have some synonyms in the universe.

    :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the
        fields of the terms are returned. If empty, selects the id and type fields.
    :type selected_term_fields: Iterable[str] | None
    :returns: A list of term instances.
    :rtype: list[DataDescriptor]
    """
    all_terms = list()
    with get_universe_session() as session:
        for descriptor in _get_all_data_descriptors_in_universe(session):
            # Terms are gathered descriptor by descriptor and are not de-duplicated:
            # a term may have some synonyms within the whole universe.
            all_terms.extend(_get_all_terms_in_data_descriptor(descriptor, selected_term_fields))
    return all_terms
96
+
97
+
98
def _get_term_in_data_descriptor(data_descriptor_id: str, term_id: str, session: Session) -> UTerm | None:
    """
    Selects the term row matching both the given data descriptor id and term id.

    :param data_descriptor_id: The id the joined data descriptor must be equal to.
    :type data_descriptor_id: str
    :param term_id: The id the term must be equal to.
    :type term_id: str
    :param session: The session used to execute the statement.
    :type session: Session
    :returns: The matching row, or `None` when there is none (`one_or_none` semantics).
    :rtype: UTerm | None
    """
    query = (
        select(UTerm)
        .join(UDataDescriptor)
        .where(UDataDescriptor.id == data_descriptor_id, UTerm.id == term_id)
    )
    return session.exec(query).one_or_none()
103
+
104
+
105
def get_term_in_data_descriptor(
    data_descriptor_id: str,
    term_id: str,
    selected_term_fields: Iterable[str] | None = None,
) -> DataDescriptor | None:
    """
    Returns the term, in the given data descriptor, whose id corresponds exactly to the given term id.
    Both the `term_id` and the `data_descriptor_id` are matched exactly: similar or related
    terms and data descriptors are not searched for.
    If the provided `term_id` is not found, the function returns `None`.

    :param data_descriptor_id: The id of the given data descriptor.
    :type data_descriptor_id: str
    :param term_id: The id of a term to be found.
    :type term_id: str
    :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the
        fields of the terms are returned. If empty, selects the id and type fields.
    :type selected_term_fields: Iterable[str] | None
    :returns: A term instance. Returns `None` if no match is found.
    :rtype: DataDescriptor | None
    """
    with get_universe_session() as session:
        uterm = _get_term_in_data_descriptor(data_descriptor_id, term_id, session)
        # The row is converted while the session is still open.
        term = instantiate_pydantic_term(uterm, selected_term_fields) if uterm else None
    return term
131
+
132
+
133
def _get_term_in_universe(term_id: str, session: Session) -> UTerm | None:
    """
    Selects the first term row of the universe whose id is equal to the given term id.

    :param term_id: The id the term must be equal to.
    :type term_id: str
    :param session: The session used to execute the statement.
    :type session: Session
    :returns: The first matching row, or `None` when there is none.
    :rtype: UTerm | None
    """
    query = select(UTerm).where(UTerm.id == term_id)
    # Term ids are not supposed to be unique within the universe, hence `first()`.
    return session.exec(query).first()
138
+
139
+
140
def get_term_in_universe(term_id: str, selected_term_fields: Iterable[str] | None = None) -> DataDescriptor | None:
    """
    Returns the first occurrence of the terms, in the universe, whose id corresponds exactly to
    the given term id.
    Terms are unique within a data descriptor but may have some synonyms in the universe.
    The `term_id` is matched exactly: similar or related terms are not searched for.
    If the provided `term_id` is not found, the function returns `None`.

    :param term_id: The id of a term to be found.
    :type term_id: str
    :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the
        fields of the terms are returned. If empty, selects the id and type fields.
    :type selected_term_fields: Iterable[str] | None
    :returns: A term instance. Returns `None` if no match is found.
    :rtype: DataDescriptor | None
    """
    with get_universe_session() as session:
        uterm = _get_term_in_universe(term_id, session)
        # The row is converted while the session is still open.
        term = instantiate_pydantic_term(uterm, selected_term_fields) if uterm else None
    return term
163
+
164
+
165
def _get_data_descriptor_in_universe(data_descriptor_id: str, session: Session) -> UDataDescriptor | None:
    """
    Selects the data descriptor row whose id is equal to the given id.

    :param data_descriptor_id: The id the data descriptor must be equal to.
    :type data_descriptor_id: str
    :param session: The session used to execute the statement.
    :type session: Session
    :returns: The matching row, or `None` when there is none (`one_or_none` semantics).
    :rtype: UDataDescriptor | None
    """
    query = select(UDataDescriptor).where(UDataDescriptor.id == data_descriptor_id)
    return session.exec(query).one_or_none()
170
+
171
+
172
def get_data_descriptor_in_universe(data_descriptor_id: str) -> tuple[str, dict] | None:
    """
    Returns the id and the context of the data descriptor, in the universe, whose id corresponds
    exactly to the given data descriptor id.
    The `data_descriptor_id` is matched exactly: similar or related data descriptors are not
    searched for.
    If the provided `data_descriptor_id` is not found, the function returns `None`.

    :param data_descriptor_id: An id of a data descriptor to be found.
    :type data_descriptor_id: str
    :returns: The data descriptor id and context. Returns `None` if no match is found.
    :rtype: tuple[str, dict] | None
    """
    with get_universe_session() as session:
        descriptor = _get_data_descriptor_in_universe(data_descriptor_id, session)
        # Both attributes are read while the session is still open.
        id_and_context = (descriptor.id, descriptor.context) if descriptor else None
    return id_and_context
192
+
193
+
194
def _find_data_descriptors_in_universe(
    expression: str,
    session: Session,
    only_id: bool = False,
    limit: int | None = None,
    offset: int | None = None,
) -> Sequence[UDataDescriptor]:
    """
    Builds and runs the full text search statement on the data descriptors of the universe.

    :param expression: The full text search expression.
    :type expression: str
    :param session: The session handed to `execute_match_statement`.
    :type session: Session
    :param only_id: Forwarded to `generate_matching_condition`.
    :type only_id: bool
    :param limit: Forwarded to `handle_rank_limit_offset`.
    :type limit: int | None
    :param offset: Forwarded to `handle_rank_limit_offset`.
    :type offset: int | None
    :returns: Whatever `execute_match_statement` returns for the built statement.
    :rtype: Sequence[UDataDescriptor]
    """
    fts_condition = generate_matching_condition(UDataDescriptorFTS5, expression, only_id)
    fts_select = select(UDataDescriptorFTS5).where(fts_condition)
    # The FTS select (after rank/limit/offset handling) is the raw statement from which
    # the UDataDescriptor entities are loaded.
    ranked_select = handle_rank_limit_offset(fts_select, limit, offset)
    entity_select = select(UDataDescriptor).from_statement(ranked_select)
    return execute_match_statement(expression, entity_select, session)
201
+
202
+
203
def find_data_descriptors_in_universe(
    expression: str,
    only_id: bool = False,
    limit: int | None = None,
    offset: int | None = None,
) -> list[tuple[str, dict]]:
    """
    Find data descriptors in the universe based on a full text search defined by the given `expression`.
    The `expression` can be composed of one or multiple keywords, separated by whitespaces.
    The keywords can be combined with boolean operators: `AND`, `OR` and `NOT` (case sensitive).
    If no boolean operator is provided, whitespaces are handled as if there were an implicit
    AND operator between each pair of keywords. Note that this function does not provide any
    priority operator (parenthesis).
    Keywords can define prefixes when adding a `*` at the end of them.
    If the expression is composed of only one keyword, the function automatically defines it
    as a prefix.
    The function returns a list of data descriptor ids and contexts, sorted according to the
    bm25 ranking metric (list index `0` has the highest rank).
    If the provided `expression` does not hit any data descriptor, the function returns an empty list.
    The function searches for the `expression` in the data descriptor specifications.
    However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
    data descriptors. **At the moment, `only_id` is set to `True` as the data descriptors
    haven't got any description.**

    :param expression: The full text search expression.
    :type expression: str
    :param only_id: Performs the search only on ids, otherwise on all the specifications.
    :type only_id: bool
    :param limit: Limit the number of returned items found. Returns all items found if
        `limit` is either `None`, zero or negative.
    :type limit: int | None
    :param offset: Skips `offset` number of items found. Ignored if `offset` is
        either `None`, zero or negative.
    :type offset: int | None
    :returns: A list of data descriptor ids and contexts. Returns an empty list if no matches are found.
    :rtype: list[tuple[str, dict]]
    :raises EsgvocValueError: If the `expression` cannot be interpreted.
    """
    ids_and_contexts: list[tuple[str, dict]] = []
    with get_universe_session() as session:
        hits = _find_data_descriptors_in_universe(expression, session, only_id, limit, offset)
        if hits:
            # Attributes are read while the session is still open.
            ids_and_contexts.extend((hit.id, hit.context) for hit in hits)
    return ids_and_contexts
246
+
247
+
248
def _find_terms_in_universe(
    expression: str,
    session: Session,
    only_id: bool = False,
    limit: int | None = None,
    offset: int | None = None,
) -> Sequence[UTerm]:
    """
    Builds and runs the full text search statement on the terms of the universe.

    :param expression: The full text search expression.
    :type expression: str
    :param session: The session handed to `execute_match_statement`.
    :type session: Session
    :param only_id: Forwarded to `generate_matching_condition`.
    :type only_id: bool
    :param limit: Forwarded to `handle_rank_limit_offset`.
    :type limit: int | None
    :param offset: Forwarded to `handle_rank_limit_offset`.
    :type offset: int | None
    :returns: Whatever `execute_match_statement` returns for the built statement.
    :rtype: Sequence[UTerm]
    """
    fts_condition = generate_matching_condition(UTermFTS5, expression, only_id)
    fts_select = select(UTermFTS5).where(fts_condition)
    # The FTS select (after rank/limit/offset handling) is the raw statement from which
    # the UTerm entities are loaded.
    ranked_select = handle_rank_limit_offset(fts_select, limit, offset)
    entity_select = select(UTerm).from_statement(ranked_select)
    return execute_match_statement(expression, entity_select, session)
255
+
256
+
257
def find_terms_in_universe(
    expression: str,
    only_id: bool = False,
    limit: int | None = None,
    offset: int | None = None,
    selected_term_fields: Iterable[str] | None = None,
) -> list[DataDescriptor]:
    """
    Find terms in the universe based on a full-text search defined by the given `expression`.
    The `expression` can be composed of one or multiple keywords, separated by whitespaces.
    The keywords can be combined with boolean operators: `AND`, `OR` and `NOT` (case sensitive).
    If no boolean operator is provided, whitespaces are handled as if there were an implicit
    AND operator between each pair of keywords. Note that this function does not provide any
    priority operator (parenthesis).
    Keywords can define prefixes when adding a `*` at the end of them.
    If the expression is composed of only one keyword, the function automatically defines it
    as a prefix.
    The function returns a list of term instances sorted according to the
    bm25 ranking metric (list index `0` has the highest rank).
    If the provided `expression` does not hit any term, the function returns an empty list.
    The function searches for the `expression` in the term specifications.
    However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the terms.

    :param expression: The full text search expression.
    :type expression: str
    :param only_id: Performs the search only on ids, otherwise on all the specifications.
    :type only_id: bool
    :param limit: Limit the number of returned items found. Returns all items found if
        `limit` is either `None`, zero or negative.
    :type limit: int | None
    :param offset: Skips `offset` number of items found. Ignored if `offset` is
        either `None`, zero or negative.
    :type offset: int | None
    :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the
        fields of the terms are returned. If empty, selects the id and type fields.
    :type selected_term_fields: Iterable[str] | None
    :returns: A list of term instances. Returns an empty list if no matches are found.
    :rtype: list[DataDescriptor]
    :raises EsgvocValueError: If the `expression` cannot be interpreted.
    """
    terms: list[DataDescriptor] = []
    with get_universe_session() as session:
        hits = _find_terms_in_universe(expression, session, only_id, limit, offset)
        if hits:
            # The output list is passed to the helper, which converts the rows found.
            instantiate_pydantic_terms(hits, terms, selected_term_fields)
    return terms
304
+
305
+
306
def _find_terms_in_data_descriptor(
    expression: str,
    data_descriptor_id: str,
    session: Session,
    only_id: bool = False,
    limit: int | None = None,
    offset: int | None = None,
) -> Sequence[UTerm]:
    """
    Builds and runs the full text search statement on the terms of one data descriptor.

    :param expression: The full text search expression.
    :type expression: str
    :param data_descriptor_id: The id the joined data descriptor must be equal to.
    :type data_descriptor_id: str
    :param session: The session handed to `execute_match_statement`.
    :type session: Session
    :param only_id: Forwarded to `generate_matching_condition`.
    :type only_id: bool
    :param limit: Forwarded to `handle_rank_limit_offset`.
    :type limit: int | None
    :param offset: Forwarded to `handle_rank_limit_offset`.
    :type offset: int | None
    :returns: Whatever `execute_match_statement` returns for the built statement.
    :rtype: Sequence[UTerm]
    """
    fts_condition = generate_matching_condition(UTermFTS5, expression, only_id)
    # Two criteria: exact match on the data descriptor id, then the full text match.
    fts_select = (
        select(UTermFTS5)
        .join(UDataDescriptor)
        .where(UDataDescriptor.id == data_descriptor_id, fts_condition)
    )
    ranked_select = handle_rank_limit_offset(fts_select, limit, offset)
    entity_select = select(UTerm).from_statement(ranked_select)
    return execute_match_statement(expression, entity_select, session)
319
+
320
+
321
def find_terms_in_data_descriptor(
    expression: str,
    data_descriptor_id: str,
    only_id: bool = False,
    limit: int | None = None,
    offset: int | None = None,
    selected_term_fields: Iterable[str] | None = None,
) -> list[DataDescriptor]:
    """
    Find terms in the given data descriptor based on a full-text search defined by the given `expression`.
    The `expression` can be composed of one or multiple keywords, separated by whitespaces.
    The keywords can be combined with boolean operators: `AND`, `OR` and `NOT` (case sensitive).
    If no boolean operator is provided, whitespaces are handled as if there were an implicit
    AND operator between each pair of keywords. Note that this function does not provide any
    priority operator (parenthesis).
    Keywords can define prefixes when adding a `*` at the end of them.
    If the expression is composed of only one keyword, the function automatically defines it
    as a prefix.
    The function returns a list of term instances sorted according to the
    bm25 ranking metric (list index `0` has the highest rank).
    The `data_descriptor_id` is matched exactly: similar or related data descriptors are not
    searched for.
    If the provided `expression` does not hit any term or the given `data_descriptor_id` does not
    match exactly the id of a data descriptor, the function returns an empty list.
    The function searches for the `expression` in the term specifications.
    However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the terms.

    :param expression: The full text search expression.
    :type expression: str
    :param data_descriptor_id: The id of the data descriptor whose terms are searched.
    :type data_descriptor_id: str
    :param only_id: Performs the search only on ids, otherwise on all the specifications.
    :type only_id: bool
    :param limit: Limit the number of returned items found. Returns all items found if
        `limit` is either `None`, zero or negative.
    :type limit: int | None
    :param offset: Skips `offset` number of items found. Ignored if `offset` is
        either `None`, zero or negative.
    :type offset: int | None
    :param selected_term_fields: A list of term fields to select or `None`. If `None`, all the
        fields of the terms are returned. If empty, selects the id and type fields.
    :type selected_term_fields: Iterable[str] | None
    :returns: A list of term instances. Returns an empty list if no matches are found.
    :rtype: list[DataDescriptor]
    :raises EsgvocValueError: If the `expression` cannot be interpreted.
    """
    terms: list[DataDescriptor] = []
    with get_universe_session() as session:
        hits = _find_terms_in_data_descriptor(expression, data_descriptor_id, session, only_id, limit, offset)
        if hits:
            # The output list is passed to the helper, which converts the rows found.
            instantiate_pydantic_terms(hits, terms, selected_term_fields)
    return terms
372
+
373
+
374
def find_items_in_universe(
    expression: str,
    only_id: bool = False,
    limit: int | None = None,
    offset: int | None = None,
) -> list[Item]:
    """
    Find items, at the moment terms and data descriptors, in the universe based on a full-text
    search defined by the given `expression`.
    The `expression` can be composed of one or multiple keywords, separated by whitespaces.
    The keywords can be combined with boolean operators: `AND`, `OR` and `NOT` (case sensitive).
    If no boolean operator is provided, whitespaces are handled as if there were an implicit
    AND operator between each pair of keywords. Note that this function does not provide any
    priority operator (parenthesis).
    Keywords can define prefixes when adding a `*` at the end of them.
    If the expression is composed of only one keyword, the function automatically defines it
    as a prefix.
    The function returns a list of item instances sorted according to the
    bm25 ranking metric (list index `0` has the highest rank).
    If the provided `expression` does not hit any item, the function returns an empty list.
    The function searches for the `expression` in the term and data descriptor specifications.
    However, if `only_id` is `True` (default is `False`), the search is restricted to the id of the
    terms and data descriptors. **At the moment, `only_id` is set to `True` for the data descriptors
    because they haven't got any description.**

    :param expression: The full text search expression.
    :type expression: str
    :param only_id: Performs the search only on ids, otherwise on all the specifications.
    :type only_id: bool
    :param limit: Limit the number of returned items found. Returns all items found if
        `limit` is either `None`, zero or negative.
    :type limit: int | None
    :param offset: Skips `offset` number of items found. Ignored if `offset` is
        either `None`, zero or negative.
    :type offset: int | None
    :returns: A list of item instances. Returns an empty list if no matches are found.
    :rtype: list[Item]
    :raises EsgvocValueError: If the `expression` cannot be interpreted.
    """
    # TODO: execute union query when it will be possible to compute parent of terms and data descriptors.
    items = list()
    with get_universe_session() as session:
        fts_expression = process_expression(expression)

        # Data descriptors are always matched on their id, whatever `only_id` is.
        # TODO: use specs when implemented!
        dd_column = col(UDataDescriptorFTS5.id)
        # Terms are matched on their id or on their whole specs, depending on `only_id`.
        term_column = col(UTermFTS5.id) if only_id else col(UTermFTS5.specs)  # type: ignore

        # Each statement selects four columns: id, kind literal, parent, rank.
        # NOTE(review): both literal columns below are aliased `TYPE`; presumably harmless if
        # the rows are read by position downstream — confirm against execute_find_item_statements.
        dd_match = dd_column.match(fts_expression)
        dd_statement = select(
            UDataDescriptorFTS5.id,
            text("'data_descriptor' AS TYPE"),
            text("'universe' AS TYPE"),
            text("rank"),
        ).where(dd_match)

        term_match = term_column.match(fts_expression)
        term_statement = (
            select(UTermFTS5.id, text("'term' AS TYPE"), UDataDescriptor.id, text("rank"))
            .join(UDataDescriptor)
            .where(term_match)
        )

        items = execute_find_item_statements(
            session, fts_expression, dd_statement, term_statement, limit, offset
        )
    return items
@@ -0,0 +1,6 @@
1
+
2
+ from esgvoc.apps.drs.generator import DrsGenerator
3
+ from esgvoc.apps.drs.report import DrsGenerationReport, DrsValidationReport
4
+ from esgvoc.apps.drs.validator import DrsValidator
5
+
6
+ __all__ = ["DrsValidator", "DrsValidationReport", "DrsGenerator", "DrsGenerationReport"]
@@ -0,0 +1,7 @@
1
+ """
2
+ Support for generating CMOR tables
3
+ """
4
+
5
+ from esgvoc.apps.cmor_tables.cvs_table import generate_cvs_table
6
+
7
+ __all__ = ["generate_cvs_table"]