esgvoc 0.4.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +1 -1
- esgvoc/api/data_descriptors/__init__.py +52 -28
- esgvoc/api/data_descriptors/activity.py +3 -3
- esgvoc/api/data_descriptors/area_label.py +16 -1
- esgvoc/api/data_descriptors/branded_suffix.py +20 -0
- esgvoc/api/data_descriptors/branded_variable.py +12 -0
- esgvoc/api/data_descriptors/consortium.py +14 -13
- esgvoc/api/data_descriptors/contact.py +5 -0
- esgvoc/api/data_descriptors/conventions.py +6 -0
- esgvoc/api/data_descriptors/creation_date.py +5 -0
- esgvoc/api/data_descriptors/data_descriptor.py +14 -9
- esgvoc/api/data_descriptors/data_specs_version.py +5 -0
- esgvoc/api/data_descriptors/date.py +1 -1
- esgvoc/api/data_descriptors/directory_date.py +1 -1
- esgvoc/api/data_descriptors/experiment.py +13 -11
- esgvoc/api/data_descriptors/forcing_index.py +1 -1
- esgvoc/api/data_descriptors/frequency.py +3 -3
- esgvoc/api/data_descriptors/further_info_url.py +5 -0
- esgvoc/api/data_descriptors/grid_label.py +2 -2
- esgvoc/api/data_descriptors/horizontal_label.py +15 -1
- esgvoc/api/data_descriptors/initialisation_index.py +1 -1
- esgvoc/api/data_descriptors/institution.py +8 -5
- esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
- esgvoc/api/data_descriptors/license.py +3 -3
- esgvoc/api/data_descriptors/member_id.py +9 -0
- esgvoc/api/data_descriptors/mip_era.py +1 -1
- esgvoc/api/data_descriptors/model_component.py +1 -1
- esgvoc/api/data_descriptors/obs_type.py +5 -0
- esgvoc/api/data_descriptors/organisation.py +1 -1
- esgvoc/api/data_descriptors/physic_index.py +1 -1
- esgvoc/api/data_descriptors/product.py +2 -2
- esgvoc/api/data_descriptors/publication_status.py +5 -0
- esgvoc/api/data_descriptors/realisation_index.py +1 -1
- esgvoc/api/data_descriptors/realm.py +1 -1
- esgvoc/api/data_descriptors/region.py +5 -0
- esgvoc/api/data_descriptors/resolution.py +3 -3
- esgvoc/api/data_descriptors/source.py +9 -5
- esgvoc/api/data_descriptors/source_type.py +1 -1
- esgvoc/api/data_descriptors/table.py +3 -2
- esgvoc/api/data_descriptors/temporal_label.py +15 -1
- esgvoc/api/data_descriptors/time_range.py +4 -3
- esgvoc/api/data_descriptors/title.py +5 -0
- esgvoc/api/data_descriptors/tracking_id.py +5 -0
- esgvoc/api/data_descriptors/variable.py +25 -12
- esgvoc/api/data_descriptors/variant_label.py +3 -3
- esgvoc/api/data_descriptors/vertical_label.py +14 -0
- esgvoc/api/project_specs.py +117 -2
- esgvoc/api/projects.py +328 -287
- esgvoc/api/search.py +30 -3
- esgvoc/api/universe.py +42 -27
- esgvoc/apps/drs/generator.py +87 -74
- esgvoc/apps/jsg/cmip6_template.json +74 -0
- esgvoc/apps/jsg/json_schema_generator.py +194 -0
- esgvoc/cli/config.py +500 -0
- esgvoc/cli/find.py +138 -0
- esgvoc/cli/get.py +43 -38
- esgvoc/cli/main.py +10 -3
- esgvoc/cli/status.py +27 -18
- esgvoc/cli/valid.py +10 -15
- esgvoc/core/db/models/project.py +11 -11
- esgvoc/core/db/models/universe.py +3 -3
- esgvoc/core/db/project_ingestion.py +40 -40
- esgvoc/core/db/universe_ingestion.py +36 -33
- esgvoc/core/logging_handler.py +24 -2
- esgvoc/core/repo_fetcher.py +61 -59
- esgvoc/core/service/data_merger.py +47 -34
- esgvoc/core/service/state.py +107 -83
- {esgvoc-0.4.0.dist-info → esgvoc-1.0.1.dist-info}/METADATA +5 -20
- esgvoc-1.0.1.dist-info/RECORD +95 -0
- esgvoc/core/logging.conf +0 -21
- esgvoc-0.4.0.dist-info/RECORD +0 -80
- {esgvoc-0.4.0.dist-info → esgvoc-1.0.1.dist-info}/WHEEL +0 -0
- {esgvoc-0.4.0.dist-info → esgvoc-1.0.1.dist-info}/entry_points.txt +0 -0
- {esgvoc-0.4.0.dist-info → esgvoc-1.0.1.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/api/search.py
CHANGED
|
@@ -76,18 +76,45 @@ def instantiate_pydantic_terms(db_terms: Iterable[UTerm | PTerm],
|
|
|
76
76
|
list_to_populate.append(term)
|
|
77
77
|
|
|
78
78
|
|
|
79
|
+
def process_expression(expression: str) -> str:
    """
    Sanitize a search expression before it is handed to an SQLite FTS5 ``MATCH`` clause.

    Only the boolean operators ``AND``, ``OR`` and ``NOT`` are left untouched. Quotes are
    dropped, the other special characters and the ``NEAR`` keyword are wrapped in double
    quotes, and a single-word expression is turned into a prefix search.

    :param expression: The raw search expression.
    :type expression: str
    :returns: The sanitized expression.
    :rtype: str
    """
    import re  # Local import: the file-level import block is not visible from this function.

    # 1. Remove single and double quotes and 2. escape the special characters, in one pass.
    #    The wrapped characters never contain another character of the table, so a single
    #    translation gives the same text as successive replacements.
    escapes: dict[int, str | None] = {ord(char): f'"{char}"' for char in '+-:^(),'}
    escapes[ord('"')] = None
    escapes[ord("'")] = None
    result = expression.translate(escapes)

    # Escape the NEAR keyword only when it stands alone as a word: a plain substring
    # replacement would also split ordinary words that contain it (e.g. LINEAR).
    result = re.sub(r'\bNEAR\b', '"NEAR"', result)

    # 3. Make single word request a prefix search (unless the caller already asked for one).
    if not result.endswith('*') and len(result.split()) == 1:
        result += '*'
    return result
|
|
103
|
+
|
|
104
|
+
|
|
79
105
|
def generate_matching_condition(cls: type[UTermFTS5] | type[UDataDescriptorFTS5] |
|
|
80
106
|
type[PTermFTS5] | type[PCollectionFTS5],
|
|
81
107
|
expression: str,
|
|
82
108
|
only_id: bool) -> ColumnElement[bool]:
|
|
109
|
+
processed_expression = process_expression(expression)
|
|
83
110
|
# TODO: fix this when specs will be available in collections and Data descriptors.
|
|
84
111
|
if cls is PTermFTS5 or cls is UTermFTS5:
|
|
85
112
|
if only_id:
|
|
86
|
-
result = col(cls.id).match(
|
|
113
|
+
result = col(cls.id).match(processed_expression)
|
|
87
114
|
else:
|
|
88
|
-
result = col(cls.specs).match(
|
|
115
|
+
result = col(cls.specs).match(processed_expression) # type: ignore
|
|
89
116
|
else:
|
|
90
|
-
result = col(cls.id).match(
|
|
117
|
+
result = col(cls.id).match(processed_expression)
|
|
91
118
|
return result
|
|
92
119
|
|
|
93
120
|
|
esgvoc/api/universe.py
CHANGED
|
@@ -13,6 +13,7 @@ from esgvoc.api.search import (
|
|
|
13
13
|
handle_rank_limit_offset,
|
|
14
14
|
instantiate_pydantic_term,
|
|
15
15
|
instantiate_pydantic_terms,
|
|
16
|
+
process_expression,
|
|
16
17
|
)
|
|
17
18
|
from esgvoc.core.db.models.universe import UDataDescriptor, UDataDescriptorFTS5, UTerm, UTermFTS5
|
|
18
19
|
|
|
@@ -211,12 +212,15 @@ def find_data_descriptors_in_universe(expression: str,
|
|
|
211
212
|
offset: int | None = None) -> list[tuple[str, dict]]:
|
|
212
213
|
"""
|
|
213
214
|
Find data descriptors in the universe based on a full text search defined by the given `expression`.
|
|
214
|
-
The `expression`
|
|
215
|
-
|
|
216
|
-
and
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
215
|
+
The `expression` can be composed of one or multiple keywords.
|
|
216
|
+
The keywords can be combined with boolean operators: `AND`,
|
|
217
|
+
`OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
|
|
218
|
+
if no boolean operator is provided, whitespaces are handled as if there were
|
|
219
|
+
an implicit AND operator between each pair of keywords. Note that this
|
|
220
|
+
function does not provide any priority operator (parenthesis).
|
|
221
|
+
Keywords can define prefixes when adding a `*` at the end of them.
|
|
222
|
+
If the expression is composed of only one keyword, the function
|
|
223
|
+
automatically defines it as a prefix.
|
|
220
224
|
The function returns a list of data descriptor ids and contexts, sorted according to the
|
|
221
225
|
bm25 ranking metric (list index `0` has the highest rank).
|
|
222
226
|
If the provided `expression` does not hit any data descriptor, the function returns an empty list.
|
|
@@ -266,12 +270,15 @@ def find_terms_in_universe(expression: str,
|
|
|
266
270
|
selected_term_fields: Iterable[str] | None = None) -> list[DataDescriptor]:
|
|
267
271
|
"""
|
|
268
272
|
Find terms in the universe based on a full-text search defined by the given `expression`.
|
|
269
|
-
The `expression`
|
|
270
|
-
|
|
271
|
-
and
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
273
|
+
The `expression` can be composed of one or multiple keywords.
|
|
274
|
+
The keywords can be combined with boolean operators: `AND`,
|
|
275
|
+
`OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
|
|
276
|
+
if no boolean operator is provided, whitespaces are handled as if there were
|
|
277
|
+
an implicit AND operator between each pair of keywords. Note that this
|
|
278
|
+
function does not provide any priority operator (parenthesis).
|
|
279
|
+
Keywords can define prefixes when adding a `*` at the end of them.
|
|
280
|
+
If the expression is composed of only one keyword, the function
|
|
281
|
+
automatically defines it as a prefix.
|
|
275
282
|
The function returns a list of term instances sorted according to the
|
|
276
283
|
bm25 ranking metric (list index `0` has the highest rank).
|
|
277
284
|
If the provided `expression` does not hit any term, the function returns an empty list.
|
|
@@ -323,12 +330,15 @@ def find_terms_in_data_descriptor(expression: str, data_descriptor_id: str,
|
|
|
323
330
|
-> list[DataDescriptor]:
|
|
324
331
|
"""
|
|
325
332
|
Find terms in the given data descriptor based on a full-text search defined by the given `expression`.
|
|
326
|
-
The `expression`
|
|
327
|
-
|
|
328
|
-
and
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
333
|
+
The `expression` can be composed of one or multiple keywords.
|
|
334
|
+
The keywords can be combined with boolean operators: `AND`,
|
|
335
|
+
`OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
|
|
336
|
+
if no boolean operator is provided, whitespaces are handled as if there were
|
|
337
|
+
an implicit AND operator between each pair of keywords. Note that this
|
|
338
|
+
function does not provide any priority operator (parenthesis).
|
|
339
|
+
Keywords can define prefixes when adding a `*` at the end of them.
|
|
340
|
+
If the expression is composed of only one keyword, the function
|
|
341
|
+
automatically defines it as a prefix.
|
|
332
342
|
The function returns a list of term instances sorted according to the
|
|
333
343
|
bm25 ranking metric (list index `0` has the highest rank).
|
|
334
344
|
This function performs an exact match on the `data_descriptor_id`,
|
|
@@ -370,12 +380,16 @@ def find_items_in_universe(expression: str,
|
|
|
370
380
|
offset: int | None = None) -> list[Item]:
|
|
371
381
|
"""
|
|
372
382
|
Find items, at the moment terms and data descriptors, in the universe based on a full-text
|
|
373
|
-
search defined by the given `expression`.
|
|
374
|
-
`
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
383
|
+
search defined by the given `expression`.
|
|
384
|
+
The `expression` can be composed of one or multiple keywords.
|
|
385
|
+
The keywords can be combined with boolean operators: `AND`,
|
|
386
|
+
`OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
|
|
387
|
+
if no boolean operator is provided, whitespaces are handled as if there were
|
|
388
|
+
an implicit AND operator between each pair of keywords. Note that this
|
|
389
|
+
function does not provide any priority operator (parenthesis).
|
|
390
|
+
Keywords can define prefixes when adding a `*` at the end of them.
|
|
391
|
+
If the expression is composed of only one keyword, the function
|
|
392
|
+
automatically defines it as a prefix.
|
|
379
393
|
The function returns a list of item instances sorted according to the
|
|
380
394
|
bm25 ranking metric (list index `0` has the highest rank).
|
|
381
395
|
If the provided `expression` does not hit any item, the function returns an empty list.
|
|
@@ -401,23 +415,24 @@ def find_items_in_universe(expression: str,
|
|
|
401
415
|
# TODO: execute union query when it will be possible to compute parent of terms and data descriptors.
|
|
402
416
|
result = list()
|
|
403
417
|
with get_universe_session() as session:
|
|
418
|
+
processed_expression = process_expression(expression)
|
|
404
419
|
if only_id:
|
|
405
420
|
dd_column = col(UDataDescriptorFTS5.id)
|
|
406
421
|
term_column = col(UTermFTS5.id)
|
|
407
422
|
else:
|
|
408
423
|
dd_column = col(UDataDescriptorFTS5.id) # TODO: use specs when implemented!
|
|
409
424
|
term_column = col(UTermFTS5.specs) # type: ignore
|
|
410
|
-
dd_where_condition = dd_column.match(
|
|
425
|
+
dd_where_condition = dd_column.match(processed_expression)
|
|
411
426
|
dd_statement = select(UDataDescriptorFTS5.id,
|
|
412
427
|
text("'data_descriptor' AS TYPE"),
|
|
413
428
|
text("'universe' AS TYPE"),
|
|
414
429
|
text('rank')).where(dd_where_condition)
|
|
415
|
-
term_where_condition = term_column.match(
|
|
430
|
+
term_where_condition = term_column.match(processed_expression)
|
|
416
431
|
term_statement = select(UTermFTS5.id,
|
|
417
432
|
text("'term' AS TYPE"),
|
|
418
433
|
UDataDescriptor.id,
|
|
419
434
|
text('rank')).join(UDataDescriptor) \
|
|
420
435
|
.where(term_where_condition)
|
|
421
|
-
result = execute_find_item_statements(session,
|
|
436
|
+
result = execute_find_item_statements(session, processed_expression, dd_statement,
|
|
422
437
|
term_statement, limit, offset)
|
|
423
438
|
return result
|
esgvoc/apps/drs/generator.py
CHANGED
|
@@ -2,6 +2,7 @@ from typing import Any, Iterable, Mapping, cast
|
|
|
2
2
|
|
|
3
3
|
import esgvoc.api.projects as projects
|
|
4
4
|
from esgvoc.api.project_specs import DrsCollection, DrsConstant, DrsPartKind, DrsSpecification, DrsType
|
|
5
|
+
from esgvoc.api.search import MatchingTerm
|
|
5
6
|
from esgvoc.apps.drs.report import (
|
|
6
7
|
AssignedTerm,
|
|
7
8
|
ConflictingCollections,
|
|
@@ -92,8 +93,7 @@ class DrsGenerator(DrsApplication):
|
|
|
92
93
|
:rtype: DrsGeneratorReport
|
|
93
94
|
"""
|
|
94
95
|
report = self._generate_from_mapping(mapping, self.file_name_specs)
|
|
95
|
-
report.generated_drs_expression = report.generated_drs_expression +
|
|
96
|
-
self._get_full_file_name_extension() # noqa E127
|
|
96
|
+
report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension() # noqa E127
|
|
97
97
|
return report
|
|
98
98
|
|
|
99
99
|
def generate_file_name_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
|
|
@@ -108,12 +108,10 @@ class DrsGenerator(DrsApplication):
|
|
|
108
108
|
:rtype: DrsGeneratorReport
|
|
109
109
|
"""
|
|
110
110
|
report = self._generate_from_bag_of_terms(terms, self.file_name_specs)
|
|
111
|
-
report.generated_drs_expression = report.generated_drs_expression +
|
|
112
|
-
self._get_full_file_name_extension() # noqa E127
|
|
111
|
+
report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension() # noqa E127
|
|
113
112
|
return report
|
|
114
113
|
|
|
115
|
-
def generate_from_mapping(self, mapping: Mapping[str, str],
|
|
116
|
-
drs_type: DrsType | str) -> DrsGenerationReport:
|
|
114
|
+
def generate_from_mapping(self, mapping: Mapping[str, str], drs_type: DrsType | str) -> DrsGenerationReport:
|
|
117
115
|
"""
|
|
118
116
|
Generate a DRS expression from a mapping of collection ids and terms.
|
|
119
117
|
|
|
@@ -134,8 +132,7 @@ class DrsGenerator(DrsApplication):
|
|
|
134
132
|
case _:
|
|
135
133
|
raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
|
|
136
134
|
|
|
137
|
-
def generate_from_bag_of_terms(self, terms: Iterable[str], drs_type: DrsType | str)
|
|
138
|
-
-> DrsGenerationReport: # noqa E127
|
|
135
|
+
def generate_from_bag_of_terms(self, terms: Iterable[str], drs_type: DrsType | str) -> DrsGenerationReport: # noqa E127
|
|
139
136
|
"""
|
|
140
137
|
Generate a DRS expression from an unordered bag of terms.
|
|
141
138
|
|
|
@@ -156,23 +153,24 @@ class DrsGenerator(DrsApplication):
|
|
|
156
153
|
case _:
|
|
157
154
|
raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
|
|
158
155
|
|
|
159
|
-
def _generate_from_mapping(self, mapping: Mapping[str, str], specs: DrsSpecification)
|
|
160
|
-
-> DrsGenerationReport: # noqa E127
|
|
156
|
+
def _generate_from_mapping(self, mapping: Mapping[str, str], specs: DrsSpecification) -> DrsGenerationReport: # noqa E127
|
|
161
157
|
drs_expression, errors, warnings = self.__generate_from_mapping(mapping, specs, True)
|
|
162
158
|
if self.pedantic:
|
|
163
159
|
errors.extend(warnings)
|
|
164
160
|
warnings.clear()
|
|
165
|
-
return DrsGenerationReport(
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
161
|
+
return DrsGenerationReport(
|
|
162
|
+
project_id=self.project_id,
|
|
163
|
+
type=specs.type,
|
|
164
|
+
given_mapping_or_bag_of_terms=mapping,
|
|
165
|
+
mapping_used=mapping,
|
|
166
|
+
generated_drs_expression=drs_expression,
|
|
167
|
+
errors=cast(list[GenerationError], errors),
|
|
168
|
+
warnings=cast(list[GenerationWarning], warnings),
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
def __generate_from_mapping(
|
|
172
|
+
self, mapping: Mapping[str, str], specs: DrsSpecification, has_to_valid_terms: bool
|
|
173
|
+
) -> tuple[str, list[GenerationIssue], list[GenerationIssue]]: # noqa E127
|
|
176
174
|
errors: list[GenerationIssue] = list()
|
|
177
175
|
warnings: list[GenerationIssue] = list()
|
|
178
176
|
drs_expression = ""
|
|
@@ -185,18 +183,17 @@ class DrsGenerator(DrsApplication):
|
|
|
185
183
|
if collection_id in mapping:
|
|
186
184
|
part_value = mapping[collection_id]
|
|
187
185
|
if has_to_valid_terms:
|
|
188
|
-
matching_terms = projects.valid_term_in_collection(part_value,
|
|
189
|
-
self.project_id,
|
|
190
|
-
collection_id)
|
|
186
|
+
matching_terms = projects.valid_term_in_collection(part_value, self.project_id, collection_id)
|
|
191
187
|
if not matching_terms:
|
|
192
|
-
issue = InvalidTerm(
|
|
193
|
-
|
|
194
|
-
|
|
188
|
+
issue = InvalidTerm(
|
|
189
|
+
term=part_value,
|
|
190
|
+
term_position=part_position,
|
|
191
|
+
collection_id_or_constant_value=collection_id,
|
|
192
|
+
)
|
|
195
193
|
errors.append(issue)
|
|
196
194
|
part_value = DrsGenerationReport.INVALID_TAG
|
|
197
195
|
else:
|
|
198
|
-
other_issue = MissingTerm(collection_id=collection_id,
|
|
199
|
-
collection_position=part_position)
|
|
196
|
+
other_issue = MissingTerm(collection_id=collection_id, collection_position=part_position)
|
|
200
197
|
if collection_part.is_required:
|
|
201
198
|
errors.append(other_issue)
|
|
202
199
|
part_value = DrsGenerationReport.MISSING_TAG
|
|
@@ -209,14 +206,18 @@ class DrsGenerator(DrsApplication):
|
|
|
209
206
|
|
|
210
207
|
drs_expression += part_value + specs.separator
|
|
211
208
|
|
|
212
|
-
drs_expression = drs_expression[0:len(drs_expression)-len(specs.separator)]
|
|
209
|
+
drs_expression = drs_expression[0 : len(drs_expression) - len(specs.separator)]
|
|
213
210
|
return drs_expression, errors, warnings
|
|
214
211
|
|
|
215
|
-
def _generate_from_bag_of_terms(self, terms: Iterable[str], specs: DrsSpecification)
|
|
216
|
-
-> DrsGenerationReport: # noqa E127
|
|
212
|
+
def _generate_from_bag_of_terms(self, terms: Iterable[str], specs: DrsSpecification) -> DrsGenerationReport: # noqa E127
|
|
217
213
|
collection_terms_mapping: dict[str, set[str]] = dict()
|
|
218
214
|
for term in terms:
|
|
219
|
-
matching_terms =
|
|
215
|
+
matching_terms: list[MatchingTerm] = []
|
|
216
|
+
for col in [part.collection_id for part in specs.parts if part.kind == DrsPartKind.COLLECTION]:
|
|
217
|
+
matching_terms_in_col = projects.valid_term_in_collection(term, self.project_id, col)
|
|
218
|
+
for mtic in matching_terms_in_col:
|
|
219
|
+
matching_terms.append(mtic)
|
|
220
|
+
# matching_terms = projects.valid_term_in_project(term, self.project_id)
|
|
220
221
|
for matching_term in matching_terms:
|
|
221
222
|
if matching_term.collection_id not in collection_terms_mapping:
|
|
222
223
|
collection_terms_mapping[matching_term.collection_id] = set()
|
|
@@ -229,15 +230,20 @@ class DrsGenerator(DrsApplication):
|
|
|
229
230
|
if self.pedantic:
|
|
230
231
|
errors.extend(warnings)
|
|
231
232
|
warnings.clear()
|
|
232
|
-
return DrsGenerationReport(
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
233
|
+
return DrsGenerationReport(
|
|
234
|
+
project_id=self.project_id,
|
|
235
|
+
type=specs.type,
|
|
236
|
+
given_mapping_or_bag_of_terms=terms,
|
|
237
|
+
mapping_used=mapping,
|
|
238
|
+
generated_drs_expression=drs_expression,
|
|
239
|
+
errors=cast(list[GenerationError], errors),
|
|
240
|
+
warnings=cast(list[GenerationWarning], warnings),
|
|
241
|
+
)
|
|
237
242
|
|
|
238
243
|
@staticmethod
|
|
239
|
-
def _resolve_conflicts(
|
|
240
|
-
|
|
244
|
+
def _resolve_conflicts(
|
|
245
|
+
collection_terms_mapping: dict[str, set[str]],
|
|
246
|
+
) -> tuple[dict[str, set[str]], list[GenerationIssue]]: # noqa E127
|
|
241
247
|
warnings: list[GenerationIssue] = list()
|
|
242
248
|
conflicting_collection_ids_list: list[list[str]] = list()
|
|
243
249
|
collection_ids: list[str] = list(collection_terms_mapping.keys())
|
|
@@ -247,13 +253,16 @@ class DrsGenerator(DrsApplication):
|
|
|
247
253
|
conflicting_collection_ids: list[str] = list()
|
|
248
254
|
for r_collection_index in range(l_collection_index + 1, len_collection_ids):
|
|
249
255
|
if collection_terms_mapping[collection_ids[l_collection_index]].isdisjoint(
|
|
250
|
-
|
|
256
|
+
collection_terms_mapping[collection_ids[r_collection_index]]
|
|
257
|
+
):
|
|
251
258
|
continue
|
|
252
259
|
else:
|
|
253
260
|
not_registered = True
|
|
254
261
|
for cc_ids in conflicting_collection_ids_list:
|
|
255
|
-
if
|
|
256
|
-
|
|
262
|
+
if (
|
|
263
|
+
collection_ids[l_collection_index] in cc_ids
|
|
264
|
+
and collection_ids[r_collection_index] in cc_ids
|
|
265
|
+
):
|
|
257
266
|
not_registered = False
|
|
258
267
|
break
|
|
259
268
|
if not_registered:
|
|
@@ -287,10 +296,12 @@ class DrsGenerator(DrsApplication):
|
|
|
287
296
|
# raise errors, remove the faulty collections and their term.
|
|
288
297
|
if collection_ids_with_len_eq_1_list:
|
|
289
298
|
for collection_ids_to_be_removed in collection_ids_with_len_eq_1_list:
|
|
290
|
-
DrsGenerator._remove_ids_from_conflicts(
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
299
|
+
DrsGenerator._remove_ids_from_conflicts(
|
|
300
|
+
conflicting_collection_ids_list, collection_ids_to_be_removed
|
|
301
|
+
)
|
|
302
|
+
DrsGenerator._remove_term_from_other_term_sets(
|
|
303
|
+
collection_terms_mapping, collection_ids_to_be_removed
|
|
304
|
+
)
|
|
294
305
|
# Every time conflicting_collection_ids_list is modified, we must restart the loop,
|
|
295
306
|
# as conflicting collections may be resolved.
|
|
296
307
|
continue
|
|
@@ -307,10 +318,8 @@ class DrsGenerator(DrsApplication):
|
|
|
307
318
|
warnings.append(issue)
|
|
308
319
|
# 3.b Update conflicting collections.
|
|
309
320
|
if wining_collection_ids:
|
|
310
|
-
DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
|
|
311
|
-
|
|
312
|
-
DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
|
|
313
|
-
wining_collection_ids)
|
|
321
|
+
DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list, wining_collection_ids)
|
|
322
|
+
DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping, wining_collection_ids)
|
|
314
323
|
# Every time conflicting_collection_ids_list is modified, we must restart the loop,
|
|
315
324
|
# as conflicting collections may be resolved.
|
|
316
325
|
continue
|
|
@@ -320,13 +329,15 @@ class DrsGenerator(DrsApplication):
|
|
|
320
329
|
wining_id_and_term_pairs: list[tuple[str, str]] = list()
|
|
321
330
|
for collection_ids in conflicting_collection_ids_list:
|
|
322
331
|
for collection_index in range(0, len(collection_ids)):
|
|
323
|
-
collection_set = collection_ids[collection_index + 1:] + collection_ids[:collection_index]
|
|
324
|
-
diff: set[str] = collection_terms_mapping[collection_ids[collection_index]]
|
|
325
|
-
|
|
326
|
-
|
|
332
|
+
collection_set = collection_ids[collection_index + 1 :] + collection_ids[:collection_index]
|
|
333
|
+
diff: set[str] = collection_terms_mapping[collection_ids[collection_index]].difference(
|
|
334
|
+
*[
|
|
335
|
+
collection_terms_mapping[index] # noqa E127
|
|
336
|
+
for index in collection_set
|
|
337
|
+
]
|
|
338
|
+
)
|
|
327
339
|
if len(diff) == 1:
|
|
328
|
-
wining_id_and_term_pairs.append((collection_ids[collection_index],
|
|
329
|
-
_get_first_item(diff)))
|
|
340
|
+
wining_id_and_term_pairs.append((collection_ids[collection_index], _get_first_item(diff)))
|
|
330
341
|
# 4.b Update conflicting collections.
|
|
331
342
|
if wining_id_and_term_pairs:
|
|
332
343
|
wining_collection_ids = list()
|
|
@@ -336,18 +347,17 @@ class DrsGenerator(DrsApplication):
|
|
|
336
347
|
collection_terms_mapping[collection_id].add(term)
|
|
337
348
|
issue = AssignedTerm(collection_id=collection_id, term=term)
|
|
338
349
|
warnings.append(issue)
|
|
339
|
-
DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
|
|
340
|
-
|
|
341
|
-
DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
|
|
342
|
-
wining_collection_ids)
|
|
350
|
+
DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list, wining_collection_ids)
|
|
351
|
+
DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping, wining_collection_ids)
|
|
343
352
|
continue
|
|
344
353
|
else:
|
|
345
354
|
break # Stop the loop when no progress is made.
|
|
346
355
|
return collection_terms_mapping, warnings
|
|
347
356
|
|
|
348
357
|
@staticmethod
|
|
349
|
-
def _check_collection_terms_mapping(
|
|
350
|
-
|
|
358
|
+
def _check_collection_terms_mapping(
|
|
359
|
+
collection_terms_mapping: dict[str, set[str]],
|
|
360
|
+
) -> tuple[dict[str, str], list[GenerationIssue]]: # noqa E127
|
|
351
361
|
errors: list[GenerationIssue] = list()
|
|
352
362
|
# 1. Looking for collections that share strictly the same term(s).
|
|
353
363
|
collection_ids: list[str] = list(collection_terms_mapping.keys())
|
|
@@ -363,8 +373,7 @@ class DrsGenerator(DrsApplication):
|
|
|
363
373
|
if l_term_set and (not l_term_set.difference(r_term_set)):
|
|
364
374
|
not_registered = True
|
|
365
375
|
for faulty_collections in faulty_collections_list:
|
|
366
|
-
if l_collection_id in faulty_collections or
|
|
367
|
-
r_collection_id in faulty_collections:
|
|
376
|
+
if l_collection_id in faulty_collections or r_collection_id in faulty_collections:
|
|
368
377
|
faulty_collections.add(l_collection_id)
|
|
369
378
|
faulty_collections.add(r_collection_id)
|
|
370
379
|
not_registered = False
|
|
@@ -373,8 +382,9 @@ class DrsGenerator(DrsApplication):
|
|
|
373
382
|
faulty_collections_list.append({l_collection_id, r_collection_id})
|
|
374
383
|
for faulty_collections in faulty_collections_list:
|
|
375
384
|
terms = collection_terms_mapping[_get_first_item(faulty_collections)]
|
|
376
|
-
issue = ConflictingCollections(
|
|
377
|
-
|
|
385
|
+
issue = ConflictingCollections(
|
|
386
|
+
collection_ids=_transform_set_and_sort(faulty_collections), terms=_transform_set_and_sort(terms)
|
|
387
|
+
)
|
|
378
388
|
errors.append(issue)
|
|
379
389
|
for collection_id in faulty_collections:
|
|
380
390
|
del collection_terms_mapping[collection_id]
|
|
@@ -386,25 +396,28 @@ class DrsGenerator(DrsApplication):
|
|
|
386
396
|
if len_term_set == 1:
|
|
387
397
|
result[collection_id] = _get_first_item(term_set)
|
|
388
398
|
elif len_term_set > 1:
|
|
389
|
-
other_issue = TooManyTermCollection(
|
|
390
|
-
|
|
399
|
+
other_issue = TooManyTermCollection(
|
|
400
|
+
collection_id=collection_id, terms=_transform_set_and_sort(term_set)
|
|
401
|
+
)
|
|
391
402
|
errors.append(other_issue)
|
|
392
403
|
# else: Don't add emptied collection to the result.
|
|
393
404
|
return result, errors
|
|
394
405
|
|
|
395
406
|
@staticmethod
|
|
396
|
-
def _remove_term_from_other_term_sets(
|
|
397
|
-
|
|
407
|
+
def _remove_term_from_other_term_sets(
|
|
408
|
+
collection_terms_mapping: dict[str, set[str]], collection_ids_to_be_removed: list[str]
|
|
409
|
+
) -> None:
|
|
398
410
|
for collection_id_to_be_removed in collection_ids_to_be_removed:
|
|
399
411
|
# Should only be one term.
|
|
400
412
|
term_to_be_removed: str = _get_first_item(collection_terms_mapping[collection_id_to_be_removed])
|
|
401
413
|
for collection_id in collection_terms_mapping.keys():
|
|
402
|
-
if
|
|
414
|
+
if collection_id not in collection_ids_to_be_removed:
|
|
403
415
|
collection_terms_mapping[collection_id].discard(term_to_be_removed)
|
|
404
416
|
|
|
405
417
|
@staticmethod
|
|
406
|
-
def _remove_ids_from_conflicts(
|
|
407
|
-
|
|
418
|
+
def _remove_ids_from_conflicts(
|
|
419
|
+
conflicting_collection_ids_list: list[list[str]], collection_ids_to_be_removed: list[str]
|
|
420
|
+
) -> None:
|
|
408
421
|
for collection_id_to_be_removed in collection_ids_to_be_removed:
|
|
409
422
|
for conflicting_collection_ids in conflicting_collection_ids_list:
|
|
410
423
|
if collection_id_to_be_removed in conflicting_collection_ids:
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "https://stac-extensions.github.io/cmip6/v1.0.0/schema.json#",
|
|
4
|
+
"title": "CMIP6 Extension",
|
|
5
|
+
"description": "STAC CMIP6 Extension for STAC Items and STAC Collection Summaries.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": [
|
|
8
|
+
"stac_extensions"
|
|
9
|
+
],
|
|
10
|
+
"properties": {
|
|
11
|
+
"stac_extensions": {
|
|
12
|
+
"type": "array",
|
|
13
|
+
"contains": {
|
|
14
|
+
"const": "https://stac-extensions.github.io/cmip6/v1.0.0/schema.json"
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"oneOf": [
|
|
19
|
+
{
|
|
20
|
+
"$comment": "This is the schema for STAC Items.",
|
|
21
|
+
"type": "object",
|
|
22
|
+
"required": [
|
|
23
|
+
"type",
|
|
24
|
+
"properties"
|
|
25
|
+
],
|
|
26
|
+
"properties": {
|
|
27
|
+
"type": {
|
|
28
|
+
"const": "Feature"
|
|
29
|
+
},
|
|
30
|
+
"properties": {
|
|
31
|
+
"allOf": [
|
|
32
|
+
{
|
|
33
|
+
"$ref": "#/definitions/require_any"
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
"$ref": "#/definitions/fields"
|
|
37
|
+
}
|
|
38
|
+
]
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
"$comment": "This is the schema for STAC Collections, or more specifically only Collection Summaries in this case. By default, only checks the existence of the properties, but not the schema of the summaries.",
|
|
44
|
+
"type": "object",
|
|
45
|
+
"required": [
|
|
46
|
+
"type",
|
|
47
|
+
"summaries"
|
|
48
|
+
],
|
|
49
|
+
"properties": {
|
|
50
|
+
"type": {
|
|
51
|
+
"const": "Collection"
|
|
52
|
+
},
|
|
53
|
+
"summaries": {
|
|
54
|
+
"$ref": "#/definitions/require_any"
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
],
|
|
59
|
+
"definitions": {
|
|
60
|
+
"require_any": {
|
|
61
|
+
"$comment": "Please list all fields here so that we can force the existence of one of them in other parts of the schemas."
|
|
62
|
+
},
|
|
63
|
+
"fields": {
|
|
64
|
+
"$comment": " Don't require fields here, do that above in the corresponding schema.",
|
|
65
|
+
"type": "object",
|
|
66
|
+
"properties": {
|
|
67
|
+
},
|
|
68
|
+
"patternProperties": {
|
|
69
|
+
"^(?!cmip6:)": {}
|
|
70
|
+
},
|
|
71
|
+
"additionalProperties": false
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
}
|