esgvoc 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +1 -1
- esgvoc/api/data_descriptors/__init__.py +2 -0
- esgvoc/api/data_descriptors/member_id.py +9 -0
- esgvoc/api/projects.py +90 -12
- esgvoc/apps/drs/generator.py +87 -74
- esgvoc/apps/jsg/json_schema_generator.py +21 -12
- {esgvoc-1.0.0.dist-info → esgvoc-1.0.1.dist-info}/METADATA +1 -3
- {esgvoc-1.0.0.dist-info → esgvoc-1.0.1.dist-info}/RECORD +11 -11
- esgvoc/apps/jsg/cmip6plus_template.json +0 -74
- {esgvoc-1.0.0.dist-info → esgvoc-1.0.1.dist-info}/WHEEL +0 -0
- {esgvoc-1.0.0.dist-info → esgvoc-1.0.1.dist-info}/entry_points.txt +0 -0
- {esgvoc-1.0.0.dist-info → esgvoc-1.0.1.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/__init__.py
CHANGED
|
@@ -20,6 +20,7 @@ from esgvoc.api.data_descriptors.initialisation_index import InitialisationIndex
|
|
|
20
20
|
from esgvoc.api.data_descriptors.institution import Institution
|
|
21
21
|
from esgvoc.api.data_descriptors.known_branded_variable import KnownBrandedVariable
|
|
22
22
|
from esgvoc.api.data_descriptors.license import License
|
|
23
|
+
from esgvoc.api.data_descriptors.member_id import MemberId
|
|
23
24
|
from esgvoc.api.data_descriptors.mip_era import MipEra
|
|
24
25
|
from esgvoc.api.data_descriptors.model_component import ModelComponent
|
|
25
26
|
from esgvoc.api.data_descriptors.obs_type import ObsType
|
|
@@ -87,5 +88,6 @@ DATA_DESCRIPTOR_CLASS_MAPPING: dict[str, type[DataDescriptor]] = {
|
|
|
87
88
|
"title": Title,
|
|
88
89
|
"contact": Contact,
|
|
89
90
|
"region": Region,
|
|
91
|
+
"member_id": MemberId,
|
|
90
92
|
"obs_type": ObsType, # obs4Mips
|
|
91
93
|
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
from esgvoc.api.data_descriptors.data_descriptor import CompositeTermDataDescriptor
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class MemberId(CompositeTermDataDescriptor):
    """
    Identifier of one specific model simulation inside an experiment.

    A member_id is assembled from two pieces:

    * the sub_experiment, which describes the setup or timing of the
      simulation (for instance a specific start year);
    * the variant_label, which details the model configuration (initial
      conditions, physics and forcings).

    Joined together they give a code such as ``s1960-r1i1p1f1``. This lets
    users tell ensemble members apart and understand how each run differs
    from the others within the same experiment.
    """

    # Description of the member_id term, as a string.
    description: str
|
esgvoc/api/projects.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import itertools
|
|
1
2
|
import re
|
|
2
3
|
from typing import Iterable, Sequence
|
|
3
4
|
|
|
@@ -72,9 +73,86 @@ def _get_composite_term_separator_parts(term: UTerm | PTerm) -> tuple[str, list]
|
|
|
72
73
|
return separator, parts
|
|
73
74
|
|
|
74
75
|
|
|
76
|
+
def _composite_part_accepts_value(
    given_value: str, part: dict, universe_session: Session, project_session: Session
) -> bool:
    """
    Tell whether ``given_value`` validates against at least one term of a composite part.

    The candidate term ids come from ``part["id"]`` (a single id or a list of ids).
    When the part has no ``id`` key, every term of the data descriptor named by
    ``part["type"]`` is tried. The part specification itself is never modified.

    :param given_value: The fragment of the composite value assigned to this part.
    :param part: The specification of the part (reads the ``id`` and ``type`` keys).
    :param universe_session: Session forwarded to ``_resolve_term`` and ``_valid_value``.
    :param project_session: Session forwarded to ``_resolve_term`` and ``_valid_value``.
    :returns: True as soon as one candidate term reports no validation error.
    """
    if "id" in part:
        raw_ids = part["id"]
        term_ids = [raw_ids] if isinstance(raw_ids, str) else raw_ids
    else:
        data_descriptor_terms = universe.get_all_terms_in_data_descriptor(part["type"], None)
        term_ids = [data_descriptor_term.id for data_descriptor_term in data_descriptor_terms]

    for term_id in term_ids:
        # _resolve_term is given a copy carrying exactly one id, so the shared
        # specification keeps its original shape for any other reader.
        single_id_part = {**part, "id": term_id}
        resolved_term = _resolve_term(single_id_part, universe_session, project_session)
        if not _valid_value(given_value, resolved_term, universe_session, project_session):
            return True
    return False


def _valid_value_composite_term_with_separator(
    value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
) -> list[UniverseTermError | ProjectTermError]:
    """
    Validate ``value`` against a composite term whose parts are joined by a separator.

    The value is split on the separator and every order-preserving assignment of
    the resulting fragments to the parts of the term is tried. An assignment is
    only considered when it fills all the parts flagged ``is_required``; parts
    left unfilled are treated as missing optional parts.

    :param value: The value to validate.
    :param term: The composite term providing the separator and the parts.
    :param universe_session: Session forwarded to the part validation helpers.
    :param project_session: Session forwarded to the part validation helpers.
    :returns: An empty list when at least one assignment is valid, otherwise a
              list holding the single error built by ``_create_term_error``.
    """
    separator, parts = _get_composite_term_separator_parts(term)
    splits = value.split(separator)
    nb_splits = len(splits)
    nb_parts = len(parts)

    # More fragments than parts: no assignment can exist.
    if nb_splits > nb_parts:
        return [_create_term_error(value, term)]

    required_indices = {index for index, part in enumerate(parts) if part.get("is_required", False)}

    # These facts depend on the value only: compute them once for all candidates.
    starts_with_separator = value.startswith(separator)
    ends_with_separator = value.endswith(separator)
    has_double_separator = separator * 2 in value

    for positions in itertools.combinations(range(nb_parts), nb_splits):
        # Every required part must receive a fragment. This also guarantees that
        # any part left to None below is an optional one.
        if not required_indices.issubset(positions):
            continue

        candidate: list[str | None] = [None] * nb_parts
        for fragment, position in zip(splits, positions):
            candidate[position] = fragment

        # Separator structure validation:
        # - no leading separator if the first part is missing;
        # - no trailing separator if the last part is missing;
        # - no double separator when two adjacent parts are missing.
        if candidate[0] is None and starts_with_separator:
            continue
        if candidate[-1] is None and ends_with_separator:
            continue
        if has_double_separator and any(
            left is None and right is None for left, right in zip(candidate, candidate[1:])
        ):
            continue

        # Validate each filled part; a missing (optional) part is accepted as is.
        if all(
            given_value is None
            or _composite_part_accepts_value(given_value, part, universe_session, project_session)
            for given_value, part in zip(candidate, parts)
        ):
            return []  # At least one valid combination found.

    return [_create_term_error(value, term)]  # No valid combination found.
|
|
151
|
+
|
|
152
|
+
|
|
75
153
|
# TODO: support optionality of parts of composite.
|
|
76
154
|
# It is backtrack possible for more than one missing parts.
|
|
77
|
-
def
|
|
155
|
+
def _valid_value_composite_term_with_separator2(
|
|
78
156
|
value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
|
|
79
157
|
) -> list[UniverseTermError | ProjectTermError]:
|
|
80
158
|
result = list()
|
|
@@ -1113,16 +1191,16 @@ def find_items_in_project(
|
|
|
1113
1191
|
collection_column = col(PCollectionFTS5.id) # TODO: use specs when implemented!
|
|
1114
1192
|
term_column = col(PTermFTS5.specs) # type: ignore
|
|
1115
1193
|
collection_where_condition = collection_column.match(processed_expression)
|
|
1116
|
-
collection_statement = select(
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
text('rank')).where(collection_where_condition)
|
|
1194
|
+
collection_statement = select(
|
|
1195
|
+
PCollectionFTS5.id, text("'collection' AS TYPE"), text(f"'{project_id}' AS TYPE"), text("rank")
|
|
1196
|
+
).where(collection_where_condition)
|
|
1120
1197
|
term_where_condition = term_column.match(processed_expression)
|
|
1121
|
-
term_statement =
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
result = execute_find_item_statements(
|
|
1127
|
-
|
|
1198
|
+
term_statement = (
|
|
1199
|
+
select(PTermFTS5.id, text("'term' AS TYPE"), PCollection.id, text("rank"))
|
|
1200
|
+
.join(PCollection)
|
|
1201
|
+
.where(term_where_condition)
|
|
1202
|
+
)
|
|
1203
|
+
result = execute_find_item_statements(
|
|
1204
|
+
session, processed_expression, collection_statement, term_statement, limit, offset
|
|
1205
|
+
)
|
|
1128
1206
|
return result
|
esgvoc/apps/drs/generator.py
CHANGED
|
@@ -2,6 +2,7 @@ from typing import Any, Iterable, Mapping, cast
|
|
|
2
2
|
|
|
3
3
|
import esgvoc.api.projects as projects
|
|
4
4
|
from esgvoc.api.project_specs import DrsCollection, DrsConstant, DrsPartKind, DrsSpecification, DrsType
|
|
5
|
+
from esgvoc.api.search import MatchingTerm
|
|
5
6
|
from esgvoc.apps.drs.report import (
|
|
6
7
|
AssignedTerm,
|
|
7
8
|
ConflictingCollections,
|
|
@@ -92,8 +93,7 @@ class DrsGenerator(DrsApplication):
|
|
|
92
93
|
:rtype: DrsGeneratorReport
|
|
93
94
|
"""
|
|
94
95
|
report = self._generate_from_mapping(mapping, self.file_name_specs)
|
|
95
|
-
report.generated_drs_expression = report.generated_drs_expression +
|
|
96
|
-
self._get_full_file_name_extension() # noqa E127
|
|
96
|
+
report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension() # noqa E127
|
|
97
97
|
return report
|
|
98
98
|
|
|
99
99
|
def generate_file_name_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
|
|
@@ -108,12 +108,10 @@ class DrsGenerator(DrsApplication):
|
|
|
108
108
|
:rtype: DrsGeneratorReport
|
|
109
109
|
"""
|
|
110
110
|
report = self._generate_from_bag_of_terms(terms, self.file_name_specs)
|
|
111
|
-
report.generated_drs_expression = report.generated_drs_expression +
|
|
112
|
-
self._get_full_file_name_extension() # noqa E127
|
|
111
|
+
report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension() # noqa E127
|
|
113
112
|
return report
|
|
114
113
|
|
|
115
|
-
def generate_from_mapping(self, mapping: Mapping[str, str],
|
|
116
|
-
drs_type: DrsType | str) -> DrsGenerationReport:
|
|
114
|
+
def generate_from_mapping(self, mapping: Mapping[str, str], drs_type: DrsType | str) -> DrsGenerationReport:
|
|
117
115
|
"""
|
|
118
116
|
Generate a DRS expression from a mapping of collection ids and terms.
|
|
119
117
|
|
|
@@ -134,8 +132,7 @@ class DrsGenerator(DrsApplication):
|
|
|
134
132
|
case _:
|
|
135
133
|
raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
|
|
136
134
|
|
|
137
|
-
def generate_from_bag_of_terms(self, terms: Iterable[str], drs_type: DrsType | str)
|
|
138
|
-
-> DrsGenerationReport: # noqa E127
|
|
135
|
+
def generate_from_bag_of_terms(self, terms: Iterable[str], drs_type: DrsType | str) -> DrsGenerationReport: # noqa E127
|
|
139
136
|
"""
|
|
140
137
|
Generate a DRS expression from an unordered bag of terms.
|
|
141
138
|
|
|
@@ -156,23 +153,24 @@ class DrsGenerator(DrsApplication):
|
|
|
156
153
|
case _:
|
|
157
154
|
raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
|
|
158
155
|
|
|
159
|
-
def _generate_from_mapping(self, mapping: Mapping[str, str], specs: DrsSpecification)
|
|
160
|
-
-> DrsGenerationReport: # noqa E127
|
|
156
|
+
def _generate_from_mapping(self, mapping: Mapping[str, str], specs: DrsSpecification) -> DrsGenerationReport: # noqa E127
|
|
161
157
|
drs_expression, errors, warnings = self.__generate_from_mapping(mapping, specs, True)
|
|
162
158
|
if self.pedantic:
|
|
163
159
|
errors.extend(warnings)
|
|
164
160
|
warnings.clear()
|
|
165
|
-
return DrsGenerationReport(
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
161
|
+
return DrsGenerationReport(
|
|
162
|
+
project_id=self.project_id,
|
|
163
|
+
type=specs.type,
|
|
164
|
+
given_mapping_or_bag_of_terms=mapping,
|
|
165
|
+
mapping_used=mapping,
|
|
166
|
+
generated_drs_expression=drs_expression,
|
|
167
|
+
errors=cast(list[GenerationError], errors),
|
|
168
|
+
warnings=cast(list[GenerationWarning], warnings),
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
def __generate_from_mapping(
|
|
172
|
+
self, mapping: Mapping[str, str], specs: DrsSpecification, has_to_valid_terms: bool
|
|
173
|
+
) -> tuple[str, list[GenerationIssue], list[GenerationIssue]]: # noqa E127
|
|
176
174
|
errors: list[GenerationIssue] = list()
|
|
177
175
|
warnings: list[GenerationIssue] = list()
|
|
178
176
|
drs_expression = ""
|
|
@@ -185,18 +183,17 @@ class DrsGenerator(DrsApplication):
|
|
|
185
183
|
if collection_id in mapping:
|
|
186
184
|
part_value = mapping[collection_id]
|
|
187
185
|
if has_to_valid_terms:
|
|
188
|
-
matching_terms = projects.valid_term_in_collection(part_value,
|
|
189
|
-
self.project_id,
|
|
190
|
-
collection_id)
|
|
186
|
+
matching_terms = projects.valid_term_in_collection(part_value, self.project_id, collection_id)
|
|
191
187
|
if not matching_terms:
|
|
192
|
-
issue = InvalidTerm(
|
|
193
|
-
|
|
194
|
-
|
|
188
|
+
issue = InvalidTerm(
|
|
189
|
+
term=part_value,
|
|
190
|
+
term_position=part_position,
|
|
191
|
+
collection_id_or_constant_value=collection_id,
|
|
192
|
+
)
|
|
195
193
|
errors.append(issue)
|
|
196
194
|
part_value = DrsGenerationReport.INVALID_TAG
|
|
197
195
|
else:
|
|
198
|
-
other_issue = MissingTerm(collection_id=collection_id,
|
|
199
|
-
collection_position=part_position)
|
|
196
|
+
other_issue = MissingTerm(collection_id=collection_id, collection_position=part_position)
|
|
200
197
|
if collection_part.is_required:
|
|
201
198
|
errors.append(other_issue)
|
|
202
199
|
part_value = DrsGenerationReport.MISSING_TAG
|
|
@@ -209,14 +206,18 @@ class DrsGenerator(DrsApplication):
|
|
|
209
206
|
|
|
210
207
|
drs_expression += part_value + specs.separator
|
|
211
208
|
|
|
212
|
-
drs_expression = drs_expression[0:len(drs_expression)-len(specs.separator)]
|
|
209
|
+
drs_expression = drs_expression[0 : len(drs_expression) - len(specs.separator)]
|
|
213
210
|
return drs_expression, errors, warnings
|
|
214
211
|
|
|
215
|
-
def _generate_from_bag_of_terms(self, terms: Iterable[str], specs: DrsSpecification)
|
|
216
|
-
-> DrsGenerationReport: # noqa E127
|
|
212
|
+
def _generate_from_bag_of_terms(self, terms: Iterable[str], specs: DrsSpecification) -> DrsGenerationReport: # noqa E127
|
|
217
213
|
collection_terms_mapping: dict[str, set[str]] = dict()
|
|
218
214
|
for term in terms:
|
|
219
|
-
matching_terms =
|
|
215
|
+
matching_terms: list[MatchingTerm] = []
|
|
216
|
+
for col in [part.collection_id for part in specs.parts if part.kind == DrsPartKind.COLLECTION]:
|
|
217
|
+
matching_terms_in_col = projects.valid_term_in_collection(term, self.project_id, col)
|
|
218
|
+
for mtic in matching_terms_in_col:
|
|
219
|
+
matching_terms.append(mtic)
|
|
220
|
+
# matching_terms = projects.valid_term_in_project(term, self.project_id)
|
|
220
221
|
for matching_term in matching_terms:
|
|
221
222
|
if matching_term.collection_id not in collection_terms_mapping:
|
|
222
223
|
collection_terms_mapping[matching_term.collection_id] = set()
|
|
@@ -229,15 +230,20 @@ class DrsGenerator(DrsApplication):
|
|
|
229
230
|
if self.pedantic:
|
|
230
231
|
errors.extend(warnings)
|
|
231
232
|
warnings.clear()
|
|
232
|
-
return DrsGenerationReport(
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
233
|
+
return DrsGenerationReport(
|
|
234
|
+
project_id=self.project_id,
|
|
235
|
+
type=specs.type,
|
|
236
|
+
given_mapping_or_bag_of_terms=terms,
|
|
237
|
+
mapping_used=mapping,
|
|
238
|
+
generated_drs_expression=drs_expression,
|
|
239
|
+
errors=cast(list[GenerationError], errors),
|
|
240
|
+
warnings=cast(list[GenerationWarning], warnings),
|
|
241
|
+
)
|
|
237
242
|
|
|
238
243
|
@staticmethod
|
|
239
|
-
def _resolve_conflicts(
|
|
240
|
-
|
|
244
|
+
def _resolve_conflicts(
|
|
245
|
+
collection_terms_mapping: dict[str, set[str]],
|
|
246
|
+
) -> tuple[dict[str, set[str]], list[GenerationIssue]]: # noqa E127
|
|
241
247
|
warnings: list[GenerationIssue] = list()
|
|
242
248
|
conflicting_collection_ids_list: list[list[str]] = list()
|
|
243
249
|
collection_ids: list[str] = list(collection_terms_mapping.keys())
|
|
@@ -247,13 +253,16 @@ class DrsGenerator(DrsApplication):
|
|
|
247
253
|
conflicting_collection_ids: list[str] = list()
|
|
248
254
|
for r_collection_index in range(l_collection_index + 1, len_collection_ids):
|
|
249
255
|
if collection_terms_mapping[collection_ids[l_collection_index]].isdisjoint(
|
|
250
|
-
|
|
256
|
+
collection_terms_mapping[collection_ids[r_collection_index]]
|
|
257
|
+
):
|
|
251
258
|
continue
|
|
252
259
|
else:
|
|
253
260
|
not_registered = True
|
|
254
261
|
for cc_ids in conflicting_collection_ids_list:
|
|
255
|
-
if
|
|
256
|
-
|
|
262
|
+
if (
|
|
263
|
+
collection_ids[l_collection_index] in cc_ids
|
|
264
|
+
and collection_ids[r_collection_index] in cc_ids
|
|
265
|
+
):
|
|
257
266
|
not_registered = False
|
|
258
267
|
break
|
|
259
268
|
if not_registered:
|
|
@@ -287,10 +296,12 @@ class DrsGenerator(DrsApplication):
|
|
|
287
296
|
# raise errors, remove the faulty collections and their term.
|
|
288
297
|
if collection_ids_with_len_eq_1_list:
|
|
289
298
|
for collection_ids_to_be_removed in collection_ids_with_len_eq_1_list:
|
|
290
|
-
DrsGenerator._remove_ids_from_conflicts(
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
299
|
+
DrsGenerator._remove_ids_from_conflicts(
|
|
300
|
+
conflicting_collection_ids_list, collection_ids_to_be_removed
|
|
301
|
+
)
|
|
302
|
+
DrsGenerator._remove_term_from_other_term_sets(
|
|
303
|
+
collection_terms_mapping, collection_ids_to_be_removed
|
|
304
|
+
)
|
|
294
305
|
# Every time conflicting_collection_ids_list is modified, we must restart the loop,
|
|
295
306
|
# as conflicting collections may be resolved.
|
|
296
307
|
continue
|
|
@@ -307,10 +318,8 @@ class DrsGenerator(DrsApplication):
|
|
|
307
318
|
warnings.append(issue)
|
|
308
319
|
# 3.b Update conflicting collections.
|
|
309
320
|
if wining_collection_ids:
|
|
310
|
-
DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
|
|
311
|
-
|
|
312
|
-
DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
|
|
313
|
-
wining_collection_ids)
|
|
321
|
+
DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list, wining_collection_ids)
|
|
322
|
+
DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping, wining_collection_ids)
|
|
314
323
|
# Every time conflicting_collection_ids_list is modified, we must restart the loop,
|
|
315
324
|
# as conflicting collections may be resolved.
|
|
316
325
|
continue
|
|
@@ -320,13 +329,15 @@ class DrsGenerator(DrsApplication):
|
|
|
320
329
|
wining_id_and_term_pairs: list[tuple[str, str]] = list()
|
|
321
330
|
for collection_ids in conflicting_collection_ids_list:
|
|
322
331
|
for collection_index in range(0, len(collection_ids)):
|
|
323
|
-
collection_set = collection_ids[collection_index + 1:] + collection_ids[:collection_index]
|
|
324
|
-
diff: set[str] = collection_terms_mapping[collection_ids[collection_index]]
|
|
325
|
-
|
|
326
|
-
|
|
332
|
+
collection_set = collection_ids[collection_index + 1 :] + collection_ids[:collection_index]
|
|
333
|
+
diff: set[str] = collection_terms_mapping[collection_ids[collection_index]].difference(
|
|
334
|
+
*[
|
|
335
|
+
collection_terms_mapping[index] # noqa E127
|
|
336
|
+
for index in collection_set
|
|
337
|
+
]
|
|
338
|
+
)
|
|
327
339
|
if len(diff) == 1:
|
|
328
|
-
wining_id_and_term_pairs.append((collection_ids[collection_index],
|
|
329
|
-
_get_first_item(diff)))
|
|
340
|
+
wining_id_and_term_pairs.append((collection_ids[collection_index], _get_first_item(diff)))
|
|
330
341
|
# 4.b Update conflicting collections.
|
|
331
342
|
if wining_id_and_term_pairs:
|
|
332
343
|
wining_collection_ids = list()
|
|
@@ -336,18 +347,17 @@ class DrsGenerator(DrsApplication):
|
|
|
336
347
|
collection_terms_mapping[collection_id].add(term)
|
|
337
348
|
issue = AssignedTerm(collection_id=collection_id, term=term)
|
|
338
349
|
warnings.append(issue)
|
|
339
|
-
DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
|
|
340
|
-
|
|
341
|
-
DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
|
|
342
|
-
wining_collection_ids)
|
|
350
|
+
DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list, wining_collection_ids)
|
|
351
|
+
DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping, wining_collection_ids)
|
|
343
352
|
continue
|
|
344
353
|
else:
|
|
345
354
|
break # Stop the loop when no progress is made.
|
|
346
355
|
return collection_terms_mapping, warnings
|
|
347
356
|
|
|
348
357
|
@staticmethod
|
|
349
|
-
def _check_collection_terms_mapping(
|
|
350
|
-
|
|
358
|
+
def _check_collection_terms_mapping(
|
|
359
|
+
collection_terms_mapping: dict[str, set[str]],
|
|
360
|
+
) -> tuple[dict[str, str], list[GenerationIssue]]: # noqa E127
|
|
351
361
|
errors: list[GenerationIssue] = list()
|
|
352
362
|
# 1. Looking for collections that share strictly the same term(s).
|
|
353
363
|
collection_ids: list[str] = list(collection_terms_mapping.keys())
|
|
@@ -363,8 +373,7 @@ class DrsGenerator(DrsApplication):
|
|
|
363
373
|
if l_term_set and (not l_term_set.difference(r_term_set)):
|
|
364
374
|
not_registered = True
|
|
365
375
|
for faulty_collections in faulty_collections_list:
|
|
366
|
-
if l_collection_id in faulty_collections or
|
|
367
|
-
r_collection_id in faulty_collections:
|
|
376
|
+
if l_collection_id in faulty_collections or r_collection_id in faulty_collections:
|
|
368
377
|
faulty_collections.add(l_collection_id)
|
|
369
378
|
faulty_collections.add(r_collection_id)
|
|
370
379
|
not_registered = False
|
|
@@ -373,8 +382,9 @@ class DrsGenerator(DrsApplication):
|
|
|
373
382
|
faulty_collections_list.append({l_collection_id, r_collection_id})
|
|
374
383
|
for faulty_collections in faulty_collections_list:
|
|
375
384
|
terms = collection_terms_mapping[_get_first_item(faulty_collections)]
|
|
376
|
-
issue = ConflictingCollections(
|
|
377
|
-
|
|
385
|
+
issue = ConflictingCollections(
|
|
386
|
+
collection_ids=_transform_set_and_sort(faulty_collections), terms=_transform_set_and_sort(terms)
|
|
387
|
+
)
|
|
378
388
|
errors.append(issue)
|
|
379
389
|
for collection_id in faulty_collections:
|
|
380
390
|
del collection_terms_mapping[collection_id]
|
|
@@ -386,25 +396,28 @@ class DrsGenerator(DrsApplication):
|
|
|
386
396
|
if len_term_set == 1:
|
|
387
397
|
result[collection_id] = _get_first_item(term_set)
|
|
388
398
|
elif len_term_set > 1:
|
|
389
|
-
other_issue = TooManyTermCollection(
|
|
390
|
-
|
|
399
|
+
other_issue = TooManyTermCollection(
|
|
400
|
+
collection_id=collection_id, terms=_transform_set_and_sort(term_set)
|
|
401
|
+
)
|
|
391
402
|
errors.append(other_issue)
|
|
392
403
|
# else: Don't add emptied collection to the result.
|
|
393
404
|
return result, errors
|
|
394
405
|
|
|
395
406
|
@staticmethod
|
|
396
|
-
def _remove_term_from_other_term_sets(
|
|
397
|
-
|
|
407
|
+
def _remove_term_from_other_term_sets(
|
|
408
|
+
collection_terms_mapping: dict[str, set[str]], collection_ids_to_be_removed: list[str]
|
|
409
|
+
) -> None:
|
|
398
410
|
for collection_id_to_be_removed in collection_ids_to_be_removed:
|
|
399
411
|
# Should only be one term.
|
|
400
412
|
term_to_be_removed: str = _get_first_item(collection_terms_mapping[collection_id_to_be_removed])
|
|
401
413
|
for collection_id in collection_terms_mapping.keys():
|
|
402
|
-
if
|
|
414
|
+
if collection_id not in collection_ids_to_be_removed:
|
|
403
415
|
collection_terms_mapping[collection_id].discard(term_to_be_removed)
|
|
404
416
|
|
|
405
417
|
@staticmethod
|
|
406
|
-
def _remove_ids_from_conflicts(
|
|
407
|
-
|
|
418
|
+
def _remove_ids_from_conflicts(
|
|
419
|
+
conflicting_collection_ids_list: list[list[str]], collection_ids_to_be_removed: list[str]
|
|
420
|
+
) -> None:
|
|
408
421
|
for collection_id_to_be_removed in collection_ids_to_be_removed:
|
|
409
422
|
for conflicting_collection_ids in conflicting_collection_ids_list:
|
|
410
423
|
if collection_id_to_be_removed in conflicting_collection_ids:
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import contextlib
|
|
2
2
|
import json
|
|
3
|
+
from json import JSONEncoder
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
from typing import Iterable
|
|
5
6
|
|
|
@@ -21,12 +22,12 @@ JSON_SCHEMA_TEMPLATE_FILE_NAME_TEMPLATE = '{project_id}_template.json'
|
|
|
21
22
|
JSON_INDENTATION = 2
|
|
22
23
|
|
|
23
24
|
|
|
24
|
-
def _process_plain(collection: PCollection, selected_field: str) ->
|
|
25
|
-
result:
|
|
25
|
+
def _process_plain(collection: PCollection, selected_field: str) -> set[str]:
|
|
26
|
+
result: set[str] = set()
|
|
26
27
|
for term in collection.terms:
|
|
27
28
|
if selected_field in term.specs:
|
|
28
29
|
value = term.specs[selected_field]
|
|
29
|
-
result.
|
|
30
|
+
result.add(value)
|
|
30
31
|
else:
|
|
31
32
|
raise EsgvocNotFoundError(f'missing key {selected_field} for term {term.id} in ' +
|
|
32
33
|
f'collection {collection.id}')
|
|
@@ -86,8 +87,8 @@ class JsonPropertiesVisitor(GlobalAttributeVisitor, contextlib.AbstractContextMa
|
|
|
86
87
|
return True
|
|
87
88
|
|
|
88
89
|
def _generate_attribute_property(self, attribute_name: str, source_collection: str,
|
|
89
|
-
selected_field: str) -> tuple[str, str |
|
|
90
|
-
property_value: str |
|
|
90
|
+
selected_field: str) -> tuple[str, str | set[str]]:
|
|
91
|
+
property_value: str | set[str]
|
|
91
92
|
property_key: str
|
|
92
93
|
if source_collection not in self.collections:
|
|
93
94
|
raise EsgvocNotFoundError(f"collection '{source_collection}' referenced by attribute " +
|
|
@@ -113,9 +114,9 @@ class JsonPropertiesVisitor(GlobalAttributeVisitor, contextlib.AbstractContextMa
|
|
|
113
114
|
return property_key, property_value
|
|
114
115
|
|
|
115
116
|
def visit_base_attribute(self, attribute_name: str, attribute: GlobalAttributeSpecBase) \
|
|
116
|
-
-> tuple[str, dict[str, str |
|
|
117
|
+
-> tuple[str, dict[str, str | set[str]]]:
|
|
117
118
|
attribute_key = _generate_attribute_key(self.project_id, attribute_name)
|
|
118
|
-
attribute_properties: dict[str, str |
|
|
119
|
+
attribute_properties: dict[str, str | set[str]] = dict()
|
|
119
120
|
attribute_properties['type'] = attribute.value_type.value
|
|
120
121
|
property_key, property_value = self._generate_attribute_property(attribute_name,
|
|
121
122
|
attribute.source_collection,
|
|
@@ -124,9 +125,9 @@ class JsonPropertiesVisitor(GlobalAttributeVisitor, contextlib.AbstractContextMa
|
|
|
124
125
|
return attribute_key, attribute_properties
|
|
125
126
|
|
|
126
127
|
def visit_specific_attribute(self, attribute_name: str, attribute: GlobalAttributeSpecSpecific) \
|
|
127
|
-
-> tuple[str, dict[str, str |
|
|
128
|
+
-> tuple[str, dict[str, str | set[str]]]:
|
|
128
129
|
attribute_key = _generate_attribute_key(self.project_id, attribute_name)
|
|
129
|
-
attribute_properties: dict[str, str |
|
|
130
|
+
attribute_properties: dict[str, str | set[str]] = dict()
|
|
130
131
|
attribute_properties['type'] = attribute.value_type.value
|
|
131
132
|
property_key, property_value = self._generate_attribute_property(attribute_name,
|
|
132
133
|
attribute.source_collection,
|
|
@@ -148,6 +149,14 @@ def _inject_properties(json_root: dict, properties: list[tuple]) -> None:
|
|
|
148
149
|
json_root['definitions']['fields']['properties'][property[0]] = property[1]
|
|
149
150
|
|
|
150
151
|
|
|
152
|
+
class SetEncoder(JSONEncoder):
    """
    JSON encoder that serialises ``set`` and ``frozenset`` objects as JSON arrays.

    Any other object that the standard encoder does not support is delegated to
    ``JSONEncoder.default``, which raises ``TypeError``.
    """

    def default(self, o):
        """
        Convert ``o`` into an object that the JSON encoder can serialise.

        :param o: The object the standard encoder could not serialise.
        :returns: A list holding the elements of ``o`` when it is a set.
        :raises TypeError: If ``o`` is not a set and is not JSON serialisable.
        """
        if isinstance(o, (set, frozenset)):
            try:
                # Set iteration order is arbitrary: sort for a reproducible output.
                return sorted(o)
            except TypeError:
                # Elements that cannot be compared with each other keep set order.
                return list(o)
        # super() is already bound: passing self again raised a misleading
        # "takes 2 positional arguments but 3 were given" TypeError.
        return super().default(o)
|
|
158
|
+
|
|
159
|
+
|
|
151
160
|
def generate_json_schema(project_id: str) -> str:
|
|
152
161
|
"""
|
|
153
162
|
Generate json schema for the given project.
|
|
@@ -169,17 +178,17 @@ def generate_json_schema(project_id: str) -> str:
|
|
|
169
178
|
JsonPropertiesVisitor(project_id) as visitor:
|
|
170
179
|
file_content = file.read()
|
|
171
180
|
root = json.loads(file_content)
|
|
172
|
-
properties: list[tuple[str, dict[str, str |
|
|
181
|
+
properties: list[tuple[str, dict[str, str | set[str]]]] = list()
|
|
173
182
|
for attribute_name, attribute in project_specs.global_attributes_specs.items():
|
|
174
183
|
attribute_key, attribute_properties = attribute.accept(attribute_name, visitor)
|
|
175
184
|
properties.append((attribute_key, attribute_properties))
|
|
176
185
|
_inject_properties(root, properties)
|
|
177
186
|
_inject_global_attributes(root, project_id, project_specs.global_attributes_specs.keys())
|
|
178
|
-
return json.dumps(root, indent=JSON_INDENTATION)
|
|
187
|
+
return json.dumps(root, indent=JSON_INDENTATION, cls=SetEncoder)
|
|
179
188
|
else:
|
|
180
189
|
raise EsgvocNotFoundError(f"global attributes for the project '{project_id}' " +
|
|
181
190
|
"are not provided")
|
|
182
191
|
else:
|
|
183
|
-
raise EsgvocNotFoundError(f"project '{project_id}' is not found")
|
|
192
|
+
raise EsgvocNotFoundError(f"specs of project '{project_id}' is not found")
|
|
184
193
|
else:
|
|
185
194
|
raise EsgvocNotFoundError(f"template for project '{project_id}' is not found")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: esgvoc
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: python library and CLI to interact with WCRP CVs
|
|
5
5
|
Project-URL: Repository, https://github.com/ESGF/esgf-vocab
|
|
6
6
|
Author-email: Sébastien Gardoll <sebastien@gardoll.fr>, Guillaume Levavasseur <guillaume.levavasseur@ipsl.fr>, Laurent Troussellier <laurent.troussellier@ipsl.fr>
|
|
@@ -62,7 +62,6 @@ esgvoc install
|
|
|
62
62
|
|
|
63
63
|
```bash
|
|
64
64
|
pip install -e .
|
|
65
|
-
wily setup
|
|
66
65
|
pip install pre-commit
|
|
67
66
|
pre-commit install
|
|
68
67
|
```
|
|
@@ -71,6 +70,5 @@ pre-commit install
|
|
|
71
70
|
|
|
72
71
|
```bash
|
|
73
72
|
uv sync
|
|
74
|
-
uv run wily setup
|
|
75
73
|
uv run pre-commit install
|
|
76
74
|
```
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
esgvoc/__init__.py,sha256=
|
|
1
|
+
esgvoc/__init__.py,sha256=u3ucA4xC8eQ_WbPjLI1E9a3kvkwtU5n8TUNRaCFQMfs,66
|
|
2
2
|
esgvoc/api/__init__.py,sha256=w68CdVRS553bDWezZoCTxIFq_vsP7mFluSoO4yUo_Uc,4130
|
|
3
3
|
esgvoc/api/project_specs.py,sha256=ZvDAVn3-ZFpReCozK-_cVt6Sqkwrwww0X4vKUoxr1I4,5502
|
|
4
|
-
esgvoc/api/projects.py,sha256=
|
|
4
|
+
esgvoc/api/projects.py,sha256=LUasPF5cPSG1aD6vOkFwGNCMNSf5e--LdDtDkrMjcTU,56720
|
|
5
5
|
esgvoc/api/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
esgvoc/api/report.py,sha256=OlK5ApnaikMKmC6FyJ1uTSBeTezZe85yTCJwsk14uZE,3526
|
|
7
7
|
esgvoc/api/search.py,sha256=C4IRhfNezSV6ztwMXo-P8W_41qIPiO5nQXdU9etPy3k,7595
|
|
8
8
|
esgvoc/api/universe.py,sha256=i3bSIwUvO8S3COLvn_fz2K9Diegfeniccwx6QZXzIGc,22842
|
|
9
|
-
esgvoc/api/data_descriptors/__init__.py,sha256=
|
|
9
|
+
esgvoc/api/data_descriptors/__init__.py,sha256=JFdB-Qfzxbws2zlWRqq77TTQFIlb6MosXe7fv9zimDg,4420
|
|
10
10
|
esgvoc/api/data_descriptors/activity.py,sha256=uu7e-fNvk_0oOOrtVWujDIBbF88fvhqwUfqYS_2Fabs,621
|
|
11
11
|
esgvoc/api/data_descriptors/area_label.py,sha256=Vyny3nmESGLOTVhGCE1iJbdITpN_wvB_onKy44dsVRY,842
|
|
12
12
|
esgvoc/api/data_descriptors/branded_suffix.py,sha256=jliXbvygKjcxqipalRZT694nXGVUp0k4uAORzMX9B0I,822
|
|
@@ -29,6 +29,7 @@ esgvoc/api/data_descriptors/initialisation_index.py,sha256=VjgIHq1j7xoR5VvMW_eFR
|
|
|
29
29
|
esgvoc/api/data_descriptors/institution.py,sha256=dUqyMS_HsLz72dvE4-9ZTIoF3QBXDKASC3OKoFX4S8w,547
|
|
30
30
|
esgvoc/api/data_descriptors/known_branded_variable.py,sha256=IXeMgxkCPWssSuNU56O7GU6oOjJ-hju03ZKpDg0gz7Q,834
|
|
31
31
|
esgvoc/api/data_descriptors/license.py,sha256=BQK8GcbGYuXHSei_CxXlbUct3SM0G15waJDEc6jyr7o,180
|
|
32
|
+
esgvoc/api/data_descriptors/member_id.py,sha256=L9Kcbz6mtRZDqSJiIqQwTBoU8A4z8JgI3UTfn2ZiMo8,703
|
|
32
33
|
esgvoc/api/data_descriptors/mip_era.py,sha256=ubxwqJL8xPgCZu7bmjg-vvphBlG_aqogwE-ewu3lB2Q,176
|
|
33
34
|
esgvoc/api/data_descriptors/model_component.py,sha256=erKMHqSbZcVDsCPcSebIIMRTtgZToTdKEGQ8vB_zzYs,226
|
|
34
35
|
esgvoc/api/data_descriptors/obs_type.py,sha256=uVbxIMFoYs9ySJ-unhOoW0h0ljdWsBNwYfwmlXOSRe8,143
|
|
@@ -55,12 +56,11 @@ esgvoc/apps/__init__.py,sha256=Kyq36qRjvTWN7gu4_iFaLOjNUYvW0k1xp8bvkgJlQ5w,269
|
|
|
55
56
|
esgvoc/apps/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
56
57
|
esgvoc/apps/drs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
57
58
|
esgvoc/apps/drs/constants.py,sha256=rVWq1QQwAFgISjvl3YzJDLLPNUPXHpqgv66spmjyPMQ,96
|
|
58
|
-
esgvoc/apps/drs/generator.py,sha256=
|
|
59
|
+
esgvoc/apps/drs/generator.py,sha256=rFGuqbfAvYeWC9qQWqgI57Z48XZ3mhIC14XngVMTqJs,21667
|
|
59
60
|
esgvoc/apps/drs/report.py,sha256=ZRu5l6T-U-hqY7O3ZwAseYbWZPcJiMhJ2dpFKZJE3Gk,17371
|
|
60
61
|
esgvoc/apps/drs/validator.py,sha256=yNijdOPhF9adgZbya5Ugvs13GbL4MvgQepCT38A66vM,13825
|
|
61
62
|
esgvoc/apps/jsg/cmip6_template.json,sha256=KJHhr0FSrIVB5kXVt57k_KtvB3uhs9Xz5hoX8ajAZis,1916
|
|
62
|
-
esgvoc/apps/jsg/
|
|
63
|
-
esgvoc/apps/jsg/json_schema_generator.py,sha256=ByLalwtoqZKtWUgOmPKcgA7hoCLwUss994ViBSqrfXE,9058
|
|
63
|
+
esgvoc/apps/jsg/json_schema_generator.py,sha256=2Y8d3fnso_6b7aAO18_Zws4UoyJ2IRhT5kfDl_0ewGM,9274
|
|
64
64
|
esgvoc/cli/config.py,sha256=MNrpYzEM9gwqCzPUs-ZzFv6Tg-p0ySMGeBUzB0nXXo0,18714
|
|
65
65
|
esgvoc/cli/drs.py,sha256=PvVbLxef34A1IO600AFWOEWb5iLaWrBRHwwgMJ4u-PM,9237
|
|
66
66
|
esgvoc/cli/find.py,sha256=DxpEvSbQIJ3-XL-pgH5RicBzS3asjG2Cn_fJhjXKSoU,4497
|
|
@@ -88,8 +88,8 @@ esgvoc/core/service/esg_voc.py,sha256=5G0P4_xmQzoI_RG_agpq-yHoYYZx220P27v2nPrpyN
|
|
|
88
88
|
esgvoc/core/service/state.py,sha256=CGlVbmvW5WB6DKivzqz9i8PsMDKHGuNdIWyohQVdBhQ,11113
|
|
89
89
|
esgvoc/core/service/configuration/config_manager.py,sha256=K-gU3Kd-eJMunxDKOk4x72CRcyJ50IZXLfqQgyI9zTs,8282
|
|
90
90
|
esgvoc/core/service/configuration/setting.py,sha256=WJgo9ZjZJrTGR9WEBhp1d7ab0Yb2Y6XmnO1oImTPc2s,3042
|
|
91
|
-
esgvoc-1.0.
|
|
92
|
-
esgvoc-1.0.
|
|
93
|
-
esgvoc-1.0.
|
|
94
|
-
esgvoc-1.0.
|
|
95
|
-
esgvoc-1.0.
|
|
91
|
+
esgvoc-1.0.1.dist-info/METADATA,sha256=FKkE2Cw5w3QVFnkr4Wi6grYHeCS-lUvWXxm9D0GQFsg,2037
|
|
92
|
+
esgvoc-1.0.1.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
|
|
93
|
+
esgvoc-1.0.1.dist-info/entry_points.txt,sha256=ZXufSC7Jlx1lb52U6Buv9IitJMcqAAXOerR2V9DaIto,48
|
|
94
|
+
esgvoc-1.0.1.dist-info/licenses/LICENSE.txt,sha256=rWJoZt3vach8ZNdLq-Ee5djzCMFnJ1gIfBeJU5RIop4,21782
|
|
95
|
+
esgvoc-1.0.1.dist-info/RECORD,,
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
-
"$id": "https://stac-extensions.github.io/cmip6plus/v1.0.0/schema.json#",
|
|
4
|
-
"title": "CMIP6Plus Extension",
|
|
5
|
-
"description": "STAC CMIP6Plus Extension for STAC Items and STAC Collection Summaries.",
|
|
6
|
-
"type": "object",
|
|
7
|
-
"required": [
|
|
8
|
-
"stac_extensions"
|
|
9
|
-
],
|
|
10
|
-
"properties": {
|
|
11
|
-
"stac_extensions": {
|
|
12
|
-
"type": "array",
|
|
13
|
-
"contains": {
|
|
14
|
-
"const": "https://stac-extensions.github.io/cmip6plus/v1.0.0/schema.json"
|
|
15
|
-
}
|
|
16
|
-
}
|
|
17
|
-
},
|
|
18
|
-
"oneOf": [
|
|
19
|
-
{
|
|
20
|
-
"$comment": "This is the schema for STAC Items.",
|
|
21
|
-
"type": "object",
|
|
22
|
-
"required": [
|
|
23
|
-
"type",
|
|
24
|
-
"properties"
|
|
25
|
-
],
|
|
26
|
-
"properties": {
|
|
27
|
-
"type": {
|
|
28
|
-
"const": "Feature"
|
|
29
|
-
},
|
|
30
|
-
"properties": {
|
|
31
|
-
"allOf": [
|
|
32
|
-
{
|
|
33
|
-
"$ref": "#/definitions/require_any"
|
|
34
|
-
},
|
|
35
|
-
{
|
|
36
|
-
"$ref": "#/definitions/fields"
|
|
37
|
-
}
|
|
38
|
-
]
|
|
39
|
-
}
|
|
40
|
-
}
|
|
41
|
-
},
|
|
42
|
-
{
|
|
43
|
-
"$comment": "This is the schema for STAC Collections, or more specifically only Collection Summaries in this case. By default, only checks the existence of the properties, but not the schema of the summaries.",
|
|
44
|
-
"type": "object",
|
|
45
|
-
"required": [
|
|
46
|
-
"type",
|
|
47
|
-
"summaries"
|
|
48
|
-
],
|
|
49
|
-
"properties": {
|
|
50
|
-
"type": {
|
|
51
|
-
"const": "Collection"
|
|
52
|
-
},
|
|
53
|
-
"summaries": {
|
|
54
|
-
"$ref": "#/definitions/require_any"
|
|
55
|
-
}
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
],
|
|
59
|
-
"definitions": {
|
|
60
|
-
"require_any": {
|
|
61
|
-
"$comment": "Please list all fields here so that we can force the existence of one of them in other parts of the schemas."
|
|
62
|
-
},
|
|
63
|
-
"fields": {
|
|
64
|
-
"$comment": " Don't require fields here, do that above in the corresponding schema.",
|
|
65
|
-
"type": "object",
|
|
66
|
-
"properties": {
|
|
67
|
-
},
|
|
68
|
-
"patternProperties": {
|
|
69
|
-
"^(?!cmip6plus:)": {}
|
|
70
|
-
},
|
|
71
|
-
"additionalProperties": false
|
|
72
|
-
}
|
|
73
|
-
}
|
|
74
|
-
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|