esgvoc 1.0.1__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic. Click here for more details.

Files changed (41) hide show
  1. esgvoc/__init__.py +1 -1
  2. esgvoc/api/__init__.py +0 -6
  3. esgvoc/api/data_descriptors/__init__.py +6 -0
  4. esgvoc/api/data_descriptors/archive.py +5 -0
  5. esgvoc/api/data_descriptors/citation_url.py +5 -0
  6. esgvoc/api/data_descriptors/experiment.py +2 -2
  7. esgvoc/api/data_descriptors/known_branded_variable.py +58 -5
  8. esgvoc/api/data_descriptors/regex.py +5 -0
  9. esgvoc/api/data_descriptors/vertical_label.py +2 -2
  10. esgvoc/api/project_specs.py +48 -130
  11. esgvoc/api/projects.py +104 -63
  12. esgvoc/apps/drs/generator.py +47 -42
  13. esgvoc/apps/drs/validator.py +22 -38
  14. esgvoc/apps/jsg/json_schema_generator.py +252 -136
  15. esgvoc/apps/jsg/templates/template.jinja +249 -0
  16. esgvoc/apps/test_cv/README.md +214 -0
  17. esgvoc/apps/test_cv/cv_tester.py +1368 -0
  18. esgvoc/apps/test_cv/example_usage.py +216 -0
  19. esgvoc/apps/vr/__init__.py +12 -0
  20. esgvoc/apps/vr/build_variable_registry.py +71 -0
  21. esgvoc/apps/vr/example_usage.py +60 -0
  22. esgvoc/apps/vr/vr_app.py +333 -0
  23. esgvoc/cli/config.py +671 -86
  24. esgvoc/cli/drs.py +39 -21
  25. esgvoc/cli/main.py +2 -0
  26. esgvoc/cli/test_cv.py +257 -0
  27. esgvoc/core/constants.py +10 -7
  28. esgvoc/core/data_handler.py +24 -22
  29. esgvoc/core/db/connection.py +7 -0
  30. esgvoc/core/db/project_ingestion.py +34 -9
  31. esgvoc/core/db/universe_ingestion.py +1 -2
  32. esgvoc/core/service/configuration/setting.py +192 -21
  33. esgvoc/core/service/data_merger.py +1 -1
  34. esgvoc/core/service/state.py +18 -2
  35. {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/METADATA +2 -1
  36. {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/RECORD +40 -29
  37. esgvoc/apps/jsg/cmip6_template.json +0 -74
  38. /esgvoc/apps/{py.typed → test_cv/__init__.py} +0 -0
  39. {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/WHEEL +0 -0
  40. {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/entry_points.txt +0 -0
  41. {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/licenses/LICENSE.txt +0 -0
@@ -1,75 +1,145 @@
1
- import contextlib
2
1
  import json
3
- from json import JSONEncoder
2
+ from dataclasses import dataclass
3
+ from itertools import product
4
4
  from pathlib import Path
5
- from typing import Iterable
5
+ from typing import Sequence
6
6
 
7
+ from jinja2 import Environment, FileSystemLoader
7
8
  from sqlmodel import Session
8
9
 
9
10
  from esgvoc.api import projects, search
10
- from esgvoc.api.project_specs import (
11
- GlobalAttributeSpecBase,
12
- GlobalAttributeSpecSpecific,
13
- GlobalAttributeVisitor,
14
- )
11
+ from esgvoc.api.project_specs import CatalogProperty, DrsType
15
12
  from esgvoc.core.constants import DRS_SPECS_JSON_KEY, PATTERN_JSON_KEY
16
- from esgvoc.core.db.models.project import PCollection, TermKind
17
- from esgvoc.core.exceptions import EsgvocNotFoundError, EsgvocNotImplementedError
13
+ from esgvoc.core.db.models.project import PCollection, PTerm, TermKind
14
+ from esgvoc.core.db.models.universe import UTerm
15
+ from esgvoc.core.exceptions import EsgvocException, EsgvocNotFoundError, EsgvocNotImplementedError, EsgvocValueError
18
16
 
19
17
  KEY_SEPARATOR = ':'
20
- JSON_SCHEMA_TEMPLATE_DIR_PATH = Path(__file__).parent
21
- JSON_SCHEMA_TEMPLATE_FILE_NAME_TEMPLATE = '{project_id}_template.json'
18
+ TEMPLATE_DIR_NAME = 'templates'
19
+ TEMPLATE_DIR_PATH = Path(__file__).parent.joinpath(TEMPLATE_DIR_NAME)
20
+ TEMPLATE_FILE_NAME = 'template.jinja'
22
21
  JSON_INDENTATION = 2
23
22
 
24
23
 
25
- def _process_plain(collection: PCollection, selected_field: str) -> set[str]:
26
- result: set[str] = set()
24
+ @dataclass
25
+ class _CatalogProperty:
26
+ field_name: str
27
+ field_value: dict
28
+ is_required: bool
29
+
30
+
31
+ def _process_col_plain_terms(collection: PCollection, source_collection_key: str) -> tuple[str, list[str]]:
32
+ property_values: set[str] = set()
27
33
  for term in collection.terms:
28
- if selected_field in term.specs:
29
- value = term.specs[selected_field]
30
- result.add(value)
31
- else:
32
- raise EsgvocNotFoundError(f'missing key {selected_field} for term {term.id} in ' +
33
- f'collection {collection.id}')
34
- return result
34
+ property_key, property_value = _process_plain_term(term, source_collection_key)
35
+ property_values.add(property_value)
36
+ return property_key, list(property_values) # type: ignore
37
+
38
+
39
+ def _process_plain_term(term: PTerm, source_collection_key: str) -> tuple[str, str]:
40
+ if source_collection_key in term.specs:
41
+ property_value = term.specs[source_collection_key]
42
+ else:
43
+ raise EsgvocNotFoundError(f'missing key {source_collection_key} for term {term.id} in ' +
44
+ f'collection {term.collection.id}')
45
+ return 'enum', property_value
35
46
 
36
47
 
37
- def _process_composite(collection: PCollection, universe_session: Session,
38
- project_session: Session) -> str:
39
- result = ""
48
+ def _process_col_composite_terms(collection: PCollection, universe_session: Session,
49
+ project_session: Session) -> tuple[str, list[str | dict], bool]:
50
+ result: list[str | dict] = list()
51
+ property_key = ""
52
+ has_pattern = False
40
53
  for term in collection.terms:
41
- _, parts = projects._get_composite_term_separator_parts(term)
42
- for part in parts:
43
- resolved_term = projects._resolve_term(part, universe_session, project_session)
44
- if resolved_term.kind == TermKind.PATTERN:
45
- result += resolved_term.specs[PATTERN_JSON_KEY]
46
- else:
47
- raise EsgvocNotImplementedError(f'{term.kind} term is not supported yet')
48
- # Patterns terms are meant to be validated individually.
49
- # So their regex are defined as a whole (begins by a ^, ends by a $).
50
- # As the pattern is a concatenation of plain or regex, multiple ^ and $ can exist.
51
- # The later, must be removed.
52
- result = result.replace('^', '').replace('$', '')
53
- result = f'^{result}$'
54
- return result
54
+ property_key, property_value, _has_pattern = _process_composite_term(term, universe_session,
55
+ project_session)
56
+ if isinstance(property_value, list):
57
+ result.extend(property_value)
58
+ else:
59
+ result.append(property_value)
60
+ has_pattern |= _has_pattern
61
+ return property_key, result, has_pattern
62
+
55
63
 
64
+ def _inner_process_composite_term(resolved_term: UTerm | PTerm,
65
+ universe_session: Session,
66
+ project_session: Session) -> tuple[str | list, bool]:
67
+ is_pattern = False
68
+ match resolved_term.kind:
69
+ case TermKind.PLAIN:
70
+ result = resolved_term.specs[DRS_SPECS_JSON_KEY]
71
+ case TermKind.PATTERN:
72
+ result = resolved_term.specs[PATTERN_JSON_KEY].replace('^', '').replace('$', '')
73
+ is_pattern = True
74
+ case TermKind.COMPOSITE:
75
+ _, result, is_pattern = _process_composite_term(resolved_term, universe_session,
76
+ project_session)
77
+ case _:
78
+ msg = f"unsupported term kind '{resolved_term.kind}'"
79
+ raise EsgvocNotImplementedError(msg)
80
+ return result, is_pattern
56
81
 
57
- def _process_pattern(collection: PCollection) -> str:
82
+
83
+ def _accumulate_resolved_part(resolved_part: list,
84
+ resolved_term: UTerm | PTerm,
85
+ universe_session: Session,
86
+ project_session: Session) -> bool:
87
+ tmp, has_pattern = _inner_process_composite_term(resolved_term, universe_session,
88
+ project_session)
89
+ if isinstance(tmp, list):
90
+ resolved_part.extend(tmp)
91
+ else:
92
+ resolved_part.append(tmp)
93
+ return has_pattern
94
+
95
+
96
+ def _process_composite_term(term: UTerm | PTerm, universe_session: Session,
97
+ project_session: Session) -> tuple[str, list[str | dict], bool]:
98
+ resolved_parts = list()
99
+ separator, parts = projects._get_composite_term_separator_parts(term)
100
+ has_pattern = False
101
+ for part in parts:
102
+ resolved_term = projects._resolve_composite_term_part(part, universe_session, project_session)
103
+ resolved_part = list()
104
+ if isinstance(resolved_term, Sequence):
105
+ for r_term in resolved_term:
106
+ has_pattern |= _accumulate_resolved_part(resolved_part, r_term, universe_session,
107
+ project_session)
108
+ else:
109
+ has_pattern = _accumulate_resolved_part(resolved_part, resolved_term, universe_session,
110
+ project_session)
111
+ resolved_parts.append(resolved_part)
112
+ property_values: list[str | dict] = list()
113
+ for combination in product(*resolved_parts):
114
+ # Pattern terms are meant to be validated individually.
115
+ # So their regexes are defined as a whole (beginning with ^ and ending with $).
116
+ # As the pattern is a concatenation of plain or regex parts, multiple ^ and $ can exist.
117
+ # These extra anchors must be removed.
118
+ tmp = separator.join(combination)
119
+ if has_pattern:
120
+ tmp = f'^{tmp}$'
121
+ tmp = {'pattern': tmp}
122
+ property_values.append(tmp)
123
+ property_key = 'anyOf' if has_pattern else 'enum'
124
+ return property_key, property_values, has_pattern
125
+
126
+
127
+ def _process_col_pattern_terms(collection: PCollection) -> tuple[str, str]:
58
128
  # The generation of the value of the field pattern for the collections with more than one term
59
129
  # is not specified yet.
60
130
  if len(collection.terms) == 1:
61
131
  term = collection.terms[0]
62
- return term.specs[PATTERN_JSON_KEY]
132
+ return _process_pattern_term(term)
63
133
  else:
64
134
  msg = f"unsupported collection of term pattern with more than one term for '{collection.id}'"
65
135
  raise EsgvocNotImplementedError(msg)
66
136
 
67
137
 
68
- def _generate_attribute_key(project_id: str, attribute_name) -> str:
69
- return f'{project_id}{KEY_SEPARATOR}{attribute_name}'
138
+ def _process_pattern_term(term: PTerm) -> tuple[str, str]:
139
+ return 'pattern', term.specs[PATTERN_JSON_KEY]
70
140
 
71
141
 
72
- class JsonPropertiesVisitor(GlobalAttributeVisitor, contextlib.AbstractContextManager):
142
+ class CatalogPropertiesJsonTranslator:
73
143
  def __init__(self, project_id: str) -> None:
74
144
  self.project_id = project_id
75
145
  # Project session can't be None here.
@@ -86,109 +156,155 @@ class JsonPropertiesVisitor(GlobalAttributeVisitor, contextlib.AbstractContextMa
86
156
  raise exception_value
87
157
  return True
88
158
 
89
- def _generate_attribute_property(self, attribute_name: str, source_collection: str,
90
- selected_field: str) -> tuple[str, str | set[str]]:
91
- property_value: str | set[str]
92
- property_key: str
93
- if source_collection not in self.collections:
94
- raise EsgvocNotFoundError(f"collection '{source_collection}' referenced by attribute " +
95
- f"{attribute_name} is not found")
96
- collection = self.collections[source_collection]
97
- match collection.term_kind:
98
- case TermKind.PLAIN:
99
- property_value = _process_plain(collection=collection,
100
- selected_field=selected_field)
101
- property_key = 'enum'
102
- case TermKind.COMPOSITE:
103
- property_value = _process_composite(collection=collection,
104
- universe_session=self.universe_session,
105
- project_session=self.project_session)
106
- property_key = 'pattern'
107
- case TermKind.PATTERN:
108
- property_value = _process_pattern(collection)
109
- property_key = 'pattern'
110
- case _:
111
- msg = f"unsupported term kind '{collection.term_kind}' " + \
112
- f"for global attribute {attribute_name}"
113
- raise EsgvocNotImplementedError(msg)
159
+ def _translate_property_value(self, catalog_property: CatalogProperty) \
160
+ -> tuple[str, str | list[str] | list[str | dict]]:
161
+ property_value: str | list[str] | list[str | dict]
162
+ if catalog_property.source_collection not in self.collections:
163
+ raise EsgvocNotFoundError(f"collection '{catalog_property.source_collection}' is not found")
164
+
165
+ if catalog_property.source_collection_key is None:
166
+ source_collection_key = DRS_SPECS_JSON_KEY
167
+ else:
168
+ source_collection_key = catalog_property.source_collection_key
169
+
170
+ if catalog_property.source_collection_term is None:
171
+ collection = self.collections[catalog_property.source_collection]
172
+ match collection.term_kind:
173
+ case TermKind.PLAIN:
174
+ property_key, property_value = _process_col_plain_terms(
175
+ collection=collection,
176
+ source_collection_key=source_collection_key)
177
+ case TermKind.COMPOSITE:
178
+ property_key, property_value, _ = _process_col_composite_terms(
179
+ collection=collection,
180
+ universe_session=self.universe_session,
181
+ project_session=self.project_session)
182
+ case TermKind.PATTERN:
183
+ property_key, property_value = _process_col_pattern_terms(collection)
184
+ case _:
185
+ msg = f"unsupported term kind '{collection.term_kind}'"
186
+ raise EsgvocNotImplementedError(msg)
187
+ else:
188
+ pterm_found = projects._get_term_in_collection(
189
+ session=self.project_session,
190
+ collection_id=catalog_property.source_collection,
191
+ term_id=catalog_property.source_collection_term)
192
+ if pterm_found is None:
193
+ raise EsgvocValueError(f"term '{catalog_property.source_collection_term}' is not " +
194
+ f"found in collection '{catalog_property.source_collection}'")
195
+ match pterm_found.kind:
196
+ case TermKind.PLAIN:
197
+ property_key, property_value = _process_plain_term(
198
+ term=pterm_found,
199
+ source_collection_key=source_collection_key)
200
+ case TermKind.COMPOSITE:
201
+ property_key, property_value, _ = _process_composite_term(
202
+ term=pterm_found,
203
+ universe_session=self.universe_session,
204
+ project_session=self.project_session)
205
+ case TermKind.PATTERN:
206
+ property_key, property_value = _process_pattern_term(term=pterm_found)
207
+ case _:
208
+ msg = f"unsupported term kind '{pterm_found.kind}'"
209
+ raise EsgvocNotImplementedError(msg)
114
210
  return property_key, property_value
115
211
 
116
- def visit_base_attribute(self, attribute_name: str, attribute: GlobalAttributeSpecBase) \
117
- -> tuple[str, dict[str, str | set[str]]]:
118
- attribute_key = _generate_attribute_key(self.project_id, attribute_name)
119
- attribute_properties: dict[str, str | set[str]] = dict()
120
- attribute_properties['type'] = attribute.value_type.value
121
- property_key, property_value = self._generate_attribute_property(attribute_name,
122
- attribute.source_collection,
123
- DRS_SPECS_JSON_KEY)
124
- attribute_properties[property_key] = property_value
125
- return attribute_key, attribute_properties
126
-
127
- def visit_specific_attribute(self, attribute_name: str, attribute: GlobalAttributeSpecSpecific) \
128
- -> tuple[str, dict[str, str | set[str]]]:
129
- attribute_key = _generate_attribute_key(self.project_id, attribute_name)
130
- attribute_properties: dict[str, str | set[str]] = dict()
131
- attribute_properties['type'] = attribute.value_type.value
132
- property_key, property_value = self._generate_attribute_property(attribute_name,
133
- attribute.source_collection,
134
- attribute.specific_key)
135
- attribute_properties[property_key] = property_value
136
- return attribute_key, attribute_properties
137
-
138
-
139
- def _inject_global_attributes(json_root: dict, project_id: str, attribute_names: Iterable[str]) -> None:
140
- attribute_properties = list()
141
- for attribute_name in attribute_names:
142
- attribute_key = _generate_attribute_key(project_id, attribute_name)
143
- attribute_properties.append({"required": [attribute_key]})
144
- json_root['definitions']['require_any']['anyOf'] = attribute_properties
145
-
146
-
147
- def _inject_properties(json_root: dict, properties: list[tuple]) -> None:
148
- for property in properties:
149
- json_root['definitions']['fields']['properties'][property[0]] = property[1]
150
-
151
-
152
- class SetEncoder(JSONEncoder):
153
- def default(self, o):
154
- if isinstance(o, set):
155
- return list(o)
212
+ def translate_property(self, catalog_property: CatalogProperty) -> _CatalogProperty:
213
+ property_key, property_value = self._translate_property_value(catalog_property)
214
+ field_value = dict()
215
+ if 'array' in catalog_property.catalog_field_value_type:
216
+ field_value['type'] = 'array'
217
+ root_property = dict()
218
+ field_value['items'] = root_property
219
+ root_property['type'] = catalog_property.catalog_field_value_type.split('_')[0]
220
+ root_property['minItems'] = 1
221
+ else:
222
+ field_value['type'] = catalog_property.catalog_field_value_type
223
+ root_property = field_value
224
+
225
+ root_property[property_key] = property_value
226
+
227
+ if catalog_property.catalog_field_name is None:
228
+ attribute_name = catalog_property.source_collection
156
229
  else:
157
- return super().default(self, o)
230
+ attribute_name = catalog_property.catalog_field_name
231
+ field_name = CatalogPropertiesJsonTranslator._translate_field_name(self.project_id,
232
+ attribute_name)
233
+ return _CatalogProperty(field_name=field_name,
234
+ field_value=field_value,
235
+ is_required=catalog_property.is_required)
236
+
237
+ @staticmethod
238
+ def _translate_field_name(project_id: str, attribute_name) -> str:
239
+ return f'{project_id}{KEY_SEPARATOR}{attribute_name}'
240
+
241
+
242
+ def _catalog_properties_json_processor(property_translator: CatalogPropertiesJsonTranslator,
243
+ properties: list[CatalogProperty]) -> list[_CatalogProperty]:
244
+ result: list[_CatalogProperty] = list()
245
+ for dataset_property_spec in properties:
246
+ catalog_property = property_translator.translate_property(dataset_property_spec)
247
+ result.append(catalog_property)
248
+ return result
158
249
 
159
250
 
160
- def generate_json_schema(project_id: str) -> str:
251
+ def generate_json_schema(project_id: str) -> dict:
161
252
  """
162
253
  Generate json schema for the given project.
163
254
 
164
255
  :param project_id: The id of the given project.
165
256
  :type project_id: str
166
- :returns: The content of a json schema
167
- :rtype: str
168
- :raises EsgvocNotFoundError: On missing information
169
- :raises EsgvocNotImplementedError: On unexpected operations
257
+ :returns: The root node of a json schema.
258
+ :rtype: dict
259
+ :raises EsgvocValueError: On wrong information in catalog_specs.
260
+ :raises EsgvocNotFoundError: On missing information in catalog_specs.
261
+ :raises EsgvocNotImplementedError: On unexpected operations (resulting from wrong information in catalog_specs).
262
+ :raises EsgvocException: On json compliance error.
170
263
  """
171
- file_name = JSON_SCHEMA_TEMPLATE_FILE_NAME_TEMPLATE.format(project_id=project_id)
172
- template_file_path = JSON_SCHEMA_TEMPLATE_DIR_PATH.joinpath(file_name)
173
- if template_file_path.exists():
174
- project_specs = projects.get_project(project_id)
175
- if project_specs:
176
- if project_specs.global_attributes_specs:
177
- with open(file=template_file_path, mode='r') as file, \
178
- JsonPropertiesVisitor(project_id) as visitor:
179
- file_content = file.read()
180
- root = json.loads(file_content)
181
- properties: list[tuple[str, dict[str, str | set[str]]]] = list()
182
- for attribute_name, attribute in project_specs.global_attributes_specs.items():
183
- attribute_key, attribute_properties = attribute.accept(attribute_name, visitor)
184
- properties.append((attribute_key, attribute_properties))
185
- _inject_properties(root, properties)
186
- _inject_global_attributes(root, project_id, project_specs.global_attributes_specs.keys())
187
- return json.dumps(root, indent=JSON_INDENTATION, cls=SetEncoder)
188
- else:
189
- raise EsgvocNotFoundError(f"global attributes for the project '{project_id}' " +
190
- "are not provided")
264
+ project_specs = projects.get_project(project_id)
265
+ if project_specs is not None:
266
+ catalog_specs = project_specs.catalog_specs
267
+ if catalog_specs is not None:
268
+ env = Environment(loader=FileSystemLoader(TEMPLATE_DIR_PATH)) # noqa: S701
269
+ template = env.get_template(TEMPLATE_FILE_NAME)
270
+
271
+ file_extension_version = catalog_specs.catalog_properties.extensions[0].version
272
+ drs_dataset_id_regex = project_specs.drs_specs[DrsType.DATASET_ID].regex
273
+ property_translator = CatalogPropertiesJsonTranslator(project_id)
274
+ catalog_dataset_properties = \
275
+ _catalog_properties_json_processor(property_translator,
276
+ catalog_specs.dataset_properties)
277
+
278
+ catalog_file_properties = \
279
+ _catalog_properties_json_processor(property_translator,
280
+ catalog_specs.file_properties)
281
+ del property_translator
282
+ json_raw_str = template.render(project_id=project_id,
283
+ catalog_version=catalog_specs.version,
284
+ file_extension_version=file_extension_version,
285
+ drs_dataset_id_regex=drs_dataset_id_regex,
286
+ catalog_dataset_properties=catalog_dataset_properties,
287
+ catalog_file_properties=catalog_file_properties)
288
+ # Json compliance checking.
289
+ try:
290
+ result = json.loads(json_raw_str)
291
+ return result
292
+ except Exception as e:
293
+ raise EsgvocException(f'unable to produce schema compliant to JSON: {e}') from e
191
294
  else:
192
- raise EsgvocNotFoundError(f"specs of project '{project_id}' is not found")
295
+ raise EsgvocNotFoundError(f"catalog properties for the project '{project_id}' " +
296
+ "are missing")
193
297
  else:
194
- raise EsgvocNotFoundError(f"template for project '{project_id}' is not found")
298
+ raise EsgvocNotFoundError(f"unknown project '{project_id}'")
299
+
300
+
301
+ def pretty_print_json_node(obj: dict) -> str:
302
+ """
303
+ Serialize a dictionary into json format.
304
+
305
+ :param obj: The dictionary.
306
+ :type obj: dict
307
+ :returns: a string that represents the dictionary in json format.
308
+ :rtype: str
309
+ """
310
+ return json.dumps(obj, indent=JSON_INDENTATION)