esgvoc 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic. Click here for more details.

Files changed (87) hide show
  1. esgvoc/__init__.py +1 -1
  2. esgvoc/api/__init__.py +95 -60
  3. esgvoc/api/data_descriptors/__init__.py +50 -28
  4. esgvoc/api/data_descriptors/activity.py +3 -3
  5. esgvoc/api/data_descriptors/area_label.py +16 -1
  6. esgvoc/api/data_descriptors/branded_suffix.py +20 -0
  7. esgvoc/api/data_descriptors/branded_variable.py +12 -0
  8. esgvoc/api/data_descriptors/consortium.py +14 -13
  9. esgvoc/api/data_descriptors/contact.py +5 -0
  10. esgvoc/api/data_descriptors/conventions.py +6 -0
  11. esgvoc/api/data_descriptors/creation_date.py +5 -0
  12. esgvoc/api/data_descriptors/data_descriptor.py +14 -9
  13. esgvoc/api/data_descriptors/data_specs_version.py +5 -0
  14. esgvoc/api/data_descriptors/date.py +1 -1
  15. esgvoc/api/data_descriptors/directory_date.py +1 -1
  16. esgvoc/api/data_descriptors/experiment.py +13 -11
  17. esgvoc/api/data_descriptors/forcing_index.py +1 -1
  18. esgvoc/api/data_descriptors/frequency.py +3 -3
  19. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  20. esgvoc/api/data_descriptors/grid_label.py +2 -2
  21. esgvoc/api/data_descriptors/horizontal_label.py +15 -1
  22. esgvoc/api/data_descriptors/initialisation_index.py +1 -1
  23. esgvoc/api/data_descriptors/institution.py +8 -5
  24. esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
  25. esgvoc/api/data_descriptors/license.py +3 -3
  26. esgvoc/api/data_descriptors/mip_era.py +1 -1
  27. esgvoc/api/data_descriptors/model_component.py +1 -1
  28. esgvoc/api/data_descriptors/obs_type.py +5 -0
  29. esgvoc/api/data_descriptors/organisation.py +1 -1
  30. esgvoc/api/data_descriptors/physic_index.py +1 -1
  31. esgvoc/api/data_descriptors/product.py +2 -2
  32. esgvoc/api/data_descriptors/publication_status.py +5 -0
  33. esgvoc/api/data_descriptors/realisation_index.py +1 -1
  34. esgvoc/api/data_descriptors/realm.py +1 -1
  35. esgvoc/api/data_descriptors/region.py +5 -0
  36. esgvoc/api/data_descriptors/resolution.py +3 -3
  37. esgvoc/api/data_descriptors/source.py +9 -5
  38. esgvoc/api/data_descriptors/source_type.py +1 -1
  39. esgvoc/api/data_descriptors/table.py +3 -2
  40. esgvoc/api/data_descriptors/temporal_label.py +15 -1
  41. esgvoc/api/data_descriptors/time_range.py +4 -3
  42. esgvoc/api/data_descriptors/title.py +5 -0
  43. esgvoc/api/data_descriptors/tracking_id.py +5 -0
  44. esgvoc/api/data_descriptors/variable.py +25 -12
  45. esgvoc/api/data_descriptors/variant_label.py +3 -3
  46. esgvoc/api/data_descriptors/vertical_label.py +14 -0
  47. esgvoc/api/project_specs.py +120 -4
  48. esgvoc/api/projects.py +733 -505
  49. esgvoc/api/py.typed +0 -0
  50. esgvoc/api/report.py +12 -8
  51. esgvoc/api/search.py +168 -98
  52. esgvoc/api/universe.py +368 -157
  53. esgvoc/apps/drs/constants.py +1 -1
  54. esgvoc/apps/drs/generator.py +51 -69
  55. esgvoc/apps/drs/report.py +60 -15
  56. esgvoc/apps/drs/validator.py +60 -71
  57. esgvoc/apps/jsg/cmip6_template.json +74 -0
  58. esgvoc/apps/jsg/cmip6plus_template.json +74 -0
  59. esgvoc/apps/jsg/json_schema_generator.py +185 -0
  60. esgvoc/apps/py.typed +0 -0
  61. esgvoc/cli/config.py +500 -0
  62. esgvoc/cli/drs.py +3 -2
  63. esgvoc/cli/find.py +138 -0
  64. esgvoc/cli/get.py +46 -38
  65. esgvoc/cli/main.py +10 -3
  66. esgvoc/cli/status.py +27 -18
  67. esgvoc/cli/valid.py +10 -15
  68. esgvoc/core/constants.py +1 -1
  69. esgvoc/core/db/__init__.py +2 -4
  70. esgvoc/core/db/connection.py +5 -3
  71. esgvoc/core/db/models/project.py +57 -15
  72. esgvoc/core/db/models/universe.py +49 -10
  73. esgvoc/core/db/project_ingestion.py +79 -65
  74. esgvoc/core/db/universe_ingestion.py +71 -40
  75. esgvoc/core/exceptions.py +33 -0
  76. esgvoc/core/logging_handler.py +24 -2
  77. esgvoc/core/repo_fetcher.py +61 -59
  78. esgvoc/core/service/data_merger.py +47 -34
  79. esgvoc/core/service/state.py +107 -83
  80. {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/METADATA +7 -20
  81. esgvoc-1.0.0.dist-info/RECORD +95 -0
  82. esgvoc/api/_utils.py +0 -53
  83. esgvoc/core/logging.conf +0 -21
  84. esgvoc-0.3.0.dist-info/RECORD +0 -78
  85. {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/WHEEL +0 -0
  86. {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/entry_points.txt +0 -0
  87. {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -2,17 +2,33 @@ from typing import cast
2
2
 
3
3
  import esgvoc.api.projects as projects
4
4
  import esgvoc.apps.drs.constants as constants
5
- from esgvoc.api import APIException
6
- from esgvoc.api.project_specs import (DrsCollection, DrsConstant, DrsPart,
7
- DrsPartKind, DrsSpecification, DrsType,
8
- ProjectSpecs)
9
- from esgvoc.apps.drs.report import (BlankTerm, ComplianceIssue, DrsIssue,
10
- DrsValidationReport, ExtraChar,
11
- ExtraSeparator, ExtraTerm,
12
- FileNameExtensionIssue, InvalidTerm,
13
- MissingTerm, ParsingIssue, Space,
14
- Unparsable, ValidationError,
15
- ValidationWarning)
5
+ from esgvoc.api.project_specs import (
6
+ DrsCollection,
7
+ DrsConstant,
8
+ DrsPart,
9
+ DrsPartKind,
10
+ DrsSpecification,
11
+ DrsType,
12
+ ProjectSpecs,
13
+ )
14
+ from esgvoc.apps.drs.report import (
15
+ BlankTerm,
16
+ ComplianceIssue,
17
+ DrsIssue,
18
+ DrsValidationReport,
19
+ ExtraChar,
20
+ ExtraSeparator,
21
+ ExtraTerm,
22
+ FileNameExtensionIssue,
23
+ InvalidTerm,
24
+ MissingTerm,
25
+ ParsingIssue,
26
+ Space,
27
+ Unparsable,
28
+ ValidationError,
29
+ ValidationWarning,
30
+ )
31
+ from esgvoc.core.exceptions import EsgvocDbError, EsgvocNotFoundError
16
32
 
17
33
 
18
34
  class DrsApplication:
@@ -25,9 +41,9 @@ class DrsApplication:
25
41
  """The project id."""
26
42
  self.pedantic: bool = pedantic
27
43
  """Same as the option of GCC: turn warnings into errors. Default False."""
28
- project_specs: ProjectSpecs|None = projects.find_project(project_id)
44
+ project_specs: ProjectSpecs | None = projects.get_project(project_id)
29
45
  if not project_specs:
30
- raise APIException(f'unable to find project {project_id}')
46
+ raise EsgvocNotFoundError(f"unable to find project '{project_id}'")
31
47
  for specs in project_specs.drs_specs:
32
48
  match specs.type:
33
49
  case DrsType.DIRECTORY:
@@ -40,7 +56,7 @@ class DrsApplication:
40
56
  self.dataset_id_specs: DrsSpecification = specs
41
57
  """The DRS dataset id specs of the project."""
42
58
  case _:
43
- raise RuntimeError(f'unsupported DRS specs type {specs.type}')
59
+ raise EsgvocDbError(f"unsupported DRS specs type '{specs.type}'")
44
60
 
45
61
  def _get_full_file_name_extension(self) -> str:
46
62
  """
@@ -55,8 +71,8 @@ class DrsApplication:
55
71
  full_extension = specs.properties[constants.FILE_NAME_EXTENSION_SEPARATOR_KEY] + \
56
72
  specs.properties[constants.FILE_NAME_EXTENSION_KEY]
57
73
  else:
58
- raise RuntimeError('missing properties in the DRS file name specifications of the ' +
59
- f'project {self.project_id}')
74
+ raise EsgvocDbError('missing properties in the DRS file name specifications of the ' +
75
+ f"project '{self.project_id}'")
60
76
  return full_extension
61
77
 
62
78
 
@@ -66,7 +82,7 @@ class DrsValidator(DrsApplication):
66
82
  """
67
83
 
68
84
  def validate_directory(self, drs_expression: str,
69
- prefix: str|None = None) -> DrsValidationReport:
85
+ prefix: str | None = None) -> DrsValidationReport:
70
86
  """
71
87
  Validate a DRS directory expression.
72
88
 
@@ -112,7 +128,7 @@ class DrsValidator(DrsApplication):
112
128
  [issue], [])
113
129
  return result
114
130
 
115
- def validate(self, drs_expression: str, drs_type: DrsType|str) -> DrsValidationReport:
131
+ def validate(self, drs_expression: str, drs_type: DrsType | str) -> DrsValidationReport:
116
132
  """
117
133
  Validate a DRS expression.
118
134
 
@@ -131,14 +147,14 @@ class DrsValidator(DrsApplication):
131
147
  case DrsType.DATASET_ID:
132
148
  return self.validate_dataset_id(drs_expression=drs_expression)
133
149
  case _:
134
- raise RuntimeError(f'unsupported drs type {drs_type}')
150
+ raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
135
151
 
136
152
  def _parse(self,
137
153
  drs_expression: str,
138
154
  separator: str,
139
- drs_type: DrsType) -> tuple[list[str]|None, # terms
155
+ drs_type: DrsType) -> tuple[list[str] | None, # terms
140
156
  list[DrsIssue], # Errors
141
- list[DrsIssue]]: # Warnings
157
+ list[DrsIssue]]: # Warnings
142
158
  errors: list[DrsIssue] = list()
143
159
  warnings: list[DrsIssue] = list()
144
160
  cursor_offset = 0
@@ -160,7 +176,7 @@ class DrsValidator(DrsApplication):
160
176
  terms = drs_expression.split(separator)
161
177
  if len(terms) < 2:
162
178
  errors.append(Unparsable(expected_drs_type=drs_type))
163
- return None, errors, warnings # Early exit
179
+ return None, errors, warnings # Early exit
164
180
  max_term_index = len(terms)
165
181
  cursor_position = initial_cursor_position = len(drs_expression) + 1
166
182
  has_white_term = False
@@ -178,7 +194,10 @@ class DrsValidator(DrsApplication):
178
194
  column = cursor_position+cursor_offset
179
195
  if (drs_type == DrsType.DIRECTORY) and (not has_white_term):
180
196
  issue = ExtraSeparator(column=column)
181
- warnings.append(issue)
197
+ if self.pedantic:
198
+ errors.append(issue)
199
+ else:
200
+ warnings.append(issue)
182
201
  else:
183
202
  issue = ExtraChar(column=column)
184
203
  errors.append(issue)
@@ -188,7 +207,7 @@ class DrsValidator(DrsApplication):
188
207
  if not term:
189
208
  column = cursor_position + cursor_offset
190
209
  issue = ExtraSeparator(column=column)
191
- if (drs_type != DrsType.DIRECTORY) or self.pedantic or (index == 0):
210
+ if self.pedantic or drs_type != DrsType.DIRECTORY or index == 0:
192
211
  errors.append(issue)
193
212
  else:
194
213
  warnings.append(issue)
@@ -200,10 +219,10 @@ class DrsValidator(DrsApplication):
200
219
  del terms[index]
201
220
  cursor_position -= len_term + 1
202
221
 
203
- # Mypy doesn't understand that ParsingIssues are DrsIssues...
204
- sorted_errors = DrsValidator._sort_parser_issues(errors) # type: ignore
205
- sorted_warnings = DrsValidator._sort_parser_issues(warnings) # type: ignore
206
- return terms, sorted_errors, sorted_warnings # type: ignore
222
+ # Mypy doesn't understand that ParsingIssues are DrsIssues...
223
+ sorted_errors = DrsValidator._sort_parser_issues(errors) # type: ignore
224
+ sorted_warnings = DrsValidator._sort_parser_issues(warnings) # type: ignore
225
+ return terms, sorted_errors, sorted_warnings # type: ignore
207
226
 
208
227
  @staticmethod
209
228
  def _sort_parser_issues(issues: list[ParsingIssue]) -> list[ParsingIssue]:
@@ -213,13 +232,9 @@ class DrsValidator(DrsApplication):
213
232
  match part.kind:
214
233
  case DrsPartKind.COLLECTION:
215
234
  casted_part: DrsCollection = cast(DrsCollection, part)
216
- try:
217
- matching_terms = projects.valid_term_in_collection(term,
218
- self.project_id,
219
- casted_part.collection_id)
220
- except Exception as e:
221
- msg = f'problem while validating term: {e}.Abort.'
222
- raise APIException(msg) from e
235
+ matching_terms = projects.valid_term_in_collection(term,
236
+ self.project_id,
237
+ casted_part.collection_id)
223
238
  if len(matching_terms) > 0:
224
239
  return True
225
240
  else:
@@ -228,7 +243,7 @@ class DrsValidator(DrsApplication):
228
243
  part_casted: DrsConstant = cast(DrsConstant, part)
229
244
  return part_casted.value != term
230
245
  case _:
231
- raise RuntimeError(f'unsupported DRS specs part type {part.kind}')
246
+ raise EsgvocDbError(f"unsupported DRS specs part type '{part.kind}'")
232
247
 
233
248
  def _create_report(self,
234
249
  type: DrsType,
@@ -245,7 +260,7 @@ class DrsValidator(DrsApplication):
245
260
  specs: DrsSpecification) -> DrsValidationReport:
246
261
  terms, errors, warnings = self._parse(drs_expression, specs.separator, specs.type)
247
262
  if not terms:
248
- return self._create_report(specs.type, drs_expression, errors, warnings) # Early exit.
263
+ return self._create_report(specs.type, drs_expression, errors, warnings) # Early exit.
249
264
  term_index = 0
250
265
  term_max_index = len(terms)
251
266
  part_index = 0
@@ -259,27 +274,27 @@ class DrsValidator(DrsApplication):
259
274
  part_index += 1
260
275
  matching_code_mapping[part.__str__()] = 0
261
276
  elif part.kind == DrsPartKind.CONSTANT or \
262
- cast(DrsCollection, part).is_required:
277
+ cast(DrsCollection, part).is_required: # noqa E127
263
278
  issue: ComplianceIssue = InvalidTerm(term=term,
264
- term_position=term_index+1,
265
- collection_id_or_constant_value=str(part))
279
+ term_position=term_index+1,
280
+ collection_id_or_constant_value=str(part))
266
281
  errors.append(issue)
267
282
  matching_code_mapping[part.__str__()] = 1
268
283
  term_index += 1
269
284
  part_index += 1
270
- else: # The part is not required so try to match the term with the next part.
285
+ else: # The part is not required so try to match the term with the next part.
271
286
  part_index += 1
272
287
  matching_code_mapping[part.__str__()] = -1
273
288
  if term_index == term_max_index:
274
289
  break
275
290
  # Cases:
276
291
  # - All terms and collections have been processed.
277
- # - Not enough term to process all collections.
292
+ # - Not enough term to process all collections.
278
293
  # - Extra terms left whereas all collections have been processed:
279
294
  # + The last collections are required => report extra terms.
280
295
  # + The last collections are not required and these terms were not validated by them.
281
296
  # => Should report error even if the collections are not required.
282
- if part_index < part_max_index: # Missing terms.
297
+ if part_index < part_max_index: # Missing terms.
283
298
  for index in range(part_index, part_max_index):
284
299
  part = specs.parts[index]
285
300
  issue = MissingTerm(collection_id=str(part), collection_position=index+1)
@@ -288,43 +303,17 @@ class DrsValidator(DrsApplication):
288
303
  errors.append(issue)
289
304
  else:
290
305
  warnings.append(issue)
291
- elif term_index < term_max_index: # Extra terms.
306
+ elif term_index < term_max_index: # Extra terms.
292
307
  part_index -= term_max_index - term_index
293
308
  for index in range(term_index, term_max_index):
294
309
  term = terms[index]
295
310
  part = specs.parts[part_index]
296
311
  if part.kind != DrsPartKind.CONSTANT and \
297
312
  (not cast(DrsCollection, part).is_required) and \
298
- matching_code_mapping[part.__str__()] < 0:
313
+ matching_code_mapping[part.__str__()] < 0: # noqa E125
299
314
  issue = ExtraTerm(term=term, term_position=index, collection_id=str(part))
300
315
  else:
301
316
  issue = ExtraTerm(term=term, term_position=index, collection_id=None)
302
317
  errors.append(issue)
303
318
  part_index += 1
304
319
  return self._create_report(specs.type, drs_expression, errors, warnings)
305
-
306
-
307
- if __name__ == "__main__":
308
- project_id = 'cmip6plus'
309
- validator = DrsValidator(project_id)
310
- drs_expressions = [
311
- ".CMIP6Plus.CMIP.IPSL. .MIROC6.amip..r2i2p1f2.ACmon.od550aer. ..gn",
312
- ]
313
- import time
314
- for drs_expression in drs_expressions:
315
- start_time = time.perf_counter_ns()
316
- report = validator.validate_dataset_id(drs_expression)
317
- stop_time = time.perf_counter_ns()
318
- print(f'elapsed time: {(stop_time-start_time)/1000000} ms')
319
- if report.nb_errors > 0:
320
- print(f'error(s): {report.nb_errors}')
321
- for error in report.errors:
322
- print(error)
323
- else:
324
- print('error(s): 0')
325
- if report.nb_warnings > 0:
326
- print(f'warning(s): {report.nb_warnings}')
327
- for warning in report.warnings:
328
- print(warning)
329
- else:
330
- print('warning(s): 0')
@@ -0,0 +1,74 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "$id": "https://stac-extensions.github.io/cmip6/v1.0.0/schema.json#",
4
+ "title": "CMIP6 Extension",
5
+ "description": "STAC CMIP6 Extension for STAC Items and STAC Collection Summaries.",
6
+ "type": "object",
7
+ "required": [
8
+ "stac_extensions"
9
+ ],
10
+ "properties": {
11
+ "stac_extensions": {
12
+ "type": "array",
13
+ "contains": {
14
+ "const": "https://stac-extensions.github.io/cmip6/v1.0.0/schema.json"
15
+ }
16
+ }
17
+ },
18
+ "oneOf": [
19
+ {
20
+ "$comment": "This is the schema for STAC Items.",
21
+ "type": "object",
22
+ "required": [
23
+ "type",
24
+ "properties"
25
+ ],
26
+ "properties": {
27
+ "type": {
28
+ "const": "Feature"
29
+ },
30
+ "properties": {
31
+ "allOf": [
32
+ {
33
+ "$ref": "#/definitions/require_any"
34
+ },
35
+ {
36
+ "$ref": "#/definitions/fields"
37
+ }
38
+ ]
39
+ }
40
+ }
41
+ },
42
+ {
43
+ "$comment": "This is the schema for STAC Collections, or more specifically only Collection Summaries in this case. By default, only checks the existence of the properties, but not the schema of the summaries.",
44
+ "type": "object",
45
+ "required": [
46
+ "type",
47
+ "summaries"
48
+ ],
49
+ "properties": {
50
+ "type": {
51
+ "const": "Collection"
52
+ },
53
+ "summaries": {
54
+ "$ref": "#/definitions/require_any"
55
+ }
56
+ }
57
+ }
58
+ ],
59
+ "definitions": {
60
+ "require_any": {
61
+ "$comment": "Please list all fields here so that we can force the existence of one of them in other parts of the schemas."
62
+ },
63
+ "fields": {
64
+ "$comment": " Don't require fields here, do that above in the corresponding schema.",
65
+ "type": "object",
66
+ "properties": {
67
+ },
68
+ "patternProperties": {
69
+ "^(?!cmip6:)": {}
70
+ },
71
+ "additionalProperties": false
72
+ }
73
+ }
74
+ }
@@ -0,0 +1,74 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "$id": "https://stac-extensions.github.io/cmip6plus/v1.0.0/schema.json#",
4
+ "title": "CMIP6Plus Extension",
5
+ "description": "STAC CMIP6Plus Extension for STAC Items and STAC Collection Summaries.",
6
+ "type": "object",
7
+ "required": [
8
+ "stac_extensions"
9
+ ],
10
+ "properties": {
11
+ "stac_extensions": {
12
+ "type": "array",
13
+ "contains": {
14
+ "const": "https://stac-extensions.github.io/cmip6plus/v1.0.0/schema.json"
15
+ }
16
+ }
17
+ },
18
+ "oneOf": [
19
+ {
20
+ "$comment": "This is the schema for STAC Items.",
21
+ "type": "object",
22
+ "required": [
23
+ "type",
24
+ "properties"
25
+ ],
26
+ "properties": {
27
+ "type": {
28
+ "const": "Feature"
29
+ },
30
+ "properties": {
31
+ "allOf": [
32
+ {
33
+ "$ref": "#/definitions/require_any"
34
+ },
35
+ {
36
+ "$ref": "#/definitions/fields"
37
+ }
38
+ ]
39
+ }
40
+ }
41
+ },
42
+ {
43
+ "$comment": "This is the schema for STAC Collections, or more specifically only Collection Summaries in this case. By default, only checks the existence of the properties, but not the schema of the summaries.",
44
+ "type": "object",
45
+ "required": [
46
+ "type",
47
+ "summaries"
48
+ ],
49
+ "properties": {
50
+ "type": {
51
+ "const": "Collection"
52
+ },
53
+ "summaries": {
54
+ "$ref": "#/definitions/require_any"
55
+ }
56
+ }
57
+ }
58
+ ],
59
+ "definitions": {
60
+ "require_any": {
61
+ "$comment": "Please list all fields here so that we can force the existence of one of them in other parts of the schemas."
62
+ },
63
+ "fields": {
64
+ "$comment": " Don't require fields here, do that above in the corresponding schema.",
65
+ "type": "object",
66
+ "properties": {
67
+ },
68
+ "patternProperties": {
69
+ "^(?!cmip6plus:)": {}
70
+ },
71
+ "additionalProperties": false
72
+ }
73
+ }
74
+ }
@@ -0,0 +1,185 @@
1
+ import contextlib
2
+ import json
3
+ from pathlib import Path
4
+ from typing import Iterable
5
+
6
+ from sqlmodel import Session
7
+
8
+ from esgvoc.api import projects, search
9
+ from esgvoc.api.project_specs import (
10
+ GlobalAttributeSpecBase,
11
+ GlobalAttributeSpecSpecific,
12
+ GlobalAttributeVisitor,
13
+ )
14
+ from esgvoc.core.constants import DRS_SPECS_JSON_KEY, PATTERN_JSON_KEY
15
+ from esgvoc.core.db.models.project import PCollection, TermKind
16
+ from esgvoc.core.exceptions import EsgvocNotFoundError, EsgvocNotImplementedError
17
+
18
# Separator placed between the project id and the attribute name in a property key.
KEY_SEPARATOR = ':'
# Directory searched for the JSON schema templates: the directory of this module.
JSON_SCHEMA_TEMPLATE_DIR_PATH = Path(__file__).parent
# File name of a template; the placeholder is filled with the project id.
JSON_SCHEMA_TEMPLATE_FILE_NAME_TEMPLATE = '{project_id}_template.json'
# Indentation width used when the generated schema is serialised.
JSON_INDENTATION = 2
22
+
23
+
24
def _process_plain(collection: PCollection, selected_field: str) -> list[str]:
    """
    Collect the value of one field from every term of a collection.

    :param collection: The collection whose terms are read.
    :param selected_field: The key looked up in the specs of each term.
    :returns: The values found, in the order of the terms of the collection.
    :raises EsgvocNotFoundError: If the specs of a term do not contain the key.
    """
    values: list[str] = list()
    for term in collection.terms:
        # Fail on the first term that does not provide the requested field.
        if selected_field not in term.specs:
            raise EsgvocNotFoundError(f'missing key {selected_field} for term {term.id} in ' +
                                      f'collection {collection.id}')
        values.append(term.specs[selected_field])
    return values
34
+
35
+
36
def _process_composite(collection: PCollection, universe_session: Session,
                       project_session: Session) -> str:
    """
    Build a single regular expression from the composite terms of a collection.

    Every part of every composite term is resolved and the patterns of the resolved
    terms are concatenated in order.

    :param collection: The collection of composite terms.
    :param universe_session: The session handed to the term resolution.
    :param project_session: The session handed to the term resolution.
    :returns: The concatenated pattern, anchored once by ``^`` and ``$``.
    :raises EsgvocNotImplementedError: If a resolved part is not a pattern term.
    """
    patterns: list[str] = list()
    for term in collection.terms:
        _, parts = projects._get_composite_term_separator_parts(term)
        for part in parts:
            resolved_term = projects._resolve_term(part, universe_session, project_session)
            if resolved_term.kind != TermKind.PATTERN:
                # Report the kind of the part that can't be handled, not the kind of
                # the enclosing composite term.
                raise EsgvocNotImplementedError(f'{resolved_term.kind} term is not supported yet')
            patterns.append(resolved_term.specs[PATTERN_JSON_KEY])
    # Pattern terms are meant to be validated individually.
    # So their regexes are defined as a whole (begin with a ^, end with a $).
    # As the result is a concatenation of such regexes, multiple ^ and $ can exist.
    # The latter must be removed before anchoring the whole expression once.
    # NOTE(review): this also strips a ^ used inside a character class (e.g. [^/]);
    # confirm that no pattern term relies on negated classes.
    result = ''.join(patterns).replace('^', '').replace('$', '')
    return f'^{result}$'
54
+
55
+
56
def _process_pattern(collection: PCollection) -> str:
    """
    Return the pattern of the only term of a collection of pattern terms.

    :param collection: The collection of pattern terms.
    :returns: The value stored under the pattern key of the specs of the term.
    :raises EsgvocNotImplementedError: If the collection does not hold exactly one term.
    """
    # The generation of the value of the field pattern for the collections with more
    # than one term is not specified yet.
    if len(collection.terms) != 1:
        msg = f"unsupported collection of term pattern with more than one term for '{collection.id}'"
        raise EsgvocNotImplementedError(msg)
    only_term = collection.terms[0]
    return only_term.specs[PATTERN_JSON_KEY]
65
+
66
+
67
def _generate_attribute_key(project_id: str, attribute_name) -> str:
    """
    Build the property key of a global attribute.

    :param project_id: The id of the project, used as prefix.
    :param attribute_name: The name of the global attribute.
    :returns: The project id, the key separator and the attribute name, concatenated.
    """
    return '{}{}{}'.format(project_id, KEY_SEPARATOR, attribute_name)
69
+
70
+
71
+ class JsonPropertiesVisitor(GlobalAttributeVisitor, contextlib.AbstractContextManager):
72
+ def __init__(self, project_id: str) -> None:
73
+ self.project_id = project_id
74
+ # Project session can't be None here.
75
+ self.universe_session: Session = search.get_universe_session()
76
+ self.project_session: Session = projects._get_project_session_with_exception(project_id)
77
+ self.collections: dict[str, PCollection] = dict()
78
+ for collection in projects._get_all_collections_in_project(self.project_session):
79
+ self.collections[collection.id] = collection
80
+
81
+ def __exit__(self, exception_type, exception_value, exception_traceback):
82
+ self.project_session.close()
83
+ self.universe_session.close()
84
+ if exception_type is not None:
85
+ raise exception_value
86
+ return True
87
+
88
+ def _generate_attribute_property(self, attribute_name: str, source_collection: str,
89
+ selected_field: str) -> tuple[str, str | list[str]]:
90
+ property_value: str | list[str]
91
+ property_key: str
92
+ if source_collection not in self.collections:
93
+ raise EsgvocNotFoundError(f"collection '{source_collection}' referenced by attribute " +
94
+ f"{attribute_name} is not found")
95
+ collection = self.collections[source_collection]
96
+ match collection.term_kind:
97
+ case TermKind.PLAIN:
98
+ property_value = _process_plain(collection=collection,
99
+ selected_field=selected_field)
100
+ property_key = 'enum'
101
+ case TermKind.COMPOSITE:
102
+ property_value = _process_composite(collection=collection,
103
+ universe_session=self.universe_session,
104
+ project_session=self.project_session)
105
+ property_key = 'pattern'
106
+ case TermKind.PATTERN:
107
+ property_value = _process_pattern(collection)
108
+ property_key = 'pattern'
109
+ case _:
110
+ msg = f"unsupported term kind '{collection.term_kind}' " + \
111
+ f"for global attribute {attribute_name}"
112
+ raise EsgvocNotImplementedError(msg)
113
+ return property_key, property_value
114
+
115
+ def visit_base_attribute(self, attribute_name: str, attribute: GlobalAttributeSpecBase) \
116
+ -> tuple[str, dict[str, str | list[str]]]:
117
+ attribute_key = _generate_attribute_key(self.project_id, attribute_name)
118
+ attribute_properties: dict[str, str | list[str]] = dict()
119
+ attribute_properties['type'] = attribute.value_type.value
120
+ property_key, property_value = self._generate_attribute_property(attribute_name,
121
+ attribute.source_collection,
122
+ DRS_SPECS_JSON_KEY)
123
+ attribute_properties[property_key] = property_value
124
+ return attribute_key, attribute_properties
125
+
126
+ def visit_specific_attribute(self, attribute_name: str, attribute: GlobalAttributeSpecSpecific) \
127
+ -> tuple[str, dict[str, str | list[str]]]:
128
+ attribute_key = _generate_attribute_key(self.project_id, attribute_name)
129
+ attribute_properties: dict[str, str | list[str]] = dict()
130
+ attribute_properties['type'] = attribute.value_type.value
131
+ property_key, property_value = self._generate_attribute_property(attribute_name,
132
+ attribute.source_collection,
133
+ attribute.specific_key)
134
+ attribute_properties[property_key] = property_value
135
+ return attribute_key, attribute_properties
136
+
137
+
138
def _inject_global_attributes(json_root: dict, project_id: str, attribute_names: Iterable[str]) -> None:
    """
    Fill the ``require_any`` definition so that at least one attribute must be present.

    :param json_root: The root of the JSON schema, modified in place.
    :param project_id: The id of the project, used to build the attribute keys.
    :param attribute_names: The names of the global attributes.
    """
    json_root['definitions']['require_any']['anyOf'] = [
        {"required": [_generate_attribute_key(project_id, name)]}
        for name in attribute_names
    ]
144
+
145
+
146
+ def _inject_properties(json_root: dict, properties: list[tuple]) -> None:
147
+ for property in properties:
148
+ json_root['definitions']['fields']['properties'][property[0]] = property[1]
149
+
150
+
151
def generate_json_schema(project_id: str) -> str:
    """
    Generate json schema for the given project.

    The schema is built from the template of the project, completed with one property
    per global attribute of the project specs.

    :param project_id: The id of the given project.
    :type project_id: str
    :returns: The content of a json schema
    :rtype: str
    :raises EsgvocNotFoundError: On missing information
    :raises EsgvocNotImplementedError: On unexpected operations
    """
    file_name = JSON_SCHEMA_TEMPLATE_FILE_NAME_TEMPLATE.format(project_id=project_id)
    template_file_path = JSON_SCHEMA_TEMPLATE_DIR_PATH.joinpath(file_name)
    # Guard clauses, checked in the same order as before: template, project, attributes.
    if not template_file_path.exists():
        raise EsgvocNotFoundError(f"template for project '{project_id}' is not found")
    project_specs = projects.get_project(project_id)
    if not project_specs:
        raise EsgvocNotFoundError(f"project '{project_id}' is not found")
    global_attributes_specs = project_specs.global_attributes_specs
    if not global_attributes_specs:
        raise EsgvocNotFoundError(f"global attributes for the project '{project_id}' " +
                                  "are not provided")
    # JSON text is UTF-8: don't depend on the encoding of the locale to read the template.
    with open(file=template_file_path, mode='r', encoding='utf-8') as file, \
         JsonPropertiesVisitor(project_id) as visitor:
        root = json.load(file)
        properties: list[tuple[str, dict[str, str | list[str]]]] = list()
        for attribute_name, attribute in global_attributes_specs.items():
            attribute_key, attribute_properties = attribute.accept(attribute_name, visitor)
            properties.append((attribute_key, attribute_properties))
        _inject_properties(root, properties)
        _inject_global_attributes(root, project_id, global_attributes_specs.keys())
        return json.dumps(root, indent=JSON_INDENTATION)
esgvoc/apps/py.typed ADDED
File without changes