esgvoc 0.4.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic. Click here for more details.

Files changed (73) hide show
  1. esgvoc/__init__.py +1 -1
  2. esgvoc/api/data_descriptors/__init__.py +50 -28
  3. esgvoc/api/data_descriptors/activity.py +3 -3
  4. esgvoc/api/data_descriptors/area_label.py +16 -1
  5. esgvoc/api/data_descriptors/branded_suffix.py +20 -0
  6. esgvoc/api/data_descriptors/branded_variable.py +12 -0
  7. esgvoc/api/data_descriptors/consortium.py +14 -13
  8. esgvoc/api/data_descriptors/contact.py +5 -0
  9. esgvoc/api/data_descriptors/conventions.py +6 -0
  10. esgvoc/api/data_descriptors/creation_date.py +5 -0
  11. esgvoc/api/data_descriptors/data_descriptor.py +14 -9
  12. esgvoc/api/data_descriptors/data_specs_version.py +5 -0
  13. esgvoc/api/data_descriptors/date.py +1 -1
  14. esgvoc/api/data_descriptors/directory_date.py +1 -1
  15. esgvoc/api/data_descriptors/experiment.py +13 -11
  16. esgvoc/api/data_descriptors/forcing_index.py +1 -1
  17. esgvoc/api/data_descriptors/frequency.py +3 -3
  18. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  19. esgvoc/api/data_descriptors/grid_label.py +2 -2
  20. esgvoc/api/data_descriptors/horizontal_label.py +15 -1
  21. esgvoc/api/data_descriptors/initialisation_index.py +1 -1
  22. esgvoc/api/data_descriptors/institution.py +8 -5
  23. esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
  24. esgvoc/api/data_descriptors/license.py +3 -3
  25. esgvoc/api/data_descriptors/mip_era.py +1 -1
  26. esgvoc/api/data_descriptors/model_component.py +1 -1
  27. esgvoc/api/data_descriptors/obs_type.py +5 -0
  28. esgvoc/api/data_descriptors/organisation.py +1 -1
  29. esgvoc/api/data_descriptors/physic_index.py +1 -1
  30. esgvoc/api/data_descriptors/product.py +2 -2
  31. esgvoc/api/data_descriptors/publication_status.py +5 -0
  32. esgvoc/api/data_descriptors/realisation_index.py +1 -1
  33. esgvoc/api/data_descriptors/realm.py +1 -1
  34. esgvoc/api/data_descriptors/region.py +5 -0
  35. esgvoc/api/data_descriptors/resolution.py +3 -3
  36. esgvoc/api/data_descriptors/source.py +9 -5
  37. esgvoc/api/data_descriptors/source_type.py +1 -1
  38. esgvoc/api/data_descriptors/table.py +3 -2
  39. esgvoc/api/data_descriptors/temporal_label.py +15 -1
  40. esgvoc/api/data_descriptors/time_range.py +4 -3
  41. esgvoc/api/data_descriptors/title.py +5 -0
  42. esgvoc/api/data_descriptors/tracking_id.py +5 -0
  43. esgvoc/api/data_descriptors/variable.py +25 -12
  44. esgvoc/api/data_descriptors/variant_label.py +3 -3
  45. esgvoc/api/data_descriptors/vertical_label.py +14 -0
  46. esgvoc/api/project_specs.py +117 -2
  47. esgvoc/api/projects.py +242 -279
  48. esgvoc/api/search.py +30 -3
  49. esgvoc/api/universe.py +42 -27
  50. esgvoc/apps/jsg/cmip6_template.json +74 -0
  51. esgvoc/apps/jsg/cmip6plus_template.json +74 -0
  52. esgvoc/apps/jsg/json_schema_generator.py +185 -0
  53. esgvoc/cli/config.py +500 -0
  54. esgvoc/cli/find.py +138 -0
  55. esgvoc/cli/get.py +43 -38
  56. esgvoc/cli/main.py +10 -3
  57. esgvoc/cli/status.py +27 -18
  58. esgvoc/cli/valid.py +10 -15
  59. esgvoc/core/db/models/project.py +11 -11
  60. esgvoc/core/db/models/universe.py +3 -3
  61. esgvoc/core/db/project_ingestion.py +40 -40
  62. esgvoc/core/db/universe_ingestion.py +36 -33
  63. esgvoc/core/logging_handler.py +24 -2
  64. esgvoc/core/repo_fetcher.py +61 -59
  65. esgvoc/core/service/data_merger.py +47 -34
  66. esgvoc/core/service/state.py +107 -83
  67. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/METADATA +7 -20
  68. esgvoc-1.0.0.dist-info/RECORD +95 -0
  69. esgvoc/core/logging.conf +0 -21
  70. esgvoc-0.4.0.dist-info/RECORD +0 -80
  71. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/WHEEL +0 -0
  72. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/entry_points.txt +0 -0
  73. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/api/search.py CHANGED
@@ -76,18 +76,45 @@ def instantiate_pydantic_terms(db_terms: Iterable[UTerm | PTerm],
76
76
  list_to_populate.append(term)
77
77
 
78
78
 
79
+ def process_expression(expression: str) -> str:
80
+ """
81
 + Allows only SQLite FTS operators AND, OR and NOT, and performs a prefix search for single-word expressions.
82
+ """
83
+ # 1. Remove single and double quotes.
84
+ result = expression.replace('"', '')
85
+ result = result.replace("'", '')
86
+
87
+ # 2. Escape keywords.
88
+ result = result.replace('NEAR', '"NEAR"')
89
+ result = result.replace('+', '"+"')
90
+ result = result.replace('-', '"-"')
91
+ result = result.replace(':', '":"')
92
+ result = result.replace('^', '"^"')
93
+ result = result.replace('(', '"("')
94
+ result = result.replace(')', '")"')
95
+ result = result.replace(',', '","')
96
+
97
+ # 3. Make single word request a prefix search.
98
+ if not result.endswith('*'):
99
+ tokens = result.split(sep=None)
100
+ if len(tokens) == 1:
101
+ result += '*'
102
+ return result
103
+
104
+
79
105
  def generate_matching_condition(cls: type[UTermFTS5] | type[UDataDescriptorFTS5] |
80
106
  type[PTermFTS5] | type[PCollectionFTS5],
81
107
  expression: str,
82
108
  only_id: bool) -> ColumnElement[bool]:
109
+ processed_expression = process_expression(expression)
83
110
  # TODO: fix this when specs will ba available in collections and Data descriptors.
84
111
  if cls is PTermFTS5 or cls is UTermFTS5:
85
112
  if only_id:
86
- result = col(cls.id).match(expression)
113
+ result = col(cls.id).match(processed_expression)
87
114
  else:
88
- result = col(cls.specs).match(expression) # type: ignore
115
+ result = col(cls.specs).match(processed_expression) # type: ignore
89
116
  else:
90
- result = col(cls.id).match(expression)
117
+ result = col(cls.id).match(processed_expression)
91
118
  return result
92
119
 
93
120
 
esgvoc/api/universe.py CHANGED
@@ -13,6 +13,7 @@ from esgvoc.api.search import (
13
13
  handle_rank_limit_offset,
14
14
  instantiate_pydantic_term,
15
15
  instantiate_pydantic_terms,
16
+ process_expression,
16
17
  )
17
18
  from esgvoc.core.db.models.universe import UDataDescriptor, UDataDescriptorFTS5, UTerm, UTermFTS5
18
19
 
@@ -211,12 +212,15 @@ def find_data_descriptors_in_universe(expression: str,
211
212
  offset: int | None = None) -> list[tuple[str, dict]]:
212
213
  """
213
214
  Find data descriptors in the universe based on a full text search defined by the given `expression`.
214
- The `expression` comes from the powerful
215
- `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
216
- and corresponds to the expression of the `MATCH` operator.
217
- It can be composed of one or multiple keywords combined with boolean
218
- operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
219
- with the wildcard `*`.
215
+ The `expression` can be composed of one or multiple keywords.
216
 + The keywords can be combined with boolean operators: `AND`,
217
+ `OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
218
 + if no boolean operator is provided, whitespaces are handled as if there were
219
+ an implicit AND operator between each pair of keywords. Note that this
220
 + function does not provide any priority operator (parentheses).
221
 + Keywords can define prefixes by adding a `*` at the end of them.
222
+ If the expression is composed of only one keyword, the function
223
+ automatically defines it as a prefix.
220
224
  The function returns a list of data descriptor ids and contexts, sorted according to the
221
225
  bm25 ranking metric (list index `0` has the highest rank).
222
226
  If the provided `expression` does not hit any data descriptor, the function returns an empty list.
@@ -266,12 +270,15 @@ def find_terms_in_universe(expression: str,
266
270
  selected_term_fields: Iterable[str] | None = None) -> list[DataDescriptor]:
267
271
  """
268
272
  Find terms in the universe based on a full-text search defined by the given `expression`.
269
- The `expression` comes from the powerful
270
- `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
271
- and corresponds to the expression of the `MATCH` operator.
272
- It can be composed of one or multiple keywords combined with boolean
273
- operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
274
- with the wildcard `*`.
273
+ The `expression` can be composed of one or multiple keywords.
274
 + The keywords can be combined with boolean operators: `AND`,
275
+ `OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
276
 + if no boolean operator is provided, whitespaces are handled as if there were
277
+ an implicit AND operator between each pair of keywords. Note that this
278
 + function does not provide any priority operator (parentheses).
279
 + Keywords can define prefixes by adding a `*` at the end of them.
280
+ If the expression is composed of only one keyword, the function
281
+ automatically defines it as a prefix.
275
282
  The function returns a list of term instances sorted according to the
276
283
  bm25 ranking metric (list index `0` has the highest rank).
277
284
  If the provided `expression` does not hit any term, the function returns an empty list.
@@ -323,12 +330,15 @@ def find_terms_in_data_descriptor(expression: str, data_descriptor_id: str,
323
330
  -> list[DataDescriptor]:
324
331
  """
325
332
  Find terms in the given data descriptor based on a full-text search defined by the given `expression`.
326
- The `expression` comes from the powerful
327
- `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
328
- and corresponds to the expression of the `MATCH` operator.
329
- It can be composed of one or multiple keywords combined with boolean
330
- operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
331
- with the wildcard `*`.
333
+ The `expression` can be composed of one or multiple keywords.
334
 + The keywords can be combined with boolean operators: `AND`,
335
+ `OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
336
 + if no boolean operator is provided, whitespaces are handled as if there were
337
+ an implicit AND operator between each pair of keywords. Note that this
338
 + function does not provide any priority operator (parentheses).
339
 + Keywords can define prefixes by adding a `*` at the end of them.
340
+ If the expression is composed of only one keyword, the function
341
+ automatically defines it as a prefix.
332
342
  The function returns a list of term instances sorted according to the
333
343
  bm25 ranking metric (list index `0` has the highest rank).
334
344
  This function performs an exact match on the `data_descriptor_id`,
@@ -370,12 +380,16 @@ def find_items_in_universe(expression: str,
370
380
  offset: int | None = None) -> list[Item]:
371
381
  """
372
382
  Find items, at the moment terms and data descriptors, in the universe based on a full-text
373
- search defined by the given `expression`. The `expression` comes from the powerful
374
- `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
375
- and corresponds to the expression of the `MATCH` operator.
376
- It can be composed of one or multiple keywords combined with boolean
377
- operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
378
- with the wildcard `*`.
383
+ search defined by the given `expression`.
384
+ The `expression` can be composed of one or multiple keywords.
385
 + The keywords can be combined with boolean operators: `AND`,
386
+ `OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
387
 + if no boolean operator is provided, whitespaces are handled as if there were
388
+ an implicit AND operator between each pair of keywords. Note that this
389
 + function does not provide any priority operator (parentheses).
390
 + Keywords can define prefixes by adding a `*` at the end of them.
391
+ If the expression is composed of only one keyword, the function
392
+ automatically defines it as a prefix.
379
393
  The function returns a list of item instances sorted according to the
380
394
  bm25 ranking metric (list index `0` has the highest rank).
381
395
  If the provided `expression` does not hit any item, the function returns an empty list.
@@ -401,23 +415,24 @@ def find_items_in_universe(expression: str,
401
415
  # TODO: execute union query when it will be possible to compute parent of terms and data descriptors.
402
416
  result = list()
403
417
  with get_universe_session() as session:
418
+ processed_expression = process_expression(expression)
404
419
  if only_id:
405
420
  dd_column = col(UDataDescriptorFTS5.id)
406
421
  term_column = col(UTermFTS5.id)
407
422
  else:
408
423
  dd_column = col(UDataDescriptorFTS5.id) # TODO: use specs when implemented!
409
424
  term_column = col(UTermFTS5.specs) # type: ignore
410
- dd_where_condition = dd_column.match(expression)
425
+ dd_where_condition = dd_column.match(processed_expression)
411
426
  dd_statement = select(UDataDescriptorFTS5.id,
412
427
  text("'data_descriptor' AS TYPE"),
413
428
  text("'universe' AS TYPE"),
414
429
  text('rank')).where(dd_where_condition)
415
- term_where_condition = term_column.match(expression)
430
+ term_where_condition = term_column.match(processed_expression)
416
431
  term_statement = select(UTermFTS5.id,
417
432
  text("'term' AS TYPE"),
418
433
  UDataDescriptor.id,
419
434
  text('rank')).join(UDataDescriptor) \
420
435
  .where(term_where_condition)
421
- result = execute_find_item_statements(session, expression, dd_statement,
436
+ result = execute_find_item_statements(session, processed_expression, dd_statement,
422
437
  term_statement, limit, offset)
423
438
  return result
@@ -0,0 +1,74 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "$id": "https://stac-extensions.github.io/cmip6/v1.0.0/schema.json#",
4
+ "title": "CMIP6 Extension",
5
+ "description": "STAC CMIP6 Extension for STAC Items and STAC Collection Summaries.",
6
+ "type": "object",
7
+ "required": [
8
+ "stac_extensions"
9
+ ],
10
+ "properties": {
11
+ "stac_extensions": {
12
+ "type": "array",
13
+ "contains": {
14
+ "const": "https://stac-extensions.github.io/cmip6/v1.0.0/schema.json"
15
+ }
16
+ }
17
+ },
18
+ "oneOf": [
19
+ {
20
+ "$comment": "This is the schema for STAC Items.",
21
+ "type": "object",
22
+ "required": [
23
+ "type",
24
+ "properties"
25
+ ],
26
+ "properties": {
27
+ "type": {
28
+ "const": "Feature"
29
+ },
30
+ "properties": {
31
+ "allOf": [
32
+ {
33
+ "$ref": "#/definitions/require_any"
34
+ },
35
+ {
36
+ "$ref": "#/definitions/fields"
37
+ }
38
+ ]
39
+ }
40
+ }
41
+ },
42
+ {
43
+ "$comment": "This is the schema for STAC Collections, or more specifically only Collection Summaries in this case. By default, only checks the existence of the properties, but not the schema of the summaries.",
44
+ "type": "object",
45
+ "required": [
46
+ "type",
47
+ "summaries"
48
+ ],
49
+ "properties": {
50
+ "type": {
51
+ "const": "Collection"
52
+ },
53
+ "summaries": {
54
+ "$ref": "#/definitions/require_any"
55
+ }
56
+ }
57
+ }
58
+ ],
59
+ "definitions": {
60
+ "require_any": {
61
+ "$comment": "Please list all fields here so that we can force the existence of one of them in other parts of the schemas."
62
+ },
63
+ "fields": {
64
+ "$comment": " Don't require fields here, do that above in the corresponding schema.",
65
+ "type": "object",
66
+ "properties": {
67
+ },
68
+ "patternProperties": {
69
+ "^(?!cmip6:)": {}
70
+ },
71
+ "additionalProperties": false
72
+ }
73
+ }
74
+ }
@@ -0,0 +1,74 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "$id": "https://stac-extensions.github.io/cmip6plus/v1.0.0/schema.json#",
4
+ "title": "CMIP6Plus Extension",
5
+ "description": "STAC CMIP6Plus Extension for STAC Items and STAC Collection Summaries.",
6
+ "type": "object",
7
+ "required": [
8
+ "stac_extensions"
9
+ ],
10
+ "properties": {
11
+ "stac_extensions": {
12
+ "type": "array",
13
+ "contains": {
14
+ "const": "https://stac-extensions.github.io/cmip6plus/v1.0.0/schema.json"
15
+ }
16
+ }
17
+ },
18
+ "oneOf": [
19
+ {
20
+ "$comment": "This is the schema for STAC Items.",
21
+ "type": "object",
22
+ "required": [
23
+ "type",
24
+ "properties"
25
+ ],
26
+ "properties": {
27
+ "type": {
28
+ "const": "Feature"
29
+ },
30
+ "properties": {
31
+ "allOf": [
32
+ {
33
+ "$ref": "#/definitions/require_any"
34
+ },
35
+ {
36
+ "$ref": "#/definitions/fields"
37
+ }
38
+ ]
39
+ }
40
+ }
41
+ },
42
+ {
43
+ "$comment": "This is the schema for STAC Collections, or more specifically only Collection Summaries in this case. By default, only checks the existence of the properties, but not the schema of the summaries.",
44
+ "type": "object",
45
+ "required": [
46
+ "type",
47
+ "summaries"
48
+ ],
49
+ "properties": {
50
+ "type": {
51
+ "const": "Collection"
52
+ },
53
+ "summaries": {
54
+ "$ref": "#/definitions/require_any"
55
+ }
56
+ }
57
+ }
58
+ ],
59
+ "definitions": {
60
+ "require_any": {
61
+ "$comment": "Please list all fields here so that we can force the existence of one of them in other parts of the schemas."
62
+ },
63
+ "fields": {
64
+ "$comment": " Don't require fields here, do that above in the corresponding schema.",
65
+ "type": "object",
66
+ "properties": {
67
+ },
68
+ "patternProperties": {
69
+ "^(?!cmip6plus:)": {}
70
+ },
71
+ "additionalProperties": false
72
+ }
73
+ }
74
+ }
@@ -0,0 +1,185 @@
1
+ import contextlib
2
+ import json
3
+ from pathlib import Path
4
+ from typing import Iterable
5
+
6
+ from sqlmodel import Session
7
+
8
+ from esgvoc.api import projects, search
9
+ from esgvoc.api.project_specs import (
10
+ GlobalAttributeSpecBase,
11
+ GlobalAttributeSpecSpecific,
12
+ GlobalAttributeVisitor,
13
+ )
14
+ from esgvoc.core.constants import DRS_SPECS_JSON_KEY, PATTERN_JSON_KEY
15
+ from esgvoc.core.db.models.project import PCollection, TermKind
16
+ from esgvoc.core.exceptions import EsgvocNotFoundError, EsgvocNotImplementedError
17
+
18
+ KEY_SEPARATOR = ':'
19
+ JSON_SCHEMA_TEMPLATE_DIR_PATH = Path(__file__).parent
20
+ JSON_SCHEMA_TEMPLATE_FILE_NAME_TEMPLATE = '{project_id}_template.json'
21
+ JSON_INDENTATION = 2
22
+
23
+
24
+ def _process_plain(collection: PCollection, selected_field: str) -> list[str]:
25
+ result: list[str] = list()
26
+ for term in collection.terms:
27
+ if selected_field in term.specs:
28
+ value = term.specs[selected_field]
29
+ result.append(value)
30
+ else:
31
+ raise EsgvocNotFoundError(f'missing key {selected_field} for term {term.id} in ' +
32
+ f'collection {collection.id}')
33
+ return result
34
+
35
+
36
+ def _process_composite(collection: PCollection, universe_session: Session,
37
+ project_session: Session) -> str:
38
+ result = ""
39
+ for term in collection.terms:
40
+ _, parts = projects._get_composite_term_separator_parts(term)
41
+ for part in parts:
42
+ resolved_term = projects._resolve_term(part, universe_session, project_session)
43
+ if resolved_term.kind == TermKind.PATTERN:
44
+ result += resolved_term.specs[PATTERN_JSON_KEY]
45
+ else:
46
+ raise EsgvocNotImplementedError(f'{term.kind} term is not supported yet')
47
+ # Patterns terms are meant to be validated individually.
48
+ # So their regex are defined as a whole (begins by a ^, ends by a $).
49
+ # As the pattern is a concatenation of plain or regex, multiple ^ and $ can exist.
50
+ # The later, must be removed.
51
+ result = result.replace('^', '').replace('$', '')
52
+ result = f'^{result}$'
53
+ return result
54
+
55
+
56
+ def _process_pattern(collection: PCollection) -> str:
57
+ # The generation of the value of the field pattern for the collections with more than one term
58
+ # is not specified yet.
59
+ if len(collection.terms) == 1:
60
+ term = collection.terms[0]
61
+ return term.specs[PATTERN_JSON_KEY]
62
+ else:
63
+ msg = f"unsupported collection of term pattern with more than one term for '{collection.id}'"
64
+ raise EsgvocNotImplementedError(msg)
65
+
66
+
67
+ def _generate_attribute_key(project_id: str, attribute_name) -> str:
68
+ return f'{project_id}{KEY_SEPARATOR}{attribute_name}'
69
+
70
+
71
+ class JsonPropertiesVisitor(GlobalAttributeVisitor, contextlib.AbstractContextManager):
72
+ def __init__(self, project_id: str) -> None:
73
+ self.project_id = project_id
74
+ # Project session can't be None here.
75
+ self.universe_session: Session = search.get_universe_session()
76
+ self.project_session: Session = projects._get_project_session_with_exception(project_id)
77
+ self.collections: dict[str, PCollection] = dict()
78
+ for collection in projects._get_all_collections_in_project(self.project_session):
79
+ self.collections[collection.id] = collection
80
+
81
+ def __exit__(self, exception_type, exception_value, exception_traceback):
82
+ self.project_session.close()
83
+ self.universe_session.close()
84
+ if exception_type is not None:
85
+ raise exception_value
86
+ return True
87
+
88
+ def _generate_attribute_property(self, attribute_name: str, source_collection: str,
89
+ selected_field: str) -> tuple[str, str | list[str]]:
90
+ property_value: str | list[str]
91
+ property_key: str
92
+ if source_collection not in self.collections:
93
+ raise EsgvocNotFoundError(f"collection '{source_collection}' referenced by attribute " +
94
+ f"{attribute_name} is not found")
95
+ collection = self.collections[source_collection]
96
+ match collection.term_kind:
97
+ case TermKind.PLAIN:
98
+ property_value = _process_plain(collection=collection,
99
+ selected_field=selected_field)
100
+ property_key = 'enum'
101
+ case TermKind.COMPOSITE:
102
+ property_value = _process_composite(collection=collection,
103
+ universe_session=self.universe_session,
104
+ project_session=self.project_session)
105
+ property_key = 'pattern'
106
+ case TermKind.PATTERN:
107
+ property_value = _process_pattern(collection)
108
+ property_key = 'pattern'
109
+ case _:
110
+ msg = f"unsupported term kind '{collection.term_kind}' " + \
111
+ f"for global attribute {attribute_name}"
112
+ raise EsgvocNotImplementedError(msg)
113
+ return property_key, property_value
114
+
115
+ def visit_base_attribute(self, attribute_name: str, attribute: GlobalAttributeSpecBase) \
116
+ -> tuple[str, dict[str, str | list[str]]]:
117
+ attribute_key = _generate_attribute_key(self.project_id, attribute_name)
118
+ attribute_properties: dict[str, str | list[str]] = dict()
119
+ attribute_properties['type'] = attribute.value_type.value
120
+ property_key, property_value = self._generate_attribute_property(attribute_name,
121
+ attribute.source_collection,
122
+ DRS_SPECS_JSON_KEY)
123
+ attribute_properties[property_key] = property_value
124
+ return attribute_key, attribute_properties
125
+
126
+ def visit_specific_attribute(self, attribute_name: str, attribute: GlobalAttributeSpecSpecific) \
127
+ -> tuple[str, dict[str, str | list[str]]]:
128
+ attribute_key = _generate_attribute_key(self.project_id, attribute_name)
129
+ attribute_properties: dict[str, str | list[str]] = dict()
130
+ attribute_properties['type'] = attribute.value_type.value
131
+ property_key, property_value = self._generate_attribute_property(attribute_name,
132
+ attribute.source_collection,
133
+ attribute.specific_key)
134
+ attribute_properties[property_key] = property_value
135
+ return attribute_key, attribute_properties
136
+
137
+
138
+ def _inject_global_attributes(json_root: dict, project_id: str, attribute_names: Iterable[str]) -> None:
139
+ attribute_properties = list()
140
+ for attribute_name in attribute_names:
141
+ attribute_key = _generate_attribute_key(project_id, attribute_name)
142
+ attribute_properties.append({"required": [attribute_key]})
143
+ json_root['definitions']['require_any']['anyOf'] = attribute_properties
144
+
145
+
146
+ def _inject_properties(json_root: dict, properties: list[tuple]) -> None:
147
+ for property in properties:
148
+ json_root['definitions']['fields']['properties'][property[0]] = property[1]
149
+
150
+
151
+ def generate_json_schema(project_id: str) -> str:
152
+ """
153
+ Generate json schema for the given project.
154
+
155
+ :param project_id: The id of the given project.
156
+ :type project_id: str
157
+ :returns: The content of a json schema
158
+ :rtype: str
159
+ :raises EsgvocNotFoundError: On missing information
160
+ :raises EsgvocNotImplementedError: On unexpected operations
161
+ """
162
+ file_name = JSON_SCHEMA_TEMPLATE_FILE_NAME_TEMPLATE.format(project_id=project_id)
163
+ template_file_path = JSON_SCHEMA_TEMPLATE_DIR_PATH.joinpath(file_name)
164
+ if template_file_path.exists():
165
+ project_specs = projects.get_project(project_id)
166
+ if project_specs:
167
+ if project_specs.global_attributes_specs:
168
+ with open(file=template_file_path, mode='r') as file, \
169
+ JsonPropertiesVisitor(project_id) as visitor:
170
+ file_content = file.read()
171
+ root = json.loads(file_content)
172
+ properties: list[tuple[str, dict[str, str | list[str]]]] = list()
173
+ for attribute_name, attribute in project_specs.global_attributes_specs.items():
174
+ attribute_key, attribute_properties = attribute.accept(attribute_name, visitor)
175
+ properties.append((attribute_key, attribute_properties))
176
+ _inject_properties(root, properties)
177
+ _inject_global_attributes(root, project_id, project_specs.global_attributes_specs.keys())
178
+ return json.dumps(root, indent=JSON_INDENTATION)
179
+ else:
180
+ raise EsgvocNotFoundError(f"global attributes for the project '{project_id}' " +
181
+ "are not provided")
182
+ else:
183
+ raise EsgvocNotFoundError(f"project '{project_id}' is not found")
184
+ else:
185
+ raise EsgvocNotFoundError(f"template for project '{project_id}' is not found")