esgvoc 0.4.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +1 -1
- esgvoc/api/data_descriptors/__init__.py +50 -28
- esgvoc/api/data_descriptors/activity.py +3 -3
- esgvoc/api/data_descriptors/area_label.py +16 -1
- esgvoc/api/data_descriptors/branded_suffix.py +20 -0
- esgvoc/api/data_descriptors/branded_variable.py +12 -0
- esgvoc/api/data_descriptors/consortium.py +14 -13
- esgvoc/api/data_descriptors/contact.py +5 -0
- esgvoc/api/data_descriptors/conventions.py +6 -0
- esgvoc/api/data_descriptors/creation_date.py +5 -0
- esgvoc/api/data_descriptors/data_descriptor.py +14 -9
- esgvoc/api/data_descriptors/data_specs_version.py +5 -0
- esgvoc/api/data_descriptors/date.py +1 -1
- esgvoc/api/data_descriptors/directory_date.py +1 -1
- esgvoc/api/data_descriptors/experiment.py +13 -11
- esgvoc/api/data_descriptors/forcing_index.py +1 -1
- esgvoc/api/data_descriptors/frequency.py +3 -3
- esgvoc/api/data_descriptors/further_info_url.py +5 -0
- esgvoc/api/data_descriptors/grid_label.py +2 -2
- esgvoc/api/data_descriptors/horizontal_label.py +15 -1
- esgvoc/api/data_descriptors/initialisation_index.py +1 -1
- esgvoc/api/data_descriptors/institution.py +8 -5
- esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
- esgvoc/api/data_descriptors/license.py +3 -3
- esgvoc/api/data_descriptors/mip_era.py +1 -1
- esgvoc/api/data_descriptors/model_component.py +1 -1
- esgvoc/api/data_descriptors/obs_type.py +5 -0
- esgvoc/api/data_descriptors/organisation.py +1 -1
- esgvoc/api/data_descriptors/physic_index.py +1 -1
- esgvoc/api/data_descriptors/product.py +2 -2
- esgvoc/api/data_descriptors/publication_status.py +5 -0
- esgvoc/api/data_descriptors/realisation_index.py +1 -1
- esgvoc/api/data_descriptors/realm.py +1 -1
- esgvoc/api/data_descriptors/region.py +5 -0
- esgvoc/api/data_descriptors/resolution.py +3 -3
- esgvoc/api/data_descriptors/source.py +9 -5
- esgvoc/api/data_descriptors/source_type.py +1 -1
- esgvoc/api/data_descriptors/table.py +3 -2
- esgvoc/api/data_descriptors/temporal_label.py +15 -1
- esgvoc/api/data_descriptors/time_range.py +4 -3
- esgvoc/api/data_descriptors/title.py +5 -0
- esgvoc/api/data_descriptors/tracking_id.py +5 -0
- esgvoc/api/data_descriptors/variable.py +25 -12
- esgvoc/api/data_descriptors/variant_label.py +3 -3
- esgvoc/api/data_descriptors/vertical_label.py +14 -0
- esgvoc/api/project_specs.py +117 -2
- esgvoc/api/projects.py +242 -279
- esgvoc/api/search.py +30 -3
- esgvoc/api/universe.py +42 -27
- esgvoc/apps/jsg/cmip6_template.json +74 -0
- esgvoc/apps/jsg/cmip6plus_template.json +74 -0
- esgvoc/apps/jsg/json_schema_generator.py +185 -0
- esgvoc/cli/config.py +500 -0
- esgvoc/cli/find.py +138 -0
- esgvoc/cli/get.py +43 -38
- esgvoc/cli/main.py +10 -3
- esgvoc/cli/status.py +27 -18
- esgvoc/cli/valid.py +10 -15
- esgvoc/core/db/models/project.py +11 -11
- esgvoc/core/db/models/universe.py +3 -3
- esgvoc/core/db/project_ingestion.py +40 -40
- esgvoc/core/db/universe_ingestion.py +36 -33
- esgvoc/core/logging_handler.py +24 -2
- esgvoc/core/repo_fetcher.py +61 -59
- esgvoc/core/service/data_merger.py +47 -34
- esgvoc/core/service/state.py +107 -83
- {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/METADATA +7 -20
- esgvoc-1.0.0.dist-info/RECORD +95 -0
- esgvoc/core/logging.conf +0 -21
- esgvoc-0.4.0.dist-info/RECORD +0 -80
- {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/WHEEL +0 -0
- {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/entry_points.txt +0 -0
- {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/api/search.py
CHANGED
|
@@ -76,18 +76,45 @@ def instantiate_pydantic_terms(db_terms: Iterable[UTerm | PTerm],
|
|
|
76
76
|
list_to_populate.append(term)
|
|
77
77
|
|
|
78
78
|
|
|
79
|
+
def process_expression(expression: str) -> str:
    """
    Sanitize a user expression before it is used in an SQLite FTS5 full-text match.

    Only the FTS5 boolean operators ``AND``, ``OR`` and ``NOT`` are left usable:
    quotes are removed, and the ``NEAR`` keyword as well as the characters
    ``+ - : ^ ( ) ,`` are wrapped in double quotes.
    An expression made of a single word is turned into a prefix search by appending ``*``.

    :param expression: The raw expression.
    :type expression: str
    :returns: The processed expression.
    :rtype: str
    """
    # Local import, so this function does not depend on the import block of the module.
    import re

    # 1. Remove single and double quotes.
    result = expression.replace('"', '').replace("'", '')

    # 2. Escape keywords.
    # NEAR is escaped only when it stands as a whole word: a plain substring replacement
    # would split words that merely contain it (e.g. NEARBY would become "NEAR"BY).
    result = re.sub(r'\bNEAR\b', '"NEAR"', result)
    # The special characters are escaped in a single pass.
    result = result.translate(str.maketrans({character: f'"{character}"'
                                             for character in '+-:^(),'}))

    # 3. Make single word request a prefix search.
    if not result.endswith('*') and len(result.split()) == 1:
        result += '*'
    return result
|
|
103
|
+
|
|
104
|
+
|
|
79
105
|
def generate_matching_condition(cls: type[UTermFTS5] | type[UDataDescriptorFTS5] |
                                type[PTermFTS5] | type[PCollectionFTS5],
                                expression: str,
                                only_id: bool) -> ColumnElement[bool]:
    """
    Build the full-text match condition for the given FTS5 model class.

    The expression is first passed through `process_expression`. The `specs` column is
    matched only for the term classes (`PTermFTS5`, `UTermFTS5`) when `only_id` is false;
    in every other case the `id` column is matched.
    """
    fts_query = process_expression(expression)
    # TODO: fix this when specs will be available in collections and data descriptors.
    is_term_class = cls is PTermFTS5 or cls is UTermFTS5
    if is_term_class and not only_id:
        return col(cls.specs).match(fts_query)  # type: ignore
    return col(cls.id).match(fts_query)
|
|
92
119
|
|
|
93
120
|
|
esgvoc/api/universe.py
CHANGED
|
@@ -13,6 +13,7 @@ from esgvoc.api.search import (
|
|
|
13
13
|
handle_rank_limit_offset,
|
|
14
14
|
instantiate_pydantic_term,
|
|
15
15
|
instantiate_pydantic_terms,
|
|
16
|
+
process_expression,
|
|
16
17
|
)
|
|
17
18
|
from esgvoc.core.db.models.universe import UDataDescriptor, UDataDescriptorFTS5, UTerm, UTermFTS5
|
|
18
19
|
|
|
@@ -211,12 +212,15 @@ def find_data_descriptors_in_universe(expression: str,
|
|
|
211
212
|
offset: int | None = None) -> list[tuple[str, dict]]:
|
|
212
213
|
"""
|
|
213
214
|
Find data descriptors in the universe based on a full text search defined by the given `expression`.
|
|
214
|
-
The `expression`
|
|
215
|
-
|
|
216
|
-
and
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
215
|
+
The `expression` can be composed of one or multiple keywords.
|
|
216
|
+
The keywords can be combined with boolean operators: `AND`,
|
|
217
|
+
`OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
|
|
218
|
+
if no boolean operator is provided, whitespaces are handled as if there were
|
|
219
|
+
an implicit AND operator between each pair of keywords. Note that this
|
|
220
|
+
function does not provide any priority operator (parenthesis).
|
|
221
|
+
Keywords can define prefixes when adding a `*` at the end of them.
|
|
222
|
+
If the expression is composed of only one keyword, the function
|
|
223
|
+
automatically defines it as a prefix.
|
|
220
224
|
The function returns a list of data descriptor ids and contexts, sorted according to the
|
|
221
225
|
bm25 ranking metric (list index `0` has the highest rank).
|
|
222
226
|
If the provided `expression` does not hit any data descriptor, the function returns an empty list.
|
|
@@ -266,12 +270,15 @@ def find_terms_in_universe(expression: str,
|
|
|
266
270
|
selected_term_fields: Iterable[str] | None = None) -> list[DataDescriptor]:
|
|
267
271
|
"""
|
|
268
272
|
Find terms in the universe based on a full-text search defined by the given `expression`.
|
|
269
|
-
The `expression`
|
|
270
|
-
|
|
271
|
-
and
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
273
|
+
The `expression` can be composed of one or multiple keywords.
|
|
274
|
+
The keywords can be combined with boolean operators: `AND`,
|
|
275
|
+
`OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
|
|
276
|
+
if no boolean operator is provided, whitespaces are handled as if there were
|
|
277
|
+
an implicit AND operator between each pair of keywords. Note that this
|
|
278
|
+
function does not provide any priority operator (parenthesis).
|
|
279
|
+
Keywords can define prefixes when adding a `*` at the end of them.
|
|
280
|
+
If the expression is composed of only one keyword, the function
|
|
281
|
+
automatically defines it as a prefix.
|
|
275
282
|
The function returns a list of term instances sorted according to the
|
|
276
283
|
bm25 ranking metric (list index `0` has the highest rank).
|
|
277
284
|
If the provided `expression` does not hit any term, the function returns an empty list.
|
|
@@ -323,12 +330,15 @@ def find_terms_in_data_descriptor(expression: str, data_descriptor_id: str,
|
|
|
323
330
|
-> list[DataDescriptor]:
|
|
324
331
|
"""
|
|
325
332
|
Find terms in the given data descriptor based on a full-text search defined by the given `expression`.
|
|
326
|
-
The `expression`
|
|
327
|
-
|
|
328
|
-
and
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
333
|
+
The `expression` can be composed of one or multiple keywords.
|
|
334
|
+
The keywords can be combined with boolean operators: `AND`,
|
|
335
|
+
`OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
|
|
336
|
+
if no boolean operator is provided, whitespaces are handled as if there were
|
|
337
|
+
an implicit AND operator between each pair of keywords. Note that this
|
|
338
|
+
function does not provide any priority operator (parenthesis).
|
|
339
|
+
Keywords can define prefixes when adding a `*` at the end of them.
|
|
340
|
+
If the expression is composed of only one keyword, the function
|
|
341
|
+
automatically defines it as a prefix.
|
|
332
342
|
The function returns a list of term instances sorted according to the
|
|
333
343
|
bm25 ranking metric (list index `0` has the highest rank).
|
|
334
344
|
This function performs an exact match on the `data_descriptor_id`,
|
|
@@ -370,12 +380,16 @@ def find_items_in_universe(expression: str,
|
|
|
370
380
|
offset: int | None = None) -> list[Item]:
|
|
371
381
|
"""
|
|
372
382
|
Find items, at the moment terms and data descriptors, in the universe based on a full-text
|
|
373
|
-
search defined by the given `expression`.
|
|
374
|
-
`
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
383
|
+
search defined by the given `expression`.
|
|
384
|
+
The `expression` can be composed of one or multiple keywords.
|
|
385
|
+
The keywords can be combined with boolean operators: `AND`,
|
|
386
|
+
`OR` and `NOT` (case sensitive). The keywords are separated by whitespaces,
|
|
387
|
+
if no boolean operator is provided, whitespaces are handled as if there were
|
|
388
|
+
an implicit AND operator between each pair of keywords. Note that this
|
|
389
|
+
function does not provide any priority operator (parenthesis).
|
|
390
|
+
Keywords can define prefixes when adding a `*` at the end of them.
|
|
391
|
+
If the expression is composed of only one keyword, the function
|
|
392
|
+
automatically defines it as a prefix.
|
|
379
393
|
The function returns a list of item instances sorted according to the
|
|
380
394
|
bm25 ranking metric (list index `0` has the highest rank).
|
|
381
395
|
If the provided `expression` does not hit any item, the function returns an empty list.
|
|
@@ -401,23 +415,24 @@ def find_items_in_universe(expression: str,
|
|
|
401
415
|
# TODO: execute union query when it will be possible to compute parent of terms and data descriptors.
|
|
402
416
|
result = list()
|
|
403
417
|
with get_universe_session() as session:
|
|
418
|
+
processed_expression = process_expression(expression)
|
|
404
419
|
if only_id:
|
|
405
420
|
dd_column = col(UDataDescriptorFTS5.id)
|
|
406
421
|
term_column = col(UTermFTS5.id)
|
|
407
422
|
else:
|
|
408
423
|
dd_column = col(UDataDescriptorFTS5.id) # TODO: use specs when implemented!
|
|
409
424
|
term_column = col(UTermFTS5.specs) # type: ignore
|
|
410
|
-
dd_where_condition = dd_column.match(
|
|
425
|
+
dd_where_condition = dd_column.match(processed_expression)
|
|
411
426
|
dd_statement = select(UDataDescriptorFTS5.id,
|
|
412
427
|
text("'data_descriptor' AS TYPE"),
|
|
413
428
|
text("'universe' AS TYPE"),
|
|
414
429
|
text('rank')).where(dd_where_condition)
|
|
415
|
-
term_where_condition = term_column.match(
|
|
430
|
+
term_where_condition = term_column.match(processed_expression)
|
|
416
431
|
term_statement = select(UTermFTS5.id,
|
|
417
432
|
text("'term' AS TYPE"),
|
|
418
433
|
UDataDescriptor.id,
|
|
419
434
|
text('rank')).join(UDataDescriptor) \
|
|
420
435
|
.where(term_where_condition)
|
|
421
|
-
result = execute_find_item_statements(session,
|
|
436
|
+
result = execute_find_item_statements(session, processed_expression, dd_statement,
|
|
422
437
|
term_statement, limit, offset)
|
|
423
438
|
return result
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "https://stac-extensions.github.io/cmip6/v1.0.0/schema.json#",
|
|
4
|
+
"title": "CMIP6 Extension",
|
|
5
|
+
"description": "STAC CMIP6 Extension for STAC Items and STAC Collection Summaries.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": [
|
|
8
|
+
"stac_extensions"
|
|
9
|
+
],
|
|
10
|
+
"properties": {
|
|
11
|
+
"stac_extensions": {
|
|
12
|
+
"type": "array",
|
|
13
|
+
"contains": {
|
|
14
|
+
"const": "https://stac-extensions.github.io/cmip6/v1.0.0/schema.json"
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"oneOf": [
|
|
19
|
+
{
|
|
20
|
+
"$comment": "This is the schema for STAC Items.",
|
|
21
|
+
"type": "object",
|
|
22
|
+
"required": [
|
|
23
|
+
"type",
|
|
24
|
+
"properties"
|
|
25
|
+
],
|
|
26
|
+
"properties": {
|
|
27
|
+
"type": {
|
|
28
|
+
"const": "Feature"
|
|
29
|
+
},
|
|
30
|
+
"properties": {
|
|
31
|
+
"allOf": [
|
|
32
|
+
{
|
|
33
|
+
"$ref": "#/definitions/require_any"
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
"$ref": "#/definitions/fields"
|
|
37
|
+
}
|
|
38
|
+
]
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
"$comment": "This is the schema for STAC Collections, or more specifically only Collection Summaries in this case. By default, only checks the existence of the properties, but not the schema of the summaries.",
|
|
44
|
+
"type": "object",
|
|
45
|
+
"required": [
|
|
46
|
+
"type",
|
|
47
|
+
"summaries"
|
|
48
|
+
],
|
|
49
|
+
"properties": {
|
|
50
|
+
"type": {
|
|
51
|
+
"const": "Collection"
|
|
52
|
+
},
|
|
53
|
+
"summaries": {
|
|
54
|
+
"$ref": "#/definitions/require_any"
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
],
|
|
59
|
+
"definitions": {
|
|
60
|
+
"require_any": {
|
|
61
|
+
"$comment": "Please list all fields here so that we can force the existence of one of them in other parts of the schemas."
|
|
62
|
+
},
|
|
63
|
+
"fields": {
|
|
64
|
+
"$comment": " Don't require fields here, do that above in the corresponding schema.",
|
|
65
|
+
"type": "object",
|
|
66
|
+
"properties": {
|
|
67
|
+
},
|
|
68
|
+
"patternProperties": {
|
|
69
|
+
"^(?!cmip6:)": {}
|
|
70
|
+
},
|
|
71
|
+
"additionalProperties": false
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "https://stac-extensions.github.io/cmip6plus/v1.0.0/schema.json#",
|
|
4
|
+
"title": "CMIP6Plus Extension",
|
|
5
|
+
"description": "STAC CMIP6Plus Extension for STAC Items and STAC Collection Summaries.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": [
|
|
8
|
+
"stac_extensions"
|
|
9
|
+
],
|
|
10
|
+
"properties": {
|
|
11
|
+
"stac_extensions": {
|
|
12
|
+
"type": "array",
|
|
13
|
+
"contains": {
|
|
14
|
+
"const": "https://stac-extensions.github.io/cmip6plus/v1.0.0/schema.json"
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"oneOf": [
|
|
19
|
+
{
|
|
20
|
+
"$comment": "This is the schema for STAC Items.",
|
|
21
|
+
"type": "object",
|
|
22
|
+
"required": [
|
|
23
|
+
"type",
|
|
24
|
+
"properties"
|
|
25
|
+
],
|
|
26
|
+
"properties": {
|
|
27
|
+
"type": {
|
|
28
|
+
"const": "Feature"
|
|
29
|
+
},
|
|
30
|
+
"properties": {
|
|
31
|
+
"allOf": [
|
|
32
|
+
{
|
|
33
|
+
"$ref": "#/definitions/require_any"
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
"$ref": "#/definitions/fields"
|
|
37
|
+
}
|
|
38
|
+
]
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
"$comment": "This is the schema for STAC Collections, or more specifically only Collection Summaries in this case. By default, only checks the existence of the properties, but not the schema of the summaries.",
|
|
44
|
+
"type": "object",
|
|
45
|
+
"required": [
|
|
46
|
+
"type",
|
|
47
|
+
"summaries"
|
|
48
|
+
],
|
|
49
|
+
"properties": {
|
|
50
|
+
"type": {
|
|
51
|
+
"const": "Collection"
|
|
52
|
+
},
|
|
53
|
+
"summaries": {
|
|
54
|
+
"$ref": "#/definitions/require_any"
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
],
|
|
59
|
+
"definitions": {
|
|
60
|
+
"require_any": {
|
|
61
|
+
"$comment": "Please list all fields here so that we can force the existence of one of them in other parts of the schemas."
|
|
62
|
+
},
|
|
63
|
+
"fields": {
|
|
64
|
+
"$comment": " Don't require fields here, do that above in the corresponding schema.",
|
|
65
|
+
"type": "object",
|
|
66
|
+
"properties": {
|
|
67
|
+
},
|
|
68
|
+
"patternProperties": {
|
|
69
|
+
"^(?!cmip6plus:)": {}
|
|
70
|
+
},
|
|
71
|
+
"additionalProperties": false
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
}
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import contextlib
|
|
2
|
+
import json
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Iterable
|
|
5
|
+
|
|
6
|
+
from sqlmodel import Session
|
|
7
|
+
|
|
8
|
+
from esgvoc.api import projects, search
|
|
9
|
+
from esgvoc.api.project_specs import (
|
|
10
|
+
GlobalAttributeSpecBase,
|
|
11
|
+
GlobalAttributeSpecSpecific,
|
|
12
|
+
GlobalAttributeVisitor,
|
|
13
|
+
)
|
|
14
|
+
from esgvoc.core.constants import DRS_SPECS_JSON_KEY, PATTERN_JSON_KEY
|
|
15
|
+
from esgvoc.core.db.models.project import PCollection, TermKind
|
|
16
|
+
from esgvoc.core.exceptions import EsgvocNotFoundError, EsgvocNotImplementedError
|
|
17
|
+
|
|
18
|
+
# Separator placed between the project id and the attribute name in the generated keys.
KEY_SEPARATOR = ':'
# Directory that contains the JSON schema templates: the directory of this module.
JSON_SCHEMA_TEMPLATE_DIR_PATH = Path(__file__).parent
# File name of a project template; `{project_id}` is substituted with the project id.
JSON_SCHEMA_TEMPLATE_FILE_NAME_TEMPLATE = '{project_id}_template.json'
# Indentation width used when the generated schema is serialized.
JSON_INDENTATION = 2
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _process_plain(collection: PCollection, selected_field: str) -> list[str]:
    """
    Collect the value stored under `selected_field` in the specs of every term of the collection.

    :param collection: The collection whose terms are read.
    :param selected_field: The specs key to extract from each term.
    :returns: The extracted values, in the order of the terms of the collection.
    :raises EsgvocNotFoundError: If a term has no such key in its specs.
    """
    values: list[str] = []
    for term in collection.terms:
        # Fail on the first term that lacks the requested key.
        if selected_field not in term.specs:
            raise EsgvocNotFoundError(f'missing key {selected_field} for term {term.id} in ' +
                                      f'collection {collection.id}')
        values.append(term.specs[selected_field])
    return values
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _process_composite(collection: PCollection, universe_session: Session,
                       project_session: Session) -> str:
    """
    Build a single regular expression from the parts of the composite terms of the collection.

    Every part of every term is resolved and must be a pattern term. The patterns are
    concatenated in order; the separator returned alongside the parts is not used.

    :param collection: The collection of composite terms.
    :param universe_session: The session used to resolve the parts.
    :param project_session: The session used to resolve the parts.
    :returns: The concatenated pattern, anchored with a leading `^` and a trailing `$`.
    :raises EsgvocNotImplementedError: If a resolved part is not a pattern term.
    """
    patterns: list[str] = []
    for term in collection.terms:
        _, parts = projects._get_composite_term_separator_parts(term)
        for part in parts:
            resolved_term = projects._resolve_term(part, universe_session, project_session)
            if resolved_term.kind != TermKind.PATTERN:
                # Report the kind of the resolved part: it is the unsupported one,
                # not the kind of the enclosing composite term.
                raise EsgvocNotImplementedError(f'{resolved_term.kind} term is not supported yet')
            patterns.append(resolved_term.specs[PATTERN_JSON_KEY])
    result = ''.join(patterns)
    # Pattern terms are meant to be validated individually.
    # So their regexes are defined as a whole (begin with a ^, end with a $).
    # As the pattern is a concatenation of plain or regex, multiple ^ and $ can exist.
    # The latter must be removed.
    # NOTE(review): this removes every ^ and $, including those inside a character class
    # such as [^_] or an escaped \$ -- confirm that no pattern term relies on them.
    result = result.replace('^', '').replace('$', '')
    return f'^{result}$'
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _process_pattern(collection: PCollection) -> str:
    """
    Return the pattern of the only term of the collection.

    :param collection: The collection of pattern terms.
    :returns: The value stored under `PATTERN_JSON_KEY` in the specs of the term.
    :raises EsgvocNotImplementedError: If the collection does not have exactly one term.
    """
    # The generation of the value of the field pattern for the collections with more than one term
    # is not specified yet.
    if len(collection.terms) != 1:
        msg = f"unsupported collection of term pattern with more than one term for '{collection.id}'"
        raise EsgvocNotImplementedError(msg)
    only_term = collection.terms[0]
    return only_term.specs[PATTERN_JSON_KEY]
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _generate_attribute_key(project_id: str, attribute_name: str) -> str:
    """
    Build the key of a global attribute: the project id and the attribute name,
    joined by `KEY_SEPARATOR`.
    """
    return f'{project_id}{KEY_SEPARATOR}{attribute_name}'
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class JsonPropertiesVisitor(GlobalAttributeVisitor, contextlib.AbstractContextManager):
|
|
72
|
+
def __init__(self, project_id: str) -> None:
|
|
73
|
+
self.project_id = project_id
|
|
74
|
+
# Project session can't be None here.
|
|
75
|
+
self.universe_session: Session = search.get_universe_session()
|
|
76
|
+
self.project_session: Session = projects._get_project_session_with_exception(project_id)
|
|
77
|
+
self.collections: dict[str, PCollection] = dict()
|
|
78
|
+
for collection in projects._get_all_collections_in_project(self.project_session):
|
|
79
|
+
self.collections[collection.id] = collection
|
|
80
|
+
|
|
81
|
+
def __exit__(self, exception_type, exception_value, exception_traceback):
|
|
82
|
+
self.project_session.close()
|
|
83
|
+
self.universe_session.close()
|
|
84
|
+
if exception_type is not None:
|
|
85
|
+
raise exception_value
|
|
86
|
+
return True
|
|
87
|
+
|
|
88
|
+
def _generate_attribute_property(self, attribute_name: str, source_collection: str,
|
|
89
|
+
selected_field: str) -> tuple[str, str | list[str]]:
|
|
90
|
+
property_value: str | list[str]
|
|
91
|
+
property_key: str
|
|
92
|
+
if source_collection not in self.collections:
|
|
93
|
+
raise EsgvocNotFoundError(f"collection '{source_collection}' referenced by attribute " +
|
|
94
|
+
f"{attribute_name} is not found")
|
|
95
|
+
collection = self.collections[source_collection]
|
|
96
|
+
match collection.term_kind:
|
|
97
|
+
case TermKind.PLAIN:
|
|
98
|
+
property_value = _process_plain(collection=collection,
|
|
99
|
+
selected_field=selected_field)
|
|
100
|
+
property_key = 'enum'
|
|
101
|
+
case TermKind.COMPOSITE:
|
|
102
|
+
property_value = _process_composite(collection=collection,
|
|
103
|
+
universe_session=self.universe_session,
|
|
104
|
+
project_session=self.project_session)
|
|
105
|
+
property_key = 'pattern'
|
|
106
|
+
case TermKind.PATTERN:
|
|
107
|
+
property_value = _process_pattern(collection)
|
|
108
|
+
property_key = 'pattern'
|
|
109
|
+
case _:
|
|
110
|
+
msg = f"unsupported term kind '{collection.term_kind}' " + \
|
|
111
|
+
f"for global attribute {attribute_name}"
|
|
112
|
+
raise EsgvocNotImplementedError(msg)
|
|
113
|
+
return property_key, property_value
|
|
114
|
+
|
|
115
|
+
def visit_base_attribute(self, attribute_name: str, attribute: GlobalAttributeSpecBase) \
|
|
116
|
+
-> tuple[str, dict[str, str | list[str]]]:
|
|
117
|
+
attribute_key = _generate_attribute_key(self.project_id, attribute_name)
|
|
118
|
+
attribute_properties: dict[str, str | list[str]] = dict()
|
|
119
|
+
attribute_properties['type'] = attribute.value_type.value
|
|
120
|
+
property_key, property_value = self._generate_attribute_property(attribute_name,
|
|
121
|
+
attribute.source_collection,
|
|
122
|
+
DRS_SPECS_JSON_KEY)
|
|
123
|
+
attribute_properties[property_key] = property_value
|
|
124
|
+
return attribute_key, attribute_properties
|
|
125
|
+
|
|
126
|
+
def visit_specific_attribute(self, attribute_name: str, attribute: GlobalAttributeSpecSpecific) \
|
|
127
|
+
-> tuple[str, dict[str, str | list[str]]]:
|
|
128
|
+
attribute_key = _generate_attribute_key(self.project_id, attribute_name)
|
|
129
|
+
attribute_properties: dict[str, str | list[str]] = dict()
|
|
130
|
+
attribute_properties['type'] = attribute.value_type.value
|
|
131
|
+
property_key, property_value = self._generate_attribute_property(attribute_name,
|
|
132
|
+
attribute.source_collection,
|
|
133
|
+
attribute.specific_key)
|
|
134
|
+
attribute_properties[property_key] = property_value
|
|
135
|
+
return attribute_key, attribute_properties
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _inject_global_attributes(json_root: dict, project_id: str, attribute_names: Iterable[str]) -> None:
    """
    Fill the `anyOf` entry of the `require_any` definition of the schema.

    One `{"required": [key]}` item is written per attribute name, where the key is
    produced by `_generate_attribute_key`.

    :param json_root: The root of the JSON schema, modified in place.
    :param project_id: The id of the project.
    :param attribute_names: The names of the global attributes.
    """
    json_root['definitions']['require_any']['anyOf'] = [
        {"required": [_generate_attribute_key(project_id, name)]}
        for name in attribute_names
    ]
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _inject_properties(json_root: dict, properties: list[tuple]) -> None:
    """
    Write the given properties into the `fields` definition of the schema.

    :param json_root: The root of the JSON schema, modified in place.
    :param properties: Tuples whose first item is the property key and whose second item
                       is the property schema.
    """
    # The loop variable is not named `property`, which would shadow the builtin.
    for entry in properties:
        key, schema = entry[0], entry[1]
        json_root['definitions']['fields']['properties'][key] = schema
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def generate_json_schema(project_id: str) -> str:
    """
    Generate json schema for the given project.

    The schema is built from the template file of the project, found next to this module,
    in which the properties computed from the global attribute specs are injected.

    :param project_id: The id of the given project.
    :type project_id: str
    :returns: The content of a json schema
    :rtype: str
    :raises EsgvocNotFoundError: On missing information
    :raises EsgvocNotImplementedError: On unexpected operations
    """
    file_name = JSON_SCHEMA_TEMPLATE_FILE_NAME_TEMPLATE.format(project_id=project_id)
    template_file_path = JSON_SCHEMA_TEMPLATE_DIR_PATH.joinpath(file_name)
    if not template_file_path.exists():
        raise EsgvocNotFoundError(f"template for project '{project_id}' is not found")

    project_specs = projects.get_project(project_id)
    if not project_specs:
        raise EsgvocNotFoundError(f"project '{project_id}' is not found")

    global_attributes_specs = project_specs.global_attributes_specs
    if not global_attributes_specs:
        raise EsgvocNotFoundError(f"global attributes for the project '{project_id}' " +
                                  "are not provided")

    # The template is read and parsed before any database session is opened.
    # The encoding is explicit so that the result does not depend on the platform default.
    with open(file=template_file_path, mode='r', encoding='utf-8') as file:
        root = json.load(file)

    with JsonPropertiesVisitor(project_id) as visitor:
        properties: list[tuple[str, dict[str, str | list[str]]]] = []
        for attribute_name, attribute in global_attributes_specs.items():
            attribute_key, attribute_properties = attribute.accept(attribute_name, visitor)
            properties.append((attribute_key, attribute_properties))
        _inject_properties(root, properties)
        _inject_global_attributes(root, project_id, global_attributes_specs.keys())
        return json.dumps(root, indent=JSON_INDENTATION)
|