esgvoc 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +1 -0
- esgvoc/api/__init__.py +62 -0
- esgvoc/api/_utils.py +39 -0
- esgvoc/api/data_descriptors/__init__.py +60 -0
- esgvoc/api/data_descriptors/activity.py +51 -0
- esgvoc/api/data_descriptors/consortium.py +66 -0
- esgvoc/api/data_descriptors/date.py +48 -0
- esgvoc/api/data_descriptors/experiment.py +60 -0
- esgvoc/api/data_descriptors/forcing_index.py +47 -0
- esgvoc/api/data_descriptors/frequency.py +45 -0
- esgvoc/api/data_descriptors/grid_label.py +46 -0
- esgvoc/api/data_descriptors/initialisation_index.py +46 -0
- esgvoc/api/data_descriptors/institution.py +58 -0
- esgvoc/api/data_descriptors/license.py +47 -0
- esgvoc/api/data_descriptors/mip_era.py +46 -0
- esgvoc/api/data_descriptors/model_component.py +47 -0
- esgvoc/api/data_descriptors/organisation.py +42 -0
- esgvoc/api/data_descriptors/physic_index.py +47 -0
- esgvoc/api/data_descriptors/product.py +45 -0
- esgvoc/api/data_descriptors/realisation_index.py +46 -0
- esgvoc/api/data_descriptors/realm.py +44 -0
- esgvoc/api/data_descriptors/resolution.py +46 -0
- esgvoc/api/data_descriptors/source.py +57 -0
- esgvoc/api/data_descriptors/source_type.py +43 -0
- esgvoc/api/data_descriptors/sub_experiment.py +43 -0
- esgvoc/api/data_descriptors/table.py +50 -0
- esgvoc/api/data_descriptors/time_range.py +28 -0
- esgvoc/api/data_descriptors/variable.py +77 -0
- esgvoc/api/data_descriptors/variant_label.py +49 -0
- esgvoc/api/projects.py +854 -0
- esgvoc/api/report.py +86 -0
- esgvoc/api/search.py +92 -0
- esgvoc/api/universe.py +218 -0
- esgvoc/apps/drs/__init__.py +16 -0
- esgvoc/apps/drs/models.py +43 -0
- esgvoc/apps/drs/parser.py +27 -0
- esgvoc/cli/config.py +79 -0
- esgvoc/cli/get.py +142 -0
- esgvoc/cli/install.py +14 -0
- esgvoc/cli/main.py +22 -0
- esgvoc/cli/status.py +26 -0
- esgvoc/cli/valid.py +156 -0
- esgvoc/core/constants.py +13 -0
- esgvoc/core/convert.py +0 -0
- esgvoc/core/data_handler.py +133 -0
- esgvoc/core/db/__init__.py +5 -0
- esgvoc/core/db/connection.py +31 -0
- esgvoc/core/db/models/mixins.py +18 -0
- esgvoc/core/db/models/project.py +65 -0
- esgvoc/core/db/models/universe.py +59 -0
- esgvoc/core/db/project_ingestion.py +152 -0
- esgvoc/core/db/universe_ingestion.py +120 -0
- esgvoc/core/logging.conf +21 -0
- esgvoc/core/logging_handler.py +4 -0
- esgvoc/core/repo_fetcher.py +259 -0
- esgvoc/core/service/__init__.py +8 -0
- esgvoc/core/service/data_merger.py +83 -0
- esgvoc/core/service/esg_voc.py +79 -0
- esgvoc/core/service/settings.py +64 -0
- esgvoc/core/service/settings.toml +12 -0
- esgvoc/core/service/settings_default.toml +20 -0
- esgvoc/core/service/state.py +222 -0
- esgvoc-0.1.2.dist-info/METADATA +54 -0
- esgvoc-0.1.2.dist-info/RECORD +66 -0
- esgvoc-0.1.2.dist-info/WHEEL +4 -0
- esgvoc-0.1.2.dist-info/entry_points.txt +2 -0
esgvoc/cli/get.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
|
|
2
|
+
from typing import Any
|
|
3
|
+
from esgvoc.api.projects import find_terms_in_collection, find_terms_in_project, get_all_collections_in_project, get_all_projects, get_all_terms_in_collection
|
|
4
|
+
from esgvoc.api.universe import find_terms_in_data_descriptor, find_terms_in_universe, get_all_data_descriptors_in_universe, get_all_terms_in_data_descriptor
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
from requests import logging
|
|
7
|
+
from rich.table import Table
|
|
8
|
+
import typer
|
|
9
|
+
import re
|
|
10
|
+
from rich.json import JSON
|
|
11
|
+
from rich.console import Console
|
|
12
|
+
|
|
13
|
+
app = typer.Typer()
|
|
14
|
+
console = Console()
|
|
15
|
+
|
|
16
|
+
_LOGGER = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
def validate_key_format(key: str):
    """
    Validate that the key matches the XXXX:YYYY:ZZZZ format and split it.

    Each of the three parts may be empty and may contain only word characters
    (letters, digits, underscore) and hyphens.

    Returns:
        The list of the three parts, in order.

    Raises:
        typer.BadParameter: if the key does not have exactly this shape.
    """
    # A flat character class accepts the same strings as the former nested
    # pattern `(\w*-?\w*)*` but cannot backtrack exponentially on long inputs
    # that contain no colon. `fullmatch` also rejects a trailing newline that
    # `match` + `$` would have let through.
    if not re.fullmatch(r"[\w-]*:[\w-]*:[\w-]*", key):
        raise typer.BadParameter(f"Invalid key format: {key}. Must be XXXX:YYYY:ZZZZ.")
    return key.split(":")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def handle_universe(data_descriptor_id:str|None,term_id:str|None, options=None):
    """
    Dispatch a universe query to the matching API call.

    The call is chosen from which identifiers are provided:
      - data descriptor and term: search the term inside that data descriptor;
      - term only: search the term in the whole universe;
      - data descriptor only: list all terms of that data descriptor;
      - neither: list all data descriptors of the universe.

    `options` is forwarded only to the two search calls.
    """
    _LOGGER.debug(f"Handling universe with data_descriptor_id={data_descriptor_id}, term_id={term_id}")

    if term_id:
        if data_descriptor_id:
            # Author's note on the result: BaseModel | dict[str, BaseModel] | None
            return find_terms_in_data_descriptor(data_descriptor_id, term_id, options)
        # Author's note: dict[str, BaseModel] | dict[str, dict[str, BaseModel]] | None
        return find_terms_in_universe(term_id, options)

    if data_descriptor_id:
        # Author's note: dict[str, BaseModel] | None
        return get_all_terms_in_data_descriptor(data_descriptor_id)

    # Author's note: dict[str, dict]
    return get_all_data_descriptors_in_universe()
|
|
46
|
+
|
|
47
|
+
def handle_project(project_id:str,collection_id:str|None,term_id:str|None,options=None):
    """
    Dispatch a project query to the matching API call.

    The call is chosen from which identifiers are provided:
      - project, collection and term: search the term inside that collection;
      - term (without a usable project/collection pair): search the term in the project;
      - collection only: list all terms of that collection;
      - otherwise: list all collections of the project.

    `options` is forwarded only to the project-wide term search.
    """
    _LOGGER.debug(f"Handling project {project_id} with Y={collection_id}, Z={term_id}, options = {options}")

    if term_id:
        if project_id and collection_id:
            # Author's note on the result: BaseModel | dict[str, BaseModel] | None
            return find_terms_in_collection(project_id, collection_id, term_id)
        # Author's note: dict[str, BaseModel] | dict[str, dict[str, BaseModel]] | None
        return find_terms_in_project(project_id, term_id, options)

    if collection_id:
        # Author's note: dict[str, BaseModel] | None
        return get_all_terms_in_collection(project_id, collection_id)

    # Author's note: dict[str, dict]; a None result is passed through unchanged.
    return get_all_collections_in_project(project_id)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def handle_unknown(x:str|None,y:str|None,z:str|None):
|
|
73
|
+
print(f"Something wrong in X,Y or Z : X={x}, Y={y}, Z={z}")
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def display(data:Any):
    """
    Print `data` on the module console, choosing a rendering from its type.

      - Pydantic model: its `model_dump()` rendered as JSON;
      - dict: only the keys are printed;
      - list: a two-column table (index, item);
      - anything else: printed as is.
    """
    if isinstance(data, BaseModel):
        console.print(JSON.from_data(data.model_dump()))
        return

    if isinstance(data, dict):
        # Only the keys are shown, not the values.
        console.print(data.keys())
        return

    if isinstance(data, list):
        listing = Table(title="List")
        for heading in ("Index", "Item"):
            listing.add_column(heading)
        for position, entry in enumerate(data):
            listing.add_row(str(position), str(entry))
        console.print(listing)
        return

    # Fallback to simple print
    console.print(data)
|
|
95
|
+
|
|
96
|
+
@app.command()
def get(keys: list[str] = typer.Argument(..., help="List of keys in XXXX:YYYY:ZZZZ format")):
    """
    Retrieve a specific value from the database system.
    This command allows you to fetch a value by specifying the universe/project, data_descriptor/collection,
    and term in a structured format.

    Usage:
        `get <project>:<collection>:<term>`

    Arguments:
        <project> The name of the project to query. like `cmip6plus`
        <collection> The name of the collection in the specified database.
        <term> The name or term within the specified collection.

    Example:
        To retrieve the value from the "cmip6plus" project, under the "institution_id" column,
        in the term with the identifier "ipsl", you would use:
        `get cmip6plus:institution_id:ipsl`
        The default project is the universe CV : the argument would be like `universe:institution:ipsl` or `:institution:ipsl`
        - to get list of available term from universe institution `:institution:`

    Notes:
        - Ensure data exist in your system before using this command (use status command to see whats available).
        - Use a colon (`:`) to separate the parts of the argument.
        - if more than one argument is given i.e get X:Y:Z A:B:C the 2 results are appended.

    """
    # The docstring above is the help text shown by Typer, so it is kept as written.
    known_projects = get_all_projects()

    # Each key is validated, split and dispatched independently.
    for key in keys:
        key_parts = validate_key_format(key)
        _LOGGER.debug(f"Processed key: {key_parts}")
        where, what, who = key_parts

        # Empty parts mean "not specified".
        what = what or None
        who = who or None

        if where in ("", "universe"):
            res = handle_universe(what, who)
        elif where in known_projects:
            res = handle_project(where, what, who, {})
        else:
            res = handle_unknown(where, what, who)

        display(res)
|
|
141
|
+
|
|
142
|
+
|
esgvoc/cli/install.py
ADDED
esgvoc/cli/main.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
|
|
2
|
+
import typer
|
|
3
|
+
from esgvoc.cli.config import app as config_app
|
|
4
|
+
from esgvoc.cli.get import app as get_app
|
|
5
|
+
from esgvoc.cli.status import app as status_app
|
|
6
|
+
from esgvoc.cli.valid import app as valid_app
|
|
7
|
+
from esgvoc.cli.install import app as install_app
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Root Typer application; the sub-applications imported above are attached to it.
app = typer.Typer()

# Register the subcommands
app.add_typer(config_app)
app.add_typer(get_app)
app.add_typer(status_app)
app.add_typer(valid_app)
app.add_typer(install_app)

def main():
    """Run the root Typer application."""
    app()

if __name__ == "__main__":
    main()
|
esgvoc/cli/status.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from esgvoc.core import service
|
|
2
|
+
import typer
|
|
3
|
+
from rich.console import Console
|
|
4
|
+
|
|
5
|
+
app = typer.Typer()
|
|
6
|
+
console = Console()
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def display(table):
    """
    Print `table` on a dedicated 200-column recording console.

    A fresh Console is created for the call; the module-level `console`
    is not used here.
    """
    wide_console = Console(width=200, record=True)
    wide_console.print(table)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@app.command()
def status():
    """
    Command to display status
    i.e summary of version of usable ressources (between remote/cached)

    """
    # The docstring above is the help text shown by Typer, so it is kept as written.
    # The summary is computed first, then the state service's table is rendered.
    service.state_service.get_state_summary()
    summary_table = service.state_service.table()
    display(summary_table)
|
|
25
|
+
|
|
26
|
+
|
esgvoc/cli/valid.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
|
|
2
|
+
from typing import List
|
|
3
|
+
from esgvoc.api.projects import (
|
|
4
|
+
valid_term,
|
|
5
|
+
valid_term_in_collection,
|
|
6
|
+
valid_term_in_project,
|
|
7
|
+
valid_term_in_all_projects
|
|
8
|
+
)
|
|
9
|
+
from esgvoc.api import BasicValidationErrorVisitor
|
|
10
|
+
from requests import logging
|
|
11
|
+
from rich.table import Table
|
|
12
|
+
from sqlmodel import except_
|
|
13
|
+
import typer
|
|
14
|
+
import re
|
|
15
|
+
from rich.console import Console
|
|
16
|
+
|
|
17
|
+
app = typer.Typer()
|
|
18
|
+
console = Console()
|
|
19
|
+
|
|
20
|
+
_LOGGER = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
@app.command()
def valid(
    strings_targets: List[str] = typer.Argument(
        ...,
        help=(
            "Pairs of strings to validate against a key in the form '<StringToValidate> <Project:Collection:Term>'.\n"
            "Multiple pairs can be provided. The key '<Project:Collection:Term>' consists of three parts:\n"
            "- 'Project' (optional)\n"
            "- 'Collection' (optional)\n"
            "- 'Term' (optional)\n"
            "Only the ':' separators are mandatory. For example:\n"
            " - 'my_string ::'\n"
            " - 'my_string Project::'\n"
            " - 'my_string Project:Collection:'\n"
            " - 'my_string Project:Collection:Term'\n"
            "The function validates based on the provided parts."
        )
    ),
    verbose: bool = typer.Option(False, "-v", "--verbose", help="Provide detailed validation results")
):
    """
    Validates one or more strings against specified Project:Collection:Term configurations.

    Depending on the provided key structure, the function performs different validation operations:
    - If all are None (e.g., "::"), validates the term across all projects (`valid_term_in_all_projects`).
    - If Term is None (e.g., "Project:Collection:"), validates the term in the specified collection (`valid_term_in_collection`).
    - If Term and Collection are None (e.g., "Project::"), validates the term in the specified project (`valid_term_in_project`).
    - If all are specified (e.g., "Project:Collection:Term"), validates the term exactly (`valid_term`).

    Parameters:
        strings_targets (List[str]): A list of validation pairs, where each pair consists of:
            - A string to validate.
            - A key in the form '<Project:Collection:Term>'.
    Usage :
        Valid one:
        esgvocab valid IPSL cmip6plus:institution_id:ipsl
        esgvocab valid IPSL cmip6plus:institution_id:
        esgvocab valid IPSL cmip6plus::
        esgvocab valid IPSL ::

        Unvalid one:
        esgvocab valid IPSL_invalid cmip6plus:institution_id:ipsl
        esgvocab valid IPSL cmip6plus:institution_id:isl <= term cant be found
        esgvocab valid IPSL cmip6plus:institutin_id:ispl <= collection cant be found
        esgvocab valid IPSL cmip6pls:institution_id:ispl <= project cant be found

        Multiple validation for all known projects:
        esgvocab valid IPSL :: IPS ::
        result will be [True, False]

        esgvocab valid --verbose IPS :: IPSL ::
        result will be
        ┏━━━━━━━━┳━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
        ┃ String ┃ Key ┃ Result ┃ Errors ┃
        ┡━━━━━━━━╇━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
        │ IPS │ :: │ ❌ Invalid │ did not found matching term │
        │ IPSL │ :: │ ✅ Valid │ None │
        └────────┴─────┴────────────┴─────────────────────────────┘
    Returns:
        List[bool]: Validation results for each pair in the input.
    """
    # Values are consumed two by two (string, key). An odd count used to crash
    # with IndexError while pairing; report it as a usage error instead.
    if len(strings_targets) % 2 != 0:
        raise typer.BadParameter(
            "Arguments must be given as '<StringToValidate> <Project:Collection:Term>' pairs; "
            f"got an odd number of values ({len(strings_targets)})."
        )

    results = []
    detailed_results = []

    # Combine string and target into pairs
    pairs = [
        f"{string} {target}"
        for string, target in zip(strings_targets[::2], strings_targets[1::2])
    ]

    # Validate each string against each target
    for validation in pairs:
        match = re.match(r"(.+)\s+([^:]*):([^:]*):([^:]*)", validation)
        if not match:
            console.print(f"[red]Invalid input format: {validation}[/red]")
            results.append(False)
            detailed_results.append({"validation": validation, "errors": ["Invalid input format"]})
            continue

        string_to_validate, project, collection, term = match.groups()
        exception_message = None
        try:
            # Perform the appropriate validation
            if project and collection and term:
                validation_result = valid_term(string_to_validate, project, collection, term)
            elif project and collection:
                validation_result = valid_term_in_collection(string_to_validate, project, collection)
            elif project:
                validation_result = valid_term_in_project(string_to_validate, project)
            else:
                validation_result = valid_term_in_all_projects(string_to_validate)
        except Exception as e:
            # Any failure of the API call is reported as an invalid result
            # carrying the exception's repr as its error.
            validation_result = False
            exception_message = repr(e)

        # Handle validation result
        if validation_result:
            results.append(True)
            detailed_results.append({"validation": validation, "errors": []})
            continue

        results.append(False)
        # Exactly one detailed entry is recorded per failed pair so that the
        # verbose table always has one row per input pair.
        if exception_message is not None:
            errors = [exception_message]
        elif validation_result == []:
            errors = ["did not found matching term"]
        elif project and collection and term:
            # Only the exact-term validation result exposes `.errors` here.
            visitor = BasicValidationErrorVisitor()
            errors = [error.accept(visitor) for error in validation_result.errors]
        else:
            errors = ["validation failed"]
        detailed_results.append({"validation": validation, "errors": errors})

    # Output results
    if verbose:
        table = Table(title="Validation Results")
        table.add_column("String", style="cyan")
        table.add_column("Key", style="magenta")
        table.add_column("Result", style="green" if all(results) else "red")
        table.add_column("Errors", style="red")

        for detail in detailed_results:
            validation = detail["validation"]
            validation_parts = validation.split()
            string = validation_parts[0]
            key = validation_parts[1] if len(validation_parts) > 1 else "::"
            result = "✅ Valid" if detail["errors"] == [] else "❌ Invalid"
            errors = "\n".join(detail["errors"]) if detail["errors"] else "None"
            table.add_row(string, key, result, errors)

        console.print(table)
    else:
        console.print(results)

    return results
|
esgvoc/core/constants.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Separator used between directory-name and file-name parts.
DIRNAME_AND_FILENAME_SEPARATOR = "_"

# Well-known file names.
PROJECT_SPECS_FILENAME = "project_specs.json"
CONTEXT_FILENAME = "000_context.jsonld"

# JSON keys read from project, context and term documents.
PROJECT_ID_JSON_KEY = "project_id"
CONTEXT_JSON_KEY = "@context"
DATA_DESCRIPTOR_JSON_KEY = "@base"
TERM_ID_JSON_KEY = "id"
TERM_TYPE_JSON_KEY = "type"
PATTERN_JSON_KEY = "regex"
COMPOSITE_PARTS_JSON_KEY = "parts"
COMPOSITE_SEPARATOR_JSON_KEY = "separator"
DRS_SPECS_JSON_KEY = "drs_name"

# First primary-key value used with SQLite.
SQLITE_FIRST_PK = 1
|
esgvoc/core/convert.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
|
|
2
|
+
import os
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from functools import cached_property
|
|
6
|
+
from typing import Any, Optional, Dict
|
|
7
|
+
import requests
|
|
8
|
+
from pyld import jsonld
|
|
9
|
+
from pydantic import BaseModel, model_validator, ConfigDict
|
|
10
|
+
|
|
11
|
+
from esgvoc.api.data_descriptors import DATA_DESCRIPTOR_CLASS_MAPPING
|
|
12
|
+
# Configure logging
|
|
13
|
+
_LOGGER = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
mapping = DATA_DESCRIPTOR_CLASS_MAPPING
|
|
16
|
+
|
|
17
|
+
def unified_document_loader(uri: str) -> Dict:
    """
    Load a JSON document from a local file or a remote URI.

    Args:
        uri: An ``http://`` / ``https://`` URL, or a path to a local file.

    Returns:
        The parsed JSON document. For a remote URI answering with a status
        other than 200, the failure is logged and an empty dict is returned.

    Raises:
        OSError / json.JSONDecodeError for an unreadable or malformed local
        file; network errors raised by ``requests`` (including timeouts) are
        propagated.
    """
    if uri.startswith(("http://", "https://")):
        # TLS certificate verification is left at the library default (on):
        # the previous `verify=False` let a man-in-the-middle substitute the
        # fetched document. A timeout keeps a stalled server from blocking
        # the caller forever.
        response = requests.get(uri, headers={"accept": "application/json"}, timeout=30)
        if response.status_code == 200:
            return response.json()
        _LOGGER.error(f"Failed to fetch remote document: {response.status_code} - {response.text}")
        return {}

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(uri, "r", encoding="utf-8") as f:
        return json.load(f)
|
|
29
|
+
|
|
30
|
+
class JsonLdResource(BaseModel):
    """
    A JSON-LD document addressed by ``uri`` (remote URL or local file path).

    The derived views (raw JSON, expanded form, normalized form, Pydantic
    instance) are computed lazily and cached after the first access.
    """
    uri: str
    local_path: Optional[str] = None

    model_config = ConfigDict(arbitrary_types_allowed=True)

    @model_validator(mode="before")
    @classmethod
    def set_local_path(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Set the local path to an absolute path if provided."""
        local_path = values.get("local_path")
        if local_path:
            values["local_path"] = os.path.abspath(local_path) + "/"
        # Side effect: every instantiation (re)installs a process-wide pyld
        # document loader that resolves both local files and remote URIs.
        # NOTE(review): the scraped source lost its indentation; this call is
        # placed outside the `if` because the module's own __main__ sample
        # expands a local file without passing `local_path` - confirm.
        jsonld.set_document_loader(lambda uri, options: {
            "contextUrl": None,  # No special context URL
            "documentUrl": uri,  # The document's actual URL
            "document": unified_document_loader(uri),  # The parsed JSON-LD document
        })
        return values

    @cached_property
    def json_dict(self) -> Dict:
        """Fetch the original JSON data."""
        _LOGGER.debug(f"Fetching JSON data from {self.uri}")
        return unified_document_loader(self.uri)

    @cached_property
    def expanded(self) -> Any:
        """Expand the JSON-LD data."""
        _LOGGER.debug(f"Expanding JSON-LD data for {self.uri}")
        return jsonld.expand(self.uri, options={"base": self.uri})

    @cached_property
    def context(self) -> Dict:
        """Fetch and return the JSON content of the '@context'."""
        # The '@context' value is appended to the directory part of `uri`,
        # so this works only in relative path declaration.
        context_data = JsonLdResource(uri="/".join(self.uri.split("/")[:-1]) + "/" + self.json_dict["@context"])

        context_value = context_data.json_dict
        if isinstance(context_value, str):
            # It's a URI, fetch it
            _LOGGER.info(f"Fetching context from URI: {context_value}")
            return unified_document_loader(context_value)
        elif isinstance(context_value, dict):
            # Embedded context
            _LOGGER.info("Using embedded context.")
            return context_value
        else:
            _LOGGER.warning("No valid '@context' found.")
            return {}

    @cached_property
    def normalized(self) -> str:
        """Normalize the JSON-LD data."""
        _LOGGER.info(f"Normalizing JSON-LD data for {self.uri}")
        return jsonld.normalize(
            self.uri, options={"algorithm": "URDNA2015", "format": "application/n-quads"}
        )

    @cached_property
    def python(self) -> Optional[Any]:
        """Map the data to a Pydantic model based on URI."""
        _LOGGER.info(f"Mapping data to a Pydantic model for {self.uri}")
        model_key = self._extract_model_key(self.uri)
        if model_key and model_key in mapping:
            model = mapping[model_key]
            return model(**self.json_dict)
        _LOGGER.warning(f"No matching model found for key: {model_key}")
        return None

    def _extract_model_key(self, uri: str) -> Optional[str]:
        """Extract a model key from the URI: its second-to-last path segment, if any."""
        parts = uri.strip("/").split("/")
        if len(parts) >= 2:
            return parts[-2]
        return None

    @property
    def info(self) -> str:
        """Return a detailed summary of the data."""
        # The "JSON Version" section used to be emitted twice (before and
        # after the URI line); it is now reported once.
        res = f"{'#' * 100}\n"
        res += f"### {self.uri.split('/')[-1]} ###\n"
        res += f"JSON Version:\n {json.dumps(self.json_dict, indent=2)}\n"
        res += f"URI: {self.uri}\n"
        res += f"Expanded Version:\n {json.dumps(self.expanded, indent=2)}\n"
        res += f"Normalized Version:\n {self.normalized}\n"
        res += f"Pydantic Model Instance:\n {self.python}\n"
        return res
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
if __name__ == "__main__":
    # Manual smoke test: print the full summary of one cached resource.
    # NOTE(review): the commented examples below call `Data`, which is not
    # defined in the visible code - presumably an earlier name of
    # JsonLdResource; confirm before re-enabling them.
    ## For Universe
    #online
    # d = Data(uri = "https://espri-mod.github.io/mip-cmor-tables/activity/cmip.json")
    # print(d.info)
    #offline
    #print(Data(uri = ".cache/repos/mip-cmor-tables/activity/cmip.json").info)
    ## for Project
    #d = Data(uri = "https://espri-mod.github.io/CMIP6Plus_CVs/activity_id/cmip.json")
    #print(d.info)
    #offline
    print(JsonLdResource(uri = ".cache/repos/CMIP6Plus_CVs/activity_id/cmip.json").info)
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
import json
|
|
3
|
+
from sqlalchemy import Engine
|
|
4
|
+
from sqlmodel import Session, create_engine
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DBConnection:
    """Wrap a SQLAlchemy engine bound to one SQLite database file."""

    SQLITE_URL_PREFIX = 'sqlite://'

    def __init__(self, db_file_path: Path, echo: bool = False) -> None:
        """
        Create the engine for `db_file_path`.

        `echo` is forwarded to `create_engine`. The connection name is the
        file stem and the stored file path is made absolute.
        """
        database_url = f'{DBConnection.SQLITE_URL_PREFIX}/{db_file_path}'
        self.engine = create_engine(database_url, echo=echo)
        self.name = db_file_path.stem
        self.file_path = db_file_path.absolute()

    def set_echo(self, echo: bool) -> None:
        """Set the engine's `echo` flag."""
        self.engine.echo = echo

    def get_engine(self) -> Engine:
        """Return the underlying engine."""
        return self.engine

    def create_session(self) -> Session:
        """Return a new session bound to the engine."""
        return Session(self.engine)

    def get_name(self) -> str|None:
        """Return the connection name (stem of the database file)."""
        return self.name

    def get_file_path(self) -> Path:
        """Return the absolute path of the database file."""
        return self.file_path
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def read_json_file(json_file_path: Path) -> dict:
    """
    Read and parse the JSON file at `json_file_path`.

    The file is decoded as UTF-8 explicitly, so the result does not depend
    on the platform's default encoding.

    Raises:
        OSError if the file cannot be read; json.JSONDecodeError if its
        content is not valid JSON.
    """
    return json.loads(json_file_path.read_text(encoding="utf-8"))
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
|
|
3
|
+
from sqlmodel import Field
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TermKind(Enum):
    """Closed set of term kinds: plain, pattern, composite or mixed."""

    PLAIN = "plain"
    PATTERN = "pattern"
    COMPOSITE = "composite"
    MIXED = "mixed"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class PkMixin:
    """Mixin declaring an optional integer primary-key field named ``pk``."""
    # Defaults to None; NOTE(review): presumably filled in by the database on insert - confirm.
    pk: int | None = Field(default=None, primary_key=True)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class IdMixin:
    """Mixin declaring an indexed string field named ``id``."""
    # Indexed, but not declared unique or primary key here.
    id: str = Field(index=True)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import sqlalchemy as sa
|
|
5
|
+
from sqlalchemy.dialects.sqlite import JSON
|
|
6
|
+
from sqlmodel import Column, Field, Relationship, SQLModel
|
|
7
|
+
|
|
8
|
+
import esgvoc.core.db.connection as db
|
|
9
|
+
from esgvoc.core.db.models.mixins import IdMixin, PkMixin, TermKind
|
|
10
|
+
|
|
11
|
+
_LOGGER = logging.getLogger("project_db_creation")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Project(SQLModel, PkMixin, IdMixin, table=True):
    """ORM model mapped to the ``projects`` table."""
    __tablename__ = "projects"
    # Stored in a JSON column.
    specs: dict = Field(sa_column=sa.Column(JSON))
    # NOTE(review): presumably the git commit hash of the ingested project repository - confirm.
    git_hash: str
    # One-to-many link to Collection, mirrored by Collection.project.
    collections: list["Collection"] = Relationship(back_populates="project")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Collection(SQLModel, PkMixin, IdMixin, table=True):
    """ORM model mapped to the ``collections`` table."""
    __tablename__ = "collections"
    # Indexed identifier of the related data descriptor.
    data_descriptor_id: str = Field(index=True)
    # Stored in a JSON column.
    context: dict = Field(sa_column=sa.Column(JSON))
    # Foreign key to projects.pk; paired with the `project` relationship below.
    project_pk: int | None = Field(default=None, foreign_key="projects.pk")
    project: Project = Relationship(back_populates="collections")
    # One-to-many link to PTerm, mirrored by PTerm.collection.
    terms: list["PTerm"] = Relationship(back_populates="collection")
    # Stored as a SQL enum over TermKind.
    term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class PTerm(SQLModel, PkMixin, IdMixin, table=True):
    """ORM model mapped to the ``pterms`` table."""
    __tablename__ = "pterms"
    # Stored in a JSON column; its "drs_name" key is the target of create_drs_name_index().
    specs: dict = Field(sa_column=sa.Column(JSON))
    # Stored as a SQL enum over TermKind.
    kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
    # Foreign key to collections.pk; paired with the `collection` relationship below.
    collection_pk: int | None = Field(default=None, foreign_key="collections.pk")
    collection: Collection = Relationship(back_populates="terms")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def create_drs_name_index():
    """
    Declare the ``drs_name_index`` index on the ``drs_name`` key of ``pterms.specs``.

    The function is idempotent: if an index with that name is already
    attached to the ``pterms`` table, it is reused and no second Index
    object is built.
    """
    pterms_table = PTerm.__table__

    # NOTE(review): SQLAlchemy is expected to attach an Index built from a
    # table-bound column expression to that table, so repeated calls would
    # otherwise accumulate same-named indexes on it - confirm. If it does not
    # attach, this loop finds nothing and the behaviour is as before.
    for existing_index in pterms_table.indexes:
        if existing_index.name == "drs_name_index":
            PTerm.__table_args__ = existing_index
            return

    PTerm.__table_args__ = sa.Index(
        "drs_name_index", pterms_table.c.specs["drs_name"]
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def project_create_db(db_file_path: Path):
    """
    Create the project SQLite database at `db_file_path` with its tables.

    The ``projects``, ``collections`` and ``pterms`` tables are created,
    after declaring the drs_name index.

    Raises:
        RuntimeError: if the connection cannot be built or the tables cannot
        be created; the original exception is chained as the cause.
    """
    try:
        connection = db.DBConnection(db_file_path)
    except Exception as connection_error:
        msg = f'Unable to create SQlite file at {db_file_path}. Abort.'
        _LOGGER.critical(msg)
        raise RuntimeError(msg) from connection_error

    try:
        registered_tables = SQLModel.metadata.tables
        tables_to_be_created = [
            registered_tables[table_name]
            for table_name in ('projects', 'collections', 'pterms')
        ]
        create_drs_name_index()
        engine = connection.get_engine()
        SQLModel.metadata.create_all(engine, tables=tables_to_be_created)
    except Exception as creation_error:
        msg = f'Unable to create tables in SQLite database at {db_file_path}. Abort.'
        _LOGGER.critical(msg)
        raise RuntimeError(msg) from creation_error
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
if __name__ == "__main__":
    # Nothing to do when the module is run as a script.
    pass
|