esgvoc 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +1 -0
- esgvoc/api/__init__.py +62 -0
- esgvoc/api/_utils.py +39 -0
- esgvoc/api/data_descriptors/__init__.py +60 -0
- esgvoc/api/data_descriptors/activity.py +51 -0
- esgvoc/api/data_descriptors/consortium.py +66 -0
- esgvoc/api/data_descriptors/date.py +48 -0
- esgvoc/api/data_descriptors/experiment.py +60 -0
- esgvoc/api/data_descriptors/forcing_index.py +47 -0
- esgvoc/api/data_descriptors/frequency.py +45 -0
- esgvoc/api/data_descriptors/grid_label.py +46 -0
- esgvoc/api/data_descriptors/initialisation_index.py +46 -0
- esgvoc/api/data_descriptors/institution.py +58 -0
- esgvoc/api/data_descriptors/license.py +47 -0
- esgvoc/api/data_descriptors/mip_era.py +46 -0
- esgvoc/api/data_descriptors/model_component.py +47 -0
- esgvoc/api/data_descriptors/organisation.py +42 -0
- esgvoc/api/data_descriptors/physic_index.py +47 -0
- esgvoc/api/data_descriptors/product.py +45 -0
- esgvoc/api/data_descriptors/realisation_index.py +46 -0
- esgvoc/api/data_descriptors/realm.py +44 -0
- esgvoc/api/data_descriptors/resolution.py +46 -0
- esgvoc/api/data_descriptors/source.py +57 -0
- esgvoc/api/data_descriptors/source_type.py +43 -0
- esgvoc/api/data_descriptors/sub_experiment.py +43 -0
- esgvoc/api/data_descriptors/table.py +50 -0
- esgvoc/api/data_descriptors/time_range.py +28 -0
- esgvoc/api/data_descriptors/variable.py +77 -0
- esgvoc/api/data_descriptors/variant_label.py +49 -0
- esgvoc/api/projects.py +854 -0
- esgvoc/api/report.py +86 -0
- esgvoc/api/search.py +92 -0
- esgvoc/api/universe.py +218 -0
- esgvoc/apps/drs/__init__.py +16 -0
- esgvoc/apps/drs/models.py +43 -0
- esgvoc/apps/drs/parser.py +27 -0
- esgvoc/cli/config.py +79 -0
- esgvoc/cli/get.py +142 -0
- esgvoc/cli/install.py +14 -0
- esgvoc/cli/main.py +22 -0
- esgvoc/cli/status.py +26 -0
- esgvoc/cli/valid.py +156 -0
- esgvoc/core/constants.py +13 -0
- esgvoc/core/convert.py +0 -0
- esgvoc/core/data_handler.py +133 -0
- esgvoc/core/db/__init__.py +5 -0
- esgvoc/core/db/connection.py +31 -0
- esgvoc/core/db/models/mixins.py +18 -0
- esgvoc/core/db/models/project.py +65 -0
- esgvoc/core/db/models/universe.py +59 -0
- esgvoc/core/db/project_ingestion.py +152 -0
- esgvoc/core/db/universe_ingestion.py +120 -0
- esgvoc/core/logging.conf +21 -0
- esgvoc/core/logging_handler.py +4 -0
- esgvoc/core/repo_fetcher.py +259 -0
- esgvoc/core/service/__init__.py +8 -0
- esgvoc/core/service/data_merger.py +83 -0
- esgvoc/core/service/esg_voc.py +79 -0
- esgvoc/core/service/settings.py +64 -0
- esgvoc/core/service/settings.toml +12 -0
- esgvoc/core/service/settings_default.toml +20 -0
- esgvoc/core/service/state.py +222 -0
- esgvoc-0.1.2.dist-info/METADATA +54 -0
- esgvoc-0.1.2.dist-info/RECORD +66 -0
- esgvoc-0.1.2.dist-info/WHEEL +4 -0
- esgvoc-0.1.2.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
|
|
2
|
+
from typing import Dict, List, Set
|
|
3
|
+
from esgvoc.core.data_handler import JsonLdResource
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def merge_dicts(original: list, custom: list) -> dict:
    """Shallow-merge the first dict of ``custom`` with the first dict of ``original``.

    Only element ``[0]`` of each list is read. Any ``"@id"`` key is dropped
    from both sides. When a key is present in both dicts, the value from
    ``original`` wins; keys found only in ``custom`` are kept as they are.
    The merge is shallow: nested containers are shared, not copied.

    Args:
        original: One-element list holding the dict whose values take precedence.
        custom: One-element list holding the dict that supplies the remaining keys.

    Returns:
        A new dict; neither input dict is modified.

    Raises:
        IndexError: If either list is empty.
    """
    winning = original[0]
    fallback = custom[0]
    # Start from the fallback values, then let the winning side overwrite them.
    merged = {key: value for key, value in fallback.items() if key != "@id"}
    merged.update((key, value) for key, value in winning.items() if key != "@id")
    return merged
|
|
15
|
+
|
|
16
|
+
def merge(uri: str) -> Dict:
    """Load the JSON-LD resource at ``uri`` and return its final merged form.

    A ``DataMerger`` with default settings is built around the resource and
    the last element of its ``merge_linked_json()`` result is returned.
    """
    resource = JsonLdResource(uri=uri)
    merge_steps = DataMerger(data=resource).merge_linked_json()
    return merge_steps[-1]
|
|
19
|
+
|
|
20
|
+
class DataMerger:
|
|
21
|
+
def __init__(self, data: JsonLdResource, allowed_base_uris: Set[str]={"https://espri-mod.github.io/mip-cmor-tables"}, locally_available:dict = {}):
|
|
22
|
+
self.data = data
|
|
23
|
+
self.allowed_base_uris = allowed_base_uris
|
|
24
|
+
self.locally_available = locally_available
|
|
25
|
+
|
|
26
|
+
def _should_resolve(self, uri: str) -> bool:
|
|
27
|
+
"""Check if a given URI should be resolved based on allowed URIs."""
|
|
28
|
+
return any(uri.startswith(base) for base in self.allowed_base_uris)
|
|
29
|
+
|
|
30
|
+
def _get_next_id(self, data: dict) -> str | None:
|
|
31
|
+
"""Extract the next @id from the data if it is a valid customization reference."""
|
|
32
|
+
if isinstance(data,list):
|
|
33
|
+
data = data[0]
|
|
34
|
+
if "@id" in data and self._should_resolve(data["@id"]):
|
|
35
|
+
return data["@id"] + ".json"
|
|
36
|
+
return None
|
|
37
|
+
|
|
38
|
+
def merge_linked_json(self) -> List[Dict]:
|
|
39
|
+
"""Fetch and merge data recursively, returning a list of progressively merged Data json instances."""
|
|
40
|
+
result_list = [self.data.json_dict] # Start with the original json object
|
|
41
|
+
visited = set(self.data.uri) # Track visited URIs to prevent cycles
|
|
42
|
+
current_data = self.data
|
|
43
|
+
#print(current_data.expanded)
|
|
44
|
+
|
|
45
|
+
while True:
|
|
46
|
+
next_id = self._get_next_id(current_data.expanded[0])
|
|
47
|
+
if not next_id or next_id in visited or not self._should_resolve(next_id):
|
|
48
|
+
break
|
|
49
|
+
visited.add(next_id)
|
|
50
|
+
|
|
51
|
+
# Fetch and merge the next customization
|
|
52
|
+
#do we have it in local ? if so use it instead of remote
|
|
53
|
+
for local_repo in self.locally_available.keys():
|
|
54
|
+
if next_id.startswith(local_repo):
|
|
55
|
+
next_id = next_id.replace(local_repo,self.locally_available[local_repo])
|
|
56
|
+
next_data_instance = JsonLdResource(uri=next_id)
|
|
57
|
+
merged_json_data = merge_dicts([current_data.json_dict], [next_data_instance.json_dict])
|
|
58
|
+
next_data_instance.json_dict = merged_json_data
|
|
59
|
+
|
|
60
|
+
# Add the merged instance to the result list
|
|
61
|
+
result_list.append(merged_json_data)
|
|
62
|
+
current_data = next_data_instance
|
|
63
|
+
|
|
64
|
+
return result_list
|
|
65
|
+
|
|
66
|
+
if __name__ == "__main__":
|
|
67
|
+
import warnings
|
|
68
|
+
warnings.simplefilter("ignore")
|
|
69
|
+
|
|
70
|
+
# test: expand institution_id ipsl and merge it with institution ipsl
|
|
71
|
+
# proj_ipsl = JsonLdResource(uri = "https://espri-mod.github.io/CMIP6Plus_CVs/institution_id/ipsl.json")
|
|
72
|
+
# allowed_uris = {"https://espri-mod.github.io/CMIP6Plus_CVs/","https://espri-mod.github.io/mip-cmor-tables/"}
|
|
73
|
+
# mdm = DataMerger(data =proj_ipsl, allowed_base_uris = allowed_uris)
|
|
74
|
+
# json_list = mdm.merge_linked_json()
|
|
75
|
+
#
|
|
76
|
+
# pprint([res for res in json_list])
|
|
77
|
+
|
|
78
|
+
# a = JsonLdResource(uri = ".cache/repos/CMIP6Plus_CVs/institution_id/ipsl.json")
|
|
79
|
+
# mdm = DataMerger(data=a)
|
|
80
|
+
# print(mdm.merge_linked_json())
|
|
81
|
+
#
|
|
82
|
+
#
|
|
83
|
+
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
from rich.logging import RichHandler
|
|
5
|
+
from rich.console import Console
|
|
6
|
+
import shutil
|
|
7
|
+
import esgvoc.core.service as service
|
|
8
|
+
|
|
9
|
+
_LOGGER = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
rich_handler = RichHandler(rich_tracebacks=True)
|
|
12
|
+
_LOGGER.addHandler(rich_handler)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def reset_init_repo():
    """Delete the local clones of the universe and of every configured project.

    Each configured ``local_path`` that is set and exists on disk is removed
    recursively; afterwards the state summary is recomputed.
    """
    settings = service.service_settings
    clone_dirs = [settings.universe.local_path]
    clone_dirs.extend(project.local_path for project in settings.projects.values())
    for clone_dir in clone_dirs:
        # Unset or already-missing directories are simply skipped.
        if clone_dir and os.path.exists(clone_dir):
            shutil.rmtree(clone_dir)
    service.state_service.get_state_summary()
|
|
24
|
+
|
|
25
|
+
def reset_init_db():
    """Delete the database files of the universe and of every configured project.

    Each configured ``db_path`` that is set and exists on disk is removed;
    afterwards the state summary is recomputed.
    """
    settings = service.service_settings
    db_files = [settings.universe.db_path]
    db_files.extend(project.db_path for project in settings.projects.values())
    for db_file in db_files:
        # Unset or already-missing files are simply skipped.
        if db_file and os.path.exists(db_file):
            os.remove(db_file)
    service.state_service.get_state_summary()
|
|
33
|
+
|
|
34
|
+
def reset_init_all():
    """Run both resets: the databases first, then the local clones."""
    for reset_step in (reset_init_db, reset_init_repo):
        reset_step()
|
|
37
|
+
|
|
38
|
+
def display(table):
    """Print ``table`` on a recording rich console that is 200 columns wide."""
    Console(record=True, width=200).print(table)
|
|
41
|
+
|
|
42
|
+
def install():
    """Synchronise the universe and all projects through the state service."""
    state_service = service.state_service
    state_service.synchronize_all()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
if __name__ == "__main__":
|
|
51
|
+
|
|
52
|
+
def Nothing(): # IT WORKS
|
|
53
|
+
reset_init_all()
|
|
54
|
+
display(service.state_service.table())
|
|
55
|
+
service.state_service.universe.sync()
|
|
56
|
+
display(service.state_service.table())
|
|
57
|
+
for _,proj in service.state_service.projects.items():
|
|
58
|
+
proj.sync()
|
|
59
|
+
display(service.state_service.table())
|
|
60
|
+
|
|
61
|
+
def OnlyLocal(): #IT ALSO WORKS
|
|
62
|
+
reset_init_db()
|
|
63
|
+
service.state_service.universe.github_access = False
|
|
64
|
+
for _,proj in service.state_service.projects.items():
|
|
65
|
+
proj.github_access = False
|
|
66
|
+
display(service.state_service.table())
|
|
67
|
+
|
|
68
|
+
service.state_service.universe.sync()
|
|
69
|
+
display(service.state_service.table())
|
|
70
|
+
for _,proj in service.state_service.projects.items():
|
|
71
|
+
proj.sync()
|
|
72
|
+
display(service.state_service.table())
|
|
73
|
+
|
|
74
|
+
# TODO: further tests are still needed for this to be complete:
|
|
75
|
+
# Changing the settings ... for now, let's assume nobody changes the settings!
|
|
76
|
+
|
|
77
|
+
OnlyLocal()
|
|
78
|
+
# service.state_service.synchronize_all()
|
|
79
|
+
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
from pydantic import BaseModel, Field
|
|
2
|
+
from typing import Dict, Optional
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import toml
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# The settings file lives next to this module. ``Path(__file__)`` is the
# module file itself, so the directory has to be taken with ``.parent``;
# joining onto the file path yields a location that can never exist.
SETTINGS_FILE = Path(__file__).parent / "settings.toml"
|
|
8
|
+
|
|
9
|
+
class ProjectSettings(BaseModel):
    """Settings describing one project repository and its cache locations."""

    # Name of the project.
    project_name: str
    # URL of the project's GitHub repository.
    github_repo: str
    # Branch to use; "main" unless configured otherwise.
    branch: str | None = "main"
    # Location of the local clone, if any.
    local_path: str | None = None
    # Location of the project's database file, if any.
    db_path: str | None = None
|
|
15
|
+
|
|
16
|
+
class UniverseSettings(BaseModel):
    """Settings describing the universe repository and its cache locations."""

    # URL of the universe's GitHub repository.
    github_repo: str
    # Branch to use; no default branch is set here.
    branch: str | None = None
    # Location of the local clone, if any.
    local_path: str | None = None
    # Location of the universe database file, if any.
    db_path: str | None = None
|
|
21
|
+
|
|
22
|
+
class ServiceSettings(BaseModel):
    """Complete service configuration: one universe plus projects keyed by name."""

    universe: UniverseSettings
    projects: Dict[str, ProjectSettings] = Field(default_factory=dict)

    @classmethod
    def load_from_file(cls, file_path: str) -> "ServiceSettings":
        """Build the settings from the TOML file at ``file_path``.

        The optional ``projects`` array is turned into a dict keyed by each
        entry's ``project_name``; the ``universe`` table is required.
        """
        raw = toml.load(file_path)
        projects = {}
        for entry in raw.pop('projects', []):
            name = entry['project_name']
            projects[name] = ProjectSettings(**entry)
        universe = UniverseSettings(**raw['universe'])
        return cls(universe=universe, projects=projects)

    def save_to_file(self, file_path: str):
        """Write the settings as TOML to ``file_path``, replacing its content."""
        universe_table = self.universe.model_dump()
        project_tables = [project.model_dump() for project in self.projects.values()]
        document = {"universe": universe_table, "projects": project_tables}
        with open(file_path, "w") as handle:
            toml.dump(document, handle)
|
|
39
|
+
|
|
40
|
+
def load_settings() -> ServiceSettings:
    """Load the settings from ``SETTINGS_FILE``, creating it when absent.

    When the file exists it is parsed and returned. Otherwise the built-in
    defaults are written to ``SETTINGS_FILE`` and returned.

    Returns:
        The loaded or freshly created settings.
    """
    if SETTINGS_FILE.exists():
        return ServiceSettings.load_from_file(str(SETTINGS_FILE))

    default_project = ProjectSettings(
        # Same name as in the shipped TOML files. ``load_from_file`` keys
        # projects by ``project_name``, so the dict key below must equal it
        # for the settings to survive a save/load round trip unchanged.
        project_name="cmip6plus",
        github_repo="https://github.com/ESPRI-Mod/CMIP6Plus_CVs",
        branch="uni_proj_ld",
        local_path=".cache/repos/CMIP6Plus_CVs",
        db_path=".cache/dbs/cmip6plus.sqlite",
    )
    default_settings = ServiceSettings(
        universe=UniverseSettings(
            github_repo="https://github.com/ESPRI-Mod/mip-cmor-tables",
            branch="uni_proj_ld",
            local_path=".cache/repos/mip-cmor-tables",
            db_path=".cache/dbs/universe.sqlite",
        ),
        projects={default_project.project_name: default_project},
    )

    default_settings.save_to_file(str(SETTINGS_FILE))
    return default_settings
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
[[projects]]
|
|
2
|
+
project_name = "cmip6plus"
|
|
3
|
+
github_repo = "https://github.com/ESPRI-Mod/CMIP6Plus_CVs"
|
|
4
|
+
branch = "uni_proj_ld"
|
|
5
|
+
local_path = ".cache/repos/CMIP6Plus_CVs"
|
|
6
|
+
db_path = ".cache/dbs/cmip6plus.sqlite"
|
|
7
|
+
|
|
8
|
+
[universe]
|
|
9
|
+
github_repo = "https://github.com/ESPRI-Mod/mip-cmor-tables"
|
|
10
|
+
branch = "uni_proj_ld"
|
|
11
|
+
local_path = ".cache/repos/mip-cmor-tables"
|
|
12
|
+
db_path = ".cache/dbs/universe.sqlite"
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
|
|
2
|
+
# [[projects]]
|
|
3
|
+
# project_name = "TestProject"
|
|
4
|
+
# github_repo = "XXX"
|
|
5
|
+
# branch = "XXX"
|
|
6
|
+
# local_path = "XXX"
|
|
7
|
+
# db_path = ".cache/dbs/XXXX.sqlite"
|
|
8
|
+
|
|
9
|
+
[[projects]]
|
|
10
|
+
project_name = "cmip6plus"
|
|
11
|
+
github_repo = "https://github.com/ESPRI-Mod/CMIP6Plus_CVs"
|
|
12
|
+
branch = "uni_proj_ld"
|
|
13
|
+
local_path = ".cache/repos/CMIP6Plus_CVs"
|
|
14
|
+
db_path = ".cache/dbs/cmip6plus.sqlite"
|
|
15
|
+
|
|
16
|
+
[universe]
|
|
17
|
+
github_repo = "https://github.com/ESPRI-Mod/mip-cmor-tables"
|
|
18
|
+
branch = "uni_proj_ld"
|
|
19
|
+
local_path = ".cache/repos/mip-cmor-tables"
|
|
20
|
+
db_path = ".cache/dbs/universe.sqlite"
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from esgvoc.core.repo_fetcher import RepoFetcher
|
|
7
|
+
from esgvoc.core.service.settings import UniverseSettings, ProjectSettings, ServiceSettings
|
|
8
|
+
from esgvoc.core.db.connection import DBConnection
|
|
9
|
+
|
|
10
|
+
from rich.table import Table
|
|
11
|
+
from sqlalchemy.exc import NoResultFound
|
|
12
|
+
from sqlmodel import select
|
|
13
|
+
from esgvoc.core.db.models.universe import Universe
|
|
14
|
+
from esgvoc.core.db.models.project import Project
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
class BaseState:
    """Track the version of one vocabulary repository in three places.

    The three places are the remote GitHub repository, its local clone and
    the database built from that clone. Subclasses set ``db_sqlmodel`` to the
    model class whose ``git_hash`` column stores the database version.
    """

    # Leading forms of a GitHub URL removed before splitting "owner/repo".
    _GITHUB_PREFIXES = ("https://github.com/", "http://github.com/", "github.com/")

    def __init__(self, github_repo: str, branch: str = "main", local_path: Optional[str] = None, db_path: Optional[str] = None):
        """Record the three locations; no version is fetched here.

        Args:
            github_repo: URL of the GitHub repository.
            branch: Branch whose version is tracked.
            local_path: Directory of the local clone, if any.
            db_path: Path of the database file, if any.
        """
        self.github_repo = github_repo
        self.branch = branch
        self.github_access = True  # False if we dont have internet and some other cases
        self.github_version = None

        self.local_path = local_path
        self.local_access = True  # False if we dont have cloned the remote repo yet
        self.local_version = None

        self.db_path = db_path
        self.db_access = True  # False if we cant access the db for some reason
        self.db_version = None

        self.rf = RepoFetcher()
        self.db_connection: DBConnection | None = None
        self.db_sqlmodel = None

    @classmethod
    def _split_github_repo(cls, github_repo: str) -> tuple[str, str]:
        """Split a GitHub repository URL into ``(owner, repo)``.

        The URL prefix is removed as a prefix. ``str.lstrip`` would instead
        strip any leading characters found in its argument and eat into
        owner names such as ``some-org``.

        Raises:
            ValueError: If what remains is not exactly ``owner/repo``.
        """
        path = github_repo
        for prefix in cls._GITHUB_PREFIXES:
            if path.startswith(prefix):
                path = path[len(prefix):]
                break
        owner, repo = path.strip("/").split("/")
        return owner, repo

    def fetch_version_local(self):
        """Refresh ``local_version`` and ``local_access`` from the local clone.

        Does nothing when no ``local_path`` is configured. Any failure is
        logged and recorded as ``local_access = False``.
        """
        if self.local_path:
            try:
                self.local_version = self.rf.get_local_repo_version(self.local_path, self.branch)
                logger.debug(f"Local repo commit: {self.local_version}")
                self.local_access = True
            except Exception as e:
                logger.exception(f"Failed to fetch local repo version: {e}")
                self.local_access = False

    def fetch_version_remote(self):
        """Refresh ``github_version`` and ``github_access`` from GitHub.

        Does nothing when no ``github_repo`` is configured. Any failure, or a
        version of ``None``, is recorded as ``github_access = False``.
        """
        if self.github_repo:
            # Pre-set so the failure message can be built even when the URL
            # itself could not be split.
            owner = None
            repo = None
            try:
                owner, repo = self._split_github_repo(self.github_repo)
                self.github_version = self.rf.get_github_version(owner, repo, self.branch)
                self.github_access = True
                logger.debug(f"Latest GitHub commit: {self.github_version}")
            except Exception as e:
                logger.exception(f"Failed to fetch GitHub version: {e} ,for {self.github_repo},owner : {owner}, repo : {repo},branch : {self.branch}")
                self.github_access = False
            if self.github_version is None:
                self.github_access = False

    def fetch_version_db(self):
        """Refresh ``db_version`` and ``db_access`` from the database file.

        A missing ``db_path`` or a missing file resets both attributes. When
        the file exists, the single ``git_hash`` row of ``db_sqlmodel`` is read.
        """
        if self.db_path:
            if not os.path.exists(self.db_path):
                self.db_version = None
                self.db_access = False
            else:
                try:
                    self.db_connection = DBConnection(db_file_path=Path(self.db_path))
                    with self.db_connection.create_session() as session:
                        self.db_version = session.exec(select(self.db_sqlmodel.git_hash)).one()
                        self.db_access = True
                # NOTE(review): on failure db_version and db_access keep
                # their previous values - confirm this is intended.
                except NoResultFound:
                    logger.debug(f"Unable to find git_hash in {self.db_path}")
                except Exception as e:
                    logger.debug(f"Unable to find git_has in {self.db_path} cause {e}")
        else:
            self.db_version = None
            self.db_access = False

    def fetch_versions(self):
        """Refresh the remote, local and database versions, in that order."""
        self.fetch_version_remote()
        self.fetch_version_local()
        self.fetch_version_db()

    def check_sync_status(self):
        """Refresh all versions and compare them pairwise.

        Returns:
            A dict with the keys ``github_local_sync``, ``local_db_sync`` and
            ``github_db_sync``. Each value is True or False when both
            versions are known (and the first side is accessible), else None.
        """
        self.fetch_versions()
        return {
            "github_local_sync": self.github_version == self.local_version if self.github_access and self.github_version and self.local_version else None,
            "local_db_sync": self.local_version == self.db_version if self.local_access and self.local_version and self.db_version else None,
            "github_db_sync": self.github_version == self.db_version if self.github_access and self.github_version and self.db_version else None,
        }

    def clone_remote(self):
        """Clone the remote repository and refresh the local version.

        Raises:
            ValueError: If ``github_repo`` cannot be split into owner and repo.
        """
        owner, repo = self._split_github_repo(self.github_repo)
        # TODO add destination "local_path" in clone_repo
        self.rf.clone_repository(owner, repo, self.branch)
        self.fetch_version_local()

    def build_db(self):
        """Rebuild the database at ``db_path`` from the local clone.

        An existing database file is deleted first; otherwise its parent
        directory is created. What gets ingested depends on ``db_sqlmodel``.
        Does nothing when no ``db_path`` is configured.
        """
        # Imported here rather than at module level, as in the original code.
        from esgvoc.core.db.project_ingestion import ingest_project
        from esgvoc.core.db.universe_ingestion import ingest_metadata_universe
        from esgvoc.core.db.models.project import project_create_db
        from esgvoc.core.db.models.universe import universe_create_db
        from esgvoc.core.db.universe_ingestion import ingest_universe

        if self.db_path:
            db_file = Path(self.db_path)
            if os.path.exists(self.db_path):
                os.remove(self.db_path)
            else:
                os.makedirs(db_file.parent, exist_ok=True)

            if self.db_sqlmodel is Universe:  # Ugly
                universe_create_db(db_file)
                ingest_metadata_universe(DBConnection(db_file), self.local_version)
                ingest_universe(Path(self.local_path), db_file)
            elif self.db_sqlmodel is Project:
                project_create_db(db_file)
                ingest_project(Path(self.local_path), db_file, self.local_version)
            self.fetch_version_db()

    def sync(self):
        """Bring the local clone and the database up to date where needed.

        The decision is taken from ``check_sync_status()``.
        """
        summary = self.check_sync_status()
        if self.github_access and summary["github_db_sync"] is None and summary["local_db_sync"] is None and summary["github_local_sync"] is None:
            # Nothing can be compared yet: clone and build from scratch.
            self.clone_remote()
            self.build_db()
        elif self.github_access and not summary["github_db_sync"]:
            # NOTE(review): when local and db agree but GitHub is ahead, this
            # rebuilds without cloning - confirm that is intended.
            if not summary["local_db_sync"] and summary["local_db_sync"] is not None:
                self.clone_remote()
                self.build_db()
            else:  # can be simply build in root and clone if necessary
                self.build_db()
        elif self.local_access:
            # Rebuild when local and db differ or cannot be compared. The
            # former ``summary is not None`` test was always true.
            if not summary["local_db_sync"]:
                self.build_db()
        elif not self.db_access:  # it can happen if the db is created but not filled
            self.build_db()
|
|
161
|
+
class StateUniverse(BaseState):
    """State of the universe repository; its database model is ``Universe``."""

    def __init__(self, settings: UniverseSettings):
        """Initialise the base state from every field of ``settings``."""
        init_kwargs = settings.model_dump()
        super().__init__(**init_kwargs)
        self.db_sqlmodel = Universe
|
|
165
|
+
|
|
166
|
+
class StateProject(BaseState):
    """State of one project repository; its database model is ``Project``."""

    def __init__(self, settings: ProjectSettings):
        """Keep ``project_name`` on the instance and pass the rest to the base."""
        init_kwargs = settings.model_dump()
        # The base initialiser does not accept ``project_name``.
        project_name = init_kwargs.pop("project_name")
        super().__init__(**init_kwargs)
        self.project_name = project_name
        self.db_sqlmodel = Project
|
|
172
|
+
|
|
173
|
+
class StateService:
    """Hold the universe state and one state per configured project."""

    def __init__(self, service_settings: ServiceSettings):
        """Create all states from ``service_settings`` and fetch their versions."""
        self.universe = StateUniverse(service_settings.universe)
        self.projects = {}
        for name, project_settings in service_settings.projects.items():
            self.projects[name] = StateProject(project_settings)
        self.connect_db()

    def get_state_summary(self):
        """Return the sync status of the universe and of each project.

        The result has the keys ``"universe"`` and ``"projects"``; the latter
        maps each project name to its own status.
        """
        summary = {"universe": self.universe.check_sync_status(), "projects": {}}
        for name, project in self.projects.items():
            summary["projects"][name] = project.check_sync_status()
        return summary

    def connect_db(self):
        """Fetch the versions of the universe and of every project."""
        self.universe.fetch_versions()
        for project in self.projects.values():
            project.fetch_versions()

    def synchronize_all(self):
        """Synchronise the universe first, then every project."""
        self.universe.sync()
        for project in self.projects.values():
            project.sync()

    def table(self):
        """Build a rich table of the paths and versions of all repositories."""
        overview = Table(show_header=False, show_lines=True)
        overview.add_row("", "Remote github repo", "Local repository", "Cache Database")
        # Two rows per repository: its three locations, then its three versions.
        labelled_states = [("Universe path", self.universe)]
        labelled_states.extend((f"{name} path", project) for name, project in self.projects.items())
        for label, state in labelled_states:
            overview.add_row(label, state.github_repo, state.local_path, state.db_path)
            overview.add_row("Version", state.github_version, state.local_version, state.db_version)
        return overview
|
|
204
|
+
|
|
205
|
+
if __name__ == "__main__":
|
|
206
|
+
# Load settings from file
|
|
207
|
+
service_settings = ServiceSettings.load_from_file("src/esgvoc/core/service/settings.toml")
|
|
208
|
+
|
|
209
|
+
# Initialize StateService
|
|
210
|
+
state_service = StateService(service_settings)
|
|
211
|
+
state_service.get_state_summary()
|
|
212
|
+
|
|
213
|
+
# Synchronize all
|
|
214
|
+
state_service.synchronize_all()
|
|
215
|
+
|
|
216
|
+
# pprint(state_service.universe.github_version)
|
|
217
|
+
# pprint(state_service.universe.local_version)
|
|
218
|
+
# pprint(state_service.universe.db_version)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
# Check for differences
|
|
222
|
+
#pprint(state_service.find_version_differences())
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: esgvoc
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: python library and CLI to interact with WCRP CV
|
|
5
|
+
Project-URL: Repository, https://github.com/ESPRI-Mod/cmipld
|
|
6
|
+
Author-email: TROUSSELLIER Laurent <laurent.troussellier@ipsl.fr>, Gardoll Sébastien <sebastien@gardoll.fr>
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
|
+
Requires-Dist: idna>=3.10
|
|
9
|
+
Requires-Dist: pydantic>=2.9.2
|
|
10
|
+
Requires-Dist: pyld>=2.0.4
|
|
11
|
+
Requires-Dist: requests>=2.32.3
|
|
12
|
+
Requires-Dist: sqlalchemy>=2.0.36
|
|
13
|
+
Requires-Dist: sqlmodel>=0.0.22
|
|
14
|
+
Requires-Dist: toml>=0.10.2
|
|
15
|
+
Requires-Dist: typer>=0.15.0
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Esgvoc Library
|
|
20
|
+
|
|
21
|
+
Esgvoc is a Python library designed to simplify interaction with controlled vocabularies (CVs) used in climate data projects. It supports querying, caching, and validating terms across various CV repositories like the Universe and project-specific repositories (e.g., CMIP6Plus).
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Features
|
|
26
|
+
|
|
27
|
+
- **Query controlled vocabularies**:
|
|
28
|
+
- Retrieve terms, collections, or descriptors.
|
|
29
|
+
- Perform cross-validation and search operations.
|
|
30
|
+
|
|
31
|
+
- **Caching**:
|
|
32
|
+
- Download CVs to a local database for offline use.
|
|
33
|
+
- Keep the local cache up-to-date.
|
|
34
|
+
|
|
35
|
+
- **Validation**:
|
|
36
|
+
- Validate strings against CV terms and templates.
|
|
37
|
+
- Supports case-sensitive, wildcard, and approximate matching.
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## Installation
|
|
42
|
+
|
|
43
|
+
Esgvoc is available on PyPI. Install it with pip:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
pip install esgvoc
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Then, to get the latest available controlled vocabularies, run:
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
esgvoc install
|
|
54
|
+
```
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
esgvoc/__init__.py,sha256=kPejgt5LFlzacS1O0vGN5JC7AlpRpWCmGIHUcBr_6-M,35
|
|
2
|
+
esgvoc/api/__init__.py,sha256=yjrmVpHAR5_OChzzWziebIeUYh_6qyXBJx3VUimaFkE,2933
|
|
3
|
+
esgvoc/api/_utils.py,sha256=eIVP7d3jbr7u_LNliLoM2Up3Zl5rcxIw_hjqLdfaxUA,1390
|
|
4
|
+
esgvoc/api/projects.py,sha256=JZuqDTeAv7GcJ4IclyHU1bKWrGdqGafvxQAJ8fgux-8,38629
|
|
5
|
+
esgvoc/api/report.py,sha256=MFMN9oVG9WWRvPtBeQj4o7XDahPu-Fq302Qy2G8Qt2Q,2875
|
|
6
|
+
esgvoc/api/search.py,sha256=JYTegv0ebdm8AqK8V6DccPR88K2vWVtHXnstJmG07xE,3501
|
|
7
|
+
esgvoc/api/universe.py,sha256=YGphsws7iSQZKYfRx4XfiZOuBylc0HYFG2KItsjyVjM,9609
|
|
8
|
+
esgvoc/api/data_descriptors/__init__.py,sha256=K-qzokgt3bdiwSJcnBzAub4Duw87cHTX9VdF-RU-pQ8,2910
|
|
9
|
+
esgvoc/api/data_descriptors/activity.py,sha256=o-RlmAFJkIb9TkUZIjEJaHi9dnQ70GHH5-5bKACRwMI,1371
|
|
10
|
+
esgvoc/api/data_descriptors/consortium.py,sha256=fhAaEqxBpBDSmO7rIya1nnEz9aRyhQQFnIBExcyQxQM,1467
|
|
11
|
+
esgvoc/api/data_descriptors/date.py,sha256=q4YXqaEryrcXO3ndk4SwyKwn56iGuBiJbHs8JSUpXJU,781
|
|
12
|
+
esgvoc/api/data_descriptors/experiment.py,sha256=DCPYhgGWgD7LKgheqreXKGd5_0pQ6a1GnJ17IDt5WXY,1838
|
|
13
|
+
esgvoc/api/data_descriptors/forcing_index.py,sha256=m2SBobNTN8P_TFS-uIwMyqbJ9d6CNr21bzW7qT3MVe0,796
|
|
14
|
+
esgvoc/api/data_descriptors/frequency.py,sha256=XIu1v5NIcaNKj-9K3WNa4jH_Lds5XO5CP4M79nNVuVE,842
|
|
15
|
+
esgvoc/api/data_descriptors/grid_label.py,sha256=i7bZJv_Y1s8gx2TR5D02i3Xify1rMu9lXRAwvYRugEY,846
|
|
16
|
+
esgvoc/api/data_descriptors/initialisation_index.py,sha256=TtxvKO0IaD4ZEzm6LKCqKEaRYFHtLyTKo4UGj4kL9fI,809
|
|
17
|
+
esgvoc/api/data_descriptors/institution.py,sha256=sr4pgTrNIEnl_GcaJeNndsYfYXSMKwFybzZDquD0qdM,1316
|
|
18
|
+
esgvoc/api/data_descriptors/license.py,sha256=4hQiAQoBbecYZbApAzr2mnR8063yL13j7b_VSKgkIhA,933
|
|
19
|
+
esgvoc/api/data_descriptors/mip_era.py,sha256=P7Vyen9uFCoOhh4qooPy-hDJ_jcP-1ISl25cmZBC9KI,824
|
|
20
|
+
esgvoc/api/data_descriptors/model_component.py,sha256=WGsv_bd5TrNeyBx1a6f-wjXRXDMMf5C5hyJtTknjk68,864
|
|
21
|
+
esgvoc/api/data_descriptors/organisation.py,sha256=dyB2l1lYDb3SMCuO4Ptsz9gImooeD3fqaTr6Ih2N_vg,855
|
|
22
|
+
esgvoc/api/data_descriptors/physic_index.py,sha256=S_G-0hrbbGFqWj0xks6kAIwKkjPYRrRCDHaeZqvOdYk,794
|
|
23
|
+
esgvoc/api/data_descriptors/product.py,sha256=rYhmSZy5vPwzZ2ZYIQXkoW5E-FeG8lAS_1lPaU7sDtg,805
|
|
24
|
+
esgvoc/api/data_descriptors/realisation_index.py,sha256=spmDj6fBYfnqFiTJRH_OHJbm9gRmq-QqEvt1UJMXwl8,805
|
|
25
|
+
esgvoc/api/data_descriptors/realm.py,sha256=GAebWcwyR48EyZiuapLD8eZxYEv3oGQ6Cndeho0DVAs,799
|
|
26
|
+
esgvoc/api/data_descriptors/resolution.py,sha256=70eg9lAYGFS7Qws5mL2QwrU9N7tleA0YQp-V5ZIMj0E,841
|
|
27
|
+
esgvoc/api/data_descriptors/source.py,sha256=1wWZY63NXk-8b3io8OYdsaZpVaXHS1d-M1bBhf54Ysc,1898
|
|
28
|
+
esgvoc/api/data_descriptors/source_type.py,sha256=1GRDoFun3ncRLcAhjRvl3OgGL2ieq7Cv8v1_hcI60BU,793
|
|
29
|
+
esgvoc/api/data_descriptors/sub_experiment.py,sha256=Z-GxS96201hIYtDgGnymHpkpKt16xjvk4QQnY_vSjr0,799
|
|
30
|
+
esgvoc/api/data_descriptors/table.py,sha256=bhh-bSx7UYAAETmMAGNOIfwkyvywIhX0HlJh9a2njV0,1009
|
|
31
|
+
esgvoc/api/data_descriptors/time_range.py,sha256=bBVRwmc7NEeGacV-UA0vadQXsyrlwBjyjVoYTw26rYA,630
|
|
32
|
+
esgvoc/api/data_descriptors/variable.py,sha256=2Y6MfDzRvLF3FR5bhhJ6jTmPc2NTZVsoGF1Y2SEGeLU,2145
|
|
33
|
+
esgvoc/api/data_descriptors/variant_label.py,sha256=ExP68t3v1xtjFEjOgVr2NFEOkQVGmf9GzStRlSPmNMU,931
|
|
34
|
+
esgvoc/apps/drs/__init__.py,sha256=uNkxKa7wbUAHlldtrCxTZraJJ2Y9NQEdpWTy-lLrGAY,562
|
|
35
|
+
esgvoc/apps/drs/models.py,sha256=ZWTXkgMKTCvMkmOB2c-l_npl_9ph0qXewa0e5J02mAI,937
|
|
36
|
+
esgvoc/apps/drs/parser.py,sha256=DlCnVn0cguu5_I-gKinw_Hd_gomfET90AqMlZlawVz8,734
|
|
37
|
+
esgvoc/cli/config.py,sha256=JCUppLaMPO6Xa3pZ-RL-meWy4wwTw1hr_IJM3LL1vac,2877
|
|
38
|
+
esgvoc/cli/get.py,sha256=60W-MSrIUvQkAmGB2HXr2qGKYbel-_IKDL5N_uEIr8Y,5122
|
|
39
|
+
esgvoc/cli/install.py,sha256=tJl9Z9UoBbRXK8dMAehTTHfH9aT6fAqtEX7tt5Wl0F8,236
|
|
40
|
+
esgvoc/cli/main.py,sha256=jXJ9rNzI276cYJaZ8LxKBsblxHLw51_OK0_OyAGPe8A,490
|
|
41
|
+
esgvoc/cli/status.py,sha256=_6Wx3bGzsoxv_Hv0ENO-NcFJzzzUpLTlsUlQH-E3d-o,465
|
|
42
|
+
esgvoc/cli/valid.py,sha256=C_z1nrAP7mD3Iws0esVJsXpbiwjWZBtgXmikJruRRtY,7057
|
|
43
|
+
esgvoc/core/constants.py,sha256=b9GoM3pTICki95gMCnUZbg4_mMiywKhJX5ME01pgwMs,431
|
|
44
|
+
esgvoc/core/convert.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
|
+
esgvoc/core/data_handler.py,sha256=BmcD_dSvX5fCkUEGAQnadPTeuKA7IvgMmQtesMXKh5g,5004
|
|
46
|
+
esgvoc/core/logging.conf,sha256=GK52lsTu17CfC2tKiMAIvkuHkIt5tqPmkWv68COOilc,278
|
|
47
|
+
esgvoc/core/logging_handler.py,sha256=NhqKX2nFYVi01EhZPysFZ4UWBWJwgxafC_pWtd28YlU,115
|
|
48
|
+
esgvoc/core/repo_fetcher.py,sha256=m0Qh2kBAa-ij2ZJ7uEYYW-CakoLs5deFHl7zniLgW5Y,9738
|
|
49
|
+
esgvoc/core/db/__init__.py,sha256=gjAT_lUJtPqxsc8WnwbG_sxDqSIFAlG8vEKxdGyH82w,150
|
|
50
|
+
esgvoc/core/db/connection.py,sha256=dhnCy1mwytUUvt149WHk7eYW0KSP64IaF3kMvKOQSJE,877
|
|
51
|
+
esgvoc/core/db/project_ingestion.py,sha256=Sx63UhHjZ_2b5hkObDci2X7VB9f3DfU7-uKtqgl12Rk,6114
|
|
52
|
+
esgvoc/core/db/universe_ingestion.py,sha256=weF8f4AUF20NHiDoUe8paQmxOpuNUe8QSTslEtOfKGA,5148
|
|
53
|
+
esgvoc/core/db/models/mixins.py,sha256=M4gP42fydNcrZO9Z9wfKVU_1tK__fc2NosWumpU9lc0,291
|
|
54
|
+
esgvoc/core/db/models/project.py,sha256=KHnVsaFbA67e28doJ4TFCDkW04x4Es-uv3fDL3WDup4,2349
|
|
55
|
+
esgvoc/core/db/models/universe.py,sha256=32oWFvzqqFr86M14phbfvfmEzlldi3DSTAALjTigR_8,2199
|
|
56
|
+
esgvoc/core/service/__init__.py,sha256=ByAE1LhD5cKIcwrSIH0Y___KP2Ewh55P57sAMQ4CHeM,308
|
|
57
|
+
esgvoc/core/service/data_merger.py,sha256=GNFp5DTV2jlBVJZNpILngi6jCbUvVGcqka4EMWKj_Os,3456
|
|
58
|
+
esgvoc/core/service/esg_voc.py,sha256=5G0P4_xmQzoI_RG_agpq-yHoYYZx220P27v2nPrpyNs,2420
|
|
59
|
+
esgvoc/core/service/settings.py,sha256=oNA4BKgZopxLA51R2PlRJ9rpqHgOqhGWLx-pffVUOlk,2160
|
|
60
|
+
esgvoc/core/service/settings.toml,sha256=XzcnD3l4xk5UXNxgod7uSbQ_iYFn2CjyLvVPlQ88Edw,383
|
|
61
|
+
esgvoc/core/service/settings_default.toml,sha256=lxHkuLn3T65SxWh5owxqREMQ88LwIgoQ85-5HaDVuRo,529
|
|
62
|
+
esgvoc/core/service/state.py,sha256=RPzgGTPyhqizbDKSEHbPGmNRss7iSi_bZnDafJokrEI,9203
|
|
63
|
+
esgvoc-0.1.2.dist-info/METADATA,sha256=GGTJeCE71fd-SDGePr6LkmRFkz2_0WfHpqMFfeMQ6-k,1456
|
|
64
|
+
esgvoc-0.1.2.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
|
|
65
|
+
esgvoc-0.1.2.dist-info/entry_points.txt,sha256=ZXufSC7Jlx1lb52U6Buv9IitJMcqAAXOerR2V9DaIto,48
|
|
66
|
+
esgvoc-0.1.2.dist-info/RECORD,,
|