esgvoc 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +1 -1
- esgvoc/api/__init__.py +95 -60
- esgvoc/api/data_descriptors/__init__.py +50 -28
- esgvoc/api/data_descriptors/activity.py +3 -3
- esgvoc/api/data_descriptors/area_label.py +16 -1
- esgvoc/api/data_descriptors/branded_suffix.py +20 -0
- esgvoc/api/data_descriptors/branded_variable.py +12 -0
- esgvoc/api/data_descriptors/consortium.py +14 -13
- esgvoc/api/data_descriptors/contact.py +5 -0
- esgvoc/api/data_descriptors/conventions.py +6 -0
- esgvoc/api/data_descriptors/creation_date.py +5 -0
- esgvoc/api/data_descriptors/data_descriptor.py +14 -9
- esgvoc/api/data_descriptors/data_specs_version.py +5 -0
- esgvoc/api/data_descriptors/date.py +1 -1
- esgvoc/api/data_descriptors/directory_date.py +1 -1
- esgvoc/api/data_descriptors/experiment.py +13 -11
- esgvoc/api/data_descriptors/forcing_index.py +1 -1
- esgvoc/api/data_descriptors/frequency.py +3 -3
- esgvoc/api/data_descriptors/further_info_url.py +5 -0
- esgvoc/api/data_descriptors/grid_label.py +2 -2
- esgvoc/api/data_descriptors/horizontal_label.py +15 -1
- esgvoc/api/data_descriptors/initialisation_index.py +1 -1
- esgvoc/api/data_descriptors/institution.py +8 -5
- esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
- esgvoc/api/data_descriptors/license.py +3 -3
- esgvoc/api/data_descriptors/mip_era.py +1 -1
- esgvoc/api/data_descriptors/model_component.py +1 -1
- esgvoc/api/data_descriptors/obs_type.py +5 -0
- esgvoc/api/data_descriptors/organisation.py +1 -1
- esgvoc/api/data_descriptors/physic_index.py +1 -1
- esgvoc/api/data_descriptors/product.py +2 -2
- esgvoc/api/data_descriptors/publication_status.py +5 -0
- esgvoc/api/data_descriptors/realisation_index.py +1 -1
- esgvoc/api/data_descriptors/realm.py +1 -1
- esgvoc/api/data_descriptors/region.py +5 -0
- esgvoc/api/data_descriptors/resolution.py +3 -3
- esgvoc/api/data_descriptors/source.py +9 -5
- esgvoc/api/data_descriptors/source_type.py +1 -1
- esgvoc/api/data_descriptors/table.py +3 -2
- esgvoc/api/data_descriptors/temporal_label.py +15 -1
- esgvoc/api/data_descriptors/time_range.py +4 -3
- esgvoc/api/data_descriptors/title.py +5 -0
- esgvoc/api/data_descriptors/tracking_id.py +5 -0
- esgvoc/api/data_descriptors/variable.py +25 -12
- esgvoc/api/data_descriptors/variant_label.py +3 -3
- esgvoc/api/data_descriptors/vertical_label.py +14 -0
- esgvoc/api/project_specs.py +120 -4
- esgvoc/api/projects.py +733 -505
- esgvoc/api/py.typed +0 -0
- esgvoc/api/report.py +12 -8
- esgvoc/api/search.py +168 -98
- esgvoc/api/universe.py +368 -157
- esgvoc/apps/drs/constants.py +1 -1
- esgvoc/apps/drs/generator.py +51 -69
- esgvoc/apps/drs/report.py +60 -15
- esgvoc/apps/drs/validator.py +60 -71
- esgvoc/apps/jsg/cmip6_template.json +74 -0
- esgvoc/apps/jsg/cmip6plus_template.json +74 -0
- esgvoc/apps/jsg/json_schema_generator.py +185 -0
- esgvoc/apps/py.typed +0 -0
- esgvoc/cli/config.py +500 -0
- esgvoc/cli/drs.py +3 -2
- esgvoc/cli/find.py +138 -0
- esgvoc/cli/get.py +46 -38
- esgvoc/cli/main.py +10 -3
- esgvoc/cli/status.py +27 -18
- esgvoc/cli/valid.py +10 -15
- esgvoc/core/constants.py +1 -1
- esgvoc/core/db/__init__.py +2 -4
- esgvoc/core/db/connection.py +5 -3
- esgvoc/core/db/models/project.py +57 -15
- esgvoc/core/db/models/universe.py +49 -10
- esgvoc/core/db/project_ingestion.py +79 -65
- esgvoc/core/db/universe_ingestion.py +71 -40
- esgvoc/core/exceptions.py +33 -0
- esgvoc/core/logging_handler.py +24 -2
- esgvoc/core/repo_fetcher.py +61 -59
- esgvoc/core/service/data_merger.py +47 -34
- esgvoc/core/service/state.py +107 -83
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/METADATA +7 -20
- esgvoc-1.0.0.dist-info/RECORD +95 -0
- esgvoc/api/_utils.py +0 -53
- esgvoc/core/logging.conf +0 -21
- esgvoc-0.3.0.dist-info/RECORD +0 -78
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/WHEEL +0 -0
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/entry_points.txt +0 -0
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/core/repo_fetcher.py
CHANGED
|
@@ -1,19 +1,23 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
import os
|
|
2
3
|
import subprocess
|
|
4
|
+
import sys
|
|
5
|
+
from contextlib import contextmanager
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
|
|
3
9
|
import requests
|
|
4
10
|
from pydantic import BaseModel, ValidationError
|
|
5
|
-
from typing import List, Optional
|
|
6
|
-
from contextlib import contextmanager
|
|
7
|
-
import logging
|
|
8
|
-
import sys
|
|
9
11
|
|
|
10
12
|
_LOGGER = logging.getLogger(__name__)
|
|
11
13
|
|
|
14
|
+
|
|
12
15
|
@contextmanager
|
|
13
16
|
def redirect_stdout_to_log(level=logging.INFO):
|
|
14
17
|
"""
|
|
15
18
|
Redirect stdout to the global _LOGGER temporarily.
|
|
16
19
|
"""
|
|
20
|
+
|
|
17
21
|
class StreamToLogger:
|
|
18
22
|
def __init__(self, log_level):
|
|
19
23
|
self.log_level = log_level
|
|
@@ -48,6 +52,7 @@ class GitHubRepository(BaseModel):
|
|
|
48
52
|
created_at: str
|
|
49
53
|
updated_at: str
|
|
50
54
|
|
|
55
|
+
|
|
51
56
|
class GitHubBranch(BaseModel):
|
|
52
57
|
name: str
|
|
53
58
|
commit: dict
|
|
@@ -59,7 +64,7 @@ class RepoFetcher:
|
|
|
59
64
|
RepoFetcher is responsible for fetching data from external sources such as GitHub.
|
|
60
65
|
"""
|
|
61
66
|
|
|
62
|
-
def __init__(self, base_url: str = "https://api.github.com",local_path: str = ".cache/repos"):
|
|
67
|
+
def __init__(self, base_url: str = "https://api.github.com", local_path: str = ".cache/repos"):
|
|
63
68
|
self.base_url = base_url
|
|
64
69
|
self.repo_dir = local_path
|
|
65
70
|
|
|
@@ -100,7 +105,6 @@ class RepoFetcher:
|
|
|
100
105
|
except ValidationError as e:
|
|
101
106
|
raise Exception(f"Data validation error: {e}")
|
|
102
107
|
|
|
103
|
-
|
|
104
108
|
def fetch_branch_details(self, owner: str, repo: str, branch: str) -> GitHubBranch:
|
|
105
109
|
"""
|
|
106
110
|
Fetch details of a specific branch in a repository.
|
|
@@ -120,7 +124,7 @@ class RepoFetcher:
|
|
|
120
124
|
except ValidationError as e:
|
|
121
125
|
raise Exception(f"Data validation error: {e}")
|
|
122
126
|
|
|
123
|
-
def list_directory(self,owner, repo, branch=
|
|
127
|
+
def list_directory(self, owner, repo, branch="main"):
|
|
124
128
|
"""
|
|
125
129
|
List directories in the root of a GitHub repository.
|
|
126
130
|
|
|
@@ -133,10 +137,10 @@ class RepoFetcher:
|
|
|
133
137
|
response = requests.get(url)
|
|
134
138
|
response.raise_for_status() # Raise an error for bad responses
|
|
135
139
|
contents = response.json()
|
|
136
|
-
directories = [item[
|
|
140
|
+
directories = [item["name"] for item in contents if item["type"] == "dir"]
|
|
137
141
|
return directories
|
|
138
142
|
|
|
139
|
-
def list_files(self,owner, repo, directory, branch=
|
|
143
|
+
def list_files(self, owner, repo, directory, branch="main"):
|
|
140
144
|
"""
|
|
141
145
|
List files in a specific directory of a GitHub repository.
|
|
142
146
|
|
|
@@ -150,10 +154,10 @@ class RepoFetcher:
|
|
|
150
154
|
response = requests.get(url)
|
|
151
155
|
response.raise_for_status() # Raise an error for bad responses
|
|
152
156
|
contents = response.json()
|
|
153
|
-
files = [item[
|
|
157
|
+
files = [item["name"] for item in contents if item["type"] == "file"]
|
|
154
158
|
return files
|
|
155
|
-
|
|
156
|
-
def clone_repository(self, owner: str, repo: str, branch: Optional[str] = None, local_path: str|None = None):
|
|
159
|
+
|
|
160
|
+
def clone_repository(self, owner: str, repo: str, branch: Optional[str] = None, local_path: str | None = None):
|
|
157
161
|
"""
|
|
158
162
|
Clone a GitHub repository to a target directory.
|
|
159
163
|
:param owner: Repository owner
|
|
@@ -162,52 +166,47 @@ class RepoFetcher:
|
|
|
162
166
|
:param branch: (Optional) The branch to clone. Clones the default branch if None.
|
|
163
167
|
"""
|
|
164
168
|
repo_url = f"https://github.com/{owner}/{repo}.git"
|
|
165
|
-
destination = local_path if local_path else f"{self.repo_dir}/{repo}"
|
|
169
|
+
destination = local_path if local_path else f"{self.repo_dir}/{repo}"
|
|
166
170
|
|
|
167
171
|
command = ["git", "clone", repo_url, destination]
|
|
168
172
|
if branch:
|
|
169
173
|
command.extend(["--branch", branch])
|
|
170
174
|
with redirect_stdout_to_log():
|
|
171
|
-
|
|
172
175
|
try:
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
176
|
+
if not Path(destination).exists():
|
|
177
|
+
subprocess.run(command, check=True)
|
|
178
|
+
_LOGGER.debug(f"Repository cloned successfully into {destination}")
|
|
179
|
+
else:
|
|
177
180
|
current_work_dir = os.getcwd()
|
|
178
|
-
os.chdir(f"{
|
|
181
|
+
os.chdir(f"{destination}")
|
|
179
182
|
command = ["git", "pull"]
|
|
180
183
|
subprocess.run(command, check=True)
|
|
181
184
|
os.chdir(current_work_dir)
|
|
182
185
|
|
|
186
|
+
except Exception as e:
|
|
187
|
+
raise Exception(f"Failed to clone repository: {e}")
|
|
183
188
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
""" Fetch the latest commit version (or any other versioning scheme) from GitHub. """
|
|
189
|
-
details = self.fetch_branch_details( owner, repo, branch)
|
|
190
|
-
return details.commit.get('sha')
|
|
189
|
+
def get_github_version_with_api(self, owner: str, repo: str, branch: str = "main"):
|
|
190
|
+
"""Fetch the latest commit version (or any other versioning scheme) from GitHub."""
|
|
191
|
+
details = self.fetch_branch_details(owner, repo, branch)
|
|
192
|
+
return details.commit.get("sha")
|
|
191
193
|
|
|
192
|
-
def get_github_version(self, owner: str, repo: str, branch: str="main"):
|
|
193
|
-
"""
|
|
194
|
+
def get_github_version(self, owner: str, repo: str, branch: str = "main"):
|
|
195
|
+
"""Fetch the latest commit version (or any other versioning scheme) from GitHub. with command git fetch"""
|
|
194
196
|
repo_url = f"https://github.com/{owner}/{repo}.git"
|
|
195
197
|
command = ["git", "ls-remote", repo_url, f"{self.repo_dir}/{repo}"]
|
|
196
198
|
if branch:
|
|
197
199
|
command.extend([branch])
|
|
198
200
|
|
|
199
201
|
# with redirect_stdout_to_log():
|
|
200
|
-
output=None
|
|
202
|
+
output = None
|
|
201
203
|
try:
|
|
202
|
-
result = subprocess.run(command,
|
|
203
|
-
text=True,
|
|
204
|
-
check=True)
|
|
204
|
+
result = subprocess.run(command, capture_output=True, text=True, check=True)
|
|
205
205
|
# Parse the output to get the commit hash
|
|
206
206
|
output = result.stdout.strip()
|
|
207
207
|
_LOGGER.debug(f"Repository fetch successfully from {self.repo_dir}/{repo}")
|
|
208
208
|
except Exception as e:
|
|
209
|
-
|
|
210
|
-
_LOGGER.debug("error in with git fetch " + repr(e))
|
|
209
|
+
_LOGGER.debug("error in with git fetch " + repr(e))
|
|
211
210
|
if output is not None:
|
|
212
211
|
commit_hash = output.split()[0]
|
|
213
212
|
return commit_hash
|
|
@@ -216,45 +215,48 @@ class RepoFetcher:
|
|
|
216
215
|
# return git_hash
|
|
217
216
|
|
|
218
217
|
def get_local_repo_version(self, repo_path: str, branch: Optional[str] = "main"):
|
|
219
|
-
"""
|
|
218
|
+
"""Check the version of the local repository by fetching the latest commit hash."""
|
|
220
219
|
# repo_path = os.path.join(self.repo_dir, repo)
|
|
221
220
|
if os.path.exists(repo_path):
|
|
222
|
-
#print("EXIST")
|
|
221
|
+
# print("EXIST")
|
|
223
222
|
command = ["git", "-C", repo_path]
|
|
224
223
|
if branch:
|
|
225
224
|
command.extend(["switch", branch])
|
|
226
225
|
# Ensure we are on the correct branch
|
|
227
226
|
with redirect_stdout_to_log():
|
|
228
|
-
subprocess.run(
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
227
|
+
subprocess.run(
|
|
228
|
+
command,
|
|
229
|
+
stdout=subprocess.PIPE, # Capture stdout
|
|
230
|
+
stderr=subprocess.PIPE, # Capture stderr
|
|
231
|
+
text=True,
|
|
232
|
+
) # Decode output as text
|
|
232
233
|
# Get the latest commit hash (SHA) from the local repository
|
|
233
|
-
commit_hash = subprocess.check_output(
|
|
234
|
-
|
|
235
|
-
|
|
234
|
+
commit_hash = subprocess.check_output(
|
|
235
|
+
["git", "-C", repo_path, "rev-parse", "HEAD"], stderr=subprocess.PIPE, text=True
|
|
236
|
+
).strip()
|
|
236
237
|
return commit_hash
|
|
237
238
|
return None
|
|
238
239
|
|
|
240
|
+
|
|
239
241
|
if __name__ == "__main__":
|
|
240
242
|
fetcher = RepoFetcher()
|
|
241
|
-
|
|
243
|
+
|
|
242
244
|
# Fetch repositories for a user
|
|
243
|
-
#repos = fetcher.fetch_repositories("ESPRI-Mod")
|
|
244
|
-
#for repo in repos:
|
|
245
|
+
# repos = fetcher.fetch_repositories("ESPRI-Mod")
|
|
246
|
+
# for repo in repos:
|
|
245
247
|
# print(repo)
|
|
246
248
|
|
|
247
249
|
# Fetch a specific repository's details
|
|
248
|
-
#repo_details = fetcher.fetch_repository_details("ESPRI-Mod", "mip-cmor-tables")
|
|
249
|
-
#"print(repo_details)
|
|
250
|
-
#branch_details = fetcher.fetch_branch_details("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
|
|
251
|
-
#print(branch_details)
|
|
252
|
-
|
|
253
|
-
fetcher.clone_repository("ESPRI-Mod","mip-cmor-tables", branch="uni_proj_ld")
|
|
254
|
-
|
|
255
|
-
#a =fetcher.get_github_version("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
|
|
256
|
-
#print(a)
|
|
257
|
-
#a = fetcher.get_local_repo_version("mip-cmor-tables","uni_proj_ld")
|
|
258
|
-
#print(a)
|
|
259
|
-
|
|
260
|
-
fetcher.clone_repository("ESPRI-Mod","CMIP6Plus_CVs", branch="uni_proj_ld")
|
|
250
|
+
# repo_details = fetcher.fetch_repository_details("ESPRI-Mod", "mip-cmor-tables")
|
|
251
|
+
# "print(repo_details)
|
|
252
|
+
# branch_details = fetcher.fetch_branch_details("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
|
|
253
|
+
# print(branch_details)
|
|
254
|
+
|
|
255
|
+
fetcher.clone_repository("ESPRI-Mod", "mip-cmor-tables", branch="uni_proj_ld")
|
|
256
|
+
|
|
257
|
+
# a =fetcher.get_github_version("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
|
|
258
|
+
# print(a)
|
|
259
|
+
# a = fetcher.get_local_repo_version("mip-cmor-tables","uni_proj_ld")
|
|
260
|
+
# print(a)
|
|
261
|
+
|
|
262
|
+
fetcher.clone_repository("ESPRI-Mod", "CMIP6Plus_CVs", branch="uni_proj_ld")
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
|
|
1
|
+
import logging
|
|
2
2
|
from typing import Dict, List, Set
|
|
3
|
+
|
|
3
4
|
from esgvoc.core.data_handler import JsonLdResource
|
|
4
|
-
import logging
|
|
5
5
|
|
|
6
6
|
logger = logging.getLogger(__name__)
|
|
7
7
|
|
|
@@ -9,16 +9,23 @@ logger = logging.getLogger(__name__)
|
|
|
9
9
|
def merge_dicts(original: list, custom: list) -> dict:
|
|
10
10
|
"""Shallow merge: Overwrites original data with custom data."""
|
|
11
11
|
b = original[0]
|
|
12
|
-
a = custom[0]
|
|
12
|
+
a = custom[0]
|
|
13
13
|
merged = {**{k: v for k, v in a.items() if k != "@id"}, **{k: v for k, v in b.items() if k != "@id"}}
|
|
14
14
|
return merged
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
|
|
17
|
+
def merge(uri: str) -> Dict:
|
|
17
18
|
mdm = DataMerger(data=JsonLdResource(uri=uri))
|
|
18
19
|
return mdm.merge_linked_json()[-1]
|
|
19
20
|
|
|
21
|
+
|
|
20
22
|
class DataMerger:
|
|
21
|
-
def __init__(
|
|
23
|
+
def __init__(
|
|
24
|
+
self,
|
|
25
|
+
data: JsonLdResource,
|
|
26
|
+
allowed_base_uris: Set[str] = {"https://espri-mod.github.io/mip-cmor-tables"},
|
|
27
|
+
locally_available: dict = {},
|
|
28
|
+
):
|
|
22
29
|
self.data = data
|
|
23
30
|
self.allowed_base_uris = allowed_base_uris
|
|
24
31
|
self.locally_available = locally_available
|
|
@@ -29,42 +36,49 @@ class DataMerger:
|
|
|
29
36
|
|
|
30
37
|
def _get_next_id(self, data: dict) -> str | None:
|
|
31
38
|
"""Extract the next @id from the data if it is a valid customization reference."""
|
|
32
|
-
if isinstance(data,list):
|
|
39
|
+
if isinstance(data, list):
|
|
33
40
|
data = data[0]
|
|
34
41
|
if "@id" in data and self._should_resolve(data["@id"]):
|
|
35
|
-
return data["@id"] + ".json"
|
|
42
|
+
return data["@id"] + ".json"
|
|
36
43
|
return None
|
|
37
44
|
|
|
38
45
|
def merge_linked_json(self) -> List[Dict]:
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
if
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
46
|
+
try:
|
|
47
|
+
"""Fetch and merge data recursively, returning a list of progressively merged Data json instances."""
|
|
48
|
+
result_list = [self.data.json_dict] # Start with the original json object
|
|
49
|
+
visited = set(self.data.uri) # Track visited URIs to prevent cycles
|
|
50
|
+
current_data = self.data
|
|
51
|
+
# print(current_data.expanded)
|
|
52
|
+
while True:
|
|
53
|
+
next_id = self._get_next_id(current_data.expanded[0])
|
|
54
|
+
|
|
55
|
+
if not next_id or next_id in visited or not self._should_resolve(next_id):
|
|
56
|
+
break
|
|
57
|
+
|
|
58
|
+
visited.add(next_id)
|
|
59
|
+
|
|
60
|
+
# Fetch and merge the next customization
|
|
61
|
+
# Do we have it locally? If so, use the local copy instead of the remote one.
|
|
62
|
+
for local_repo in self.locally_available.keys():
|
|
63
|
+
if next_id.startswith(local_repo):
|
|
64
|
+
next_id = next_id.replace(local_repo, self.locally_available[local_repo])
|
|
65
|
+
|
|
66
|
+
next_data_instance = JsonLdResource(uri=next_id)
|
|
67
|
+
merged_json_data = merge_dicts([current_data.json_dict], [next_data_instance.json_dict])
|
|
68
|
+
next_data_instance.json_dict = merged_json_data
|
|
69
|
+
|
|
70
|
+
# Add the merged instance to the result list
|
|
71
|
+
result_list.append(merged_json_data)
|
|
72
|
+
current_data = next_data_instance
|
|
73
|
+
return result_list
|
|
74
|
+
except Exception as e:
|
|
75
|
+
print(self.data)
|
|
76
|
+
print(e)
|
|
77
|
+
|
|
65
78
|
|
|
66
79
|
if __name__ == "__main__":
|
|
67
80
|
import warnings
|
|
81
|
+
|
|
68
82
|
warnings.simplefilter("ignore")
|
|
69
83
|
|
|
70
84
|
# test from institution_id ipsl: expand and merge with institution ipsl
|
|
@@ -80,4 +94,3 @@ if __name__ == "__main__":
|
|
|
80
94
|
# print(mdm.merge_linked_json())
|
|
81
95
|
#
|
|
82
96
|
#
|
|
83
|
-
|