esgvoc 0.4.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of esgvoc has been flagged; details are available from the registry's advisory.
- esgvoc/__init__.py +1 -1
- esgvoc/api/data_descriptors/__init__.py +50 -28
- esgvoc/api/data_descriptors/activity.py +3 -3
- esgvoc/api/data_descriptors/area_label.py +16 -1
- esgvoc/api/data_descriptors/branded_suffix.py +20 -0
- esgvoc/api/data_descriptors/branded_variable.py +12 -0
- esgvoc/api/data_descriptors/consortium.py +14 -13
- esgvoc/api/data_descriptors/contact.py +5 -0
- esgvoc/api/data_descriptors/conventions.py +6 -0
- esgvoc/api/data_descriptors/creation_date.py +5 -0
- esgvoc/api/data_descriptors/data_descriptor.py +14 -9
- esgvoc/api/data_descriptors/data_specs_version.py +5 -0
- esgvoc/api/data_descriptors/date.py +1 -1
- esgvoc/api/data_descriptors/directory_date.py +1 -1
- esgvoc/api/data_descriptors/experiment.py +13 -11
- esgvoc/api/data_descriptors/forcing_index.py +1 -1
- esgvoc/api/data_descriptors/frequency.py +3 -3
- esgvoc/api/data_descriptors/further_info_url.py +5 -0
- esgvoc/api/data_descriptors/grid_label.py +2 -2
- esgvoc/api/data_descriptors/horizontal_label.py +15 -1
- esgvoc/api/data_descriptors/initialisation_index.py +1 -1
- esgvoc/api/data_descriptors/institution.py +8 -5
- esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
- esgvoc/api/data_descriptors/license.py +3 -3
- esgvoc/api/data_descriptors/mip_era.py +1 -1
- esgvoc/api/data_descriptors/model_component.py +1 -1
- esgvoc/api/data_descriptors/obs_type.py +5 -0
- esgvoc/api/data_descriptors/organisation.py +1 -1
- esgvoc/api/data_descriptors/physic_index.py +1 -1
- esgvoc/api/data_descriptors/product.py +2 -2
- esgvoc/api/data_descriptors/publication_status.py +5 -0
- esgvoc/api/data_descriptors/realisation_index.py +1 -1
- esgvoc/api/data_descriptors/realm.py +1 -1
- esgvoc/api/data_descriptors/region.py +5 -0
- esgvoc/api/data_descriptors/resolution.py +3 -3
- esgvoc/api/data_descriptors/source.py +9 -5
- esgvoc/api/data_descriptors/source_type.py +1 -1
- esgvoc/api/data_descriptors/table.py +3 -2
- esgvoc/api/data_descriptors/temporal_label.py +15 -1
- esgvoc/api/data_descriptors/time_range.py +4 -3
- esgvoc/api/data_descriptors/title.py +5 -0
- esgvoc/api/data_descriptors/tracking_id.py +5 -0
- esgvoc/api/data_descriptors/variable.py +25 -12
- esgvoc/api/data_descriptors/variant_label.py +3 -3
- esgvoc/api/data_descriptors/vertical_label.py +14 -0
- esgvoc/api/project_specs.py +117 -2
- esgvoc/api/projects.py +242 -279
- esgvoc/api/search.py +30 -3
- esgvoc/api/universe.py +42 -27
- esgvoc/apps/jsg/cmip6_template.json +74 -0
- esgvoc/apps/jsg/cmip6plus_template.json +74 -0
- esgvoc/apps/jsg/json_schema_generator.py +185 -0
- esgvoc/cli/config.py +500 -0
- esgvoc/cli/find.py +138 -0
- esgvoc/cli/get.py +43 -38
- esgvoc/cli/main.py +10 -3
- esgvoc/cli/status.py +27 -18
- esgvoc/cli/valid.py +10 -15
- esgvoc/core/db/models/project.py +11 -11
- esgvoc/core/db/models/universe.py +3 -3
- esgvoc/core/db/project_ingestion.py +40 -40
- esgvoc/core/db/universe_ingestion.py +36 -33
- esgvoc/core/logging_handler.py +24 -2
- esgvoc/core/repo_fetcher.py +61 -59
- esgvoc/core/service/data_merger.py +47 -34
- esgvoc/core/service/state.py +107 -83
- {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/METADATA +7 -20
- esgvoc-1.0.0.dist-info/RECORD +95 -0
- esgvoc/core/logging.conf +0 -21
- esgvoc-0.4.0.dist-info/RECORD +0 -80
- {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/WHEEL +0 -0
- {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/entry_points.txt +0 -0
- {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/core/db/universe_ingestion.py
CHANGED

(Old-side lines truncated by the registry's diff viewer are marked with "…".)

@@ -30,17 +30,18 @@ def ingest_universe(universe_repo_dir_path: Path, universe_db_file_path: Path) -
     try:
         connection = db.DBConnection(universe_db_file_path)
     except Exception as e:
-        msg = f…
+        msg = f"Unable to read universe SQLite file at {universe_db_file_path}. Abort."
        _LOGGER.fatal(msg)
        raise IOError(msg) from e

    for data_descriptor_dir_path in universe_repo_dir_path.iterdir():
-        if …
-        …
+        if (
+            data_descriptor_dir_path.is_dir() and (data_descriptor_dir_path / "000_context.jsonld").exists()
+        ):  # TODO may be put that in setting
            try:
                ingest_data_descriptor(data_descriptor_dir_path, connection)
            except Exception as e:
-                msg = f…
+                msg = f"unexpected error while processing data descriptor {data_descriptor_dir_path}"
                _LOGGER.fatal(msg)
                raise EsgvocDbError(msg) from e
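The new guard ingests a sub-directory only when it carries the `000_context.jsonld` marker file. A minimal sketch of that discovery rule in isolation (the helper name and directory path are illustrative, not part of the released API):

```python
from pathlib import Path

CONTEXT_MARKER = "000_context.jsonld"  # marker expected in every data descriptor directory

def iter_data_descriptor_dirs(universe_repo_dir_path: Path):
    """Yield sub-directories that look like data descriptors (contain the JSON-LD context marker)."""
    for candidate in universe_repo_dir_path.iterdir():
        if candidate.is_dir() and (candidate / CONTEXT_MARKER).exists():
            yield candidate

# Illustrative usage against a local checkout of the universe repository.
for d in iter_data_descriptor_dirs(Path(".cache/repos/mip-cmor-tables")):
    print(d.name)
```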
@@ -48,20 +49,24 @@ def ingest_universe(universe_repo_dir_path: Path, universe_db_file_path: Path) -
     # Well, the following instructions are not data duplication. It is more building an index.
     # Read: https://sqlite.org/fts5.html
     try:
-        sql_query = …
-        …
+        sql_query = (
+            "INSERT INTO uterms_fts5(pk, id, specs, kind, data_descriptor_pk) "
+            + "SELECT pk, id, specs, kind, data_descriptor_pk FROM uterms;"
+        )  # noqa: S608
        session.exec(text(sql_query))  # type: ignore
    except Exception as e:
-        msg = f…
+        msg = f"unable to insert rows into uterms_fts5 table for {universe_db_file_path}"
        _LOGGER.fatal(msg)
        raise EsgvocDbError(msg) from e
    session.commit()
    try:
-        sql_query = …
-        …
+        sql_query = (
+            "INSERT INTO udata_descriptors_fts5(pk, id, universe_pk, context, term_kind) "
+            + "SELECT pk, id, universe_pk, context, term_kind FROM udata_descriptors;"
+        )  # noqa: S608
        session.exec(text(sql_query))  # type: ignore
    except Exception as e:
-        msg = f…
+        msg = f"unable to insert rows into udata_descriptors_fts5 table for {universe_db_file_path}"
        _LOGGER.fatal(msg)
        raise EsgvocDbError(msg) from e
    session.commit()
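These INSERT … SELECT statements copy the searchable columns of `uterms` and `udata_descriptors` into FTS5 virtual tables, which is index building rather than data duplication, as the comment notes. A self-contained sketch of the pattern with an illustrative schema (not the esgvoc one), assuming your SQLite build ships the FTS5 extension:

```python
import sqlite3

con = sqlite3.connect(":memory:")
# A regular table and an FTS5 virtual table mirroring its searchable columns.
con.execute("CREATE TABLE uterms (pk INTEGER PRIMARY KEY, id TEXT, specs TEXT)")
con.execute("CREATE VIRTUAL TABLE uterms_fts5 USING fts5(pk, id, specs)")
con.execute("INSERT INTO uterms (id, specs) VALUES ('ipsl', '{\"label\": \"IPSL\"}')")
# Build the index the same way the ingestion does: copy the rows over.
con.execute("INSERT INTO uterms_fts5 (pk, id, specs) SELECT pk, id, specs FROM uterms")
# Full-text search via MATCH.
print(con.execute("SELECT id FROM uterms_fts5 WHERE uterms_fts5 MATCH 'ipsl'").fetchall())
```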
@@ -74,24 +79,21 @@ def ingest_metadata_universe(connection, git_hash):
         session.commit()


-def ingest_data_descriptor(data_descriptor_path: Path,
-                           connection: db.DBConnection) -> None:
+def ingest_data_descriptor(data_descriptor_path: Path, connection: db.DBConnection) -> None:
    data_descriptor_id = data_descriptor_path.name

    context_file_path = data_descriptor_path.joinpath(esgvoc.core.constants.CONTEXT_FILENAME)
    try:
        context = read_json_file(context_file_path)
    except Exception as e:
-        msg = f…
-              {data_descriptor_id}. Skip.\n{str(e)}…
+        msg = f"Unable to read the context file {context_file_path} of data descriptor \
+              {data_descriptor_id}. Skip.\n{str(e)}"
        _LOGGER.warning(msg)
        return

    with connection.create_session() as session:
        # We ll know it only when we ll add a term (hypothesis all term have the same kind in a data_descriptor)
-        data_descriptor = UDataDescriptor(id=data_descriptor_id,
-                                          context=context,
-                                          term_kind="")
+        data_descriptor = UDataDescriptor(id=data_descriptor_id, context=context, term_kind="")
        term_kind_dd = None

        _LOGGER.debug(f"add data_descriptor : {data_descriptor_id}")
@@ -99,28 +101,34 @@ def ingest_data_descriptor(data_descriptor_path: Path,
         _LOGGER.debug(f"found term path : {term_file_path}, {term_file_path.suffix}")
         if term_file_path.is_file() and term_file_path.suffix == ".json":
             try:
-                locally_available = {…
-                … (3 lines, not rendered in this view)
+                locally_available = {
+                    "https://espri-mod.github.io/mip-cmor-tables": service.current_state.universe.local_path
+                }
+
+                json_specs = DataMerger(
+                    data=JsonLdResource(uri=str(term_file_path)), locally_available=locally_available
+                ).merge_linked_json()[-1]
+
                term_kind = infer_term_kind(json_specs)
                term_id = json_specs["id"]

                if term_kind_dd is None:
                    term_kind_dd = term_kind
-
            except Exception as e:
-                _LOGGER.warning(…
-                …
+                _LOGGER.warning(
+                    f"Unable to read term {term_file_path} for data descriptor "
+                    + f"{data_descriptor_path}. Skip.\n{str(e)}"
+                )
                continue
            if term_id and json_specs and data_descriptor and term_kind:
-                _LOGGER.debug("adding {term_id}")
+                _LOGGER.debug(f"adding {term_id}")
                term = UTerm(
                    id=term_id,
                    specs=json_specs,
                    data_descriptor=data_descriptor,
                    kind=term_kind,
                )
+
                session.add(term)
        if term_kind_dd is not None:
            data_descriptor.term_kind = term_kind_dd
@@ -128,14 +136,8 @@
         session.commit()


-def get_universe_term(data_descriptor_id: str,
-                      …
-                      universe_db_session: Session) -> tuple[TermKind, dict]:
-    statement = (
-        select(UTerm)
-        .join(UDataDescriptor)
-        .where(UDataDescriptor.id == data_descriptor_id, UTerm.id == term_id)
-    )
+def get_universe_term(data_descriptor_id: str, term_id: str, universe_db_session: Session) -> tuple[TermKind, dict]:
+    statement = select(UTerm).join(UDataDescriptor).where(UDataDescriptor.id == data_descriptor_id, UTerm.id == term_id)
    results = universe_db_session.exec(statement)
    term = results.one()
    return term.kind, term.specs
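The rewrite collapses the multi-line signature and the select/join into single lines; behaviour is unchanged. A hedged usage sketch (the import path, database location, and ids are assumptions based on this file's location in the wheel):

```python
from sqlmodel import Session, create_engine

from esgvoc.core.db.universe_ingestion import get_universe_term  # assumed import path

engine = create_engine("sqlite:///.cache/dbs/universe.sqlite")  # illustrative DB location
with Session(engine) as session:
    # Look up one term of a data descriptor by its id.
    kind, specs = get_universe_term("institution", "ipsl", session)
    print(kind, specs.get("id"))
```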
@@ -143,6 +145,7 @@ def get_universe_term(data_descriptor_id: str,

 if __name__ == "__main__":
     import os
+
    root_dir = Path(str(os.getcwd())).parent.parent
    print(root_dir)
    universe_create_db(root_dir / Path(".cache/dbs/universe.sqlite"))
esgvoc/core/logging_handler.py
CHANGED
@@ -1,4 +1,26 @@
 import logging.config
-from pathlib import Path

-…
+LOGGING_CONFIG = {
+    'version': 1,
+    'disable_existing_loggers': False,
+    'formatters': {
+        'esgvoc_formatter': {
+            'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s',
+        },
+    },
+    'handlers': {
+        'esgvoc_stdout': {
+            'class': 'logging.StreamHandler',
+            'formatter': 'esgvoc_formatter',
+        },
+    },
+    'loggers': {
+        'esgvoc': {
+            'handlers': ['esgvoc_stdout'],
+            'level': 'ERROR',
+            'propagate': False,
+        }
+    }
+}
+
+logging.config.dictConfig(LOGGING_CONFIG)
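The on-disk `esgvoc/core/logging.conf` (deleted in this release, per the file list above) is replaced by this inline dictConfig, applied at import time. Consumers can still tune verbosity through the standard logging API; a short sketch, assuming the module keeps this import path:

```python
import logging

import esgvoc.core.logging_handler  # noqa: F401  # importing applies LOGGING_CONFIG

# The "esgvoc" logger defaults to ERROR; raise it while debugging.
# Note: logging.StreamHandler writes to sys.stderr by default, despite the handler's name.
logging.getLogger("esgvoc").setLevel(logging.DEBUG)
logging.getLogger("esgvoc").debug("now visible via the esgvoc_stdout handler")
```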
esgvoc/core/repo_fetcher.py
CHANGED
@@ -1,19 +1,23 @@
+import logging
 import os
 import subprocess
+import sys
+from contextlib import contextmanager
+from pathlib import Path
+from typing import List, Optional
+
 import requests
 from pydantic import BaseModel, ValidationError
-from typing import List, Optional
-from contextlib import contextmanager
-import logging
-import sys

 _LOGGER = logging.getLogger(__name__)

+
 @contextmanager
 def redirect_stdout_to_log(level=logging.INFO):
     """
     Redirect stdout to the global _LOGGER temporarily.
     """
+
     class StreamToLogger:
         def __init__(self, log_level):
             self.log_level = log_level
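Only the first lines of `StreamToLogger` appear in this diff; the rest of the sketch below (the write/flush methods and the stdout swap) is a plausible completion of the pattern, not the released code:

```python
import logging
import sys
from contextlib import contextmanager

_LOGGER = logging.getLogger(__name__)

@contextmanager
def redirect_stdout_to_log(level=logging.INFO):
    """Temporarily route writes to sys.stdout into _LOGGER at the given level."""
    class StreamToLogger:
        def __init__(self, log_level):
            self.log_level = log_level
        def write(self, message):
            if message.strip():  # skip the bare newlines print() emits
                _LOGGER.log(self.log_level, message.strip())
        def flush(self):  # file-like API expected by print() and subprocess
            pass
    old_stdout = sys.stdout
    sys.stdout = StreamToLogger(level)
    try:
        yield
    finally:
        sys.stdout = old_stdout

# Usage: anything printed inside the block becomes a log record.
with redirect_stdout_to_log(logging.DEBUG):
    print("cloning...")
```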
@@ -48,6 +52,7 @@ class GitHubRepository(BaseModel):
     created_at: str
     updated_at: str

+
 class GitHubBranch(BaseModel):
     name: str
     commit: dict
@@ -59,7 +64,7 @@ class RepoFetcher:
     DataFetcher is responsible for fetching data from external sources such as GitHub.
     """

-    def __init__(self, base_url: str = "https://api.github.com",local_path: str = ".cache/repos"):
+    def __init__(self, base_url: str = "https://api.github.com", local_path: str = ".cache/repos"):
        self.base_url = base_url
        self.repo_dir = local_path
@@ -100,7 +105,6 @@
         except ValidationError as e:
             raise Exception(f"Data validation error: {e}")

-
     def fetch_branch_details(self, owner: str, repo: str, branch: str) -> GitHubBranch:
         """
         Fetch details of a specific branch in a repository.
@@ -120,7 +124,7 @@
         except ValidationError as e:
             raise Exception(f"Data validation error: {e}")

-    def list_directory(self,owner, repo, branch=…
+    def list_directory(self, owner, repo, branch="main"):
        """
        List directories in the root of a GitHub repository.

@@ -133,10 +137,10 @@
         response = requests.get(url)
         response.raise_for_status()  # Raise an error for bad responses
         contents = response.json()
-        directories = [item[…
+        directories = [item["name"] for item in contents if item["type"] == "dir"]
        return directories

-    def list_files(self,owner, repo, directory, branch=…
+    def list_files(self, owner, repo, directory, branch="main"):
        """
        List files in a specific directory of a GitHub repository.

@@ -150,10 +154,10 @@
         response = requests.get(url)
         response.raise_for_status()  # Raise an error for bad responses
         contents = response.json()
-        files = [item[…
+        files = [item["name"] for item in contents if item["type"] == "file"]
        return files
-
-    def clone_repository(self, owner: str, repo: str, branch: Optional[str] = None, local_path: str|None = None):
+
+    def clone_repository(self, owner: str, repo: str, branch: Optional[str] = None, local_path: str | None = None):
        """
        Clone a GitHub repository to a target directory.
        :param owner: Repository owner
@@ -162,52 +166,47 @@
         :param branch: (Optional) The branch to clone. Clones the default branch if None.
         """
         repo_url = f"https://github.com/{owner}/{repo}.git"
-        destination = local_path if local_path else f"{self.repo_dir}/{repo}"
+        destination = local_path if local_path else f"{self.repo_dir}/{repo}"

        command = ["git", "clone", repo_url, destination]
        if branch:
            command.extend(["--branch", branch])
        with redirect_stdout_to_log():
-
            try:
-                … (4 lines, not rendered in this view)
+                if not Path(destination).exists():
+                    subprocess.run(command, check=True)
+                    _LOGGER.debug(f"Repository cloned successfully into {destination}")
+                else:
                    current_work_dir = os.getcwd()
-                    os.chdir(f"{…
+                    os.chdir(f"{destination}")
                    command = ["git", "pull"]
                    subprocess.run(command, check=True)
                    os.chdir(current_work_dir)

+            except Exception as e:
+                raise Exception(f"Failed to clone repository: {e}")

-    … (4 lines, not rendered in this view)
-        """ Fetch the latest commit version (or any other versioning scheme) from GitHub. """
-        details = self.fetch_branch_details( owner, repo, branch)
-        return details.commit.get('sha')
+    def get_github_version_with_api(self, owner: str, repo: str, branch: str = "main"):
+        """Fetch the latest commit version (or any other versioning scheme) from GitHub."""
+        details = self.fetch_branch_details(owner, repo, branch)
+        return details.commit.get("sha")

-    def get_github_version(self, owner: str, repo: str, branch: str="main"):
-        """…
+    def get_github_version(self, owner: str, repo: str, branch: str = "main"):
+        """Fetch the latest commit version (or any other versioning scheme) from GitHub. with command git fetch"""
        repo_url = f"https://github.com/{owner}/{repo}.git"
        command = ["git", "ls-remote", repo_url, f"{self.repo_dir}/{repo}"]
        if branch:
            command.extend([branch])

        # with redirect_stdout_to_log():
-        output=None
+        output = None
        try:
-            result = subprocess.run(command,
-                                    text=True,
-                                    check=True)
+            result = subprocess.run(command, capture_output=True, text=True, check=True)
            # Parse the output to get the commit hash
            output = result.stdout.strip()
            _LOGGER.debug(f"Repository fetch successfully from {self.repo_dir}/{repo}")
        except Exception as e:
-
-            _LOGGER.debug("error in with git fetch " + repr(e))
+            _LOGGER.debug("error in with git fetch " + repr(e))
        if output is not None:
            commit_hash = output.split()[0]
            return commit_hash
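`git ls-remote <url> <ref>` prints one `<sha>\t<ref>` line per match, so the first whitespace-separated token of the output is the commit hash, which is what `output.split()[0]` extracts. (Note that the diff passes the local path `f"{self.repo_dir}/{repo}"` as the second argument, where ls-remote expects a ref pattern; the standalone sketch below passes the branch name instead. The function name and URL are illustrative.)

```python
import subprocess

def remote_head_sha(repo_url: str, branch: str = "main") -> str | None:
    """Return the commit sha of a remote branch using git ls-remote, or None on failure."""
    try:
        result = subprocess.run(
            ["git", "ls-remote", repo_url, branch],
            capture_output=True, text=True, check=True,
        )
    except (OSError, subprocess.CalledProcessError):
        return None
    output = result.stdout.strip()
    # Each line looks like "<sha>\trefs/heads/<branch>"; take the first token.
    return output.split()[0] if output else None

print(remote_head_sha("https://github.com/ESPRI-Mod/mip-cmor-tables.git"))
```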
@@ -216,45 +215,48 @@
         # return git_hash

     def get_local_repo_version(self, repo_path: str, branch: Optional[str] = "main"):
-        """…
+        """Check the version of the local repository by fetching the latest commit hash."""
        # repo_path = os.path.join(self.repo_dir, repo)
        if os.path.exists(repo_path):
-            #print("EXIST")
+            # print("EXIST")
            command = ["git", "-C", repo_path]
            if branch:
                command.extend(["switch", branch])
            # Ensure we are on the correct branch
            with redirect_stdout_to_log():
-                subprocess.run(…
-                … (3 lines, not rendered in this view)
+                subprocess.run(
+                    command,
+                    stdout=subprocess.PIPE,  # Capture stdout
+                    stderr=subprocess.PIPE,  # Capture stderr
+                    text=True,
+                )  # Decode output as text
            # Get the latest commit hash (SHA) from the local repository
-            commit_hash = subprocess.check_output(…
-            … (2 lines, not rendered in this view)
+            commit_hash = subprocess.check_output(
+                ["git", "-C", repo_path, "rev-parse", "HEAD"], stderr=subprocess.PIPE, text=True
+            ).strip()
            return commit_hash
        return None

+
 if __name__ == "__main__":
     fetcher = RepoFetcher()
-
+
    # Fetch repositories for a user
-    #repos = fetcher.fetch_repositories("ESPRI-Mod")
-    #for repo in repos:
+    # repos = fetcher.fetch_repositories("ESPRI-Mod")
+    # for repo in repos:
    #     print(repo)

    # Fetch a specific repository's details
-    #repo_details = fetcher.fetch_repository_details("ESPRI-Mod", "mip-cmor-tables")
-    #"print(repo_details)
-    #branch_details = fetcher.fetch_branch_details("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
-    #print(branch_details)
-
-    fetcher.clone_repository("ESPRI-Mod","mip-cmor-tables", branch="uni_proj_ld")
-
-    #a =fetcher.get_github_version("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
-    #print(a)
-    #a = fetcher.get_local_repo_version("mip-cmor-tables","uni_proj_ld")
-    #print(a)
-
-    fetcher.clone_repository("ESPRI-Mod","CMIP6Plus_CVs", branch="uni_proj_ld")
+    # repo_details = fetcher.fetch_repository_details("ESPRI-Mod", "mip-cmor-tables")
+    # "print(repo_details)
+    # branch_details = fetcher.fetch_branch_details("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
+    # print(branch_details)
+
+    fetcher.clone_repository("ESPRI-Mod", "mip-cmor-tables", branch="uni_proj_ld")
+
+    # a =fetcher.get_github_version("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
+    # print(a)
+    # a = fetcher.get_local_repo_version("mip-cmor-tables","uni_proj_ld")
+    # print(a)
+
+    fetcher.clone_repository("ESPRI-Mod", "CMIP6Plus_CVs", branch="uni_proj_ld")
esgvoc/core/service/data_merger.py
CHANGED

@@ -1,7 +1,7 @@
-…
+import logging
 from typing import Dict, List, Set
+
 from esgvoc.core.data_handler import JsonLdResource
-import logging

 logger = logging.getLogger(__name__)

@@ -9,16 +9,23 @@ logger = logging.getLogger(__name__)
 def merge_dicts(original: list, custom: list) -> dict:
     """Shallow merge: Overwrites original data with custom data."""
     b = original[0]
-    a = custom[0]
+    a = custom[0]
    merged = {**{k: v for k, v in a.items() if k != "@id"}, **{k: v for k, v in b.items() if k != "@id"}}
    return merged

-…
+
+def merge(uri: str) -> Dict:
    mdm = DataMerger(data=JsonLdResource(uri=uri))
    return mdm.merge_linked_json()[-1]

+
 class DataMerger:
-    def __init__(…
+    def __init__(
+        self,
+        data: JsonLdResource,
+        allowed_base_uris: Set[str] = {"https://espri-mod.github.io/mip-cmor-tables"},
+        locally_available: dict = {},
+    ):
        self.data = data
        self.allowed_base_uris = allowed_base_uris
        self.locally_available = locally_available
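In `merge_dicts`, the original's keys win despite what the docstring says: `b` (the original term) is spread last, so its values override the customization's, and both sides drop their `@id`. A tiny demonstration of that precedence (the payloads are made up):

```python
def merge_dicts(original: list, custom: list) -> dict:
    """Shallow merge as in the diff: the original's keys take precedence, @id is dropped."""
    b = original[0]
    a = custom[0]
    return {**{k: v for k, v in a.items() if k != "@id"}, **{k: v for k, v in b.items() if k != "@id"}}

# The original's "name" wins, the custom-only "acronyms" survives, "@id" is dropped.
original = [{"@id": "institution/ipsl", "name": "IPSL"}]
custom = [{"@id": "custom/ipsl", "name": "overridden?", "acronyms": ["IPSL"]}]
print(merge_dicts(original, custom))  # {'name': 'IPSL', 'acronyms': ['IPSL']}
```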
@@ -29,42 +36,49 @@ class DataMerger:

     def _get_next_id(self, data: dict) -> str | None:
         """Extract the next @id from the data if it is a valid customization reference."""
-        if isinstance(data,list):
+        if isinstance(data, list):
            data = data[0]
        if "@id" in data and self._should_resolve(data["@id"]):
-            return data["@id"] + ".json"
+            return data["@id"] + ".json"
        return None

    def merge_linked_json(self) -> List[Dict]:
-        … (26 lines of the previous implementation, not rendered in this view)
+        try:
+            """Fetch and merge data recursively, returning a list of progressively merged Data json instances."""
+            result_list = [self.data.json_dict]  # Start with the original json object
+            visited = set(self.data.uri)  # Track visited URIs to prevent cycles
+            current_data = self.data
+            # print(current_data.expanded)
+            while True:
+                next_id = self._get_next_id(current_data.expanded[0])
+
+                if not next_id or next_id in visited or not self._should_resolve(next_id):
+                    break
+
+                visited.add(next_id)
+
+                # Fetch and merge the next customization
+                # do we have it in local ? if so use it instead of remote
+                for local_repo in self.locally_available.keys():
+                    if next_id.startswith(local_repo):
+                        next_id = next_id.replace(local_repo, self.locally_available[local_repo])
+
+                next_data_instance = JsonLdResource(uri=next_id)
+                merged_json_data = merge_dicts([current_data.json_dict], [next_data_instance.json_dict])
+                next_data_instance.json_dict = merged_json_data
+
+                # Add the merged instance to the result list
+                result_list.append(merged_json_data)
+                current_data = next_data_instance
+            return result_list
+        except Exception as e:
+            print(self.data)
+            print(e)

 if __name__ == "__main__":
     import warnings
+
    warnings.simplefilter("ignore")

    # test from institution_id ipsl exapnd and merge with institution ipsl
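The rewritten `merge_linked_json` follows a chain of `@id` customization links, and the new `locally_available` mapping lets it resolve a remote base URI against a local checkout before fetching: any `@id` that starts with a known remote prefix is rewritten to a filesystem path. The rewrite step in isolation (the local path is illustrative):

```python
locally_available = {
    "https://espri-mod.github.io/mip-cmor-tables": ".cache/repos/mip-cmor-tables",  # illustrative local path
}

next_id = "https://espri-mod.github.io/mip-cmor-tables/institution/ipsl.json"
for local_repo, local_path in locally_available.items():
    if next_id.startswith(local_repo):
        next_id = next_id.replace(local_repo, local_path)

print(next_id)  # .cache/repos/mip-cmor-tables/institution/ipsl.json
```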
@@ -80,4 +94,3 @@ if __name__ == "__main__":
     # print(mdm.merge_linked_json())
     #
     #
-