esgvoc 0.4.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions as released to their public registry. It is provided for informational purposes only and reflects the changes between the two released versions.

This version of esgvoc has been flagged as a potentially problematic release.

Files changed (73)
  1. esgvoc/__init__.py +1 -1
  2. esgvoc/api/data_descriptors/__init__.py +50 -28
  3. esgvoc/api/data_descriptors/activity.py +3 -3
  4. esgvoc/api/data_descriptors/area_label.py +16 -1
  5. esgvoc/api/data_descriptors/branded_suffix.py +20 -0
  6. esgvoc/api/data_descriptors/branded_variable.py +12 -0
  7. esgvoc/api/data_descriptors/consortium.py +14 -13
  8. esgvoc/api/data_descriptors/contact.py +5 -0
  9. esgvoc/api/data_descriptors/conventions.py +6 -0
  10. esgvoc/api/data_descriptors/creation_date.py +5 -0
  11. esgvoc/api/data_descriptors/data_descriptor.py +14 -9
  12. esgvoc/api/data_descriptors/data_specs_version.py +5 -0
  13. esgvoc/api/data_descriptors/date.py +1 -1
  14. esgvoc/api/data_descriptors/directory_date.py +1 -1
  15. esgvoc/api/data_descriptors/experiment.py +13 -11
  16. esgvoc/api/data_descriptors/forcing_index.py +1 -1
  17. esgvoc/api/data_descriptors/frequency.py +3 -3
  18. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  19. esgvoc/api/data_descriptors/grid_label.py +2 -2
  20. esgvoc/api/data_descriptors/horizontal_label.py +15 -1
  21. esgvoc/api/data_descriptors/initialisation_index.py +1 -1
  22. esgvoc/api/data_descriptors/institution.py +8 -5
  23. esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
  24. esgvoc/api/data_descriptors/license.py +3 -3
  25. esgvoc/api/data_descriptors/mip_era.py +1 -1
  26. esgvoc/api/data_descriptors/model_component.py +1 -1
  27. esgvoc/api/data_descriptors/obs_type.py +5 -0
  28. esgvoc/api/data_descriptors/organisation.py +1 -1
  29. esgvoc/api/data_descriptors/physic_index.py +1 -1
  30. esgvoc/api/data_descriptors/product.py +2 -2
  31. esgvoc/api/data_descriptors/publication_status.py +5 -0
  32. esgvoc/api/data_descriptors/realisation_index.py +1 -1
  33. esgvoc/api/data_descriptors/realm.py +1 -1
  34. esgvoc/api/data_descriptors/region.py +5 -0
  35. esgvoc/api/data_descriptors/resolution.py +3 -3
  36. esgvoc/api/data_descriptors/source.py +9 -5
  37. esgvoc/api/data_descriptors/source_type.py +1 -1
  38. esgvoc/api/data_descriptors/table.py +3 -2
  39. esgvoc/api/data_descriptors/temporal_label.py +15 -1
  40. esgvoc/api/data_descriptors/time_range.py +4 -3
  41. esgvoc/api/data_descriptors/title.py +5 -0
  42. esgvoc/api/data_descriptors/tracking_id.py +5 -0
  43. esgvoc/api/data_descriptors/variable.py +25 -12
  44. esgvoc/api/data_descriptors/variant_label.py +3 -3
  45. esgvoc/api/data_descriptors/vertical_label.py +14 -0
  46. esgvoc/api/project_specs.py +117 -2
  47. esgvoc/api/projects.py +242 -279
  48. esgvoc/api/search.py +30 -3
  49. esgvoc/api/universe.py +42 -27
  50. esgvoc/apps/jsg/cmip6_template.json +74 -0
  51. esgvoc/apps/jsg/cmip6plus_template.json +74 -0
  52. esgvoc/apps/jsg/json_schema_generator.py +185 -0
  53. esgvoc/cli/config.py +500 -0
  54. esgvoc/cli/find.py +138 -0
  55. esgvoc/cli/get.py +43 -38
  56. esgvoc/cli/main.py +10 -3
  57. esgvoc/cli/status.py +27 -18
  58. esgvoc/cli/valid.py +10 -15
  59. esgvoc/core/db/models/project.py +11 -11
  60. esgvoc/core/db/models/universe.py +3 -3
  61. esgvoc/core/db/project_ingestion.py +40 -40
  62. esgvoc/core/db/universe_ingestion.py +36 -33
  63. esgvoc/core/logging_handler.py +24 -2
  64. esgvoc/core/repo_fetcher.py +61 -59
  65. esgvoc/core/service/data_merger.py +47 -34
  66. esgvoc/core/service/state.py +107 -83
  67. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/METADATA +7 -20
  68. esgvoc-1.0.0.dist-info/RECORD +95 -0
  69. esgvoc/core/logging.conf +0 -21
  70. esgvoc-0.4.0.dist-info/RECORD +0 -80
  71. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/WHEEL +0 -0
  72. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/entry_points.txt +0 -0
  73. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0

esgvoc/core/db/universe_ingestion.py

@@ -30,17 +30,18 @@ def ingest_universe(universe_repo_dir_path: Path, universe_db_file_path: Path) -
  try:
  connection = db.DBConnection(universe_db_file_path)
  except Exception as e:
- msg = f'Unable to read universe SQLite file at {universe_db_file_path}. Abort.'
+ msg = f"Unable to read universe SQLite file at {universe_db_file_path}. Abort."
  _LOGGER.fatal(msg)
  raise IOError(msg) from e

  for data_descriptor_dir_path in universe_repo_dir_path.iterdir():
- if data_descriptor_dir_path.is_dir() and \
- (data_descriptor_dir_path / "000_context.jsonld").exists(): # TODO may be put that in setting
+ if (
+ data_descriptor_dir_path.is_dir() and (data_descriptor_dir_path / "000_context.jsonld").exists()
+ ): # TODO may be put that in setting
  try:
  ingest_data_descriptor(data_descriptor_dir_path, connection)
  except Exception as e:
- msg = f'unexpected error while processing data descriptor {data_descriptor_dir_path}'
+ msg = f"unexpected error while processing data descriptor {data_descriptor_dir_path}"
  _LOGGER.fatal(msg)
  raise EsgvocDbError(msg) from e

@@ -48,20 +49,24 @@ def ingest_universe(universe_repo_dir_path: Path, universe_db_file_path: Path) -
  # Well, the following instructions are not data duplication. It is more building an index.
  # Read: https://sqlite.org/fts5.html
  try:
- sql_query = 'INSERT INTO uterms_fts5(pk, id, specs, kind, data_descriptor_pk) ' + \
- 'SELECT pk, id, specs, kind, data_descriptor_pk FROM uterms;' # noqa: S608
+ sql_query = (
+ "INSERT INTO uterms_fts5(pk, id, specs, kind, data_descriptor_pk) "
+ + "SELECT pk, id, specs, kind, data_descriptor_pk FROM uterms;"
+ ) # noqa: S608
  session.exec(text(sql_query)) # type: ignore
  except Exception as e:
- msg = f'unable to insert rows into uterms_fts5 table for {universe_db_file_path}'
+ msg = f"unable to insert rows into uterms_fts5 table for {universe_db_file_path}"
  _LOGGER.fatal(msg)
  raise EsgvocDbError(msg) from e
  session.commit()
  try:
- sql_query = 'INSERT INTO udata_descriptors_fts5(pk, id, universe_pk, context, term_kind) ' + \
- 'SELECT pk, id, universe_pk, context, term_kind FROM udata_descriptors;' # noqa: S608
+ sql_query = (
+ "INSERT INTO udata_descriptors_fts5(pk, id, universe_pk, context, term_kind) "
+ + "SELECT pk, id, universe_pk, context, term_kind FROM udata_descriptors;"
+ ) # noqa: S608
  session.exec(text(sql_query)) # type: ignore
  except Exception as e:
- msg = f'unable to insert rows into udata_descriptors_fts5 table for {universe_db_file_path}'
+ msg = f"unable to insert rows into udata_descriptors_fts5 table for {universe_db_file_path}"
  _LOGGER.fatal(msg)
  raise EsgvocDbError(msg) from e
  session.commit()
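
Note: these INSERT ... SELECT statements populate SQLite FTS5 virtual tables (uterms_fts5, udata_descriptors_fts5) that serve as a full-text index over the ingested terms. A minimal sketch of how such an index can be queried with the standard sqlite3 module, assuming the FTS5 tables store the listed columns and the database lives at the path used in this module's __main__ block:

    import sqlite3

    conn = sqlite3.connect(".cache/dbs/universe.sqlite")
    # FTS5 MATCH performs a full-text search over the indexed columns.
    rows = conn.execute(
        "SELECT id, kind FROM uterms_fts5 WHERE uterms_fts5 MATCH ?",
        ("ipsl",),
    ).fetchall()
    for term_id, kind in rows:
        print(term_id, kind)
    conn.close()
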
@@ -74,24 +79,21 @@ def ingest_metadata_universe(connection, git_hash):
  session.commit()


- def ingest_data_descriptor(data_descriptor_path: Path,
- connection: db.DBConnection) -> None:
+ def ingest_data_descriptor(data_descriptor_path: Path, connection: db.DBConnection) -> None:
  data_descriptor_id = data_descriptor_path.name

  context_file_path = data_descriptor_path.joinpath(esgvoc.core.constants.CONTEXT_FILENAME)
  try:
  context = read_json_file(context_file_path)
  except Exception as e:
- msg = f'Unable to read the context file {context_file_path} of data descriptor \
- {data_descriptor_id}. Skip.\n{str(e)}'
+ msg = f"Unable to read the context file {context_file_path} of data descriptor \
+ {data_descriptor_id}. Skip.\n{str(e)}"
  _LOGGER.warning(msg)
  return

  with connection.create_session() as session:
  # We ll know it only when we ll add a term (hypothesis all term have the same kind in a data_descriptor)
- data_descriptor = UDataDescriptor(id=data_descriptor_id,
- context=context,
- term_kind="")
+ data_descriptor = UDataDescriptor(id=data_descriptor_id, context=context, term_kind="")
  term_kind_dd = None

  _LOGGER.debug(f"add data_descriptor : {data_descriptor_id}")
@@ -99,28 +101,34 @@ def ingest_data_descriptor(data_descriptor_path: Path,
  _LOGGER.debug(f"found term path : {term_file_path}, {term_file_path.suffix}")
  if term_file_path.is_file() and term_file_path.suffix == ".json":
  try:
- locally_available = {"https://espri-mod.github.io/mip-cmor-tables":
- service.current_state.universe.local_path}
- json_specs = DataMerger(data=JsonLdResource(uri=str(term_file_path)),
- locally_available=locally_available).merge_linked_json()[-1]
+ locally_available = {
+ "https://espri-mod.github.io/mip-cmor-tables": service.current_state.universe.local_path
+ }
+
+ json_specs = DataMerger(
+ data=JsonLdResource(uri=str(term_file_path)), locally_available=locally_available
+ ).merge_linked_json()[-1]
+
  term_kind = infer_term_kind(json_specs)
  term_id = json_specs["id"]

  if term_kind_dd is None:
  term_kind_dd = term_kind
-
  except Exception as e:
- _LOGGER.warning(f'Unable to read term {term_file_path} for data descriptor ' +
- f'{data_descriptor_path}. Skip.\n{str(e)}')
+ _LOGGER.warning(
+ f"Unable to read term {term_file_path} for data descriptor "
+ + f"{data_descriptor_path}. Skip.\n{str(e)}"
+ )
  continue
  if term_id and json_specs and data_descriptor and term_kind:
- _LOGGER.debug("adding {term_id}")
+ _LOGGER.debug(f"adding {term_id}")
  term = UTerm(
  id=term_id,
  specs=json_specs,
  data_descriptor=data_descriptor,
  kind=term_kind,
  )
+
  session.add(term)
  if term_kind_dd is not None:
  data_descriptor.term_kind = term_kind_dd
@@ -128,14 +136,8 @@ def ingest_data_descriptor(data_descriptor_path: Path,
  session.commit()


- def get_universe_term(data_descriptor_id: str,
- term_id: str,
- universe_db_session: Session) -> tuple[TermKind, dict]:
- statement = (
- select(UTerm)
- .join(UDataDescriptor)
- .where(UDataDescriptor.id == data_descriptor_id, UTerm.id == term_id)
- )
+ def get_universe_term(data_descriptor_id: str, term_id: str, universe_db_session: Session) -> tuple[TermKind, dict]:
+ statement = select(UTerm).join(UDataDescriptor).where(UDataDescriptor.id == data_descriptor_id, UTerm.id == term_id)
  results = universe_db_session.exec(statement)
  term = results.one()
  return term.kind, term.specs
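
Note: get_universe_term resolves a single term by data-descriptor id and term id through the SQLModel session. A minimal usage sketch, written as if it lived in this module's __main__ block where db, Path and get_universe_term are already in scope; the "institution" / "ipsl" identifiers are illustrative, not guaranteed contents of the database:

    connection = db.DBConnection(Path(".cache/dbs/universe.sqlite"))
    with connection.create_session() as session:
        # Returns the term kind plus its JSON specs as stored in the universe database.
        kind, specs = get_universe_term("institution", "ipsl", session)
        print(kind, specs.get("id"))
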
@@ -143,6 +145,7 @@ def get_universe_term(data_descriptor_id: str,

  if __name__ == "__main__":
  import os
+
  root_dir = Path(str(os.getcwd())).parent.parent
  print(root_dir)
  universe_create_db(root_dir / Path(".cache/dbs/universe.sqlite"))

esgvoc/core/logging_handler.py

@@ -1,4 +1,26 @@
  import logging.config
- from pathlib import Path

- logging.config.fileConfig(f"{Path(__file__).parent}/logging.conf", disable_existing_loggers=False)
+ LOGGING_CONFIG = {
+ 'version': 1,
+ 'disable_existing_loggers': False,
+ 'formatters': {
+ 'esgvoc_formatter': {
+ 'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s',
+ },
+ },
+ 'handlers': {
+ 'esgvoc_stdout': {
+ 'class': 'logging.StreamHandler',
+ 'formatter': 'esgvoc_formatter',
+ },
+ },
+ 'loggers': {
+ 'esgvoc': {
+ 'handlers': ['esgvoc_stdout'],
+ 'level': 'ERROR',
+ 'propagate': False,
+ }
+ }
+ }
+
+ logging.config.dictConfig(LOGGING_CONFIG)
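
Note: the file-based logging.conf is replaced by an in-code dictConfig that attaches a single StreamHandler to the "esgvoc" logger (level ERROR, no propagation to the root logger). A small sketch of how an application could tune this after import; the module and logger names come from this diff, the rest is standard library behaviour:

    import logging

    import esgvoc.core.logging_handler  # noqa: F401  # applies dictConfig(LOGGING_CONFIG) at import time

    # Child loggers such as "esgvoc.core.repo_fetcher" bubble up to the "esgvoc" logger,
    # so raising its level is enough to surface debug output from the whole package.
    logging.getLogger("esgvoc").setLevel(logging.DEBUG)
    logging.getLogger("esgvoc.core.repo_fetcher").debug("emitted through the esgvoc_stdout handler")
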

esgvoc/core/repo_fetcher.py

@@ -1,19 +1,23 @@
+ import logging
  import os
  import subprocess
+ import sys
+ from contextlib import contextmanager
+ from pathlib import Path
+ from typing import List, Optional
+
  import requests
  from pydantic import BaseModel, ValidationError
- from typing import List, Optional
- from contextlib import contextmanager
- import logging
- import sys

  _LOGGER = logging.getLogger(__name__)

+
  @contextmanager
  def redirect_stdout_to_log(level=logging.INFO):
  """
  Redirect stdout to the global _LOGGER temporarily.
  """
+
  class StreamToLogger:
  def __init__(self, log_level):
  self.log_level = log_level
@@ -48,6 +52,7 @@ class GitHubRepository(BaseModel):
  created_at: str
  updated_at: str

+
  class GitHubBranch(BaseModel):
  name: str
  commit: dict
@@ -59,7 +64,7 @@ class RepoFetcher:
  DataFetcher is responsible for fetching data from external sources such as GitHub.
  """

- def __init__(self, base_url: str = "https://api.github.com",local_path: str = ".cache/repos"):
+ def __init__(self, base_url: str = "https://api.github.com", local_path: str = ".cache/repos"):
  self.base_url = base_url
  self.repo_dir = local_path

@@ -100,7 +105,6 @@ class RepoFetcher:
  except ValidationError as e:
  raise Exception(f"Data validation error: {e}")

-
  def fetch_branch_details(self, owner: str, repo: str, branch: str) -> GitHubBranch:
  """
  Fetch details of a specific branch in a repository.
@@ -120,7 +124,7 @@ class RepoFetcher:
  except ValidationError as e:
  raise Exception(f"Data validation error: {e}")

- def list_directory(self,owner, repo, branch='main'):
+ def list_directory(self, owner, repo, branch="main"):
  """
  List directories in the root of a GitHub repository.

@@ -133,10 +137,10 @@ class RepoFetcher:
  response = requests.get(url)
  response.raise_for_status() # Raise an error for bad responses
  contents = response.json()
- directories = [item['name'] for item in contents if item['type'] == 'dir']
+ directories = [item["name"] for item in contents if item["type"] == "dir"]
  return directories

- def list_files(self,owner, repo, directory, branch='main'):
+ def list_files(self, owner, repo, directory, branch="main"):
  """
  List files in a specific directory of a GitHub repository.

@@ -150,10 +154,10 @@ class RepoFetcher:
  response = requests.get(url)
  response.raise_for_status() # Raise an error for bad responses
  contents = response.json()
- files = [item['name'] for item in contents if item['type'] == 'file']
+ files = [item["name"] for item in contents if item["type"] == "file"]
  return files
-
- def clone_repository(self, owner: str, repo: str, branch: Optional[str] = None, local_path: str|None = None):
+
+ def clone_repository(self, owner: str, repo: str, branch: Optional[str] = None, local_path: str | None = None):
  """
  Clone a GitHub repository to a target directory.
  :param owner: Repository owner
@@ -162,52 +166,47 @@ class RepoFetcher:
  :param branch: (Optional) The branch to clone. Clones the default branch if None.
  """
  repo_url = f"https://github.com/{owner}/{repo}.git"
- destination = local_path if local_path else f"{self.repo_dir}/{repo}"
+ destination = local_path if local_path else f"{self.repo_dir}/{repo}"

  command = ["git", "clone", repo_url, destination]
  if branch:
  command.extend(["--branch", branch])
  with redirect_stdout_to_log():
-
  try:
- subprocess.run(command, check=True)
- _LOGGER.debug(f"Repository cloned successfully into {destination}")
- except subprocess.CalledProcessError:
- try:
+ if not Path(destination).exists():
+ subprocess.run(command, check=True)
+ _LOGGER.debug(f"Repository cloned successfully into {destination}")
+ else:
  current_work_dir = os.getcwd()
- os.chdir(f"{self.repo_dir}/{repo}")
+ os.chdir(f"{destination}")
  command = ["git", "pull"]
  subprocess.run(command, check=True)
  os.chdir(current_work_dir)

+ except Exception as e:
+ raise Exception(f"Failed to clone repository: {e}")

- except Exception as e:
- raise Exception(f"Failed to clone repository: {e}")
-
- def get_github_version_with_api(self, owner: str, repo: str, branch: str ="main"):
- """ Fetch the latest commit version (or any other versioning scheme) from GitHub. """
- details = self.fetch_branch_details( owner, repo, branch)
- return details.commit.get('sha')
+ def get_github_version_with_api(self, owner: str, repo: str, branch: str = "main"):
+ """Fetch the latest commit version (or any other versioning scheme) from GitHub."""
+ details = self.fetch_branch_details(owner, repo, branch)
+ return details.commit.get("sha")

- def get_github_version(self, owner: str, repo: str, branch: str="main"):
- """ Fetch the latest commit version (or any other versioning scheme) from GitHub. with command git fetch """
+ def get_github_version(self, owner: str, repo: str, branch: str = "main"):
+ """Fetch the latest commit version (or any other versioning scheme) from GitHub. with command git fetch"""
  repo_url = f"https://github.com/{owner}/{repo}.git"
  command = ["git", "ls-remote", repo_url, f"{self.repo_dir}/{repo}"]
  if branch:
  command.extend([branch])

  # with redirect_stdout_to_log():
- output=None
+ output = None
  try:
- result = subprocess.run(command, capture_output=True,
- text=True,
- check=True)
+ result = subprocess.run(command, capture_output=True, text=True, check=True)
  # Parse the output to get the commit hash
  output = result.stdout.strip()
  _LOGGER.debug(f"Repository fetch successfully from {self.repo_dir}/{repo}")
  except Exception as e:
-
- _LOGGER.debug("error in with git fetch " + repr(e))
+ _LOGGER.debug("error in with git fetch " + repr(e))
  if output is not None:
  commit_hash = output.split()[0]
  return commit_hash
@@ -216,45 +215,48 @@ class RepoFetcher:
  # return git_hash

  def get_local_repo_version(self, repo_path: str, branch: Optional[str] = "main"):
- """ Check the version of the local repository by fetching the latest commit hash. """
+ """Check the version of the local repository by fetching the latest commit hash."""
  # repo_path = os.path.join(self.repo_dir, repo)
  if os.path.exists(repo_path):
- #print("EXIST")
+ # print("EXIST")
  command = ["git", "-C", repo_path]
  if branch:
  command.extend(["switch", branch])
  # Ensure we are on the correct branch
  with redirect_stdout_to_log():
- subprocess.run(command,
- stdout=subprocess.PIPE, # Capture stdout
- stderr=subprocess.PIPE, # Capture stderr
- text=True) # Decode output as text
+ subprocess.run(
+ command,
+ stdout=subprocess.PIPE, # Capture stdout
+ stderr=subprocess.PIPE, # Capture stderr
+ text=True,
+ ) # Decode output as text
  # Get the latest commit hash (SHA) from the local repository
- commit_hash = subprocess.check_output(["git", "-C", repo_path, "rev-parse", "HEAD"],
- stderr=subprocess.PIPE,
- text=True).strip()
+ commit_hash = subprocess.check_output(
+ ["git", "-C", repo_path, "rev-parse", "HEAD"], stderr=subprocess.PIPE, text=True
+ ).strip()
  return commit_hash
  return None

+
  if __name__ == "__main__":
  fetcher = RepoFetcher()
-
+
  # Fetch repositories for a user
- #repos = fetcher.fetch_repositories("ESPRI-Mod")
- #for repo in repos:
+ # repos = fetcher.fetch_repositories("ESPRI-Mod")
+ # for repo in repos:
  # print(repo)

  # Fetch a specific repository's details
- #repo_details = fetcher.fetch_repository_details("ESPRI-Mod", "mip-cmor-tables")
- #"print(repo_details)
- #branch_details = fetcher.fetch_branch_details("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
- #print(branch_details)
-
- fetcher.clone_repository("ESPRI-Mod","mip-cmor-tables", branch="uni_proj_ld")
-
- #a =fetcher.get_github_version("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
- #print(a)
- #a = fetcher.get_local_repo_version("mip-cmor-tables","uni_proj_ld")
- #print(a)
-
- fetcher.clone_repository("ESPRI-Mod","CMIP6Plus_CVs", branch="uni_proj_ld")
+ # repo_details = fetcher.fetch_repository_details("ESPRI-Mod", "mip-cmor-tables")
+ # "print(repo_details)
+ # branch_details = fetcher.fetch_branch_details("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
+ # print(branch_details)
+
+ fetcher.clone_repository("ESPRI-Mod", "mip-cmor-tables", branch="uni_proj_ld")
+
+ # a =fetcher.get_github_version("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
+ # print(a)
+ # a = fetcher.get_local_repo_version("mip-cmor-tables","uni_proj_ld")
+ # print(a)
+
+ fetcher.clone_repository("ESPRI-Mod", "CMIP6Plus_CVs", branch="uni_proj_ld")

esgvoc/core/service/data_merger.py

@@ -1,7 +1,7 @@
-
+ import logging
  from typing import Dict, List, Set
+
  from esgvoc.core.data_handler import JsonLdResource
- import logging

  logger = logging.getLogger(__name__)

@@ -9,16 +9,23 @@ logger = logging.getLogger(__name__)
  def merge_dicts(original: list, custom: list) -> dict:
  """Shallow merge: Overwrites original data with custom data."""
  b = original[0]
- a = custom[0]
+ a = custom[0]
  merged = {**{k: v for k, v in a.items() if k != "@id"}, **{k: v for k, v in b.items() if k != "@id"}}
  return merged

- def merge(uri:str)->Dict:
+
+ def merge(uri: str) -> Dict:
  mdm = DataMerger(data=JsonLdResource(uri=uri))
  return mdm.merge_linked_json()[-1]

+
  class DataMerger:
- def __init__(self, data: JsonLdResource, allowed_base_uris: Set[str]={"https://espri-mod.github.io/mip-cmor-tables"}, locally_available:dict = {}):
+ def __init__(
+ self,
+ data: JsonLdResource,
+ allowed_base_uris: Set[str] = {"https://espri-mod.github.io/mip-cmor-tables"},
+ locally_available: dict = {},
+ ):
  self.data = data
  self.allowed_base_uris = allowed_base_uris
  self.locally_available = locally_available
@@ -29,42 +36,49 @@ class DataMerger:

  def _get_next_id(self, data: dict) -> str | None:
  """Extract the next @id from the data if it is a valid customization reference."""
- if isinstance(data,list):
+ if isinstance(data, list):
  data = data[0]
  if "@id" in data and self._should_resolve(data["@id"]):
- return data["@id"] + ".json"
+ return data["@id"] + ".json"
  return None

  def merge_linked_json(self) -> List[Dict]:
- """Fetch and merge data recursively, returning a list of progressively merged Data json instances."""
- result_list = [self.data.json_dict] # Start with the original json object
- visited = set(self.data.uri) # Track visited URIs to prevent cycles
- current_data = self.data
- #print(current_data.expanded)
-
- while True:
- next_id = self._get_next_id(current_data.expanded[0])
- if not next_id or next_id in visited or not self._should_resolve(next_id):
- break
- visited.add(next_id)
-
- # Fetch and merge the next customization
- #do we have it in local ? if so use it instead of remote
- for local_repo in self.locally_available.keys():
- if next_id.startswith(local_repo):
- next_id = next_id.replace(local_repo,self.locally_available[local_repo])
- next_data_instance = JsonLdResource(uri=next_id)
- merged_json_data = merge_dicts([current_data.json_dict], [next_data_instance.json_dict])
- next_data_instance.json_dict = merged_json_data
-
- # Add the merged instance to the result list
- result_list.append(merged_json_data)
- current_data = next_data_instance
-
- return result_list
+ try:
+ """Fetch and merge data recursively, returning a list of progressively merged Data json instances."""
+ result_list = [self.data.json_dict] # Start with the original json object
+ visited = set(self.data.uri) # Track visited URIs to prevent cycles
+ current_data = self.data
+ # print(current_data.expanded)
+ while True:
+ next_id = self._get_next_id(current_data.expanded[0])
+
+ if not next_id or next_id in visited or not self._should_resolve(next_id):
+ break
+
+ visited.add(next_id)
+
+ # Fetch and merge the next customization
+ # do we have it in local ? if so use it instead of remote
+ for local_repo in self.locally_available.keys():
+ if next_id.startswith(local_repo):
+ next_id = next_id.replace(local_repo, self.locally_available[local_repo])
+
+ next_data_instance = JsonLdResource(uri=next_id)
+ merged_json_data = merge_dicts([current_data.json_dict], [next_data_instance.json_dict])
+ next_data_instance.json_dict = merged_json_data
+
+ # Add the merged instance to the result list
+ result_list.append(merged_json_data)
+ current_data = next_data_instance
+ return result_list
+ except Exception as e:
+ print(self.data)
+ print(e)
+

  if __name__ == "__main__":
  import warnings
+
  warnings.simplefilter("ignore")
  # test from institution_id ipsl exapnd and merge with institution ipsl

@@ -80,4 +94,3 @@ if __name__ == "__main__":
  # print(mdm.merge_linked_json())
  #
  #
-
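
Note: DataMerger follows @id references between JSON-LD documents and shallow-merges them (merge_dicts) until no further resolvable reference is found; merge_linked_json returns the chain of progressively merged documents, and the locally_available mapping lets it read a local checkout instead of fetching from the published base URI. A minimal sketch built from the calls visible in this diff; the term file path is illustrative:

    from esgvoc.core.data_handler import JsonLdResource
    from esgvoc.core.service.data_merger import DataMerger

    merger = DataMerger(
        data=JsonLdResource(uri="CMIP6Plus_CVs/institution_id/ipsl.json"),  # hypothetical term file
        locally_available={
            # Resolve references to the published tables against a local clone instead of HTTP.
            "https://espri-mod.github.io/mip-cmor-tables": ".cache/repos/mip-cmor-tables",
        },
    )
    merged = merger.merge_linked_json()[-1]  # last element is the fully merged document
    print(merged.get("id"))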