esgvoc 0.4.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions as released to their public registry. It is provided for informational purposes only and reflects the changes between the two released versions.

This version of esgvoc has been flagged as a potentially problematic release.

Files changed (73)
  1. esgvoc/__init__.py +1 -1
  2. esgvoc/api/data_descriptors/__init__.py +50 -28
  3. esgvoc/api/data_descriptors/activity.py +3 -3
  4. esgvoc/api/data_descriptors/area_label.py +16 -1
  5. esgvoc/api/data_descriptors/branded_suffix.py +20 -0
  6. esgvoc/api/data_descriptors/branded_variable.py +12 -0
  7. esgvoc/api/data_descriptors/consortium.py +14 -13
  8. esgvoc/api/data_descriptors/contact.py +5 -0
  9. esgvoc/api/data_descriptors/conventions.py +6 -0
  10. esgvoc/api/data_descriptors/creation_date.py +5 -0
  11. esgvoc/api/data_descriptors/data_descriptor.py +14 -9
  12. esgvoc/api/data_descriptors/data_specs_version.py +5 -0
  13. esgvoc/api/data_descriptors/date.py +1 -1
  14. esgvoc/api/data_descriptors/directory_date.py +1 -1
  15. esgvoc/api/data_descriptors/experiment.py +13 -11
  16. esgvoc/api/data_descriptors/forcing_index.py +1 -1
  17. esgvoc/api/data_descriptors/frequency.py +3 -3
  18. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  19. esgvoc/api/data_descriptors/grid_label.py +2 -2
  20. esgvoc/api/data_descriptors/horizontal_label.py +15 -1
  21. esgvoc/api/data_descriptors/initialisation_index.py +1 -1
  22. esgvoc/api/data_descriptors/institution.py +8 -5
  23. esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
  24. esgvoc/api/data_descriptors/license.py +3 -3
  25. esgvoc/api/data_descriptors/mip_era.py +1 -1
  26. esgvoc/api/data_descriptors/model_component.py +1 -1
  27. esgvoc/api/data_descriptors/obs_type.py +5 -0
  28. esgvoc/api/data_descriptors/organisation.py +1 -1
  29. esgvoc/api/data_descriptors/physic_index.py +1 -1
  30. esgvoc/api/data_descriptors/product.py +2 -2
  31. esgvoc/api/data_descriptors/publication_status.py +5 -0
  32. esgvoc/api/data_descriptors/realisation_index.py +1 -1
  33. esgvoc/api/data_descriptors/realm.py +1 -1
  34. esgvoc/api/data_descriptors/region.py +5 -0
  35. esgvoc/api/data_descriptors/resolution.py +3 -3
  36. esgvoc/api/data_descriptors/source.py +9 -5
  37. esgvoc/api/data_descriptors/source_type.py +1 -1
  38. esgvoc/api/data_descriptors/table.py +3 -2
  39. esgvoc/api/data_descriptors/temporal_label.py +15 -1
  40. esgvoc/api/data_descriptors/time_range.py +4 -3
  41. esgvoc/api/data_descriptors/title.py +5 -0
  42. esgvoc/api/data_descriptors/tracking_id.py +5 -0
  43. esgvoc/api/data_descriptors/variable.py +25 -12
  44. esgvoc/api/data_descriptors/variant_label.py +3 -3
  45. esgvoc/api/data_descriptors/vertical_label.py +14 -0
  46. esgvoc/api/project_specs.py +117 -2
  47. esgvoc/api/projects.py +242 -279
  48. esgvoc/api/search.py +30 -3
  49. esgvoc/api/universe.py +42 -27
  50. esgvoc/apps/jsg/cmip6_template.json +74 -0
  51. esgvoc/apps/jsg/cmip6plus_template.json +74 -0
  52. esgvoc/apps/jsg/json_schema_generator.py +185 -0
  53. esgvoc/cli/config.py +500 -0
  54. esgvoc/cli/find.py +138 -0
  55. esgvoc/cli/get.py +43 -38
  56. esgvoc/cli/main.py +10 -3
  57. esgvoc/cli/status.py +27 -18
  58. esgvoc/cli/valid.py +10 -15
  59. esgvoc/core/db/models/project.py +11 -11
  60. esgvoc/core/db/models/universe.py +3 -3
  61. esgvoc/core/db/project_ingestion.py +40 -40
  62. esgvoc/core/db/universe_ingestion.py +36 -33
  63. esgvoc/core/logging_handler.py +24 -2
  64. esgvoc/core/repo_fetcher.py +61 -59
  65. esgvoc/core/service/data_merger.py +47 -34
  66. esgvoc/core/service/state.py +107 -83
  67. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/METADATA +7 -20
  68. esgvoc-1.0.0.dist-info/RECORD +95 -0
  69. esgvoc/core/logging.conf +0 -21
  70. esgvoc-0.4.0.dist-info/RECORD +0 -80
  71. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/WHEEL +0 -0
  72. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/entry_points.txt +0 -0
  73. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0

esgvoc/core/db/universe_ingestion.py

@@ -30,17 +30,18 @@ def ingest_universe(universe_repo_dir_path: Path, universe_db_file_path: Path) -
  try:
  connection = db.DBConnection(universe_db_file_path)
  except Exception as e:
- msg = f'Unable to read universe SQLite file at {universe_db_file_path}. Abort.'
+ msg = f"Unable to read universe SQLite file at {universe_db_file_path}. Abort."
  _LOGGER.fatal(msg)
  raise IOError(msg) from e

  for data_descriptor_dir_path in universe_repo_dir_path.iterdir():
- if data_descriptor_dir_path.is_dir() and \
- (data_descriptor_dir_path / "000_context.jsonld").exists(): # TODO may be put that in setting
+ if (
+ data_descriptor_dir_path.is_dir() and (data_descriptor_dir_path / "000_context.jsonld").exists()
+ ): # TODO may be put that in setting
  try:
  ingest_data_descriptor(data_descriptor_dir_path, connection)
  except Exception as e:
- msg = f'unexpected error while processing data descriptor {data_descriptor_dir_path}'
+ msg = f"unexpected error while processing data descriptor {data_descriptor_dir_path}"
  _LOGGER.fatal(msg)
  raise EsgvocDbError(msg) from e

@@ -48,20 +49,24 @@ def ingest_universe(universe_repo_dir_path: Path, universe_db_file_path: Path) -
  # Well, the following instructions are not data duplication. It is more building an index.
  # Read: https://sqlite.org/fts5.html
  try:
- sql_query = 'INSERT INTO uterms_fts5(pk, id, specs, kind, data_descriptor_pk) ' + \
- 'SELECT pk, id, specs, kind, data_descriptor_pk FROM uterms;' # noqa: S608
+ sql_query = (
+ "INSERT INTO uterms_fts5(pk, id, specs, kind, data_descriptor_pk) "
+ + "SELECT pk, id, specs, kind, data_descriptor_pk FROM uterms;"
+ ) # noqa: S608
  session.exec(text(sql_query)) # type: ignore
  except Exception as e:
- msg = f'unable to insert rows into uterms_fts5 table for {universe_db_file_path}'
+ msg = f"unable to insert rows into uterms_fts5 table for {universe_db_file_path}"
  _LOGGER.fatal(msg)
  raise EsgvocDbError(msg) from e
  session.commit()
  try:
- sql_query = 'INSERT INTO udata_descriptors_fts5(pk, id, universe_pk, context, term_kind) ' + \
- 'SELECT pk, id, universe_pk, context, term_kind FROM udata_descriptors;' # noqa: S608
+ sql_query = (
+ "INSERT INTO udata_descriptors_fts5(pk, id, universe_pk, context, term_kind) "
+ + "SELECT pk, id, universe_pk, context, term_kind FROM udata_descriptors;"
+ ) # noqa: S608
  session.exec(text(sql_query)) # type: ignore
  except Exception as e:
- msg = f'unable to insert rows into udata_descriptors_fts5 table for {universe_db_file_path}'
+ msg = f"unable to insert rows into udata_descriptors_fts5 table for {universe_db_file_path}"
  _LOGGER.fatal(msg)
  raise EsgvocDbError(msg) from e
  session.commit()
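
Note: these INSERT ... SELECT statements populate SQLite FTS5 virtual tables (uterms_fts5, udata_descriptors_fts5) that serve as a full-text index over the ingested terms. A minimal sketch of how such an index can be queried with the standard sqlite3 module, assuming the FTS5 tables store the listed columns and the database lives at the path used in this module's __main__ block:

    import sqlite3

    conn = sqlite3.connect(".cache/dbs/universe.sqlite")
    # FTS5 MATCH performs a full-text search over the indexed columns.
    rows = conn.execute(
        "SELECT id, kind FROM uterms_fts5 WHERE uterms_fts5 MATCH ?",
        ("ipsl",),
    ).fetchall()
    for term_id, kind in rows:
        print(term_id, kind)
    conn.close()
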
@@ -74,24 +79,21 @@ def ingest_metadata_universe(connection, git_hash):
  session.commit()


- def ingest_data_descriptor(data_descriptor_path: Path,
- connection: db.DBConnection) -> None:
+ def ingest_data_descriptor(data_descriptor_path: Path, connection: db.DBConnection) -> None:
  data_descriptor_id = data_descriptor_path.name

  context_file_path = data_descriptor_path.joinpath(esgvoc.core.constants.CONTEXT_FILENAME)
  try:
  context = read_json_file(context_file_path)
  except Exception as e:
- msg = f'Unable to read the context file {context_file_path} of data descriptor \
- {data_descriptor_id}. Skip.\n{str(e)}'
+ msg = f"Unable to read the context file {context_file_path} of data descriptor \
+ {data_descriptor_id}. Skip.\n{str(e)}"
  _LOGGER.warning(msg)
  return

  with connection.create_session() as session:
  # We ll know it only when we ll add a term (hypothesis all term have the same kind in a data_descriptor)
- data_descriptor = UDataDescriptor(id=data_descriptor_id,
- context=context,
- term_kind="")
+ data_descriptor = UDataDescriptor(id=data_descriptor_id, context=context, term_kind="")
  term_kind_dd = None

  _LOGGER.debug(f"add data_descriptor : {data_descriptor_id}")
@@ -99,28 +101,34 @@ def ingest_data_descriptor(data_descriptor_path: Path,
  _LOGGER.debug(f"found term path : {term_file_path}, {term_file_path.suffix}")
  if term_file_path.is_file() and term_file_path.suffix == ".json":
  try:
- locally_available = {"https://espri-mod.github.io/mip-cmor-tables":
- service.current_state.universe.local_path}
- json_specs = DataMerger(data=JsonLdResource(uri=str(term_file_path)),
- locally_available=locally_available).merge_linked_json()[-1]
+ locally_available = {
+ "https://espri-mod.github.io/mip-cmor-tables": service.current_state.universe.local_path
+ }
+
+ json_specs = DataMerger(
+ data=JsonLdResource(uri=str(term_file_path)), locally_available=locally_available
+ ).merge_linked_json()[-1]
+
  term_kind = infer_term_kind(json_specs)
  term_id = json_specs["id"]

  if term_kind_dd is None:
  term_kind_dd = term_kind
-
  except Exception as e:
- _LOGGER.warning(f'Unable to read term {term_file_path} for data descriptor ' +
- f'{data_descriptor_path}. Skip.\n{str(e)}')
+ _LOGGER.warning(
+ f"Unable to read term {term_file_path} for data descriptor "
+ + f"{data_descriptor_path}. Skip.\n{str(e)}"
+ )
  continue
  if term_id and json_specs and data_descriptor and term_kind:
- _LOGGER.debug("adding {term_id}")
+ _LOGGER.debug(f"adding {term_id}")
  term = UTerm(
  id=term_id,
  specs=json_specs,
  data_descriptor=data_descriptor,
  kind=term_kind,
  )
+
  session.add(term)
  if term_kind_dd is not None:
  data_descriptor.term_kind = term_kind_dd
@@ -128,14 +136,8 @@ def ingest_data_descriptor(data_descriptor_path: Path,
  session.commit()


- def get_universe_term(data_descriptor_id: str,
- term_id: str,
- universe_db_session: Session) -> tuple[TermKind, dict]:
- statement = (
- select(UTerm)
- .join(UDataDescriptor)
- .where(UDataDescriptor.id == data_descriptor_id, UTerm.id == term_id)
- )
+ def get_universe_term(data_descriptor_id: str, term_id: str, universe_db_session: Session) -> tuple[TermKind, dict]:
+ statement = select(UTerm).join(UDataDescriptor).where(UDataDescriptor.id == data_descriptor_id, UTerm.id == term_id)
  results = universe_db_session.exec(statement)
  term = results.one()
  return term.kind, term.specs
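
Note: get_universe_term resolves a single term by data-descriptor id and term id through the SQLModel session. A minimal usage sketch, written as if it lived in this module's __main__ block where db, Path and get_universe_term are already in scope; the "institution" / "ipsl" identifiers are illustrative, not guaranteed contents of the database:

    connection = db.DBConnection(Path(".cache/dbs/universe.sqlite"))
    with connection.create_session() as session:
        # Returns the term kind plus its JSON specs as stored in the universe database.
        kind, specs = get_universe_term("institution", "ipsl", session)
        print(kind, specs.get("id"))
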
@@ -143,6 +145,7 @@ def get_universe_term(data_descriptor_id: str,

  if __name__ == "__main__":
  import os
+
  root_dir = Path(str(os.getcwd())).parent.parent
  print(root_dir)
  universe_create_db(root_dir / Path(".cache/dbs/universe.sqlite"))

esgvoc/core/logging_handler.py

@@ -1,4 +1,26 @@
  import logging.config
- from pathlib import Path

- logging.config.fileConfig(f"{Path(__file__).parent}/logging.conf", disable_existing_loggers=False)
+ LOGGING_CONFIG = {
+ 'version': 1,
+ 'disable_existing_loggers': False,
+ 'formatters': {
+ 'esgvoc_formatter': {
+ 'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s',
+ },
+ },
+ 'handlers': {
+ 'esgvoc_stdout': {
+ 'class': 'logging.StreamHandler',
+ 'formatter': 'esgvoc_formatter',
+ },
+ },
+ 'loggers': {
+ 'esgvoc': {
+ 'handlers': ['esgvoc_stdout'],
+ 'level': 'ERROR',
+ 'propagate': False,
+ }
+ }
+ }
+
+ logging.config.dictConfig(LOGGING_CONFIG)
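
Note: the file-based logging.conf is replaced by an in-code dictConfig that attaches a single StreamHandler to the "esgvoc" logger (level ERROR, no propagation to the root logger). A small sketch of how an application could tune this after import; the module and logger names come from this diff, the rest is standard library behaviour:

    import logging

    import esgvoc.core.logging_handler  # noqa: F401  # applies dictConfig(LOGGING_CONFIG) at import time

    # Child loggers such as "esgvoc.core.repo_fetcher" bubble up to the "esgvoc" logger,
    # so raising its level is enough to surface debug output from the whole package.
    logging.getLogger("esgvoc").setLevel(logging.DEBUG)
    logging.getLogger("esgvoc.core.repo_fetcher").debug("emitted through the esgvoc_stdout handler")
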

esgvoc/core/repo_fetcher.py

@@ -1,19 +1,23 @@
+ import logging
  import os
  import subprocess
+ import sys
+ from contextlib import contextmanager
+ from pathlib import Path
+ from typing import List, Optional
+
  import requests
  from pydantic import BaseModel, ValidationError
- from typing import List, Optional
- from contextlib import contextmanager
- import logging
- import sys

  _LOGGER = logging.getLogger(__name__)

+
  @contextmanager
  def redirect_stdout_to_log(level=logging.INFO):
  """
  Redirect stdout to the global _LOGGER temporarily.
  """
+
  class StreamToLogger:
  def __init__(self, log_level):
  self.log_level = log_level
@@ -48,6 +52,7 @@ class GitHubRepository(BaseModel):
  created_at: str
  updated_at: str

+
  class GitHubBranch(BaseModel):
  name: str
  commit: dict
@@ -59,7 +64,7 @@ class RepoFetcher:
  DataFetcher is responsible for fetching data from external sources such as GitHub.
  """

- def __init__(self, base_url: str = "https://api.github.com",local_path: str = ".cache/repos"):
+ def __init__(self, base_url: str = "https://api.github.com", local_path: str = ".cache/repos"):
  self.base_url = base_url
  self.repo_dir = local_path

@@ -100,7 +105,6 @@ class RepoFetcher:
  except ValidationError as e:
  raise Exception(f"Data validation error: {e}")

-
  def fetch_branch_details(self, owner: str, repo: str, branch: str) -> GitHubBranch:
  """
  Fetch details of a specific branch in a repository.
@@ -120,7 +124,7 @@ class RepoFetcher:
  except ValidationError as e:
  raise Exception(f"Data validation error: {e}")

- def list_directory(self,owner, repo, branch='main'):
+ def list_directory(self, owner, repo, branch="main"):
  """
  List directories in the root of a GitHub repository.

@@ -133,10 +137,10 @@ class RepoFetcher:
  response = requests.get(url)
  response.raise_for_status() # Raise an error for bad responses
  contents = response.json()
- directories = [item['name'] for item in contents if item['type'] == 'dir']
+ directories = [item["name"] for item in contents if item["type"] == "dir"]
  return directories

- def list_files(self,owner, repo, directory, branch='main'):
+ def list_files(self, owner, repo, directory, branch="main"):
  """
  List files in a specific directory of a GitHub repository.

@@ -150,10 +154,10 @@ class RepoFetcher:
  response = requests.get(url)
  response.raise_for_status() # Raise an error for bad responses
  contents = response.json()
- files = [item['name'] for item in contents if item['type'] == 'file']
+ files = [item["name"] for item in contents if item["type"] == "file"]
  return files
-
- def clone_repository(self, owner: str, repo: str, branch: Optional[str] = None, local_path: str|None = None):
+
+ def clone_repository(self, owner: str, repo: str, branch: Optional[str] = None, local_path: str | None = None):
  """
  Clone a GitHub repository to a target directory.
  :param owner: Repository owner
@@ -162,52 +166,47 @@ class RepoFetcher:
  :param branch: (Optional) The branch to clone. Clones the default branch if None.
  """
  repo_url = f"https://github.com/{owner}/{repo}.git"
- destination = local_path if local_path else f"{self.repo_dir}/{repo}"
+ destination = local_path if local_path else f"{self.repo_dir}/{repo}"

  command = ["git", "clone", repo_url, destination]
  if branch:
  command.extend(["--branch", branch])
  with redirect_stdout_to_log():
-
  try:
- subprocess.run(command, check=True)
- _LOGGER.debug(f"Repository cloned successfully into {destination}")
- except subprocess.CalledProcessError:
- try:
+ if not Path(destination).exists():
+ subprocess.run(command, check=True)
+ _LOGGER.debug(f"Repository cloned successfully into {destination}")
+ else:
  current_work_dir = os.getcwd()
- os.chdir(f"{self.repo_dir}/{repo}")
+ os.chdir(f"{destination}")
  command = ["git", "pull"]
  subprocess.run(command, check=True)
  os.chdir(current_work_dir)

+ except Exception as e:
+ raise Exception(f"Failed to clone repository: {e}")

- except Exception as e:
- raise Exception(f"Failed to clone repository: {e}")
-
- def get_github_version_with_api(self, owner: str, repo: str, branch: str ="main"):
- """ Fetch the latest commit version (or any other versioning scheme) from GitHub. """
- details = self.fetch_branch_details( owner, repo, branch)
- return details.commit.get('sha')
+ def get_github_version_with_api(self, owner: str, repo: str, branch: str = "main"):
+ """Fetch the latest commit version (or any other versioning scheme) from GitHub."""
+ details = self.fetch_branch_details(owner, repo, branch)
+ return details.commit.get("sha")

- def get_github_version(self, owner: str, repo: str, branch: str="main"):
- """ Fetch the latest commit version (or any other versioning scheme) from GitHub. with command git fetch """
+ def get_github_version(self, owner: str, repo: str, branch: str = "main"):
+ """Fetch the latest commit version (or any other versioning scheme) from GitHub. with command git fetch"""
  repo_url = f"https://github.com/{owner}/{repo}.git"
  command = ["git", "ls-remote", repo_url, f"{self.repo_dir}/{repo}"]
  if branch:
  command.extend([branch])

  # with redirect_stdout_to_log():
- output=None
+ output = None
  try:
- result = subprocess.run(command, capture_output=True,
- text=True,
- check=True)
+ result = subprocess.run(command, capture_output=True, text=True, check=True)
  # Parse the output to get the commit hash
  output = result.stdout.strip()
  _LOGGER.debug(f"Repository fetch successfully from {self.repo_dir}/{repo}")
  except Exception as e:
-
- _LOGGER.debug("error in with git fetch " + repr(e))
+ _LOGGER.debug("error in with git fetch " + repr(e))
  if output is not None:
  commit_hash = output.split()[0]
  return commit_hash
@@ -216,45 +215,48 @@ class RepoFetcher:
  # return git_hash

  def get_local_repo_version(self, repo_path: str, branch: Optional[str] = "main"):
- """ Check the version of the local repository by fetching the latest commit hash. """
+ """Check the version of the local repository by fetching the latest commit hash."""
  # repo_path = os.path.join(self.repo_dir, repo)
  if os.path.exists(repo_path):
- #print("EXIST")
+ # print("EXIST")
  command = ["git", "-C", repo_path]
  if branch:
  command.extend(["switch", branch])
  # Ensure we are on the correct branch
  with redirect_stdout_to_log():
- subprocess.run(command,
- stdout=subprocess.PIPE, # Capture stdout
- stderr=subprocess.PIPE, # Capture stderr
- text=True) # Decode output as text
+ subprocess.run(
+ command,
+ stdout=subprocess.PIPE, # Capture stdout
+ stderr=subprocess.PIPE, # Capture stderr
+ text=True,
+ ) # Decode output as text
  # Get the latest commit hash (SHA) from the local repository
- commit_hash = subprocess.check_output(["git", "-C", repo_path, "rev-parse", "HEAD"],
- stderr=subprocess.PIPE,
- text=True).strip()
+ commit_hash = subprocess.check_output(
+ ["git", "-C", repo_path, "rev-parse", "HEAD"], stderr=subprocess.PIPE, text=True
+ ).strip()
  return commit_hash
  return None

+
  if __name__ == "__main__":
  fetcher = RepoFetcher()
-
+
  # Fetch repositories for a user
- #repos = fetcher.fetch_repositories("ESPRI-Mod")
- #for repo in repos:
+ # repos = fetcher.fetch_repositories("ESPRI-Mod")
+ # for repo in repos:
  # print(repo)

  # Fetch a specific repository's details
- #repo_details = fetcher.fetch_repository_details("ESPRI-Mod", "mip-cmor-tables")
- #"print(repo_details)
- #branch_details = fetcher.fetch_branch_details("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
- #print(branch_details)
-
- fetcher.clone_repository("ESPRI-Mod","mip-cmor-tables", branch="uni_proj_ld")
-
- #a =fetcher.get_github_version("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
- #print(a)
- #a = fetcher.get_local_repo_version("mip-cmor-tables","uni_proj_ld")
- #print(a)
-
- fetcher.clone_repository("ESPRI-Mod","CMIP6Plus_CVs", branch="uni_proj_ld")
+ # repo_details = fetcher.fetch_repository_details("ESPRI-Mod", "mip-cmor-tables")
+ # "print(repo_details)
+ # branch_details = fetcher.fetch_branch_details("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
+ # print(branch_details)
+
+ fetcher.clone_repository("ESPRI-Mod", "mip-cmor-tables", branch="uni_proj_ld")
+
+ # a =fetcher.get_github_version("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
+ # print(a)
+ # a = fetcher.get_local_repo_version("mip-cmor-tables","uni_proj_ld")
+ # print(a)
+
+ fetcher.clone_repository("ESPRI-Mod", "CMIP6Plus_CVs", branch="uni_proj_ld")

esgvoc/core/service/data_merger.py

@@ -1,7 +1,7 @@
-
+ import logging
  from typing import Dict, List, Set
+
  from esgvoc.core.data_handler import JsonLdResource
- import logging

  logger = logging.getLogger(__name__)

@@ -9,16 +9,23 @@ logger = logging.getLogger(__name__)
  def merge_dicts(original: list, custom: list) -> dict:
  """Shallow merge: Overwrites original data with custom data."""
  b = original[0]
- a = custom[0]
+ a = custom[0]
  merged = {**{k: v for k, v in a.items() if k != "@id"}, **{k: v for k, v in b.items() if k != "@id"}}
  return merged

- def merge(uri:str)->Dict:
+
+ def merge(uri: str) -> Dict:
  mdm = DataMerger(data=JsonLdResource(uri=uri))
  return mdm.merge_linked_json()[-1]

+
  class DataMerger:
- def __init__(self, data: JsonLdResource, allowed_base_uris: Set[str]={"https://espri-mod.github.io/mip-cmor-tables"}, locally_available:dict = {}):
+ def __init__(
+ self,
+ data: JsonLdResource,
+ allowed_base_uris: Set[str] = {"https://espri-mod.github.io/mip-cmor-tables"},
+ locally_available: dict = {},
+ ):
  self.data = data
  self.allowed_base_uris = allowed_base_uris
  self.locally_available = locally_available
@@ -29,42 +36,49 @@ class DataMerger:

  def _get_next_id(self, data: dict) -> str | None:
  """Extract the next @id from the data if it is a valid customization reference."""
- if isinstance(data,list):
+ if isinstance(data, list):
  data = data[0]
  if "@id" in data and self._should_resolve(data["@id"]):
- return data["@id"] + ".json"
+ return data["@id"] + ".json"
  return None

  def merge_linked_json(self) -> List[Dict]:
- """Fetch and merge data recursively, returning a list of progressively merged Data json instances."""
- result_list = [self.data.json_dict] # Start with the original json object
- visited = set(self.data.uri) # Track visited URIs to prevent cycles
- current_data = self.data
- #print(current_data.expanded)
-
- while True:
- next_id = self._get_next_id(current_data.expanded[0])
- if not next_id or next_id in visited or not self._should_resolve(next_id):
- break
- visited.add(next_id)
-
- # Fetch and merge the next customization
- #do we have it in local ? if so use it instead of remote
- for local_repo in self.locally_available.keys():
- if next_id.startswith(local_repo):
- next_id = next_id.replace(local_repo,self.locally_available[local_repo])
- next_data_instance = JsonLdResource(uri=next_id)
- merged_json_data = merge_dicts([current_data.json_dict], [next_data_instance.json_dict])
- next_data_instance.json_dict = merged_json_data
-
- # Add the merged instance to the result list
- result_list.append(merged_json_data)
- current_data = next_data_instance
-
- return result_list
+ try:
+ """Fetch and merge data recursively, returning a list of progressively merged Data json instances."""
+ result_list = [self.data.json_dict] # Start with the original json object
+ visited = set(self.data.uri) # Track visited URIs to prevent cycles
+ current_data = self.data
+ # print(current_data.expanded)
+ while True:
+ next_id = self._get_next_id(current_data.expanded[0])
+
+ if not next_id or next_id in visited or not self._should_resolve(next_id):
+ break
+
+ visited.add(next_id)
+
+ # Fetch and merge the next customization
+ # do we have it in local ? if so use it instead of remote
+ for local_repo in self.locally_available.keys():
+ if next_id.startswith(local_repo):
+ next_id = next_id.replace(local_repo, self.locally_available[local_repo])
+
+ next_data_instance = JsonLdResource(uri=next_id)
+ merged_json_data = merge_dicts([current_data.json_dict], [next_data_instance.json_dict])
+ next_data_instance.json_dict = merged_json_data
+
+ # Add the merged instance to the result list
+ result_list.append(merged_json_data)
+ current_data = next_data_instance
+ return result_list
+ except Exception as e:
+ print(self.data)
+ print(e)
+

  if __name__ == "__main__":
  import warnings
+
  warnings.simplefilter("ignore")
  # test from institution_id ipsl exapnd and merge with institution ipsl

@@ -80,4 +94,3 @@ if __name__ == "__main__":
  # print(mdm.merge_linked_json())
  #
  #
-
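
Note: DataMerger follows @id references between JSON-LD documents and shallow-merges them (merge_dicts) until no further resolvable reference is found; merge_linked_json returns the chain of progressively merged documents, and the locally_available mapping lets it read a local checkout instead of fetching from the published base URI. A minimal sketch built from the calls visible in this diff; the term file path is illustrative:

    from esgvoc.core.data_handler import JsonLdResource
    from esgvoc.core.service.data_merger import DataMerger

    merger = DataMerger(
        data=JsonLdResource(uri="CMIP6Plus_CVs/institution_id/ipsl.json"),  # hypothetical term file
        locally_available={
            # Resolve references to the published tables against a local clone instead of HTTP.
            "https://espri-mod.github.io/mip-cmor-tables": ".cache/repos/mip-cmor-tables",
        },
    )
    merged = merger.merge_linked_json()[-1]  # last element is the fully merged document
    print(merged.get("id"))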