esgvoc 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic.

Files changed (66)
  1. esgvoc/__init__.py +1 -0
  2. esgvoc/api/__init__.py +62 -0
  3. esgvoc/api/_utils.py +39 -0
  4. esgvoc/api/data_descriptors/__init__.py +60 -0
  5. esgvoc/api/data_descriptors/activity.py +51 -0
  6. esgvoc/api/data_descriptors/consortium.py +66 -0
  7. esgvoc/api/data_descriptors/date.py +48 -0
  8. esgvoc/api/data_descriptors/experiment.py +60 -0
  9. esgvoc/api/data_descriptors/forcing_index.py +47 -0
  10. esgvoc/api/data_descriptors/frequency.py +45 -0
  11. esgvoc/api/data_descriptors/grid_label.py +46 -0
  12. esgvoc/api/data_descriptors/initialisation_index.py +46 -0
  13. esgvoc/api/data_descriptors/institution.py +58 -0
  14. esgvoc/api/data_descriptors/license.py +47 -0
  15. esgvoc/api/data_descriptors/mip_era.py +46 -0
  16. esgvoc/api/data_descriptors/model_component.py +47 -0
  17. esgvoc/api/data_descriptors/organisation.py +42 -0
  18. esgvoc/api/data_descriptors/physic_index.py +47 -0
  19. esgvoc/api/data_descriptors/product.py +45 -0
  20. esgvoc/api/data_descriptors/realisation_index.py +46 -0
  21. esgvoc/api/data_descriptors/realm.py +44 -0
  22. esgvoc/api/data_descriptors/resolution.py +46 -0
  23. esgvoc/api/data_descriptors/source.py +57 -0
  24. esgvoc/api/data_descriptors/source_type.py +43 -0
  25. esgvoc/api/data_descriptors/sub_experiment.py +43 -0
  26. esgvoc/api/data_descriptors/table.py +50 -0
  27. esgvoc/api/data_descriptors/time_range.py +28 -0
  28. esgvoc/api/data_descriptors/variable.py +77 -0
  29. esgvoc/api/data_descriptors/variant_label.py +49 -0
  30. esgvoc/api/projects.py +854 -0
  31. esgvoc/api/report.py +86 -0
  32. esgvoc/api/search.py +92 -0
  33. esgvoc/api/universe.py +218 -0
  34. esgvoc/apps/drs/__init__.py +16 -0
  35. esgvoc/apps/drs/models.py +43 -0
  36. esgvoc/apps/drs/parser.py +27 -0
  37. esgvoc/cli/config.py +79 -0
  38. esgvoc/cli/get.py +142 -0
  39. esgvoc/cli/install.py +14 -0
  40. esgvoc/cli/main.py +22 -0
  41. esgvoc/cli/status.py +26 -0
  42. esgvoc/cli/valid.py +156 -0
  43. esgvoc/core/constants.py +13 -0
  44. esgvoc/core/convert.py +0 -0
  45. esgvoc/core/data_handler.py +133 -0
  46. esgvoc/core/db/__init__.py +5 -0
  47. esgvoc/core/db/connection.py +31 -0
  48. esgvoc/core/db/models/mixins.py +18 -0
  49. esgvoc/core/db/models/project.py +65 -0
  50. esgvoc/core/db/models/universe.py +59 -0
  51. esgvoc/core/db/project_ingestion.py +152 -0
  52. esgvoc/core/db/universe_ingestion.py +120 -0
  53. esgvoc/core/logging.conf +21 -0
  54. esgvoc/core/logging_handler.py +4 -0
  55. esgvoc/core/repo_fetcher.py +259 -0
  56. esgvoc/core/service/__init__.py +8 -0
  57. esgvoc/core/service/data_merger.py +83 -0
  58. esgvoc/core/service/esg_voc.py +79 -0
  59. esgvoc/core/service/settings.py +64 -0
  60. esgvoc/core/service/settings.toml +12 -0
  61. esgvoc/core/service/settings_default.toml +20 -0
  62. esgvoc/core/service/state.py +222 -0
  63. esgvoc-0.1.2.dist-info/METADATA +54 -0
  64. esgvoc-0.1.2.dist-info/RECORD +66 -0
  65. esgvoc-0.1.2.dist-info/WHEEL +4 -0
  66. esgvoc-0.1.2.dist-info/entry_points.txt +2 -0
esgvoc/core/db/models/universe.py
@@ -0,0 +1,59 @@
+ import logging
+ from pathlib import Path
+
+ import sqlalchemy as sa
+ from sqlalchemy.dialects.sqlite import JSON
+ from sqlmodel import Column, Field, Relationship, SQLModel
+
+ import esgvoc.core.db.connection as db
+ from esgvoc.core.db.models.mixins import IdMixin, PkMixin, TermKind
+
+ _LOGGER = logging.getLogger("universe_db_creation")
+
+
+ class Universe(SQLModel, PkMixin, table=True):
+     __tablename__ = "universes"
+     git_hash: str
+     data_descriptors: list["DataDescriptor"] = Relationship(back_populates="universe")
+
+
+ class DataDescriptor(SQLModel, PkMixin, IdMixin, table=True):
+     __tablename__ = "data_descriptors"
+     context: dict = Field(sa_column=sa.Column(JSON))
+     universe_pk: int | None = Field(default=None, foreign_key="universes.pk")
+     universe: Universe = Relationship(back_populates="data_descriptors")
+     terms: list["UTerm"] = Relationship(back_populates="data_descriptor")
+     term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
+
+
+ class UTerm(SQLModel, PkMixin, IdMixin, table=True):
+     __tablename__ = "uterms"
+     specs: dict = Field(sa_column=sa.Column(JSON))
+     kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
+     data_descriptor_pk: int | None = Field(
+         default=None, foreign_key="data_descriptors.pk"
+     )
+     data_descriptor: DataDescriptor = Relationship(back_populates="terms")
+
+
+ def universe_create_db(db_file_path: Path) -> None:
+     try:
+         connection = db.DBConnection(db_file_path)
+     except Exception as e:
+         msg = f'Unable to create SQLite file at {db_file_path}. Abort.'
+         _LOGGER.fatal(msg)
+         raise RuntimeError(msg) from e
+     try:
+         # Avoid creating project tables.
+         tables_to_be_created = [SQLModel.metadata.tables['uterms'],
+                                 SQLModel.metadata.tables['data_descriptors'],
+                                 SQLModel.metadata.tables['universes']]
+         SQLModel.metadata.create_all(connection.get_engine(), tables=tables_to_be_created)
+     except Exception as e:
+         msg = f'Unable to create tables in SQLite database at {db_file_path}. Abort.'
+         _LOGGER.fatal(msg)
+         raise RuntimeError(msg) from e
+
+
+ if __name__ == "__main__":
+     pass
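
The models above map the universe vocabulary onto three tables (universes, data_descriptors, uterms) joined by foreign keys, with term specs stored in SQLite JSON columns. A minimal usage sketch, not part of the package, assuming `DBConnection.create_session()` yields a SQLModel session as the ingestion modules below suggest; the path, hash, and ids are placeholders:

from pathlib import Path

import esgvoc.core.db.connection as db
from esgvoc.core.db.models.mixins import TermKind
from esgvoc.core.db.models.universe import (DataDescriptor, Universe, UTerm,
                                            universe_create_db)

db_path = Path(".cache/dbs/universe.sqlite")  # placeholder location
universe_create_db(db_path)  # creates only the three universe tables

with db.DBConnection(db_path).create_session() as session:
    universe = Universe(git_hash="0" * 40)  # placeholder commit hash
    dd = DataDescriptor(id="frequency", context={}, universe=universe,
                        term_kind=TermKind.PLAIN)
    session.add(UTerm(id="mon", specs={"id": "mon"}, data_descriptor=dd,
                      kind=TermKind.PLAIN))
    session.commit()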
esgvoc/core/db/project_ingestion.py
@@ -0,0 +1,152 @@
+ import logging
+ from pathlib import Path
+
+ import esgvoc.core.constants
+ from esgvoc.core.data_handler import JsonLdResource
+ from esgvoc.core.db.connection import DBConnection
+ from esgvoc.core.service.data_merger import DataMerger
+ from esgvoc.core.db.models.mixins import TermKind
+ from pydantic import BaseModel
+
+ import esgvoc.core.db.connection as db
+ from esgvoc.core.db.connection import read_json_file
+ from esgvoc.core.db.models.project import Collection, Project, PTerm
+
+ _LOGGER = logging.getLogger("project_ingestion")
+
+
+ def infer_term_kind(json_specs: dict) -> TermKind:
+     if esgvoc.core.constants.PATTERN_JSON_KEY in json_specs:
+         return TermKind.PATTERN
+     elif esgvoc.core.constants.COMPOSITE_PARTS_JSON_KEY in json_specs:
+         return TermKind.COMPOSITE
+     else:
+         return TermKind.PLAIN
+
+
+ def ingest_metadata_project(connection: DBConnection, git_hash):
+     with connection.create_session() as session:
+         project = Project(id=str(connection.file_path.stem), git_hash=git_hash, specs={})
+         session.add(project)
+         session.commit()
+
+
+ def get_data_descriptor_id_from_context(collection_context: dict) -> str:
+     data_descriptor_url = collection_context[esgvoc.core.constants.CONTEXT_JSON_KEY][esgvoc.core.constants.DATA_DESCRIPTOR_JSON_KEY]
+     return Path(data_descriptor_url).name
+
+
+ def instantiate_project_term(universe_term_json_specs: dict,
+                              project_term_json_specs_update: dict,
+                              pydantic_class: type[BaseModel]) -> dict:
+     term_from_universe = pydantic_class(**universe_term_json_specs)
+     updated_term = term_from_universe.model_copy(
+         update=project_term_json_specs_update, deep=True
+     )
+     return updated_term.model_dump()
+
+
+ def ingest_collection(collection_dir_path: Path,
+                       project: Project,
+                       project_db_session) -> None:
+     collection_id = collection_dir_path.name
+     collection_context_file_path = collection_dir_path.joinpath(esgvoc.core.constants.CONTEXT_FILENAME)
+     try:
+         collection_context = read_json_file(collection_context_file_path)
+         data_descriptor_id = get_data_descriptor_id_from_context(collection_context)
+     except Exception as e:
+         msg = f'Unable to read project context file {collection_context_file_path}. Abort.'
+         _LOGGER.fatal(msg)
+         raise RuntimeError(msg) from e
+     collection = Collection(
+         id=collection_id,
+         context=collection_context,
+         project=project,
+         data_descriptor_id=data_descriptor_id,
+         term_kind="")  # Set once the first term is read (assumes all terms of a collection share the same kind).
+     term_kind_collection = None
+
+     for term_file_path in collection_dir_path.iterdir():
+         _LOGGER.debug(f"found term path: {term_file_path}")
+         if term_file_path.is_file() and term_file_path.suffix == ".json":
+             try:
+                 json_specs = DataMerger(data=JsonLdResource(uri=str(term_file_path)),
+                                         locally_available={"https://espri-mod.github.io/mip-cmor-tables": ".cache/repos/mip-cmor-tables"}).merge_linked_json()[-1]
+                 term_kind = infer_term_kind(json_specs)
+                 term_id = json_specs["id"]
+
+                 if term_kind_collection is None:
+                     term_kind_collection = term_kind
+
+             except Exception as e:
+                 _LOGGER.warning(f'Unable to read term {term_file_path}. Skip.\n{str(e)}')
+                 continue
+             try:
+                 term = PTerm(
+                     id=term_id,
+                     specs=json_specs,
+                     collection=collection,
+                     kind=term_kind,
+                 )
+                 project_db_session.add(term)
+             except Exception as e:
+                 _LOGGER.error(
+                     f"failed to add term {term_id} of data descriptor {data_descriptor_id} "
+                     + f"to the collection {collection_id} of the project {project.id}. Skip {term_id}.\n{str(e)}"
+                 )
+                 continue
+     if term_kind_collection:
+         collection.term_kind = term_kind_collection
+     project_db_session.add(collection)
+
+
+ def ingest_project(project_dir_path: Path,
+                    project_db_file_path: Path,
+                    git_hash: str):
+     try:
+         project_connection = db.DBConnection(project_db_file_path)
+     except Exception as e:
+         msg = f'Unable to read project SQLite file at {project_db_file_path}. Abort.'
+         _LOGGER.fatal(msg)
+         raise RuntimeError(msg) from e
+
+     with project_connection.create_session() as project_db_session:
+         try:
+             project_specs_file_path = project_dir_path.joinpath(esgvoc.core.constants.PROJECT_SPECS_FILENAME)
+             project_json_specs = read_json_file(project_specs_file_path)
+             project_id = project_json_specs[esgvoc.core.constants.PROJECT_ID_JSON_KEY]
+         except Exception as e:
+             msg = f'Unable to read project specs file {project_specs_file_path}. Abort.'
+             _LOGGER.fatal(msg)
+             raise RuntimeError(msg) from e
+
+         project = Project(id=project_id, specs=project_json_specs, git_hash=git_hash)
+         project_db_session.add(project)
+
+         for collection_dir_path in project_dir_path.iterdir():
+             if collection_dir_path.is_dir() and (collection_dir_path / "000_context.jsonld").exists():  # TODO: maybe put that in settings.
+                 _LOGGER.debug(f"found collection dir: {collection_dir_path}")
+                 try:
+                     ingest_collection(collection_dir_path,
+                                       project,
+                                       project_db_session)
+                 except Exception as e:
+                     msg = f'Unexpected error while ingesting collection {collection_dir_path}. Abort.'
+                     _LOGGER.fatal(msg)
+                     raise RuntimeError(msg) from e
+         project_db_session.commit()
+
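
A hypothetical driver for `ingest_project`, mirroring the `__main__` block of the universe ingestion module below; `project_create_db` is assumed here by analogy with `universe_create_db` (it is not shown in this diff), and the paths and hash are placeholders:

from pathlib import Path

from esgvoc.core.db.models.project import project_create_db  # assumed by analogy with universe_create_db
from esgvoc.core.db.project_ingestion import ingest_project

root_dir = Path(".cache")
project_create_db(root_dir / "dbs/cmip6plus.sqlite")  # placeholder DB path
ingest_project(project_dir_path=root_dir / "repos/CMIP6Plus_CVs",
               project_db_file_path=root_dir / "dbs/cmip6plus.sqlite",
               git_hash="0" * 40)  # placeholder commit hash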
esgvoc/core/db/universe_ingestion.py
@@ -0,0 +1,120 @@
+ import logging
+ from pathlib import Path
+
+ import esgvoc.core.constants
+ from esgvoc.core.data_handler import JsonLdResource
+ from esgvoc.core.service.data_merger import DataMerger
+ from sqlmodel import Session, select
+
+ import esgvoc.core.db.connection as db
+ from esgvoc.core.db.connection import read_json_file
+ from esgvoc.core.db.models.mixins import TermKind
+ from esgvoc.core.db.models.universe import DataDescriptor, UTerm, Universe
+ from esgvoc.core.db.models.universe import universe_create_db
+
+ _LOGGER = logging.getLogger(__name__)
+
+
+ def infer_term_kind(json_specs: dict) -> TermKind:
+     if esgvoc.core.constants.PATTERN_JSON_KEY in json_specs:
+         return TermKind.PATTERN
+     elif esgvoc.core.constants.COMPOSITE_PARTS_JSON_KEY in json_specs:
+         return TermKind.COMPOSITE
+     else:
+         return TermKind.PLAIN
+
+
+ def ingest_universe(universe_repo_dir_path: Path, universe_db_file_path: Path) -> None:
+     try:
+         connection = db.DBConnection(universe_db_file_path)
+     except Exception as e:
+         msg = f'Unable to read universe SQLite file at {universe_db_file_path}. Abort.'
+         _LOGGER.fatal(msg)
+         raise IOError(msg) from e
+
+     for data_descriptor_dir_path in universe_repo_dir_path.iterdir():
+         if data_descriptor_dir_path.is_dir() and (data_descriptor_dir_path / "000_context.jsonld").exists():  # TODO: maybe put that in settings.
+             try:
+                 ingest_data_descriptor(data_descriptor_dir_path, connection)
+             except Exception as e:
+                 msg = f'Unexpected error while processing data descriptor {data_descriptor_dir_path}. Abort.'
+                 _LOGGER.fatal(msg)
+                 raise RuntimeError(msg) from e
+
+
+ def ingest_metadata_universe(connection, git_hash):
+     with connection.create_session() as session:
+         universe = Universe(git_hash=git_hash)
+         session.add(universe)
+         session.commit()
+
+
+ def ingest_data_descriptor(data_descriptor_path: Path,
+                            connection: db.DBConnection) -> None:
+     data_descriptor_id = data_descriptor_path.name
+
+     context_file_path = data_descriptor_path.joinpath(esgvoc.core.constants.CONTEXT_FILENAME)
+     try:
+         context = read_json_file(context_file_path)
+     except Exception as e:
+         msg = f'Unable to read the context file {context_file_path} of data descriptor ' \
+               f'{data_descriptor_id}. Skip.\n{str(e)}'
+         _LOGGER.warning(msg)
+         return
+
+     with connection.create_session() as session:
+         data_descriptor = DataDescriptor(id=data_descriptor_id,
+                                          context=context,
+                                          term_kind="")  # Set once the first term is read (assumes all terms of a data descriptor share the same kind).
+         term_kind_dd = None
+
+         _LOGGER.debug(f"add data_descriptor: {data_descriptor_id}")
+         for term_file_path in data_descriptor_path.iterdir():
+             _LOGGER.debug(f"found term path: {term_file_path}, {term_file_path.suffix}")
+             if term_file_path.is_file() and term_file_path.suffix == ".json":
+                 try:
+                     json_specs = DataMerger(data=JsonLdResource(uri=str(term_file_path)),
+                                             locally_available={"https://espri-mod.github.io/mip-cmor-tables": ".cache/repos/mip-cmor-tables"}).merge_linked_json()[-1]
+                     term_kind = infer_term_kind(json_specs)
+                     term_id = json_specs["id"]
+
+                     if term_kind_dd is None:
+                         term_kind_dd = term_kind
+
+                 except Exception as e:
+                     _LOGGER.warning(f'Unable to read term {term_file_path} for data descriptor {data_descriptor_path}. Skip.\n{str(e)}')
+                     continue
+                 if term_id and json_specs and data_descriptor and term_kind:
+                     _LOGGER.debug(f"adding {term_id}")
+                     term = UTerm(
+                         id=term_id,
+                         specs=json_specs,
+                         data_descriptor=data_descriptor,
+                         kind=term_kind,
+                     )
+                     session.add(term)
+         if term_kind_dd is not None:
+             data_descriptor.term_kind = term_kind_dd
+         session.add(data_descriptor)
+         session.commit()
+
+
+ def get_universe_term(data_descriptor_id: str,
+                       term_id: str,
+                       universe_db_session: Session) -> tuple[TermKind, dict]:
+     statement = (
+         select(UTerm)
+         .join(DataDescriptor)
+         .where(DataDescriptor.id == data_descriptor_id, UTerm.id == term_id)
+     )
+     results = universe_db_session.exec(statement)
+     term = results.one()
+     return term.kind, term.specs
+
+
+ if __name__ == "__main__":
+     # ingest_universe(db.UNIVERSE_DIR_PATH, db.UNIVERSE_DB_FILE_PATH)
+     import os
+     root_dir = Path(str(os.getcwd())).parent.parent
+     print(root_dir)
+     universe_create_db(root_dir / Path(".cache/dbs/universe.sqlite"))
+     ingest_universe(root_dir / Path(".cache/repos/mip-cmor-tables"), root_dir / Path(".cache/dbs/universe.sqlite"))
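
Once the universe database is built, `get_universe_term` resolves a single term by data descriptor and term ids. A sketch against the database created by the `__main__` block above; "frequency" and "mon" are placeholder ids, and `results.one()` raises if no matching term exists:

from pathlib import Path

import esgvoc.core.db.connection as db
from esgvoc.core.db.universe_ingestion import get_universe_term

connection = db.DBConnection(Path(".cache/dbs/universe.sqlite"))
with connection.create_session() as session:
    kind, specs = get_universe_term("frequency", "mon", session)  # placeholder ids
    print(kind, specs)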
esgvoc/core/logging.conf
@@ -0,0 +1,21 @@
+ [loggers]
+ keys=root
+
+ [logger_root]
+ level=ERROR
+ handlers=stdout
+
+ [handlers]
+ keys=stdout
+
+ [handler_stdout]
+ class=StreamHandler
+ level=NOTSET
+ formatter=stdout
+ args=(sys.stdout,)
+
+ [formatters]
+ keys=stdout
+
+ [formatter_stdout]
+ format=%(asctime)s [%(levelname)s] %(name)s: %(message)s
esgvoc/core/logging_handler.py
@@ -0,0 +1,4 @@
+ import logging.config
+ from pathlib import Path
+
+ logging.config.fileConfig(f"{Path(__file__).parent}/logging.conf")
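
logging.conf routes every logger through a single stdout handler with the root level at ERROR, and importing logging_handler applies that config as a side effect. A quick check of the resulting behaviour, assuming the package is importable:

import logging

import esgvoc.core.logging_handler  # noqa: F401  applies logging.conf on import

logger = logging.getLogger("universe_db_creation")
logger.error("printed: ERROR passes the root threshold")
logger.info("silent: INFO is below the root ERROR level")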
esgvoc/core/repo_fetcher.py
@@ -0,0 +1,259 @@
+ import os
+ import subprocess
+ import requests
+ from pydantic import BaseModel, ValidationError
+ from typing import List, Optional
+ from contextlib import contextmanager
+ import logging
+ import sys
+
+ _LOGGER = logging.getLogger(__name__)
+
+
+ @contextmanager
+ def redirect_stdout_to_log(level=logging.INFO):
+     """
+     Redirect stdout and stderr to the module logger temporarily.
+     """
+     class StreamToLogger:
+         def __init__(self, log_level):
+             self.log_level = log_level
+
+         def write(self, message):
+             if message.strip():  # Avoid logging empty lines.
+                 _LOGGER.log(self.log_level, message.strip())
+
+         def flush(self):
+             pass  # No-op for compatibility.
+
+     old_stdout = sys.stdout
+     old_stderr = sys.stderr
+     sys.stdout = StreamToLogger(level)
+     sys.stderr = StreamToLogger(level)
+     try:
+         yield
+     finally:
+         sys.stdout = old_stdout
+         sys.stderr = old_stderr
+
+
+ class GitHubRepository(BaseModel):
+     id: int
+     name: str
+     full_name: str
+     description: Optional[str]
+     html_url: str
+     stargazers_count: int
+     forks_count: int
+     language: Optional[str]
+     created_at: str
+     updated_at: str
+
+
+ class GitHubBranch(BaseModel):
+     name: str
+     commit: dict
+     protected: bool
+
+
+ class RepoFetcher:
+     """
+     RepoFetcher is responsible for fetching data from external sources such as GitHub.
+     """
+
+     def __init__(self, base_url: str = "https://api.github.com", local_path: str = ".cache/repos"):
+         self.base_url = base_url
+         self.repo_dir = local_path
+
+     def fetch_repositories(self, user: str) -> List[GitHubRepository]:
+         """
+         Fetch the repositories of a given GitHub user.
+         :param user: GitHub username
+         :return: List of GitHubRepository objects
+         """
+         url = f"{self.base_url}/users/{user}/repos"
+         response = requests.get(url)
+
+         if response.status_code != 200:
+             raise Exception(f"Failed to fetch data: {response.status_code} - {response.text}")
+
+         try:
+             data = response.json()
+             return [GitHubRepository(**repo) for repo in data]
+         except ValidationError as e:
+             raise Exception(f"Data validation error: {e}")
+
+     def fetch_repository_details(self, owner: str, repo: str) -> GitHubRepository:
+         """
+         Fetch the details of a specific repository.
+         :param owner: Repository owner
+         :param repo: Repository name
+         :return: GitHubRepository object
+         """
+         url = f"{self.base_url}/repos/{owner}/{repo}"
+         response = requests.get(url)
+
+         if response.status_code != 200:
+             raise Exception(f"Failed to fetch data: {response.status_code} - {response.text}")
+
+         try:
+             data = response.json()
+             return GitHubRepository(**data)
+         except ValidationError as e:
+             raise Exception(f"Data validation error: {e}")
+
+     def fetch_branch_details(self, owner: str, repo: str, branch: str) -> GitHubBranch:
+         """
+         Fetch the details of a specific branch in a repository.
+         :param owner: Repository owner
+         :param repo: Repository name
+         :param branch: Branch name
+         :return: GitHubBranch object
+         """
+         url = f"{self.base_url}/repos/{owner}/{repo}/branches/{branch}"
+         response = requests.get(url)
+
+         if response.status_code != 200:
+             raise Exception(f"Failed to fetch branch data: {response.status_code} - {response.text}")
+
+         try:
+             return GitHubBranch(**response.json())
+         except ValidationError as e:
+             raise Exception(f"Data validation error: {e}")
+
+     def list_directory(self, owner, repo, branch='main'):
+         """
+         List the directories in the root of a GitHub repository.
+
+         :param owner: GitHub username or organization name.
+         :param repo: Repository name.
+         :param branch: Branch name (default: 'main').
+         :return: List of directories in the repository.
+         """
+         url = f"https://api.github.com/repos/{owner}/{repo}/contents/?ref={branch}"
+         response = requests.get(url)
+         response.raise_for_status()  # Raise an error for bad responses.
+         contents = response.json()
+         directories = [item['name'] for item in contents if item['type'] == 'dir']
+         return directories
+
+     def list_files(self, owner, repo, directory, branch='main'):
+         """
+         List the files in a specific directory of a GitHub repository.
+
+         :param owner: GitHub username or organization name.
+         :param repo: Repository name.
+         :param directory: Target directory path within the repo.
+         :param branch: Branch name (default: 'main').
+         :return: List of files in the specified directory.
+         """
+         url = f"https://api.github.com/repos/{owner}/{repo}/contents/{directory}?ref={branch}"
+         response = requests.get(url)
+         response.raise_for_status()  # Raise an error for bad responses.
+         contents = response.json()
+         files = [item['name'] for item in contents if item['type'] == 'file']
+         return files
+
+     def clone_repository(self, owner: str, repo: str, branch: Optional[str] = None):
+         """
+         Clone a GitHub repository into the local repository directory, or pull if it is already cloned.
+         :param owner: Repository owner
+         :param repo: Repository name
+         :param branch: (Optional) The branch to clone. Clones the default branch if None.
+         """
+         repo_url = f"https://github.com/{owner}/{repo}.git"
+
+         command = ["git", "clone", repo_url, f"{self.repo_dir}/{repo}"]
+         if branch:
+             command.extend(["--branch", branch])
+         with redirect_stdout_to_log():
+             try:
+                 subprocess.run(command, check=True)
+                 _LOGGER.debug(f"Repository cloned successfully into {self.repo_dir}/{repo}")
+             except subprocess.CalledProcessError:
+                 # The clone fails if the directory already exists: fall back to a pull.
+                 try:
+                     current_work_dir = os.getcwd()
+                     os.chdir(f"{self.repo_dir}/{repo}")
+                     command = ["git", "pull"]
+                     subprocess.run(command, check=True)
+                     os.chdir(current_work_dir)
+                 except Exception as e:
+                     raise Exception(f"Failed to clone repository: {e}")
+
+     def get_github_version_with_api(self, owner: str, repo: str, branch: str = "main"):
+         """Fetch the latest commit hash of a branch through the GitHub API."""
+         details = self.fetch_branch_details(owner, repo, branch)
+         return details.commit.get('sha')
+
+     def get_github_version(self, owner: str, repo: str, branch: str = "main"):
+         """Fetch the latest commit hash of a branch with the git ls-remote command."""
+         repo_url = f"https://github.com/{owner}/{repo}.git"
+         command = ["git", "ls-remote", repo_url]
+         if branch:
+             command.extend([branch])
+
+         output = None
+         try:
+             result = subprocess.run(command, capture_output=True,
+                                     text=True,
+                                     check=True)
+             # Parse the output to get the commit hash.
+             output = result.stdout.strip()
+             _LOGGER.debug(f"Repository fetched successfully from {repo_url}")
+         except Exception as e:
+             _LOGGER.debug("error with git ls-remote " + repr(e))
+         if output:
+             commit_hash = output.split()[0]
+             return commit_hash
+         return None
+
+     def get_local_repo_version(self, repo_path: str, branch: Optional[str] = "main"):
+         """Check the version of the local repository by reading the latest commit hash."""
+         if os.path.exists(repo_path):
+             command = ["git", "-C", repo_path]
+             if branch:
+                 # Ensure we are on the requested branch.
+                 command.extend(["switch", branch])
+             with redirect_stdout_to_log():
+                 subprocess.run(command,
+                                stdout=subprocess.PIPE,  # Capture stdout.
+                                stderr=subprocess.PIPE,  # Capture stderr.
+                                text=True)  # Decode output as text.
+             # Get the latest commit hash (SHA) from the local repository.
+             commit_hash = subprocess.check_output(["git", "-C", repo_path, "rev-parse", "HEAD"],
+                                                   stderr=subprocess.PIPE,
+                                                   text=True).strip()
+             return commit_hash
+         return None
+
+
+ if __name__ == "__main__":
+     fetcher = RepoFetcher()
+
+     # Fetch repositories for a user:
+     # repos = fetcher.fetch_repositories("ESPRI-Mod")
+     # for repo in repos:
+     #     print(repo)
+
+     # Fetch a specific repository's details:
+     # repo_details = fetcher.fetch_repository_details("ESPRI-Mod", "mip-cmor-tables")
+     # print(repo_details)
+     # branch_details = fetcher.fetch_branch_details("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
+     # print(branch_details)
+
+     fetcher.clone_repository("ESPRI-Mod", "mip-cmor-tables", branch="uni_proj_ld")
+
+     # a = fetcher.get_github_version("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
+     # print(a)
+     # a = fetcher.get_local_repo_version("mip-cmor-tables", "uni_proj_ld")
+     # print(a)
+
+     fetcher.clone_repository("ESPRI-Mod", "CMIP6Plus_CVs", branch="uni_proj_ld")
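
A short demonstration, under the same assumptions as the `__main__` block above, of `redirect_stdout_to_log`, which the fetcher uses to keep git's console output inside the logging pipeline; the print call is routed to the module logger instead of the terminal:

import logging

from esgvoc.core.repo_fetcher import RepoFetcher, redirect_stdout_to_log

logging.getLogger("esgvoc.core.repo_fetcher").setLevel(logging.INFO)

with redirect_stdout_to_log(logging.INFO):
    print("captured by the logger, not written to the console")

fetcher = RepoFetcher()
sha = fetcher.get_github_version("ESPRI-Mod", "mip-cmor-tables")  # latest commit on main, or None on failure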
esgvoc/core/service/__init__.py
@@ -0,0 +1,8 @@
+ from esgvoc.core.service.settings import ServiceSettings
+ from esgvoc.core.service.state import StateService
+ from pathlib import Path
+
+ settings_path = Path(__file__).parent / "settings.toml"
+ service_settings = ServiceSettings.load_from_file(str(settings_path))
+ state_service = StateService(service_settings)
+
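
settings.toml is parsed once at import time, so every consumer shares a single ServiceSettings and a single StateService instance; a sketch of the consuming side:

# Hypothetical consumer: the singletons are created when the subpackage is imported.
from esgvoc.core.service import service_settings, state_service

print(service_settings)  # ServiceSettings parsed from settings.toml at import time
print(state_service)     # the shared StateService built from those settings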