esgvoc-0.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic.
- esgvoc/__init__.py +1 -0
- esgvoc/api/__init__.py +62 -0
- esgvoc/api/_utils.py +39 -0
- esgvoc/api/data_descriptors/__init__.py +60 -0
- esgvoc/api/data_descriptors/activity.py +51 -0
- esgvoc/api/data_descriptors/consortium.py +66 -0
- esgvoc/api/data_descriptors/date.py +48 -0
- esgvoc/api/data_descriptors/experiment.py +60 -0
- esgvoc/api/data_descriptors/forcing_index.py +47 -0
- esgvoc/api/data_descriptors/frequency.py +45 -0
- esgvoc/api/data_descriptors/grid_label.py +46 -0
- esgvoc/api/data_descriptors/initialisation_index.py +46 -0
- esgvoc/api/data_descriptors/institution.py +58 -0
- esgvoc/api/data_descriptors/license.py +47 -0
- esgvoc/api/data_descriptors/mip_era.py +46 -0
- esgvoc/api/data_descriptors/model_component.py +47 -0
- esgvoc/api/data_descriptors/organisation.py +42 -0
- esgvoc/api/data_descriptors/physic_index.py +47 -0
- esgvoc/api/data_descriptors/product.py +45 -0
- esgvoc/api/data_descriptors/realisation_index.py +46 -0
- esgvoc/api/data_descriptors/realm.py +44 -0
- esgvoc/api/data_descriptors/resolution.py +46 -0
- esgvoc/api/data_descriptors/source.py +57 -0
- esgvoc/api/data_descriptors/source_type.py +43 -0
- esgvoc/api/data_descriptors/sub_experiment.py +43 -0
- esgvoc/api/data_descriptors/table.py +50 -0
- esgvoc/api/data_descriptors/time_range.py +28 -0
- esgvoc/api/data_descriptors/variable.py +77 -0
- esgvoc/api/data_descriptors/variant_label.py +49 -0
- esgvoc/api/projects.py +854 -0
- esgvoc/api/report.py +86 -0
- esgvoc/api/search.py +92 -0
- esgvoc/api/universe.py +218 -0
- esgvoc/apps/drs/__init__.py +16 -0
- esgvoc/apps/drs/models.py +43 -0
- esgvoc/apps/drs/parser.py +27 -0
- esgvoc/cli/config.py +79 -0
- esgvoc/cli/get.py +142 -0
- esgvoc/cli/install.py +14 -0
- esgvoc/cli/main.py +22 -0
- esgvoc/cli/status.py +26 -0
- esgvoc/cli/valid.py +156 -0
- esgvoc/core/constants.py +13 -0
- esgvoc/core/convert.py +0 -0
- esgvoc/core/data_handler.py +133 -0
- esgvoc/core/db/__init__.py +5 -0
- esgvoc/core/db/connection.py +31 -0
- esgvoc/core/db/models/mixins.py +18 -0
- esgvoc/core/db/models/project.py +65 -0
- esgvoc/core/db/models/universe.py +59 -0
- esgvoc/core/db/project_ingestion.py +152 -0
- esgvoc/core/db/universe_ingestion.py +120 -0
- esgvoc/core/logging.conf +21 -0
- esgvoc/core/logging_handler.py +4 -0
- esgvoc/core/repo_fetcher.py +259 -0
- esgvoc/core/service/__init__.py +8 -0
- esgvoc/core/service/data_merger.py +83 -0
- esgvoc/core/service/esg_voc.py +79 -0
- esgvoc/core/service/settings.py +64 -0
- esgvoc/core/service/settings.toml +12 -0
- esgvoc/core/service/settings_default.toml +20 -0
- esgvoc/core/service/state.py +222 -0
- esgvoc-0.1.2.dist-info/METADATA +54 -0
- esgvoc-0.1.2.dist-info/RECORD +66 -0
- esgvoc-0.1.2.dist-info/WHEEL +4 -0
- esgvoc-0.1.2.dist-info/entry_points.txt +2 -0
esgvoc/core/db/models/universe.py
ADDED
@@ -0,0 +1,59 @@
+import logging
+from pathlib import Path
+
+import sqlalchemy as sa
+from sqlalchemy.dialects.sqlite import JSON
+from sqlmodel import Column, Field, Relationship, SQLModel
+
+import esgvoc.core.db.connection as db
+from esgvoc.core.db.models.mixins import IdMixin, PkMixin, TermKind
+
+_LOGGER = logging.getLogger("universe_db_creation")
+
+
+class Universe(SQLModel, PkMixin, table=True):
+    __tablename__ = "universes"
+    git_hash: str
+    data_descriptors: list["DataDescriptor"] = Relationship(back_populates="universe")
+
+
+class DataDescriptor(SQLModel, PkMixin, IdMixin, table=True):
+    __tablename__ = "data_descriptors"
+    context: dict = Field(sa_column=sa.Column(JSON))
+    universe_pk: int | None = Field(default=None, foreign_key="universes.pk")
+    universe: Universe = Relationship(back_populates="data_descriptors")
+    terms: list["UTerm"] = Relationship(back_populates="data_descriptor")
+    term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
+
+
+class UTerm(SQLModel, PkMixin, IdMixin, table=True):
+    __tablename__ = "uterms"
+    specs: dict = Field(sa_column=sa.Column(JSON))
+    kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
+    data_descriptor_pk: int | None = Field(
+        default=None, foreign_key="data_descriptors.pk"
+    )
+    data_descriptor: DataDescriptor = Relationship(back_populates="terms")
+
+
+def universe_create_db(db_file_path: Path) -> None:
+    try:
+        connection = db.DBConnection(db_file_path)
+    except Exception as e:
+        msg = f'Unable to create SQLite file at {db_file_path}. Abort.'
+        _LOGGER.fatal(msg)
+        raise RuntimeError(msg) from e
+    try:
+        # Avoid creating project tables.
+        tables_to_be_created = [SQLModel.metadata.tables['uterms'],
+                                SQLModel.metadata.tables['data_descriptors'],
+                                SQLModel.metadata.tables['universes']]
+        SQLModel.metadata.create_all(connection.get_engine(), tables=tables_to_be_created)
+    except Exception as e:
+        msg = f'Unable to create tables in SQLite database at {db_file_path}. Abort.'
+        _LOGGER.fatal(msg)
+        raise RuntimeError(msg) from e
+
+
+if __name__ == "__main__":
+    pass
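Taken together, the three models form a one-to-many chain (Universe -> DataDescriptor -> UTerm). A minimal sketch of exercising it, assuming DBConnection and create_session() behave as they do in the ingestion modules below; the ids and git hash are made up for illustration:

from pathlib import Path

from esgvoc.core.db.connection import DBConnection
from esgvoc.core.db.models.mixins import TermKind
from esgvoc.core.db.models.universe import DataDescriptor, Universe, UTerm, universe_create_db

db_path = Path(".cache/dbs/universe.sqlite")  # parent directory assumed to exist
universe_create_db(db_path)  # creates only the universes, data_descriptors and uterms tables

with DBConnection(db_path).create_session() as session:
    universe = Universe(git_hash="0123abc")              # made-up commit hash
    dd = DataDescriptor(id="frequency", context={},      # made-up descriptor id
                        term_kind=TermKind.PLAIN, universe=universe)
    term = UTerm(id="mon", specs={"id": "mon"},          # made-up term id
                 kind=TermKind.PLAIN, data_descriptor=dd)
    session.add(term)  # the relationships cascade universe and dd on commit
    session.commit()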
esgvoc/core/db/project_ingestion.py
ADDED
@@ -0,0 +1,152 @@
+import logging
+from pathlib import Path
+
+import esgvoc.core.constants
+from esgvoc.core.data_handler import JsonLdResource
+from esgvoc.core.db.connection import DBConnection
+from esgvoc.core.service.data_merger import DataMerger
+from esgvoc.core.db.models.mixins import TermKind
+from pydantic import BaseModel
+
+import esgvoc.core.db.connection as db
+from esgvoc.core.db.connection import read_json_file
+from esgvoc.core.db.models.project import Collection, Project, PTerm
+
+
+_LOGGER = logging.getLogger("project_ingestion")
+
+def infer_term_kind(json_specs: dict) -> TermKind:
+    if esgvoc.core.constants.PATTERN_JSON_KEY in json_specs:
+        return TermKind.PATTERN
+    elif esgvoc.core.constants.COMPOSITE_PARTS_JSON_KEY in json_specs:
+        return TermKind.COMPOSITE
+    else:
+        return TermKind.PLAIN
+
+
+def ingest_metadata_project(connection: DBConnection, git_hash):
+    with connection.create_session() as session:
+        project = Project(id=str(connection.file_path.stem), git_hash=git_hash, specs={})
+        session.add(project)
+        session.commit()
+
+###############################
+def get_data_descriptor_id_from_context(collection_context: dict) -> str:
+    data_descriptor_url = collection_context[esgvoc.core.constants.CONTEXT_JSON_KEY][esgvoc.core.constants.DATA_DESCRIPTOR_JSON_KEY]
+    return Path(data_descriptor_url).name
+
+
+def instantiate_project_term(universe_term_json_specs: dict,
+                             project_term_json_specs_update: dict,
+                             pydantic_class: type[BaseModel]) -> dict:
+    term_from_universe = pydantic_class(**universe_term_json_specs)
+    updated_term = term_from_universe.model_copy(
+        update=project_term_json_specs_update, deep=True
+    )
+    return updated_term.model_dump()
+
+
+def ingest_collection(collection_dir_path: Path,
+                      project: Project,
+                      project_db_session) -> None:
+
+    collection_id = collection_dir_path.name
+    collection_context_file_path = collection_dir_path.joinpath(esgvoc.core.constants.CONTEXT_FILENAME)
+    try:
+        collection_context = read_json_file(collection_context_file_path)
+        data_descriptor_id = get_data_descriptor_id_from_context(collection_context)
+    except Exception as e:
+        msg = f'Unable to read project context file {collection_context_file_path}. Abort.'
+        _LOGGER.fatal(msg)
+        raise RuntimeError(msg) from e
+    # [KEEP]
+    collection = Collection(
+        id=collection_id,
+        context=collection_context,
+        project=project,
+        data_descriptor_id=data_descriptor_id,
+        term_kind="")  # Known only once a term is added (hypothesis: all terms in a collection share the same kind).
+    term_kind_collection = None
+
+    for term_file_path in collection_dir_path.iterdir():
+        _LOGGER.debug(f"found term path : {term_file_path}")
+        if term_file_path.is_file() and term_file_path.suffix == ".json":
+            try:
+                json_specs = DataMerger(data=JsonLdResource(uri=str(term_file_path)),
+                                        locally_available={"https://espri-mod.github.io/mip-cmor-tables": ".cache/repos/mip-cmor-tables"}).merge_linked_json()[-1]
+                term_kind = infer_term_kind(json_specs)
+                term_id = json_specs["id"]
+
+                if term_kind_collection is None:
+                    term_kind_collection = term_kind
+
+            except Exception as e:
+                _LOGGER.warning(f'Unable to read term {term_file_path}. Skip.\n{str(e)}')
+                continue
+            try:
+                term = PTerm(
+                    id=term_id,
+                    specs=json_specs,
+                    collection=collection,
+                    kind=term_kind,
+                )
+                project_db_session.add(term)
+            except Exception as e:
+                _LOGGER.error(
+                    f"fail to find term {term_id} in data descriptor {data_descriptor_id} "
+                    + f"for the collection {collection_id} of the project {project.id}. Skip {term_id}.\n{str(e)}"
+                )
+                continue
+    if term_kind_collection:
+        collection.term_kind = term_kind_collection
+    project_db_session.add(collection)
+
+def ingest_project(project_dir_path: Path,
+                   project_db_file_path: Path,
+                   git_hash: str
+                   ):
+    try:
+        project_connection = db.DBConnection(project_db_file_path)
+    except Exception as e:
+        msg = f'Unable to read project SQLite file at {project_db_file_path}. Abort.'
+        _LOGGER.fatal(msg)
+        raise RuntimeError(msg) from e
+
+    with project_connection.create_session() as project_db_session:
+        try:
+            project_specs_file_path = project_dir_path.joinpath(esgvoc.core.constants.PROJECT_SPECS_FILENAME)
+            project_json_specs = read_json_file(project_specs_file_path)
+            project_id = project_json_specs[esgvoc.core.constants.PROJECT_ID_JSON_KEY]
+        except Exception as e:
+            msg = f'Unable to read project specs file {project_specs_file_path}. Abort.'
+            _LOGGER.fatal(msg)
+            raise RuntimeError(msg) from e
+
+        project = Project(id=project_id, specs=project_json_specs, git_hash=git_hash)
+        project_db_session.add(project)
+
+        for collection_dir_path in project_dir_path.iterdir():
+            if collection_dir_path.is_dir() and (collection_dir_path / "000_context.jsonld").exists():  # TODO: maybe put that in settings
+                _LOGGER.debug(f"found collection dir : {collection_dir_path}")
+                try:
+                    ingest_collection(collection_dir_path,
+                                      project,
+                                      project_db_session)
+                except Exception as e:
+                    msg = f'Unexpected error while ingesting collection {collection_dir_path}. Abort.'
+                    _LOGGER.fatal(msg)
+                    raise RuntimeError(msg) from e
+        project_db_session.commit()
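A minimal sketch of driving this module end to end, assuming the CVs repository has already been cloned under .cache/repos (the layout used elsewhere in this release) and that the project tables have been created beforehand; the git hash is made up:

from pathlib import Path

from esgvoc.core.db.project_ingestion import ingest_project

ingest_project(project_dir_path=Path(".cache/repos/CMIP6Plus_CVs"),
               project_db_file_path=Path(".cache/dbs/cmip6plus.sqlite"),
               git_hash="0123abc")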
esgvoc/core/db/universe_ingestion.py
ADDED
@@ -0,0 +1,120 @@
+import logging
+from pathlib import Path
+
+import esgvoc.core.constants
+from esgvoc.core.data_handler import JsonLdResource
+from esgvoc.core.service.data_merger import DataMerger
+from sqlmodel import Session, select
+
+import esgvoc.core.db.connection as db
+from esgvoc.core.db.connection import read_json_file
+from esgvoc.core.db.models.mixins import TermKind
+from esgvoc.core.db.models.universe import DataDescriptor, UTerm, Universe
+from esgvoc.core.db.models.universe import universe_create_db
+
+_LOGGER = logging.getLogger(__name__)
+
+def infer_term_kind(json_specs: dict) -> TermKind:
+    if esgvoc.core.constants.PATTERN_JSON_KEY in json_specs:
+        return TermKind.PATTERN
+    elif esgvoc.core.constants.COMPOSITE_PARTS_JSON_KEY in json_specs:
+        return TermKind.COMPOSITE
+    else:
+        return TermKind.PLAIN
+
+
+def ingest_universe(universe_repo_dir_path: Path, universe_db_file_path: Path) -> None:
+    try:
+        connection = db.DBConnection(universe_db_file_path)
+    except Exception as e:
+        msg = f'Unable to read universe SQLite file at {universe_db_file_path}. Abort.'
+        _LOGGER.fatal(msg)
+        raise IOError(msg) from e
+
+    for data_descriptor_dir_path in universe_repo_dir_path.iterdir():
+        if data_descriptor_dir_path.is_dir() and (data_descriptor_dir_path / "000_context.jsonld").exists():  # TODO: maybe put that in settings
+            try:
+                ingest_data_descriptor(data_descriptor_dir_path, connection)
+            except Exception as e:
+                msg = f'Unexpected error while processing data descriptor {data_descriptor_dir_path}. Abort.'
+                _LOGGER.fatal(msg)
+                raise RuntimeError(msg) from e
+
+def ingest_metadata_universe(connection, git_hash):
+    with connection.create_session() as session:
+        universe = Universe(git_hash=git_hash)
+        session.add(universe)
+        session.commit()
+
+def ingest_data_descriptor(data_descriptor_path: Path,
+                           connection: db.DBConnection) -> None:
+
+    data_descriptor_id = data_descriptor_path.name
+
+    context_file_path = data_descriptor_path.joinpath(esgvoc.core.constants.CONTEXT_FILENAME)
+    try:
+        context = read_json_file(context_file_path)
+    except Exception as e:
+        msg = f'Unable to read the context file {context_file_path} of data descriptor \
+{data_descriptor_id}. Skip.\n{str(e)}'
+        _LOGGER.warning(msg)
+        return
+
+    with connection.create_session() as session:
+        data_descriptor = DataDescriptor(id=data_descriptor_id,
+                                         context=context,
+                                         term_kind="")  # Known only once a term is added (hypothesis: all terms in a data descriptor share the same kind).
+        term_kind_dd = None
+
+        _LOGGER.debug(f"add data_descriptor : {data_descriptor_id}")
+        for term_file_path in data_descriptor_path.iterdir():
+            _LOGGER.debug(f"found term path : {term_file_path}, {term_file_path.suffix}")
+            if term_file_path.is_file() and term_file_path.suffix == ".json":
+                try:
+                    json_specs = DataMerger(data=JsonLdResource(uri=str(term_file_path)),
+                                            locally_available={"https://espri-mod.github.io/mip-cmor-tables": ".cache/repos/mip-cmor-tables"}).merge_linked_json()[-1]
+                    term_kind = infer_term_kind(json_specs)
+                    term_id = json_specs["id"]
+
+                    if term_kind_dd is None:
+                        term_kind_dd = term_kind
+
+                except Exception as e:
+                    _LOGGER.warning(f'Unable to read term {term_file_path} for data descriptor {data_descriptor_path}. Skip.\n{str(e)}')
+                    continue
+                if term_id and json_specs and data_descriptor and term_kind:
+                    _LOGGER.debug(f"adding {term_id}")
+                    term = UTerm(
+                        id=term_id,
+                        specs=json_specs,
+                        data_descriptor=data_descriptor,
+                        kind=term_kind,
+                    )
+                    session.add(term)
+        if term_kind_dd is not None:
+            data_descriptor.term_kind = term_kind_dd
+        session.add(data_descriptor)
+        session.commit()
+
+def get_universe_term(data_descriptor_id: str,
+                      term_id: str,
+                      universe_db_session: Session) -> tuple[TermKind, dict]:
+    statement = (
+        select(UTerm)
+        .join(DataDescriptor)
+        .where(DataDescriptor.id == data_descriptor_id, UTerm.id == term_id)
+    )
+    results = universe_db_session.exec(statement)
+    term = results.one()
+    return term.kind, term.specs
+
+
+if __name__ == "__main__":
+    # ingest_universe(db.UNIVERSE_DIR_PATH, db.UNIVERSE_DB_FILE_PATH)
+    import os
+    root_dir = Path(str(os.getcwd())).parent.parent
+    print(root_dir)
+    universe_create_db(root_dir / Path(".cache/dbs/universe.sqlite"))
+    ingest_universe(root_dir / Path(".cache/repos/mip-cmor-tables"), root_dir / Path(".cache/dbs/universe.sqlite"))
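Once a universe has been ingested, get_universe_term gives the reverse lookup; a short sketch, using illustrative data descriptor and term ids:

from pathlib import Path

from esgvoc.core.db.connection import DBConnection
from esgvoc.core.db.universe_ingestion import get_universe_term

connection = DBConnection(Path(".cache/dbs/universe.sqlite"))
with connection.create_session() as session:
    kind, specs = get_universe_term("frequency", "mon", session)  # illustrative ids
    print(kind, specs["id"])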
esgvoc/core/logging.conf
ADDED
@@ -0,0 +1,21 @@
+[loggers]
+keys=root
+
+[logger_root]
+level=ERROR
+handlers=stdout
+
+[handlers]
+keys=stdout
+
+[handler_stdout]
+class=StreamHandler
+level=NOTSET
+formatter=stdout
+args=(sys.stdout,)
+
+[formatters]
+keys=stdout
+
+[formatter_stdout]
+format=%(asctime)s [%(levelname)s] %(name)s: %(message)s
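The file follows the stdlib fileConfig format, so it can be activated with logging.config.fileConfig; a sketch, assuming the file ships inside the esgvoc.core package as the RECORD suggests:

import logging
import logging.config
from importlib import resources

conf_path = resources.files("esgvoc.core") / "logging.conf"  # assumed package location
logging.config.fileConfig(str(conf_path))

logging.getLogger("esgvoc").error("goes to stdout with the configured format")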
esgvoc/core/repo_fetcher.py
ADDED
@@ -0,0 +1,259 @@
+import os
+import subprocess
+import requests
+from pydantic import BaseModel, ValidationError
+from typing import List, Optional
+from contextlib import contextmanager
+import logging
+import sys
+
+_LOGGER = logging.getLogger(__name__)
+
+@contextmanager
+def redirect_stdout_to_log(level=logging.INFO):
+    """
+    Redirect stdout and stderr to the global _LOGGER temporarily.
+    """
+    class StreamToLogger:
+        def __init__(self, log_level):
+            self.log_level = log_level
+
+        def write(self, message):
+            if message.strip():  # Avoid logging empty lines
+                _LOGGER.log(self.log_level, message.strip())
+
+        def flush(self):
+            pass  # No-op for compatibility
+
+    old_stdout = sys.stdout
+    old_stderr = sys.stderr
+    sys.stdout = StreamToLogger(level)
+    sys.stderr = StreamToLogger(level)
+    try:
+        yield
+    finally:
+        sys.stdout = old_stdout
+        sys.stderr = old_stderr
+
+
+class GitHubRepository(BaseModel):
+    id: int
+    name: str
+    full_name: str
+    description: Optional[str]
+    html_url: str
+    stargazers_count: int
+    forks_count: int
+    language: Optional[str]
+    created_at: str
+    updated_at: str
+
+class GitHubBranch(BaseModel):
+    name: str
+    commit: dict
+    protected: bool
+
+
+class RepoFetcher:
+    """
+    RepoFetcher is responsible for fetching data from external sources such as GitHub.
+    """
+
+    def __init__(self, base_url: str = "https://api.github.com", local_path: str = ".cache/repos"):
+        self.base_url = base_url
+        self.repo_dir = local_path
+
+    def fetch_repositories(self, user: str) -> List[GitHubRepository]:
+        """
+        Fetch repositories of a given GitHub user.
+        :param user: GitHub username
+        :return: List of GitHubRepository objects
+        """
+        url = f"{self.base_url}/users/{user}/repos"
+        response = requests.get(url)
+
+        if response.status_code != 200:
+            raise Exception(f"Failed to fetch data: {response.status_code} - {response.text}")
+
+        try:
+            data = response.json()
+            return [GitHubRepository(**repo) for repo in data]
+        except ValidationError as e:
+            raise Exception(f"Data validation error: {e}")
+
+    def fetch_repository_details(self, owner: str, repo: str) -> GitHubRepository:
+        """
+        Fetch details of a specific repository.
+        :param owner: Repository owner
+        :param repo: Repository name
+        :return: GitHubRepository object
+        """
+        url = f"{self.base_url}/repos/{owner}/{repo}"
+        response = requests.get(url)
+
+        if response.status_code != 200:
+            raise Exception(f"Failed to fetch data: {response.status_code} - {response.text}")
+
+        try:
+            data = response.json()
+            return GitHubRepository(**data)
+        except ValidationError as e:
+            raise Exception(f"Data validation error: {e}")
+
+    def fetch_branch_details(self, owner: str, repo: str, branch: str) -> GitHubBranch:
+        """
+        Fetch details of a specific branch in a repository.
+        :param owner: Repository owner
+        :param repo: Repository name
+        :param branch: Branch name
+        :return: GitHubBranch object
+        """
+        url = f"{self.base_url}/repos/{owner}/{repo}/branches/{branch}"
+        response = requests.get(url)
+
+        if response.status_code != 200:
+            raise Exception(f"Failed to fetch branch data: {response.status_code} - {response.text}")
+
+        try:
+            return GitHubBranch(**response.json())
+        except ValidationError as e:
+            raise Exception(f"Data validation error: {e}")
+
+    def list_directory(self, owner, repo, branch='main'):
+        """
+        List directories in the root of a GitHub repository.
+
+        :param owner: GitHub username or organization name.
+        :param repo: Repository name.
+        :param branch: Branch name (default: 'main').
+        :return: List of directories in the repository.
+        """
+        url = f"https://api.github.com/repos/{owner}/{repo}/contents/?ref={branch}"
+        response = requests.get(url)
+        response.raise_for_status()  # Raise an error for bad responses
+        contents = response.json()
+        directories = [item['name'] for item in contents if item['type'] == 'dir']
+        return directories
+
+    def list_files(self, owner, repo, directory, branch='main'):
+        """
+        List files in a specific directory of a GitHub repository.
+
+        :param owner: GitHub username or organization name.
+        :param repo: Repository name.
+        :param directory: Target directory path within the repo.
+        :param branch: Branch name (default: 'main').
+        :return: List of files in the specified directory.
+        """
+        url = f"https://api.github.com/repos/{owner}/{repo}/contents/{directory}?ref={branch}"
+        response = requests.get(url)
+        response.raise_for_status()  # Raise an error for bad responses
+        contents = response.json()
+        files = [item['name'] for item in contents if item['type'] == 'file']
+        return files
+
+    def clone_repository(self, owner: str, repo: str, branch: Optional[str] = None):
+        """
+        Clone a GitHub repository into the local cache directory.
+        :param owner: Repository owner
+        :param repo: Repository name
+        :param branch: (Optional) The branch to clone. Clones the default branch if None.
+        """
+        repo_url = f"https://github.com/{owner}/{repo}.git"
+
+        command = ["git", "clone", repo_url, f"{self.repo_dir}/{repo}"]
+        if branch:
+            command.extend(["--branch", branch])
+        with redirect_stdout_to_log():
+            try:
+                subprocess.run(command, check=True)
+                _LOGGER.debug(f"Repository cloned successfully into {self.repo_dir}/{repo}")
+            except subprocess.CalledProcessError:
+                # The clone fails if the directory already exists: fall back to a pull.
+                try:
+                    current_work_dir = os.getcwd()
+                    os.chdir(f"{self.repo_dir}/{repo}")
+                    command = ["git", "pull"]
+                    subprocess.run(command, check=True)
+                    os.chdir(current_work_dir)
+
+                except Exception as e:
+                    raise Exception(f"Failed to clone repository: {e}")
+
+    def get_github_version_with_api(self, owner: str, repo: str, branch: str = "main"):
+        """ Fetch the latest commit hash of a branch through the GitHub API. """
+        details = self.fetch_branch_details(owner, repo, branch)
+        return details.commit.get('sha')
+
+    def get_github_version(self, owner: str, repo: str, branch: str = "main"):
+        """ Fetch the latest commit hash of a branch with the git ls-remote command. """
+        repo_url = f"https://github.com/{owner}/{repo}.git"
+        command = ["git", "ls-remote", repo_url, f"{self.repo_dir}/{repo}"]
+        if branch:
+            command.extend([branch])
+
+        # with redirect_stdout_to_log():
+        output = None
+        try:
+            result = subprocess.run(command, capture_output=True,
+                                    text=True,
+                                    check=True)
+            # Parse the output to get the commit hash
+            output = result.stdout.strip()
+            _LOGGER.debug(f"Repository fetch successfully from {self.repo_dir}/{repo}")
+        except Exception as e:
+            _LOGGER.debug("error with git fetch " + repr(e))
+        if output is not None:
+            commit_hash = output.split()[0]
+            return commit_hash
+        return None
+
+    def get_local_repo_version(self, repo_path: str, branch: Optional[str] = "main"):
+        """ Check the version of the local repository by fetching the latest commit hash. """
+        if os.path.exists(repo_path):
+            command = ["git", "-C", repo_path]
+            if branch:
+                command.extend(["switch", branch])
+            # Ensure we are on the correct branch
+            with redirect_stdout_to_log():
+                subprocess.run(command,
+                               stdout=subprocess.PIPE,  # Capture stdout
+                               stderr=subprocess.PIPE,  # Capture stderr
+                               text=True)  # Decode output as text
+            # Get the latest commit hash (SHA) from the local repository
+            commit_hash = subprocess.check_output(["git", "-C", repo_path, "rev-parse", "HEAD"],
+                                                  stderr=subprocess.PIPE,
+                                                  text=True).strip()
+            return commit_hash
+        return None
+
+if __name__ == "__main__":
+    fetcher = RepoFetcher()
+
+    # Fetch repositories for a user
+    # repos = fetcher.fetch_repositories("ESPRI-Mod")
+    # for repo in repos:
+    #     print(repo)
+
+    # Fetch a specific repository's details
+    # repo_details = fetcher.fetch_repository_details("ESPRI-Mod", "mip-cmor-tables")
+    # print(repo_details)
+    # branch_details = fetcher.fetch_branch_details("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
+    # print(branch_details)
+
+    fetcher.clone_repository("ESPRI-Mod", "mip-cmor-tables", branch="uni_proj_ld")
+
+    # a = fetcher.get_github_version("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
+    # print(a)
+    # a = fetcher.get_local_repo_version("mip-cmor-tables", "uni_proj_ld")
+    # print(a)
+
+    fetcher.clone_repository("ESPRI-Mod", "CMIP6Plus_CVs", branch="uni_proj_ld")
esgvoc/core/service/__init__.py
ADDED
@@ -0,0 +1,8 @@
+from esgvoc.core.service.settings import ServiceSettings
+from esgvoc.core.service.state import StateService
+from pathlib import Path
+
+settings_path = Path(__file__).parent / "settings.toml"
+service_settings = ServiceSettings.load_from_file(str(settings_path))
+state_service = StateService(service_settings)
+
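Because the singletons are built at import time, downstream code only has to import them; a sketch (StateService's interface lives in state.py, which is not shown in this hunk, so only the objects themselves are referenced):

from esgvoc.core.service import service_settings, state_service

print(type(service_settings).__name__)  # ServiceSettings loaded from settings.toml
print(type(state_service).__name__)     # StateService wrapping those settings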