esgvoc 0.4.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release of esgvoc has been flagged as potentially problematic.

Files changed (74)
  1. esgvoc/__init__.py +1 -1
  2. esgvoc/api/data_descriptors/__init__.py +52 -28
  3. esgvoc/api/data_descriptors/activity.py +3 -3
  4. esgvoc/api/data_descriptors/area_label.py +16 -1
  5. esgvoc/api/data_descriptors/branded_suffix.py +20 -0
  6. esgvoc/api/data_descriptors/branded_variable.py +12 -0
  7. esgvoc/api/data_descriptors/consortium.py +14 -13
  8. esgvoc/api/data_descriptors/contact.py +5 -0
  9. esgvoc/api/data_descriptors/conventions.py +6 -0
  10. esgvoc/api/data_descriptors/creation_date.py +5 -0
  11. esgvoc/api/data_descriptors/data_descriptor.py +14 -9
  12. esgvoc/api/data_descriptors/data_specs_version.py +5 -0
  13. esgvoc/api/data_descriptors/date.py +1 -1
  14. esgvoc/api/data_descriptors/directory_date.py +1 -1
  15. esgvoc/api/data_descriptors/experiment.py +13 -11
  16. esgvoc/api/data_descriptors/forcing_index.py +1 -1
  17. esgvoc/api/data_descriptors/frequency.py +3 -3
  18. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  19. esgvoc/api/data_descriptors/grid_label.py +2 -2
  20. esgvoc/api/data_descriptors/horizontal_label.py +15 -1
  21. esgvoc/api/data_descriptors/initialisation_index.py +1 -1
  22. esgvoc/api/data_descriptors/institution.py +8 -5
  23. esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
  24. esgvoc/api/data_descriptors/license.py +3 -3
  25. esgvoc/api/data_descriptors/member_id.py +9 -0
  26. esgvoc/api/data_descriptors/mip_era.py +1 -1
  27. esgvoc/api/data_descriptors/model_component.py +1 -1
  28. esgvoc/api/data_descriptors/obs_type.py +5 -0
  29. esgvoc/api/data_descriptors/organisation.py +1 -1
  30. esgvoc/api/data_descriptors/physic_index.py +1 -1
  31. esgvoc/api/data_descriptors/product.py +2 -2
  32. esgvoc/api/data_descriptors/publication_status.py +5 -0
  33. esgvoc/api/data_descriptors/realisation_index.py +1 -1
  34. esgvoc/api/data_descriptors/realm.py +1 -1
  35. esgvoc/api/data_descriptors/region.py +5 -0
  36. esgvoc/api/data_descriptors/resolution.py +3 -3
  37. esgvoc/api/data_descriptors/source.py +9 -5
  38. esgvoc/api/data_descriptors/source_type.py +1 -1
  39. esgvoc/api/data_descriptors/table.py +3 -2
  40. esgvoc/api/data_descriptors/temporal_label.py +15 -1
  41. esgvoc/api/data_descriptors/time_range.py +4 -3
  42. esgvoc/api/data_descriptors/title.py +5 -0
  43. esgvoc/api/data_descriptors/tracking_id.py +5 -0
  44. esgvoc/api/data_descriptors/variable.py +25 -12
  45. esgvoc/api/data_descriptors/variant_label.py +3 -3
  46. esgvoc/api/data_descriptors/vertical_label.py +14 -0
  47. esgvoc/api/project_specs.py +117 -2
  48. esgvoc/api/projects.py +328 -287
  49. esgvoc/api/search.py +30 -3
  50. esgvoc/api/universe.py +42 -27
  51. esgvoc/apps/drs/generator.py +87 -74
  52. esgvoc/apps/jsg/cmip6_template.json +74 -0
  53. esgvoc/apps/jsg/json_schema_generator.py +194 -0
  54. esgvoc/cli/config.py +500 -0
  55. esgvoc/cli/find.py +138 -0
  56. esgvoc/cli/get.py +43 -38
  57. esgvoc/cli/main.py +10 -3
  58. esgvoc/cli/status.py +27 -18
  59. esgvoc/cli/valid.py +10 -15
  60. esgvoc/core/db/models/project.py +11 -11
  61. esgvoc/core/db/models/universe.py +3 -3
  62. esgvoc/core/db/project_ingestion.py +40 -40
  63. esgvoc/core/db/universe_ingestion.py +36 -33
  64. esgvoc/core/logging_handler.py +24 -2
  65. esgvoc/core/repo_fetcher.py +61 -59
  66. esgvoc/core/service/data_merger.py +47 -34
  67. esgvoc/core/service/state.py +107 -83
  68. {esgvoc-0.4.0.dist-info → esgvoc-1.0.1.dist-info}/METADATA +5 -20
  69. esgvoc-1.0.1.dist-info/RECORD +95 -0
  70. esgvoc/core/logging.conf +0 -21
  71. esgvoc-0.4.0.dist-info/RECORD +0 -80
  72. {esgvoc-0.4.0.dist-info → esgvoc-1.0.1.dist-info}/WHEEL +0 -0
  73. {esgvoc-0.4.0.dist-info → esgvoc-1.0.1.dist-info}/entry_points.txt +0 -0
  74. {esgvoc-0.4.0.dist-info → esgvoc-1.0.1.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/core/db/project_ingestion.py

@@ -10,11 +10,11 @@ import esgvoc.core.service as service
  from esgvoc.core.data_handler import JsonLdResource
  from esgvoc.core.db.connection import DBConnection, read_json_file
  from esgvoc.core.db.models.mixins import TermKind
- from esgvoc.core.db.models.project import Collection, Project, PTerm
+ from esgvoc.core.db.models.project import PCollection, Project, PTerm
  from esgvoc.core.exceptions import EsgvocDbError
  from esgvoc.core.service.data_merger import DataMerger

- _LOGGER = logging.getLogger("project_ingestion")
+ _LOGGER = logging.getLogger(__name__)


  def infer_term_kind(json_specs: dict) -> TermKind:
@@ -34,52 +34,53 @@ def ingest_metadata_project(connection: DBConnection, git_hash):


  def get_data_descriptor_id_from_context(collection_context: dict) -> str:
-     data_descriptor_url = collection_context[esgvoc.core.constants.CONTEXT_JSON_KEY]\
-         [esgvoc.core.constants.DATA_DESCRIPTOR_JSON_KEY]  # noqa E211
+     data_descriptor_url = collection_context[esgvoc.core.constants.CONTEXT_JSON_KEY][
+         esgvoc.core.constants.DATA_DESCRIPTOR_JSON_KEY
+     ]  # noqa E211
      return Path(data_descriptor_url).name


- def instantiate_project_term(universe_term_json_specs: dict,
-                              project_term_json_specs_update: dict,
-                              pydantic_class: type[BaseModel]) -> dict:
+ def instantiate_project_term(
+     universe_term_json_specs: dict, project_term_json_specs_update: dict, pydantic_class: type[BaseModel]
+ ) -> dict:
      term_from_universe = pydantic_class(**universe_term_json_specs)
-     updated_term = term_from_universe.model_copy(
-         update=project_term_json_specs_update, deep=True
-     )
+     updated_term = term_from_universe.model_copy(update=project_term_json_specs_update, deep=True)
      return updated_term.model_dump()


- def ingest_collection(collection_dir_path: Path,
-                       project: Project,
-                       project_db_session) -> None:
+ def ingest_collection(collection_dir_path: Path, project: Project, project_db_session) -> None:
      collection_id = collection_dir_path.name
      collection_context_file_path = collection_dir_path.joinpath(esgvoc.core.constants.CONTEXT_FILENAME)
      try:
          collection_context = read_json_file(collection_context_file_path)
          data_descriptor_id = get_data_descriptor_id_from_context(collection_context)
      except Exception as e:
-         msg = f'unable to read project context file {collection_context_file_path}'
+         msg = f"unable to read project context file {collection_context_file_path}"
          _LOGGER.fatal(msg)
          raise EsgvocDbError(msg) from e
      # [KEEP]
-     collection = Collection(
+     collection = PCollection(
          id=collection_id,
          context=collection_context,
          project=project,
          data_descriptor_id=data_descriptor_id,
-         term_kind="")  # We ll know it only when we ll add a term
-                        # (hypothesis all term have the same kind in a collection)  # noqa E116
+         term_kind="",
+     )  # We ll know it only when we ll add a term
+     # (hypothesis all term have the same kind in a collection)  # noqa E116
      term_kind_collection = None

      for term_file_path in collection_dir_path.iterdir():
          _LOGGER.debug(f"found term path : {term_file_path}")
          if term_file_path.is_file() and term_file_path.suffix == ".json":
              try:
-                 locally_avail = {"https://espri-mod.github.io/mip-cmor-tables":
-                                  service.current_state.universe.local_path}
-                 json_specs = DataMerger(data=JsonLdResource(uri=str(term_file_path)),
-                                         # locally_available={"https://espri-mod.github.io/mip-cmor-tables":".cache/repos/WCRP-universe"}).merge_linked_json()[-1]
-                                         locally_available=locally_avail).merge_linked_json()[-1]
+                 locally_avail = {
+                     "https://espri-mod.github.io/mip-cmor-tables": service.current_state.universe.local_path
+                 }
+                 json_specs = DataMerger(
+                     data=JsonLdResource(uri=str(term_file_path)),
+                     # locally_available={"https://espri-mod.github.io/mip-cmor-tables":".cache/repos/WCRP-universe"}).merge_linked_json()[-1]
+                     locally_available=locally_avail,
+                 ).merge_linked_json()[-1]
                  term_kind = infer_term_kind(json_specs)
                  term_id = json_specs["id"]
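The reflowed instantiate_project_term keeps the existing overlay logic: the universe term is validated through its Pydantic class, then the project-level overrides are applied with model_copy(update=..., deep=True). A minimal sketch of that pattern, using a hypothetical Frequency model and made-up specs rather than the real data descriptors:

from pydantic import BaseModel


class Frequency(BaseModel):
    # Hypothetical data descriptor model, for illustration only.
    id: str
    name: str
    description: str = ""


universe_specs = {"id": "mon", "name": "monthly", "description": "monthly mean samples"}
project_update = {"description": "monthly mean samples, project-specific wording"}

term_from_universe = Frequency(**universe_specs)
# deep=True copies nested models too, so the project term never aliases universe data.
project_term = term_from_universe.model_copy(update=project_update, deep=True)
print(project_term.model_dump())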
@@ -87,7 +88,7 @@ def ingest_collection(collection_dir_path: Path,
                      term_kind_collection = term_kind

              except Exception as e:
-                 _LOGGER.warning(f'Unable to read term {term_file_path}. Skip.\n{str(e)}')
+                 _LOGGER.warning(f"Unable to read term {term_file_path}. Skip.\n{str(e)}")
                  continue
              try:
                  term = PTerm(
@@ -108,14 +109,11 @@ def ingest_collection(collection_dir_path: Path,
      project_db_session.add(collection)


- def ingest_project(project_dir_path: Path,
-                    project_db_file_path: Path,
-                    git_hash: str
-                    ):
+ def ingest_project(project_dir_path: Path, project_db_file_path: Path, git_hash: str):
      try:
          project_connection = db.DBConnection(project_db_file_path)
      except Exception as e:
-         msg = f'unable to read project SQLite file at {project_db_file_path}'
+         msg = f"unable to read project SQLite file at {project_db_file_path}"
          _LOGGER.fatal(msg)
          raise EsgvocDbError(msg) from e

@@ -125,7 +123,7 @@ def ingest_project(project_dir_path: Path,
          project_json_specs = read_json_file(project_specs_file_path)
          project_id = project_json_specs[esgvoc.core.constants.PROJECT_ID_JSON_KEY]
      except Exception as e:
-         msg = f'unable to read project specs file {project_specs_file_path}'
+         msg = f"unable to read project specs file {project_specs_file_path}"
          _LOGGER.fatal(msg)
          raise EsgvocDbError(msg) from e

@@ -137,11 +135,9 @@ def ingest_project(project_dir_path: Path,
          if collection_dir_path.is_dir() and (collection_dir_path / "000_context.jsonld").exists():
              _LOGGER.debug(f"found collection dir : {collection_dir_path}")
              try:
-                 ingest_collection(collection_dir_path,
-                                   project,
-                                   project_db_session)
+                 ingest_collection(collection_dir_path, project, project_db_session)
              except Exception as e:
-                 msg = f'unexpected error while ingesting collection {collection_dir_path}'
+                 msg = f"unexpected error while ingesting collection {collection_dir_path}"
                  _LOGGER.fatal(msg)
                  raise EsgvocDbError(msg) from e
      project_db_session.commit()
@@ -149,21 +145,25 @@
      # Well, the following instructions are not data duplication. It is more building an index.
      # Read: https://sqlite.org/fts5.html
      try:
-         sql_query = 'INSERT INTO pterms_fts5(pk, id, specs, kind, collection_pk) ' + \
-                     'SELECT pk, id, specs, kind, collection_pk FROM pterms;'  # noqa: S608
+         sql_query = (
+             "INSERT INTO pterms_fts5(pk, id, specs, kind, collection_pk) "  # noqa: S608
+             + "SELECT pk, id, specs, kind, collection_pk FROM pterms;"
+         )
          project_db_session.exec(text(sql_query))  # type: ignore
      except Exception as e:
-         msg = f'unable to insert rows into pterms_fts5 table for {project_db_file_path}'
+         msg = f"unable to insert rows into pterms_fts5 table for {project_db_file_path}"
          _LOGGER.fatal(msg)
          raise EsgvocDbError(msg) from e
      project_db_session.commit()
      try:
-         sql_query = 'INSERT INTO pcollections_fts5(pk, id, data_descriptor_id, context, ' + \
-                     'project_pk, term_kind) SELECT pk, id, data_descriptor_id, context, ' + \
-                     'project_pk, term_kind FROM collections;'  # noqa: S608
+         sql_query = (
+             "INSERT INTO pcollections_fts5(pk, id, data_descriptor_id, context, "  # noqa: S608
+             + "project_pk, term_kind) SELECT pk, id, data_descriptor_id, context, "
+             + "project_pk, term_kind FROM pcollections;"
+         )
          project_db_session.exec(text(sql_query))  # type: ignore
      except Exception as e:
-         msg = f'unable to insert rows into pcollections_fts5 table for {project_db_file_path}'
+         msg = f"unable to insert rows into pcollections_fts5 table for {project_db_file_path}"
          _LOGGER.fatal(msg)
          raise EsgvocDbError(msg) from e
      project_db_session.commit()
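Both INSERT INTO ... SELECT statements copy rows from the regular pterms and pcollections tables into SQLite FTS5 virtual tables, which is what the comment means by building an index rather than duplicating data (this hunk also fixes the source table name from collections to pcollections, matching the Collection to PCollection rename). A standalone sketch of the same idiom with the standard sqlite3 module, using an illustrative schema rather than the real esgvoc one, and assuming an SQLite build with FTS5 enabled (CPython's bundled SQLite normally has it):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript(
    """
    CREATE TABLE pterms (pk INTEGER PRIMARY KEY, id TEXT, specs TEXT);
    CREATE VIRTUAL TABLE pterms_fts5 USING fts5(pk UNINDEXED, id, specs);
    INSERT INTO pterms (id, specs) VALUES
        ('tas', '{"name": "near-surface air temperature"}'),
        ('pr',  '{"name": "precipitation flux"}');
    """
)
# Same index-building idiom as the ingestion code: copy the rows into the FTS5 table.
conn.execute("INSERT INTO pterms_fts5(pk, id, specs) SELECT pk, id, specs FROM pterms;")
# Full-text MATCH query against the indexed specs column.
for (term_id,) in conn.execute("SELECT id FROM pterms_fts5 WHERE pterms_fts5 MATCH 'temperature'"):
    print(term_id)  # tas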
esgvoc/core/db/universe_ingestion.py

@@ -30,17 +30,18 @@ def ingest_universe(universe_repo_dir_path: Path, universe_db_file_path: Path) -
      try:
          connection = db.DBConnection(universe_db_file_path)
      except Exception as e:
-         msg = f'Unable to read universe SQLite file at {universe_db_file_path}. Abort.'
+         msg = f"Unable to read universe SQLite file at {universe_db_file_path}. Abort."
          _LOGGER.fatal(msg)
          raise IOError(msg) from e

      for data_descriptor_dir_path in universe_repo_dir_path.iterdir():
-         if data_descriptor_dir_path.is_dir() and \
-                 (data_descriptor_dir_path / "000_context.jsonld").exists():  # TODO may be put that in setting
+         if (
+             data_descriptor_dir_path.is_dir() and (data_descriptor_dir_path / "000_context.jsonld").exists()
+         ):  # TODO may be put that in setting
              try:
                  ingest_data_descriptor(data_descriptor_dir_path, connection)
              except Exception as e:
-                 msg = f'unexpected error while processing data descriptor {data_descriptor_dir_path}'
+                 msg = f"unexpected error while processing data descriptor {data_descriptor_dir_path}"
                  _LOGGER.fatal(msg)
                  raise EsgvocDbError(msg) from e

@@ -48,20 +49,24 @@ def ingest_universe(universe_repo_dir_path: Path, universe_db_file_path: Path) -
      # Well, the following instructions are not data duplication. It is more building an index.
      # Read: https://sqlite.org/fts5.html
      try:
-         sql_query = 'INSERT INTO uterms_fts5(pk, id, specs, kind, data_descriptor_pk) ' + \
-                     'SELECT pk, id, specs, kind, data_descriptor_pk FROM uterms;'  # noqa: S608
+         sql_query = (
+             "INSERT INTO uterms_fts5(pk, id, specs, kind, data_descriptor_pk) "
+             + "SELECT pk, id, specs, kind, data_descriptor_pk FROM uterms;"
+         )  # noqa: S608
          session.exec(text(sql_query))  # type: ignore
      except Exception as e:
-         msg = f'unable to insert rows into uterms_fts5 table for {universe_db_file_path}'
+         msg = f"unable to insert rows into uterms_fts5 table for {universe_db_file_path}"
          _LOGGER.fatal(msg)
          raise EsgvocDbError(msg) from e
      session.commit()
      try:
-         sql_query = 'INSERT INTO udata_descriptors_fts5(pk, id, universe_pk, context, term_kind) ' + \
-                     'SELECT pk, id, universe_pk, context, term_kind FROM udata_descriptors;'  # noqa: S608
+         sql_query = (
+             "INSERT INTO udata_descriptors_fts5(pk, id, universe_pk, context, term_kind) "
+             + "SELECT pk, id, universe_pk, context, term_kind FROM udata_descriptors;"
+         )  # noqa: S608
          session.exec(text(sql_query))  # type: ignore
      except Exception as e:
-         msg = f'unable to insert rows into udata_descriptors_fts5 table for {universe_db_file_path}'
+         msg = f"unable to insert rows into udata_descriptors_fts5 table for {universe_db_file_path}"
          _LOGGER.fatal(msg)
          raise EsgvocDbError(msg) from e
      session.commit()
@@ -74,24 +79,21 @@ def ingest_metadata_universe(connection, git_hash):
          session.commit()


- def ingest_data_descriptor(data_descriptor_path: Path,
-                            connection: db.DBConnection) -> None:
+ def ingest_data_descriptor(data_descriptor_path: Path, connection: db.DBConnection) -> None:
      data_descriptor_id = data_descriptor_path.name

      context_file_path = data_descriptor_path.joinpath(esgvoc.core.constants.CONTEXT_FILENAME)
      try:
          context = read_json_file(context_file_path)
      except Exception as e:
-         msg = f'Unable to read the context file {context_file_path} of data descriptor \
-               {data_descriptor_id}. Skip.\n{str(e)}'
+         msg = f"Unable to read the context file {context_file_path} of data descriptor \
+               {data_descriptor_id}. Skip.\n{str(e)}"
          _LOGGER.warning(msg)
          return

      with connection.create_session() as session:
          # We ll know it only when we ll add a term (hypothesis all term have the same kind in a data_descriptor)
-         data_descriptor = UDataDescriptor(id=data_descriptor_id,
-                                           context=context,
-                                           term_kind="")
+         data_descriptor = UDataDescriptor(id=data_descriptor_id, context=context, term_kind="")
          term_kind_dd = None

          _LOGGER.debug(f"add data_descriptor : {data_descriptor_id}")
@@ -99,28 +101,34 @@ def ingest_data_descriptor(data_descriptor_path: Path,
              _LOGGER.debug(f"found term path : {term_file_path}, {term_file_path.suffix}")
              if term_file_path.is_file() and term_file_path.suffix == ".json":
                  try:
-                     locally_available = {"https://espri-mod.github.io/mip-cmor-tables":
-                                          service.current_state.universe.local_path}
-                     json_specs = DataMerger(data=JsonLdResource(uri=str(term_file_path)),
-                                             locally_available=locally_available).merge_linked_json()[-1]
+                     locally_available = {
+                         "https://espri-mod.github.io/mip-cmor-tables": service.current_state.universe.local_path
+                     }
+
+                     json_specs = DataMerger(
+                         data=JsonLdResource(uri=str(term_file_path)), locally_available=locally_available
+                     ).merge_linked_json()[-1]
+
                      term_kind = infer_term_kind(json_specs)
                      term_id = json_specs["id"]

                      if term_kind_dd is None:
                          term_kind_dd = term_kind
-
                  except Exception as e:
-                     _LOGGER.warning(f'Unable to read term {term_file_path} for data descriptor ' +
-                                     f'{data_descriptor_path}. Skip.\n{str(e)}')
+                     _LOGGER.warning(
+                         f"Unable to read term {term_file_path} for data descriptor "
+                         + f"{data_descriptor_path}. Skip.\n{str(e)}"
+                     )
                      continue
                  if term_id and json_specs and data_descriptor and term_kind:
-                     _LOGGER.debug("adding {term_id}")
+                     _LOGGER.debug(f"adding {term_id}")
                      term = UTerm(
                          id=term_id,
                          specs=json_specs,
                          data_descriptor=data_descriptor,
                          kind=term_kind,
                      )
+
                      session.add(term)
          if term_kind_dd is not None:
              data_descriptor.term_kind = term_kind_dd
@@ -128,14 +136,8 @@ def ingest_data_descriptor(data_descriptor_path: Path,
          session.commit()


- def get_universe_term(data_descriptor_id: str,
-                       term_id: str,
-                       universe_db_session: Session) -> tuple[TermKind, dict]:
-     statement = (
-         select(UTerm)
-         .join(UDataDescriptor)
-         .where(UDataDescriptor.id == data_descriptor_id, UTerm.id == term_id)
-     )
+ def get_universe_term(data_descriptor_id: str, term_id: str, universe_db_session: Session) -> tuple[TermKind, dict]:
+     statement = select(UTerm).join(UDataDescriptor).where(UDataDescriptor.id == data_descriptor_id, UTerm.id == term_id)
      results = universe_db_session.exec(statement)
      term = results.one()
      return term.kind, term.specs
@@ -143,6 +145,7 @@ def get_universe_term(data_descriptor_id: str,

  if __name__ == "__main__":
      import os
+
      root_dir = Path(str(os.getcwd())).parent.parent
      print(root_dir)
      universe_create_db(root_dir / Path(".cache/dbs/universe.sqlite"))
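Both ingestion paths hand DataMerger a locally_available mapping from the remote universe base URI (https://espri-mod.github.io/mip-cmor-tables) to the local clone, so linked JSON-LD terms are resolved from disk instead of over the network. A rough sketch of that kind of URI-to-path remapping follows; resolve_reference is a hypothetical helper written for illustration, not part of the DataMerger API, and the clone path is taken from the commented-out example in the diff:

from pathlib import Path

# Assumed local clone location, mirroring the commented-out example above.
LOCALLY_AVAILABLE = {
    "https://espri-mod.github.io/mip-cmor-tables": Path(".cache/repos/WCRP-universe"),
}


def resolve_reference(uri: str) -> str:
    """Return a local file path when the URI falls under a locally cloned base, else the URI itself."""
    for base, local_root in LOCALLY_AVAILABLE.items():
        if uri.startswith(base):
            relative = uri[len(base):].lstrip("/")
            return str(local_root / relative)
    return uri


print(resolve_reference("https://espri-mod.github.io/mip-cmor-tables/frequency/mon.json"))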
esgvoc/core/logging_handler.py

@@ -1,4 +1,26 @@
  import logging.config
- from pathlib import Path

- logging.config.fileConfig(f"{Path(__file__).parent}/logging.conf", disable_existing_loggers=False)
+ LOGGING_CONFIG = {
+     'version': 1,
+     'disable_existing_loggers': False,
+     'formatters': {
+         'esgvoc_formatter': {
+             'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s',
+         },
+     },
+     'handlers': {
+         'esgvoc_stdout': {
+             'class': 'logging.StreamHandler',
+             'formatter': 'esgvoc_formatter',
+         },
+     },
+     'loggers': {
+         'esgvoc': {
+             'handlers': ['esgvoc_stdout'],
+             'level': 'ERROR',
+             'propagate': False,
+         }
+     }
+ }
+
+ logging.config.dictConfig(LOGGING_CONFIG)
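The packaged logging.conf file is gone (see the removed esgvoc/core/logging.conf above); logging is now configured in code with dictConfig, which attaches a stream handler to the esgvoc logger at ERROR level and disables propagation. An application that wants more verbose output can re-tune that named logger once esgvoc.core.logging_handler has been imported, directly or indirectly; the sketch below assumes importing the package is enough to trigger that:

import logging

import esgvoc  # noqa: F401  # assumption: importing the package applies LOGGING_CONFIG

# Drop the packaged "esgvoc" logger from ERROR down to DEBUG while troubleshooting.
logging.getLogger("esgvoc").setLevel(logging.DEBUG)

# Optionally copy the package logs to a file as well.
file_handler = logging.FileHandler("esgvoc.log")
file_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s"))
logging.getLogger("esgvoc").addHandler(file_handler)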
esgvoc/core/repo_fetcher.py

@@ -1,19 +1,23 @@
+ import logging
  import os
  import subprocess
+ import sys
+ from contextlib import contextmanager
+ from pathlib import Path
+ from typing import List, Optional
+
  import requests
  from pydantic import BaseModel, ValidationError
- from typing import List, Optional
- from contextlib import contextmanager
- import logging
- import sys

  _LOGGER = logging.getLogger(__name__)

+
  @contextmanager
  def redirect_stdout_to_log(level=logging.INFO):
      """
      Redirect stdout to the global _LOGGER temporarily.
      """
+
      class StreamToLogger:
          def __init__(self, log_level):
              self.log_level = log_level
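The import block is regrouped, but the redirect_stdout_to_log context manager keeps its role: while it is active, anything written to stdout (for instance by git subprocesses) is fed to _LOGGER instead of the terminal. A self-contained sketch of the same idea follows; the body of StreamToLogger is truncated in this hunk, so the write/flush details below are illustrative rather than the exact esgvoc implementation:

import logging
import sys
from contextlib import contextmanager

logger = logging.getLogger("demo")


@contextmanager
def redirect_stdout_to_log(level=logging.INFO):
    """Temporarily route print()/stdout writes into a logger."""

    class StreamToLogger:
        def __init__(self, log_level):
            self.log_level = log_level

        def write(self, message):
            message = message.strip()
            if message:  # skip the bare newlines that print() emits
                logger.log(self.log_level, message)

        def flush(self):  # file-like API expected by some callers
            pass

    old_stdout = sys.stdout
    sys.stdout = StreamToLogger(level)
    try:
        yield
    finally:
        sys.stdout = old_stdout


logging.basicConfig(level=logging.INFO)
with redirect_stdout_to_log():
    print("this line ends up in the log, not on stdout")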
@@ -48,6 +52,7 @@ class GitHubRepository(BaseModel):
      created_at: str
      updated_at: str

+
  class GitHubBranch(BaseModel):
      name: str
      commit: dict
@@ -59,7 +64,7 @@ class RepoFetcher:
      DataFetcher is responsible for fetching data from external sources such as GitHub.
      """

-     def __init__(self, base_url: str = "https://api.github.com",local_path: str = ".cache/repos"):
+     def __init__(self, base_url: str = "https://api.github.com", local_path: str = ".cache/repos"):
          self.base_url = base_url
          self.repo_dir = local_path

@@ -100,7 +105,6 @@ class RepoFetcher:
          except ValidationError as e:
              raise Exception(f"Data validation error: {e}")

-
      def fetch_branch_details(self, owner: str, repo: str, branch: str) -> GitHubBranch:
          """
          Fetch details of a specific branch in a repository.
@@ -120,7 +124,7 @@ class RepoFetcher:
          except ValidationError as e:
              raise Exception(f"Data validation error: {e}")

-     def list_directory(self,owner, repo, branch='main'):
+     def list_directory(self, owner, repo, branch="main"):
          """
          List directories in the root of a GitHub repository.

@@ -133,10 +137,10 @@ class RepoFetcher:
          response = requests.get(url)
          response.raise_for_status()  # Raise an error for bad responses
          contents = response.json()
-         directories = [item['name'] for item in contents if item['type'] == 'dir']
+         directories = [item["name"] for item in contents if item["type"] == "dir"]
          return directories

-     def list_files(self,owner, repo, directory, branch='main'):
+     def list_files(self, owner, repo, directory, branch="main"):
          """
          List files in a specific directory of a GitHub repository.

@@ -150,10 +154,10 @@ class RepoFetcher:
          response = requests.get(url)
          response.raise_for_status()  # Raise an error for bad responses
          contents = response.json()
-         files = [item['name'] for item in contents if item['type'] == 'file']
+         files = [item["name"] for item in contents if item["type"] == "file"]
          return files
-
-     def clone_repository(self, owner: str, repo: str, branch: Optional[str] = None, local_path: str|None = None):
+
+     def clone_repository(self, owner: str, repo: str, branch: Optional[str] = None, local_path: str | None = None):
          """
          Clone a GitHub repository to a target directory.
          :param owner: Repository owner
@@ -162,52 +166,47 @@ class RepoFetcher:
          :param branch: (Optional) The branch to clone. Clones the default branch if None.
          """
          repo_url = f"https://github.com/{owner}/{repo}.git"
-         destination = local_path if local_path else f"{self.repo_dir}/{repo}"
+         destination = local_path if local_path else f"{self.repo_dir}/{repo}"

          command = ["git", "clone", repo_url, destination]
          if branch:
              command.extend(["--branch", branch])
          with redirect_stdout_to_log():
-
              try:
-                 subprocess.run(command, check=True)
-                 _LOGGER.debug(f"Repository cloned successfully into {destination}")
-             except subprocess.CalledProcessError:
-                 try:
+                 if not Path(destination).exists():
+                     subprocess.run(command, check=True)
+                     _LOGGER.debug(f"Repository cloned successfully into {destination}")
+                 else:
                      current_work_dir = os.getcwd()
-                     os.chdir(f"{self.repo_dir}/{repo}")
+                     os.chdir(f"{destination}")
                      command = ["git", "pull"]
                      subprocess.run(command, check=True)
                      os.chdir(current_work_dir)

+             except Exception as e:
+                 raise Exception(f"Failed to clone repository: {e}")

-                 except Exception as e:
-                     raise Exception(f"Failed to clone repository: {e}")
-
-     def get_github_version_with_api(self, owner: str, repo: str, branch: str ="main"):
-         """ Fetch the latest commit version (or any other versioning scheme) from GitHub. """
-         details = self.fetch_branch_details( owner, repo, branch)
-         return details.commit.get('sha')
+     def get_github_version_with_api(self, owner: str, repo: str, branch: str = "main"):
+         """Fetch the latest commit version (or any other versioning scheme) from GitHub."""
+         details = self.fetch_branch_details(owner, repo, branch)
+         return details.commit.get("sha")

-     def get_github_version(self, owner: str, repo: str, branch: str="main"):
-         """ Fetch the latest commit version (or any other versioning scheme) from GitHub. with command git fetch """
+     def get_github_version(self, owner: str, repo: str, branch: str = "main"):
+         """Fetch the latest commit version (or any other versioning scheme) from GitHub. with command git fetch"""
          repo_url = f"https://github.com/{owner}/{repo}.git"
          command = ["git", "ls-remote", repo_url, f"{self.repo_dir}/{repo}"]
          if branch:
              command.extend([branch])

          # with redirect_stdout_to_log():
-         output=None
+         output = None
          try:
-             result = subprocess.run(command, capture_output=True,
-                                     text=True,
-                                     check=True)
+             result = subprocess.run(command, capture_output=True, text=True, check=True)
              # Parse the output to get the commit hash
              output = result.stdout.strip()
              _LOGGER.debug(f"Repository fetch successfully from {self.repo_dir}/{repo}")
          except Exception as e:
-
-             _LOGGER.debug("error in with git fetch " + repr(e))
+             _LOGGER.debug("error in with git fetch " + repr(e))
          if output is not None:
              commit_hash = output.split()[0]
              return commit_hash
@@ -216,45 +215,48 @@ class RepoFetcher:
          # return git_hash

      def get_local_repo_version(self, repo_path: str, branch: Optional[str] = "main"):
-         """ Check the version of the local repository by fetching the latest commit hash. """
+         """Check the version of the local repository by fetching the latest commit hash."""
          # repo_path = os.path.join(self.repo_dir, repo)
          if os.path.exists(repo_path):
-             #print("EXIST")
+             # print("EXIST")
              command = ["git", "-C", repo_path]
              if branch:
                  command.extend(["switch", branch])
              # Ensure we are on the correct branch
              with redirect_stdout_to_log():
-                 subprocess.run(command,
-                                stdout=subprocess.PIPE,  # Capture stdout
-                                stderr=subprocess.PIPE,  # Capture stderr
-                                text=True)  # Decode output as text
+                 subprocess.run(
+                     command,
+                     stdout=subprocess.PIPE,  # Capture stdout
+                     stderr=subprocess.PIPE,  # Capture stderr
+                     text=True,
+                 )  # Decode output as text
              # Get the latest commit hash (SHA) from the local repository
-             commit_hash = subprocess.check_output(["git", "-C", repo_path, "rev-parse", "HEAD"],
-                                                   stderr=subprocess.PIPE,
-                                                   text=True).strip()
+             commit_hash = subprocess.check_output(
+                 ["git", "-C", repo_path, "rev-parse", "HEAD"], stderr=subprocess.PIPE, text=True
+             ).strip()
              return commit_hash
          return None

+
  if __name__ == "__main__":
      fetcher = RepoFetcher()
-
+
      # Fetch repositories for a user
-     #repos = fetcher.fetch_repositories("ESPRI-Mod")
-     #for repo in repos:
+     # repos = fetcher.fetch_repositories("ESPRI-Mod")
+     # for repo in repos:
      #     print(repo)

      # Fetch a specific repository's details
-     #repo_details = fetcher.fetch_repository_details("ESPRI-Mod", "mip-cmor-tables")
-     #"print(repo_details)
-     #branch_details = fetcher.fetch_branch_details("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
-     #print(branch_details)
-
-     fetcher.clone_repository("ESPRI-Mod","mip-cmor-tables", branch="uni_proj_ld")
-
-     #a =fetcher.get_github_version("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
-     #print(a)
-     #a = fetcher.get_local_repo_version("mip-cmor-tables","uni_proj_ld")
-     #print(a)
-
-     fetcher.clone_repository("ESPRI-Mod","CMIP6Plus_CVs", branch="uni_proj_ld")
+     # repo_details = fetcher.fetch_repository_details("ESPRI-Mod", "mip-cmor-tables")
+     # "print(repo_details)
+     # branch_details = fetcher.fetch_branch_details("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
+     # print(branch_details)
+
+     fetcher.clone_repository("ESPRI-Mod", "mip-cmor-tables", branch="uni_proj_ld")
+
+     # a =fetcher.get_github_version("ESPRI-Mod", "mip-cmor-tables", "uni_proj_ld")
+     # print(a)
+     # a = fetcher.get_local_repo_version("mip-cmor-tables","uni_proj_ld")
+     # print(a)
+
+     fetcher.clone_repository("ESPRI-Mod", "CMIP6Plus_CVs", branch="uni_proj_ld")
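clone_repository no longer treats a failed clone as the signal to pull; it now checks whether the destination already exists and either clones a fresh copy or runs git pull in the existing checkout. The same clone-or-pull idiom as a standalone sketch (URL and path are placeholders), using git -C instead of chdir so the process working directory is left untouched:

import subprocess
from pathlib import Path


def clone_or_pull(repo_url: str, destination: Path, branch: str | None = None) -> None:
    """Clone repo_url into destination, or pull if a checkout is already there."""
    if not destination.exists():
        command = ["git", "clone", repo_url, str(destination)]
        if branch:
            command.extend(["--branch", branch])
        subprocess.run(command, check=True)
    else:
        # Update in place instead of cloning over an existing checkout.
        subprocess.run(["git", "-C", str(destination), "pull"], check=True)


# Placeholder arguments for illustration only.
clone_or_pull("https://github.com/ESPRI-Mod/mip-cmor-tables.git", Path(".cache/repos/mip-cmor-tables"))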