esgvoc 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +3 -1
- esgvoc/api/__init__.py +96 -72
- esgvoc/api/data_descriptors/__init__.py +18 -12
- esgvoc/api/data_descriptors/activity.py +8 -45
- esgvoc/api/data_descriptors/area_label.py +6 -0
- esgvoc/api/data_descriptors/branded_suffix.py +5 -0
- esgvoc/api/data_descriptors/branded_variable.py +5 -0
- esgvoc/api/data_descriptors/consortium.py +16 -56
- esgvoc/api/data_descriptors/data_descriptor.py +106 -0
- esgvoc/api/data_descriptors/date.py +3 -46
- esgvoc/api/data_descriptors/directory_date.py +3 -46
- esgvoc/api/data_descriptors/experiment.py +19 -54
- esgvoc/api/data_descriptors/forcing_index.py +3 -45
- esgvoc/api/data_descriptors/frequency.py +6 -43
- esgvoc/api/data_descriptors/grid_label.py +6 -44
- esgvoc/api/data_descriptors/horizontal_label.py +6 -0
- esgvoc/api/data_descriptors/initialisation_index.py +3 -44
- esgvoc/api/data_descriptors/institution.py +11 -54
- esgvoc/api/data_descriptors/license.py +4 -44
- esgvoc/api/data_descriptors/mip_era.py +6 -44
- esgvoc/api/data_descriptors/model_component.py +7 -45
- esgvoc/api/data_descriptors/organisation.py +3 -40
- esgvoc/api/data_descriptors/physic_index.py +3 -45
- esgvoc/api/data_descriptors/product.py +4 -43
- esgvoc/api/data_descriptors/realisation_index.py +3 -44
- esgvoc/api/data_descriptors/realm.py +4 -42
- esgvoc/api/data_descriptors/resolution.py +6 -44
- esgvoc/api/data_descriptors/source.py +18 -53
- esgvoc/api/data_descriptors/source_type.py +3 -41
- esgvoc/api/data_descriptors/sub_experiment.py +3 -41
- esgvoc/api/data_descriptors/table.py +6 -48
- esgvoc/api/data_descriptors/temporal_label.py +6 -0
- esgvoc/api/data_descriptors/time_range.py +3 -27
- esgvoc/api/data_descriptors/variable.py +13 -71
- esgvoc/api/data_descriptors/variant_label.py +3 -47
- esgvoc/api/data_descriptors/vertical_label.py +5 -0
- esgvoc/api/project_specs.py +3 -2
- esgvoc/api/projects.py +727 -446
- esgvoc/api/py.typed +0 -0
- esgvoc/api/report.py +29 -16
- esgvoc/api/search.py +140 -95
- esgvoc/api/universe.py +362 -156
- esgvoc/apps/__init__.py +3 -4
- esgvoc/apps/drs/constants.py +1 -1
- esgvoc/apps/drs/generator.py +185 -198
- esgvoc/apps/drs/report.py +272 -136
- esgvoc/apps/drs/validator.py +132 -145
- esgvoc/apps/py.typed +0 -0
- esgvoc/cli/drs.py +32 -21
- esgvoc/cli/get.py +35 -31
- esgvoc/cli/install.py +11 -8
- esgvoc/cli/main.py +0 -2
- esgvoc/cli/status.py +5 -5
- esgvoc/cli/valid.py +40 -40
- esgvoc/core/constants.py +1 -1
- esgvoc/core/db/__init__.py +2 -4
- esgvoc/core/db/connection.py +5 -3
- esgvoc/core/db/models/project.py +50 -8
- esgvoc/core/db/models/universe.py +51 -12
- esgvoc/core/db/project_ingestion.py +60 -46
- esgvoc/core/db/universe_ingestion.py +58 -29
- esgvoc/core/exceptions.py +33 -0
- esgvoc/core/logging_handler.py +1 -1
- esgvoc/core/repo_fetcher.py +4 -3
- esgvoc/core/service/__init__.py +37 -5
- esgvoc/core/service/configuration/config_manager.py +188 -0
- esgvoc/core/service/configuration/setting.py +88 -0
- esgvoc/core/service/state.py +49 -32
- {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/METADATA +34 -3
- esgvoc-0.4.0.dist-info/RECORD +80 -0
- esgvoc/api/_utils.py +0 -39
- esgvoc/cli/config.py +0 -82
- esgvoc/core/service/settings.py +0 -73
- esgvoc/core/service/settings.toml +0 -17
- esgvoc/core/service/settings_default.toml +0 -17
- esgvoc-0.2.1.dist-info/RECORD +0 -73
- {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/WHEEL +0 -0
- {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/entry_points.txt +0 -0
- {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -1,21 +1,22 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
|
-
import esgvoc.core.constants
|
|
5
|
-
from esgvoc.core.data_handler import JsonLdResource
|
|
6
|
-
from esgvoc.core.db.connection import DBConnection
|
|
7
|
-
from esgvoc.core.service.data_merger import DataMerger
|
|
8
|
-
from esgvoc.core.db.models.mixins import TermKind
|
|
9
4
|
from pydantic import BaseModel
|
|
5
|
+
from sqlalchemy import text
|
|
10
6
|
|
|
7
|
+
import esgvoc.core.constants
|
|
11
8
|
import esgvoc.core.db.connection as db
|
|
12
|
-
|
|
9
|
+
import esgvoc.core.service as service
|
|
10
|
+
from esgvoc.core.data_handler import JsonLdResource
|
|
11
|
+
from esgvoc.core.db.connection import DBConnection, read_json_file
|
|
12
|
+
from esgvoc.core.db.models.mixins import TermKind
|
|
13
13
|
from esgvoc.core.db.models.project import Collection, Project, PTerm
|
|
14
|
-
|
|
15
|
-
|
|
14
|
+
from esgvoc.core.exceptions import EsgvocDbError
|
|
15
|
+
from esgvoc.core.service.data_merger import DataMerger
|
|
16
16
|
|
|
17
17
|
_LOGGER = logging.getLogger("project_ingestion")
|
|
18
18
|
|
|
19
|
+
|
|
19
20
|
def infer_term_kind(json_specs: dict) -> TermKind:
|
|
20
21
|
if esgvoc.core.constants.PATTERN_JSON_KEY in json_specs:
|
|
21
22
|
return TermKind.PATTERN
|
|
@@ -25,15 +26,16 @@ def infer_term_kind(json_specs: dict) -> TermKind:
|
|
|
25
26
|
return TermKind.PLAIN
|
|
26
27
|
|
|
27
28
|
|
|
28
|
-
def ingest_metadata_project(connection:DBConnection,git_hash):
|
|
29
|
+
def ingest_metadata_project(connection: DBConnection, git_hash):
|
|
29
30
|
with connection.create_session() as session:
|
|
30
|
-
project = Project(id=str(connection.file_path.stem), git_hash=git_hash,specs={})
|
|
31
|
-
session.add(project)
|
|
31
|
+
project = Project(id=str(connection.file_path.stem), git_hash=git_hash, specs={})
|
|
32
|
+
session.add(project)
|
|
32
33
|
session.commit()
|
|
33
34
|
|
|
34
|
-
|
|
35
|
+
|
|
35
36
|
def get_data_descriptor_id_from_context(collection_context: dict) -> str:
|
|
36
|
-
data_descriptor_url = collection_context[esgvoc.core.constants.CONTEXT_JSON_KEY]
|
|
37
|
+
data_descriptor_url = collection_context[esgvoc.core.constants.CONTEXT_JSON_KEY]\
|
|
38
|
+
[esgvoc.core.constants.DATA_DESCRIPTOR_JSON_KEY] # noqa E211
|
|
37
39
|
return Path(data_descriptor_url).name
|
|
38
40
|
|
|
39
41
|
|
|
@@ -50,40 +52,40 @@ def instantiate_project_term(universe_term_json_specs: dict,
|
|
|
50
52
|
def ingest_collection(collection_dir_path: Path,
|
|
51
53
|
project: Project,
|
|
52
54
|
project_db_session) -> None:
|
|
53
|
-
|
|
54
|
-
|
|
55
55
|
collection_id = collection_dir_path.name
|
|
56
56
|
collection_context_file_path = collection_dir_path.joinpath(esgvoc.core.constants.CONTEXT_FILENAME)
|
|
57
57
|
try:
|
|
58
58
|
collection_context = read_json_file(collection_context_file_path)
|
|
59
59
|
data_descriptor_id = get_data_descriptor_id_from_context(collection_context)
|
|
60
60
|
except Exception as e:
|
|
61
|
-
msg = f'
|
|
61
|
+
msg = f'unable to read project context file {collection_context_file_path}'
|
|
62
62
|
_LOGGER.fatal(msg)
|
|
63
|
-
raise
|
|
63
|
+
raise EsgvocDbError(msg) from e
|
|
64
64
|
# [KEEP]
|
|
65
65
|
collection = Collection(
|
|
66
66
|
id=collection_id,
|
|
67
67
|
context=collection_context,
|
|
68
68
|
project=project,
|
|
69
69
|
data_descriptor_id=data_descriptor_id,
|
|
70
|
-
term_kind="")
|
|
70
|
+
term_kind="") # We ll know it only when we ll add a term
|
|
71
|
+
# (hypothesis all term have the same kind in a collection) # noqa E116
|
|
71
72
|
term_kind_collection = None
|
|
72
73
|
|
|
73
74
|
for term_file_path in collection_dir_path.iterdir():
|
|
74
75
|
_LOGGER.debug(f"found term path : {term_file_path}")
|
|
75
|
-
if term_file_path.is_file() and term_file_path.suffix==".json":
|
|
76
|
+
if term_file_path.is_file() and term_file_path.suffix == ".json":
|
|
76
77
|
try:
|
|
77
|
-
|
|
78
|
+
locally_avail = {"https://espri-mod.github.io/mip-cmor-tables":
|
|
79
|
+
service.current_state.universe.local_path}
|
|
80
|
+
json_specs = DataMerger(data=JsonLdResource(uri=str(term_file_path)),
|
|
78
81
|
# locally_available={"https://espri-mod.github.io/mip-cmor-tables":".cache/repos/WCRP-universe"}).merge_linked_json()[-1]
|
|
79
|
-
locally_available=
|
|
80
|
-
|
|
82
|
+
locally_available=locally_avail).merge_linked_json()[-1]
|
|
81
83
|
term_kind = infer_term_kind(json_specs)
|
|
82
84
|
term_id = json_specs["id"]
|
|
83
85
|
|
|
84
86
|
if term_kind_collection is None:
|
|
85
87
|
term_kind_collection = term_kind
|
|
86
|
-
|
|
88
|
+
|
|
87
89
|
except Exception as e:
|
|
88
90
|
_LOGGER.warning(f'Unable to read term {term_file_path}. Skip.\n{str(e)}')
|
|
89
91
|
continue
|
|
@@ -105,51 +107,63 @@ def ingest_collection(collection_dir_path: Path,
|
|
|
105
107
|
collection.term_kind = term_kind_collection
|
|
106
108
|
project_db_session.add(collection)
|
|
107
109
|
|
|
110
|
+
|
|
108
111
|
def ingest_project(project_dir_path: Path,
|
|
109
112
|
project_db_file_path: Path,
|
|
110
|
-
git_hash
|
|
113
|
+
git_hash: str
|
|
111
114
|
):
|
|
112
115
|
try:
|
|
113
116
|
project_connection = db.DBConnection(project_db_file_path)
|
|
114
117
|
except Exception as e:
|
|
115
|
-
msg = f'
|
|
118
|
+
msg = f'unable to read project SQLite file at {project_db_file_path}'
|
|
116
119
|
_LOGGER.fatal(msg)
|
|
117
|
-
raise
|
|
118
|
-
|
|
120
|
+
raise EsgvocDbError(msg) from e
|
|
121
|
+
|
|
119
122
|
with project_connection.create_session() as project_db_session:
|
|
123
|
+
project_specs_file_path = project_dir_path.joinpath(esgvoc.core.constants.PROJECT_SPECS_FILENAME)
|
|
120
124
|
try:
|
|
121
|
-
project_specs_file_path = project_dir_path.joinpath(esgvoc.core.constants.PROJECT_SPECS_FILENAME)
|
|
122
125
|
project_json_specs = read_json_file(project_specs_file_path)
|
|
123
126
|
project_id = project_json_specs[esgvoc.core.constants.PROJECT_ID_JSON_KEY]
|
|
124
127
|
except Exception as e:
|
|
125
|
-
msg = f'
|
|
128
|
+
msg = f'unable to read project specs file {project_specs_file_path}'
|
|
126
129
|
_LOGGER.fatal(msg)
|
|
127
|
-
raise
|
|
128
|
-
|
|
129
|
-
project = Project(id=project_id, specs=project_json_specs,git_hash=git_hash)
|
|
130
|
+
raise EsgvocDbError(msg) from e
|
|
131
|
+
|
|
132
|
+
project = Project(id=project_id, specs=project_json_specs, git_hash=git_hash)
|
|
130
133
|
project_db_session.add(project)
|
|
131
|
-
|
|
132
134
|
|
|
133
135
|
for collection_dir_path in project_dir_path.iterdir():
|
|
134
|
-
|
|
136
|
+
# TODO maybe put that in settings
|
|
137
|
+
if collection_dir_path.is_dir() and (collection_dir_path / "000_context.jsonld").exists():
|
|
135
138
|
_LOGGER.debug(f"found collection dir : {collection_dir_path}")
|
|
136
139
|
try:
|
|
137
140
|
ingest_collection(collection_dir_path,
|
|
138
141
|
project,
|
|
139
142
|
project_db_session)
|
|
140
143
|
except Exception as e:
|
|
141
|
-
msg = f'
|
|
144
|
+
msg = f'unexpected error while ingesting collection {collection_dir_path}'
|
|
142
145
|
_LOGGER.fatal(msg)
|
|
143
|
-
raise
|
|
146
|
+
raise EsgvocDbError(msg) from e
|
|
144
147
|
project_db_session.commit()
|
|
145
148
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
149
|
+
# Well, the following instructions are not data duplication. It is more building an index.
|
|
150
|
+
# Read: https://sqlite.org/fts5.html
|
|
151
|
+
try:
|
|
152
|
+
sql_query = 'INSERT INTO pterms_fts5(pk, id, specs, kind, collection_pk) ' + \
|
|
153
|
+
'SELECT pk, id, specs, kind, collection_pk FROM pterms;' # noqa: S608
|
|
154
|
+
project_db_session.exec(text(sql_query)) # type: ignore
|
|
155
|
+
except Exception as e:
|
|
156
|
+
msg = f'unable to insert rows into pterms_fts5 table for {project_db_file_path}'
|
|
157
|
+
_LOGGER.fatal(msg)
|
|
158
|
+
raise EsgvocDbError(msg) from e
|
|
159
|
+
project_db_session.commit()
|
|
160
|
+
try:
|
|
161
|
+
sql_query = 'INSERT INTO pcollections_fts5(pk, id, data_descriptor_id, context, ' + \
|
|
162
|
+
'project_pk, term_kind) SELECT pk, id, data_descriptor_id, context, ' + \
|
|
163
|
+
'project_pk, term_kind FROM collections;' # noqa: S608
|
|
164
|
+
project_db_session.exec(text(sql_query)) # type: ignore
|
|
165
|
+
except Exception as e:
|
|
166
|
+
msg = f'unable to insert rows into pcollections_fts5 table for {project_db_file_path}'
|
|
167
|
+
_LOGGER.fatal(msg)
|
|
168
|
+
raise EsgvocDbError(msg) from e
|
|
169
|
+
project_db_session.commit()
|
|
@@ -1,20 +1,22 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
|
-
import
|
|
5
|
-
from esgvoc.core.data_handler import JsonLdResource
|
|
6
|
-
from esgvoc.core.service.data_merger import DataMerger
|
|
4
|
+
from sqlalchemy import text
|
|
7
5
|
from sqlmodel import Session, select
|
|
8
6
|
|
|
7
|
+
import esgvoc.core.constants
|
|
9
8
|
import esgvoc.core.db.connection as db
|
|
9
|
+
import esgvoc.core.service as service
|
|
10
|
+
from esgvoc.core.data_handler import JsonLdResource
|
|
10
11
|
from esgvoc.core.db.connection import read_json_file
|
|
11
12
|
from esgvoc.core.db.models.mixins import TermKind
|
|
12
|
-
from esgvoc.core.db.models.universe import
|
|
13
|
-
from esgvoc.core.
|
|
14
|
-
|
|
13
|
+
from esgvoc.core.db.models.universe import UDataDescriptor, Universe, UTerm, universe_create_db
|
|
14
|
+
from esgvoc.core.exceptions import EsgvocDbError
|
|
15
|
+
from esgvoc.core.service.data_merger import DataMerger
|
|
15
16
|
|
|
16
17
|
_LOGGER = logging.getLogger(__name__)
|
|
17
18
|
|
|
19
|
+
|
|
18
20
|
def infer_term_kind(json_specs: dict) -> TermKind:
|
|
19
21
|
if esgvoc.core.constants.PATTERN_JSON_KEY in json_specs:
|
|
20
22
|
return TermKind.PATTERN
|
|
@@ -32,25 +34,48 @@ def ingest_universe(universe_repo_dir_path: Path, universe_db_file_path: Path) -
|
|
|
32
34
|
_LOGGER.fatal(msg)
|
|
33
35
|
raise IOError(msg) from e
|
|
34
36
|
|
|
35
|
-
for data_descriptor_dir_path in universe_repo_dir_path.iterdir():
|
|
36
|
-
if data_descriptor_dir_path.is_dir() and
|
|
37
|
+
for data_descriptor_dir_path in universe_repo_dir_path.iterdir():
|
|
38
|
+
if data_descriptor_dir_path.is_dir() and \
|
|
39
|
+
(data_descriptor_dir_path / "000_context.jsonld").exists(): # TODO may be put that in setting
|
|
37
40
|
try:
|
|
38
41
|
ingest_data_descriptor(data_descriptor_dir_path, connection)
|
|
39
42
|
except Exception as e:
|
|
40
|
-
msg = f'
|
|
43
|
+
msg = f'unexpected error while processing data descriptor {data_descriptor_dir_path}'
|
|
41
44
|
_LOGGER.fatal(msg)
|
|
42
|
-
raise
|
|
43
|
-
|
|
44
|
-
|
|
45
|
+
raise EsgvocDbError(msg) from e
|
|
46
|
+
|
|
47
|
+
with connection.create_session() as session:
|
|
48
|
+
# Well, the following instructions are not data duplication. It is more building an index.
|
|
49
|
+
# Read: https://sqlite.org/fts5.html
|
|
50
|
+
try:
|
|
51
|
+
sql_query = 'INSERT INTO uterms_fts5(pk, id, specs, kind, data_descriptor_pk) ' + \
|
|
52
|
+
'SELECT pk, id, specs, kind, data_descriptor_pk FROM uterms;' # noqa: S608
|
|
53
|
+
session.exec(text(sql_query)) # type: ignore
|
|
54
|
+
except Exception as e:
|
|
55
|
+
msg = f'unable to insert rows into uterms_fts5 table for {universe_db_file_path}'
|
|
56
|
+
_LOGGER.fatal(msg)
|
|
57
|
+
raise EsgvocDbError(msg) from e
|
|
58
|
+
session.commit()
|
|
59
|
+
try:
|
|
60
|
+
sql_query = 'INSERT INTO udata_descriptors_fts5(pk, id, universe_pk, context, term_kind) ' + \
|
|
61
|
+
'SELECT pk, id, universe_pk, context, term_kind FROM udata_descriptors;' # noqa: S608
|
|
62
|
+
session.exec(text(sql_query)) # type: ignore
|
|
63
|
+
except Exception as e:
|
|
64
|
+
msg = f'unable to insert rows into udata_descriptors_fts5 table for {universe_db_file_path}'
|
|
65
|
+
_LOGGER.fatal(msg)
|
|
66
|
+
raise EsgvocDbError(msg) from e
|
|
67
|
+
session.commit()
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def ingest_metadata_universe(connection, git_hash):
|
|
45
71
|
with connection.create_session() as session:
|
|
46
72
|
universe = Universe(git_hash=git_hash)
|
|
47
|
-
session.add(universe)
|
|
73
|
+
session.add(universe)
|
|
48
74
|
session.commit()
|
|
49
75
|
|
|
76
|
+
|
|
50
77
|
def ingest_data_descriptor(data_descriptor_path: Path,
|
|
51
78
|
connection: db.DBConnection) -> None:
|
|
52
|
-
|
|
53
|
-
|
|
54
79
|
data_descriptor_id = data_descriptor_path.name
|
|
55
80
|
|
|
56
81
|
context_file_path = data_descriptor_path.joinpath(esgvoc.core.constants.CONTEXT_FILENAME)
|
|
@@ -60,12 +85,13 @@ def ingest_data_descriptor(data_descriptor_path: Path,
|
|
|
60
85
|
msg = f'Unable to read the context file {context_file_path} of data descriptor \
|
|
61
86
|
{data_descriptor_id}. Skip.\n{str(e)}'
|
|
62
87
|
_LOGGER.warning(msg)
|
|
63
|
-
return
|
|
88
|
+
return
|
|
64
89
|
|
|
65
90
|
with connection.create_session() as session:
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
91
|
+
# We ll know it only when we ll add a term (hypothesis all term have the same kind in a data_descriptor)
|
|
92
|
+
data_descriptor = UDataDescriptor(id=data_descriptor_id,
|
|
93
|
+
context=context,
|
|
94
|
+
term_kind="")
|
|
69
95
|
term_kind_dd = None
|
|
70
96
|
|
|
71
97
|
_LOGGER.debug(f"add data_descriptor : {data_descriptor_id}")
|
|
@@ -73,8 +99,10 @@ def ingest_data_descriptor(data_descriptor_path: Path,
|
|
|
73
99
|
_LOGGER.debug(f"found term path : {term_file_path}, {term_file_path.suffix}")
|
|
74
100
|
if term_file_path.is_file() and term_file_path.suffix == ".json":
|
|
75
101
|
try:
|
|
76
|
-
|
|
77
|
-
|
|
102
|
+
locally_available = {"https://espri-mod.github.io/mip-cmor-tables":
|
|
103
|
+
service.current_state.universe.local_path}
|
|
104
|
+
json_specs = DataMerger(data=JsonLdResource(uri=str(term_file_path)),
|
|
105
|
+
locally_available=locally_available).merge_linked_json()[-1]
|
|
78
106
|
term_kind = infer_term_kind(json_specs)
|
|
79
107
|
term_id = json_specs["id"]
|
|
80
108
|
|
|
@@ -82,7 +110,8 @@ def ingest_data_descriptor(data_descriptor_path: Path,
|
|
|
82
110
|
term_kind_dd = term_kind
|
|
83
111
|
|
|
84
112
|
except Exception as e:
|
|
85
|
-
_LOGGER.warning(f'Unable to read term {term_file_path} for data descriptor
|
|
113
|
+
_LOGGER.warning(f'Unable to read term {term_file_path} for data descriptor ' +
|
|
114
|
+
f'{data_descriptor_path}. Skip.\n{str(e)}')
|
|
86
115
|
continue
|
|
87
116
|
if term_id and json_specs and data_descriptor and term_kind:
|
|
88
117
|
_LOGGER.debug("adding {term_id}")
|
|
@@ -98,13 +127,14 @@ def ingest_data_descriptor(data_descriptor_path: Path,
|
|
|
98
127
|
session.add(data_descriptor)
|
|
99
128
|
session.commit()
|
|
100
129
|
|
|
130
|
+
|
|
101
131
|
def get_universe_term(data_descriptor_id: str,
|
|
102
|
-
|
|
103
|
-
|
|
132
|
+
term_id: str,
|
|
133
|
+
universe_db_session: Session) -> tuple[TermKind, dict]:
|
|
104
134
|
statement = (
|
|
105
135
|
select(UTerm)
|
|
106
|
-
.join(
|
|
107
|
-
.where(
|
|
136
|
+
.join(UDataDescriptor)
|
|
137
|
+
.where(UDataDescriptor.id == data_descriptor_id, UTerm.id == term_id)
|
|
108
138
|
)
|
|
109
139
|
results = universe_db_session.exec(statement)
|
|
110
140
|
term = results.one()
|
|
@@ -112,9 +142,8 @@ def get_universe_term(data_descriptor_id: str,
|
|
|
112
142
|
|
|
113
143
|
|
|
114
144
|
if __name__ == "__main__":
|
|
115
|
-
#ingest_universe(db.UNIVERSE_DIR_PATH, db.UNIVERSE_DB_FILE_PATH)
|
|
116
145
|
import os
|
|
117
146
|
root_dir = Path(str(os.getcwd())).parent.parent
|
|
118
147
|
print(root_dir)
|
|
119
|
-
universe_create_db(root_dir /
|
|
120
|
-
ingest_universe(root_dir / Path(".cache/repos/mip-cmor-tables"),root_dir /
|
|
148
|
+
universe_create_db(root_dir / Path(".cache/dbs/universe.sqlite"))
|
|
149
|
+
ingest_universe(root_dir / Path(".cache/repos/mip-cmor-tables"), root_dir / Path(".cache/dbs/universe.sqlite"))
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
class EsgvocException(Exception):
    """Root of the ESGVOC exception hierarchy; catch this to handle any ESGVOC error."""


class EsgvocNotFoundError(EsgvocException):
    """Raised for "not found" errors."""


class EsgvocValueError(EsgvocException):
    """Raised for value errors."""


class EsgvocDbError(EsgvocException):
    """Raised for errors related to database management."""


class EsgvocNotImplementedError(EsgvocException):
    """Raised for "not implemented" errors."""
|
esgvoc/core/logging_handler.py
CHANGED
esgvoc/core/repo_fetcher.py
CHANGED
|
@@ -153,7 +153,7 @@ class RepoFetcher:
|
|
|
153
153
|
files = [item['name'] for item in contents if item['type'] == 'file']
|
|
154
154
|
return files
|
|
155
155
|
|
|
156
|
-
def clone_repository(self, owner: str, repo: str, branch: Optional[str] = None):
|
|
156
|
+
def clone_repository(self, owner: str, repo: str, branch: Optional[str] = None, local_path: str|None = None):
|
|
157
157
|
"""
|
|
158
158
|
Clone a GitHub repository to a target directory.
|
|
159
159
|
:param owner: Repository owner
|
|
@@ -162,15 +162,16 @@ class RepoFetcher:
|
|
|
162
162
|
:param branch: (Optional) The branch to clone. Clones the default branch if None.
|
|
163
163
|
"""
|
|
164
164
|
repo_url = f"https://github.com/{owner}/{repo}.git"
|
|
165
|
+
destination = local_path if local_path else f"{self.repo_dir}/{repo}"
|
|
165
166
|
|
|
166
|
-
command = ["git", "clone", repo_url,
|
|
167
|
+
command = ["git", "clone", repo_url, destination]
|
|
167
168
|
if branch:
|
|
168
169
|
command.extend(["--branch", branch])
|
|
169
170
|
with redirect_stdout_to_log():
|
|
170
171
|
|
|
171
172
|
try:
|
|
172
173
|
subprocess.run(command, check=True)
|
|
173
|
-
_LOGGER.debug(f"Repository cloned successfully into {
|
|
174
|
+
_LOGGER.debug(f"Repository cloned successfully into {destination}")
|
|
174
175
|
except subprocess.CalledProcessError:
|
|
175
176
|
try:
|
|
176
177
|
current_work_dir = os.getcwd()
|
esgvoc/core/service/__init__.py
CHANGED
|
@@ -1,8 +1,40 @@
|
|
|
1
|
-
from esgvoc.core.service.
|
|
1
|
+
# from esgvoc.core.service.config_register import ConfigManager
|
|
2
|
+
# from esgvoc.core.service.settings import ServiceSettings
|
|
3
|
+
# from esgvoc.core.service.state import StateService
|
|
4
|
+
#
|
|
5
|
+
# config_manager = ConfigManager()
|
|
6
|
+
# active_setting = config_manager.get_active_config()
|
|
7
|
+
# active_setting["base_dir"] = str(config_manager.config_dir / config_manager.get_active_config_name())
|
|
8
|
+
# service_settings = ServiceSettings.from_config(active_setting)
|
|
9
|
+
# state_service = StateService(service_settings)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
from esgvoc.core.service.configuration.config_manager import ConfigManager
|
|
13
|
+
from esgvoc.core.service.configuration.setting import ServiceSettings
|
|
2
14
|
from esgvoc.core.service.state import StateService
|
|
3
|
-
from pathlib import Path
|
|
4
15
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
16
|
+
config_manager : ConfigManager | None = None
|
|
17
|
+
current_state : StateService | None = None
|
|
18
|
+
|
|
19
|
+
def get_config_manager():
    """Return the module-wide ConfigManager, building it on first use.

    On first use the manager is created for the "esgvoc"/"ipsl" application with
    ``ServiceSettings.DEFAULT_SETTINGS`` as default settings, and its
    ``data_config_dir`` is set to (and created as) a sub-directory of the data
    directory named after the active configuration.
    """
    global config_manager
    if config_manager is not None:
        return config_manager

    # Publish the manager in the module global before any further setup.
    config_manager = ConfigManager(
        ServiceSettings,
        app_name="esgvoc",
        app_author="ipsl",
        default_settings=ServiceSettings.DEFAULT_SETTINGS,
    )
    active_name = config_manager.get_active_config_name()
    per_config_data_dir = config_manager.data_dir / active_name
    config_manager.data_config_dir = per_config_data_dir
    per_config_data_dir.mkdir(parents=True, exist_ok=True)
    return config_manager
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def get_state():
    """Return the module-wide service state.

    When a configuration manager is available, a new ``StateService`` is built
    from its active configuration on every call and stored in the module global
    ``current_state``; otherwise the existing ``current_state`` is returned
    untouched.
    """
    global current_state
    if config_manager is None:
        return current_state
    active_settings = config_manager.get_active_config()
    current_state = StateService(active_settings)
    return current_state
|
|
36
|
+
|
|
37
|
+
# Singleton Access Function
|
|
38
|
+
config_manager = get_config_manager()
|
|
39
|
+
current_state = get_state()
|
|
8
40
|
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import toml
|
|
2
|
+
import logging
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from platformdirs import PlatformDirs
|
|
5
|
+
from typing import Type, TypeVar, Generic, Protocol
|
|
6
|
+
|
|
7
|
+
# Setup logging
|
|
8
|
+
logging.basicConfig(level=logging.INFO)
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
# Define a generic type for configuration
|
|
12
|
+
T = TypeVar("T", bound="ConfigSchema")
|
|
13
|
+
|
|
14
|
+
class ConfigSchema(Protocol):
|
|
15
|
+
"""Protocol for application-specific configuration classes."""
|
|
16
|
+
|
|
17
|
+
@classmethod
|
|
18
|
+
def load_from_file(cls, file_path: str): ...
|
|
19
|
+
|
|
20
|
+
def save_to_file(self, file_path: str): ...
|
|
21
|
+
|
|
22
|
+
class ConfigManager(Generic[T]):
|
|
23
|
+
def __init__(self, config_cls: Type[T], app_name: str, app_author: str, default_settings : dict | None = None ):
|
|
24
|
+
"""
|
|
25
|
+
Initialize the configuration manager.
|
|
26
|
+
- config_cls: A class that implements `ConfigSchema` (e.g., ServiceSettings).
|
|
27
|
+
- app_name: Name of the application (used for directory paths).
|
|
28
|
+
- app_author: Name of the author/organization (used for directory paths).
|
|
29
|
+
"""
|
|
30
|
+
self.config_cls = config_cls
|
|
31
|
+
self.dirs = PlatformDirs(app_name, app_author)
|
|
32
|
+
|
|
33
|
+
# Define standard paths
|
|
34
|
+
self.config_dir = Path(self.dirs.user_config_path).expanduser().resolve()
|
|
35
|
+
self.data_dir = Path(self.dirs.user_data_path).expanduser().resolve()
|
|
36
|
+
self.data_config_dir = None # depends on loaded settings
|
|
37
|
+
|
|
38
|
+
self.cache_dir = Path(self.dirs.user_cache_path).expanduser().resolve()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
self.config_dir.mkdir(parents=True, exist_ok=True)
|
|
42
|
+
self.data_dir.mkdir(parents=True, exist_ok=True)
|
|
43
|
+
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
|
44
|
+
|
|
45
|
+
self.registry_path = self.config_dir / "config_registry.toml"
|
|
46
|
+
self.default_config_path = self.config_dir / "default_setting.toml"
|
|
47
|
+
self.default_settings = default_settings
|
|
48
|
+
self._init_registry()
|
|
49
|
+
|
|
50
|
+
def _init_registry(self):
|
|
51
|
+
"""Initialize the registry file if it doesn't exist."""
|
|
52
|
+
if not self.registry_path.exists():
|
|
53
|
+
logger.info("Initializing configuration registry...")
|
|
54
|
+
registry = {"configs": {"default": str(self.default_config_path)}, "active": "default"}
|
|
55
|
+
self._save_toml(self.registry_path, registry)
|
|
56
|
+
# Ensure the default settings file exists and save it if necessary
|
|
57
|
+
if not self.default_config_path.exists():
|
|
58
|
+
if self.default_settings:
|
|
59
|
+
logger.info("Saving default settings...")
|
|
60
|
+
self._save_toml(self.default_config_path, self.default_settings)
|
|
61
|
+
else:
|
|
62
|
+
logger.warning("No default settings provided.")
|
|
63
|
+
|
|
64
|
+
def _load_toml(self, path: Path) -> dict:
|
|
65
|
+
"""Load TOML data from a file."""
|
|
66
|
+
if not path.exists():
|
|
67
|
+
logger.error(f"Configuration file not found: {path}")
|
|
68
|
+
raise FileNotFoundError(f"Configuration file not found: {path}")
|
|
69
|
+
with open(path, "r") as f:
|
|
70
|
+
return toml.load(f)
|
|
71
|
+
|
|
72
|
+
def _save_toml(self, path: Path, data: dict) -> None:
|
|
73
|
+
"""Save TOML data to a file."""
|
|
74
|
+
with open(path, "w") as f:
|
|
75
|
+
toml.dump(data, f)
|
|
76
|
+
|
|
77
|
+
def _get_active_config_path(self) -> Path:
|
|
78
|
+
"""Retrieve the path of the active configuration file."""
|
|
79
|
+
registry = self._load_toml(self.registry_path)
|
|
80
|
+
active_config_name = registry["active"]
|
|
81
|
+
return Path(registry["configs"][active_config_name])
|
|
82
|
+
|
|
83
|
+
def get_config(self, config_name:str) -> T:
|
|
84
|
+
"""Load the configuration as an instance of the given config schema."""
|
|
85
|
+
registry = self._load_toml(self.registry_path)
|
|
86
|
+
if config_name not in registry["configs"]:
|
|
87
|
+
logger.error(f"Config '{config_name}' not found in registry.")
|
|
88
|
+
raise ValueError(f"Config '{config_name}' not found in registry.")
|
|
89
|
+
config_path = self.config_cls.load_from_file(registry["configs"][config_name])
|
|
90
|
+
return self.config_cls.load_from_file(str(config_path))
|
|
91
|
+
|
|
92
|
+
def get_active_config(self) -> T:
|
|
93
|
+
"""Load the active configuration as an instance of the given config schema."""
|
|
94
|
+
active_config_path = self._get_active_config_path()
|
|
95
|
+
|
|
96
|
+
return self.config_cls.load_from_file(str(active_config_path))
|
|
97
|
+
|
|
98
|
+
def get_active_config_name(self) -> str:
|
|
99
|
+
"""Retrieve the config name from the registry"""
|
|
100
|
+
registry = self._load_toml(self.registry_path)
|
|
101
|
+
return registry["active"]
|
|
102
|
+
|
|
103
|
+
def save_config(self, config_data: dict, name: str | None = None) -> None:
|
|
104
|
+
"""Save the modified configuration to the corresponding file and update the registry."""
|
|
105
|
+
|
|
106
|
+
if name:
|
|
107
|
+
# If a name is provided, save the configuration with that name
|
|
108
|
+
config_path = self.config_dir / f"{name}.toml"
|
|
109
|
+
self._save_toml(config_path, config_data)
|
|
110
|
+
|
|
111
|
+
# Update the registry with the new config name
|
|
112
|
+
registry = self._load_toml(self.registry_path)
|
|
113
|
+
registry["configs"][name] = str(config_path)
|
|
114
|
+
registry["active"] = name
|
|
115
|
+
self._save_toml(self.registry_path, registry)
|
|
116
|
+
|
|
117
|
+
logger.info(f"Saved configuration to {config_path} and updated registry.")
|
|
118
|
+
else:
|
|
119
|
+
# If no name is provided, give the user a default name, like "user_config"
|
|
120
|
+
default_name = "user_config"
|
|
121
|
+
config_path = self.config_dir / f"{default_name}.toml"
|
|
122
|
+
|
|
123
|
+
# Check if the user_config already exists, if so, warn them
|
|
124
|
+
if config_path.exists():
|
|
125
|
+
logger.warning(f"{default_name}.toml already exists. Overwriting with the new config.")
|
|
126
|
+
|
|
127
|
+
# Save the configuration with the default name
|
|
128
|
+
self._save_toml(config_path, config_data)
|
|
129
|
+
|
|
130
|
+
# Update the registry with the new config name
|
|
131
|
+
registry = self._load_toml(self.registry_path)
|
|
132
|
+
registry["configs"][default_name] = str(config_path)
|
|
133
|
+
registry["active"] = default_name
|
|
134
|
+
self._save_toml(self.registry_path, registry)
|
|
135
|
+
|
|
136
|
+
logger.info(f"Saved new configuration to {config_path} and updated registry.")
|
|
137
|
+
|
|
138
|
+
def save_active_config(self, config: T):
|
|
139
|
+
"""Save the current configuration to the active file."""
|
|
140
|
+
active_config_path = self._get_active_config_path()
|
|
141
|
+
config.save_to_file(str(active_config_path))
|
|
142
|
+
|
|
143
|
+
def switch_config(self, config_name: str):
|
|
144
|
+
"""Switch to a different configuration."""
|
|
145
|
+
registry = self._load_toml(self.registry_path)
|
|
146
|
+
if config_name not in registry["configs"]:
|
|
147
|
+
logger.error(f"Config '{config_name}' not found in registry.")
|
|
148
|
+
raise ValueError(f"Config '{config_name}' not found in registry.")
|
|
149
|
+
registry["active"] = config_name
|
|
150
|
+
|
|
151
|
+
self._save_toml(self.registry_path, registry)
|
|
152
|
+
logger.info(f"Switched to configuration: {config_name}")
|
|
153
|
+
|
|
154
|
+
def list_configs(self) -> dict:
|
|
155
|
+
"""Return a list of available configurations."""
|
|
156
|
+
return self._load_toml(self.registry_path)["configs"]
|
|
157
|
+
|
|
158
|
+
def add_config(self, config_name: str, config_data: dict):
|
|
159
|
+
"""Add a new configuration."""
|
|
160
|
+
registry = self._load_toml(self.registry_path)
|
|
161
|
+
if config_name in registry["configs"]:
|
|
162
|
+
raise ValueError(f"Config '{config_name}' already exists.")
|
|
163
|
+
config_path = self.config_dir / f"{config_name}.toml"
|
|
164
|
+
self._save_toml(config_path, config_data)
|
|
165
|
+
registry["configs"][config_name] = str(config_path)
|
|
166
|
+
self._save_toml(self.registry_path, registry)
|
|
167
|
+
|
|
168
|
+
def remove_config(self, config_name: str):
|
|
169
|
+
"""Remove a configuration."""
|
|
170
|
+
registry = self._load_toml(self.registry_path)
|
|
171
|
+
if config_name == "default":
|
|
172
|
+
raise ValueError("Cannot remove the default configuration.")
|
|
173
|
+
if config_name not in registry["configs"]:
|
|
174
|
+
raise ValueError(f"Config '{config_name}' not found.")
|
|
175
|
+
del registry["configs"][config_name]
|
|
176
|
+
config_path = self.config_dir / f"{config_name}.toml"
|
|
177
|
+
config_path.unlink()
|
|
178
|
+
|
|
179
|
+
self._save_toml(self.registry_path, registry)
|
|
180
|
+
logger.info(f"Removed configuration: {config_name}")
|
|
181
|
+
if registry["active"] not in registry["configs"]:
|
|
182
|
+
self.switch_config("default")
|
|
183
|
+
logger.info("active configuration doesnot exist anymore : Switch to default configuration")
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
|