esgvoc 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +1 -1
- esgvoc/api/__init__.py +95 -60
- esgvoc/api/data_descriptors/__init__.py +50 -28
- esgvoc/api/data_descriptors/activity.py +3 -3
- esgvoc/api/data_descriptors/area_label.py +16 -1
- esgvoc/api/data_descriptors/branded_suffix.py +20 -0
- esgvoc/api/data_descriptors/branded_variable.py +12 -0
- esgvoc/api/data_descriptors/consortium.py +14 -13
- esgvoc/api/data_descriptors/contact.py +5 -0
- esgvoc/api/data_descriptors/conventions.py +6 -0
- esgvoc/api/data_descriptors/creation_date.py +5 -0
- esgvoc/api/data_descriptors/data_descriptor.py +14 -9
- esgvoc/api/data_descriptors/data_specs_version.py +5 -0
- esgvoc/api/data_descriptors/date.py +1 -1
- esgvoc/api/data_descriptors/directory_date.py +1 -1
- esgvoc/api/data_descriptors/experiment.py +13 -11
- esgvoc/api/data_descriptors/forcing_index.py +1 -1
- esgvoc/api/data_descriptors/frequency.py +3 -3
- esgvoc/api/data_descriptors/further_info_url.py +5 -0
- esgvoc/api/data_descriptors/grid_label.py +2 -2
- esgvoc/api/data_descriptors/horizontal_label.py +15 -1
- esgvoc/api/data_descriptors/initialisation_index.py +1 -1
- esgvoc/api/data_descriptors/institution.py +8 -5
- esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
- esgvoc/api/data_descriptors/license.py +3 -3
- esgvoc/api/data_descriptors/mip_era.py +1 -1
- esgvoc/api/data_descriptors/model_component.py +1 -1
- esgvoc/api/data_descriptors/obs_type.py +5 -0
- esgvoc/api/data_descriptors/organisation.py +1 -1
- esgvoc/api/data_descriptors/physic_index.py +1 -1
- esgvoc/api/data_descriptors/product.py +2 -2
- esgvoc/api/data_descriptors/publication_status.py +5 -0
- esgvoc/api/data_descriptors/realisation_index.py +1 -1
- esgvoc/api/data_descriptors/realm.py +1 -1
- esgvoc/api/data_descriptors/region.py +5 -0
- esgvoc/api/data_descriptors/resolution.py +3 -3
- esgvoc/api/data_descriptors/source.py +9 -5
- esgvoc/api/data_descriptors/source_type.py +1 -1
- esgvoc/api/data_descriptors/table.py +3 -2
- esgvoc/api/data_descriptors/temporal_label.py +15 -1
- esgvoc/api/data_descriptors/time_range.py +4 -3
- esgvoc/api/data_descriptors/title.py +5 -0
- esgvoc/api/data_descriptors/tracking_id.py +5 -0
- esgvoc/api/data_descriptors/variable.py +25 -12
- esgvoc/api/data_descriptors/variant_label.py +3 -3
- esgvoc/api/data_descriptors/vertical_label.py +14 -0
- esgvoc/api/project_specs.py +120 -4
- esgvoc/api/projects.py +733 -505
- esgvoc/api/py.typed +0 -0
- esgvoc/api/report.py +12 -8
- esgvoc/api/search.py +168 -98
- esgvoc/api/universe.py +368 -157
- esgvoc/apps/drs/constants.py +1 -1
- esgvoc/apps/drs/generator.py +51 -69
- esgvoc/apps/drs/report.py +60 -15
- esgvoc/apps/drs/validator.py +60 -71
- esgvoc/apps/jsg/cmip6_template.json +74 -0
- esgvoc/apps/jsg/cmip6plus_template.json +74 -0
- esgvoc/apps/jsg/json_schema_generator.py +185 -0
- esgvoc/apps/py.typed +0 -0
- esgvoc/cli/config.py +500 -0
- esgvoc/cli/drs.py +3 -2
- esgvoc/cli/find.py +138 -0
- esgvoc/cli/get.py +46 -38
- esgvoc/cli/main.py +10 -3
- esgvoc/cli/status.py +27 -18
- esgvoc/cli/valid.py +10 -15
- esgvoc/core/constants.py +1 -1
- esgvoc/core/db/__init__.py +2 -4
- esgvoc/core/db/connection.py +5 -3
- esgvoc/core/db/models/project.py +57 -15
- esgvoc/core/db/models/universe.py +49 -10
- esgvoc/core/db/project_ingestion.py +79 -65
- esgvoc/core/db/universe_ingestion.py +71 -40
- esgvoc/core/exceptions.py +33 -0
- esgvoc/core/logging_handler.py +24 -2
- esgvoc/core/repo_fetcher.py +61 -59
- esgvoc/core/service/data_merger.py +47 -34
- esgvoc/core/service/state.py +107 -83
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/METADATA +7 -20
- esgvoc-1.0.0.dist-info/RECORD +95 -0
- esgvoc/api/_utils.py +0 -53
- esgvoc/core/logging.conf +0 -21
- esgvoc-0.3.0.dist-info/RECORD +0 -78
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/WHEEL +0 -0
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/entry_points.txt +0 -0
- {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -1,20 +1,21 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
|
-
import esgvoc.core.constants
|
|
5
|
-
from esgvoc.core.data_handler import JsonLdResource
|
|
6
|
-
from esgvoc.core.db.connection import DBConnection
|
|
7
|
-
from esgvoc.core.service.data_merger import DataMerger
|
|
8
|
-
from esgvoc.core.db.models.mixins import TermKind
|
|
9
4
|
from pydantic import BaseModel
|
|
5
|
+
from sqlalchemy import text
|
|
10
6
|
|
|
7
|
+
import esgvoc.core.constants
|
|
11
8
|
import esgvoc.core.db.connection as db
|
|
12
|
-
|
|
13
|
-
from esgvoc.core.
|
|
14
|
-
|
|
9
|
+
import esgvoc.core.service as service
|
|
10
|
+
from esgvoc.core.data_handler import JsonLdResource
|
|
11
|
+
from esgvoc.core.db.connection import DBConnection, read_json_file
|
|
12
|
+
from esgvoc.core.db.models.mixins import TermKind
|
|
13
|
+
from esgvoc.core.db.models.project import PCollection, Project, PTerm
|
|
14
|
+
from esgvoc.core.exceptions import EsgvocDbError
|
|
15
|
+
from esgvoc.core.service.data_merger import DataMerger
|
|
15
16
|
|
|
17
|
+
_LOGGER = logging.getLogger(__name__)
|
|
16
18
|
|
|
17
|
-
_LOGGER = logging.getLogger("project_ingestion")
|
|
18
19
|
|
|
19
20
|
def infer_term_kind(json_specs: dict) -> TermKind:
|
|
20
21
|
if esgvoc.core.constants.PATTERN_JSON_KEY in json_specs:
|
|
@@ -25,67 +26,69 @@ def infer_term_kind(json_specs: dict) -> TermKind:
|
|
|
25
26
|
return TermKind.PLAIN
|
|
26
27
|
|
|
27
28
|
|
|
28
|
-
def ingest_metadata_project(connection:DBConnection,git_hash):
|
|
29
|
+
def ingest_metadata_project(connection: DBConnection, git_hash):
|
|
29
30
|
with connection.create_session() as session:
|
|
30
|
-
project = Project(id=str(connection.file_path.stem), git_hash=git_hash,specs={})
|
|
31
|
-
session.add(project)
|
|
31
|
+
project = Project(id=str(connection.file_path.stem), git_hash=git_hash, specs={})
|
|
32
|
+
session.add(project)
|
|
32
33
|
session.commit()
|
|
33
34
|
|
|
34
|
-
|
|
35
|
+
|
|
35
36
|
def get_data_descriptor_id_from_context(collection_context: dict) -> str:
|
|
36
|
-
data_descriptor_url = collection_context[esgvoc.core.constants.CONTEXT_JSON_KEY][
|
|
37
|
+
data_descriptor_url = collection_context[esgvoc.core.constants.CONTEXT_JSON_KEY][
|
|
38
|
+
esgvoc.core.constants.DATA_DESCRIPTOR_JSON_KEY
|
|
39
|
+
] # noqa E211
|
|
37
40
|
return Path(data_descriptor_url).name
|
|
38
41
|
|
|
39
42
|
|
|
40
|
-
def instantiate_project_term(
|
|
41
|
-
|
|
42
|
-
|
|
43
|
+
def instantiate_project_term(
|
|
44
|
+
universe_term_json_specs: dict, project_term_json_specs_update: dict, pydantic_class: type[BaseModel]
|
|
45
|
+
) -> dict:
|
|
43
46
|
term_from_universe = pydantic_class(**universe_term_json_specs)
|
|
44
|
-
updated_term = term_from_universe.model_copy(
|
|
45
|
-
update=project_term_json_specs_update, deep=True
|
|
46
|
-
)
|
|
47
|
+
updated_term = term_from_universe.model_copy(update=project_term_json_specs_update, deep=True)
|
|
47
48
|
return updated_term.model_dump()
|
|
48
49
|
|
|
49
50
|
|
|
50
|
-
def ingest_collection(collection_dir_path: Path,
|
|
51
|
-
project: Project,
|
|
52
|
-
project_db_session) -> None:
|
|
53
|
-
|
|
54
|
-
|
|
51
|
+
def ingest_collection(collection_dir_path: Path, project: Project, project_db_session) -> None:
|
|
55
52
|
collection_id = collection_dir_path.name
|
|
56
53
|
collection_context_file_path = collection_dir_path.joinpath(esgvoc.core.constants.CONTEXT_FILENAME)
|
|
57
54
|
try:
|
|
58
55
|
collection_context = read_json_file(collection_context_file_path)
|
|
59
56
|
data_descriptor_id = get_data_descriptor_id_from_context(collection_context)
|
|
60
57
|
except Exception as e:
|
|
61
|
-
msg = f
|
|
58
|
+
msg = f"unable to read project context file {collection_context_file_path}"
|
|
62
59
|
_LOGGER.fatal(msg)
|
|
63
|
-
raise
|
|
60
|
+
raise EsgvocDbError(msg) from e
|
|
64
61
|
# [KEEP]
|
|
65
|
-
collection =
|
|
62
|
+
collection = PCollection(
|
|
66
63
|
id=collection_id,
|
|
67
64
|
context=collection_context,
|
|
68
65
|
project=project,
|
|
69
66
|
data_descriptor_id=data_descriptor_id,
|
|
70
|
-
term_kind=""
|
|
67
|
+
term_kind="",
|
|
68
|
+
) # We ll know it only when we ll add a term
|
|
69
|
+
# (hypothesis all term have the same kind in a collection) # noqa E116
|
|
71
70
|
term_kind_collection = None
|
|
72
71
|
|
|
73
72
|
for term_file_path in collection_dir_path.iterdir():
|
|
74
73
|
_LOGGER.debug(f"found term path : {term_file_path}")
|
|
75
|
-
if term_file_path.is_file() and term_file_path.suffix==".json":
|
|
74
|
+
if term_file_path.is_file() and term_file_path.suffix == ".json":
|
|
76
75
|
try:
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
76
|
+
locally_avail = {
|
|
77
|
+
"https://espri-mod.github.io/mip-cmor-tables": service.current_state.universe.local_path
|
|
78
|
+
}
|
|
79
|
+
json_specs = DataMerger(
|
|
80
|
+
data=JsonLdResource(uri=str(term_file_path)),
|
|
81
|
+
# locally_available={"https://espri-mod.github.io/mip-cmor-tables":".cache/repos/WCRP-universe"}).merge_linked_json()[-1]
|
|
82
|
+
locally_available=locally_avail,
|
|
83
|
+
).merge_linked_json()[-1]
|
|
81
84
|
term_kind = infer_term_kind(json_specs)
|
|
82
85
|
term_id = json_specs["id"]
|
|
83
86
|
|
|
84
87
|
if term_kind_collection is None:
|
|
85
88
|
term_kind_collection = term_kind
|
|
86
|
-
|
|
89
|
+
|
|
87
90
|
except Exception as e:
|
|
88
|
-
_LOGGER.warning(f
|
|
91
|
+
_LOGGER.warning(f"Unable to read term {term_file_path}. Skip.\n{str(e)}")
|
|
89
92
|
continue
|
|
90
93
|
try:
|
|
91
94
|
term = PTerm(
|
|
@@ -105,51 +108,62 @@ def ingest_collection(collection_dir_path: Path,
|
|
|
105
108
|
collection.term_kind = term_kind_collection
|
|
106
109
|
project_db_session.add(collection)
|
|
107
110
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
git_hash : str
|
|
111
|
-
):
|
|
111
|
+
|
|
112
|
+
def ingest_project(project_dir_path: Path, project_db_file_path: Path, git_hash: str):
|
|
112
113
|
try:
|
|
113
114
|
project_connection = db.DBConnection(project_db_file_path)
|
|
114
115
|
except Exception as e:
|
|
115
|
-
msg = f
|
|
116
|
+
msg = f"unable to read project SQLite file at {project_db_file_path}"
|
|
116
117
|
_LOGGER.fatal(msg)
|
|
117
|
-
raise
|
|
118
|
-
|
|
118
|
+
raise EsgvocDbError(msg) from e
|
|
119
|
+
|
|
119
120
|
with project_connection.create_session() as project_db_session:
|
|
121
|
+
project_specs_file_path = project_dir_path.joinpath(esgvoc.core.constants.PROJECT_SPECS_FILENAME)
|
|
120
122
|
try:
|
|
121
|
-
project_specs_file_path = project_dir_path.joinpath(esgvoc.core.constants.PROJECT_SPECS_FILENAME)
|
|
122
123
|
project_json_specs = read_json_file(project_specs_file_path)
|
|
123
124
|
project_id = project_json_specs[esgvoc.core.constants.PROJECT_ID_JSON_KEY]
|
|
124
125
|
except Exception as e:
|
|
125
|
-
msg = f
|
|
126
|
+
msg = f"unable to read project specs file {project_specs_file_path}"
|
|
126
127
|
_LOGGER.fatal(msg)
|
|
127
|
-
raise
|
|
128
|
-
|
|
129
|
-
project = Project(id=project_id, specs=project_json_specs,git_hash=git_hash)
|
|
128
|
+
raise EsgvocDbError(msg) from e
|
|
129
|
+
|
|
130
|
+
project = Project(id=project_id, specs=project_json_specs, git_hash=git_hash)
|
|
130
131
|
project_db_session.add(project)
|
|
131
|
-
|
|
132
132
|
|
|
133
133
|
for collection_dir_path in project_dir_path.iterdir():
|
|
134
|
-
|
|
134
|
+
# TODO maybe put that in settings
|
|
135
|
+
if collection_dir_path.is_dir() and (collection_dir_path / "000_context.jsonld").exists():
|
|
135
136
|
_LOGGER.debug(f"found collection dir : {collection_dir_path}")
|
|
136
137
|
try:
|
|
137
|
-
ingest_collection(collection_dir_path,
|
|
138
|
-
project,
|
|
139
|
-
project_db_session)
|
|
138
|
+
ingest_collection(collection_dir_path, project, project_db_session)
|
|
140
139
|
except Exception as e:
|
|
141
|
-
msg = f
|
|
140
|
+
msg = f"unexpected error while ingesting collection {collection_dir_path}"
|
|
142
141
|
_LOGGER.fatal(msg)
|
|
143
|
-
raise
|
|
142
|
+
raise EsgvocDbError(msg) from e
|
|
144
143
|
project_db_session.commit()
|
|
145
144
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
145
|
+
# Well, the following instructions are not data duplication. It is more building an index.
|
|
146
|
+
# Read: https://sqlite.org/fts5.html
|
|
147
|
+
try:
|
|
148
|
+
sql_query = (
|
|
149
|
+
"INSERT INTO pterms_fts5(pk, id, specs, kind, collection_pk) " # noqa: S608
|
|
150
|
+
+ "SELECT pk, id, specs, kind, collection_pk FROM pterms;"
|
|
151
|
+
)
|
|
152
|
+
project_db_session.exec(text(sql_query)) # type: ignore
|
|
153
|
+
except Exception as e:
|
|
154
|
+
msg = f"unable to insert rows into pterms_fts5 table for {project_db_file_path}"
|
|
155
|
+
_LOGGER.fatal(msg)
|
|
156
|
+
raise EsgvocDbError(msg) from e
|
|
157
|
+
project_db_session.commit()
|
|
158
|
+
try:
|
|
159
|
+
sql_query = (
|
|
160
|
+
"INSERT INTO pcollections_fts5(pk, id, data_descriptor_id, context, " # noqa: S608
|
|
161
|
+
+ "project_pk, term_kind) SELECT pk, id, data_descriptor_id, context, "
|
|
162
|
+
+ "project_pk, term_kind FROM pcollections;"
|
|
163
|
+
)
|
|
164
|
+
project_db_session.exec(text(sql_query)) # type: ignore
|
|
165
|
+
except Exception as e:
|
|
166
|
+
msg = f"unable to insert rows into pcollections_fts5 table for {project_db_file_path}"
|
|
167
|
+
_LOGGER.fatal(msg)
|
|
168
|
+
raise EsgvocDbError(msg) from e
|
|
169
|
+
project_db_session.commit()
|
|
@@ -1,20 +1,22 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
|
-
import
|
|
5
|
-
from esgvoc.core.data_handler import JsonLdResource
|
|
6
|
-
from esgvoc.core.service.data_merger import DataMerger
|
|
4
|
+
from sqlalchemy import text
|
|
7
5
|
from sqlmodel import Session, select
|
|
8
6
|
|
|
7
|
+
import esgvoc.core.constants
|
|
9
8
|
import esgvoc.core.db.connection as db
|
|
9
|
+
import esgvoc.core.service as service
|
|
10
|
+
from esgvoc.core.data_handler import JsonLdResource
|
|
10
11
|
from esgvoc.core.db.connection import read_json_file
|
|
11
12
|
from esgvoc.core.db.models.mixins import TermKind
|
|
12
|
-
from esgvoc.core.db.models.universe import UDataDescriptor, UTerm,
|
|
13
|
-
from esgvoc.core.
|
|
14
|
-
|
|
13
|
+
from esgvoc.core.db.models.universe import UDataDescriptor, Universe, UTerm, universe_create_db
|
|
14
|
+
from esgvoc.core.exceptions import EsgvocDbError
|
|
15
|
+
from esgvoc.core.service.data_merger import DataMerger
|
|
15
16
|
|
|
16
17
|
_LOGGER = logging.getLogger(__name__)
|
|
17
18
|
|
|
19
|
+
|
|
18
20
|
def infer_term_kind(json_specs: dict) -> TermKind:
|
|
19
21
|
if esgvoc.core.constants.PATTERN_JSON_KEY in json_specs:
|
|
20
22
|
return TermKind.PATTERN
|
|
@@ -28,94 +30,123 @@ def ingest_universe(universe_repo_dir_path: Path, universe_db_file_path: Path) -
|
|
|
28
30
|
try:
|
|
29
31
|
connection = db.DBConnection(universe_db_file_path)
|
|
30
32
|
except Exception as e:
|
|
31
|
-
msg = f
|
|
33
|
+
msg = f"Unable to read universe SQLite file at {universe_db_file_path}. Abort."
|
|
32
34
|
_LOGGER.fatal(msg)
|
|
33
35
|
raise IOError(msg) from e
|
|
34
36
|
|
|
35
|
-
for data_descriptor_dir_path in universe_repo_dir_path.iterdir():
|
|
36
|
-
if
|
|
37
|
+
for data_descriptor_dir_path in universe_repo_dir_path.iterdir():
|
|
38
|
+
if (
|
|
39
|
+
data_descriptor_dir_path.is_dir() and (data_descriptor_dir_path / "000_context.jsonld").exists()
|
|
40
|
+
): # TODO may be put that in setting
|
|
37
41
|
try:
|
|
38
42
|
ingest_data_descriptor(data_descriptor_dir_path, connection)
|
|
39
43
|
except Exception as e:
|
|
40
|
-
msg = f
|
|
44
|
+
msg = f"unexpected error while processing data descriptor {data_descriptor_dir_path}"
|
|
41
45
|
_LOGGER.fatal(msg)
|
|
42
|
-
raise
|
|
43
|
-
|
|
44
|
-
def ingest_metadata_universe(connection,git_hash):
|
|
46
|
+
raise EsgvocDbError(msg) from e
|
|
47
|
+
|
|
45
48
|
with connection.create_session() as session:
|
|
46
|
-
|
|
47
|
-
|
|
49
|
+
# Well, the following instructions are not data duplication. It is more building an index.
|
|
50
|
+
# Read: https://sqlite.org/fts5.html
|
|
51
|
+
try:
|
|
52
|
+
sql_query = (
|
|
53
|
+
"INSERT INTO uterms_fts5(pk, id, specs, kind, data_descriptor_pk) "
|
|
54
|
+
+ "SELECT pk, id, specs, kind, data_descriptor_pk FROM uterms;"
|
|
55
|
+
) # noqa: S608
|
|
56
|
+
session.exec(text(sql_query)) # type: ignore
|
|
57
|
+
except Exception as e:
|
|
58
|
+
msg = f"unable to insert rows into uterms_fts5 table for {universe_db_file_path}"
|
|
59
|
+
_LOGGER.fatal(msg)
|
|
60
|
+
raise EsgvocDbError(msg) from e
|
|
61
|
+
session.commit()
|
|
62
|
+
try:
|
|
63
|
+
sql_query = (
|
|
64
|
+
"INSERT INTO udata_descriptors_fts5(pk, id, universe_pk, context, term_kind) "
|
|
65
|
+
+ "SELECT pk, id, universe_pk, context, term_kind FROM udata_descriptors;"
|
|
66
|
+
) # noqa: S608
|
|
67
|
+
session.exec(text(sql_query)) # type: ignore
|
|
68
|
+
except Exception as e:
|
|
69
|
+
msg = f"unable to insert rows into udata_descriptors_fts5 table for {universe_db_file_path}"
|
|
70
|
+
_LOGGER.fatal(msg)
|
|
71
|
+
raise EsgvocDbError(msg) from e
|
|
48
72
|
session.commit()
|
|
49
73
|
|
|
50
|
-
def ingest_data_descriptor(data_descriptor_path: Path,
|
|
51
|
-
connection: db.DBConnection) -> None:
|
|
52
74
|
|
|
75
|
+
def ingest_metadata_universe(connection, git_hash):
|
|
76
|
+
with connection.create_session() as session:
|
|
77
|
+
universe = Universe(git_hash=git_hash)
|
|
78
|
+
session.add(universe)
|
|
79
|
+
session.commit()
|
|
53
80
|
|
|
81
|
+
|
|
82
|
+
def ingest_data_descriptor(data_descriptor_path: Path, connection: db.DBConnection) -> None:
|
|
54
83
|
data_descriptor_id = data_descriptor_path.name
|
|
55
84
|
|
|
56
85
|
context_file_path = data_descriptor_path.joinpath(esgvoc.core.constants.CONTEXT_FILENAME)
|
|
57
86
|
try:
|
|
58
87
|
context = read_json_file(context_file_path)
|
|
59
88
|
except Exception as e:
|
|
60
|
-
msg = f
|
|
61
|
-
{data_descriptor_id}. Skip.\n{str(e)}
|
|
89
|
+
msg = f"Unable to read the context file {context_file_path} of data descriptor \
|
|
90
|
+
{data_descriptor_id}. Skip.\n{str(e)}"
|
|
62
91
|
_LOGGER.warning(msg)
|
|
63
|
-
return
|
|
92
|
+
return
|
|
64
93
|
|
|
65
94
|
with connection.create_session() as session:
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
term_kind="") # we ll know it only when we ll add a term (hypothesis all term have the same kind in a data_descriptor)
|
|
95
|
+
# We ll know it only when we ll add a term (hypothesis all term have the same kind in a data_descriptor)
|
|
96
|
+
data_descriptor = UDataDescriptor(id=data_descriptor_id, context=context, term_kind="")
|
|
69
97
|
term_kind_dd = None
|
|
70
98
|
|
|
71
99
|
_LOGGER.debug(f"add data_descriptor : {data_descriptor_id}")
|
|
72
100
|
for term_file_path in data_descriptor_path.iterdir():
|
|
73
101
|
_LOGGER.debug(f"found term path : {term_file_path}, {term_file_path.suffix}")
|
|
74
102
|
if term_file_path.is_file() and term_file_path.suffix == ".json":
|
|
75
|
-
|
|
76
103
|
try:
|
|
77
|
-
|
|
78
|
-
|
|
104
|
+
locally_available = {
|
|
105
|
+
"https://espri-mod.github.io/mip-cmor-tables": service.current_state.universe.local_path
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
json_specs = DataMerger(
|
|
109
|
+
data=JsonLdResource(uri=str(term_file_path)), locally_available=locally_available
|
|
110
|
+
).merge_linked_json()[-1]
|
|
111
|
+
|
|
79
112
|
term_kind = infer_term_kind(json_specs)
|
|
80
113
|
term_id = json_specs["id"]
|
|
81
114
|
|
|
82
115
|
if term_kind_dd is None:
|
|
83
116
|
term_kind_dd = term_kind
|
|
84
|
-
|
|
85
117
|
except Exception as e:
|
|
86
|
-
_LOGGER.warning(
|
|
118
|
+
_LOGGER.warning(
|
|
119
|
+
f"Unable to read term {term_file_path} for data descriptor "
|
|
120
|
+
+ f"{data_descriptor_path}. Skip.\n{str(e)}"
|
|
121
|
+
)
|
|
87
122
|
continue
|
|
88
123
|
if term_id and json_specs and data_descriptor and term_kind:
|
|
89
|
-
_LOGGER.debug("adding {term_id}")
|
|
124
|
+
_LOGGER.debug(f"adding {term_id}")
|
|
90
125
|
term = UTerm(
|
|
91
126
|
id=term_id,
|
|
92
127
|
specs=json_specs,
|
|
93
128
|
data_descriptor=data_descriptor,
|
|
94
129
|
kind=term_kind,
|
|
95
130
|
)
|
|
131
|
+
|
|
96
132
|
session.add(term)
|
|
97
133
|
if term_kind_dd is not None:
|
|
98
134
|
data_descriptor.term_kind = term_kind_dd
|
|
99
135
|
session.add(data_descriptor)
|
|
100
136
|
session.commit()
|
|
101
137
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
statement = (
|
|
106
|
-
select(UTerm)
|
|
107
|
-
.join(UDataDescriptor)
|
|
108
|
-
.where(UDataDescriptor.id == data_descriptor_id, UTerm.id == term_id)
|
|
109
|
-
)
|
|
138
|
+
|
|
139
|
+
def get_universe_term(data_descriptor_id: str, term_id: str, universe_db_session: Session) -> tuple[TermKind, dict]:
|
|
140
|
+
statement = select(UTerm).join(UDataDescriptor).where(UDataDescriptor.id == data_descriptor_id, UTerm.id == term_id)
|
|
110
141
|
results = universe_db_session.exec(statement)
|
|
111
142
|
term = results.one()
|
|
112
143
|
return term.kind, term.specs
|
|
113
144
|
|
|
114
145
|
|
|
115
146
|
if __name__ == "__main__":
|
|
116
|
-
#ingest_universe(db.UNIVERSE_DIR_PATH, db.UNIVERSE_DB_FILE_PATH)
|
|
117
147
|
import os
|
|
148
|
+
|
|
118
149
|
root_dir = Path(str(os.getcwd())).parent.parent
|
|
119
150
|
print(root_dir)
|
|
120
|
-
universe_create_db(root_dir /
|
|
121
|
-
ingest_universe(root_dir / Path(".cache/repos/mip-cmor-tables"),root_dir /
|
|
151
|
+
universe_create_db(root_dir / Path(".cache/dbs/universe.sqlite"))
|
|
152
|
+
ingest_universe(root_dir / Path(".cache/repos/mip-cmor-tables"), root_dir / Path(".cache/dbs/universe.sqlite"))
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
class EsgvocException(Exception):
|
|
2
|
+
"""
|
|
3
|
+
Class base of all ESGVOC errors.
|
|
4
|
+
"""
|
|
5
|
+
pass
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class EsgvocNotFoundError(EsgvocException):
|
|
9
|
+
"""
|
|
10
|
+
Represents the not found errors.
|
|
11
|
+
"""
|
|
12
|
+
pass
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class EsgvocValueError(EsgvocException):
|
|
16
|
+
"""
|
|
17
|
+
Represents value errors.
|
|
18
|
+
"""
|
|
19
|
+
pass
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class EsgvocDbError(EsgvocException):
|
|
23
|
+
"""
|
|
24
|
+
Represents errors relative to data base management.
|
|
25
|
+
"""
|
|
26
|
+
pass
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class EsgvocNotImplementedError(EsgvocException):
|
|
30
|
+
"""
|
|
31
|
+
Represents not implemented errors.
|
|
32
|
+
"""
|
|
33
|
+
pass
|
esgvoc/core/logging_handler.py
CHANGED
|
@@ -1,4 +1,26 @@
|
|
|
1
1
|
import logging.config
|
|
2
|
-
from pathlib import Path
|
|
3
2
|
|
|
4
|
-
|
|
3
|
+
LOGGING_CONFIG = {
|
|
4
|
+
'version': 1,
|
|
5
|
+
'disable_existing_loggers': False,
|
|
6
|
+
'formatters': {
|
|
7
|
+
'esgvoc_formatter': {
|
|
8
|
+
'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s',
|
|
9
|
+
},
|
|
10
|
+
},
|
|
11
|
+
'handlers': {
|
|
12
|
+
'esgvoc_stdout': {
|
|
13
|
+
'class': 'logging.StreamHandler',
|
|
14
|
+
'formatter': 'esgvoc_formatter',
|
|
15
|
+
},
|
|
16
|
+
},
|
|
17
|
+
'loggers': {
|
|
18
|
+
'esgvoc': {
|
|
19
|
+
'handlers': ['esgvoc_stdout'],
|
|
20
|
+
'level': 'ERROR',
|
|
21
|
+
'propagate': False,
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
logging.config.dictConfig(LOGGING_CONFIG)
|