esgvoc 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. esgvoc/__init__.py +3 -0
  2. esgvoc/api/__init__.py +91 -0
  3. esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
  4. esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
  5. esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
  6. esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
  7. esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
  8. esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
  9. esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
  10. esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
  11. esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
  12. esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
  13. esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
  14. esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
  15. esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
  16. esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
  17. esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
  18. esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
  19. esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
  20. esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
  21. esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
  22. esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
  23. esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
  24. esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
  25. esgvoc/api/data_descriptors/__init__.py +159 -0
  26. esgvoc/api/data_descriptors/activity.py +72 -0
  27. esgvoc/api/data_descriptors/archive.py +5 -0
  28. esgvoc/api/data_descriptors/area_label.py +30 -0
  29. esgvoc/api/data_descriptors/branded_suffix.py +30 -0
  30. esgvoc/api/data_descriptors/branded_variable.py +21 -0
  31. esgvoc/api/data_descriptors/citation_url.py +5 -0
  32. esgvoc/api/data_descriptors/contact.py +5 -0
  33. esgvoc/api/data_descriptors/conventions.py +28 -0
  34. esgvoc/api/data_descriptors/creation_date.py +18 -0
  35. esgvoc/api/data_descriptors/data_descriptor.py +127 -0
  36. esgvoc/api/data_descriptors/data_specs_version.py +25 -0
  37. esgvoc/api/data_descriptors/date.py +5 -0
  38. esgvoc/api/data_descriptors/directory_date.py +22 -0
  39. esgvoc/api/data_descriptors/drs_specs.py +38 -0
  40. esgvoc/api/data_descriptors/experiment.py +215 -0
  41. esgvoc/api/data_descriptors/forcing_index.py +21 -0
  42. esgvoc/api/data_descriptors/frequency.py +48 -0
  43. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  44. esgvoc/api/data_descriptors/grid.py +43 -0
  45. esgvoc/api/data_descriptors/horizontal_label.py +20 -0
  46. esgvoc/api/data_descriptors/initialization_index.py +27 -0
  47. esgvoc/api/data_descriptors/institution.py +80 -0
  48. esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
  49. esgvoc/api/data_descriptors/license.py +31 -0
  50. esgvoc/api/data_descriptors/member_id.py +9 -0
  51. esgvoc/api/data_descriptors/mip_era.py +26 -0
  52. esgvoc/api/data_descriptors/model_component.py +32 -0
  53. esgvoc/api/data_descriptors/models_test/models.py +17 -0
  54. esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
  55. esgvoc/api/data_descriptors/obs_type.py +5 -0
  56. esgvoc/api/data_descriptors/organisation.py +22 -0
  57. esgvoc/api/data_descriptors/physics_index.py +21 -0
  58. esgvoc/api/data_descriptors/product.py +16 -0
  59. esgvoc/api/data_descriptors/publication_status.py +5 -0
  60. esgvoc/api/data_descriptors/realization_index.py +24 -0
  61. esgvoc/api/data_descriptors/realm.py +16 -0
  62. esgvoc/api/data_descriptors/regex.py +5 -0
  63. esgvoc/api/data_descriptors/region.py +35 -0
  64. esgvoc/api/data_descriptors/resolution.py +7 -0
  65. esgvoc/api/data_descriptors/source.py +120 -0
  66. esgvoc/api/data_descriptors/source_type.py +5 -0
  67. esgvoc/api/data_descriptors/sub_experiment.py +5 -0
  68. esgvoc/api/data_descriptors/table.py +28 -0
  69. esgvoc/api/data_descriptors/temporal_label.py +20 -0
  70. esgvoc/api/data_descriptors/time_range.py +17 -0
  71. esgvoc/api/data_descriptors/title.py +5 -0
  72. esgvoc/api/data_descriptors/tracking_id.py +67 -0
  73. esgvoc/api/data_descriptors/variable.py +56 -0
  74. esgvoc/api/data_descriptors/variant_label.py +25 -0
  75. esgvoc/api/data_descriptors/vertical_label.py +20 -0
  76. esgvoc/api/project_specs.py +143 -0
  77. esgvoc/api/projects.py +1253 -0
  78. esgvoc/api/py.typed +0 -0
  79. esgvoc/api/pydantic_handler.py +146 -0
  80. esgvoc/api/report.py +127 -0
  81. esgvoc/api/search.py +171 -0
  82. esgvoc/api/universe.py +434 -0
  83. esgvoc/apps/__init__.py +6 -0
  84. esgvoc/apps/cmor_tables/__init__.py +7 -0
  85. esgvoc/apps/cmor_tables/cvs_table.py +948 -0
  86. esgvoc/apps/drs/__init__.py +0 -0
  87. esgvoc/apps/drs/constants.py +2 -0
  88. esgvoc/apps/drs/generator.py +429 -0
  89. esgvoc/apps/drs/report.py +540 -0
  90. esgvoc/apps/drs/validator.py +312 -0
  91. esgvoc/apps/ga/__init__.py +104 -0
  92. esgvoc/apps/ga/example_usage.py +315 -0
  93. esgvoc/apps/ga/models/__init__.py +47 -0
  94. esgvoc/apps/ga/models/netcdf_header.py +306 -0
  95. esgvoc/apps/ga/models/validator.py +491 -0
  96. esgvoc/apps/ga/test_ga.py +161 -0
  97. esgvoc/apps/ga/validator.py +277 -0
  98. esgvoc/apps/jsg/json_schema_generator.py +341 -0
  99. esgvoc/apps/jsg/templates/template.jinja +241 -0
  100. esgvoc/apps/test_cv/README.md +214 -0
  101. esgvoc/apps/test_cv/__init__.py +0 -0
  102. esgvoc/apps/test_cv/cv_tester.py +1611 -0
  103. esgvoc/apps/test_cv/example_usage.py +216 -0
  104. esgvoc/apps/vr/__init__.py +12 -0
  105. esgvoc/apps/vr/build_variable_registry.py +71 -0
  106. esgvoc/apps/vr/example_usage.py +60 -0
  107. esgvoc/apps/vr/vr_app.py +333 -0
  108. esgvoc/cli/clean.py +304 -0
  109. esgvoc/cli/cmor.py +46 -0
  110. esgvoc/cli/config.py +1300 -0
  111. esgvoc/cli/drs.py +267 -0
  112. esgvoc/cli/find.py +138 -0
  113. esgvoc/cli/get.py +155 -0
  114. esgvoc/cli/install.py +41 -0
  115. esgvoc/cli/main.py +60 -0
  116. esgvoc/cli/offline.py +269 -0
  117. esgvoc/cli/status.py +79 -0
  118. esgvoc/cli/test_cv.py +258 -0
  119. esgvoc/cli/valid.py +147 -0
  120. esgvoc/core/constants.py +17 -0
  121. esgvoc/core/convert.py +0 -0
  122. esgvoc/core/data_handler.py +206 -0
  123. esgvoc/core/db/__init__.py +3 -0
  124. esgvoc/core/db/connection.py +40 -0
  125. esgvoc/core/db/models/mixins.py +25 -0
  126. esgvoc/core/db/models/project.py +102 -0
  127. esgvoc/core/db/models/universe.py +98 -0
  128. esgvoc/core/db/project_ingestion.py +231 -0
  129. esgvoc/core/db/universe_ingestion.py +172 -0
  130. esgvoc/core/exceptions.py +33 -0
  131. esgvoc/core/logging_handler.py +26 -0
  132. esgvoc/core/repo_fetcher.py +345 -0
  133. esgvoc/core/service/__init__.py +41 -0
  134. esgvoc/core/service/configuration/config_manager.py +196 -0
  135. esgvoc/core/service/configuration/setting.py +363 -0
  136. esgvoc/core/service/data_merger.py +634 -0
  137. esgvoc/core/service/esg_voc.py +77 -0
  138. esgvoc/core/service/resolver_config.py +56 -0
  139. esgvoc/core/service/state.py +324 -0
  140. esgvoc/core/service/string_heuristics.py +98 -0
  141. esgvoc/core/service/term_cache.py +108 -0
  142. esgvoc/core/service/uri_resolver.py +133 -0
  143. esgvoc-2.0.2.dist-info/METADATA +82 -0
  144. esgvoc-2.0.2.dist-info/RECORD +147 -0
  145. esgvoc-2.0.2.dist-info/WHEEL +4 -0
  146. esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
  147. esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0
@@ -0,0 +1,231 @@
1
+ import logging
2
+ import traceback
3
+ from pathlib import Path
4
+
5
+ from pydantic import BaseModel
6
+ from sqlalchemy import text
7
+
8
+ import esgvoc.core.constants
9
+ import esgvoc.core.db.connection as db
10
+ import esgvoc.core.service as service
11
+ from esgvoc.core.data_handler import JsonLdResource
12
+ from esgvoc.core.db.connection import DBConnection, read_json_file, read_yaml_file
13
+ from esgvoc.core.db.models.mixins import TermKind
14
+ from esgvoc.core.db.models.project import PCollection, Project, PTerm
15
+ from esgvoc.core.exceptions import EsgvocDbError
16
+ from esgvoc.core.service.data_merger import DataMerger
17
+
18
+ _LOGGER = logging.getLogger(__name__)
19
+
20
+
21
def infer_term_kind(json_specs: dict) -> TermKind:
    """Classify a term from the shape of its JSON specs.

    A spec containing the pattern key is a PATTERN term, one containing the
    composite-parts key is a COMPOSITE term; anything else is PLAIN.
    """
    if esgvoc.core.constants.PATTERN_JSON_KEY in json_specs:
        return TermKind.PATTERN
    if esgvoc.core.constants.COMPOSITE_PARTS_JSON_KEY in json_specs:
        return TermKind.COMPOSITE
    return TermKind.PLAIN
28
+
29
+
30
def ingest_metadata_project(connection: DBConnection, git_hash):
    """Create the project metadata row: id derived from the DB file stem, plus the git hash."""
    project_id = str(connection.file_path.stem)
    with connection.create_session() as db_session:
        db_session.add(Project(id=project_id, git_hash=git_hash, specs={}))
        db_session.commit()
35
+
36
+
37
def get_data_descriptor_id_from_context(collection_context: dict) -> str:
    """Return the data-descriptor id (last URL segment) referenced by a collection's JSON-LD context."""
    context_section = collection_context[esgvoc.core.constants.CONTEXT_JSON_KEY]
    data_descriptor_url = context_section[esgvoc.core.constants.DATA_DESCRIPTOR_JSON_KEY]
    return Path(data_descriptor_url).name
42
+
43
+
44
def instantiate_project_term(
    universe_term_json_specs: dict, project_term_json_specs_update: dict, pydantic_class: type[BaseModel]
) -> dict:
    """Overlay project-level updates on a universe term and dump the result.

    The universe specs are validated through *pydantic_class*; the project
    overrides are then applied with a deep ``model_copy`` and the merged model
    is serialized back to a plain dict.
    """
    base_term = pydantic_class(**universe_term_json_specs)
    merged_term = base_term.model_copy(update=project_term_json_specs_update, deep=True)
    return merged_term.model_dump()
50
+
51
+
52
def ingest_collection(collection_dir_path: Path, project: Project, project_db_session) -> None:
    """Ingest every JSON term file of one collection directory into the project DB.

    Reads the collection's JSON-LD context to find its data descriptor, then
    JSON-LD-merges and id-resolves each ``*.json`` term file against the
    universe and project resources before inserting it as a ``PTerm``.
    Terms that fail to merge or insert are logged and skipped; the collection
    row itself is always added to the session.

    Args:
        collection_dir_path: directory holding the context file and term JSON files.
        project: the owning ``Project`` row.
        project_db_session: open DB session; the caller commits.

    Raises:
        EsgvocDbError: if the collection context file cannot be read.
    """
    collection_id = collection_dir_path.name
    collection_context_file_path = collection_dir_path.joinpath(esgvoc.core.constants.CONTEXT_FILENAME)
    try:
        collection_context = read_json_file(collection_context_file_path)
        data_descriptor_id = get_data_descriptor_id_from_context(collection_context)
    except Exception as e:
        msg = f"unable to read project context file {collection_context_file_path}"
        _LOGGER.fatal(msg)
        raise EsgvocDbError(msg) from e
    # [KEEP]
    collection = PCollection(
        id=collection_id,
        context=collection_context,
        project=project,
        data_descriptor_id=data_descriptor_id,
        term_kind="",
    )  # We'll know the real kind only once we add a term
    # (hypothesis: all terms in a collection have the same kind)  # noqa E116
    term_kind_collection = None

    for term_file_path in collection_dir_path.iterdir():
        _LOGGER.debug(f"found term path : {term_file_path}")
        if term_file_path.is_file() and term_file_path.suffix == ".json":
            try:
                # Map both universe and project URLs to their local paths
                locally_avail = {
                    "https://esgvoc.ipsl.fr/resource/universe": service.current_state.universe.local_path,
                    f"https://esgvoc.ipsl.fr/resource/{project.id}": str(collection_dir_path.parent),
                }
                merger = DataMerger(
                    data=JsonLdResource(uri=str(term_file_path)),
                    locally_available=locally_avail,
                    allowed_base_uris={
                        "https://esgvoc.ipsl.fr/resource/universe",
                        f"https://esgvoc.ipsl.fr/resource/{project.id}",
                    },
                )
                # merge_linked_json returns the chain of merged documents; the
                # last element is the fully merged result.
                merged_data = merger.merge_linked_json()[-1]
                # Resolve all nested @id references using merged context
                # IMPORTANT: Use universe path for context because:
                # 1. Universe context defines the data structure and esgvoc_resolve_modes
                # 2. Project terms are typically lightweight references to universe terms
                # 3. Even when overriding, the type definition (and resolve modes) live in universe
                json_specs = merger.resolve_merged_ids(
                    merged_data, context_base_path=service.current_state.universe.local_path
                )

                term_kind = infer_term_kind(json_specs)
                term_id = json_specs["id"]

                # First successfully parsed term fixes the collection's kind.
                if term_kind_collection is None:
                    term_kind_collection = term_kind

            except Exception as e:
                # Best-effort ingestion: log the failure in detail and move on
                # to the next term file.
                _LOGGER.error(
                    f"❌ INGESTION FAILURE - Term skipped\n"
                    f" File: {term_file_path}\n"
                    f" Collection: {collection_id}\n"
                    f" Project: {project.id}\n"
                    f" Error Type: {type(e).__name__}\n"
                    f" Error Message: {str(e)}\n"
                    f" Full Traceback:\n{traceback.format_exc()}"
                )
                continue
            try:
                term = PTerm(
                    id=term_id,
                    specs=json_specs,
                    collection=collection,
                    kind=term_kind,
                )
                project_db_session.add(term)
            except Exception as e:
                _LOGGER.error(
                    f"❌ DATABASE INSERTION FAILURE\n"
                    f" Term ID: {term_id}\n"
                    f" Collection: {collection_id}\n"
                    f" Project: {project.id}\n"
                    f" Error Type: {type(e).__name__}\n"
                    f" Error Message: {str(e)}\n"
                    f" Full Traceback:\n{traceback.format_exc()}"
                )
                continue
    # Report ingestion results for this collection (terms attached to the
    # collection relationship vs JSON files found on disk).
    json_file_count = len([f for f in collection_dir_path.glob("*.json")])
    ingested_term_count = len([t for t in collection.terms])
    _LOGGER.info(
        f"Collection '{collection_id}' in project '{project.id}': "
        f"{ingested_term_count}/{json_file_count} terms ingested"
    )
    if ingested_term_count < json_file_count:
        _LOGGER.warning(
            f"⚠️ {json_file_count - ingested_term_count} term(s) failed to ingest "
            f"in collection '{collection_id}'. See error messages above."
        )
    if term_kind_collection is not None:
        collection.term_kind = term_kind_collection
    else:
        # If no terms were found, default to PLAIN
        _LOGGER.warning(
            f"TermKind was not auto-detected for collection '{collection_id}' in project '{project.id}'. "
            f"No terms were successfully ingested. Defaulting to PLAIN."
        )
        collection.term_kind = TermKind.PLAIN
    project_db_session.add(collection)
158
+
159
+
160
def ingest_project(project_dir_path: Path, project_db_file_path: Path, git_hash: str):
    """Ingest a whole project repository into its SQLite database.

    Reads the project/drs/catalog/attribute spec YAML files, creates the
    ``Project`` row, ingests every collection directory (detected by the
    presence of ``000_context.jsonld``), then populates the FTS5 full-text
    index tables from the freshly inserted rows.

    Args:
        project_dir_path: root directory of the project repository checkout.
        project_db_file_path: path of the target SQLite database file.
        git_hash: commit hash of the repository state being ingested.

    Raises:
        EsgvocDbError: if the DB cannot be opened, spec files cannot be read,
            a collection fails unexpectedly, or the FTS5 inserts fail.
    """
    try:
        project_connection = db.DBConnection(project_db_file_path)
    except Exception as e:
        msg = f"unable to read project SQLite file at {project_db_file_path}"
        _LOGGER.fatal(msg)
        raise EsgvocDbError(msg) from e

    with project_connection.create_session() as project_db_session:
        project_specs_file_path = project_dir_path.joinpath(esgvoc.core.constants.PROJECT_SPECS_FILENAME)

        # Optional companion spec files; only merged in when present on disk.
        drs_specs_file_path = project_dir_path.joinpath(esgvoc.core.constants.DRS_SPECS_FILENAME)
        catalog_specs_file_path = project_dir_path.joinpath(esgvoc.core.constants.CATALOG_SPECS_FILENAME)
        attr_specs_file_path = project_dir_path.joinpath(esgvoc.core.constants.ATTRIBUTES_SPECS_FILENAME)
        try:
            raw_project_specs = read_yaml_file(project_specs_file_path)
            project_id = raw_project_specs[esgvoc.core.constants.PROJECT_ID_JSON_KEY]
            project_specs = raw_project_specs
            if drs_specs_file_path.exists():
                raw_drs_specs = read_yaml_file(drs_specs_file_path)
                project_specs["drs_specs"] = raw_drs_specs
            if catalog_specs_file_path.exists():
                raw_catalog_specs = read_yaml_file(catalog_specs_file_path)
                project_specs["catalog_specs"] = raw_catalog_specs
            if attr_specs_file_path.exists():
                raw_attr_specs = read_yaml_file(attr_specs_file_path)
                project_specs["attr_specs"] = raw_attr_specs
        except Exception as e:
            msg = f"unable to read specs files in {project_dir_path}"
            _LOGGER.fatal(msg)
            raise EsgvocDbError(msg) from e

        project = Project(id=project_id, specs=project_specs, git_hash=git_hash)
        project_db_session.add(project)

        for collection_dir_path in project_dir_path.iterdir():
            # TODO maybe put that in settings
            # A directory is a collection iff it carries a JSON-LD context file.
            if collection_dir_path.is_dir() and (collection_dir_path / "000_context.jsonld").exists():
                _LOGGER.debug(f"found collection dir : {collection_dir_path}")
                try:
                    ingest_collection(collection_dir_path, project, project_db_session)
                except Exception as e:
                    msg = f"unexpected error while ingesting collection {collection_dir_path}"
                    _LOGGER.fatal(msg)
                    raise EsgvocDbError(msg) from e
        project_db_session.commit()

        # Well, the following instructions are not data duplication. It is more building an index.
        # Read: https://sqlite.org/fts5.html
        try:
            sql_query = (
                "INSERT INTO pterms_fts5(pk, id, specs, kind, collection_pk) "  # noqa: S608
                + "SELECT pk, id, specs, kind, collection_pk FROM pterms;"
            )
            project_db_session.exec(text(sql_query))  # type: ignore
        except Exception as e:
            msg = f"unable to insert rows into pterms_fts5 table for {project_db_file_path}"
            _LOGGER.fatal(msg)
            raise EsgvocDbError(msg) from e
        project_db_session.commit()
        try:
            sql_query = (
                "INSERT INTO pcollections_fts5(pk, id, data_descriptor_id, context, "  # noqa: S608
                + "project_pk, term_kind) SELECT pk, id, data_descriptor_id, context, "
                + "project_pk, term_kind FROM pcollections;"
            )
            project_db_session.exec(text(sql_query))  # type: ignore
        except Exception as e:
            msg = f"unable to insert rows into pcollections_fts5 table for {project_db_file_path}"
            _LOGGER.fatal(msg)
            raise EsgvocDbError(msg) from e
        project_db_session.commit()
@@ -0,0 +1,172 @@
1
+ import logging
2
+ import traceback
3
+ from pathlib import Path
4
+
5
+ from sqlalchemy import text
6
+ from sqlmodel import Session, select
7
+
8
+ import esgvoc.core.constants
9
+ import esgvoc.core.db.connection as db
10
+ import esgvoc.core.service as service
11
+ from esgvoc.core.data_handler import JsonLdResource
12
+ from esgvoc.core.db.connection import read_json_file
13
+ from esgvoc.core.db.models.mixins import TermKind
14
+ from esgvoc.core.db.models.universe import UDataDescriptor, Universe, UTerm, universe_create_db
15
+ from esgvoc.core.exceptions import EsgvocDbError
16
+ from esgvoc.core.service.data_merger import DataMerger
17
+
18
+ _LOGGER = logging.getLogger(__name__)
19
+
20
+
21
def infer_term_kind(json_specs: dict) -> TermKind:
    """Determine a term's kind from which discriminating key its specs contain."""
    has_pattern = esgvoc.core.constants.PATTERN_JSON_KEY in json_specs
    has_composite = esgvoc.core.constants.COMPOSITE_PARTS_JSON_KEY in json_specs
    if has_pattern:
        return TermKind.PATTERN
    return TermKind.COMPOSITE if has_composite else TermKind.PLAIN
28
+
29
+
30
def ingest_universe(universe_repo_dir_path: Path, universe_db_file_path: Path) -> None:
    """Ingest every data descriptor of a universe repository into the universe DB.

    Each subdirectory containing a ``000_context.jsonld`` file is treated as a
    data descriptor and ingested; afterwards the FTS5 full-text index tables
    are populated from the freshly inserted rows.

    Args:
        universe_repo_dir_path: root of the universe repository checkout.
        universe_db_file_path: path of the target SQLite database file.

    Raises:
        IOError: if the SQLite file cannot be opened.
        EsgvocDbError: if a data descriptor or an FTS5 insert fails.
    """
    try:
        connection = db.DBConnection(universe_db_file_path)
    except Exception as e:
        msg = f"Unable to read universe SQLite file at {universe_db_file_path}. Abort."
        _LOGGER.fatal(msg)
        raise IOError(msg) from e

    for data_descriptor_dir_path in universe_repo_dir_path.iterdir():
        if (
            data_descriptor_dir_path.is_dir() and (data_descriptor_dir_path / "000_context.jsonld").exists()
        ):  # TODO may be put that in setting
            try:
                ingest_data_descriptor(data_descriptor_dir_path, connection)
            except Exception as e:
                msg = f"unexpected error while processing data descriptor {data_descriptor_dir_path}"
                _LOGGER.fatal(msg)
                raise EsgvocDbError(msg) from e

    with connection.create_session() as session:
        # Well, the following instructions are not data duplication. It is more building an index.
        # Read: https://sqlite.org/fts5.html
        try:
            sql_query = (
                "INSERT INTO uterms_fts5(pk, id, specs, kind, data_descriptor_pk) "
                + "SELECT pk, id, specs, kind, data_descriptor_pk FROM uterms;"
            )  # noqa: S608
            session.exec(text(sql_query))  # type: ignore
        except Exception as e:
            msg = f"unable to insert rows into uterms_fts5 table for {universe_db_file_path}"
            _LOGGER.fatal(msg)
            raise EsgvocDbError(msg) from e
        session.commit()
        try:
            sql_query = (
                "INSERT INTO udata_descriptors_fts5(pk, id, universe_pk, context, term_kind) "
                + "SELECT pk, id, universe_pk, context, term_kind FROM udata_descriptors;"
            )  # noqa: S608
            session.exec(text(sql_query))  # type: ignore
        except Exception as e:
            msg = f"unable to insert rows into udata_descriptors_fts5 table for {universe_db_file_path}"
            _LOGGER.fatal(msg)
            raise EsgvocDbError(msg) from e
        session.commit()
74
+
75
+
76
def ingest_metadata_universe(connection, git_hash):
    """Store the universe metadata row (currently just the source repo git hash)."""
    with connection.create_session() as db_session:
        db_session.add(Universe(git_hash=git_hash))
        db_session.commit()
81
+
82
+
83
def ingest_data_descriptor(data_descriptor_path: Path, connection: db.DBConnection) -> None:
    """Ingest every JSON term file of one universe data-descriptor directory.

    Reads the descriptor's JSON-LD context (skipping the whole descriptor with
    a warning if unreadable), then merges/resolves each ``*.json`` term file
    and inserts it as a ``UTerm``. Failed terms are logged and skipped; the
    ``UDataDescriptor`` row is added and the session committed at the end.

    Args:
        data_descriptor_path: directory holding the context and term files.
        connection: open connection to the universe database.
    """
    data_descriptor_id = data_descriptor_path.name
    context_file_path = data_descriptor_path.joinpath(esgvoc.core.constants.CONTEXT_FILENAME)
    try:
        context = read_json_file(context_file_path)
    except Exception as e:
        # Unreadable context: skip this descriptor entirely (warning only).
        msg = f"Unable to read the context file {context_file_path} of data descriptor \
{data_descriptor_id}. Skip.\n{str(e)}"
        _LOGGER.warning(msg)
        return

    with connection.create_session() as session:
        # We'll know the kind only when we add a term (hypothesis: all terms have the same kind in a data_descriptor)
        data_descriptor = UDataDescriptor(id=data_descriptor_id, context=context, term_kind="")
        term_kind_dd = None

        _LOGGER.debug(f"add data_descriptor : {data_descriptor_id}")
        for term_file_path in data_descriptor_path.iterdir():
            _LOGGER.debug(f"found term path : {term_file_path}, {term_file_path.suffix}")
            if term_file_path.is_file() and term_file_path.suffix == ".json":
                try:
                    locally_available = {
                        "https://esgvoc.ipsl.fr/resource/universe": service.current_state.universe.local_path
                    }

                    merger = DataMerger(
                        data=JsonLdResource(uri=str(term_file_path)),
                        locally_available=locally_available,
                        allowed_base_uris={"https://esgvoc.ipsl.fr/resource/universe"},
                    )
                    # Last element of the merge chain is the fully merged document.
                    merged_data = merger.merge_linked_json()[-1]
                    # Resolve all nested @id references to full objects
                    # Use resolve_merged_ids to properly handle merged data with correct context
                    json_specs = merger.resolve_merged_ids(
                        merged_data,
                        context_base_path=service.current_state.universe.local_path
                    )

                    term_kind = infer_term_kind(json_specs)
                    term_id = json_specs["id"]

                    # First successfully parsed term fixes the descriptor's kind.
                    if term_kind_dd is None:
                        term_kind_dd = term_kind
                except Exception as e:
                    _LOGGER.error(
                        f"❌ UNIVERSE INGESTION FAILURE - Term skipped\n"
                        f" File: {term_file_path}\n"
                        f" Descriptor: {data_descriptor_id}\n"
                        f" Error Type: {type(e).__name__}\n"
                        f" Error Message: {str(e)}\n"
                        f" Full Traceback:\n{traceback.format_exc()}"
                    )
                    continue
                # NOTE(review): truthiness guard — a term whose id is an empty
                # string (or a falsy TermKind value) would be silently dropped
                # here; confirm those cases cannot occur upstream.
                if term_id and json_specs and data_descriptor and term_kind:
                    _LOGGER.debug(f"adding {term_id}")
                    term = UTerm(
                        id=term_id,
                        specs=json_specs,
                        data_descriptor=data_descriptor,
                        kind=term_kind,
                    )

                    session.add(term)
        if term_kind_dd is not None:
            data_descriptor.term_kind = term_kind_dd
        else:
            # If no terms were found, default to PLAIN
            _LOGGER.warning(
                f"TermKind was not auto-detected for data descriptor '{data_descriptor_id}'. "
                f"No terms were successfully ingested. Defaulting to PLAIN."
            )
            data_descriptor.term_kind = TermKind.PLAIN
        session.add(data_descriptor)
        session.commit()
157
+
158
+
159
def get_universe_term(data_descriptor_id: str, term_id: str, universe_db_session: Session) -> tuple[TermKind, dict]:
    """Fetch one universe term and return its ``(kind, specs)`` pair.

    Propagates the exception raised by ``one()`` when zero or several terms match.
    """
    query = (
        select(UTerm)
        .join(UDataDescriptor)
        .where(UDataDescriptor.id == data_descriptor_id, UTerm.id == term_id)
    )
    matching_term = universe_db_session.exec(query).one()
    return matching_term.kind, matching_term.specs
164
+
165
+
166
if __name__ == "__main__":
    # Ad-hoc manual run: rebuild the universe DB from a local repo checkout.
    import os

    root_dir = Path(str(os.getcwd())).parent.parent
    print(root_dir)
    db_path = root_dir / Path(".cache/dbs/universe.sqlite")
    repo_path = root_dir / Path(".cache/repos/mip-cmor-tables")
    universe_create_db(db_path)
    ingest_universe(repo_path, db_path)
@@ -0,0 +1,33 @@
1
class EsgvocException(Exception):
    """Root of the ESGVOC exception hierarchy."""


class EsgvocNotFoundError(EsgvocException):
    """Raised when a requested resource cannot be found."""


class EsgvocValueError(EsgvocException):
    """Raised for invalid values."""


class EsgvocDbError(EsgvocException):
    """Raised for errors related to database management."""


class EsgvocNotImplementedError(EsgvocException):
    """Raised for features that are not implemented."""
@@ -0,0 +1,26 @@
1
import logging.config

# Default logging setup for the esgvoc package: one stream handler attached to
# the "esgvoc" logger, ERROR level, no propagation to the root logger.
LOGGING_CONFIG = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "esgvoc_formatter": {
            "format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        },
    },
    "handlers": {
        "esgvoc_stdout": {
            "class": "logging.StreamHandler",
            "formatter": "esgvoc_formatter",
        },
    },
    "loggers": {
        "esgvoc": {
            "handlers": ["esgvoc_stdout"],
            "level": "ERROR",
            "propagate": False,
        },
    },
}

# Apply the configuration as a module-import side effect.
logging.config.dictConfig(LOGGING_CONFIG)