esgvoc 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. esgvoc/__init__.py +3 -0
  2. esgvoc/api/__init__.py +91 -0
  3. esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
  4. esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
  5. esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
  6. esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
  7. esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
  8. esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
  9. esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
  10. esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
  11. esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
  12. esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
  13. esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
  14. esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
  15. esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
  16. esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
  17. esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
  18. esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
  19. esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
  20. esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
  21. esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
  22. esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
  23. esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
  24. esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
  25. esgvoc/api/data_descriptors/__init__.py +159 -0
  26. esgvoc/api/data_descriptors/activity.py +72 -0
  27. esgvoc/api/data_descriptors/archive.py +5 -0
  28. esgvoc/api/data_descriptors/area_label.py +30 -0
  29. esgvoc/api/data_descriptors/branded_suffix.py +30 -0
  30. esgvoc/api/data_descriptors/branded_variable.py +21 -0
  31. esgvoc/api/data_descriptors/citation_url.py +5 -0
  32. esgvoc/api/data_descriptors/contact.py +5 -0
  33. esgvoc/api/data_descriptors/conventions.py +28 -0
  34. esgvoc/api/data_descriptors/creation_date.py +18 -0
  35. esgvoc/api/data_descriptors/data_descriptor.py +127 -0
  36. esgvoc/api/data_descriptors/data_specs_version.py +25 -0
  37. esgvoc/api/data_descriptors/date.py +5 -0
  38. esgvoc/api/data_descriptors/directory_date.py +22 -0
  39. esgvoc/api/data_descriptors/drs_specs.py +38 -0
  40. esgvoc/api/data_descriptors/experiment.py +215 -0
  41. esgvoc/api/data_descriptors/forcing_index.py +21 -0
  42. esgvoc/api/data_descriptors/frequency.py +48 -0
  43. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  44. esgvoc/api/data_descriptors/grid.py +43 -0
  45. esgvoc/api/data_descriptors/horizontal_label.py +20 -0
  46. esgvoc/api/data_descriptors/initialization_index.py +27 -0
  47. esgvoc/api/data_descriptors/institution.py +80 -0
  48. esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
  49. esgvoc/api/data_descriptors/license.py +31 -0
  50. esgvoc/api/data_descriptors/member_id.py +9 -0
  51. esgvoc/api/data_descriptors/mip_era.py +26 -0
  52. esgvoc/api/data_descriptors/model_component.py +32 -0
  53. esgvoc/api/data_descriptors/models_test/models.py +17 -0
  54. esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
  55. esgvoc/api/data_descriptors/obs_type.py +5 -0
  56. esgvoc/api/data_descriptors/organisation.py +22 -0
  57. esgvoc/api/data_descriptors/physics_index.py +21 -0
  58. esgvoc/api/data_descriptors/product.py +16 -0
  59. esgvoc/api/data_descriptors/publication_status.py +5 -0
  60. esgvoc/api/data_descriptors/realization_index.py +24 -0
  61. esgvoc/api/data_descriptors/realm.py +16 -0
  62. esgvoc/api/data_descriptors/regex.py +5 -0
  63. esgvoc/api/data_descriptors/region.py +35 -0
  64. esgvoc/api/data_descriptors/resolution.py +7 -0
  65. esgvoc/api/data_descriptors/source.py +120 -0
  66. esgvoc/api/data_descriptors/source_type.py +5 -0
  67. esgvoc/api/data_descriptors/sub_experiment.py +5 -0
  68. esgvoc/api/data_descriptors/table.py +28 -0
  69. esgvoc/api/data_descriptors/temporal_label.py +20 -0
  70. esgvoc/api/data_descriptors/time_range.py +17 -0
  71. esgvoc/api/data_descriptors/title.py +5 -0
  72. esgvoc/api/data_descriptors/tracking_id.py +67 -0
  73. esgvoc/api/data_descriptors/variable.py +56 -0
  74. esgvoc/api/data_descriptors/variant_label.py +25 -0
  75. esgvoc/api/data_descriptors/vertical_label.py +20 -0
  76. esgvoc/api/project_specs.py +143 -0
  77. esgvoc/api/projects.py +1253 -0
  78. esgvoc/api/py.typed +0 -0
  79. esgvoc/api/pydantic_handler.py +146 -0
  80. esgvoc/api/report.py +127 -0
  81. esgvoc/api/search.py +171 -0
  82. esgvoc/api/universe.py +434 -0
  83. esgvoc/apps/__init__.py +6 -0
  84. esgvoc/apps/cmor_tables/__init__.py +7 -0
  85. esgvoc/apps/cmor_tables/cvs_table.py +948 -0
  86. esgvoc/apps/drs/__init__.py +0 -0
  87. esgvoc/apps/drs/constants.py +2 -0
  88. esgvoc/apps/drs/generator.py +429 -0
  89. esgvoc/apps/drs/report.py +540 -0
  90. esgvoc/apps/drs/validator.py +312 -0
  91. esgvoc/apps/ga/__init__.py +104 -0
  92. esgvoc/apps/ga/example_usage.py +315 -0
  93. esgvoc/apps/ga/models/__init__.py +47 -0
  94. esgvoc/apps/ga/models/netcdf_header.py +306 -0
  95. esgvoc/apps/ga/models/validator.py +491 -0
  96. esgvoc/apps/ga/test_ga.py +161 -0
  97. esgvoc/apps/ga/validator.py +277 -0
  98. esgvoc/apps/jsg/json_schema_generator.py +341 -0
  99. esgvoc/apps/jsg/templates/template.jinja +241 -0
  100. esgvoc/apps/test_cv/README.md +214 -0
  101. esgvoc/apps/test_cv/__init__.py +0 -0
  102. esgvoc/apps/test_cv/cv_tester.py +1611 -0
  103. esgvoc/apps/test_cv/example_usage.py +216 -0
  104. esgvoc/apps/vr/__init__.py +12 -0
  105. esgvoc/apps/vr/build_variable_registry.py +71 -0
  106. esgvoc/apps/vr/example_usage.py +60 -0
  107. esgvoc/apps/vr/vr_app.py +333 -0
  108. esgvoc/cli/clean.py +304 -0
  109. esgvoc/cli/cmor.py +46 -0
  110. esgvoc/cli/config.py +1300 -0
  111. esgvoc/cli/drs.py +267 -0
  112. esgvoc/cli/find.py +138 -0
  113. esgvoc/cli/get.py +155 -0
  114. esgvoc/cli/install.py +41 -0
  115. esgvoc/cli/main.py +60 -0
  116. esgvoc/cli/offline.py +269 -0
  117. esgvoc/cli/status.py +79 -0
  118. esgvoc/cli/test_cv.py +258 -0
  119. esgvoc/cli/valid.py +147 -0
  120. esgvoc/core/constants.py +17 -0
  121. esgvoc/core/convert.py +0 -0
  122. esgvoc/core/data_handler.py +206 -0
  123. esgvoc/core/db/__init__.py +3 -0
  124. esgvoc/core/db/connection.py +40 -0
  125. esgvoc/core/db/models/mixins.py +25 -0
  126. esgvoc/core/db/models/project.py +102 -0
  127. esgvoc/core/db/models/universe.py +98 -0
  128. esgvoc/core/db/project_ingestion.py +231 -0
  129. esgvoc/core/db/universe_ingestion.py +172 -0
  130. esgvoc/core/exceptions.py +33 -0
  131. esgvoc/core/logging_handler.py +26 -0
  132. esgvoc/core/repo_fetcher.py +345 -0
  133. esgvoc/core/service/__init__.py +41 -0
  134. esgvoc/core/service/configuration/config_manager.py +196 -0
  135. esgvoc/core/service/configuration/setting.py +363 -0
  136. esgvoc/core/service/data_merger.py +634 -0
  137. esgvoc/core/service/esg_voc.py +77 -0
  138. esgvoc/core/service/resolver_config.py +56 -0
  139. esgvoc/core/service/state.py +324 -0
  140. esgvoc/core/service/string_heuristics.py +98 -0
  141. esgvoc/core/service/term_cache.py +108 -0
  142. esgvoc/core/service/uri_resolver.py +133 -0
  143. esgvoc-2.0.2.dist-info/METADATA +82 -0
  144. esgvoc-2.0.2.dist-info/RECORD +147 -0
  145. esgvoc-2.0.2.dist-info/WHEEL +4 -0
  146. esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
  147. esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0
esgvoc/cli/valid.py ADDED
@@ -0,0 +1,147 @@
1
+
2
+ import re
3
+ from typing import List
4
+
5
+ import typer
6
+ from rich.console import Console
7
+ from rich.table import Table
8
+
9
+ from esgvoc.api.projects import valid_term, valid_term_in_all_projects, valid_term_in_collection, valid_term_in_project
10
+
11
+ app = typer.Typer()
12
+ console = Console()
13
+
14
+
15
@app.command()
def valid(
    strings_targets: List[str] = typer.Argument(
        ...,
        help=(
            "Pairs of strings to validate against a key in the form '<StringToValidate> <Project:Collection:Term>'.\n"
            "Multiple pairs can be provided. The key '<Project:Collection:Term>' consists of three parts:\n"
            "- 'Project' (optional)\n"
            "- 'Collection' (optional)\n"
            "- 'Term' (optional)\n"
            "Only the ':' separators are mandatory. For example:\n"
            " - 'my_string ::'\n"
            " - 'my_string Project::'\n"
            " - 'my_string Project:Collection:'\n"
            " - 'my_string Project:Collection:Term'\n"
            "The function validates based on the provided parts."
        )
    ),
    verbose: bool = typer.Option(False, "-v", "--verbose", help="Provide detailed validation results")
):
    """
    Validates one or more strings against specified Project:Collection:Term configurations.\n
    \n
    Depending on the provided key structure, the function performs different validation operations:\n
    - If all are None (e.g., "::"), validates the term across all projects (`valid_term_in_all_projects`).\n
    - If Term is None (e.g., "Project:Collection:"), validates the term in the specified collection (`valid_term_in_collection`).\n
    - If Term and Collection are None (e.g., "Project::"), validates the term in the specified project (`valid_term_in_project`).\n
    - If all are specified (e.g., "Project:Collection:Term"), validates the term exactly (`valid_term`).\n
    \n
    Parameters:\n
    \tstrings_targets (List[str]): A list of validation pairs, where each pair consists of:\n
    \t\t- A string to validate.\n
    \t\t- A key in the form '<Project:Collection:Term>'.\n
    Usage :\n
    \tValid one:\n
    \tesgvocab valid IPSL cmip6plus:institution_id:ipsl\n
    \tesgvocab valid IPSL cmip6plus:institution_id:\n
    \tesgvocab valid IPSL cmip6plus::\n
    \tesgvocab valid IPSL ::\n
    \n
    \tInvalid one:\n
    \tesgvocab valid IPSL_invalid cmip6plus:institution_id:ipsl\n
    \tesgvocab valid IPSL cmip6plus:institution_id:isl <= term cant be found\n
    \tesgvocab valid IPSL cmip6plus:institutin_id:ispl <= collection cant be found\n
    \tesgvocab valid IPSL cmip6pls:institution_id:ispl <= project cant be found\n
    \n
    \tMultiple validation for all known projects: \n
    \tesgvocab valid IPSL :: IPS :: \n
    \t\tresult will be [True, False]\n
    \n
    \tesgvocab valid --verbose IPS :: IPSL ::\n
    Returns:\n
    \tList[bool]: Validation results for each pair in the input.\n
    """
    results = []
    detailed_results = []

    # Arguments must come in pairs: <string> <Project:Collection:Term>.
    # The original code raised an IndexError on an odd argument count.
    if len(strings_targets) % 2 != 0:
        console.print("[red]Arguments must be given as '<string> <Project:Collection:Term>' pairs.[/red]")
        raise typer.Exit(code=1)

    # Combine string and target into pairs.
    pairs = [strings_targets[i] + " " + strings_targets[i + 1] for i in range(0, len(strings_targets), 2)]

    # Validate each string against each target.
    for validation in pairs:
        match = re.match(r"(.+)\s+([^:]*):([^:]*):([^:]*)", validation)
        if not match:
            console.print(f"[red]Invalid input format: {validation}[/red]")
            results.append(False)
            detailed_results.append({"validation": validation, "errors": ["Invalid input format"]})
            continue

        string_to_validate, project, collection, term = match.groups()
        exception_message = None
        try:
            # Dispatch on which parts of the key were provided.
            if project and collection and term:
                validation_result = valid_term(string_to_validate, project, collection, term)
            elif project and collection:
                validation_result = valid_term_in_collection(string_to_validate, project, collection)
            elif project:
                validation_result = valid_term_in_project(string_to_validate, project)
            else:
                validation_result = valid_term_in_all_projects(string_to_validate)
        except Exception as e:
            validation_result = False
            exception_message = repr(e)

        # Handle validation result.
        if validation_result:
            results.append(True)
            detailed_results.append({"validation": validation, "errors": []})
        else:
            # Bug fix: always record exactly one boolean and one detail entry per
            # pair. The original code skipped `results.append(False)` on the
            # exception and report-with-errors paths, letting `results` drift out
            # of sync with `detailed_results`.
            results.append(False)
            if exception_message is not None:
                detailed_results.append({"validation": validation, "errors": [exception_message]})
            elif validation_result == []:
                # The search APIs return an empty list when no term matches.
                detailed_results.append({"validation": validation, "errors": ["did not found matching term"]})
            elif project and collection and term:
                # `valid_term` returns a report object carrying explicit errors.
                errors = [str(error) for error in validation_result.errors]
                detailed_results.append({"validation": validation, "errors": errors})
            else:
                detailed_results.append({"validation": validation, "errors": ["validation failed"]})

    # Output results.
    if verbose:
        table = Table(title="Validation Results")
        table.add_column("String", style="cyan")
        table.add_column("Key", style="magenta")
        table.add_column("Result", style="green" if all(results) else "red")
        table.add_column("Errors", style="red")

        for detail in detailed_results:
            validation_parts = detail["validation"].split()
            string = validation_parts[0]
            key = validation_parts[1] if len(validation_parts) > 1 else "::"
            result = "✅ Valid" if detail["errors"] == [] else "❌ Invalid"
            errors = "\n".join(detail["errors"]) if detail["errors"] else "None"
            table.add_row(string, key, result, errors)

        console.print(table)
    else:
        console.print(results)

    return results
@@ -0,0 +1,17 @@
1
# Separator between the directory-name part and the file-name part of a term path.
DIRNAME_AND_FILENAME_SEPARATOR = "_"
# Well-known file names of the specs documents shipped in a project repository.
PROJECT_SPECS_FILENAME = "project_specs.yaml"
DRS_SPECS_FILENAME = "drs_specs.yaml"
CATALOG_SPECS_FILENAME = "catalog_specs.yaml"
ATTRIBUTES_SPECS_FILENAME = "attr_specs.yaml"
# JSON key holding the project identifier in the project specs.
PROJECT_ID_JSON_KEY = "project_id"
# The JSON-LD context file of a collection ('000_' keeps it first in listings — TODO confirm).
CONTEXT_FILENAME = "000_context.jsonld"
# Standard JSON-LD key for the context object.
CONTEXT_JSON_KEY = "@context"
# Keys expected inside a term's JSON document.
TERM_ID_JSON_KEY = "id"
COMPOSITE_PARTS_JSON_KEY = "parts"
COMPOSITE_SEPARATOR_JSON_KEY = "separator"
COMPOSITE_REQUIRED_KEY = "is_required"
PATTERN_JSON_KEY = "regex"
TERM_TYPE_JSON_KEY = "type"
# Key holding a term's DRS name (indexed in the pterms table).
DRS_SPECS_JSON_KEY = "drs_name"
# SQLite integer primary keys start at 1.
SQLITE_FIRST_PK = 1
# Standard JSON-LD key for the base IRI of a data descriptor.
DATA_DESCRIPTOR_JSON_KEY = "@base"
esgvoc/core/convert.py ADDED
File without changes
@@ -0,0 +1,206 @@
1
+ import os
2
+ import json
3
+ import logging
4
+ from functools import cached_property
5
+ from typing import Any, Optional, Dict
6
+ import requests
7
+ from pyld import jsonld
8
+ from pydantic import BaseModel, model_validator, ConfigDict
9
+
10
+ # Configure logging
11
+ _LOGGER = logging.getLogger(__name__)
12
+
13
+
14
def unified_document_loader(uri: str) -> Dict:
    """Load a JSON document from a local file path or a remote HTTP(S) URI.

    :param uri: either an ``http://``/``https://`` URL or a local file path.
    :returns: the parsed JSON document; an empty dict when a remote fetch fails
              (remote failures are logged and swallowed on purpose).
    :raises OSError, json.JSONDecodeError: for local files that are missing or invalid.
    """
    if uri.startswith(("http://", "https://")):
        # NOTE(review): verify=False disables TLS certificate checking; kept for
        # backward compatibility, but this should be confirmed and ideally removed.
        response = requests.get(uri, headers={"accept": "application/json"}, verify=False)
        if response.status_code == 200:
            return response.json()
        _LOGGER.error(f"Failed to fetch remote document: {response.status_code} - {response.text}")
        return {}
    else:
        # Fix: JSON is UTF-8 by specification — decode explicitly instead of
        # relying on the (platform-dependent) locale encoding.
        with open(uri, "r", encoding="utf-8") as f:
            return json.load(f)
26
+
27
+
28
class JsonLdResource(BaseModel):
    """Pydantic model wrapping a JSON-LD document identified by a URI.

    Lazily loads the raw JSON, its ``@context``, and the expanded and normalized
    forms (all cached per instance via ``cached_property``).
    """

    # URI of the JSON-LD document: a remote URL or a local file path.
    uri: str
    # Optional local directory; normalized to an absolute path with a trailing '/'.
    local_path: Optional[str] = None

    model_config = ConfigDict(arbitrary_types_allowed=True)

    @model_validator(mode="before")
    @classmethod
    def set_local_path(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Set the local path to an absolute path if provided."""
        local_path = values.get("local_path")
        if local_path:
            values["local_path"] = os.path.abspath(local_path) + "/"
        # NOTE(review): installs a *process-global* pyld document loader as a
        # side effect of validating any instance — confirm this is intended.
        jsonld.set_document_loader(
            lambda uri, options: {
                "contextUrl": None,  # No special context URL
                "documentUrl": uri,  # The document's actual URL
                # The parsed JSON-LD document
                "document": unified_document_loader(uri),
            }
        )
        return values

    @cached_property
    def json_dict(self) -> Dict:
        """Fetch the original JSON data."""
        _LOGGER.debug(f"Fetching JSON data from {self.uri}")
        return unified_document_loader(self.uri)

    def _preprocess_nested_contexts(self, data: dict, context: dict) -> dict:
        """
        Pre-process data to resolve @base in nested @context definitions.
        This works around pyld's limitation with scoped contexts.

        Args:
            data: The JSON-LD data to preprocess
            context: The @context dictionary

        Returns:
            Preprocessed data with resolved nested contexts
        """
        # Non-dict payloads (scalars) pass through untouched.
        if not isinstance(data, dict):
            return data

        result = {}

        for key, value in data.items():
            # The context itself is copied verbatim, never rewritten.
            if key == "@context":
                result[key] = value
                continue

            # Check if this term has a nested @context with @base
            term_def = context.get(key, {})
            if isinstance(term_def, dict) and "@context" in term_def:
                nested_context = term_def["@context"]
                base_url = nested_context.get("@base", "")

                # If the value is a string and we have a @base, prepend it
                if isinstance(value, str) and base_url and term_def.get("@type") == "@id":
                    # Don't prepend if it's already an absolute URL
                    if not value.startswith("http://") and not value.startswith("https://"):
                        # Return as {"@id": "full_url"} to preserve @id semantics
                        result[key] = {"@id": base_url + value}
                    else:
                        result[key] = {"@id": value}
                elif isinstance(value, list):
                    # Process each item in the list
                    result[key] = []
                    for item in value:
                        if isinstance(item, dict):
                            result[key].append(self._preprocess_nested_contexts(item, context))
                        elif isinstance(item, str) and base_url and term_def.get("@type") == "@id":
                            # Convert string items to {"@id": "..."} when @type is @id
                            if not item.startswith("http://") and not item.startswith("https://"):
                                result[key].append({"@id": base_url + item})
                            else:
                                result[key].append({"@id": item})
                        else:
                            result[key].append(item)
                elif isinstance(value, dict):
                    result[key] = self._preprocess_nested_contexts(value, context)
                else:
                    result[key] = value
            elif isinstance(value, list):
                # No scoped context for this term: recurse into dict items only.
                result[key] = []
                for item in value:
                    if isinstance(item, dict):
                        result[key].append(self._preprocess_nested_contexts(item, context))
                    else:
                        result[key].append(item)
            elif isinstance(value, dict):
                result[key] = self._preprocess_nested_contexts(value, context)
            else:
                result[key] = value

        return result

    @cached_property
    def expanded(self) -> Any:
        """Expand the JSON-LD data with preprocessing for nested contexts."""
        _LOGGER.debug(f"Expanding JSON-LD data for {self.uri}")

        # Get the data and context
        data = self.json_dict

        # Get the context - it should already be the inner dictionary
        context_dict = self.context
        if isinstance(context_dict, dict) and "@context" in context_dict:
            context_dict = context_dict["@context"]

        # Preprocess to handle nested contexts with @base
        preprocessed = self._preprocess_nested_contexts(data, context_dict)

        # Add the context back if it was in the original data
        if "@context" in data:
            preprocessed["@context"] = data["@context"]

        # Expand the preprocessed data
        return jsonld.expand(preprocessed, options={"base": self.uri})

    @cached_property
    def context(self) -> Dict:
        """Fetch and return the JSON content of the '@context'."""

        # NOTE(review): assumes self.json_dict has an '@context' key holding a
        # path relative to the document's directory — raises KeyError otherwise.
        context_data = JsonLdResource(uri="/".join(self.uri.split("/")[:-1]) + "/" + self.json_dict["@context"])
        # Works only in relative path declaration

        context_value = context_data.json_dict
        if isinstance(context_value, str):
            # It's a URI, fetch it
            _LOGGER.info(f"Fetching context from URI: {context_value}")
            return unified_document_loader(context_value)
        elif isinstance(context_value, dict):
            # Embedded context
            _LOGGER.info("Using embedded context.")
            return context_value
        else:
            _LOGGER.warning("No valid '@context' found.")
            return {}

    @cached_property
    def normalized(self) -> str:
        """Normalize the JSON-LD data."""
        _LOGGER.info(f"Normalizing JSON-LD data for {self.uri}")
        # NOTE(review): passes the URI string (not the parsed/preprocessed
        # document) to jsonld.normalize, so pyld re-fetches and expands the raw
        # document itself — confirm this bypass of `expanded` is intended.
        return jsonld.normalize(self.uri, options={"algorithm": "URDNA2015", "format": "application/n-quads"})

    def _extract_model_key(self, uri: str) -> Optional[str]:
        """Extract a model key from the URI."""
        # The key is the second-to-last path segment (e.g. the data descriptor name).
        parts = uri.strip("/").split("/")
        if len(parts) >= 2:
            return parts[-2]
        return None

    @property
    def info(self) -> str:
        """Return a detailed summary of the data."""
        res = f"{'#' * 100}\n"
        res += f"### {self.uri.split('/')[-1]} ###\n"
        res += f"JSON Version:\n {json.dumps(self.json_dict, indent=2)}\n"
        res += f"URI: {self.uri}\n"
        # NOTE(review): the 'JSON Version' section is emitted twice (here and
        # above) — looks like an accidental duplication; confirm before removing.
        res += f"JSON Version:\n {json.dumps(self.json_dict, indent=2)}\n"
        res += f"Expanded Version:\n {json.dumps(self.expanded, indent=2)}\n"
        res += f"Normalized Version:\n {self.normalized}\n"
        return res
193
+
194
+
195
# Manual smoke test: prints the JSON, expanded and normalized forms of one term
# file. Requires a local clone under .cache/repos (the commented examples show
# the online variants; 'Data' appears to be a former name of JsonLdResource —
# TODO confirm).
if __name__ == "__main__":
    # For Universe
    # online
    # d = Data(uri = "https://espri-mod.github.io/mip-cmor-tables/activity/cmip.json")
    # print(d.info)
    # offline
    # print(Data(uri = ".cache/repos/mip-cmor-tables/activity/cmip.json").info)
    # for Project
    # d = Data(uri = "https://espri-mod.github.io/CMIP6Plus_CVs/activity_id/cmip.json")
    # print(d.info)
    # offline
    print(JsonLdResource(uri=".cache/repos/CMIP6Plus_CVs/activity_id/cmip.json").info)
@@ -0,0 +1,3 @@
1
+ from esgvoc.core.db.connection import DBConnection, read_json_file
2
+
3
+ __all__ = ["DBConnection", "read_json_file"]
@@ -0,0 +1,40 @@
1
+ import json
2
+ from pathlib import Path
3
+
4
+ import yaml
5
+ from sqlalchemy import Engine
6
+ from sqlmodel import Session, create_engine
7
+
8
+
9
class DBConnection:
    """Lightweight wrapper binding a SQLModel/SQLAlchemy engine to a SQLite file."""

    SQLITE_URL_PREFIX = 'sqlite://'

    def __init__(self, db_file_path: Path, echo: bool = False) -> None:
        """
        :param db_file_path: path of the SQLite database file.
        :param echo: when True, the engine logs every emitted SQL statement.
        """
        self.engine = create_engine(f'{DBConnection.SQLITE_URL_PREFIX}/{db_file_path}', echo=echo)
        self.name = db_file_path.stem
        self.file_path = db_file_path.absolute()

    def set_echo(self, echo: bool) -> None:
        """Toggle SQL statement logging on the underlying engine."""
        self.engine.echo = echo

    def get_engine(self) -> Engine:
        """Return the underlying SQLAlchemy engine."""
        return self.engine

    def create_session(self) -> Session:
        """Open a new SQLModel session bound to this connection's engine."""
        return Session(self.engine)

    def get_name(self) -> str:
        """Return the database name (the stem of the SQLite file)."""
        # Fix: annotation was `str | None`, but Path.stem is always a str.
        return self.name

    def get_file_path(self) -> Path:
        """Return the absolute path of the SQLite database file."""
        return self.file_path
31
+
32
+
33
def read_json_file(json_file_path: Path) -> dict:
    """Parse the JSON file at *json_file_path* and return its content."""
    with json_file_path.open() as file:
        return json.load(file)
35
+
36
+
37
def read_yaml_file(yaml_file_path: Path) -> dict:
    """Parse the YAML file at *yaml_file_path* and return its content.

    Note: ``yaml.safe_load`` can return any YAML top-level value (or ``None``
    for an empty file); the declared ``dict`` return type assumes the file
    holds a mapping — TODO confirm callers only pass mapping documents.
    """
    # Fix: decode explicitly as UTF-8 instead of the platform locale encoding.
    with open(yaml_file_path, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)
@@ -0,0 +1,25 @@
1
+ from enum import Enum
2
+
3
+ from sqlmodel import Field
4
+
5
+
6
class TermKind(Enum):
    """
    The kinds of term.
    """
    PLAIN = "plain"
    """End written term."""
    PATTERN = "pattern"
    """Regex based terms"""
    COMPOSITE = "composite"
    """Term composed of terms."""
    MIXED = 'mixed'
    """To be defined."""


class PkMixin:
    """Adds an auto-assigned integer primary key (None until the row is inserted)."""
    pk: int | None = Field(default=None, primary_key=True)


class IdMixin:
    """Adds a string identifier column, indexed for fast lookups."""
    id: str = Field(index=True)
@@ -0,0 +1,102 @@
1
+ import logging
2
+ from pathlib import Path
3
+
4
+ import sqlalchemy as sa
5
+ from sqlalchemy import text
6
+ from sqlalchemy.dialects.sqlite import JSON
7
+ from sqlmodel import Column, Field, Relationship, SQLModel
8
+
9
+ import esgvoc.core.db.connection as db
10
+ from esgvoc.core.db.models.mixins import IdMixin, PkMixin, TermKind
11
+ from esgvoc.core.exceptions import EsgvocDbError
12
+
13
+ _LOGGER = logging.getLogger(__name__)
14
+
15
+
16
class Project(SQLModel, PkMixin, IdMixin, table=True):
    """A vocabulary project with its raw specs and git provenance."""
    __tablename__ = "projects"
    specs: dict = Field(sa_column=sa.Column(JSON))  # raw project specs stored as a JSON column
    git_hash: str  # git hash recorded at ingestion time — presumably the project repo commit
    collections: list["PCollection"] = Relationship(back_populates="project")


class PCollection(SQLModel, PkMixin, IdMixin, table=True):
    """A collection of terms belonging to a project."""
    __tablename__ = "pcollections"
    data_descriptor_id: str = Field(index=True)  # id of the matching universe data descriptor — TODO confirm
    context: dict = Field(sa_column=sa.Column(JSON))  # JSON-LD context of the collection
    project_pk: int | None = Field(default=None, foreign_key="projects.pk")
    project: Project = Relationship(back_populates="collections")
    terms: list["PTerm"] = Relationship(back_populates="collection")
    term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))


# Well, the following instructions are not data duplication. It is more building an index.
# Read: https://sqlite.org/fts5.html
class PCollectionFTS5(SQLModel, PkMixin, IdMixin, table=True):
    """FTS5 shadow of PCollection; the virtual table itself is created via raw SQL."""
    __tablename__ = "pcollections_fts5"
    data_descriptor_id: str
    context: dict = Field(sa_column=sa.Column(JSON))
    project_pk: int | None = Field(default=None, foreign_key="projects.pk")
    term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))


class PTerm(SQLModel, PkMixin, IdMixin, table=True):
    """A project term with its JSON specs."""
    __tablename__ = "pterms"
    specs: dict = Field(sa_column=sa.Column(JSON))
    kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
    collection_pk: int | None = Field(default=None, foreign_key="pcollections.pk")
    collection: PCollection = Relationship(back_populates="terms")
    # Expression index on the 'drs_name' key inside the JSON specs column, so
    # lookups by DRS name avoid scanning/parsing every row.
    __table_args__ = (sa.Index("drs_name_index", specs.sa_column["drs_name"]), )  # type: ignore


# Well, the following instructions are not data duplication. It is more building an index.
# Read: https://sqlite.org/fts5.html
class PTermFTS5(SQLModel, PkMixin, IdMixin, table=True):
    """FTS5 shadow of PTerm; the virtual table itself is created via raw SQL."""
    __tablename__ = "pterms_fts5"
    specs: dict = Field(sa_column=sa.Column(JSON))
    kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
    collection_pk: int | None = Field(default=None, foreign_key="pcollections.pk")
59
+
60
+
61
def project_create_db(db_file_path: Path):
    """Create the project SQLite database: regular tables plus FTS5 index tables.

    :param db_file_path: path of the SQLite file to create.
    :raises EsgvocDbError: if the file, the tables or the FTS5 virtual tables
        cannot be created.
    """
    try:
        connection = db.DBConnection(db_file_path)
    except Exception as e:
        # Fix: message said 'SQlite'; spelled consistently with universe.py.
        msg = f'unable to create SQLite file at {db_file_path}'
        _LOGGER.fatal(msg)
        raise EsgvocDbError(msg) from e
    try:
        # Do not include the FTS5 tables: they are built from raw SQL queries below.
        tables_to_be_created = [SQLModel.metadata.tables['projects'],
                                SQLModel.metadata.tables['pcollections'],
                                SQLModel.metadata.tables['pterms']]
        SQLModel.metadata.create_all(connection.get_engine(), tables=tables_to_be_created)
    except Exception as e:
        msg = f'unable to create tables in SQLite database at {db_file_path}'
        _LOGGER.fatal(msg)
        raise EsgvocDbError(msg) from e
    _create_fts5_table(connection, 'pterms_fts5',
                       "CREATE VIRTUAL TABLE IF NOT EXISTS pterms_fts5 USING "
                       "fts5(pk, id, specs, kind, collection_pk, content=pterms, content_rowid=pk, prefix=3);",
                       db_file_path)
    _create_fts5_table(connection, 'pcollections_fts5',
                       'CREATE VIRTUAL TABLE IF NOT EXISTS pcollections_fts5 USING '
                       'fts5(pk, id, data_descriptor_id, context, project_pk, '
                       'term_kind, content=pcollections, content_rowid=pk, prefix=3);',
                       db_file_path)


def _create_fts5_table(connection: "db.DBConnection", table_name: str, sql_query: str,
                       db_file_path: Path) -> None:
    """Run a raw FTS5 virtual-table creation query, wrapping failures in EsgvocDbError."""
    try:
        with connection.create_session() as session:
            session.exec(text(sql_query))  # type: ignore
            session.commit()
    except Exception as e:
        msg = f'unable to create table {table_name} for {db_file_path}'
        _LOGGER.fatal(msg)
        raise EsgvocDbError(msg) from e


if __name__ == "__main__":
    pass
@@ -0,0 +1,98 @@
1
+ import logging
2
+ from pathlib import Path
3
+
4
+ import sqlalchemy as sa
5
+ from sqlalchemy import text
6
+ from sqlalchemy.dialects.sqlite import JSON
7
+ from sqlmodel import Column, Field, Relationship, SQLModel
8
+
9
+ import esgvoc.core.db.connection as db
10
+ from esgvoc.core.db.models.mixins import IdMixin, PkMixin, TermKind
11
+ from esgvoc.core.exceptions import EsgvocDbError
12
+
13
+ _LOGGER = logging.getLogger(__name__)
14
+
15
+
16
class Universe(SQLModel, PkMixin, table=True):
    """The universe CV with its git provenance; owns the data descriptors."""
    __tablename__ = "universes"
    git_hash: str  # git hash recorded at ingestion time — presumably the universe repo commit
    data_descriptors: list["UDataDescriptor"] = Relationship(back_populates="universe")


class UDataDescriptor(SQLModel, PkMixin, IdMixin, table=True):
    """A universe data descriptor grouping terms of a common kind."""
    __tablename__ = "udata_descriptors"
    context: dict = Field(sa_column=sa.Column(JSON))  # JSON-LD context of the data descriptor
    universe_pk: int | None = Field(default=None, foreign_key="universes.pk")
    universe: Universe = Relationship(back_populates="data_descriptors")
    terms: list["UTerm"] = Relationship(back_populates="data_descriptor")
    term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))


# Well, the following instructions are not data duplication. It is more building an index.
# Read: https://sqlite.org/fts5.html
class UDataDescriptorFTS5(SQLModel, PkMixin, IdMixin, table=True):
    """FTS5 shadow of UDataDescriptor; the virtual table itself is created via raw SQL."""
    __tablename__ = "udata_descriptors_fts5"
    context: dict = Field(sa_column=sa.Column(JSON))
    universe_pk: int | None = Field(default=None, foreign_key="universes.pk")
    term_kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))


class UTerm(SQLModel, PkMixin, IdMixin, table=True):
    """A universe term with its JSON specs."""
    __tablename__ = "uterms"
    specs: dict = Field(sa_column=sa.Column(JSON))
    kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
    data_descriptor_pk: int | None = Field(default=None, foreign_key="udata_descriptors.pk")
    data_descriptor: UDataDescriptor = Relationship(back_populates="terms")


# Well, the following instructions are not data duplication. It is more building an index.
# Read: https://sqlite.org/fts5.html
class UTermFTS5(SQLModel, PkMixin, IdMixin, table=True):
    """FTS5 shadow of UTerm; the virtual table itself is created via raw SQL."""
    __tablename__ = "uterms_fts5"
    specs: dict = Field(sa_column=sa.Column(JSON))
    kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
    data_descriptor_pk: int | None = Field(default=None, foreign_key="udata_descriptors.pk")
56
+
57
+ def universe_create_db(db_file_path: Path) -> None:
58
+ try:
59
+ connection = db.DBConnection(db_file_path)
60
+ except Exception as e:
61
+ msg = f'unable to create SQLite file at {db_file_path}'
62
+ _LOGGER.fatal(msg)
63
+ raise EsgvocDbError(msg) from e
64
+ try:
65
+ # Avoid creating project tables.
66
+ tables_to_be_created = [SQLModel.metadata.tables['uterms'],
67
+ SQLModel.metadata.tables['udata_descriptors'],
68
+ SQLModel.metadata.tables['universes']]
69
+ SQLModel.metadata.create_all(connection.get_engine(), tables=tables_to_be_created)
70
+ except Exception as e:
71
+ msg = f'unable to create tables in SQLite database at {db_file_path}'
72
+ _LOGGER.fatal(msg)
73
+ raise EsgvocDbError(msg) from e
74
+ try:
75
+ with connection.create_session() as session:
76
+ sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS uterms_fts5 USING ' + \
77
+ 'fts5(pk, id, specs, kind, data_descriptor_pk, content=uterms, content_rowid=pk, prefix=3);'
78
+ session.exec(text(sql_query)) # type: ignore
79
+ session.commit()
80
+ except Exception as e:
81
+ msg = f'unable to create table uterms_fts5 for {db_file_path}'
82
+ _LOGGER.fatal(msg)
83
+ raise EsgvocDbError(msg) from e
84
+ try:
85
+ with connection.create_session() as session:
86
+ sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS udata_descriptors_fts5 USING ' + \
87
+ 'fts5(pk, id, universe_pk, context, ' + \
88
+ 'term_kind, content=udata_descriptors, content_rowid=pk, prefix=3);'
89
+ session.exec(text(sql_query)) # type: ignore
90
+ session.commit()
91
+ except Exception as e:
92
+ msg = f'unable to create table udata_descriptors_fts5 for {db_file_path}'
93
+ _LOGGER.fatal(msg)
94
+ raise EsgvocDbError(msg) from e
95
+
96
+
97
+ if __name__ == "__main__":
98
+ pass