esgvoc 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- esgvoc/__init__.py +3 -0
- esgvoc/api/__init__.py +91 -0
- esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
- esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
- esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
- esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
- esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
- esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
- esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
- esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
- esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
- esgvoc/api/data_descriptors/__init__.py +159 -0
- esgvoc/api/data_descriptors/activity.py +72 -0
- esgvoc/api/data_descriptors/archive.py +5 -0
- esgvoc/api/data_descriptors/area_label.py +30 -0
- esgvoc/api/data_descriptors/branded_suffix.py +30 -0
- esgvoc/api/data_descriptors/branded_variable.py +21 -0
- esgvoc/api/data_descriptors/citation_url.py +5 -0
- esgvoc/api/data_descriptors/contact.py +5 -0
- esgvoc/api/data_descriptors/conventions.py +28 -0
- esgvoc/api/data_descriptors/creation_date.py +18 -0
- esgvoc/api/data_descriptors/data_descriptor.py +127 -0
- esgvoc/api/data_descriptors/data_specs_version.py +25 -0
- esgvoc/api/data_descriptors/date.py +5 -0
- esgvoc/api/data_descriptors/directory_date.py +22 -0
- esgvoc/api/data_descriptors/drs_specs.py +38 -0
- esgvoc/api/data_descriptors/experiment.py +215 -0
- esgvoc/api/data_descriptors/forcing_index.py +21 -0
- esgvoc/api/data_descriptors/frequency.py +48 -0
- esgvoc/api/data_descriptors/further_info_url.py +5 -0
- esgvoc/api/data_descriptors/grid.py +43 -0
- esgvoc/api/data_descriptors/horizontal_label.py +20 -0
- esgvoc/api/data_descriptors/initialization_index.py +27 -0
- esgvoc/api/data_descriptors/institution.py +80 -0
- esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
- esgvoc/api/data_descriptors/license.py +31 -0
- esgvoc/api/data_descriptors/member_id.py +9 -0
- esgvoc/api/data_descriptors/mip_era.py +26 -0
- esgvoc/api/data_descriptors/model_component.py +32 -0
- esgvoc/api/data_descriptors/models_test/models.py +17 -0
- esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
- esgvoc/api/data_descriptors/obs_type.py +5 -0
- esgvoc/api/data_descriptors/organisation.py +22 -0
- esgvoc/api/data_descriptors/physics_index.py +21 -0
- esgvoc/api/data_descriptors/product.py +16 -0
- esgvoc/api/data_descriptors/publication_status.py +5 -0
- esgvoc/api/data_descriptors/realization_index.py +24 -0
- esgvoc/api/data_descriptors/realm.py +16 -0
- esgvoc/api/data_descriptors/regex.py +5 -0
- esgvoc/api/data_descriptors/region.py +35 -0
- esgvoc/api/data_descriptors/resolution.py +7 -0
- esgvoc/api/data_descriptors/source.py +120 -0
- esgvoc/api/data_descriptors/source_type.py +5 -0
- esgvoc/api/data_descriptors/sub_experiment.py +5 -0
- esgvoc/api/data_descriptors/table.py +28 -0
- esgvoc/api/data_descriptors/temporal_label.py +20 -0
- esgvoc/api/data_descriptors/time_range.py +17 -0
- esgvoc/api/data_descriptors/title.py +5 -0
- esgvoc/api/data_descriptors/tracking_id.py +67 -0
- esgvoc/api/data_descriptors/variable.py +56 -0
- esgvoc/api/data_descriptors/variant_label.py +25 -0
- esgvoc/api/data_descriptors/vertical_label.py +20 -0
- esgvoc/api/project_specs.py +143 -0
- esgvoc/api/projects.py +1253 -0
- esgvoc/api/py.typed +0 -0
- esgvoc/api/pydantic_handler.py +146 -0
- esgvoc/api/report.py +127 -0
- esgvoc/api/search.py +171 -0
- esgvoc/api/universe.py +434 -0
- esgvoc/apps/__init__.py +6 -0
- esgvoc/apps/cmor_tables/__init__.py +7 -0
- esgvoc/apps/cmor_tables/cvs_table.py +948 -0
- esgvoc/apps/drs/__init__.py +0 -0
- esgvoc/apps/drs/constants.py +2 -0
- esgvoc/apps/drs/generator.py +429 -0
- esgvoc/apps/drs/report.py +540 -0
- esgvoc/apps/drs/validator.py +312 -0
- esgvoc/apps/ga/__init__.py +104 -0
- esgvoc/apps/ga/example_usage.py +315 -0
- esgvoc/apps/ga/models/__init__.py +47 -0
- esgvoc/apps/ga/models/netcdf_header.py +306 -0
- esgvoc/apps/ga/models/validator.py +491 -0
- esgvoc/apps/ga/test_ga.py +161 -0
- esgvoc/apps/ga/validator.py +277 -0
- esgvoc/apps/jsg/json_schema_generator.py +341 -0
- esgvoc/apps/jsg/templates/template.jinja +241 -0
- esgvoc/apps/test_cv/README.md +214 -0
- esgvoc/apps/test_cv/__init__.py +0 -0
- esgvoc/apps/test_cv/cv_tester.py +1611 -0
- esgvoc/apps/test_cv/example_usage.py +216 -0
- esgvoc/apps/vr/__init__.py +12 -0
- esgvoc/apps/vr/build_variable_registry.py +71 -0
- esgvoc/apps/vr/example_usage.py +60 -0
- esgvoc/apps/vr/vr_app.py +333 -0
- esgvoc/cli/clean.py +304 -0
- esgvoc/cli/cmor.py +46 -0
- esgvoc/cli/config.py +1300 -0
- esgvoc/cli/drs.py +267 -0
- esgvoc/cli/find.py +138 -0
- esgvoc/cli/get.py +155 -0
- esgvoc/cli/install.py +41 -0
- esgvoc/cli/main.py +60 -0
- esgvoc/cli/offline.py +269 -0
- esgvoc/cli/status.py +79 -0
- esgvoc/cli/test_cv.py +258 -0
- esgvoc/cli/valid.py +147 -0
- esgvoc/core/constants.py +17 -0
- esgvoc/core/convert.py +0 -0
- esgvoc/core/data_handler.py +206 -0
- esgvoc/core/db/__init__.py +3 -0
- esgvoc/core/db/connection.py +40 -0
- esgvoc/core/db/models/mixins.py +25 -0
- esgvoc/core/db/models/project.py +102 -0
- esgvoc/core/db/models/universe.py +98 -0
- esgvoc/core/db/project_ingestion.py +231 -0
- esgvoc/core/db/universe_ingestion.py +172 -0
- esgvoc/core/exceptions.py +33 -0
- esgvoc/core/logging_handler.py +26 -0
- esgvoc/core/repo_fetcher.py +345 -0
- esgvoc/core/service/__init__.py +41 -0
- esgvoc/core/service/configuration/config_manager.py +196 -0
- esgvoc/core/service/configuration/setting.py +363 -0
- esgvoc/core/service/data_merger.py +634 -0
- esgvoc/core/service/esg_voc.py +77 -0
- esgvoc/core/service/resolver_config.py +56 -0
- esgvoc/core/service/state.py +324 -0
- esgvoc/core/service/string_heuristics.py +98 -0
- esgvoc/core/service/term_cache.py +108 -0
- esgvoc/core/service/uri_resolver.py +133 -0
- esgvoc-2.0.2.dist-info/METADATA +82 -0
- esgvoc-2.0.2.dist-info/RECORD +147 -0
- esgvoc-2.0.2.dist-info/WHEEL +4 -0
- esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
- esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Main validator interface for NetCDF global attributes.
|
|
3
|
+
|
|
4
|
+
This module provides the high-level API for validating NetCDF global attributes
|
|
5
|
+
against project specifications loaded from the esgvoc database.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Optional, Dict, Any, List
|
|
9
|
+
|
|
10
|
+
import esgvoc.api.projects as projects
|
|
11
|
+
from esgvoc.api.project_specs import AttributeSpecification
|
|
12
|
+
from esgvoc.core.exceptions import EsgvocNotFoundError
|
|
13
|
+
from .models import (
|
|
14
|
+
NetCDFHeader,
|
|
15
|
+
NetCDFHeaderParser,
|
|
16
|
+
ValidationReport,
|
|
17
|
+
ValidationSeverity,
|
|
18
|
+
)
|
|
19
|
+
from .models.validator import GlobalAttributeValidator
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class GAValidator:
    """
    Main validator class for the GA (Global Attributes) application.

    This class provides a high-level interface for validating NetCDF global
    attributes against project specifications loaded from the esgvoc database.
    """

    def __init__(self, project_id: str = "cmip6"):
        """
        Initialize the GA validator.

        :param project_id: Project identifier for validation
        :raises EsgvocNotFoundError: If the project is not in the database.
        :raises ValueError: If the project has no attribute specifications.
        """
        self.project_id = project_id

        # Load attribute specifications from database
        self.attribute_specs = self._load_from_database()

        # Initialize the validator
        self.validator = GlobalAttributeValidator(self.attribute_specs, project_id)

    def _load_from_database(self) -> AttributeSpecification:
        """Load attribute specifications from the esgvoc database."""
        project = projects.get_project(self.project_id)

        if project is None:
            raise EsgvocNotFoundError(f"Project '{self.project_id}' not found in database")

        if project.attr_specs is None:
            raise ValueError(f"Project '{self.project_id}' has no attribute specifications")

        return project.attr_specs

    def validate_from_ncdump(self, ncdump_output: str, filename: Optional[str] = None) -> ValidationReport:
        """
        Validate global attributes from ncdump command output.

        :param ncdump_output: Output from ncdump -h command
        :param filename: Optional filename for reporting
        :return: Validation report
        """
        # Parse the NetCDF header
        try:
            header = NetCDFHeaderParser.parse_from_ncdump(ncdump_output)
        except Exception as e:
            # Return error report if parsing fails: a parse failure is reported as a
            # synthetic 'parse_error' issue instead of raising to the caller.
            report = ValidationReport(filename=filename, project_id=self.project_id, is_valid=False)
            report.add_issue(
                {
                    "attribute_name": "parse_error",
                    "severity": ValidationSeverity.ERROR,
                    "message": f"Failed to parse ncdump output: {str(e)}",
                    "actual_value": None,
                    "expected_value": None,
                    "source_collection": None,
                }
            )
            return report

        # Set filename if provided (overrides any filename the parser extracted).
        if filename:
            header.filename = filename

        # Validate global attributes
        return self.validator.validate(header.global_attributes, header.filename)

    def validate_from_attributes_dict(
        self, attributes: Dict[str, Any], filename: Optional[str] = None
    ) -> ValidationReport:
        """
        Validate global attributes from a dictionary.

        :param attributes: Dictionary of global attributes
        :param filename: Optional filename for reporting
        :return: Validation report
        """
        # Local import to avoid a potential circular import with the models package.
        from .models.netcdf_header import NetCDFGlobalAttributes

        global_attrs = NetCDFGlobalAttributes(attributes=attributes)
        return self.validator.validate(global_attrs, filename)

    def get_required_attributes(self) -> List[str]:
        """
        Get list of required attribute names.

        :return: List of required attribute names
        """
        # NOTE(review): assumes AttributeSpecification is iterable over individual
        # attribute specs — confirm against esgvoc.api.project_specs.
        return [
            spec.field_name or spec.source_collection
            for spec in self.attribute_specs
            if spec.is_required
        ]

    def get_optional_attributes(self) -> List[str]:
        """
        Get list of optional attribute names.

        :return: List of optional attribute names
        """
        return [
            spec.field_name or spec.source_collection
            for spec in self.attribute_specs
            if not spec.is_required
        ]

    def get_attribute_info(self, attribute_name: str) -> Optional[Dict[str, Any]]:
        """
        Get information about a specific attribute.

        :param attribute_name: Name of the attribute
        :return: Attribute information dictionary or None if not found
        """
        spec = None
        # An attribute is matched by its field name, falling back to the name of
        # its source collection when no explicit field name is defined.
        for s in self.attribute_specs:
            field_name = s.field_name or s.source_collection
            if field_name == attribute_name:
                spec = s
                break

        if spec is None:
            return None

        return {
            "name": attribute_name,
            "source_collection": spec.source_collection,
            "value_type": spec.value_type,
            "required": spec.is_required,
            "default_value": spec.default_value,
            "specific_key": spec.specific_key,
        }

    def list_attributes(self) -> List[str]:
        """
        Get list of all defined attribute names.

        :return: List of all attribute names
        """
        return [spec.field_name or spec.source_collection for spec in self.attribute_specs]

    def reload_config(self) -> None:
        """
        Reload attribute specifications from the database.
        """
        # Re-fetch the specs and rebuild the underlying validator with them.
        self.attribute_specs = self._load_from_database()
        self.validator = GlobalAttributeValidator(self.attribute_specs, self.project_id)
|
169
|
+
|
|
170
|
+
|
|
171
|
+
class GAValidatorFactory:
    """
    Factory for creating GA validators for different projects.
    """

    @staticmethod
    def _build(project_id: str) -> GAValidator:
        # Single point of construction shared by the project-specific helpers.
        return GAValidator(project_id=project_id)

    @staticmethod
    def create_cmip6_validator() -> GAValidator:
        """
        Create a validator configured for CMIP6.

        :return: GAValidator instance for CMIP6
        """
        return GAValidatorFactory._build("cmip6")

    @staticmethod
    def create_cmip7_validator() -> GAValidator:
        """
        Create a validator configured for CMIP7.

        :return: GAValidator instance for CMIP7
        """
        return GAValidatorFactory._build("cmip7")
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def validate_netcdf_attributes(
    ncdump_output: str, project_id: str = "cmip6", filename: Optional[str] = None
) -> ValidationReport:
    """
    Convenience function to validate NetCDF global attributes.

    Loads attribute specifications from the esgvoc database for the specified project.

    :param ncdump_output: Output from ncdump -h command
    :param project_id: Project identifier for validation
    :param filename: Optional filename for reporting
    :return: Validation report
    """
    # Build a throwaway validator for this project and run it on the dump output.
    return GAValidator(project_id).validate_from_ncdump(ncdump_output, filename)
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def create_validation_summary(report: ValidationReport) -> str:
    """
    Create a human-readable summary of a validation report.

    The summary contains a header, per-severity counters, the list of issues
    grouped by severity (errors first), then missing and extra attributes.

    :param report: Validation report to summarize
    :return: Formatted summary string
    """
    lines = []
    lines.append("=" * 60)
    lines.append("NetCDF Global Attributes Validation Report")
    lines.append("=" * 60)

    # File line is only emitted when a filename was recorded on the report.
    if report.filename:
        lines.append(f"File: {report.filename}")
    lines.append(f"Project: {report.project_id}")
    lines.append(f"Status: {'VALID' if report.is_valid else 'INVALID'}")
    lines.append("")

    # Summary statistics
    lines.append("Summary:")
    lines.append(f"  • Errors: {report.error_count}")
    lines.append(f"  • Warnings: {report.warning_count}")
    lines.append(f"  • Info messages: {report.info_count}")
    lines.append(f"  • Validated attributes: {len(report.validated_attributes)}")
    lines.append(f"  • Missing required attributes: {len(report.missing_attributes)}")
    lines.append(f"  • Extra attributes: {len(report.extra_attributes)}")
    lines.append("")

    # Issues by severity, most severe first.
    if report.issues:
        lines.append("Issues:")
        lines.append("")

        for severity in [ValidationSeverity.ERROR, ValidationSeverity.WARNING, ValidationSeverity.INFO]:
            severity_issues = report.get_issues_by_severity(severity)
            if severity_issues:
                lines.append(f"{severity.value.upper()}S:")
                for i, issue in enumerate(severity_issues):
                    lines.append(f"  • {issue.attribute_name}: {issue.message}")
                    # Expected/actual values only appear when they were recorded.
                    if issue.expected_value is not None:
                        lines.append(f"    Expected: {issue.expected_value}")
                    if issue.actual_value is not None:
                        lines.append(f"    Actual: {issue.actual_value}")

                    # Add separator between errors (except for the last one)
                    if i < len(severity_issues) - 1:
                        lines.append("    " + "-" * 50)
                        lines.append("")
                lines.append("")

    # Missing attributes
    if report.missing_attributes:
        lines.append("Missing Required Attributes:")
        for attr in report.missing_attributes:
            lines.append(f"  • {attr}")
        lines.append("")

    # Extra attributes
    if report.extra_attributes:
        lines.append("Extra Attributes (not in specification):")
        for attr in report.extra_attributes:
            lines.append(f"  • {attr}")
        lines.append("")

    lines.append("=" * 60)
    return "\n".join(lines)
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from itertools import combinations, product
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Sequence
|
|
6
|
+
|
|
7
|
+
from jinja2 import Environment, FileSystemLoader
|
|
8
|
+
from sqlmodel import Session
|
|
9
|
+
|
|
10
|
+
from esgvoc.api import projects, search
|
|
11
|
+
from esgvoc.api.project_specs import CatalogProperty, DrsType
|
|
12
|
+
from esgvoc.core.constants import COMPOSITE_REQUIRED_KEY, DRS_SPECS_JSON_KEY, PATTERN_JSON_KEY
|
|
13
|
+
from esgvoc.core.db.models.project import PCollection, PTerm, TermKind
|
|
14
|
+
from esgvoc.core.db.models.universe import UTerm
|
|
15
|
+
from esgvoc.core.exceptions import EsgvocException, EsgvocNotFoundError, EsgvocNotImplementedError, EsgvocValueError
|
|
16
|
+
|
|
17
|
+
# Separator used to namespace a catalog field name with its project id.
KEY_SEPARATOR = ':'
# Jinja template location, resolved relative to this module's directory.
TEMPLATE_DIR_NAME = 'templates'
TEMPLATE_DIR_PATH = Path(__file__).parent.joinpath(TEMPLATE_DIR_NAME)
TEMPLATE_FILE_NAME = 'template.jinja'
# Indentation used when pretty printing the generated json schema.
JSON_INDENTATION = 2
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class _CatalogProperty:
    """Internal, json-ready form of a catalog property passed to the template."""

    # Fully qualified field name ('<project_id>:<attribute_name>').
    field_name: str
    # Json schema fragment describing the field's value constraints.
    field_value: dict
    # Whether the field is mandatory in the catalog.
    is_required: bool
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _process_col_plain_terms(collection: PCollection, source_collection_key: str) -> tuple[str, list[str]]:
    """
    Translate every plain term of a collection into a sorted list of enum values.

    :param collection: The collection whose plain terms are translated.
    :param source_collection_key: The spec key whose value is collected from each term.
    :returns: The json schema property key ('enum') and the sorted, deduplicated values.
    :raises EsgvocNotFoundError: When a term does not define the requested key.
    """
    property_values: set[str] = set()
    # _process_plain_term always yields 'enum'; initializing the key up-front also
    # fixes an UnboundLocalError that the original raised for an empty collection.
    property_key = 'enum'
    for term in collection.terms:
        property_key, property_value = _process_plain_term(term, source_collection_key)
        property_values.add(property_value)
    # Filter out None values before sorting to avoid TypeError
    filtered_values = [v for v in property_values if v is not None]
    return property_key, sorted(filtered_values)  # type: ignore
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _process_plain_term(term: PTerm, source_collection_key: str) -> tuple[str, str]:
    """
    Extract the value bound to ``source_collection_key`` from a plain term.

    :returns: The json schema property key ('enum') and the extracted value.
    :raises EsgvocNotFoundError: When the term does not define the key.
    """
    # Guard clause: fail fast when the requested key is absent from the term specs.
    if source_collection_key not in term.specs:
        raise EsgvocNotFoundError(f'missing key {source_collection_key} for term {term.id} in ' +
                                  f'collection {term.collection.id}')
    return 'enum', term.specs[source_collection_key]
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _process_col_composite_terms(collection: PCollection, universe_session: Session,
                                 project_session: Session) -> tuple[str, list[str | dict], bool]:
    """
    Translate every composite term of a collection and merge the results.

    :returns: The json schema property key of the last processed term (empty string
              for an empty collection), the merged property values and whether any
              term involved a pattern.
    """
    merged_values: list[str | dict] = list()
    last_key = ""
    pattern_seen = False
    for term in collection.terms:
        last_key, term_values, term_has_pattern = _process_composite_term(term, universe_session,
                                                                          project_session)
        # Composite translation may yield either a single value or a list of values.
        if isinstance(term_values, list):
            merged_values.extend(term_values)
        else:
            merged_values.append(term_values)
        pattern_seen |= term_has_pattern
    return last_key, merged_values, pattern_seen
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _inner_process_composite_term(resolved_term: UTerm | PTerm,
                                  universe_session: Session,
                                  project_session: Session) -> tuple[str | list, bool]:
    """
    Translate one resolved part of a composite term according to its kind.

    :returns: The translated value (a string, or a list for nested composites) and
              whether the part is a pattern.
    :raises EsgvocNotImplementedError: On unsupported term kinds.
    """
    kind = resolved_term.kind
    if kind == TermKind.PLAIN:
        return resolved_term.specs[DRS_SPECS_JSON_KEY], False
    if kind == TermKind.PATTERN:
        # Drop the whole-string anchors: the composite value gets re-anchored once
        # it has been fully assembled.
        return resolved_term.specs[PATTERN_JSON_KEY].replace('^', '').replace('$', ''), True
    if kind == TermKind.COMPOSITE:
        # Nested composite: recurse, ignoring the nested property key.
        _, nested_values, nested_has_pattern = _process_composite_term(resolved_term,
                                                                       universe_session,
                                                                       project_session)
        return nested_values, nested_has_pattern
    raise EsgvocNotImplementedError(f"unsupported term kind '{kind}'")
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _accumulate_resolved_part(resolved_part: list,
                              resolved_term: UTerm | PTerm,
                              universe_session: Session,
                              project_session: Session) -> bool:
    """
    Append the translation of ``resolved_term`` to ``resolved_part`` (in place).

    :returns: Whether the translated term involves a pattern.
    """
    translated, involves_pattern = _inner_process_composite_term(resolved_term, universe_session,
                                                                 project_session)
    # Nested composites come back as lists and are flattened into the accumulator.
    resolved_part.extend(translated if isinstance(translated, list) else [translated])
    return involves_pattern
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _generate_combinations(items_parts: list[list], required_parts: list[bool]) -> list[list]:
|
|
99
|
+
number_of_parts = len(items_parts)
|
|
100
|
+
required_indexes = {index for index, required in enumerate(required_parts) if required}
|
|
101
|
+
result = list()
|
|
102
|
+
# Generate all the combination of item lists.
|
|
103
|
+
for r in range(1, number_of_parts + 1): # Some optional list may or may not be included.
|
|
104
|
+
# According to the doc, combination respect the list order.
|
|
105
|
+
for index_subset in combinations(range(number_of_parts), r):
|
|
106
|
+
# Only keep combinations with the required item lists.
|
|
107
|
+
if required_indexes.issubset(index_subset):
|
|
108
|
+
result.append([items_parts[index] for index in index_subset])
|
|
109
|
+
return result
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _process_composite_term(term: UTerm | PTerm, universe_session: Session,
                            project_session: Session) -> tuple[str, list[str | dict], bool]:
    """
    Translate a composite term into json schema property values.

    Each part of the composite is resolved and translated, then every valid
    combination of parts (required parts always present, in order) is joined with
    the term's separator.

    :returns: The json schema property key ('anyOf' when any part is a pattern,
              'enum' otherwise), the list of property values and the pattern flag.
    """
    items_parts: list[list[str]] = list()
    required_parts: list[bool] = list()
    separator, parts = projects._get_composite_term_separator_parts(term)
    has_pattern = False
    for part in parts:
        resolved_term = projects._resolve_composite_term_part(part, universe_session, project_session)
        resolved_part = list()
        if isinstance(resolved_term, Sequence):
            for r_term in resolved_term:
                has_pattern |= _accumulate_resolved_part(resolved_part, r_term, universe_session,
                                                         project_session)
        else:
            # Bug fix: the original assigned (`has_pattern = ...`) here, discarding
            # the flag accumulated from previous parts; `|=` matches the branch above.
            has_pattern |= _accumulate_resolved_part(resolved_part, resolved_term, universe_session,
                                                     project_session)
        items_parts.append(resolved_part)
        required_parts.append(part[COMPOSITE_REQUIRED_KEY])
    property_values: list[str | dict] = list()
    # Renamed from `combinations`, which shadowed the itertools.combinations import.
    part_combinations = _generate_combinations(items_parts, required_parts)
    for combination in part_combinations:
        for product_result in product(*combination):
            # Patterns terms are meant to be validated individually.
            # So their regex are defined as a whole (begins by a ^, ends by a $).
            # As the pattern is a concatenation of plain or regex, multiple ^ and $ can exist.
            # The later, must be removed.
            tmp = separator.join(product_result)
            if has_pattern:
                # Re-anchor the assembled regex and wrap it as a json schema pattern.
                tmp = f'^{tmp}$'
                tmp = {'pattern': tmp}
            property_values.append(tmp)
    property_key = 'anyOf' if has_pattern else 'enum'
    return property_key, property_values, has_pattern
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _process_col_pattern_terms(collection: PCollection) -> tuple[str, str | list[dict]]:
    """
    Translate the pattern terms of a collection.

    A single-term collection maps directly to one 'pattern' entry; several terms
    are combined under an 'anyOf' list of pattern objects.
    """
    terms = collection.terms
    if len(terms) == 1:
        return _process_pattern_term(terms[0])
    alternatives = [dict([_process_pattern_term(term)]) for term in terms]
    return 'anyOf', alternatives
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _process_pattern_term(term: PTerm) -> tuple[str, str]:
    """Return the json schema 'pattern' key paired with the term's regex spec."""
    return 'pattern', term.specs[PATTERN_JSON_KEY]
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class CatalogPropertiesJsonTranslator:
|
|
165
|
+
def __init__(self, project_id: str) -> None:
|
|
166
|
+
self.project_id = project_id
|
|
167
|
+
# Project session can't be None here.
|
|
168
|
+
self.universe_session: Session = search.get_universe_session()
|
|
169
|
+
self.project_session: Session = projects._get_project_session_with_exception(project_id)
|
|
170
|
+
self.collections: dict[str, PCollection] = dict()
|
|
171
|
+
for collection in projects._get_all_collections_in_project(self.project_session):
|
|
172
|
+
self.collections[collection.id] = collection
|
|
173
|
+
|
|
174
|
+
def __exit__(self, exception_type, exception_value, exception_traceback):
|
|
175
|
+
self.project_session.close()
|
|
176
|
+
self.universe_session.close()
|
|
177
|
+
if exception_type is not None:
|
|
178
|
+
raise exception_value
|
|
179
|
+
return True
|
|
180
|
+
|
|
181
|
+
def _translate_property_value(self, catalog_property: CatalogProperty) \
|
|
182
|
+
-> tuple[str | None, str | list[str] | list[str | dict] | None]:
|
|
183
|
+
property_key: str | None
|
|
184
|
+
property_value: str | list[str] | list[str | dict] | None
|
|
185
|
+
|
|
186
|
+
# Properties unrelated to collections of project.
|
|
187
|
+
if catalog_property.source_collection is None:
|
|
188
|
+
property_key = None
|
|
189
|
+
property_value = None
|
|
190
|
+
elif catalog_property.source_collection not in self.collections:
|
|
191
|
+
raise EsgvocNotFoundError(f"collection '{catalog_property.source_collection}' is not found")
|
|
192
|
+
else:
|
|
193
|
+
if catalog_property.source_collection_key is None:
|
|
194
|
+
source_collection_key = DRS_SPECS_JSON_KEY
|
|
195
|
+
else:
|
|
196
|
+
source_collection_key = catalog_property.source_collection_key
|
|
197
|
+
|
|
198
|
+
if catalog_property.source_collection_term is None:
|
|
199
|
+
collection = self.collections[catalog_property.source_collection]
|
|
200
|
+
match collection.term_kind:
|
|
201
|
+
case TermKind.PLAIN:
|
|
202
|
+
property_key, property_value = _process_col_plain_terms(
|
|
203
|
+
collection=collection,
|
|
204
|
+
source_collection_key=source_collection_key)
|
|
205
|
+
case TermKind.COMPOSITE:
|
|
206
|
+
property_key, property_value, _ = _process_col_composite_terms(
|
|
207
|
+
collection=collection,
|
|
208
|
+
universe_session=self.universe_session,
|
|
209
|
+
project_session=self.project_session)
|
|
210
|
+
case TermKind.PATTERN:
|
|
211
|
+
property_key, property_value = _process_col_pattern_terms(collection)
|
|
212
|
+
case _:
|
|
213
|
+
msg = f"unsupported term kind '{collection.term_kind}'"
|
|
214
|
+
raise EsgvocNotImplementedError(msg)
|
|
215
|
+
else:
|
|
216
|
+
pterm_found = projects._get_term_in_collection(
|
|
217
|
+
session=self.project_session,
|
|
218
|
+
collection_id=catalog_property.source_collection,
|
|
219
|
+
term_id=catalog_property.source_collection_term)
|
|
220
|
+
if pterm_found is None:
|
|
221
|
+
raise EsgvocValueError(f"term '{catalog_property.source_collection_term}' is not " +
|
|
222
|
+
f"found in collection '{catalog_property.source_collection}'")
|
|
223
|
+
match pterm_found.kind:
|
|
224
|
+
case TermKind.PLAIN:
|
|
225
|
+
property_key, property_value = _process_plain_term(
|
|
226
|
+
term=pterm_found,
|
|
227
|
+
source_collection_key=source_collection_key)
|
|
228
|
+
case TermKind.COMPOSITE:
|
|
229
|
+
property_key, property_value, _ = _process_composite_term(
|
|
230
|
+
term=pterm_found,
|
|
231
|
+
universe_session=self.universe_session,
|
|
232
|
+
project_session=self.project_session)
|
|
233
|
+
case TermKind.PATTERN:
|
|
234
|
+
property_key, property_value = _process_pattern_term(term=pterm_found)
|
|
235
|
+
case _:
|
|
236
|
+
msg = f"unsupported term kind '{pterm_found.kind}'"
|
|
237
|
+
raise EsgvocNotImplementedError(msg)
|
|
238
|
+
return property_key, property_value
|
|
239
|
+
|
|
240
|
+
def translate_property(self, catalog_property: CatalogProperty) -> _CatalogProperty:
|
|
241
|
+
property_key, property_value = self._translate_property_value(catalog_property)
|
|
242
|
+
field_value = dict()
|
|
243
|
+
if 'array' in catalog_property.catalog_field_value_type:
|
|
244
|
+
field_value['type'] = 'array'
|
|
245
|
+
root_property = dict()
|
|
246
|
+
field_value['items'] = root_property
|
|
247
|
+
root_property['type'] = catalog_property.catalog_field_value_type.split('_')[0]
|
|
248
|
+
root_property['minItems'] = 1
|
|
249
|
+
else:
|
|
250
|
+
field_value['type'] = catalog_property.catalog_field_value_type
|
|
251
|
+
root_property = field_value
|
|
252
|
+
|
|
253
|
+
if (property_key is not None) and (property_value is not None):
|
|
254
|
+
root_property[property_key] = property_value
|
|
255
|
+
|
|
256
|
+
if catalog_property.catalog_field_name is None:
|
|
257
|
+
attribute_name = catalog_property.source_collection
|
|
258
|
+
else:
|
|
259
|
+
attribute_name = catalog_property.catalog_field_name
|
|
260
|
+
field_name = CatalogPropertiesJsonTranslator._translate_field_name(self.project_id,
|
|
261
|
+
attribute_name)
|
|
262
|
+
return _CatalogProperty(field_name=field_name,
|
|
263
|
+
field_value=field_value,
|
|
264
|
+
is_required=catalog_property.is_required)
|
|
265
|
+
|
|
266
|
+
@staticmethod
|
|
267
|
+
def _translate_field_name(project_id: str, attribute_name) -> str:
|
|
268
|
+
return f'{project_id}{KEY_SEPARATOR}{attribute_name}'
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def _catalog_properties_json_processor(property_translator: CatalogPropertiesJsonTranslator,
                                       properties: list[CatalogProperty]) -> list[_CatalogProperty]:
    """Translate every catalog property specification through the given translator."""
    return [property_translator.translate_property(spec) for spec in properties]
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def generate_json_schema(project_id: str) -> dict:
    """
    Generate json schema for the given project.

    :param project_id: The id of the given project.
    :type project_id: str
    :returns: The root node of a json schema.
    :rtype: dict
    :raises EsgvocValueError: On wrong information in catalog_specs.
    :raises EsgvocNotFoundError: On missing information in catalog_specs.
    :raises EsgvocNotImplementedError: On unexpected operations resulted in wrong information in catalog_specs).
    :raises EsgvocException: On json compliance error.
    """
    # Guard clauses replace the original nested if/else pyramid.
    project_specs = projects.get_project(project_id)
    if project_specs is None:
        raise EsgvocNotFoundError(f"unknown project '{project_id}'")
    catalog_specs = project_specs.catalog_specs
    if catalog_specs is None:
        raise EsgvocNotFoundError(f"catalog properties for the project '{project_id}' " +
                                  "are missing")
    env = Environment(loader=FileSystemLoader(TEMPLATE_DIR_PATH))  # noqa: S701
    template = env.get_template(TEMPLATE_FILE_NAME)
    extension_specs = dict()
    for catalog_extension in catalog_specs.catalog_properties.extensions:
        # Dashes are not valid in jinja identifiers, so normalize the name.
        catalog_extension_name = catalog_extension.name.replace('-', '_')
        extension_specs[f'{catalog_extension_name}_extension_version'] = catalog_extension.version
    drs_dataset_id_regex = project_specs.drs_specs[DrsType.DATASET_ID].regex
    property_translator = CatalogPropertiesJsonTranslator(project_id)
    try:
        catalog_dataset_properties = \
            _catalog_properties_json_processor(property_translator,
                                               catalog_specs.dataset_properties)
        catalog_file_properties = \
            _catalog_properties_json_processor(property_translator,
                                               catalog_specs.file_properties)
    finally:
        # The original merely deleted the translator, which leaks both DB sessions
        # (`del` does not invoke __exit__): close them explicitly whatever happens.
        property_translator.project_session.close()
        property_translator.universe_session.close()
    json_raw_str = template.render(project_id=project_id,
                                   catalog_version=catalog_specs.version,
                                   drs_dataset_id_regex=drs_dataset_id_regex,
                                   catalog_dataset_properties=catalog_dataset_properties,
                                   catalog_file_properties=catalog_file_properties,
                                   **extension_specs)
    # Json compliance checking.
    try:
        return json.loads(json_raw_str)
    except Exception as e:
        raise EsgvocException(f'JSON error: {e}. Dump raw:\n{json_raw_str}') from e
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def pretty_print_json_node(obj: dict) -> str:
    """
    Serialize a dictionary into json format.

    Uses the module-level JSON_INDENTATION constant for pretty printing.

    :param obj: The dictionary.
    :type obj: dict
    :returns: a string that represents the dictionary in json format.
    :rtype: str
    """
    return json.dumps(obj, indent=JSON_INDENTATION)