esgvoc 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- esgvoc/__init__.py +3 -0
- esgvoc/api/__init__.py +91 -0
- esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
- esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
- esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
- esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
- esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
- esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
- esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
- esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
- esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
- esgvoc/api/data_descriptors/__init__.py +159 -0
- esgvoc/api/data_descriptors/activity.py +72 -0
- esgvoc/api/data_descriptors/archive.py +5 -0
- esgvoc/api/data_descriptors/area_label.py +30 -0
- esgvoc/api/data_descriptors/branded_suffix.py +30 -0
- esgvoc/api/data_descriptors/branded_variable.py +21 -0
- esgvoc/api/data_descriptors/citation_url.py +5 -0
- esgvoc/api/data_descriptors/contact.py +5 -0
- esgvoc/api/data_descriptors/conventions.py +28 -0
- esgvoc/api/data_descriptors/creation_date.py +18 -0
- esgvoc/api/data_descriptors/data_descriptor.py +127 -0
- esgvoc/api/data_descriptors/data_specs_version.py +25 -0
- esgvoc/api/data_descriptors/date.py +5 -0
- esgvoc/api/data_descriptors/directory_date.py +22 -0
- esgvoc/api/data_descriptors/drs_specs.py +38 -0
- esgvoc/api/data_descriptors/experiment.py +215 -0
- esgvoc/api/data_descriptors/forcing_index.py +21 -0
- esgvoc/api/data_descriptors/frequency.py +48 -0
- esgvoc/api/data_descriptors/further_info_url.py +5 -0
- esgvoc/api/data_descriptors/grid.py +43 -0
- esgvoc/api/data_descriptors/horizontal_label.py +20 -0
- esgvoc/api/data_descriptors/initialization_index.py +27 -0
- esgvoc/api/data_descriptors/institution.py +80 -0
- esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
- esgvoc/api/data_descriptors/license.py +31 -0
- esgvoc/api/data_descriptors/member_id.py +9 -0
- esgvoc/api/data_descriptors/mip_era.py +26 -0
- esgvoc/api/data_descriptors/model_component.py +32 -0
- esgvoc/api/data_descriptors/models_test/models.py +17 -0
- esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
- esgvoc/api/data_descriptors/obs_type.py +5 -0
- esgvoc/api/data_descriptors/organisation.py +22 -0
- esgvoc/api/data_descriptors/physics_index.py +21 -0
- esgvoc/api/data_descriptors/product.py +16 -0
- esgvoc/api/data_descriptors/publication_status.py +5 -0
- esgvoc/api/data_descriptors/realization_index.py +24 -0
- esgvoc/api/data_descriptors/realm.py +16 -0
- esgvoc/api/data_descriptors/regex.py +5 -0
- esgvoc/api/data_descriptors/region.py +35 -0
- esgvoc/api/data_descriptors/resolution.py +7 -0
- esgvoc/api/data_descriptors/source.py +120 -0
- esgvoc/api/data_descriptors/source_type.py +5 -0
- esgvoc/api/data_descriptors/sub_experiment.py +5 -0
- esgvoc/api/data_descriptors/table.py +28 -0
- esgvoc/api/data_descriptors/temporal_label.py +20 -0
- esgvoc/api/data_descriptors/time_range.py +17 -0
- esgvoc/api/data_descriptors/title.py +5 -0
- esgvoc/api/data_descriptors/tracking_id.py +67 -0
- esgvoc/api/data_descriptors/variable.py +56 -0
- esgvoc/api/data_descriptors/variant_label.py +25 -0
- esgvoc/api/data_descriptors/vertical_label.py +20 -0
- esgvoc/api/project_specs.py +143 -0
- esgvoc/api/projects.py +1253 -0
- esgvoc/api/py.typed +0 -0
- esgvoc/api/pydantic_handler.py +146 -0
- esgvoc/api/report.py +127 -0
- esgvoc/api/search.py +171 -0
- esgvoc/api/universe.py +434 -0
- esgvoc/apps/__init__.py +6 -0
- esgvoc/apps/cmor_tables/__init__.py +7 -0
- esgvoc/apps/cmor_tables/cvs_table.py +948 -0
- esgvoc/apps/drs/__init__.py +0 -0
- esgvoc/apps/drs/constants.py +2 -0
- esgvoc/apps/drs/generator.py +429 -0
- esgvoc/apps/drs/report.py +540 -0
- esgvoc/apps/drs/validator.py +312 -0
- esgvoc/apps/ga/__init__.py +104 -0
- esgvoc/apps/ga/example_usage.py +315 -0
- esgvoc/apps/ga/models/__init__.py +47 -0
- esgvoc/apps/ga/models/netcdf_header.py +306 -0
- esgvoc/apps/ga/models/validator.py +491 -0
- esgvoc/apps/ga/test_ga.py +161 -0
- esgvoc/apps/ga/validator.py +277 -0
- esgvoc/apps/jsg/json_schema_generator.py +341 -0
- esgvoc/apps/jsg/templates/template.jinja +241 -0
- esgvoc/apps/test_cv/README.md +214 -0
- esgvoc/apps/test_cv/__init__.py +0 -0
- esgvoc/apps/test_cv/cv_tester.py +1611 -0
- esgvoc/apps/test_cv/example_usage.py +216 -0
- esgvoc/apps/vr/__init__.py +12 -0
- esgvoc/apps/vr/build_variable_registry.py +71 -0
- esgvoc/apps/vr/example_usage.py +60 -0
- esgvoc/apps/vr/vr_app.py +333 -0
- esgvoc/cli/clean.py +304 -0
- esgvoc/cli/cmor.py +46 -0
- esgvoc/cli/config.py +1300 -0
- esgvoc/cli/drs.py +267 -0
- esgvoc/cli/find.py +138 -0
- esgvoc/cli/get.py +155 -0
- esgvoc/cli/install.py +41 -0
- esgvoc/cli/main.py +60 -0
- esgvoc/cli/offline.py +269 -0
- esgvoc/cli/status.py +79 -0
- esgvoc/cli/test_cv.py +258 -0
- esgvoc/cli/valid.py +147 -0
- esgvoc/core/constants.py +17 -0
- esgvoc/core/convert.py +0 -0
- esgvoc/core/data_handler.py +206 -0
- esgvoc/core/db/__init__.py +3 -0
- esgvoc/core/db/connection.py +40 -0
- esgvoc/core/db/models/mixins.py +25 -0
- esgvoc/core/db/models/project.py +102 -0
- esgvoc/core/db/models/universe.py +98 -0
- esgvoc/core/db/project_ingestion.py +231 -0
- esgvoc/core/db/universe_ingestion.py +172 -0
- esgvoc/core/exceptions.py +33 -0
- esgvoc/core/logging_handler.py +26 -0
- esgvoc/core/repo_fetcher.py +345 -0
- esgvoc/core/service/__init__.py +41 -0
- esgvoc/core/service/configuration/config_manager.py +196 -0
- esgvoc/core/service/configuration/setting.py +363 -0
- esgvoc/core/service/data_merger.py +634 -0
- esgvoc/core/service/esg_voc.py +77 -0
- esgvoc/core/service/resolver_config.py +56 -0
- esgvoc/core/service/state.py +324 -0
- esgvoc/core/service/string_heuristics.py +98 -0
- esgvoc/core/service/term_cache.py +108 -0
- esgvoc/core/service/uri_resolver.py +133 -0
- esgvoc-2.0.2.dist-info/METADATA +82 -0
- esgvoc-2.0.2.dist-info/RECORD +147 -0
- esgvoc-2.0.2.dist-info/WHEEL +4 -0
- esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
- esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0
|
@@ -0,0 +1,491 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Validation models and logic for NetCDF global attributes.
|
|
3
|
+
|
|
4
|
+
This module provides the core validation functionality for verifying
|
|
5
|
+
NetCDF global attributes against project specifications using the esgvoc API.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Any, Dict, List, Optional, Union
|
|
9
|
+
from enum import Enum
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
|
+
|
|
12
|
+
import esgvoc.api.projects as projects
|
|
13
|
+
import esgvoc.api.search as search
|
|
14
|
+
from esgvoc.api.data_descriptors.data_descriptor import ConfiguredBaseModel
|
|
15
|
+
from esgvoc.api.project_specs import AttributeProperty, AttributeSpecification
|
|
16
|
+
from esgvoc.api.report import ValidationReport as EsgvocValidationReport
|
|
17
|
+
from esgvoc.core.exceptions import EsgvocNotFoundError, EsgvocDbError
|
|
18
|
+
from .netcdf_header import NetCDFGlobalAttributes
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ValidationSeverity(str, Enum):
    """
    Grades that can be attached to a validation issue.

    The ``str`` mixin makes every member a string as well, so a member
    compares equal to its lowercase value (``ValidationSeverity.ERROR == "error"``).
    """

    ERROR = "error"
    WARNING = "warning"
    INFO = "info"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ValidationIssue(ConfiguredBaseModel):
    """
    One problem reported while validating a single attribute.

    ``attribute_name``, ``severity`` and ``message`` are mandatory; the
    remaining fields default to ``None``.
    """

    # Mandatory fields (no default supplied).
    attribute_name: str = Field(description="Name of the attribute with the issue")
    severity: ValidationSeverity = Field(description="Severity level of the issue")
    message: str = Field(description="Human-readable description of the issue")

    # Optional context, left as None when not applicable.
    expected_value: Optional[Any] = Field(None, description="Expected value if applicable")
    actual_value: Optional[Any] = Field(None, description="Actual value found")
    source_collection: Optional[str] = Field(None, description="Source collection for the attribute")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ValidationReport(ConfiguredBaseModel):
    """
    Result of validating the global attributes of one NetCDF file.

    Issues are collected through :meth:`add_issue`, which also flips
    ``is_valid`` to ``False`` as soon as an error-level issue is recorded.
    """

    filename: Optional[str] = Field(None, description="NetCDF filename")
    project_id: str = Field(description="Project ID used for validation")
    is_valid: bool = Field(description="Overall validation status")
    issues: List[ValidationIssue] = Field(default_factory=list, description="List of validation issues")
    validated_attributes: Dict[str, Any] = Field(default_factory=dict, description="Successfully validated attributes")
    mapping_used: Dict[str, str] = Field(default_factory=dict, description="Mapping of attribute names to validated term values")
    missing_attributes: List[str] = Field(default_factory=list, description="Required attributes that are missing")
    extra_attributes: List[str] = Field(default_factory=list, description="Extra attributes not in specification")

    @property
    def error_count(self) -> int:
        """Count of issues whose severity is ERROR."""
        return len(self.get_issues_by_severity(ValidationSeverity.ERROR))

    @property
    def warning_count(self) -> int:
        """Count of issues whose severity is WARNING."""
        return len(self.get_issues_by_severity(ValidationSeverity.WARNING))

    @property
    def info_count(self) -> int:
        """Count of issues whose severity is INFO."""
        return len(self.get_issues_by_severity(ValidationSeverity.INFO))

    def add_issue(self, issue: ValidationIssue) -> None:
        """
        Record an issue in the report.

        :param issue: Issue to append; an ERROR severity marks the report invalid
        """
        if issue.severity == ValidationSeverity.ERROR:
            self.is_valid = False
        self.issues.append(issue)

    def get_issues_by_severity(self, severity: ValidationSeverity) -> List[ValidationIssue]:
        """
        Return the recorded issues that have the given severity.

        :param severity: Severity level to select
        :return: New list of matching issues, in insertion order
        """
        selected = []
        for candidate in self.issues:
            if candidate.severity == severity:
                selected.append(candidate)
        return selected

    def summary(self) -> str:
        """Return a one-line status string with the per-severity counts."""
        verdict = "INVALID" if not self.is_valid else "VALID"
        counts = f"{self.error_count} errors, {self.warning_count} warnings, {self.info_count} info messages"
        return f"Validation {verdict}: {counts}"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class ESGVocAttributeValidator:
    """
    Validator to validate attributes against ESGVOC controlled vocabularies.

    Both ``visit_*`` methods return a plain dictionary describing the outcome.
    It always carries ``attribute_name``, ``validation_method`` and
    ``is_valid``. When the value could not be checked it carries ``error``
    (plus ``error_type`` when an ESGVOC exception was caught) instead of the
    match details.
    """

    # Number of collection terms quoted as examples when a value is rejected.
    _MAX_EXAMPLES = 3

    def __init__(self, project_id: str = "cmip6"):
        """
        Initialize the validator.

        :param project_id: Project identifier for ESGVOC queries
        """
        self.project_id = project_id
        self.validation_results = {}

    def visit_base_attribute(
        self, attribute_name: str, attribute: AttributeProperty, attribute_value: Any
    ) -> Dict[str, Any]:
        """
        Validate a base global attribute against its ESGVOC collection.

        The value is converted to a stripped string and checked with
        ``projects.valid_term_in_collection``; it is valid when at least one
        term matches. On rejection, up to three terms of the collection are
        attached as examples for error reporting.

        :param attribute_name: Name of the attribute
        :param attribute: Attribute specification (``source_collection`` and ``value_type`` are read)
        :param attribute_value: The actual attribute value to validate
        :return: Validation result dictionary
        """
        # Computed outside the try block so the error result can report it too.
        value_str = str(attribute_value).strip()

        try:
            matching_terms = projects.valid_term_in_collection(
                value=value_str, project_id=self.project_id, collection_id=attribute.source_collection
            )
            is_valid = len(matching_terms) > 0

            available_examples = []
            available_terms_full = []
            if not is_valid:
                try:
                    all_terms = projects.get_all_terms_in_collection(
                        project_id=self.project_id,
                        collection_id=attribute.source_collection,
                        selected_term_fields=None,
                    )
                    sample = all_terms[: self._MAX_EXAMPLES]
                    available_examples = [term.id for term in sample]  # Just IDs for quick reference
                    available_terms_full = [term.model_dump() for term in sample]  # Full term objects
                except Exception:
                    # Examples are best effort: a failure to collect them must
                    # not hide the rejection itself. ``Exception`` (rather than
                    # a bare except) lets KeyboardInterrupt/SystemExit through.
                    available_examples = []
                    available_terms_full = []

            return {
                "attribute_name": attribute_name,
                "source_collection": attribute.source_collection,
                "value_type": attribute.value_type,
                "validation_method": "esgvoc_validation",
                "is_valid": is_valid,
                "actual_value": value_str,
                "matching_terms": [
                    {"project_id": term.project_id, "collection_id": term.collection_id, "term_id": term.term_id}
                    for term in matching_terms
                ],
                "available_examples": available_examples,
                "available_terms_full": available_terms_full,
                "total_matches": len(matching_terms),
            }

        except (EsgvocNotFoundError, EsgvocDbError) as e:
            return {
                "attribute_name": attribute_name,
                "source_collection": attribute.source_collection,
                "validation_method": "esgvoc_validation",
                "is_valid": False,
                "actual_value": value_str,
                "error": str(e),
                "error_type": type(e).__name__,
            }

    def visit_specific_attribute(
        self, attribute_name: str, attribute: AttributeProperty, attribute_value: Any
    ) -> Dict[str, Any]:
        """
        Validate a specific key attribute against ESGVOC.

        For specific_key attributes, the value is compared with the
        ``specific_key`` field of every term in the source collection (e.g.,
        validate experiment description against experiment_id collection).
        Terms whose field is missing or falsy are ignored.

        :param attribute_name: Name of the attribute
        :param attribute: Attribute specification with specific_key
        :param attribute_value: The actual attribute value to validate
        :return: Validation result dictionary
        """
        value_str = str(attribute_value).strip()

        try:
            specific_key = getattr(attribute, "specific_key", None)
            if not specific_key:
                return {
                    "attribute_name": attribute_name,
                    "validation_method": "specific_key_lookup",
                    "is_valid": False,
                    "actual_value": value_str,
                    "error": "No specific_key defined in attribute specification",
                }

            all_terms = projects.get_all_terms_in_collection(
                project_id=self.project_id,
                collection_id=attribute.source_collection,
                selected_term_fields=None,
            )

            matched_terms = []
            available_values = []
            for term in all_terms:
                specific_value = term.model_dump().get(specific_key)
                if not specific_value:
                    continue
                available_values.append(str(specific_value))
                if str(specific_value).strip() == value_str:
                    matched_terms.append({"term_id": term.id, "specific_value": specific_value})

            found_match = bool(matched_terms)

            # A few full term examples, only needed for error reporting.
            available_terms_full = []
            if not found_match and all_terms:
                available_terms_full = [term.model_dump() for term in all_terms[: self._MAX_EXAMPLES]]

            # Deduplicate while keeping collection order, so that the examples
            # shown to the user are the same on every run.
            unique_values = list(dict.fromkeys(available_values))

            return {
                "attribute_name": attribute_name,
                "source_collection": attribute.source_collection,
                "specific_key": specific_key,
                "value_type": attribute.value_type,
                "validation_method": "specific_key_lookup",
                "is_valid": found_match,
                "actual_value": value_str,
                "matching_terms": matched_terms,
                "available_examples": unique_values[: self._MAX_EXAMPLES],
                "available_terms_full": available_terms_full,
                "total_available": len(unique_values),
            }

        except (EsgvocNotFoundError, EsgvocDbError) as e:
            return {
                "attribute_name": attribute_name,
                "source_collection": attribute.source_collection,
                "specific_key": getattr(attribute, "specific_key", None),
                "validation_method": "specific_key_lookup",
                "is_valid": False,
                "actual_value": value_str,
                "error": str(e),
                "error_type": type(e).__name__,
            }
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
class GlobalAttributeValidator:
    """
    Main validator class for NetCDF global attributes.

    Combines three checks into one report: required attributes that are
    missing, validation of the attributes that are present (value type and
    ESGVOC vocabulary), and attributes that no specification describes.
    """

    def __init__(self, attribute_specs: AttributeSpecification, project_id: str = "cmip6"):
        """
        Initialize the validator with attribute specifications.

        :param attribute_specs: Global attribute specifications (list of AttributeProperty)
        :param project_id: Project identifier
        """
        self.attribute_specs = attribute_specs
        self.project_id = project_id
        self.esgvoc_validator = ESGVocAttributeValidator(project_id)

    def _get_field_name(self, spec: AttributeProperty) -> str:
        """Return ``spec.field_name``, falling back to ``spec.source_collection`` when it is unset."""
        return spec.field_name or spec.source_collection

    def _get_spec_by_field_name(self, field_name: str) -> Optional[AttributeProperty]:
        """Return the first specification whose effective field name is ``field_name``, or ``None``."""
        return next(
            (spec for spec in self.attribute_specs if self._get_field_name(spec) == field_name),
            None,
        )

    def validate(self, global_attributes: NetCDFGlobalAttributes, filename: Optional[str] = None) -> ValidationReport:
        """
        Validate global attributes against specifications.

        :param global_attributes: NetCDF global attributes to validate
        :param filename: Optional filename for reporting
        :return: Validation report
        """
        # The report starts valid; add_issue() invalidates it on the first error.
        report = ValidationReport(filename=filename, project_id=self.project_id, is_valid=True)

        self._check_missing_attributes(global_attributes, report)
        self._validate_present_attributes(global_attributes, report)
        self._check_extra_attributes(global_attributes, report)

        return report

    def _check_missing_attributes(self, global_attributes: NetCDFGlobalAttributes, report: ValidationReport) -> None:
        """Record an ERROR issue for every required attribute that is absent."""
        for spec in self.attribute_specs:
            field_name = self._get_field_name(spec)
            if spec.is_required and not global_attributes.has_attribute(field_name):
                report.missing_attributes.append(field_name)
                report.add_issue(
                    ValidationIssue(
                        attribute_name=field_name,
                        severity=ValidationSeverity.ERROR,
                        message=f"Required attribute '{field_name}' is missing",
                        source_collection=spec.source_collection,
                    )
                )

    def _validate_present_attributes(self, global_attributes: NetCDFGlobalAttributes, report: ValidationReport) -> None:
        """Validate every present attribute that has a specification."""
        for attr_name in global_attributes.list_attributes():
            spec = self._get_spec_by_field_name(attr_name)
            if spec is None:
                # Unspecified attributes are reported by _check_extra_attributes.
                continue

            attr_value = global_attributes.get_attribute(attr_name)

            self._validate_value_type(attr_name, attr_value, spec, report)

            # Attributes with a specific_key are matched on that term field,
            # the others on the collection's own validation rules.
            if spec.specific_key is not None:
                validation_result = self.esgvoc_validator.visit_specific_attribute(attr_name, spec, attr_value)
            else:
                validation_result = self.esgvoc_validator.visit_base_attribute(attr_name, spec, attr_value)

            self._process_esgvoc_validation_result(validation_result, report)

            # The attribute counts as validated only if no error was recorded for it.
            has_error = any(
                issue.attribute_name == attr_name and issue.severity == ValidationSeverity.ERROR
                for issue in report.issues
            )
            if not has_error:
                report.validated_attributes[attr_name] = attr_value
                report.mapping_used[attr_name] = str(attr_value)

    def _check_extra_attributes(self, global_attributes: NetCDFGlobalAttributes, report: ValidationReport) -> None:
        """Record an INFO issue for every attribute that has no specification."""
        for attr_name in global_attributes.list_attributes():
            if self._get_spec_by_field_name(attr_name) is None:
                report.extra_attributes.append(attr_name)
                report.add_issue(
                    ValidationIssue(
                        attribute_name=attr_name,
                        severity=ValidationSeverity.INFO,
                        message=f"Extra attribute '{attr_name}' not defined in specifications",
                        actual_value=global_attributes.get_attribute(attr_name),
                    )
                )

    @staticmethod
    def _matches_value_type(value: Any, expected_type: Any) -> bool:
        """
        Tell whether ``value`` has the Python type named by ``expected_type``.

        Recognized names are ``"string"``, ``"integer"`` and ``"float"`` (which
        also accepts integers); any other name never matches.
        """
        # bool is a subclass of int: without this guard True/False would be
        # accepted as integer or float attribute values.
        if isinstance(value, bool):
            return False
        if expected_type == "string":
            return isinstance(value, str)
        if expected_type == "integer":
            return isinstance(value, int)
        if expected_type == "float":
            return isinstance(value, (int, float))
        return False

    def _validate_value_type(
        self, attr_name: str, value: Any, spec: AttributeProperty, report: ValidationReport
    ) -> None:
        """
        Record an ERROR issue when ``value`` does not have the type required by ``spec``.

        :param attr_name: Name of the attribute being checked
        :param value: Attribute value
        :param spec: Specification providing ``value_type`` and ``source_collection``
        :param report: Report receiving the issue
        """
        expected_type = spec.value_type
        if self._matches_value_type(value, expected_type):
            return

        report.add_issue(
            ValidationIssue(
                attribute_name=attr_name,
                severity=ValidationSeverity.ERROR,
                message=f"Attribute '{attr_name}' has incorrect type. Expected {expected_type}, got {type(value).__name__}",
                expected_value=expected_type,
                actual_value=type(value).__name__,
                source_collection=spec.source_collection,
            )
        )

    @staticmethod
    def _describe_unknown_value(validation_result: Dict[str, Any], actual_value: Any) -> str:
        """
        Build the message for a value that is not in the controlled vocabulary.

        The message names the collection (and the looked-up field for
        ``specific_key_lookup`` results) and, when ``available_terms_full`` is
        non-empty, lists those terms as examples.
        """
        source_collection = validation_result.get("source_collection")
        by_specific_key = validation_result.get("validation_method") == "specific_key_lookup"
        specific_key = validation_result.get("specific_key")

        if by_specific_key:
            parts = [
                f"Value '{actual_value}' not found in controlled vocabulary. "
                f"Looking for '{specific_key}' field in collection '{source_collection}'"
            ]
        else:
            parts = [f"Value '{actual_value}' not found in controlled vocabulary '{source_collection}'"]

        available_terms_full = validation_result.get("available_terms_full", [])
        if available_terms_full:
            parts.append(f"\n\nExample valid terms (showing {len(available_terms_full)}):")
            for i, term in enumerate(available_terms_full, 1):
                term_id = term.get("id", "N/A")
                term_type = term.get("type", "N/A")
                parts.append(f"\n {i}. ID: '{term_id}' (type: {term_type})")

                # Show the field the lookup was made on first.
                if by_specific_key:
                    specific_value = term.get(specific_key, "N/A")
                    parts.append(f"\n {specific_key}: '{specific_value}'")

                # Then a few other fields, when the term has them.
                for field in ["drs_name", "description", "name"]:
                    if field in term and term[field]:
                        parts.append(f"\n {field}: '{term[field]}'")
                parts.append("\n")

        return "".join(parts)

    def _process_esgvoc_validation_result(self, validation_result: Dict[str, Any], report: ValidationReport) -> None:
        """
        Turn a failed ESGVOC validation result into an ERROR issue on the report.

        Nothing is recorded when the result is valid. A result carrying an
        ``error`` entry is reported as a failed lookup; otherwise the value is
        reported as absent from the controlled vocabulary.

        :param validation_result: Dictionary returned by the ESGVOC validator
        :param report: Report receiving the issue
        """
        if validation_result.get("is_valid", False):
            return

        attr_name = validation_result.get("attribute_name")
        actual_value = validation_result.get("actual_value", "N/A")
        error_msg = validation_result.get("error")

        if error_msg:
            # Error during validation (e.g., collection not found)
            message = f"ESGVOC validation failed for '{attr_name}' (value: '{actual_value}'): {error_msg}"
        else:
            message = self._describe_unknown_value(validation_result, actual_value)

        report.add_issue(
            ValidationIssue(
                attribute_name=attr_name,
                severity=ValidationSeverity.ERROR,
                message=message,
                expected_value=validation_result.get("available_examples"),
                actual_value=actual_value,
                source_collection=validation_result.get("source_collection"),
            )
        )
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
class ValidatorFactory:
    """
    Factory class for creating validators with different configurations.
    """

    @staticmethod
    def _legacy_spec_data(attr_name: str, attr_config: Dict[str, Any]) -> Dict[str, Any]:
        """Convert one entry of the old dict-based format into AttributeProperty keyword arguments."""
        spec_data = {
            "source_collection": attr_config.get("source_collection"),
            "is_required": attr_config.get("required", True),
            "value_type": attr_config.get("value_type", "string"),
        }
        # The attribute name is only kept when it differs from the collection
        # name, which is the fallback field name anyway.
        if attr_name != attr_config.get("source_collection"):
            spec_data["field_name"] = attr_name
        for optional_key in ("specific_key", "default_value"):
            if optional_key in attr_config:
                spec_data[optional_key] = attr_config[optional_key]
        return spec_data

    @staticmethod
    def create_from_yaml_file(yaml_file_path: str, project_id: str = "cmip6") -> GlobalAttributeValidator:
        """
        Create validator from YAML configuration file.

        Accepted layouts are a top-level list of attribute specifications, or
        a mapping with a ``specs`` entry holding either such a list or the
        legacy ``{attribute_name: config}`` mapping.

        :param yaml_file_path: Path to YAML configuration file
        :param project_id: Project identifier
        :return: Configured GlobalAttributeValidator
        :raises ValueError: If the YAML content has none of the accepted layouts
        """
        import yaml

        # Explicit encoding: without it the file is decoded with the platform
        # locale, which breaks non-ASCII content on non-UTF-8 systems.
        with open(yaml_file_path, "r", encoding="utf-8") as f:
            yaml_data = yaml.safe_load(f)

        if isinstance(yaml_data, list):
            raw_specs = yaml_data
        elif isinstance(yaml_data, dict) and "specs" in yaml_data:
            # Legacy dict format support
            raw_specs = yaml_data["specs"]
        else:
            raise ValueError(f"Unsupported YAML format: {type(yaml_data)}")

        if isinstance(raw_specs, list):
            attribute_specs = [AttributeProperty(**item) for item in raw_specs]
        else:
            # Old dict-based format - convert to list
            attribute_specs = [
                AttributeProperty(**ValidatorFactory._legacy_spec_data(attr_name, attr_config))
                for attr_name, attr_config in raw_specs.items()
            ]

        return GlobalAttributeValidator(attribute_specs, project_id)
|
|
491
|
+
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for the GA (Global Attributes) validator.
|
|
3
|
+
|
|
4
|
+
Run with: python -m pytest src/esgvoc/apps/ga/test_ga.py
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
from esgvoc.api.project_specs import AttributeProperty, AttributeSpecification
|
|
10
|
+
from .models import (
|
|
11
|
+
NetCDFHeaderParser,
|
|
12
|
+
ValidationSeverity,
|
|
13
|
+
)
|
|
14
|
+
from .validator import GAValidator
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TestNetCDFHeaderParser:
    """Checks for the parser that reads ``ncdump``-style header text."""

    def test_parse_simple_header(self):
        """A small header yields the expected dimensions, variables and global attributes."""
        ncdump_output = """netcdf test_file {
dimensions:
time = UNLIMITED ; // (12 currently)
lat = 180 ;
lon = 360 ;
variables:
double time(time) ;
time:units = "days since 1850-01-01" ;
time:calendar = "gregorian" ;

// global attributes:
:Conventions = "CF-1.7" ;
:title = "Test NetCDF file" ;
:institution = "Test Institution" ;
}"""

        header = NetCDFHeaderParser.parse_from_ncdump(ncdump_output)

        # File name and dimensions
        assert header.filename == "test_file"
        dimensions = header.dimensions
        assert len(dimensions) == 3
        assert "time" in dimensions
        assert dimensions["time"].is_unlimited
        assert dimensions["lat"].size == 180

        # Variables
        variables = header.variables
        assert len(variables) == 1
        assert "time" in variables
        assert variables["time"].data_type == "double"

        # Global attributes
        global_attrs = header.global_attributes
        assert len(global_attrs.attributes) == 3
        assert global_attrs.get_attribute("Conventions") == "CF-1.7"
        assert global_attrs.get_attribute("title") == "Test NetCDF file"
        assert global_attrs.has_attribute("institution")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class TestGAValidator:
    """Checks for ``GAValidator`` built for the ``cmip6`` project."""

    def test_validator_initialization(self):
        """A freshly built validator exposes its project id and attribute specs."""
        ga = GAValidator(project_id="cmip6")
        assert ga.project_id == "cmip6"
        assert ga.attribute_specs is not None

    def test_validation_with_simple_attributes(self):
        """Validating a plain attributes dict yields a report with the expected fields."""
        ga = GAValidator(project_id="cmip6")

        # Sample set of global attributes handed to the validator.
        sample_attributes = {
            "Conventions": "CF-1.7 CMIP-6.2",
            "activity_id": "CMIP",
            "creation_date": "2019-04-30T17:44:13Z",
            "data_specs_version": "01.00.29",
            "experiment_id": "historical",
            "forcing_index": 1,
            "frequency": "mon",
            "grid_label": "gn",
            "initialization_index": 1,
            "institution_id": "CCCma",
            "mip_era": "CMIP6",
            "nominal_resolution": "500 km",
            "physics_index": 1,
            "realization_index": 11,
            "source_id": "CanESM5",
            "table_id": "Amon",
            "tracking_id": "hdl:21.14100/3a32f67e-ae59-40d8-ae4a-2e03e922fe8e",
            "variable_id": "tas",
            "variant_label": "r11i1p1f1",
        }

        report = ga.validate_from_attributes_dict(sample_attributes, "test.nc")

        # The report must exist and echo back the project and file names.
        assert report is not None
        assert report.project_id == "cmip6"
        assert report.filename == "test.nc"

        # Only the types of the result fields are pinned, not the outcome itself.
        expected_field_types = (
            ("is_valid", bool),
            ("issues", list),
            ("error_count", int),
            ("warning_count", int),
        )
        for field_name, field_type in expected_field_types:
            assert isinstance(getattr(report, field_name), field_type)

    def test_get_required_attributes(self):
        """The required-attribute list is non-empty and agrees with per-attribute info."""
        ga = GAValidator(project_id="cmip6")
        required = ga.get_required_attributes()

        assert isinstance(required, list)
        assert len(required) > 0

        # Candidate names are only checked when the specs define them and
        # flag them as required.
        for candidate in ("Conventions", "activity_id", "experiment_id", "variable_id"):
            if candidate not in ga.list_attributes():
                continue
            details = ga.get_attribute_info(candidate)
            if details and details.get("required"):
                assert candidate in required

    def test_attribute_info(self):
        """Attribute info carries the expected keys; unknown names give ``None``."""
        ga = GAValidator(project_id="cmip6")

        # Known attribute: only inspected when the specs define it.
        if "activity_id" in ga.list_attributes():
            details = ga.get_attribute_info("activity_id")
            assert details is not None
            for key in ("name", "source_collection", "value_type", "required"):
                assert key in details

        # Unknown attribute: the lookup must return None.
        assert ga.get_attribute_info("non_existent_attribute") is None
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
if __name__ == "__main__":
    # Minimal smoke run for direct execution (without pytest).
    print("Running basic GA validator tests...")

    # (title, test class, method name) for each smoke check, run in order.
    smoke_checks = (
        ("Test 1: NetCDF header parsing", TestNetCDFHeaderParser, "test_parse_simple_header"),
        ("Test 2: Validator initialization", TestGAValidator, "test_validator_initialization"),
    )

    failures = 0
    for title, case_cls, method_name in smoke_checks:
        print(title)
        case = case_cls()
        try:
            getattr(case, method_name)()
        except Exception as e:
            # Keep going so every check is reported, but remember the failure.
            failures += 1
            print(f" ✗ FAILED: {e}")
        else:
            print(" ✓ PASSED")

    print("Basic tests completed!")

    # Propagate failures through the exit status; previously the script
    # always exited 0 even when a check had failed.
    if failures:
        raise SystemExit(1)
|
|
161
|
+
|