esgvoc 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147):
  1. esgvoc/__init__.py +3 -0
  2. esgvoc/api/__init__.py +91 -0
  3. esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
  4. esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
  5. esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
  6. esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
  7. esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
  8. esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
  9. esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
  10. esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
  11. esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
  12. esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
  13. esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
  14. esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
  15. esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
  16. esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
  17. esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
  18. esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
  19. esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
  20. esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
  21. esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
  22. esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
  23. esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
  24. esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
  25. esgvoc/api/data_descriptors/__init__.py +159 -0
  26. esgvoc/api/data_descriptors/activity.py +72 -0
  27. esgvoc/api/data_descriptors/archive.py +5 -0
  28. esgvoc/api/data_descriptors/area_label.py +30 -0
  29. esgvoc/api/data_descriptors/branded_suffix.py +30 -0
  30. esgvoc/api/data_descriptors/branded_variable.py +21 -0
  31. esgvoc/api/data_descriptors/citation_url.py +5 -0
  32. esgvoc/api/data_descriptors/contact.py +5 -0
  33. esgvoc/api/data_descriptors/conventions.py +28 -0
  34. esgvoc/api/data_descriptors/creation_date.py +18 -0
  35. esgvoc/api/data_descriptors/data_descriptor.py +127 -0
  36. esgvoc/api/data_descriptors/data_specs_version.py +25 -0
  37. esgvoc/api/data_descriptors/date.py +5 -0
  38. esgvoc/api/data_descriptors/directory_date.py +22 -0
  39. esgvoc/api/data_descriptors/drs_specs.py +38 -0
  40. esgvoc/api/data_descriptors/experiment.py +215 -0
  41. esgvoc/api/data_descriptors/forcing_index.py +21 -0
  42. esgvoc/api/data_descriptors/frequency.py +48 -0
  43. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  44. esgvoc/api/data_descriptors/grid.py +43 -0
  45. esgvoc/api/data_descriptors/horizontal_label.py +20 -0
  46. esgvoc/api/data_descriptors/initialization_index.py +27 -0
  47. esgvoc/api/data_descriptors/institution.py +80 -0
  48. esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
  49. esgvoc/api/data_descriptors/license.py +31 -0
  50. esgvoc/api/data_descriptors/member_id.py +9 -0
  51. esgvoc/api/data_descriptors/mip_era.py +26 -0
  52. esgvoc/api/data_descriptors/model_component.py +32 -0
  53. esgvoc/api/data_descriptors/models_test/models.py +17 -0
  54. esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
  55. esgvoc/api/data_descriptors/obs_type.py +5 -0
  56. esgvoc/api/data_descriptors/organisation.py +22 -0
  57. esgvoc/api/data_descriptors/physics_index.py +21 -0
  58. esgvoc/api/data_descriptors/product.py +16 -0
  59. esgvoc/api/data_descriptors/publication_status.py +5 -0
  60. esgvoc/api/data_descriptors/realization_index.py +24 -0
  61. esgvoc/api/data_descriptors/realm.py +16 -0
  62. esgvoc/api/data_descriptors/regex.py +5 -0
  63. esgvoc/api/data_descriptors/region.py +35 -0
  64. esgvoc/api/data_descriptors/resolution.py +7 -0
  65. esgvoc/api/data_descriptors/source.py +120 -0
  66. esgvoc/api/data_descriptors/source_type.py +5 -0
  67. esgvoc/api/data_descriptors/sub_experiment.py +5 -0
  68. esgvoc/api/data_descriptors/table.py +28 -0
  69. esgvoc/api/data_descriptors/temporal_label.py +20 -0
  70. esgvoc/api/data_descriptors/time_range.py +17 -0
  71. esgvoc/api/data_descriptors/title.py +5 -0
  72. esgvoc/api/data_descriptors/tracking_id.py +67 -0
  73. esgvoc/api/data_descriptors/variable.py +56 -0
  74. esgvoc/api/data_descriptors/variant_label.py +25 -0
  75. esgvoc/api/data_descriptors/vertical_label.py +20 -0
  76. esgvoc/api/project_specs.py +143 -0
  77. esgvoc/api/projects.py +1253 -0
  78. esgvoc/api/py.typed +0 -0
  79. esgvoc/api/pydantic_handler.py +146 -0
  80. esgvoc/api/report.py +127 -0
  81. esgvoc/api/search.py +171 -0
  82. esgvoc/api/universe.py +434 -0
  83. esgvoc/apps/__init__.py +6 -0
  84. esgvoc/apps/cmor_tables/__init__.py +7 -0
  85. esgvoc/apps/cmor_tables/cvs_table.py +948 -0
  86. esgvoc/apps/drs/__init__.py +0 -0
  87. esgvoc/apps/drs/constants.py +2 -0
  88. esgvoc/apps/drs/generator.py +429 -0
  89. esgvoc/apps/drs/report.py +540 -0
  90. esgvoc/apps/drs/validator.py +312 -0
  91. esgvoc/apps/ga/__init__.py +104 -0
  92. esgvoc/apps/ga/example_usage.py +315 -0
  93. esgvoc/apps/ga/models/__init__.py +47 -0
  94. esgvoc/apps/ga/models/netcdf_header.py +306 -0
  95. esgvoc/apps/ga/models/validator.py +491 -0
  96. esgvoc/apps/ga/test_ga.py +161 -0
  97. esgvoc/apps/ga/validator.py +277 -0
  98. esgvoc/apps/jsg/json_schema_generator.py +341 -0
  99. esgvoc/apps/jsg/templates/template.jinja +241 -0
  100. esgvoc/apps/test_cv/README.md +214 -0
  101. esgvoc/apps/test_cv/__init__.py +0 -0
  102. esgvoc/apps/test_cv/cv_tester.py +1611 -0
  103. esgvoc/apps/test_cv/example_usage.py +216 -0
  104. esgvoc/apps/vr/__init__.py +12 -0
  105. esgvoc/apps/vr/build_variable_registry.py +71 -0
  106. esgvoc/apps/vr/example_usage.py +60 -0
  107. esgvoc/apps/vr/vr_app.py +333 -0
  108. esgvoc/cli/clean.py +304 -0
  109. esgvoc/cli/cmor.py +46 -0
  110. esgvoc/cli/config.py +1300 -0
  111. esgvoc/cli/drs.py +267 -0
  112. esgvoc/cli/find.py +138 -0
  113. esgvoc/cli/get.py +155 -0
  114. esgvoc/cli/install.py +41 -0
  115. esgvoc/cli/main.py +60 -0
  116. esgvoc/cli/offline.py +269 -0
  117. esgvoc/cli/status.py +79 -0
  118. esgvoc/cli/test_cv.py +258 -0
  119. esgvoc/cli/valid.py +147 -0
  120. esgvoc/core/constants.py +17 -0
  121. esgvoc/core/convert.py +0 -0
  122. esgvoc/core/data_handler.py +206 -0
  123. esgvoc/core/db/__init__.py +3 -0
  124. esgvoc/core/db/connection.py +40 -0
  125. esgvoc/core/db/models/mixins.py +25 -0
  126. esgvoc/core/db/models/project.py +102 -0
  127. esgvoc/core/db/models/universe.py +98 -0
  128. esgvoc/core/db/project_ingestion.py +231 -0
  129. esgvoc/core/db/universe_ingestion.py +172 -0
  130. esgvoc/core/exceptions.py +33 -0
  131. esgvoc/core/logging_handler.py +26 -0
  132. esgvoc/core/repo_fetcher.py +345 -0
  133. esgvoc/core/service/__init__.py +41 -0
  134. esgvoc/core/service/configuration/config_manager.py +196 -0
  135. esgvoc/core/service/configuration/setting.py +363 -0
  136. esgvoc/core/service/data_merger.py +634 -0
  137. esgvoc/core/service/esg_voc.py +77 -0
  138. esgvoc/core/service/resolver_config.py +56 -0
  139. esgvoc/core/service/state.py +324 -0
  140. esgvoc/core/service/string_heuristics.py +98 -0
  141. esgvoc/core/service/term_cache.py +108 -0
  142. esgvoc/core/service/uri_resolver.py +133 -0
  143. esgvoc-2.0.2.dist-info/METADATA +82 -0
  144. esgvoc-2.0.2.dist-info/RECORD +147 -0
  145. esgvoc-2.0.2.dist-info/WHEEL +4 -0
  146. esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
  147. esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0
@@ -0,0 +1,491 @@
1
+ """
2
+ Validation models and logic for NetCDF global attributes.
3
+
4
+ This module provides the core validation functionality for verifying
5
+ NetCDF global attributes against project specifications using the esgvoc API.
6
+ """
7
+
8
+ from typing import Any, Dict, List, Optional, Union
9
+ from enum import Enum
10
+ from pydantic import BaseModel, Field
11
+
12
+ import esgvoc.api.projects as projects
13
+ import esgvoc.api.search as search
14
+ from esgvoc.api.data_descriptors.data_descriptor import ConfiguredBaseModel
15
+ from esgvoc.api.project_specs import AttributeProperty, AttributeSpecification
16
+ from esgvoc.api.report import ValidationReport as EsgvocValidationReport
17
+ from esgvoc.core.exceptions import EsgvocNotFoundError, EsgvocDbError
18
+ from .netcdf_header import NetCDFGlobalAttributes
19
+
20
+
21
class ValidationSeverity(str, Enum):
    """
    Severity attached to a validation issue.

    The enum mixes in ``str``, so every member compares equal to its
    lowercase string value.
    """

    ERROR = "error"
    WARNING = "warning"
    INFO = "info"
27
+
28
+
29
class ValidationIssue(ConfiguredBaseModel):
    """
    One finding recorded while validating a single global attribute.
    """

    # Required: which attribute is affected, how severe it is, and why.
    attribute_name: str = Field(
        ...,
        description="Name of the attribute with the issue",
    )
    severity: ValidationSeverity = Field(
        ...,
        description="Severity level of the issue",
    )
    message: str = Field(
        ...,
        description="Human-readable description of the issue",
    )

    # Optional context; each of these defaults to None when not supplied.
    expected_value: Optional[Any] = Field(
        default=None,
        description="Expected value if applicable",
    )
    actual_value: Optional[Any] = Field(
        default=None,
        description="Actual value found",
    )
    source_collection: Optional[str] = Field(
        default=None,
        description="Source collection for the attribute",
    )
40
+
41
+
42
class ValidationReport(ConfiguredBaseModel):
    """
    Outcome of validating the global attributes of one NetCDF file.

    Holds the individual issues plus bookkeeping lists/dicts for validated,
    missing and extra attributes.
    """

    filename: Optional[str] = Field(
        default=None,
        description="NetCDF filename",
    )
    project_id: str = Field(
        ...,
        description="Project ID used for validation",
    )
    is_valid: bool = Field(
        ...,
        description="Overall validation status",
    )
    issues: List[ValidationIssue] = Field(
        default_factory=list,
        description="List of validation issues",
    )
    validated_attributes: Dict[str, Any] = Field(
        default_factory=dict,
        description="Successfully validated attributes",
    )
    mapping_used: Dict[str, str] = Field(
        default_factory=dict,
        description="Mapping of attribute names to validated term values",
    )
    missing_attributes: List[str] = Field(
        default_factory=list,
        description="Required attributes that are missing",
    )
    extra_attributes: List[str] = Field(
        default_factory=list,
        description="Extra attributes not in specification",
    )

    @property
    def error_count(self) -> int:
        """How many recorded issues have ERROR severity."""
        return len(self.get_issues_by_severity(ValidationSeverity.ERROR))

    @property
    def warning_count(self) -> int:
        """How many recorded issues have WARNING severity."""
        return len(self.get_issues_by_severity(ValidationSeverity.WARNING))

    @property
    def info_count(self) -> int:
        """How many recorded issues have INFO severity."""
        return len(self.get_issues_by_severity(ValidationSeverity.INFO))

    def add_issue(self, issue: ValidationIssue) -> None:
        """
        Append an issue to the report.

        An ERROR-level issue additionally flips ``is_valid`` to ``False``;
        other severities leave the flag untouched.
        """
        self.issues.append(issue)
        is_error = issue.severity == ValidationSeverity.ERROR
        if is_error:
            self.is_valid = False

    def get_issues_by_severity(self, severity: ValidationSeverity) -> List[ValidationIssue]:
        """Return the recorded issues whose severity equals ``severity``, in insertion order."""
        selected = []
        for recorded in self.issues:
            if recorded.severity == severity:
                selected.append(recorded)
        return selected

    def summary(self) -> str:
        """Return a one-line status string with the error, warning and info counts."""
        verdict = "INVALID"
        if self.is_valid:
            verdict = "VALID"
        errors, warnings, infos = self.error_count, self.warning_count, self.info_count
        return f"Validation {verdict}: {errors} errors, {warnings} warnings, {infos} info messages"
88
+
89
+
90
class ESGVocAttributeValidator:
    """
    Validator to validate attributes against ESGVOC controlled vocabularies.

    Both ``visit_*`` methods report their outcome as a plain dictionary.
    ``EsgvocNotFoundError`` and ``EsgvocDbError`` raised by the esgvoc lookups
    are caught and converted into a failed result that carries ``error`` and
    ``error_type`` keys.
    """

    # Upper bound on the example terms/values attached to a failed result.
    _MAX_EXAMPLES = 3

    def __init__(self, project_id: str = "cmip6"):
        """
        Initialize the validator.

        :param project_id: Project identifier for ESGVOC queries
        """
        self.project_id = project_id
        # Not used by the methods of this class; retained so the attribute stays available.
        self.validation_results = {}

    def visit_base_attribute(
        self, attribute_name: str, attribute: AttributeProperty, attribute_value: Any
    ) -> Dict[str, Any]:
        """
        Validate a base global attribute against ESGVOC using proper validation logic.

        The value is converted to a stripped string and checked with
        ``projects.valid_term_in_collection``; it is valid when at least one
        term matches. On a miss, up to three terms of the collection are
        attached as examples.

        :param attribute_name: Name of the attribute
        :param attribute: Attribute specification
        :param attribute_value: The actual attribute value to validate
        :return: Validation result
        """
        try:
            # Convert value to string for validation
            value_str = str(attribute_value).strip()

            # Use esgvoc's proper validation function that handles Plain, Pattern, and Composite terms
            matching_terms = projects.valid_term_in_collection(
                value=value_str, project_id=self.project_id, collection_id=attribute.source_collection
            )

            # Validation is successful if we have any matching terms
            is_valid = len(matching_terms) > 0

            # Get available terms for error reporting if validation failed
            available_examples = []
            available_terms_full = []
            if not is_valid:
                try:
                    all_terms = projects.get_all_terms_in_collection(
                        project_id=self.project_id,
                        collection_id=attribute.source_collection,
                        selected_term_fields=None,
                    )
                    example_terms = all_terms[: self._MAX_EXAMPLES]
                    available_examples = [term.id for term in example_terms]  # Just IDs for quick reference
                    available_terms_full = [term.model_dump() for term in example_terms]  # Full term objects
                except Exception:
                    # Examples only decorate the error report, so any failure while
                    # collecting them is tolerated. Catching Exception (not a bare
                    # except) lets KeyboardInterrupt/SystemExit propagate.
                    available_examples = []
                    available_terms_full = []

            return {
                "attribute_name": attribute_name,
                "source_collection": attribute.source_collection,
                "value_type": attribute.value_type,
                "validation_method": "esgvoc_validation",
                "is_valid": is_valid,
                "actual_value": value_str,
                "matching_terms": [
                    {"project_id": term.project_id, "collection_id": term.collection_id, "term_id": term.term_id}
                    for term in matching_terms
                ],
                "available_examples": available_examples,
                "available_terms_full": available_terms_full,
                "total_matches": len(matching_terms),
            }

        except (EsgvocNotFoundError, EsgvocDbError) as e:
            return {
                "attribute_name": attribute_name,
                "source_collection": attribute.source_collection,
                "validation_method": "esgvoc_validation",
                "is_valid": False,
                "error": str(e),
                "error_type": type(e).__name__,
            }

    def visit_specific_attribute(
        self, attribute_name: str, attribute: AttributeProperty, attribute_value: Any
    ) -> Dict[str, Any]:
        """
        Validate a specific key attribute against ESGVOC.

        For specific_key attributes, we need to validate the value against the specific field
        of terms in the collection (e.g., validate experiment description against experiment_id collection).

        The comparison is an exact match between the stripped string forms of
        the attribute value and the term field. Terms whose field is missing
        or falsy are ignored.

        :param attribute_name: Name of the attribute
        :param attribute: Attribute specification with specific_key
        :param attribute_value: The actual attribute value to validate
        :return: Validation result
        """
        try:
            specific_key = getattr(attribute, "specific_key", None)
            if not specific_key:
                return {
                    "attribute_name": attribute_name,
                    "validation_method": "specific_key_lookup",
                    "is_valid": False,
                    "error": "No specific_key defined in attribute specification",
                }

            value_str = str(attribute_value).strip()

            # Get all terms from the source collection
            all_terms = projects.get_all_terms_in_collection(
                project_id=self.project_id,
                collection_id=attribute.source_collection,
                selected_term_fields=None,
            )

            # Check if the value matches the specific_key field of any term
            matched_terms = []
            available_values = []
            for term in all_terms:
                specific_value = term.model_dump().get(specific_key)
                if not specific_value:
                    continue
                available_values.append(str(specific_value))
                if str(specific_value).strip() == value_str:
                    matched_terms.append({"term_id": term.id, "specific_value": specific_value})
            found_match = bool(matched_terms)

            # De-duplicate while keeping first-seen order. Going through set()
            # would make the reported examples vary from run to run.
            unique_values = list(dict.fromkeys(available_values))

            # Get a few full term examples for error reporting
            available_terms_full = []
            if not found_match and all_terms:
                available_terms_full = [term.model_dump() for term in all_terms[: self._MAX_EXAMPLES]]

            return {
                "attribute_name": attribute_name,
                "source_collection": attribute.source_collection,
                "specific_key": specific_key,
                "value_type": attribute.value_type,
                "validation_method": "specific_key_lookup",
                "is_valid": found_match,
                "actual_value": value_str,
                "matching_terms": matched_terms,
                "available_examples": unique_values[: self._MAX_EXAMPLES],  # Unique values, limited to 3
                "available_terms_full": available_terms_full,
                "total_available": len(unique_values),
            }

        except (EsgvocNotFoundError, EsgvocDbError) as e:
            return {
                "attribute_name": attribute_name,
                "source_collection": attribute.source_collection,
                "specific_key": getattr(attribute, "specific_key", None),
                "validation_method": "specific_key_lookup",
                "is_valid": False,
                "error": str(e),
                "error_type": type(e).__name__,
            }
249
+
250
+
251
class GlobalAttributeValidator:
    """
    Main validator class for NetCDF global attributes.

    A validation run checks for missing required attributes, validates the
    attributes that are present (value type, then ESGVOC vocabulary), and
    finally reports attributes that no specification covers.
    """

    def __init__(self, attribute_specs: AttributeSpecification, project_id: str = "cmip6"):
        """
        Initialize the validator with attribute specifications.

        :param attribute_specs: Global attribute specifications (list of AttributeProperty)
        :param project_id: Project identifier
        """
        self.attribute_specs = attribute_specs
        self.project_id = project_id
        self.esgvoc_validator = ESGVocAttributeValidator(project_id)

    def _get_field_name(self, spec: AttributeProperty) -> str:
        """Get the effective field name for an AttributeProperty.

        Falls back to ``source_collection`` when ``field_name`` is unset or empty.
        """
        return spec.field_name or spec.source_collection

    def _get_spec_by_field_name(self, field_name: str) -> Optional[AttributeProperty]:
        """Find an AttributeProperty by its field name, or return None when no spec matches."""
        for spec in self.attribute_specs:
            if self._get_field_name(spec) == field_name:
                return spec
        return None

    def validate(self, global_attributes: NetCDFGlobalAttributes, filename: Optional[str] = None) -> ValidationReport:
        """
        Validate global attributes against specifications.

        :param global_attributes: NetCDF global attributes to validate
        :param filename: Optional filename for reporting
        :return: Validation report
        """
        # The report starts out valid; add_issue() flips the flag on the first error.
        report = ValidationReport(filename=filename, project_id=self.project_id, is_valid=True)

        self._check_missing_attributes(global_attributes, report)
        self._validate_present_attributes(global_attributes, report)
        self._check_extra_attributes(global_attributes, report)

        return report

    def _check_missing_attributes(self, global_attributes: NetCDFGlobalAttributes, report: ValidationReport) -> None:
        """Record an ERROR issue for every required attribute that is absent."""
        for spec in self.attribute_specs:
            field_name = self._get_field_name(spec)
            if spec.is_required and not global_attributes.has_attribute(field_name):
                report.missing_attributes.append(field_name)
                report.add_issue(
                    ValidationIssue(
                        attribute_name=field_name,
                        severity=ValidationSeverity.ERROR,
                        message=f"Required attribute '{field_name}' is missing",
                        source_collection=spec.source_collection,
                    )
                )

    def _validate_present_attributes(self, global_attributes: NetCDFGlobalAttributes, report: ValidationReport) -> None:
        """Validate every present attribute that has a matching specification."""
        for attr_name in global_attributes.list_attributes():
            spec = self._get_spec_by_field_name(attr_name)
            if spec is None:
                # Attributes without a spec are handled by _check_extra_attributes.
                continue

            attr_value = global_attributes.get_attribute(attr_name)

            # Validate value type
            self._validate_value_type(attr_name, attr_value, spec, report)

            # Use visitor pattern for ESGVOC validation
            if spec.specific_key is not None:
                validation_result = self.esgvoc_validator.visit_specific_attribute(attr_name, spec, attr_value)
            else:
                validation_result = self.esgvoc_validator.visit_base_attribute(attr_name, spec, attr_value)

            # Process validation result and add any issues to report
            self._process_esgvoc_validation_result(validation_result, report)

            # The attribute counts as validated only when no ERROR issue
            # (from any check) has been recorded under its name.
            has_error = any(
                issue.attribute_name == attr_name and issue.severity == ValidationSeverity.ERROR
                for issue in report.issues
            )
            if not has_error:
                report.validated_attributes[attr_name] = attr_value
                report.mapping_used[attr_name] = str(attr_value)

    def _check_extra_attributes(self, global_attributes: NetCDFGlobalAttributes, report: ValidationReport) -> None:
        """Record an INFO issue for every attribute that no specification covers."""
        for attr_name in global_attributes.list_attributes():
            if self._get_spec_by_field_name(attr_name) is None:
                report.extra_attributes.append(attr_name)
                report.add_issue(
                    ValidationIssue(
                        attribute_name=attr_name,
                        severity=ValidationSeverity.INFO,
                        message=f"Extra attribute '{attr_name}' not defined in specifications",
                        actual_value=global_attributes.get_attribute(attr_name),
                    )
                )

    def _validate_value_type(
        self, attr_name: str, value: Any, spec: AttributeProperty, report: ValidationReport
    ) -> None:
        """
        Validate the type of an attribute value.

        Recognised ``value_type`` names are "string", "integer" and "float"
        (a float attribute also accepts an integer). Any other declared type
        is reported as a mismatch. An ERROR issue is added when the check fails.
        """
        expected_type = spec.value_type

        # bool is a subclass of int, so isinstance(True, int) is True. It must be
        # ruled out explicitly or booleans would pass as numeric attributes.
        is_bool = isinstance(value, bool)

        if expected_type == "string":
            type_valid = isinstance(value, str)
        elif expected_type == "integer":
            type_valid = isinstance(value, int) and not is_bool
        elif expected_type == "float":
            type_valid = isinstance(value, (int, float)) and not is_bool
        else:
            type_valid = False

        if not type_valid:
            report.add_issue(
                ValidationIssue(
                    attribute_name=attr_name,
                    severity=ValidationSeverity.ERROR,
                    message=f"Attribute '{attr_name}' has incorrect type. Expected {expected_type}, got {type(value).__name__}",
                    expected_value=expected_type,
                    actual_value=type(value).__name__,
                    source_collection=spec.source_collection,
                )
            )

    @staticmethod
    def _format_example_terms(example_terms: List[Dict[str, Any]], is_specific_lookup: bool, specific_key: Any) -> str:
        """Render the example terms of a failed lookup as the multi-line tail of an issue message."""
        parts = [f"\n\nExample valid terms (showing {len(example_terms)}):"]
        for index, term in enumerate(example_terms, 1):
            term_id = term.get("id", "N/A")
            term_type = term.get("type", "N/A")
            parts.append(f"\n {index}. ID: '{term_id}' (type: {term_type})")

            # Show relevant fields based on validation method
            if is_specific_lookup:
                specific_value = term.get(specific_key, "N/A")
                parts.append(f"\n {specific_key}: '{specific_value}'")

            # Show a few other useful fields
            for field in ["drs_name", "description", "name"]:
                if field in term and term[field]:
                    parts.append(f"\n {field}: '{term[field]}'")

            # NOTE(review): one separator newline is emitted per example term.
            # Confirm this nesting against the upstream file.
            parts.append("\n")
        return "".join(parts)

    def _build_failure_message(self, validation_result: Dict[str, Any], actual_value: Any) -> str:
        """Compose the human-readable message for a failed ESGVOC validation result."""
        attr_name = validation_result.get("attribute_name")
        error_msg = validation_result.get("error")
        if error_msg:
            # Error during validation (e.g., collection not found)
            return f"ESGVOC validation failed for '{attr_name}' (value: '{actual_value}'): {error_msg}"

        # Value not found in controlled vocabulary
        source_collection = validation_result.get("source_collection")
        is_specific_lookup = validation_result.get("validation_method") == "specific_key_lookup"
        specific_key = validation_result.get("specific_key")

        if is_specific_lookup:
            message = (
                f"Value '{actual_value}' not found in controlled vocabulary. "
                f"Looking for '{specific_key}' field in collection '{source_collection}'"
            )
        else:
            message = f"Value '{actual_value}' not found in controlled vocabulary '{source_collection}'"

        # Add available examples with full term information
        example_terms = validation_result.get("available_terms_full", [])
        if example_terms:
            message += self._format_example_terms(example_terms, is_specific_lookup, specific_key)
        return message

    def _process_esgvoc_validation_result(self, validation_result: Dict[str, Any], report: ValidationReport) -> None:
        """
        Process the result from ESGVOC validation and add issues to report.

        A result whose ``is_valid`` is truthy adds nothing. Any other result,
        including one without the key, adds a single ERROR issue.
        """
        if validation_result.get("is_valid", False):
            return

        actual_value = validation_result.get("actual_value", "N/A")
        report.add_issue(
            ValidationIssue(
                attribute_name=validation_result.get("attribute_name"),
                severity=ValidationSeverity.ERROR,
                message=self._build_failure_message(validation_result, actual_value),
                expected_value=validation_result.get("available_examples"),
                actual_value=actual_value,
                source_collection=validation_result.get("source_collection"),
            )
        )
442
+
443
+
444
class ValidatorFactory:
    """
    Factory class for creating validators with different configurations.
    """

    @staticmethod
    def _specs_from_legacy_mapping(specs: Dict[str, Any]) -> List[AttributeProperty]:
        """Convert the old dict-based format (attribute name -> config dict) into a list of specs."""
        converted = []
        for attr_name, attr_config in specs.items():
            spec_data = {
                "source_collection": attr_config.get("source_collection"),
                "is_required": attr_config.get("required", True),
                "value_type": attr_config.get("value_type", "string"),
            }
            # The mapping key becomes the field name only when it differs
            # from the source collection.
            if attr_name != attr_config.get("source_collection"):
                spec_data["field_name"] = attr_name
            for optional_key in ("specific_key", "default_value"):
                if optional_key in attr_config:
                    spec_data[optional_key] = attr_config[optional_key]
            converted.append(AttributeProperty(**spec_data))
        return converted

    @staticmethod
    def create_from_yaml_file(yaml_file_path: str, project_id: str = "cmip6") -> GlobalAttributeValidator:
        """
        Create validator from YAML configuration file.

        Three layouts are accepted: a top-level list of attribute entries, a
        mapping whose ``specs`` key holds such a list, or the legacy mapping
        whose ``specs`` key maps attribute names to config dicts.

        :param yaml_file_path: Path to YAML configuration file
        :param project_id: Project identifier
        :return: Configured GlobalAttributeValidator
        :raises ValueError: If the YAML content matches none of the supported layouts
        """
        import yaml

        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale-dependent default encoding.
        with open(yaml_file_path, "r", encoding="utf-8") as f:
            yaml_data = yaml.safe_load(f)

        # Parse YAML data into list of AttributeProperty
        if isinstance(yaml_data, list):
            attribute_specs = [AttributeProperty(**item) for item in yaml_data]
        elif isinstance(yaml_data, dict) and "specs" in yaml_data:
            # Legacy dict format support
            specs = yaml_data["specs"]
            if isinstance(specs, list):
                attribute_specs = [AttributeProperty(**item) for item in specs]
            elif isinstance(specs, dict):
                attribute_specs = ValidatorFactory._specs_from_legacy_mapping(specs)
            else:
                # Previously this fell through to .items() and failed with an
                # unrelated AttributeError.
                raise ValueError(f"Unsupported YAML format for 'specs': {type(specs)}")
        else:
            raise ValueError(f"Unsupported YAML format: {type(yaml_data)}")

        return GlobalAttributeValidator(attribute_specs, project_id)
491
+
@@ -0,0 +1,161 @@
1
+ """
2
+ Tests for the GA (Global Attributes) validator.
3
+
4
+ Run with: python -m pytest src/esgvoc/apps/ga/test_ga.py
5
+ """
6
+
7
+ import pytest
8
+
9
+ from esgvoc.api.project_specs import AttributeProperty, AttributeSpecification
10
+ from .models import (
11
+ NetCDFHeaderParser,
12
+ ValidationSeverity,
13
+ )
14
+ from .validator import GAValidator
15
+
16
+
17
class TestNetCDFHeaderParser:
    """Checks for parsing ``ncdump`` header text with NetCDFHeaderParser."""

    def test_parse_simple_header(self):
        """A small header yields the expected dimensions, variables and global attributes."""
        # NOTE(review): the fixture lines are reproduced flush-left. Confirm
        # against the upstream file whether any leading whitespace is expected.
        ncdump_output = """netcdf test_file {
dimensions:
time = UNLIMITED ; // (12 currently)
lat = 180 ;
lon = 360 ;
variables:
double time(time) ;
time:units = "days since 1850-01-01" ;
time:calendar = "gregorian" ;

// global attributes:
:Conventions = "CF-1.7" ;
:title = "Test NetCDF file" ;
:institution = "Test Institution" ;
}"""

        header = NetCDFHeaderParser.parse_from_ncdump(ncdump_output)

        # File name and dimensions
        assert header.filename == "test_file"
        dimensions = header.dimensions
        assert len(dimensions) == 3
        assert "time" in dimensions
        assert dimensions["time"].is_unlimited
        assert dimensions["lat"].size == 180

        # Variables
        variables = header.variables
        assert len(variables) == 1
        assert "time" in variables
        assert variables["time"].data_type == "double"

        # Global attributes
        global_attrs = header.global_attributes
        assert len(global_attrs.attributes) == 3
        assert global_attrs.get_attribute("Conventions") == "CF-1.7"
        assert global_attrs.get_attribute("title") == "Test NetCDF file"
        assert global_attrs.has_attribute("institution")
+
55
+
56
class TestGAValidator:
    """Checks for the GAValidator entry points, using the ``cmip6`` project."""

    @staticmethod
    def _new_validator():
        """Build the validator instance that every test in this class uses."""
        return GAValidator(project_id="cmip6")

    def test_validator_initialization(self):
        """The validator keeps its project id and has attribute specs loaded."""
        validator = self._new_validator()
        assert validator.project_id == "cmip6"
        assert validator.attribute_specs is not None

    def test_validation_with_simple_attributes(self):
        """Validating a plain attributes dictionary returns a well-formed report."""
        validator = self._new_validator()

        # Test with minimal required attributes
        attributes = dict(
            Conventions="CF-1.7 CMIP-6.2",
            activity_id="CMIP",
            creation_date="2019-04-30T17:44:13Z",
            data_specs_version="01.00.29",
            experiment_id="historical",
            forcing_index=1,
            frequency="mon",
            grid_label="gn",
            initialization_index=1,
            institution_id="CCCma",
            mip_era="CMIP6",
            nominal_resolution="500 km",
            physics_index=1,
            realization_index=11,
            source_id="CanESM5",
            table_id="Amon",
            tracking_id="hdl:21.14100/3a32f67e-ae59-40d8-ae4a-2e03e922fe8e",
            variable_id="tas",
            variant_label="r11i1p1f1",
        )

        report = validator.validate_from_attributes_dict(attributes, "test.nc")

        # Only the shape of the report is checked, not the verdict itself.
        assert report is not None
        assert report.project_id == "cmip6"
        assert report.filename == "test.nc"
        assert isinstance(report.is_valid, bool)
        assert isinstance(report.issues, list)
        assert isinstance(report.error_count, int)
        assert isinstance(report.warning_count, int)

    def test_get_required_attributes(self):
        """The required-attributes list is non-empty and agrees with the per-attribute info."""
        validator = self._new_validator()
        required_attrs = validator.get_required_attributes()

        assert isinstance(required_attrs, list)
        assert len(required_attrs) > 0

        # Should include some standard CMIP6 required attributes
        for attr in ["Conventions", "activity_id", "experiment_id", "variable_id"]:
            if attr not in validator.list_attributes():
                # Only check attributes that are defined in the specs
                continue
            info = validator.get_attribute_info(attr)
            if info and info.get("required"):
                assert attr in required_attrs

    def test_attribute_info(self):
        """Attribute info exposes the expected keys and is None for unknown names."""
        validator = self._new_validator()

        # Test with a common attribute
        if "activity_id" in validator.list_attributes():
            info = validator.get_attribute_info("activity_id")
            assert info is not None
            for expected_key in ("name", "source_collection", "value_type", "required"):
                assert expected_key in info

        # Test with non-existent attribute
        missing_info = validator.get_attribute_info("non_existent_attribute")
        assert missing_info is None
136
+
137
+
138
if __name__ == "__main__":
    # Smoke run for direct execution: two of the tests above are called by
    # hand and a pass/fail line is printed for each.
    print("Running basic GA validator tests...")

    smoke_checks = (
        ("Test 1: NetCDF header parsing", TestNetCDFHeaderParser, "test_parse_simple_header"),
        ("Test 2: Validator initialization", TestGAValidator, "test_validator_initialization"),
    )
    for heading, case_class, method_name in smoke_checks:
        print(heading)
        # The test object is created outside the try block, so a failure in
        # its constructor is not reported as a test failure.
        case = case_class()
        try:
            getattr(case, method_name)()
            print(" ✓ PASSED")
        except Exception as exc:
            print(f" ✗ FAILED: {exc}")

    print("Basic tests completed!")
161
+