PyPI - esgvoc - Versions diffs - 2.0.2__py3-none-any.whl - Mend

esgvoc 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (147) hide show

esgvoc/__init__.py +3 -0
esgvoc/api/__init__.py +91 -0
esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
esgvoc/api/data_descriptors/__init__.py +159 -0
esgvoc/api/data_descriptors/activity.py +72 -0
esgvoc/api/data_descriptors/archive.py +5 -0
esgvoc/api/data_descriptors/area_label.py +30 -0
esgvoc/api/data_descriptors/branded_suffix.py +30 -0
esgvoc/api/data_descriptors/branded_variable.py +21 -0
esgvoc/api/data_descriptors/citation_url.py +5 -0
esgvoc/api/data_descriptors/contact.py +5 -0
esgvoc/api/data_descriptors/conventions.py +28 -0
esgvoc/api/data_descriptors/creation_date.py +18 -0
esgvoc/api/data_descriptors/data_descriptor.py +127 -0
esgvoc/api/data_descriptors/data_specs_version.py +25 -0
esgvoc/api/data_descriptors/date.py +5 -0
esgvoc/api/data_descriptors/directory_date.py +22 -0
esgvoc/api/data_descriptors/drs_specs.py +38 -0
esgvoc/api/data_descriptors/experiment.py +215 -0
esgvoc/api/data_descriptors/forcing_index.py +21 -0
esgvoc/api/data_descriptors/frequency.py +48 -0
esgvoc/api/data_descriptors/further_info_url.py +5 -0
esgvoc/api/data_descriptors/grid.py +43 -0
esgvoc/api/data_descriptors/horizontal_label.py +20 -0
esgvoc/api/data_descriptors/initialization_index.py +27 -0
esgvoc/api/data_descriptors/institution.py +80 -0
esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
esgvoc/api/data_descriptors/license.py +31 -0
esgvoc/api/data_descriptors/member_id.py +9 -0
esgvoc/api/data_descriptors/mip_era.py +26 -0
esgvoc/api/data_descriptors/model_component.py +32 -0
esgvoc/api/data_descriptors/models_test/models.py +17 -0
esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
esgvoc/api/data_descriptors/obs_type.py +5 -0
esgvoc/api/data_descriptors/organisation.py +22 -0
esgvoc/api/data_descriptors/physics_index.py +21 -0
esgvoc/api/data_descriptors/product.py +16 -0
esgvoc/api/data_descriptors/publication_status.py +5 -0
esgvoc/api/data_descriptors/realization_index.py +24 -0
esgvoc/api/data_descriptors/realm.py +16 -0
esgvoc/api/data_descriptors/regex.py +5 -0
esgvoc/api/data_descriptors/region.py +35 -0
esgvoc/api/data_descriptors/resolution.py +7 -0
esgvoc/api/data_descriptors/source.py +120 -0
esgvoc/api/data_descriptors/source_type.py +5 -0
esgvoc/api/data_descriptors/sub_experiment.py +5 -0
esgvoc/api/data_descriptors/table.py +28 -0
esgvoc/api/data_descriptors/temporal_label.py +20 -0
esgvoc/api/data_descriptors/time_range.py +17 -0
esgvoc/api/data_descriptors/title.py +5 -0
esgvoc/api/data_descriptors/tracking_id.py +67 -0
esgvoc/api/data_descriptors/variable.py +56 -0
esgvoc/api/data_descriptors/variant_label.py +25 -0
esgvoc/api/data_descriptors/vertical_label.py +20 -0
esgvoc/api/project_specs.py +143 -0
esgvoc/api/projects.py +1253 -0
esgvoc/api/py.typed +0 -0
esgvoc/api/pydantic_handler.py +146 -0
esgvoc/api/report.py +127 -0
esgvoc/api/search.py +171 -0
esgvoc/api/universe.py +434 -0
esgvoc/apps/__init__.py +6 -0
esgvoc/apps/cmor_tables/__init__.py +7 -0
esgvoc/apps/cmor_tables/cvs_table.py +948 -0
esgvoc/apps/drs/__init__.py +0 -0
esgvoc/apps/drs/constants.py +2 -0
esgvoc/apps/drs/generator.py +429 -0
esgvoc/apps/drs/report.py +540 -0
esgvoc/apps/drs/validator.py +312 -0
esgvoc/apps/ga/__init__.py +104 -0
esgvoc/apps/ga/example_usage.py +315 -0
esgvoc/apps/ga/models/__init__.py +47 -0
esgvoc/apps/ga/models/netcdf_header.py +306 -0
esgvoc/apps/ga/models/validator.py +491 -0
esgvoc/apps/ga/test_ga.py +161 -0
esgvoc/apps/ga/validator.py +277 -0
esgvoc/apps/jsg/json_schema_generator.py +341 -0
esgvoc/apps/jsg/templates/template.jinja +241 -0
esgvoc/apps/test_cv/README.md +214 -0
esgvoc/apps/test_cv/__init__.py +0 -0
esgvoc/apps/test_cv/cv_tester.py +1611 -0
esgvoc/apps/test_cv/example_usage.py +216 -0
esgvoc/apps/vr/__init__.py +12 -0
esgvoc/apps/vr/build_variable_registry.py +71 -0
esgvoc/apps/vr/example_usage.py +60 -0
esgvoc/apps/vr/vr_app.py +333 -0
esgvoc/cli/clean.py +304 -0
esgvoc/cli/cmor.py +46 -0
esgvoc/cli/config.py +1300 -0
esgvoc/cli/drs.py +267 -0
esgvoc/cli/find.py +138 -0
esgvoc/cli/get.py +155 -0
esgvoc/cli/install.py +41 -0
esgvoc/cli/main.py +60 -0
esgvoc/cli/offline.py +269 -0
esgvoc/cli/status.py +79 -0
esgvoc/cli/test_cv.py +258 -0
esgvoc/cli/valid.py +147 -0
esgvoc/core/constants.py +17 -0
esgvoc/core/convert.py +0 -0
esgvoc/core/data_handler.py +206 -0
esgvoc/core/db/__init__.py +3 -0
esgvoc/core/db/connection.py +40 -0
esgvoc/core/db/models/mixins.py +25 -0
esgvoc/core/db/models/project.py +102 -0
esgvoc/core/db/models/universe.py +98 -0
esgvoc/core/db/project_ingestion.py +231 -0
esgvoc/core/db/universe_ingestion.py +172 -0
esgvoc/core/exceptions.py +33 -0
esgvoc/core/logging_handler.py +26 -0
esgvoc/core/repo_fetcher.py +345 -0
esgvoc/core/service/__init__.py +41 -0
esgvoc/core/service/configuration/config_manager.py +196 -0
esgvoc/core/service/configuration/setting.py +363 -0
esgvoc/core/service/data_merger.py +634 -0
esgvoc/core/service/esg_voc.py +77 -0
esgvoc/core/service/resolver_config.py +56 -0
esgvoc/core/service/state.py +324 -0
esgvoc/core/service/string_heuristics.py +98 -0
esgvoc/core/service/term_cache.py +108 -0
esgvoc/core/service/uri_resolver.py +133 -0
esgvoc-2.0.2.dist-info/METADATA +82 -0
esgvoc-2.0.2.dist-info/RECORD +147 -0
esgvoc-2.0.2.dist-info/WHEEL +4 -0
esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0

esgvoc/apps/drs/validator.py ADDED Viewed

@@ -0,0 +1,312 @@
+from typing import cast
+import esgvoc.api.projects as projects
+import esgvoc.apps.drs.constants as constants
+from esgvoc.api.project_specs import (
+    DrsPart,
+    DrsSpecification,
+    DrsType,
+    ProjectSpecs,
+)
+from esgvoc.apps.drs.report import (
+    BlankTerm,
+    ComplianceIssue,
+    DrsIssue,
+    DrsValidationReport,
+    ExtraChar,
+    ExtraSeparator,
+    ExtraTerm,
+    FileNameExtensionIssue,
+    InvalidTerm,
+    MissingTerm,
+    ParsingIssue,
+    Space,
+    Unparsable,
+    ValidationError,
+    ValidationWarning,
+)
+from esgvoc.core.exceptions import EsgvocDbError, EsgvocNotFoundError
+class DrsApplication:
+    """
+    Generic DRS application class.
+    """
+    def __init__(self, project_id: str, pedantic: bool = False) -> None:
+        self.project_id: str = project_id
+        """The project id."""
+        self.pedantic: bool = pedantic
+        """Same as the option of GCC: turn warnings into errors. Default False."""
+        project_specs: ProjectSpecs | None = projects.get_project(project_id)
+        if not project_specs or project_specs.drs_specs is None:
+            raise EsgvocNotFoundError(f"unable to find project spec or only drs_spec for '{project_id}'")
+        self.directory_specs: DrsSpecification = project_specs.drs_specs[DrsType.DIRECTORY]
+        """The DRS directory specs of the project."""
+        self.file_name_specs: DrsSpecification = project_specs.drs_specs[DrsType.FILE_NAME]
+        """The DRS file name specs of the project."""
+        self.dataset_id_specs: DrsSpecification = project_specs.drs_specs[DrsType.DATASET_ID]
+        """The DRS dataset id specs of the project."""
+    def _get_full_file_name_extension(self) -> str:
+        """
+        Returns the full file name extension (the separator plus the extension) of the DRS file
+        name specs of the project.
+        :returns: The full file name extension.
+        :rtype: str
+        """
+        specs: DrsSpecification = self.file_name_specs
+        if specs.properties:
+            full_extension = (
+                specs.properties[constants.FILE_NAME_EXTENSION_SEPARATOR_KEY]
+                + specs.properties[constants.FILE_NAME_EXTENSION_KEY]
+            )
+        else:
+            raise EsgvocDbError(
+                "missing properties in the DRS file name specifications of the " + f"project '{self.project_id}'"
+            )
+        return full_extension
+class DrsValidator(DrsApplication):
+    """
+    Valid a DRS directory, dataset id and file name expression against a project.
+    """
+    def validate_directory(self, drs_expression: str, prefix: str | None = None) -> DrsValidationReport:
+        """
+        Validate a DRS directory expression.
+        :param drs_expression: A DRS directory expression.
+        :type drs_expression: str
+        :param prefix: A directory prefix to be removed from the directory expression.
+        :type prefix: str|None
+        :returns: A validation report.
+        :rtype: DrsValidationReport
+        """
+        if prefix:
+            # Remove prefix if present. Always returns a copy.
+            drs_expression = drs_expression.removeprefix(prefix)
+        return self._validate(drs_expression, self.directory_specs)
+    def validate_dataset_id(self, drs_expression: str) -> DrsValidationReport:
+        """
+        Validate a DRS dataset id expression.
+        :param drs_expression: A DRS dataset id expression.
+        :type drs_expression: str
+        :returns: A validation report.
+        :rtype: DrsValidationReport
+        """
+        return self._validate(drs_expression, self.dataset_id_specs)
+    def validate_file_name(self, drs_expression: str) -> DrsValidationReport:
+        """
+        Validate a file name expression.
+        :param drs_expression: A DRS file name expression.
+        :type drs_expression: str
+        :returns: A validation report.
+        :rtype: DrsValidationReport
+        """
+        full_extension = self._get_full_file_name_extension()
+        if drs_expression.endswith(full_extension):
+            drs_expression = drs_expression.replace(full_extension, "")
+            result = self._validate(drs_expression, self.file_name_specs)
+        else:
+            issue = FileNameExtensionIssue(expected_extension=full_extension)
+            result = self._create_report(self.file_name_specs.type, drs_expression, [issue], [])
+        return result
+    def validate(self, drs_expression: str, drs_type: DrsType | str) -> DrsValidationReport:
+        """
+        Validate a DRS expression.
+        :param drs_expression: A DRS expression.
+        :type drs_expression: str
+        :param drs_type: The type of the given DRS expression (directory, file_name or dataset_id)
+        :type drs_type: DrsType|str
+        :returns: A validation report.
+        :rtype: DrsValidationReport
+        """
+        match drs_type:
+            case DrsType.DIRECTORY:
+                return self.validate_directory(drs_expression=drs_expression)
+            case DrsType.FILE_NAME:
+                return self.validate_file_name(drs_expression=drs_expression)
+            case DrsType.DATASET_ID:
+                return self.validate_dataset_id(drs_expression=drs_expression)
+            case _:
+                raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
+    def _parse(
+        self, drs_expression: str, separator: str, drs_type: DrsType
+    ) -> tuple[
+        list[str] | None,  # terms
+        list[DrsIssue],  # Errors
+        list[DrsIssue],
+    ]:  # Warnings
+        errors: list[DrsIssue] = list()
+        warnings: list[DrsIssue] = list()
+        cursor_offset = 0
+        # Spaces at the beginning/end of expression:
+        start_with_space = drs_expression[0].isspace()
+        end_with_space = drs_expression[-1].isspace()
+        if start_with_space or end_with_space:
+            issue: ParsingIssue = Space()
+            if self.pedantic:
+                errors.append(issue)
+            else:
+                warnings.append(issue)
+            if start_with_space:
+                previous_len = len(drs_expression)
+                drs_expression = drs_expression.lstrip()
+                cursor_offset = previous_len - len(drs_expression)
+            if end_with_space:
+                drs_expression = drs_expression.rstrip()
+        terms = drs_expression.split(separator)
+        if len(terms) < 2:
+            errors.append(Unparsable(expected_drs_type=drs_type))
+            return None, errors, warnings  # Early exit
+        max_term_index = len(terms)
+        cursor_position = initial_cursor_position = len(drs_expression) + 1
+        has_white_term = False
+        for index in range(max_term_index - 1, -1, -1):
+            term = terms[index]
+            if (is_white_term := term.isspace()) or (not term):
+                has_white_term = has_white_term or is_white_term
+                cursor_position -= len(term) + 1
+                del terms[index]
+                continue
+            else:
+                break
+        if cursor_position != initial_cursor_position:
+            max_term_index = len(terms)
+            column = cursor_position + cursor_offset
+            if (drs_type == DrsType.DIRECTORY) and (not has_white_term):
+                issue = ExtraSeparator(column=column)
+                if self.pedantic:
+                    errors.append(issue)
+                else:
+                    warnings.append(issue)
+            else:
+                issue = ExtraChar(column=column)
+                errors.append(issue)
+        for index in range(max_term_index - 1, -1, -1):
+            term = terms[index]
+            len_term = len(term)
+            if not term:
+                column = cursor_position + cursor_offset
+                issue = ExtraSeparator(column=column)
+                if self.pedantic or drs_type != DrsType.DIRECTORY or index == 0:
+                    errors.append(issue)
+                else:
+                    warnings.append(issue)
+                del terms[index]
+            if term.isspace():
+                column = cursor_position + cursor_offset - len_term
+                issue = BlankTerm(column=column)
+                errors.append(issue)
+                del terms[index]
+            cursor_position -= len_term + 1
+        # Mypy doesn't understand that ParsingIssues are DrsIssues...
+        sorted_errors = DrsValidator._sort_parser_issues(errors)  # type: ignore
+        sorted_warnings = DrsValidator._sort_parser_issues(warnings)  # type: ignore
+        return terms, sorted_errors, sorted_warnings  # type: ignore
+    @staticmethod
+    def _sort_parser_issues(issues: list[ParsingIssue]) -> list[ParsingIssue]:
+        return sorted(issues, key=lambda issue: issue.column if issue.column else 0)
+    def _validate_term(self, term: str, part: DrsPart) -> bool:
+        if part.source_collection_term is None:
+            matching_terms = projects.valid_term_in_collection(term, self.project_id, part.source_collection)
+            if len(matching_terms) > 0:
+                return True
+            else:
+                return False
+        else:
+            return projects.valid_term(
+                term, self.project_id, part.source_collection, part.source_collection_term
+            ).validated
+    def _create_report(
+        self,
+        type: DrsType,
+        drs_expression: str,
+        errors: list[DrsIssue],
+        warnings: list[DrsIssue],
+        mapping_used: dict[str, str] | None = None,
+    ) -> DrsValidationReport:
+        if mapping_used is None:
+            mapping_used = {}
+        return DrsValidationReport(
+            project_id=self.project_id,
+            type=type,
+            expression=drs_expression,
+            mapping_used=mapping_used,
+            errors=cast(list[ValidationError], errors),
+            warnings=cast(list[ValidationWarning], warnings),
+        )
+    def _validate(self, drs_expression: str, specs: DrsSpecification) -> DrsValidationReport:
+        terms, errors, warnings = self._parse(drs_expression, specs.separator, specs.type)
+        if not terms:
+            # Early exit.
+            return self._create_report(specs.type, drs_expression, errors, warnings)
+        term_index = 0
+        term_max_index = len(terms)
+        part_index = 0
+        part_max_index = len(specs.parts)
+        matching_code_mapping = dict()
+        mapping_used: dict[str, str] = dict()
+        while part_index < part_max_index:
+            term = terms[term_index]
+            part: DrsPart = specs.parts[part_index]
+            if self._validate_term(term, part):
+                term_index += 1
+                part_index += 1
+                matching_code_mapping[part.__str__()] = 0
+                mapping_used[part.source_collection] = term
+            elif part.is_required:
+                issue: ComplianceIssue = InvalidTerm(
+                    term=term, term_position=term_index + 1, collection_id_or_constant_value=str(part)
+                )
+                errors.append(issue)
+                matching_code_mapping[part.__str__()] = 1
+                term_index += 1
+                part_index += 1
+            else:  # The part is not required so try to match the term with the next part.
+                part_index += 1
+                matching_code_mapping[part.__str__()] = -1
+            if term_index == term_max_index:
+                break
+        # Cases:
+        # - All terms and collections have been processed.
+        # - Not enough term to process all collections.
+        # - Extra terms left whereas all collections have been processed:
+        #   + The last collections are required => report extra terms.
+        #   + The last collections are not required and these terms were not validated by them.
+        #     => Should report error even if the collections are not required.
+        if part_index < part_max_index:  # Missing terms.
+            for index in range(part_index, part_max_index):
+                part = specs.parts[index]
+                issue = MissingTerm(collection_id=str(part), collection_position=index + 1)
+                if part.is_required:
+                    errors.append(issue)
+                else:
+                    warnings.append(issue)
+        elif term_index < term_max_index:  # Extra terms.
+            part_index -= term_max_index - term_index
+            for index in range(term_index, term_max_index):
+                term = terms[index]
+                part = specs.parts[part_index]
+                if (not part.is_required) and matching_code_mapping[part.__str__()] < 0:  # noqa E125
+                    issue = ExtraTerm(term=term, term_position=index, collection_id=str(part))
+                else:
+                    issue = ExtraTerm(term=term, term_position=index, collection_id=None)
+                errors.append(issue)
+                part_index += 1
+        return self._create_report(specs.type, drs_expression, errors, warnings, mapping_used)

esgvoc/apps/ga/__init__.py ADDED Viewed

@@ -0,0 +1,104 @@
+"""GA application: validation of the global attributes of NetCDF files.
+This package provides tools for validating NetCDF global attributes against
+project specifications (like CMIP6, CMIP7) using controlled vocabularies
+from the esgvoc API.
+Key Features:
+- YAML-based configuration for attribute specifications
+- Integration with esgvoc controlled vocabularies
+- NetCDF header parsing from ncdump output
+- Comprehensive validation reporting
+- Support for different project specifications
+Example Usage:
+```python
+from esgvoc.apps.ga import GAValidator, validate_netcdf_attributes
+# Quick validation from ncdump output
+report = validate_netcdf_attributes(
+    ncdump_output=ncdump_text,
+    project_id="cmip6",
+    filename="my_file.nc"
+)
+print(f"Validation result: {'PASS' if report.is_valid else 'FAIL'}")
+print(f"Errors: {report.error_count}, Warnings: {report.warning_count}")
+# Or use the full validator class
+validator = GAValidator(project_id="cmip6")
+report = validator.validate_from_ncdump(ncdump_text)
+# Get detailed validation summary
+from esgvoc.apps.ga import create_validation_summary
+print(create_validation_summary(report))
+```
+Advanced Usage:
+```python
+from esgvoc.apps.ga.models import NetCDFHeader, NetCDFHeaderParser
+# Parse NetCDF header from ncdump output
+ncdump_output = '''
+netcdf test_file {
+// global attributes:
+        :Conventions = "CF-1.7 CMIP-6.2" ;
+        :activity_id = "CMIP" ;
+        :experiment_id = "historical" ;
+}
+'''
+header = NetCDFHeaderParser.parse_from_ncdump(ncdump_output)
+print(f"File: {header.filename}")
+print(f"Attributes: {header.global_attributes.list_attributes()}")
+```
+"""
+# Main GA validator interface
+from .validator import (
+    GAValidator,
+    GAValidatorFactory,
+    validate_netcdf_attributes,
+    create_validation_summary
+)
+# Core models
+from .models import (
+    # Models for advanced usage
+    NetCDFHeader,
+    NetCDFHeaderParser,
+    ValidationReport,
+    ValidationSeverity,
+    ValidationIssue,
+    # Validator models
+    ESGVocAttributeValidator,
+    ValidatorFactory,
+    # Import AttributeProperty from project_specs
+    AttributeProperty,
+    AttributeSpecification,
+)
+# Public API of the package: every name re-exported from .validator and .models above.
+__all__ = [
+    # Main interface
+    "GAValidator",
+    "GAValidatorFactory",
+    "validate_netcdf_attributes",
+    "create_validation_summary",
+    # Models
+    "NetCDFHeader",
+    "NetCDFHeaderParser",
+    "ValidationReport",
+    "ValidationSeverity",
+    "ValidationIssue",
+    # Attribute specifications from project_specs
+    "AttributeProperty",
+    "AttributeSpecification",
+    # Validators
+    "ESGVocAttributeValidator",
+    "ValidatorFactory",
+]