PyPI - ckanapi-harvesters - Versions diffs - 0.0.0__py3-none-any.whl → 0.0.3__py3-none-any.whl - Mend

ckanapi-harvesters: 0.0.0 (py3-none-any.whl) → 0.0.3 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (110) hide show

ckanapi_harvesters/__init__.py +32 -10
ckanapi_harvesters/auxiliary/__init__.py +26 -0
ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
ckanapi_harvesters/auxiliary/deprecated.py +82 -0
ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
ckanapi_harvesters/auxiliary/list_records.py +60 -0
ckanapi_harvesters/auxiliary/login.py +163 -0
ckanapi_harvesters/auxiliary/path.py +208 -0
ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
ckanapi_harvesters/auxiliary/urls.py +40 -0
ckanapi_harvesters/builder/__init__.py +40 -0
ckanapi_harvesters/builder/builder_aux.py +20 -0
ckanapi_harvesters/builder/builder_ckan.py +238 -0
ckanapi_harvesters/builder/builder_errors.py +36 -0
ckanapi_harvesters/builder/builder_field.py +122 -0
ckanapi_harvesters/builder/builder_package.py +9 -0
ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
ckanapi_harvesters/builder/builder_resource.py +589 -0
ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
ckanapi_harvesters/builder/builder_resource_init.py +126 -0
ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
ckanapi_harvesters/builder/example/__init__.py +21 -0
ckanapi_harvesters/builder/example/builder_example.py +21 -0
ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
ckanapi_harvesters/builder/mapper_datastore.py +93 -0
ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
ckanapi_harvesters/builder/specific/__init__.py +11 -0
ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
ckanapi_harvesters/ckan_api/__init__.py +20 -0
ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
ckanapi_harvesters/harvesters/__init__.py +23 -0
ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
ckanapi_harvesters/harvesters/harvester_init.py +30 -0
ckanapi_harvesters/harvesters/harvester_model.py +49 -0
ckanapi_harvesters/harvesters/harvester_params.py +323 -0
ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
ckanapi_harvesters/harvesters/postgre_params.py +86 -0
ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
ckanapi_harvesters/policies/__init__.py +20 -0
ckanapi_harvesters/policies/data_format_policy.py +269 -0
ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
ckanapi_harvesters/reports/__init__.py +11 -0
ckanapi_harvesters/reports/admin_report.py +292 -0
{ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/METADATA +84 -38
ckanapi_harvesters-0.0.3.dist-info/RECORD +105 -0
ckanapi_harvesters/divider/__init__.py +0 -27
ckanapi_harvesters/divider/divider.py +0 -53
ckanapi_harvesters/divider/divider_error.py +0 -59
ckanapi_harvesters/main.py +0 -30
ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
{ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/WHEEL +0 -0
{ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/top_level.txt +0 -0

ckanapi_harvesters/policies/data_format_policy.py ADDED Viewed

@@ -0,0 +1,269 @@
+#!python3
+# -*- coding: utf-8 -*-
+"""
+Data format policy representation and enforcing
+"""
+from typing import List, Set, Union, Tuple, Dict
+from warnings import warn
+import json
+import os
+import copy
+import requests
+from requests.auth import AuthBase
+from ckanapi_harvesters.auxiliary.ckan_configuration import allow_policy_from_url, download_external_resource_urls
+from ckanapi_harvesters.auxiliary.ckan_defs import ckan_tags_sep
+from ckanapi_harvesters.auxiliary.urls import is_valid_url
+from ckanapi_harvesters.auxiliary.path import path_rel_to_dir
+from ckanapi_harvesters.auxiliary.ckan_errors import ExternalUrlLockedError
+from ckanapi_harvesters.policies import POLICY_FILE_FORMAT_VERSION
+from ckanapi_harvesters.policies.data_format_policy_errors import (DataPolicyError, UnsupportedPolicyVersionError,
+                                                                   _policy_msg, ErrorCount, ErrorLevel, UrlPolicyLockedError)
+from ckanapi_harvesters.policies.data_format_policy_defs import StringMatchMode, newline_char
+from ckanapi_harvesters.policies.data_format_policy_defs import ListChoiceMode, StringValueSpecification
+from ckanapi_harvesters.policies.data_format_policy_abc import DataPolicyABC
+from ckanapi_harvesters.policies.data_format_policy_lists import ValueListPolicy, GroupedValueListPolicy, SingleValueListPolicy
+from ckanapi_harvesters.policies.data_format_policy_tag_groups import TagListPolicy, TagGroupsListPolicy
+from ckanapi_harvesters.policies.data_format_policy_custom_fields import CustomFieldSpecification, CustomFieldsPolicy
+from ckanapi_harvesters.auxiliary.ckan_model import CkanPackageInfo, CkanConfigurableObjectABC
+class CkanPackageDataFormatPolicy(DataPolicyABC):
+    """
+    Main class to define data format policy for package metadata.
+    It groups optional sub-policies (package tags, package custom fields, resource format)
+    and the sets of mandatory attributes for packages, resources and DataStore fields.
+    """
+    # value used by to_json / to_jsons when their reduced_size argument is left to None
+    default_to_json_reduced_size:bool = False
+    def __init__(self, label:str=None, description:str=None,
+                 package_tags:TagGroupsListPolicy=None, package_custom_fields:CustomFieldsPolicy=None,
+                 package_mandatory_attributes:Set[str]=None, resource_mandatory_attributes:Set[str]=None,
+                 datastore_fields_mandatory_attributes:Set[str]=None, resource_format:SingleValueListPolicy=None):
+        """
+        Create a package data format policy.
+        :param label: short name of the policy (defaults to "Policy")
+        :param description: free text describing the policy (defaults to an empty string)
+        :param package_tags: policy applied to the package tags, or None to skip this check
+        :param package_custom_fields: policy applied to the package custom fields, or None to skip this check
+        :param package_mandatory_attributes: names of the attributes which must be set on the package.
+        A string is also accepted: it is split on ckan_tags_sep.
+        :param resource_mandatory_attributes: same as above, checked on each resource of the package
+        :param datastore_fields_mandatory_attributes: same as above, checked on each DataStore field
+        :param resource_format: policy applied to the format of each resource, or None to skip this check
+        """
+        def _attribute_set(names):
+            # A string holds several names joined by ckan_tags_sep: split the string on the separator
+            # (not the separator on the string), remove surrounding blanks and drop empty entries.
+            if isinstance(names, str):
+                return {name.strip() for name in names.split(ckan_tags_sep) if name.strip()}
+            return names
+        super().__init__()
+        self.label: str = "Policy" if label is None else label
+        self.description: str = "" if description is None else description
+        self.package_tags:TagGroupsListPolicy = package_tags
+        self.package_custom_fields: CustomFieldsPolicy = package_custom_fields
+        self.package_mandatory_attributes:Set[str] = _attribute_set(package_mandatory_attributes)
+        self.resource_mandatory_attributes:Set[str] = _attribute_set(resource_mandatory_attributes)
+        self.datastore_fields_mandatory_attributes:Set[str] = _attribute_set(datastore_fields_mandatory_attributes)
+        self.resource_format:SingleValueListPolicy = resource_format
+        # both are filled when the policy is loaded from a dictionary / file
+        self.file_format_version:Union[str,None] = None
+        self.source_file: Union[str,None] = None
+    def __copy__(self):
+        """
+        Hook used by copy.copy(): delegates to the copy() method.
+        """
+        duplicate = self.copy()
+        return duplicate
+    def copy(self) -> "CkanPackageDataFormatPolicy":
+        """
+        Duplicate the policy: sub-policies and attribute sets are deep-copied, the other attributes are assigned as-is.
+        :return: a new CkanPackageDataFormatPolicy instance
+        """
+        duplicate = CkanPackageDataFormatPolicy()
+        duplicate.label, duplicate.description = self.label, self.description
+        # nested objects are deep-copied so that the duplicate can be modified independently
+        for attribute in ("package_tags", "package_custom_fields", "package_mandatory_attributes",
+                          "resource_mandatory_attributes", "datastore_fields_mandatory_attributes",
+                          "resource_format"):
+            setattr(duplicate, attribute, copy.deepcopy(getattr(self, attribute)))
+        duplicate.file_format_version = self.file_format_version
+        duplicate.source_file = self.source_file
+        duplicate.error_level = self.error_level
+        return duplicate
+    def to_dict(self, *, sets_as_lists:bool=True) -> dict:
+        """
+        Export the policy as a dictionary nested under the key "ckan_package_policy".
+        Sub-policies and attribute sets which are None are left out.
+        :param sets_as_lists: if True, the mandatory attribute sets are exported as sorted lists
+        :return: dictionary representation of the policy
+        """
+        content = {"info": {"file_format_version": POLICY_FILE_FORMAT_VERSION,
+                            "label": self.label,
+                            "description": self.description}}
+        if self.package_tags is not None:
+            content["package_tags_policy"] = self.package_tags.to_dict()
+        if self.package_custom_fields is not None:
+            content["package_custom_fields_policy"] = self.package_custom_fields.to_dict()
+        # the three attribute sets are exported under a key equal to their attribute name
+        for attribute in ("package_mandatory_attributes", "resource_mandatory_attributes",
+                          "datastore_fields_mandatory_attributes"):
+            names = getattr(self, attribute)
+            if names is None:
+                continue
+            content[attribute] = sorted(list(names)) if sets_as_lists else names
+        if self.resource_format is not None:
+            content["resource_format_policy"] = self.resource_format.to_dict()
+        # entries of the base class are merged last
+        content.update(super().to_dict())
+        return {"ckan_package_policy": content}
+    @staticmethod
+    def from_dict(d:dict) -> "CkanPackageDataFormatPolicy":
+        """
+        Build a policy from a dictionary, using _load_from_dict on a default-constructed instance.
+        :param d: dictionary holding the policy
+        :return: the loaded policy
+        """
+        policy = CkanPackageDataFormatPolicy()
+        policy._load_from_dict(d)
+        return policy
+    def _load_from_dict(self, d:dict):
+        """
+        Fill this instance from a dictionary holding the policy under the key "ckan_package_policy".
+        Optional entries which are absent are loaded as None.
+        :param d: dictionary holding the policy
+        :raises UnsupportedPolicyVersionError: if the file format version differs from POLICY_FILE_FORMAT_VERSION
+        """
+        d = d["ckan_package_policy"]
+        def _optional(key, loader):
+            # load an optional entry, None when the key is absent
+            return loader(d[key]) if key in d else None
+        super()._load_from_dict(d)
+        info = d["info"]
+        self.file_format_version = info["file_format_version"]
+        if self.file_format_version != POLICY_FILE_FORMAT_VERSION:
+            raise UnsupportedPolicyVersionError(self.file_format_version)
+        self.label = info["label"]
+        self.description = info["description"]
+        # for package tags management, see also tags and vocabularies in the CKAN API documentation. Here, tags groups are the equivalent of tag vocabularies.
+        self.package_tags = _optional("package_tags_policy", TagGroupsListPolicy.from_dict)
+        self.package_custom_fields = _optional("package_custom_fields_policy", CustomFieldsPolicy.from_dict)
+        self.package_mandatory_attributes = _optional("package_mandatory_attributes", set)
+        self.resource_mandatory_attributes = _optional("resource_mandatory_attributes", set)
+        self.datastore_fields_mandatory_attributes = _optional("datastore_fields_mandatory_attributes", set)
+        self.resource_format = _optional("resource_format_policy", SingleValueListPolicy.from_dict)
+    def to_json(self, json_file:str, reduced_size:bool=None) -> None:
+        """
+        Write the policy to a JSON file encoded in UTF-8.
+        :param json_file: path of the file to write
+        :param reduced_size: True for a compact output, False for an output indented with 4 spaces,
+        None to use default_to_json_reduced_size
+        """
+        compact = self.default_to_json_reduced_size if reduced_size is None else reduced_size
+        policy_dict = self.to_dict()
+        # indent=None is the default layout of json.dump (no line breaks)
+        indent = None if compact else 4
+        with open(json_file, "w", encoding="utf-8") as stream:
+            json.dump(policy_dict, stream, ensure_ascii=False, indent=indent)
+    def to_jsons(self, reduced_size:bool=None) -> str:
+        """
+        Export the policy as a JSON string.
+        :param reduced_size: True for a compact output, False for an output indented with 4 spaces,
+        None to use default_to_json_reduced_size
+        :return: JSON representation of the policy
+        """
+        compact = self.default_to_json_reduced_size if reduced_size is None else reduced_size
+        # compact output: do not include line endings and indentation (not human-readable format)
+        indent = None if compact else 4
+        return json.dumps(self.to_dict(), indent=indent, ensure_ascii=False)
+    @staticmethod
+    def from_jsons(stream:str, *,
+                   source_file:str=None, load_error:bool=True) -> Union["CkanPackageDataFormatPolicy", None]:
+        """
+        Load a policy from a JSON string.
+        :param stream: JSON text holding the policy
+        :param source_file: value stored in the source_file attribute of the loaded policy
+        :param load_error: if True, any loading error is raised; if False, a warning is emitted and None is returned
+        :return: the loaded policy, or None if loading failed and load_error is False
+        """
+        try:
+            policy_dict = json.loads(stream)
+            obj = CkanPackageDataFormatPolicy.from_dict(policy_dict)
+        except Exception as e:
+            if load_error:
+                # bare raise: the original exception propagates unchanged, without becoming its own cause
+                raise
+            # best-effort mode requested by the caller: report and carry on
+            msg = f"Could not load policy (JSON error): {str(e)}"
+            warn(msg)
+            return None
+        obj.source_file = source_file
+        return obj
+    @staticmethod
+    def from_json(policy_file:str, *, base_dir:str=None,
+                  headers:dict=None, proxies:dict=None, auth:Union[AuthBase, Tuple[str,str]]=None, verify:Union[bool,str,None]=None,
+                  error_not_found:bool=True) -> Union["CkanPackageDataFormatPolicy",None]:
+        """
+        Load a policy from a JSON file or from a URL.
+        :param policy_file: URL, or path of the file (resolved against base_dir with path_rel_to_dir)
+        :param base_dir: directory passed to path_rel_to_dir for file paths
+        :param headers: passed to requests.get when policy_file is a URL
+        :param proxies: passed to requests.get when policy_file is a URL
+        :param auth: passed to requests.get when policy_file is a URL
+        :param verify: passed to requests.get when policy_file is a URL
+        :param error_not_found: if False, None is returned when the file does not exist
+        or when the URL does not answer with status 200; if True, FileNotFoundError is raised
+        :return: the loaded policy, or None (see error_not_found)
+        :raises UrlPolicyLockedError: if policy_file is a URL and allow_policy_from_url is False
+        """
+        if is_valid_url(policy_file):
+            if not allow_policy_from_url:
+                raise UrlPolicyLockedError(policy_file)
+            # if (not download_external_resource_urls) and (not ckan.is_url_internal(policy_file)):  # ckan: unknown
+            #     raise ExternalUrlLockedError(policy_file)
+            response = requests.get(policy_file, headers=headers, proxies=proxies, auth=auth, verify=verify)
+            if response.status_code != 200:
+                if error_not_found:
+                    raise FileNotFoundError(policy_file)
+                # do not try to parse the body of an error response as a policy
+                return None
+            policy_dict = json.loads(response.content.decode())
+        else:
+            policy_file = path_rel_to_dir(policy_file, base_dir)
+            if not os.path.isfile(policy_file) and not error_not_found:
+                return None
+            # same encoding as the one used by to_json, independent of the platform default
+            with open(policy_file, "r", encoding="utf-8") as f:
+                policy_dict = json.load(f)
+        obj = CkanPackageDataFormatPolicy.from_dict(policy_dict)
+        obj.source_file = policy_file
+        return obj
+    def _enforce_attributes_list(self, value:CkanConfigurableObjectABC, spec:Set[str], *, context:str, verbose: bool, buffer:List[DataPolicyError]):
+        """
+        Check that every attribute named in spec is set (not None) on the object.
+        :param value: object to check; its configurable_attributes lists the attribute names which can be required
+        :param spec: names of the mandatory attributes
+        :param context: location reported in the message
+        :param verbose: passed to _policy_msg
+        :param buffer: passed to _policy_msg
+        :return: True if no mandatory attribute is missing
+        :raises KeyError: if spec names an attribute which is not in value.configurable_attributes
+        """
+        extra_spec = spec - value.configurable_attributes
+        if len(extra_spec) > 0:
+            # names are sorted so that the message is the same from one run to the next
+            raise KeyError("These attributes do not exist for " + value.get_resource_type() + ": " + ",".join(sorted(extra_spec)) + ". Allowed attributes: " + str(value.configurable_attributes))
+        current_attributes = {name for name in value.configurable_attributes if getattr(value, name) is not None}
+        missing_attributes = set(spec) - current_attributes
+        if not missing_attributes:
+            return True
+        msg = DataPolicyError(context, self.error_level, f"Mandatory attributes were not found: {', '.join(sorted(missing_attributes))}")
+        _policy_msg(msg, error_level=self.error_level, buffer=buffer, verbose=verbose)
+        return False
+    def enforce(self, values: CkanPackageInfo, *, context:str=None, verbose: bool = True, buffer:List[DataPolicyError]=None) -> bool:
+        """
+        Apply every configured rule to a package, to its resources and to their DataStore fields.
+        All the rules are evaluated, also after a first failure.
+        :param values: package and resources metadata
+        :param context: prefix of the message locations (defaults to "Package " followed by the package name)
+        :param verbose: forwarded to the individual checks
+        :param buffer: forwarded to the individual checks
+        :return: True if every rule passed
+        """
+        package_info = values
+        if context is None:
+            context = "Package " + package_info.name
+        options = dict(verbose=verbose, buffer=buffer)
+        success = True
+        # package-level rules
+        if self.package_tags is not None:
+            success &= self.package_tags.enforce(package_info.tags, context=context + " / package tags", **options)
+        if self.package_custom_fields is not None:
+            success &= self.package_custom_fields.enforce(package_info.custom_fields, context=context, **options)
+        if self.package_mandatory_attributes is not None:
+            success &= self._enforce_attributes_list(package_info, self.package_mandatory_attributes, context=context, **options)
+        # resource-level rules
+        for resource_info in package_info.package_resources.values():
+            resource_context = context + " / resource " + resource_info.name
+            if self.resource_format is not None:
+                success &= self.resource_format.enforce(resource_info.format, context=resource_context + " / resource format", **options)
+            if self.resource_mandatory_attributes is not None:
+                success &= self._enforce_attributes_list(resource_info, self.resource_mandatory_attributes, context=resource_context, **options)
+            if self.datastore_fields_mandatory_attributes is None or resource_info.datastore_info is None:
+                continue
+            # field-level rules, only for resources with DataStore information
+            for field_info in resource_info.datastore_info.fields:
+                success &= self._enforce_attributes_list(field_info, self.datastore_fields_mandatory_attributes,
+                                                         context=resource_context + " / field " + field_info.name, **options)
+        return success
+    def policy_check_package(self, package_info: CkanPackageInfo, *, package_buffer:List[DataPolicyError]=None,
+                             display_message:bool=True, raise_error:bool=False) -> bool:
+        """
+        Main entry-point to check the policy rules against the package.
+        :param package_info: package and resources metadata
+        :param package_buffer: you can specify a list object to indirectly obtain the detailed list of error messages
+        (the messages are appended to this list).
+        :param display_message: option to print a summary and the messages in the command line
+        :param raise_error: option to raise a DataPolicyError if the error count reports at least one error
+        :return: True if no error was encountered
+        """
+        if package_buffer is None:
+            package_buffer = []
+        context = "Package " + package_info.name
+        success = self.enforce(package_info, context=context, verbose=True, buffer=package_buffer)
+        error_count = ErrorCount(package_buffer)
+        # consistency check: a success comes with no message, a failure with at least one
+        assert (error_count.total == 0) if success else (error_count.total > 0)
+        # command-line output
+        if display_message and success:
+            print("Package '" + package_info.name + "' passed all tests")
+        elif display_message:
+            print("Package '" + package_info.name + "': " + error_count.error_count_message() + ":")
+            print('\n'.join(error_message.message for error_message in package_buffer))
+        # raise error after all this
+        if raise_error and error_count.error > 0:
+            raise DataPolicyError(context, ErrorLevel.Error, error_count.error_count_message())
+        return success

ckanapi_harvesters/policies/data_format_policy_abc.py ADDED Viewed

@@ -0,0 +1,97 @@
+#!python3
+# -*- coding: utf-8 -*-
+"""
+Data format policy representation and enforcing
+"""
+from typing import List, Any, Iterable, Union, Dict, Set
+from abc import ABC, abstractmethod
+import re
+import fnmatch
+from ckanapi_harvesters.auxiliary.ckan_auxiliary import _string_from_element
+from ckanapi_harvesters.policies.data_format_policy_errors import DataPolicyError, ErrorLevel, _policy_msg
+from ckanapi_harvesters.policies.data_format_policy_defs import StringMatchMode, ListChoiceMode, newline_char
+from ckanapi_harvesters.policies.data_format_policy_defs import StringValueSpecification
+class DataPolicyABC(ABC):
+    """
+    Abstract base class of the policy objects.
+    It holds the error level of the policy and declares the dictionary conversion and enforcing methods.
+    """
+    def __init__(self, error_level:ErrorLevel=ErrorLevel.Information):
+        self.error_level: ErrorLevel = error_level
+    @abstractmethod
+    def to_dict(self) -> dict:
+        """
+        Base implementation: exports the name of the error level. Derived classes merge it into their own dictionary.
+        """
+        return dict(error_level=self.error_level.name)
+    @staticmethod
+    @abstractmethod
+    def from_dict(d:dict):
+        """
+        Build an object from a dictionary (to be implemented by the derived classes).
+        """
+        raise NotImplementedError
+    @abstractmethod
+    def _load_from_dict(self, d:dict):
+        """
+        Base implementation: reads the error level from the key "error_level".
+        """
+        level_name = d["error_level"]
+        self.error_level = ErrorLevel.from_str(level_name)
+    @abstractmethod
+    def enforce(self, values: Any, *, context:str=None, verbose: bool = True, buffer:List[DataPolicyError]=None) -> bool:
+        """
+        Check values against the policy (to be implemented by the derived classes).
+        """
+        raise NotImplementedError
+class DataPolicyElementABC(DataPolicyABC, ABC):
+    """
+    Base class of the policy elements which compare strings against a specification.
+    It adds a mandatory flag and a string match mode to DataPolicyABC.
+    """
+    def __init__(self, mandatory:bool=False, error_level:ErrorLevel=ErrorLevel.Information):
+        super().__init__(error_level=error_level)
+        self.mandatory:bool = mandatory
+        self.match_mode: StringMatchMode = StringMatchMode.Match
+    @abstractmethod
+    def to_dict(self) -> dict:
+        """
+        Base implementation: exports the mandatory flag and the name of the match mode, plus the base class entries.
+        """
+        d = {"mandatory": self.mandatory, "match_mode": self.match_mode.name}
+        d.update(super().to_dict())
+        return d
+    @staticmethod
+    @abstractmethod
+    def from_dict(d:dict):
+        raise NotImplementedError()
+    @abstractmethod
+    def _load_from_dict(self, d:dict) -> None:
+        """
+        Base implementation: reads the keys "mandatory" and "match_mode", after the base class entries.
+        """
+        super()._load_from_dict(d)
+        self.mandatory = d["mandatory"]
+        self.match_mode = StringMatchMode.from_str(d["match_mode"])
+    def _enforce_unit_string(self, values: Union[str, List[str]], spec: Union[str, Iterable[str]], *, context:str, verbose:bool,
+                             buffer:List[DataPolicyError], add_buffer:bool=True) -> bool:
+        """
+        Check one string or a list of strings against the specification, according to self.match_mode.
+        :param values: string or list of strings to check
+        :param spec: accepted string / pattern, or collection of accepted strings / patterns
+        :param context: location reported in the messages
+        :param verbose: passed to _policy_msg
+        :param buffer: passed to _policy_msg
+        :param add_buffer: if False, failures are not reported through _policy_msg
+        :return: True if every value is accepted
+        """
+        if values is None or len(values) == 0:
+            # nothing to compare: only the NotEmpty mode rejects an empty input
+            success = not self.match_mode == StringMatchMode.NotEmpty
+            if add_buffer and not success:
+                # report this failure like the others, so that a False result always comes with a message
+                msg = DataPolicyError(context, self.error_level, f"Value is empty ({self.match_mode})")
+                _policy_msg(msg, error_level=self.error_level, buffer=buffer, verbose=verbose)
+            return success
+        if isinstance(values, str):
+            values = [values]
+        if isinstance(spec, str):
+            spec = [spec]
+        # lower-cased specification, built once for the case-insensitive exact match
+        lowered_spec = {unit_spec.lower() for unit_spec in spec} if self.match_mode == StringMatchMode.Match else None
+        success = True
+        for value in values:
+            success_value = True
+            if value is None:
+                if self.mandatory:
+                    success_value = False
+            elif self.match_mode == StringMatchMode.Match:
+                success_value = value.lower() in lowered_spec
+            elif self.match_mode == StringMatchMode.MatchCaseSensitive:
+                success_value = value in spec
+            elif self.match_mode == StringMatchMode.Regex:
+                # TODO: test
+                success_value = any(re.match(unit_spec, value, flags=re.IGNORECASE) is not None for unit_spec in spec)
+            elif self.match_mode == StringMatchMode.RegexCaseSensitive:
+                # TODO: test
+                success_value = any(re.match(unit_spec, value) is not None for unit_spec in spec)
+            elif self.match_mode == StringMatchMode.Wildcard:
+                # fnmatch functions return a bool (never None). fnmatch.fnmatch only ignores case on some
+                # platforms, so both sides are lower-cased and compared with fnmatchcase.
+                success_value = any(fnmatch.fnmatchcase(value.lower(), unit_spec.lower()) for unit_spec in spec)
+            elif self.match_mode == StringMatchMode.WildcardCaseSensitive:
+                success_value = any(fnmatch.fnmatchcase(value, unit_spec) for unit_spec in spec)
+            # other modes (Any, NotEmpty): a non-empty value is accepted
+            if add_buffer and not success_value:
+                msg = DataPolicyError(context, self.error_level, f"Value does not match spec '{spec}' ({self.match_mode}): {value}")
+                _policy_msg(msg, error_level=self.error_level, buffer=buffer, verbose=verbose)
+            success &= success_value
+        return success

ckanapi_harvesters/policies/data_format_policy_custom_fields.py ADDED Viewed

@@ -0,0 +1,156 @@
+#!python3
+# -*- coding: utf-8 -*-
+"""
+Data format policy representation and enforcing
+"""
+from typing import List, Any, Iterable, Union, Dict, Set
+from abc import ABC, abstractmethod
+from warnings import warn
+import re
+import datetime
+import pandas as pd
+from ckanapi_harvesters.auxiliary.ckan_auxiliary import _string_from_element, _bool_from_string
+from ckanapi_harvesters.auxiliary.ckan_defs import ckan_tags_sep
+from ckanapi_harvesters.policies.data_format_policy_defs import DataType
+from ckanapi_harvesters.policies.data_format_policy_errors import DataPolicyError, ErrorLevel, _policy_msg
+from ckanapi_harvesters.policies.data_format_policy_defs import newline_char, StringMatchMode, ListChoiceMode
+from ckanapi_harvesters.policies.data_format_policy_defs import StringValueSpecification
+from ckanapi_harvesters.policies.data_format_policy_abc import DataPolicyElementABC
+class CustomFieldSpecification(DataPolicyElementABC):
+    """
+    Specification of one custom field: key, accepted values, data type and match mode.
+    """
+    # pattern used for DataType.Numeric: optional sign, digits, optional decimals and exponent, up to the end of the value
+    _numeric_pattern = r"[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?$"
+    def __init__(self, key: str=None, values: List[str]=None, data_type:DataType=None,
+                 match_mode:StringMatchMode=StringMatchMode.Any,
+                 help:str=None,
+                 mandatory:bool=False, error_level:ErrorLevel=ErrorLevel.Information):
+        super().__init__(mandatory=mandatory, error_level=error_level)
+        self.key: str = key
+        self.values: List[str] = values
+        self.data_type:DataType = data_type
+        self.match_mode: StringMatchMode = match_mode
+        self.help: str = help
+    def to_dict(self) -> dict:
+        """
+        Export the specification as a dictionary (an unset data type is exported as an empty string).
+        """
+        d = {"key": self.key,
+             "values": self.values,
+             "data_type": self.data_type.name if self.data_type is not None else "",
+             "match_mode": self.match_mode.name}
+        d.update(super().to_dict())
+        return d
+    @staticmethod
+    def from_dict(d:dict) -> "CustomFieldSpecification":
+        obj = CustomFieldSpecification()
+        obj._load_from_dict(d)
+        return obj
+    def _load_from_dict(self, d:dict):
+        """
+        Fill this instance from a dictionary. The entries "values" and "data_type" are optional.
+        """
+        # the base class reads "mandatory", "match_mode" and "error_level"
+        super()._load_from_dict(d)
+        self.key = d["key"]
+        self.values = d.get("values")
+        # an absent or empty data type means that no type is specified
+        self.data_type = DataType.from_str(d["data_type"]) if d.get("data_type") else None
+    @staticmethod
+    def from_df_row(row: pd.Series) -> "CustomFieldSpecification":
+        """
+        Build a specification from a table row with the columns "key", "values", "mode" and optionally "help".
+        The values are split on ckan_tags_sep.
+        """
+        key = _string_from_element(row["key"]).strip()
+        values_str = _string_from_element(row["values"])
+        # the cell may yield None (see the same test on the mode below): no list of values in this case
+        values = values_str.split(ckan_tags_sep) if values_str is not None else None
+        mode_str = _string_from_element(row["mode"])
+        mode = StringMatchMode.from_str(mode_str) if mode_str is not None else StringMatchMode.Any
+        help:Union[str,None] = None
+        if "help" in row.keys():
+            help = _string_from_element(row["help"])
+        return CustomFieldSpecification(key=key, values=values, match_mode=mode, help=help)
+    def _enforce_unit_string_as(self, match_mode:StringMatchMode, value:str, spec, *, context:str, verbose:bool,
+                                buffer:List[DataPolicyError]) -> bool:
+        # Run _enforce_unit_string with another match mode, then restore the configured one:
+        # the check must not change the specification (it would alter later checks and to_dict).
+        configured_mode = self.match_mode
+        self.match_mode = match_mode
+        try:
+            return self._enforce_unit_string(value, spec, context=context, verbose=verbose, buffer=buffer)
+        finally:
+            self.match_mode = configured_mode
+    def enforce(self, values: str, *, context:str=None, verbose: bool = True, buffer:List[DataPolicyError]=None) -> bool:
+        """
+        Check the value of the custom field against the specification.
+        :param values: value of the custom field
+        :param context: location reported in the messages (completed with the key of the field)
+        :param verbose: passed to the message function
+        :param buffer: passed to the message function
+        :return: True if the value is accepted
+        :raises NotImplementedError: if the data type is not supported
+        """
+        key_context = context + " / custom key " + self.key
+        value = values
+        if self.data_type is None or self.data_type == DataType.Text:
+            if self.values is None:
+                # no list of values: only the NotEmpty mode has something to check (the value itself)
+                if self.match_mode == StringMatchMode.NotEmpty:
+                    return self._enforce_unit_string(value, [], context=key_context, verbose=verbose, buffer=buffer)
+                return True
+            return self._enforce_unit_string(value, self.values, context=key_context, verbose=verbose, buffer=buffer)
+        if self.data_type == DataType.Bool:
+            return self._enforce_unit_string_as(StringMatchMode.Match, value, {"True", "False"},
+                                                context=key_context, verbose=verbose, buffer=buffer)
+        if self.data_type == DataType.TimeStamp:
+            if value is None or len(value) == 0:
+                return True
+            try:
+                datetime.datetime.fromisoformat(value)
+            except (TypeError, ValueError):
+                # report the failure: a False result must come with a message
+                msg = DataPolicyError(key_context, self.error_level, f"Value is not a timestamp in ISO format: {value}")
+                _policy_msg(msg, error_level=self.error_level, buffer=buffer, verbose=verbose)
+                return False
+            return True
+        if self.data_type == DataType.Numeric:
+            return self._enforce_unit_string_as(StringMatchMode.Regex, value, self._numeric_pattern,
+                                                context=key_context, verbose=verbose, buffer=buffer)
+        raise NotImplementedError("Unsupported data type: " + str(self.data_type))
+class CustomFieldsPolicy(DataPolicyElementABC):
+    """
+    Policy on the custom fields of a package: a list of field specifications indexed by key.
+    """
+    def __init__(self, custom_fields_spec:List[CustomFieldSpecification]=None,
+                 restrict_to_list:ErrorLevel=ErrorLevel.Information, keys_case_sensitive:bool=True,
+                 mandatory:bool=False, error_level:ErrorLevel=ErrorLevel.Information):
+        super().__init__(mandatory=mandatory, error_level=error_level)
+        if custom_fields_spec is None:
+            custom_fields_spec = []
+        self.restrict_to_list: ErrorLevel = restrict_to_list
+        self.keys_case_sensitive:bool = keys_case_sensitive
+        self.custom_fields_spec:Dict[str,CustomFieldSpecification] = self._index_specs(custom_fields_spec, keys_case_sensitive)
+    @staticmethod
+    def _index_specs(specs:List[CustomFieldSpecification], keys_case_sensitive:bool) -> Dict[str,CustomFieldSpecification]:
+        # Index the specifications by key. Keys are lower-cased when case is ignored,
+        # because enforce() lower-cases the keys it looks up in this case.
+        if keys_case_sensitive:
+            return {keypair_spec.key: keypair_spec for keypair_spec in specs}
+        return {keypair_spec.key.lower(): keypair_spec for keypair_spec in specs}
+    def to_dict(self) -> dict:
+        d = {"custom_fields": [spec.to_dict() for spec in self.custom_fields_spec.values()],
+             "keys_case_sensitive": self.keys_case_sensitive, "restrict_to_list": self.restrict_to_list.name}
+        d.update(super().to_dict())
+        return d
+    @staticmethod
+    def from_dict(d:dict) -> "CustomFieldsPolicy":
+        obj = CustomFieldsPolicy()
+        obj._load_from_dict(d)
+        return obj
+    def _load_from_dict(self, d:dict):
+        """
+        Fill this instance from a dictionary. The entries "keys_case_sensitive" and "restrict_to_list" are optional:
+        when absent, the default values of the constructor are used.
+        """
+        super()._load_from_dict(d)
+        self.keys_case_sensitive = _bool_from_string(d["keys_case_sensitive"]) if "keys_case_sensitive" in d else True
+        self.restrict_to_list = ErrorLevel.from_str(d["restrict_to_list"]) if "restrict_to_list" in d else ErrorLevel.Information
+        # same indexing as in the constructor, so that keys are lower-cased when case is ignored
+        specs = [CustomFieldSpecification.from_dict(spec) for spec in d["custom_fields"]]
+        self.custom_fields_spec = self._index_specs(specs, self.keys_case_sensitive)
+    def enforce(self, values: Dict[str, str], *, context:str=None, verbose: bool = True, buffer:List[DataPolicyError]=None) -> bool:
+        """
+        Check the custom fields of a package: keys outside of the list, missing mandatory keys, then each value.
+        :param values: custom fields (key -> value); None is treated as no custom field
+        :param context: location reported in the messages
+        :param verbose: passed to the message function
+        :param buffer: passed to the message function
+        :return: True if every check passed
+        """
+        if values is None:
+            values = {}
+        success = True
+        if self.keys_case_sensitive:
+            keys = set(values.keys())
+        else:
+            keys = {key.lower() for key in values.keys()}
+        extra_keys = keys - set(self.custom_fields_spec.keys())
+        if len(extra_keys) > 0:
+            # keys outside of the list are reported with the restrict_to_list level, for the message and its dispatch
+            msg = DataPolicyError(context, self.restrict_to_list, f"Custom keys do not make part of the defined list: {','.join(sorted(extra_keys))}")
+            _policy_msg(msg, error_level=self.restrict_to_list, buffer=buffer, verbose=verbose)
+            success = False
+        mandatory_keys = {key for key, keypair in self.custom_fields_spec.items() if keypair.mandatory}
+        missing_keys = mandatory_keys - keys
+        if len(missing_keys) > 0:
+            # missing mandatory keys are reported with the error level of the policy
+            msg = DataPolicyError(context, self.error_level, f"Mandatory custom keys were not found: {', '.join(sorted(missing_keys))}")
+            _policy_msg(msg, error_level=self.error_level, buffer=buffer, verbose=verbose)
+            success = False
+        for key, value in values.items():
+            lookup_key = key if self.keys_case_sensitive else key.lower()
+            spec = self.custom_fields_spec.get(lookup_key)
+            if spec is not None:
+                # the specification completes the context with its own key
+                success &= spec.enforce(value, context=context, verbose=verbose, buffer=buffer)
+        return success

ckanapi_harvesters/policies/data_format_policy_defs.py ADDED Viewed

@@ -0,0 +1,135 @@
+#!python3
+# -*- coding: utf-8 -*-
+"""
+Data format policy representation and enforcing
+"""
+from typing import List, Dict, Tuple, Union
+from enum import IntEnum
+import pandas as pd
+from ckanapi_harvesters.auxiliary.ckan_auxiliary import _string_from_element
+from ckanapi_harvesters.auxiliary.ckan_defs import ckan_tags_sep
+# Line feed character under a module-level name.
+# NOTE(review): presumably meant for places where a literal backslash is inconvenient
+# (e.g. inside f-string expressions) - confirm against its usages.
+newline_char = "\n"
+class ListChoiceMode(IntEnum):
+    """
+    List choice modes.
+    The string form of a member is its name in lower case, and from_str is the inverse conversion.
+    """
+    Any = 0
+    MaxOne = 1
+    MandatoryOne = 2
+    MandatoryMulti = 3
+    NoExtra = 4
+    def __str__(self):
+        """Return the member name in lower case."""
+        lowered_name = self.name.lower()
+        return lowered_name
+    @staticmethod
+    def from_str(s):
+        """
+        Convert a mode name to the matching member (case and surrounding spaces are ignored).
+
+        :param s: name of the mode
+        :return: the matching ListChoiceMode member
+        :raises ValueError: if the name matches no member (the argument is the cleaned name)
+        """
+        wanted = s.lower().strip()
+        for mode in ListChoiceMode:
+            if mode.name.lower() == wanted:
+                return mode
+        raise ValueError(wanted)
+class StringMatchMode(IntEnum):
+    """
+    String match modes.
+    The string form of a member is its name in lower case; from_str falls back to Any for unknown names.
+    """
+    Any = 0
+    NotEmpty = 1
+    Match = 2
+    MatchCaseSensitive = 3
+    Regex = 4
+    RegexCaseSensitive = 5
+    Wildcard = 6
+    WildcardCaseSensitive = 7
+    def __str__(self):
+        """Return the member name in lower case."""
+        lowered_name = self.name.lower()
+        return lowered_name
+    @staticmethod
+    def from_str(s):
+        """
+        Convert a mode name to the matching member (case and surrounding spaces are ignored).
+
+        :param s: name of the mode
+        :return: the matching StringMatchMode member, or StringMatchMode.Any if the name is not recognized
+        """
+        wanted = s.lower().strip()
+        modes_by_name = {mode.name.lower(): mode for mode in StringMatchMode}
+        # unknown names are not an error here: Any is the default value
+        return modes_by_name.get(wanted, StringMatchMode.Any)
+class DataType(IntEnum):
+    """
+    Data types.
+    The string form of a member is its name in lower case, and from_str is the inverse conversion.
+    """
+    Text = 1
+    Numeric = 2
+    TimeStamp = 3
+    Bool = 4
+    def __str__(self):
+        """Return the member name in lower case."""
+        lowered_name = self.name.lower()
+        return lowered_name
+    @staticmethod
+    def from_str(s):
+        """
+        Convert a data type name to the matching member (case and surrounding spaces are ignored).
+
+        :param s: name of the data type
+        :return: the matching DataType member
+        :raises ValueError: if the name matches no member (the argument is the cleaned name)
+        """
+        wanted = s.lower().strip()
+        found = next((data_type for data_type in DataType if data_type.name.lower() == wanted), None)
+        if found is None:
+            raise ValueError(wanted)
+        return found
+class StringValueSpecification:
+    """
+    A string value associated with a help text.
+    The help text is stored as an empty string when it is not provided.
+    """
+    def __init__(self, value:str, help:str=None):
+        """
+        :param value: the string value
+        :param help: help text attached to the value (None is stored as an empty string)
+        """
+        self.value: str = value
+        self.help: str = "" if help is None else help
+    def to_tuple(self) -> Tuple[str,str]:
+        """Return the specification as a (value, help) tuple."""
+        return (self.value, self.help)
+    @staticmethod
+    def from_tuple(values: Union[str, Tuple[str,str]]) -> "StringValueSpecification":
+        """
+        Build a specification from a bare string, a 1-element sequence (value only)
+        or a (value, help) pair.
+        """
+        if isinstance(values, str):
+            # a bare string is the value, without help
+            return StringValueSpecification(values, "")
+        if len(values) == 1:
+            return StringValueSpecification(values[0], "")
+        value, help_text = values
+        return StringValueSpecification(value, help_text)
+    def to_dict(self) -> dict:
+        """Return the specification as a dictionary with the keys "value" and "help"."""
+        return dict(value=self.value, help=self.help)
+    @staticmethod
+    def from_dict(values: dict) -> "StringValueSpecification":
+        """
+        Build a specification from a dictionary: the key "value" is required,
+        the key "help" is optional.
+        """
+        return StringValueSpecification(values["value"], values.get("help", ""))

ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.3__py3-none-any.whl

ckanapi-harvesters 0.0.0py3-none-any.whl → 0.0.3py3-none-any.whl