ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. ckanapi_harvesters/__init__.py +32 -10
  2. ckanapi_harvesters/auxiliary/__init__.py +26 -0
  3. ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
  4. ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
  5. ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
  6. ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
  7. ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
  8. ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
  9. ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
  10. ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
  11. ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
  12. ckanapi_harvesters/auxiliary/deprecated.py +82 -0
  13. ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
  14. ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
  15. ckanapi_harvesters/auxiliary/list_records.py +60 -0
  16. ckanapi_harvesters/auxiliary/login.py +163 -0
  17. ckanapi_harvesters/auxiliary/path.py +208 -0
  18. ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
  19. ckanapi_harvesters/auxiliary/urls.py +40 -0
  20. ckanapi_harvesters/builder/__init__.py +40 -0
  21. ckanapi_harvesters/builder/builder_aux.py +20 -0
  22. ckanapi_harvesters/builder/builder_ckan.py +238 -0
  23. ckanapi_harvesters/builder/builder_errors.py +36 -0
  24. ckanapi_harvesters/builder/builder_field.py +122 -0
  25. ckanapi_harvesters/builder/builder_package.py +9 -0
  26. ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
  27. ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
  28. ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
  29. ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
  30. ckanapi_harvesters/builder/builder_resource.py +589 -0
  31. ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
  32. ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
  33. ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
  34. ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
  35. ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
  36. ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
  37. ckanapi_harvesters/builder/builder_resource_init.py +126 -0
  38. ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
  39. ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
  40. ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
  41. ckanapi_harvesters/builder/example/__init__.py +21 -0
  42. ckanapi_harvesters/builder/example/builder_example.py +21 -0
  43. ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
  44. ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
  45. ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
  46. ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
  47. ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
  48. ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
  49. ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
  50. ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
  51. ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
  52. ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
  53. ckanapi_harvesters/builder/mapper_datastore.py +93 -0
  54. ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
  55. ckanapi_harvesters/builder/specific/__init__.py +11 -0
  56. ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
  57. ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
  58. ckanapi_harvesters/ckan_api/__init__.py +20 -0
  59. ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
  60. ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
  61. ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
  62. ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
  63. ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
  64. ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
  65. ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
  66. ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
  67. ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
  68. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
  69. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
  70. ckanapi_harvesters/harvesters/__init__.py +23 -0
  71. ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
  72. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
  73. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
  74. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
  75. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
  76. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
  77. ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
  78. ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
  79. ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
  80. ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
  81. ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
  82. ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
  83. ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
  84. ckanapi_harvesters/harvesters/harvester_init.py +30 -0
  85. ckanapi_harvesters/harvesters/harvester_model.py +49 -0
  86. ckanapi_harvesters/harvesters/harvester_params.py +323 -0
  87. ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
  88. ckanapi_harvesters/harvesters/postgre_params.py +86 -0
  89. ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
  90. ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
  91. ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
  92. ckanapi_harvesters/policies/__init__.py +20 -0
  93. ckanapi_harvesters/policies/data_format_policy.py +269 -0
  94. ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
  95. ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
  96. ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
  97. ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
  98. ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
  99. ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
  100. ckanapi_harvesters/reports/__init__.py +11 -0
  101. ckanapi_harvesters/reports/admin_report.py +292 -0
  102. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/METADATA +84 -38
  103. ckanapi_harvesters-0.0.3.dist-info/RECORD +105 -0
  104. ckanapi_harvesters/divider/__init__.py +0 -27
  105. ckanapi_harvesters/divider/divider.py +0 -53
  106. ckanapi_harvesters/divider/divider_error.py +0 -59
  107. ckanapi_harvesters/main.py +0 -30
  108. ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
  109. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/WHEEL +0 -0
  110. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,269 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Data format policy representation and enforcing
5
+ """
6
+ from typing import List, Set, Union, Tuple, Dict
7
+ from warnings import warn
8
+ import json
9
+ import os
10
+ import copy
11
+
12
+ import requests
13
+ from requests.auth import AuthBase
14
+
15
+ from ckanapi_harvesters.auxiliary.ckan_configuration import allow_policy_from_url, download_external_resource_urls
16
+ from ckanapi_harvesters.auxiliary.ckan_defs import ckan_tags_sep
17
+ from ckanapi_harvesters.auxiliary.urls import is_valid_url
18
+ from ckanapi_harvesters.auxiliary.path import path_rel_to_dir
19
+ from ckanapi_harvesters.auxiliary.ckan_errors import ExternalUrlLockedError
20
+ from ckanapi_harvesters.policies import POLICY_FILE_FORMAT_VERSION
21
+ from ckanapi_harvesters.policies.data_format_policy_errors import (DataPolicyError, UnsupportedPolicyVersionError,
22
+ _policy_msg, ErrorCount, ErrorLevel, UrlPolicyLockedError)
23
+ from ckanapi_harvesters.policies.data_format_policy_defs import StringMatchMode, newline_char
24
+ from ckanapi_harvesters.policies.data_format_policy_defs import ListChoiceMode, StringValueSpecification
25
+ from ckanapi_harvesters.policies.data_format_policy_abc import DataPolicyABC
26
+ from ckanapi_harvesters.policies.data_format_policy_lists import ValueListPolicy, GroupedValueListPolicy, SingleValueListPolicy
27
+ from ckanapi_harvesters.policies.data_format_policy_tag_groups import TagListPolicy, TagGroupsListPolicy
28
+ from ckanapi_harvesters.policies.data_format_policy_custom_fields import CustomFieldSpecification, CustomFieldsPolicy
29
+ from ckanapi_harvesters.auxiliary.ckan_model import CkanPackageInfo, CkanConfigurableObjectABC
30
+
31
+
32
+
33
class CkanPackageDataFormatPolicy(DataPolicyABC):
    """
    Main class to define data format policy for package metadata.

    The policy aggregates optional rules: a package tags policy, a package custom fields policy,
    a resource format policy and sets of attribute names which must be defined (not None) for the
    package, its resources and the DataStore fields. Rules left to None are ignored by enforce().
    """
    # default for the reduced_size argument of to_json / to_jsons (False: indented JSON output)
    default_to_json_reduced_size:bool = False

    def __init__(self, label:str=None, description:str=None,
                 package_tags:TagGroupsListPolicy=None, package_custom_fields:CustomFieldsPolicy=None,
                 package_mandatory_attributes:Set[str]=None, resource_mandatory_attributes:Set[str]=None,
                 datastore_fields_mandatory_attributes:Set[str]=None, resource_format:SingleValueListPolicy=None):
        """
        :param label: name of the policy ("Policy" if None)
        :param description: free text ("" if None)
        :param package_tags: policy applied to the package tags
        :param package_custom_fields: policy applied to the package custom fields
        :param package_mandatory_attributes: attribute names which must be set on the package.
            A string is split on ckan_tags_sep.
        :param resource_mandatory_attributes: same, for each resource of the package
        :param datastore_fields_mandatory_attributes: same, for each DataStore field of each resource
        :param resource_format: policy applied to the format of each resource
        """
        super().__init__()
        self.label: str = "Policy" if label is None else label
        self.description: str = "" if description is None else description
        self.package_tags:TagGroupsListPolicy = package_tags
        self.package_custom_fields: CustomFieldsPolicy = package_custom_fields
        self.package_mandatory_attributes:Set[str] = self._attributes_to_set(package_mandatory_attributes)
        self.resource_mandatory_attributes:Set[str] = self._attributes_to_set(resource_mandatory_attributes)
        self.datastore_fields_mandatory_attributes:Set[str] = self._attributes_to_set(datastore_fields_mandatory_attributes)
        self.resource_format:SingleValueListPolicy = resource_format
        self.file_format_version:Union[str,None] = None
        self.source_file: Union[str,None] = None

    @staticmethod
    def _attributes_to_set(attributes:Union[Set[str], str, None]) -> Union[Set[str], None]:
        """
        Convert a separator-delimited string of attribute names to a set.
        Any other value (set, None...) is returned unchanged.
        """
        if not isinstance(attributes, str):
            return attributes
        if isinstance(ckan_tags_sep, str):
            # the string holding the names must be split by the separator, not the other way round
            return set(attributes.split(ckan_tags_sep))
        # NOTE(review): fallback in case ckan_tags_sep is a compiled pattern exposing split(string) - confirm in ckan_defs
        return set(ckan_tags_sep.split(attributes))

    def __copy__(self):
        return self.copy()

    def copy(self) -> "CkanPackageDataFormatPolicy":
        """
        Return a copy of the policy. The sub-policies and the attribute sets are deep-copied.
        """
        dest = CkanPackageDataFormatPolicy()
        dest.label = self.label
        dest.description = self.description
        dest.package_tags = copy.deepcopy(self.package_tags)
        dest.package_custom_fields = copy.deepcopy(self.package_custom_fields)
        dest.package_mandatory_attributes = copy.deepcopy(self.package_mandatory_attributes)
        dest.resource_mandatory_attributes = copy.deepcopy(self.resource_mandatory_attributes)
        dest.datastore_fields_mandatory_attributes = copy.deepcopy(self.datastore_fields_mandatory_attributes)
        dest.resource_format = copy.deepcopy(self.resource_format)
        dest.file_format_version = self.file_format_version
        dest.source_file = self.source_file
        dest.error_level = self.error_level
        return dest

    def to_dict(self, *, sets_as_lists:bool=True) -> dict:
        """
        Serialize the policy to a dictionary, under the root key "ckan_package_policy".

        :param sets_as_lists: True to export the attribute sets as sorted lists (required for a JSON export)
        """
        d = {"info": {"file_format_version": POLICY_FILE_FORMAT_VERSION,
                      "label": self.label,
                      "description": self.description,
                      },}
        if self.package_tags is not None:
            d["package_tags_policy"] = self.package_tags.to_dict()
        if self.package_custom_fields is not None:
            d["package_custom_fields_policy"] = self.package_custom_fields.to_dict()
        attribute_sets = (("package_mandatory_attributes", self.package_mandatory_attributes),
                          ("resource_mandatory_attributes", self.resource_mandatory_attributes),
                          ("datastore_fields_mandatory_attributes", self.datastore_fields_mandatory_attributes))
        for key, attributes in attribute_sets:
            if attributes is not None:
                d[key] = sorted(attributes) if sets_as_lists else attributes
        if self.resource_format is not None:
            d["resource_format_policy"] = self.resource_format.to_dict()
        d.update(super().to_dict())
        return {"ckan_package_policy": d}

    @staticmethod
    def from_dict(d:dict) -> "CkanPackageDataFormatPolicy":
        """
        Create a policy from a dictionary with the structure produced by to_dict.
        """
        obj = CkanPackageDataFormatPolicy()
        obj._load_from_dict(d)
        return obj

    def _load_from_dict(self, d:dict):
        """
        Load the policy from a dictionary with the structure produced by to_dict.

        :raises UnsupportedPolicyVersionError: if the file format version differs from POLICY_FILE_FORMAT_VERSION
        """
        d = d["ckan_package_policy"]
        super()._load_from_dict(d)
        info = d["info"]
        self.file_format_version = info["file_format_version"]
        if self.file_format_version != POLICY_FILE_FORMAT_VERSION:
            raise UnsupportedPolicyVersionError(self.file_format_version)
        self.label = info["label"]
        self.description = info["description"]
        # for package tags management, see also tags and vocabularies in the CKAN API documentation. Here, tags groups are the equivalent of tag vocabularies.
        self.package_tags = TagGroupsListPolicy.from_dict(d["package_tags_policy"]) if "package_tags_policy" in d else None
        self.package_custom_fields = CustomFieldsPolicy.from_dict(d["package_custom_fields_policy"]) if "package_custom_fields_policy" in d else None
        self.package_mandatory_attributes = set(d["package_mandatory_attributes"]) if "package_mandatory_attributes" in d else None
        self.resource_mandatory_attributes = set(d["resource_mandatory_attributes"]) if "resource_mandatory_attributes" in d else None
        self.datastore_fields_mandatory_attributes = set(d["datastore_fields_mandatory_attributes"]) if "datastore_fields_mandatory_attributes" in d else None
        self.resource_format = SingleValueListPolicy.from_dict(d["resource_format_policy"]) if "resource_format_policy" in d else None

    def to_json(self, json_file:str, reduced_size:bool=None) -> None:
        """
        Write the policy to a JSON file (UTF-8).

        :param json_file: path of the file to write
        :param reduced_size: True to write without indentation. None: use default_to_json_reduced_size.
        """
        if reduced_size is None:
            reduced_size = self.default_to_json_reduced_size
        policy_dict = self.to_dict()
        # distinct name for the handle: the path argument is not shadowed
        with open(json_file, "w", encoding="utf-8") as f:
            if reduced_size:
                json.dump(policy_dict, f, ensure_ascii=False)
            else:
                json.dump(policy_dict, f, ensure_ascii=False, indent=4)

    def to_jsons(self, reduced_size:bool=None) -> str:
        """
        Return the policy as a JSON string.

        :param reduced_size: True to generate without indentation. None: use default_to_json_reduced_size.
        """
        if reduced_size is None:
            reduced_size = self.default_to_json_reduced_size
        policy_dict = self.to_dict()
        if reduced_size:
            # do not include spaces and line endings (not human-readable format)
            return json.dumps(policy_dict, ensure_ascii=False)
        return json.dumps(policy_dict, indent=4, ensure_ascii=False)

    @staticmethod
    def from_jsons(stream:str, *,
                   source_file:str=None, load_error:bool=True) -> Union["CkanPackageDataFormatPolicy", None]:
        """
        Create a policy from a JSON string.

        :param stream: JSON text
        :param source_file: value stored in the source_file attribute of the result
        :param load_error: True to propagate loading errors. False: emit a warning and return None.
        """
        try:
            policy_dict = json.loads(stream)
            obj = CkanPackageDataFormatPolicy.from_dict(policy_dict)
        except Exception as e:
            if load_error:
                # plain re-raise keeps the original traceback (an exception must not be chained to itself)
                raise
            msg = f"Could not load policy (JSON error): {str(e)}"
            warn(msg)
            return None
        obj.source_file = source_file
        return obj

    @staticmethod
    def from_json(policy_file:str, *, base_dir:str=None,
                  headers:dict=None, proxies:dict=None, auth:Union[AuthBase, Tuple[str,str]]=None, verify:Union[bool,str,None]=None,
                  error_not_found:bool=True) -> Union["CkanPackageDataFormatPolicy",None]:
        """
        Create a policy from a JSON file, given by a local path or by a URL.

        :param policy_file: URL or path of the file. A path is resolved with path_rel_to_dir and base_dir.
        :param headers: passed to requests.get (URL only)
        :param proxies: passed to requests.get (URL only)
        :param auth: passed to requests.get (URL only)
        :param verify: passed to requests.get (URL only)
        :param error_not_found: False to return None instead of raising an error if the file is not found
            (local file absent or HTTP status other than 200)
        :raises UrlPolicyLockedError: if policy_file is a URL and allow_policy_from_url is False
        :raises FileNotFoundError: if the file is not found and error_not_found is True
        """
        if is_valid_url(policy_file):
            if not allow_policy_from_url:
                raise UrlPolicyLockedError(policy_file)
            # TODO: also refuse external urls if download_external_resource_urls is False (raise ExternalUrlLockedError).
            # This requires a CKAN instance to know if the url is internal, which is not available here.
            response = requests.get(policy_file, headers=headers, proxies=proxies, auth=auth, verify=verify)
            if response.status_code != 200:
                if error_not_found:
                    raise FileNotFoundError(policy_file)
                # do not try to parse the body of an error response
                return None
            policy_dict = json.loads(response.content.decode())
        else:
            policy_file = path_rel_to_dir(policy_file, base_dir)
            if not error_not_found and not os.path.isfile(policy_file):
                return None
            # same encoding as to_json, independent of the platform default
            with open(policy_file, "r", encoding="utf-8") as f:
                policy_dict = json.load(f)
        obj = CkanPackageDataFormatPolicy.from_dict(policy_dict)
        obj.source_file = policy_file
        return obj

    def _enforce_attributes_list(self, value:CkanConfigurableObjectABC, spec:Set[str], *, context:str, verbose: bool, buffer:List[DataPolicyError]):
        """
        Check that all the attributes listed in spec are defined (not None) for the object.

        :param value: object to check, exposing configurable_attributes
        :param spec: names of the mandatory attributes
        :return: True if no attribute is missing
        :raises KeyError: if spec contains names which are not in value.configurable_attributes
        """
        spec = set(spec)
        extra_spec = spec - value.configurable_attributes
        if extra_spec:
            # names are sorted so the message does not depend on the set iteration order
            raise KeyError("These attributes do not exist for " + value.get_resource_type() + ": " + ",".join(sorted(extra_spec)) + ". Allowed attributes: " + str(value.configurable_attributes))
        current_attributes = {name for name in value.configurable_attributes if getattr(value, name) is not None}
        missing_attributes = spec - current_attributes
        if not missing_attributes:
            return True
        msg = DataPolicyError(context, self.error_level, f"Mandatory attributes were not found: {', '.join(sorted(missing_attributes))}")
        _policy_msg(msg, error_level=self.error_level, buffer=buffer, verbose=verbose)
        return False

    def enforce(self, values: CkanPackageInfo, *, context:str=None, verbose: bool = True, buffer:List[DataPolicyError]=None) -> bool:
        """
        Apply all the rules of the policy to a package, its resources and their DataStore fields.

        :param values: package and resources metadata
        :param context: prefix of the messages ("Package <name>" if None)
        :param verbose: passed to the sub-policies and to _policy_msg
        :param buffer: list passed to the sub-policies and to _policy_msg to collect the messages
        :return: True if all the rules passed
        """
        package_info = values
        success = True
        if context is None:
            context = "Package " + package_info.name
        if self.package_tags is not None:
            success &= self.package_tags.enforce(package_info.tags, context=context + " / package tags", verbose=verbose, buffer=buffer)
        if self.package_custom_fields is not None:
            success &= self.package_custom_fields.enforce(package_info.custom_fields, context=context, verbose=verbose, buffer=buffer)
        if self.package_mandatory_attributes is not None:
            success &= self._enforce_attributes_list(package_info, self.package_mandatory_attributes, context=context, verbose=verbose, buffer=buffer)
        for resource_info in package_info.package_resources.values():
            resource_context = context + " / resource " + resource_info.name
            if self.resource_format is not None:
                resource_format_context = resource_context + " / resource format"
                success &= self.resource_format.enforce(resource_info.format, context=resource_format_context, verbose=verbose, buffer=buffer)
            if self.resource_mandatory_attributes is not None:
                success &= self._enforce_attributes_list(resource_info, self.resource_mandatory_attributes, context=resource_context, verbose=verbose, buffer=buffer)
            if self.datastore_fields_mandatory_attributes is not None and resource_info.datastore_info is not None:
                for field_info in resource_info.datastore_info.fields:
                    field_context = resource_context + " / field " + field_info.name
                    success &= self._enforce_attributes_list(field_info, self.datastore_fields_mandatory_attributes, context=field_context, verbose=verbose, buffer=buffer)
        return success

    def policy_check_package(self, package_info: CkanPackageInfo, *, package_buffer:List[DataPolicyError]=None,
                             display_message:bool=True, raise_error:bool=False) -> bool:
        """
        Main entry-point to check the policy rules against the package.

        :param package_info: package and resources metadata
        :param package_buffer: you can specify a list object to indirectly obtain the detailed list of error messages.
            The messages are appended to this list.
        :param display_message: option to display the messages in the command line
        :param raise_error: option to raise an exception if any rule with a high error level is encountered
        :return: True if no error was encountered
        :raises DataPolicyError: if raise_error is True and the error count is not zero
        """
        if package_buffer is None:
            package_buffer: List[DataPolicyError] = []
        context = "Package " + package_info.name
        success = self.enforce(package_info, context=context, verbose=True, buffer=package_buffer)
        error_count = ErrorCount(package_buffer)
        # consistency check
        if success:
            assert(error_count.total == 0)
        else:
            assert(error_count.total > 0)
        # command-line output
        if display_message:
            if success:
                print("Package '" + package_info.name + "' passed all tests")
            else:
                print("Package '" + package_info.name + "': " + error_count.error_count_message() + ":")
                print('\n'.join([error_message.message for error_message in package_buffer]))
        # raise error after all this
        if raise_error and error_count.error > 0:
            raise DataPolicyError(context, ErrorLevel.Error, error_count.error_count_message())
        return success
264
+
265
+
266
+
267
+
268
+
269
+
@@ -0,0 +1,97 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Data format policy representation and enforcing
5
+ """
6
+ from typing import List, Any, Iterable, Union, Dict, Set
7
+ from abc import ABC, abstractmethod
8
+ import re
9
+ import fnmatch
10
+
11
+ from ckanapi_harvesters.auxiliary.ckan_auxiliary import _string_from_element
12
+ from ckanapi_harvesters.policies.data_format_policy_errors import DataPolicyError, ErrorLevel, _policy_msg
13
+ from ckanapi_harvesters.policies.data_format_policy_defs import StringMatchMode, ListChoiceMode, newline_char
14
+ from ckanapi_harvesters.policies.data_format_policy_defs import StringValueSpecification
15
+
16
+
17
class DataPolicyABC(ABC):
    """
    Abstract base of the policy objects: holds the error level of the rule
    and declares the dictionary serialization and the enforce entry-point.
    """
    def __init__(self, error_level:ErrorLevel=ErrorLevel.Information):
        self.error_level: ErrorLevel = error_level

    @abstractmethod
    def to_dict(self) -> dict:
        """
        Serialize the fields handled by this class (the error level, by name).
        Subclasses merge this dictionary into their own.
        """
        serialized = dict()
        serialized["error_level"] = self.error_level.name
        return serialized

    @staticmethod
    @abstractmethod
    def from_dict(d:dict):
        """
        Create an object from a dictionary. To be implemented by the subclasses.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load_from_dict(self, d:dict):
        """
        Load the fields handled by this class from the dictionary (key "error_level").
        """
        level_name = d["error_level"]
        self.error_level = ErrorLevel.from_str(level_name)

    @abstractmethod
    def enforce(self, values: Any, *, context:str=None, verbose: bool = True, buffer:List[DataPolicyError]=None) -> bool:
        """
        Check the values against the policy. To be implemented by the subclasses.
        """
        raise NotImplementedError()
37
+
38
+
39
class DataPolicyElementABC(DataPolicyABC, ABC):
    """
    Abstract base of the policy elements which compare string values to a specification,
    according to a match mode (StringMatchMode).
    """
    def __init__(self, mandatory:bool=False, error_level:ErrorLevel=ErrorLevel.Information):
        super().__init__(error_level=error_level)
        self.mandatory:bool = mandatory
        self.match_mode: StringMatchMode = StringMatchMode.Match

    @abstractmethod
    def to_dict(self) -> dict:
        """
        Serialize the fields handled by this class (mandatory flag and match mode name),
        merged with the fields of the parent class.
        """
        d = {"mandatory": self.mandatory, "match_mode": self.match_mode.name}
        d.update(super().to_dict())
        return d

    @staticmethod
    @abstractmethod
    def from_dict(d:dict):
        raise NotImplementedError()

    @abstractmethod
    def _load_from_dict(self, d:dict) -> None:
        """
        Load the fields handled by this class from the dictionary (keys "mandatory" and "match_mode").
        """
        super()._load_from_dict(d)
        self.mandatory = d["mandatory"]
        self.match_mode = StringMatchMode.from_str(d["match_mode"])

    def _value_matches_spec(self, value: Union[str, None], spec: Iterable[str]) -> bool:
        """
        Test one value against the specification, according to the current match mode.
        A None value is only refused if the element is mandatory.
        The modes Any and NotEmpty accept any value here.
        """
        if value is None:
            return not self.mandatory
        mode = self.match_mode
        if mode == StringMatchMode.Match:
            return value.lower() in {unit_spec.lower() for unit_spec in spec}
        if mode == StringMatchMode.MatchCaseSensitive:
            return value in spec
        if mode == StringMatchMode.Regex:
            # TODO: test (re.match only anchors the pattern at the beginning of the value)
            return any(re.match(unit_spec, value, flags=re.IGNORECASE) is not None for unit_spec in spec)
        if mode == StringMatchMode.RegexCaseSensitive:
            # TODO: test
            return any(re.match(unit_spec, value) is not None for unit_spec in spec)
        if mode == StringMatchMode.Wildcard:
            # fnmatch functions return a bool: it must not be compared to None (always True).
            # Both sides are lowered because the case handling of fnmatch.fnmatch depends on the operating system.
            return any(fnmatch.fnmatchcase(value.lower(), unit_spec.lower()) for unit_spec in spec)
        if mode == StringMatchMode.WildcardCaseSensitive:
            return any(fnmatch.fnmatchcase(value, unit_spec) for unit_spec in spec)
        return True

    def _enforce_unit_string(self, values: Union[str, List[str]], spec: Union[str, Iterable[str]], *, context:str, verbose:bool,
                             buffer:List[DataPolicyError], add_buffer:bool=True) -> bool:
        """
        Check one or several string values against the specification.

        :param values: value or list of values to check. None or empty is only refused in the NotEmpty mode.
        :param spec: accepted value/pattern or collection of accepted values/patterns
        :param context: location used in the messages
        :param verbose: passed to _policy_msg
        :param buffer: passed to _policy_msg
        :param add_buffer: False to not emit any message through _policy_msg
        :return: True if all the values are accepted
        """
        if values is None or len(values) == 0:
            if self.match_mode != StringMatchMode.NotEmpty:
                return True
            if add_buffer:
                # a failure is always reported, as for the values which do not match
                msg = DataPolicyError(context, self.error_level, "Value must not be empty")
                _policy_msg(msg, error_level=self.error_level, buffer=buffer, verbose=verbose)
            return False
        if isinstance(values, str):
            values = [values]
        if isinstance(spec, str):
            spec = [spec]
        success = True
        for value in values:
            success_value = self._value_matches_spec(value, spec)
            if add_buffer and not success_value:
                msg = DataPolicyError(context, self.error_level, f"Value does not match spec '{spec}' ({self.match_mode}): {value}")
                _policy_msg(msg, error_level=self.error_level, buffer=buffer, verbose=verbose)
            success &= success_value
        return success
95
+
96
+
97
+
@@ -0,0 +1,156 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Data format policy representation and enforcing
5
+ """
6
+ from typing import List, Any, Iterable, Union, Dict, Set
7
+ from abc import ABC, abstractmethod
8
+ from warnings import warn
9
+ import re
10
+ import datetime
11
+
12
+ import pandas as pd
13
+
14
+ from ckanapi_harvesters.auxiliary.ckan_auxiliary import _string_from_element, _bool_from_string
15
+ from ckanapi_harvesters.auxiliary.ckan_defs import ckan_tags_sep
16
+ from ckanapi_harvesters.policies.data_format_policy_defs import DataType
17
+ from ckanapi_harvesters.policies.data_format_policy_errors import DataPolicyError, ErrorLevel, _policy_msg
18
+ from ckanapi_harvesters.policies.data_format_policy_defs import newline_char, StringMatchMode, ListChoiceMode
19
+ from ckanapi_harvesters.policies.data_format_policy_defs import StringValueSpecification
20
+ from ckanapi_harvesters.policies.data_format_policy_abc import DataPolicyElementABC
21
+
22
+
23
class CustomFieldSpecification(DataPolicyElementABC):
    """
    Specification of one custom field (key/value pair): accepted values, data type and match mode.
    """
    def __init__(self, key: str=None, values: List[str]=None, data_type:DataType=None,
                 match_mode:StringMatchMode=StringMatchMode.Any,
                 help:str=None,
                 mandatory:bool=False, error_level:ErrorLevel=ErrorLevel.Information):
        """
        :param key: key of the custom field
        :param values: accepted values or patterns, interpreted according to match_mode
        :param data_type: data type of the value (None is treated as text)
        :param match_mode: comparison mode between the value and values
        :param help: free text
        :param mandatory: True if the key must be present
        :param error_level: level of the messages emitted by this rule
        """
        super().__init__(mandatory=mandatory, error_level=error_level)
        self.key: str = key
        self.values: List[str] = values
        self.data_type:DataType = data_type
        self.match_mode: StringMatchMode = match_mode
        self.help: str = help

    def to_dict(self) -> dict:
        """
        Serialize the specification to a dictionary (the help text is not exported).
        """
        d = {"key": self.key,
             "values": self.values,
             "data_type": self.data_type.name if self.data_type is not None else "",
             "match_mode": self.match_mode.name}
        d.update(super().to_dict())
        return d

    @staticmethod
    def from_dict(d:dict) -> "CustomFieldSpecification":
        obj = CustomFieldSpecification()
        obj._load_from_dict(d)
        return obj

    def _load_from_dict(self, d:dict):
        """
        Load the specification from a dictionary with the structure produced by to_dict.
        """
        super()._load_from_dict(d)
        self.key = d["key"]
        self.values = d["values"] if "values" in d else None
        self.data_type = DataType.from_str(d["data_type"]) if "data_type" in d and not d["data_type"] == "" else None
        self.match_mode = StringMatchMode.from_str(d["match_mode"]) if "match_mode" in d else None

    @staticmethod
    def from_df_row(row: pd.Series) -> "CustomFieldSpecification":
        """
        Create a specification from a table row with the columns key, values, mode and optionally help.
        The values cell is split on ckan_tags_sep.
        """
        key = _string_from_element(row["key"]).strip()
        values_str = _string_from_element(row["values"])
        # the mode cell is tested against None below: the values cell is given the same treatment
        values = values_str.split(ckan_tags_sep) if values_str is not None else None
        mode_str = _string_from_element(row["mode"])
        mode = StringMatchMode.from_str(mode_str) if mode_str is not None else StringMatchMode.Any
        help:Union[str,None] = None
        if "help" in row.keys():
            help = _string_from_element(row["help"])
        return CustomFieldSpecification(key=key, values=values, match_mode=mode, help=help)

    def _enforce_with_mode(self, mode:StringMatchMode, value:str, spec, *, context:str, verbose:bool,
                           buffer:List[DataPolicyError]) -> bool:
        """
        Call _enforce_unit_string with a temporary match mode.
        The configured match mode is restored afterwards so the specification is not altered by a check.
        """
        configured_mode = self.match_mode
        self.match_mode = mode
        try:
            return self._enforce_unit_string(value, spec, context=context, verbose=verbose, buffer=buffer)
        finally:
            self.match_mode = configured_mode

    def enforce(self, values: str, *, context:str=None, verbose: bool = True, buffer:List[DataPolicyError]=None) -> bool:
        """
        Check the value of the custom field against the specification.

        :param values: value of the custom field
        :param context: location used in the messages, completed with the key of the field
        :param verbose: passed to _policy_msg
        :param buffer: passed to _policy_msg
        :return: True if the value is accepted
        :raises NotImplementedError: for a data type which is not handled
        """
        key_context = ("" if context is None else context + " / ") + "custom key " + self.key
        value = values
        if self.data_type is None or self.data_type == DataType.Text:
            if self.values is None:
                # no list of accepted values: only the NotEmpty mode can refuse a value, if it is actually empty
                if self.match_mode != StringMatchMode.NotEmpty or (value is not None and len(value) > 0):
                    return True
                msg = DataPolicyError(key_context, self.error_level, "Value must not be empty")
                _policy_msg(msg, error_level=self.error_level, buffer=buffer, verbose=verbose)
                return False
            return self._enforce_unit_string(value, self.values, context=key_context, verbose=verbose, buffer=buffer)
        # the typed checks below do not use the list of accepted values
        if self.data_type == DataType.Bool:
            return self._enforce_with_mode(StringMatchMode.Match, value, {"True", "False"},
                                           context=key_context, verbose=verbose, buffer=buffer)
        if self.data_type == DataType.TimeStamp:
            if value is None or len(value) == 0:
                return True
            try:
                datetime.datetime.fromisoformat(value)
            except (TypeError, ValueError):
                # report the failure: a False result without any message cannot be explained to the user
                msg = DataPolicyError(key_context, self.error_level, f"Value is not an ISO format timestamp: {value}")
                _policy_msg(msg, error_level=self.error_level, buffer=buffer, verbose=verbose)
                return False
            return True
        if self.data_type == DataType.Numeric:
            # digits with optional sign and decimal part. The pattern is anchored at the end
            # because the Regex mode relies on re.match, which only anchors the beginning.
            return self._enforce_with_mode(StringMatchMode.Regex, value, r"[+-]?\d+(?:\.\d+)?$",
                                           context=key_context, verbose=verbose, buffer=buffer)
        raise NotImplementedError("Unsupported data type: " + str(self.data_type))
94
+
95
+
96
class CustomFieldsPolicy(DataPolicyElementABC):
    """
    Policy for the custom fields (key/value pairs) of a package:
    a collection of CustomFieldSpecification indexed by key.
    """
    def __init__(self, custom_fields_spec:List[CustomFieldSpecification]=None,
                 restrict_to_list:ErrorLevel=ErrorLevel.Information, keys_case_sensitive:bool=True,
                 mandatory:bool=False, error_level:ErrorLevel=ErrorLevel.Information):
        """
        :param custom_fields_spec: specifications of the custom fields
        :param restrict_to_list: error level of the message emitted for keys which are not in the specifications
        :param keys_case_sensitive: False to compare the keys in lower case
        :param mandatory: see DataPolicyElementABC
        :param error_level: level of the other messages emitted by this rule
        """
        super().__init__(mandatory=mandatory, error_level=error_level)
        if custom_fields_spec is None:
            custom_fields_spec = []
        self.restrict_to_list: ErrorLevel = restrict_to_list
        self.keys_case_sensitive:bool = keys_case_sensitive
        self.custom_fields_spec:Dict[str,CustomFieldSpecification] = \
            {self._index_key(keypair_spec.key): keypair_spec for keypair_spec in custom_fields_spec}

    def _index_key(self, key:str) -> str:
        """
        Return the form of the key used to index and look up the specifications
        (lower case if the keys are not case-sensitive).
        """
        return key if self.keys_case_sensitive else key.lower()

    def to_dict(self) -> dict:
        d = {"custom_fields": [spec.to_dict() for spec in self.custom_fields_spec.values()],
             "keys_case_sensitive": self.keys_case_sensitive, "restrict_to_list": self.restrict_to_list.name}
        d.update(super().to_dict())
        return d

    @staticmethod
    def from_dict(d:dict) -> "CustomFieldsPolicy":
        obj = CustomFieldsPolicy()
        obj._load_from_dict(d)
        return obj

    def _load_from_dict(self, d:dict):
        """
        Load the policy from a dictionary with the structure produced by to_dict.
        Absent optional keys take the default values of the constructor.
        """
        super()._load_from_dict(d)
        # options are loaded first: the indexing of the specifications depends on the case sensitivity
        self.keys_case_sensitive = _bool_from_string(d["keys_case_sensitive"]) if "keys_case_sensitive" in d else True
        self.restrict_to_list = ErrorLevel.from_str(d["restrict_to_list"]) if "restrict_to_list" in d else ErrorLevel.Information
        self.custom_fields_spec = {self._index_key(spec["key"]): CustomFieldSpecification.from_dict(spec) for spec in d["custom_fields"]}

    def enforce(self, values: Dict[str, str], *, context:str=None, verbose: bool = True, buffer:List[DataPolicyError]=None) -> bool:
        """
        Check the custom fields of a package: keys out of the specifications,
        missing mandatory keys, then the value of each specified key.

        :param values: custom fields as a dictionary key -> value (None is treated as no custom fields)
        :param context: location used in the messages
        :param verbose: passed to _policy_msg and to the specifications
        :param buffer: passed to _policy_msg and to the specifications
        :return: True if no rule failed
        """
        if values is None:
            values = {}
        success = True
        keys = {self._index_key(key) for key in values}
        extra_keys = keys - set(self.custom_fields_spec)
        if extra_keys:
            # the message and its reporting use the same level, dedicated to the restriction to the list
            msg = DataPolicyError(context, self.restrict_to_list, f"Custom keys do not make part of the defined list: {','.join(sorted(extra_keys))}")
            _policy_msg(msg, error_level=self.restrict_to_list, buffer=buffer, verbose=verbose)
            success = False
        mandatory_keys = {key for key, keypair in self.custom_fields_spec.items() if keypair.mandatory}
        missing_keys = mandatory_keys - keys
        if missing_keys:
            msg = DataPolicyError(context, self.error_level, f"Mandatory custom keys were not found: {', '.join(sorted(missing_keys))}")
            _policy_msg(msg, error_level=self.error_level, buffer=buffer, verbose=verbose)
            success = False
        for key, value in values.items():
            spec = self.custom_fields_spec.get(self._index_key(key))
            if spec is not None:
                # the specification appends its own key to the context
                success &= spec.enforce(value, context=context, verbose=verbose, buffer=buffer)
        return success
155
+
156
+
@@ -0,0 +1,135 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Data format policy representation and enforcing
5
+ """
6
+ from typing import List, Dict, Tuple, Union
7
+ from enum import IntEnum
8
+
9
+ import pandas as pd
10
+
11
+ from ckanapi_harvesters.auxiliary.ckan_auxiliary import _string_from_element
12
+ from ckanapi_harvesters.auxiliary.ckan_defs import ckan_tags_sep
13
+
14
+
15
+ newline_char = '\n'
16
+
17
+
18
class ListChoiceMode(IntEnum):
    """
    Enumeration of list choice modes.

    Members are printed as, and parsed from, their lower-case names.
    """
    Any = 0
    MaxOne = 1
    MandatoryOne = 2
    MandatoryMulti = 3
    NoExtra = 4

    def __str__(self):
        """Return the member name in lower case."""
        return self.name.lower()

    @staticmethod
    def from_str(s):
        """
        Parse a mode from its name, ignoring case and surrounding whitespace.

        :param s: name of the mode
        :return: the matching member
        :raises ValueError: if the normalized string matches no member name
        """
        normalized = s.lower().strip()
        for mode in ListChoiceMode:
            if mode.name.lower() == normalized:
                return mode
        raise ValueError(normalized)
43
+
44
+
45
class StringMatchMode(IntEnum):
    """
    Enumeration of string matching modes.

    Members are printed as, and parsed from, their lower-case names.
    """
    Any = 0
    NotEmpty = 1
    Match = 2
    MatchCaseSensitive = 3
    Regex = 4
    RegexCaseSensitive = 5
    Wildcard = 6
    WildcardCaseSensitive = 7

    def __str__(self):
        """Return the member name in lower case."""
        return self.name.lower()

    @staticmethod
    def from_str(s):
        """
        Parse a mode from its name, ignoring case and surrounding whitespace.

        :param s: name of the mode
        :return: the matching member, or StringMatchMode.Any when the name is not recognized
        """
        modes_by_name = {mode.name.lower(): mode for mode in StringMatchMode}
        # Unknown names are not an error here: they fall back to the default value.
        return modes_by_name.get(s.lower().strip(), StringMatchMode.Any)
79
+
80
+
81
class DataType(IntEnum):
    """
    Enumeration of data types.

    Members are printed as, and parsed from, their lower-case names.
    """
    Text = 1
    Numeric = 2
    TimeStamp = 3
    Bool = 4

    def __str__(self):
        """Return the member name in lower case."""
        return self.name.lower()

    @staticmethod
    def from_str(s):
        """
        Parse a data type from its name, ignoring case and surrounding whitespace.

        :param s: name of the data type
        :return: the matching member
        :raises ValueError: if the normalized string matches no member name
        """
        normalized = s.lower().strip()
        types_by_name = {data_type.name.lower(): data_type for data_type in DataType}
        if normalized not in types_by_name:
            raise ValueError(normalized)
        return types_by_name[normalized]
103
+
104
+
105
class StringValueSpecification:
    """
    A string value paired with a help text.

    Instances convert to and from (value, help) tuples and
    {"value": ..., "help": ...} dictionaries.
    """
    def __init__(self, value:str, help:str=None):
        """
        :param value: the string value
        :param help: help text; None is stored as an empty string
        """
        self.value: str = value
        self.help: str = "" if help is None else help

    def to_tuple(self) -> Tuple[str,str]:
        """Return the (value, help) pair."""
        return (self.value, self.help)

    @staticmethod
    def from_tuple(values: Union[str, Tuple[str,str]]) -> "StringValueSpecification":
        """
        Build a specification from a bare string, a 1-element sequence or a (value, help) pair.

        :param values: the value alone, or a sequence holding the value and optionally the help text
        :return: the new specification; the help text is empty when it was not provided
        """
        if isinstance(values, str):
            return StringValueSpecification(values, "")
        if len(values) == 1:
            return StringValueSpecification(values[0], "")
        value, help_text = values
        return StringValueSpecification(value, help_text)

    def to_dict(self) -> dict:
        """Return the specification as a dictionary with the keys "value" and "help"."""
        return dict(value=self.value, help=self.help)

    @staticmethod
    def from_dict(values: dict) -> "StringValueSpecification":
        """
        Build a specification from a dictionary.

        :param values: dictionary with a mandatory "value" item and an optional "help" item
        :return: the new specification; the help text is empty when it was not provided
        """
        return StringValueSpecification(values["value"], values.get("help", ""))
133
+
134
+
135
+