ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ckanapi_harvesters/__init__.py +32 -10
- ckanapi_harvesters/auxiliary/__init__.py +26 -0
- ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
- ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
- ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
- ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
- ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
- ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
- ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
- ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
- ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
- ckanapi_harvesters/auxiliary/deprecated.py +82 -0
- ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
- ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
- ckanapi_harvesters/auxiliary/list_records.py +60 -0
- ckanapi_harvesters/auxiliary/login.py +163 -0
- ckanapi_harvesters/auxiliary/path.py +208 -0
- ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
- ckanapi_harvesters/auxiliary/urls.py +40 -0
- ckanapi_harvesters/builder/__init__.py +40 -0
- ckanapi_harvesters/builder/builder_aux.py +20 -0
- ckanapi_harvesters/builder/builder_ckan.py +238 -0
- ckanapi_harvesters/builder/builder_errors.py +36 -0
- ckanapi_harvesters/builder/builder_field.py +122 -0
- ckanapi_harvesters/builder/builder_package.py +9 -0
- ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
- ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
- ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
- ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
- ckanapi_harvesters/builder/builder_resource.py +589 -0
- ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
- ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
- ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
- ckanapi_harvesters/builder/builder_resource_init.py +126 -0
- ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
- ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
- ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
- ckanapi_harvesters/builder/example/__init__.py +21 -0
- ckanapi_harvesters/builder/example/builder_example.py +21 -0
- ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
- ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
- ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
- ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
- ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
- ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
- ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
- ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
- ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
- ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
- ckanapi_harvesters/builder/mapper_datastore.py +93 -0
- ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
- ckanapi_harvesters/builder/specific/__init__.py +11 -0
- ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
- ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
- ckanapi_harvesters/ckan_api/__init__.py +20 -0
- ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
- ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
- ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
- ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
- ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
- ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
- ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
- ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
- ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
- ckanapi_harvesters/harvesters/__init__.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
- ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
- ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
- ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
- ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
- ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
- ckanapi_harvesters/harvesters/harvester_init.py +30 -0
- ckanapi_harvesters/harvesters/harvester_model.py +49 -0
- ckanapi_harvesters/harvesters/harvester_params.py +323 -0
- ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
- ckanapi_harvesters/harvesters/postgre_params.py +86 -0
- ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
- ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
- ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
- ckanapi_harvesters/policies/__init__.py +20 -0
- ckanapi_harvesters/policies/data_format_policy.py +269 -0
- ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
- ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
- ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
- ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
- ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
- ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
- ckanapi_harvesters/reports/__init__.py +11 -0
- ckanapi_harvesters/reports/admin_report.py +292 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/METADATA +84 -38
- ckanapi_harvesters-0.0.3.dist-info/RECORD +105 -0
- ckanapi_harvesters/divider/__init__.py +0 -27
- ckanapi_harvesters/divider/divider.py +0 -53
- ckanapi_harvesters/divider/divider_error.py +0 -59
- ckanapi_harvesters/main.py +0 -30
- ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/WHEEL +0 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
from typing import List, Dict, Tuple, Generator, Any, Union
|
|
7
|
+
import argparse
|
|
8
|
+
|
|
9
|
+
from ckanapi_harvesters.auxiliary.proxy_config import ProxyConfig
|
|
10
|
+
from ckanapi_harvesters.auxiliary.ckan_map import CkanMap
|
|
11
|
+
from ckanapi_harvesters.auxiliary import ckan_configuration
|
|
12
|
+
from ckanapi_harvesters.policies.data_format_policy_errors import DataPolicyError
|
|
13
|
+
from ckanapi_harvesters.policies.data_format_policy import CkanPackageDataFormatPolicy
|
|
14
|
+
|
|
15
|
+
from ckanapi_harvesters.auxiliary.ckan_api_key import CkanApiKey
|
|
16
|
+
from ckanapi_harvesters.ckan_api.ckan_api_2_readonly import CkanApiReadOnlyParams
|
|
17
|
+
from ckanapi_harvesters.ckan_api.ckan_api_2_readonly import CkanApiReadOnly
|
|
18
|
+
|
|
19
|
+
# Keyword accepted in place of a policy file path: load_policy() compares the (lower-cased) path to this value
# and, on a match, loads the default policy from the CKAN server instead of reading a file.
# It is also the value stored in CkanApiPolicy.policy_source once the default policy has been loaded.
ckan_default_policy_keyword = "default"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class CkanApiPolicyParams(CkanApiReadOnlyParams):
    """
    Parameters of the read-only CKAN API client, extended with the data format policy flags.
    """

    def __init__(self, *, proxies:Union[str,dict,ProxyConfig]=None,
                 ckan_headers:dict=None, http_headers:dict=None):
        """
        Forward the connection arguments to the parent class and initialize the policy flags.

        :param proxies: proxies to use for requests
        :param ckan_headers: forwarded unchanged to the parent class
        :param http_headers: forwarded unchanged to the parent class
        """
        super().__init__(proxies=proxies, ckan_headers=ckan_headers, http_headers=http_headers)
        # policy-specific flags and their default values
        self.policy_check_pre: bool = False
        self.policy_check_post: bool = True
        self.verbose_policy: bool = True

    def copy(self, new_identifier:str=None, *, dest=None):
        """
        Copy the parameters into dest, which is created if it is not provided.

        :param new_identifier: not used by this implementation
        :param dest: object receiving the values; a new CkanApiPolicyParams if None
        :return: dest
        """
        dest = CkanApiPolicyParams() if dest is None else dest
        # the parent class copies its own attributes into the same object
        super().copy(dest=dest)
        for flag_name in ("policy_check_pre", "policy_check_post", "verbose_policy"):
            setattr(dest, flag_name, getattr(self, flag_name))
        return dest
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class CkanApiPolicy(CkanApiReadOnly):
    """
    Read-only CKAN API client extended with the management of a data format policy.

    The policy can be given as an object, loaded from a JSON file or downloaded from the CKAN server
    (default policy). The attribute policy_source records where the current policy was loaded from.
    """

    def __init__(self, url: str = None, *, proxies:Union[str,dict,ProxyConfig] = None,
                 apikey: Union[str,CkanApiKey] = None, apikey_file: str = None,
                 owner_org: str = None, params:CkanApiPolicyParams=None,
                 map:CkanMap=None, policy: CkanPackageDataFormatPolicy = None, policy_file: str = None,
                 identifier=None):
        """
        CKAN Database API interface to CKAN server with helper functions using pandas DataFrames.

        :param url: url of the CKAN server
        :param proxies: proxies to use for requests. If given, it overrides the proxies of params
        :param apikey: way to provide the API key directly (optional)
        :param apikey_file: path to a file containing a valid API key in the first line of text (optional)
        :param owner_org: name of the organization to limit package_search (optional)
        :param params: other connection/behavior parameters
        :param map: map of known resources
        :param policy: data format policy to be used with the policy_check function
        :param policy_file: path to a JSON file containing the data format policy to load.
            If given, the policy loaded from this file replaces the policy argument
        :param identifier: identifier of the ckan client
        """
        super().__init__(url=url, proxies=proxies, apikey_file=apikey_file, apikey=apikey,
                         owner_org=owner_org, map=map, identifier=identifier)
        # Install the parameters before loading the policy: load_policy reads self.params.proxies
        if params is None:
            params = CkanApiPolicyParams()
        if proxies is not None:
            params.proxies = proxies
        self.params: CkanApiPolicyParams = params
        self.policy: Union[CkanPackageDataFormatPolicy,None] = policy
        self.policy_source: Union[str,None] = None
        if policy_file is not None:
            self.load_policy(policy_file, base_dir=None)
        # Assigned after the policy is loaded: loading the default policy maps a package with load_policy=False,
        # which sets this attribute to False through set_default_map_mode
        self.default_policy_load_on_map: bool = True

    def copy(self, new_identifier: str = None, *, dest=None):
        """
        Copy the client into dest, which is created if it is not provided.

        :param new_identifier: forwarded to the parent class
        :param dest: object receiving the values; a new CkanApiPolicy if None
        :return: dest
        """
        if dest is None:
            dest = CkanApiPolicy()
        super().copy(new_identifier=new_identifier, dest=dest)
        if self.policy is not None:
            dest.policy = self.policy.copy()
            # the origin travels with the policy, so that load_default_policy(cancel_if_present=True)
            # recognizes an already loaded default policy on the copy
            dest.policy_source = self.policy_source
        dest.default_policy_load_on_map = self.default_policy_load_on_map
        return dest

    def set_verbosity(self, verbosity:bool=True, verbose_extra:bool=None) -> None:
        """
        Enable/disable full verbose output

        :param verbosity: boolean. Cannot be None
        :param verbose_extra: if not None, also applied to params.verbose_policy
        :return:
        """
        super().set_verbosity(verbosity=verbosity, verbose_extra=verbose_extra)
        if verbose_extra is not None:
            self.params.verbose_policy = verbose_extra

    def _setup_cli_ckan_parser(self, parser:argparse.ArgumentParser=None) -> argparse.ArgumentParser:
        """
        Overload adding the --policy-file argument to the parser returned by the parent class.

        :param parser: forwarded to the parent class
        :return: the parser with the additional argument
        """
        parser = super()._setup_cli_ckan_parser(parser=parser)
        parser.add_argument("--policy-file", type=str,
                            help="Path to a file containing the CKAN data format policy (json format)")
        return parser

    def _cli_ckan_args_apply(self, args: argparse.Namespace, *, base_dir:str=None,
                             error_not_found:bool=True, default_proxies:dict=None, proxy_headers:dict=None,
                             proxies:dict=None, headers:dict=None) -> None:
        """
        Overload adding support to load a policy from the file given by the --policy-file argument.

        :param args: parsed command line arguments
        :param base_dir: base directory, forwarded to the parent class and used if the policy file is a relative path
        :param error_not_found: forwarded to the parent class and to load_policy
        :param default_proxies: forwarded to the parent class
        :param proxy_headers: forwarded to the parent class
        :param proxies: proxies used to load the policy file (params.proxies if None)
        :param headers: headers used to load the policy file
        :return:
        """
        super()._cli_ckan_args_apply(args=args, base_dir=base_dir, error_not_found=error_not_found,
                                     default_proxies=default_proxies, proxy_headers=proxy_headers)
        if proxies is None:
            proxies = self.params.proxies
        if args.policy_file is not None:
            # base_dir is passed on so that a relative policy file path is resolved against it
            self.load_policy(args.policy_file, base_dir=base_dir, proxies=proxies, headers=headers,
                             error_not_found=error_not_found)

    def query_default_policy(self, *, error_not_found:bool=False, load_error:bool=True) -> Union[CkanPackageDataFormatPolicy,None]:
        """
        Download default policy and return it without loading it in the policy attribute.

        :param error_not_found: forwarded to map_resources and to the resource lookup in the map
        :param load_error: forwarded to CkanPackageDataFormatPolicy.from_jsons
        :return: the policy, or None if the policy resource is not found in the map
        """
        # load_policy=False or else infinite loop
        # NOTE(review): map_resources stores this value through set_default_map_mode, so
        # default_policy_load_on_map remains False after this call - confirm that this is intended
        self.map_resources(ckan_configuration.configuration_package_name, error_not_found=error_not_found,
                           load_policy=False)
        resource_info = self.map.get_resource_info(ckan_configuration.policy_resource,
                                                   ckan_configuration.configuration_package_name,
                                                   error_not_mapped=error_not_found)
        if resource_info is None:
            return None
        _, response = self.resource_download(resource_info.id)
        return CkanPackageDataFormatPolicy.from_jsons(response.text, source_file=resource_info.download_url,
                                                      load_error=load_error)

    def load_default_policy(self, *,
                            error_not_found:bool=False, load_error:bool=True, cancel_if_present:bool=False,
                            force:bool=False) -> Union[CkanPackageDataFormatPolicy,None]:
        """
        Function to load the default data format policy from the CKAN server.
        The default policy is defined in ckan_configuration

        :param error_not_found: forwarded to query_default_policy
        :param load_error: forwarded to query_default_policy
        :param cancel_if_present: if True and the default policy was already loaded, return the current
            policy without querying the server. Ignored if force is True
        :param force: if True, the current policy is discarded and the server is always queried
        :return: the policy (None if the default policy was not found)
        """
        if force:
            # discard the policy together with its origin, so that no stale state remains if the query fails
            self.policy = None
            self.policy_source = None
        elif cancel_if_present and self.policy_source == ckan_default_policy_keyword:
            return self.policy
        # NOTE(review): a policy loaded from another source is replaced here, including by None
        # if the default policy is not found - confirm that this is intended
        self.policy = self.query_default_policy(error_not_found=error_not_found, load_error=load_error)
        self.policy_source = ckan_default_policy_keyword
        return self.policy

    def load_policy(self, policy_file: str, base_dir: str = None, proxies:dict=None, headers:dict=None,
                    error_not_found: bool = True) -> Union[CkanPackageDataFormatPolicy,None]:
        """
        Load the CKAN data format policy from file (JSON format).

        :param policy_file: path to the policy file. None or the keyword "default" (case-insensitive)
            loads the default policy from the CKAN server instead
        :param base_dir: base directory, if the policy_file is a relative path
        :param proxies: proxies used to load the file (params.proxies if None)
        :param headers: headers used to load the file
        :param error_not_found: forwarded to the policy loading functions
        :return: the loaded policy (can be None when the default policy is requested)
        """
        if proxies is None:
            proxies = self.params.proxies
        if policy_file is None:
            policy_file = ckan_default_policy_keyword  # set to "default"
        if policy_file.lower() == ckan_default_policy_keyword:  # if equals "default"
            return self.load_default_policy(error_not_found=error_not_found, force=True, cancel_if_present=False)
        self.policy = CkanPackageDataFormatPolicy.from_json(policy_file, base_dir=base_dir, proxies=proxies,
                                                            headers=headers, error_not_found=error_not_found)
        self.policy_source = policy_file
        return self.policy

    def policy_check(self, package_list:Union[str,List[str]]=None, policy: CkanPackageDataFormatPolicy=None,
                     *, buffer:Dict[str, List[DataPolicyError]]=None, raise_error:bool=False,
                     verbose:bool=None) -> bool:
        """
        Enforce policy on mapped packages

        :param package_list: package or list of packages to check (all the packages of the map if None)
        :param policy: policy to check (the policy attribute if None)
        :param buffer: if provided, receives the list of errors of each package, by package name
        :param raise_error: forwarded to the policy check of each package
        :param verbose: display messages (params.verbose_policy if None)
        :return: True if all packages pass the check or if no policy is available
        """
        if package_list is None:
            # check on all packages
            # iterate over a snapshot, in case the map is modified while package information is obtained
            package_list = list(self.map.packages.keys())
        elif isinstance(package_list, str):
            package_list = [package_list]
        if policy is None:
            policy = self.policy
        if verbose is None:
            verbose = self.params.verbose_policy
        if policy is None:
            # no policy loaded at all
            return True
        if verbose:
            print(f"Testing policy {policy.label}")
        success = True
        for package_name in package_list:
            package_info = self.get_package_info_or_request(package_name)
            package_buffer: List[DataPolicyError] = []
            success &= policy.policy_check_package(package_info, display_message=verbose,
                                                   package_buffer=package_buffer, raise_error=raise_error)
            if buffer is not None:
                buffer[package_info.name] = package_buffer
        if verbose:
            print(f"Data format policy {policy.label} success: {success}")
        return success

    def set_default_map_mode(self, datastore_info:bool=None, resource_view_list:bool=None,
                             organization_info:bool=None, license_list:bool=None,
                             load_policy:bool=None) -> None:
        """
        Overload adding the default value of the load_policy option of map_resources.

        :param datastore_info: forwarded to the parent class
        :param resource_view_list: forwarded to the parent class
        :param organization_info: forwarded to the parent class
        :param license_list: forwarded to the parent class
        :param load_policy: new value of default_policy_load_on_map (unchanged if None)
        :return:
        """
        super().set_default_map_mode(datastore_info=datastore_info, resource_view_list=resource_view_list,
                                     organization_info=organization_info, license_list=license_list)
        if load_policy is None:
            load_policy = self.default_policy_load_on_map
        self.default_policy_load_on_map = load_policy

    def map_resources(self, package_list:Union[str, List[str]]=None, *, params:dict=None,
                      datastore_info:bool=None, resource_view_list:bool=None, organization_info:bool=None, license_list:bool=None,
                      only_missing:bool=True, error_not_found:bool=True,
                      owner_org:str=None, load_policy:bool=None) -> CkanMap:
        """
        Overload including a call to load the default data format policy.

        :param load_policy: if not None, stored as the new default through set_default_map_mode.
            The default policy is loaded after mapping if the resulting default is True
        :return: the map returned by the parent class

        The other arguments are forwarded to the parent class.
        """
        self.set_default_map_mode(load_policy=load_policy)
        ckan_map = super().map_resources(package_list=package_list, params=params, datastore_info=datastore_info,
                                         resource_view_list=resource_view_list, organization_info=organization_info,
                                         license_list=license_list, only_missing=only_missing,
                                         error_not_found=error_not_found, owner_org=owner_org)
        if self.default_policy_load_on_map:
            self.load_default_policy(cancel_if_present=True, load_error=False)
        return ckan_map
|
|
229
|
+
|