ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. ckanapi_harvesters/__init__.py +32 -10
  2. ckanapi_harvesters/auxiliary/__init__.py +26 -0
  3. ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
  4. ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
  5. ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
  6. ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
  7. ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
  8. ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
  9. ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
  10. ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
  11. ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
  12. ckanapi_harvesters/auxiliary/deprecated.py +82 -0
  13. ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
  14. ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
  15. ckanapi_harvesters/auxiliary/list_records.py +60 -0
  16. ckanapi_harvesters/auxiliary/login.py +163 -0
  17. ckanapi_harvesters/auxiliary/path.py +208 -0
  18. ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
  19. ckanapi_harvesters/auxiliary/urls.py +40 -0
  20. ckanapi_harvesters/builder/__init__.py +40 -0
  21. ckanapi_harvesters/builder/builder_aux.py +20 -0
  22. ckanapi_harvesters/builder/builder_ckan.py +238 -0
  23. ckanapi_harvesters/builder/builder_errors.py +36 -0
  24. ckanapi_harvesters/builder/builder_field.py +122 -0
  25. ckanapi_harvesters/builder/builder_package.py +9 -0
  26. ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
  27. ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
  28. ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
  29. ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
  30. ckanapi_harvesters/builder/builder_resource.py +589 -0
  31. ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
  32. ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
  33. ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
  34. ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
  35. ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
  36. ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
  37. ckanapi_harvesters/builder/builder_resource_init.py +126 -0
  38. ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
  39. ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
  40. ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
  41. ckanapi_harvesters/builder/example/__init__.py +21 -0
  42. ckanapi_harvesters/builder/example/builder_example.py +21 -0
  43. ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
  44. ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
  45. ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
  46. ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
  47. ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
  48. ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
  49. ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
  50. ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
  51. ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
  52. ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
  53. ckanapi_harvesters/builder/mapper_datastore.py +93 -0
  54. ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
  55. ckanapi_harvesters/builder/specific/__init__.py +11 -0
  56. ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
  57. ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
  58. ckanapi_harvesters/ckan_api/__init__.py +20 -0
  59. ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
  60. ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
  61. ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
  62. ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
  63. ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
  64. ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
  65. ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
  66. ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
  67. ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
  68. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
  69. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
  70. ckanapi_harvesters/harvesters/__init__.py +23 -0
  71. ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
  72. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
  73. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
  74. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
  75. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
  76. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
  77. ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
  78. ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
  79. ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
  80. ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
  81. ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
  82. ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
  83. ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
  84. ckanapi_harvesters/harvesters/harvester_init.py +30 -0
  85. ckanapi_harvesters/harvesters/harvester_model.py +49 -0
  86. ckanapi_harvesters/harvesters/harvester_params.py +323 -0
  87. ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
  88. ckanapi_harvesters/harvesters/postgre_params.py +86 -0
  89. ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
  90. ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
  91. ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
  92. ckanapi_harvesters/policies/__init__.py +20 -0
  93. ckanapi_harvesters/policies/data_format_policy.py +269 -0
  94. ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
  95. ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
  96. ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
  97. ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
  98. ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
  99. ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
  100. ckanapi_harvesters/reports/__init__.py +11 -0
  101. ckanapi_harvesters/reports/admin_report.py +292 -0
  102. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/METADATA +84 -38
  103. ckanapi_harvesters-0.0.3.dist-info/RECORD +105 -0
  104. ckanapi_harvesters/divider/__init__.py +0 -27
  105. ckanapi_harvesters/divider/divider.py +0 -53
  106. ckanapi_harvesters/divider/divider_error.py +0 -59
  107. ckanapi_harvesters/main.py +0 -30
  108. ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
  109. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/WHEEL +0 -0
  110. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,229 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+
5
+ """
6
+ from typing import List, Dict, Tuple, Generator, Any, Union
7
+ import argparse
8
+
9
+ from ckanapi_harvesters.auxiliary.proxy_config import ProxyConfig
10
+ from ckanapi_harvesters.auxiliary.ckan_map import CkanMap
11
+ from ckanapi_harvesters.auxiliary import ckan_configuration
12
+ from ckanapi_harvesters.policies.data_format_policy_errors import DataPolicyError
13
+ from ckanapi_harvesters.policies.data_format_policy import CkanPackageDataFormatPolicy
14
+
15
+ from ckanapi_harvesters.auxiliary.ckan_api_key import CkanApiKey
16
+ from ckanapi_harvesters.ckan_api.ckan_api_2_readonly import CkanApiReadOnlyParams
17
+ from ckanapi_harvesters.ckan_api.ckan_api_2_readonly import CkanApiReadOnly
18
+
19
+ ckan_default_policy_keyword = "default"  # value of policy_file / policy_source which designates the default policy stored on the CKAN server (compared case-insensitively in load_policy)
20
+
21
+
22
class CkanApiPolicyParams(CkanApiReadOnlyParams):
    """
    Parameters of the read-only CKAN client, extended with the options related to the data format policy.
    """
    def __init__(self, *, proxies:Union[str,dict,ProxyConfig]=None,
                 ckan_headers:dict=None, http_headers:dict=None):
        """
        :param proxies: proxies to use for requests (forwarded to the parent parameters class)
        :param ckan_headers: forwarded to the parent parameters class
        :param http_headers: forwarded to the parent parameters class
        """
        super().__init__(proxies=proxies, ckan_headers=ckan_headers, http_headers=http_headers)
        # NOTE(review): policy_check_pre / policy_check_post are not read in this module;
        # presumably they enable a policy check before / after write operations - confirm against callers
        self.policy_check_pre: bool = False
        self.policy_check_post: bool = True
        # default value of the verbose argument of CkanApiPolicy.policy_check
        self.verbose_policy: bool = True

    def copy(self, new_identifier:str=None, *, dest=None):
        """
        Copy the parameters into dest (a new CkanApiPolicyParams if dest is None) and return it.

        :param new_identifier: not used by this implementation
        :param dest: object receiving the copy (optional)
        :return: the destination object
        """
        target = CkanApiPolicyParams() if dest is None else dest
        super().copy(dest=target)  # parent attributes first
        for attribute in ("policy_check_pre", "policy_check_post", "verbose_policy"):
            setattr(target, attribute, getattr(self, attribute))
        return target
38
+
39
+
40
class CkanApiPolicy(CkanApiReadOnly):
    """
    Read-only CKAN client extended with data format policy support:
    a policy can be loaded from a file or from the CKAN server (default policy)
    and the mapped packages can be checked against it.
    """
    def __init__(self, url: str = None, *, proxies:Union[str,dict,ProxyConfig] = None,
                 apikey: Union[str,CkanApiKey] = None, apikey_file: str = None,
                 owner_org: str = None, params:CkanApiPolicyParams=None,
                 map:CkanMap=None, policy: CkanPackageDataFormatPolicy = None, policy_file: str = None,
                 identifier=None):
        """
        CKAN Database API interface to CKAN server with helper functions using pandas DataFrames.

        :param url: url of the CKAN server
        :param proxies: proxies to use for requests
        :param apikey: way to provide the API key directly (optional)
        :param apikey_file: path to a file containing a valid API key in the first line of text (optional)
        :param owner_org: name of the organization to limit package_search (optional)
        :param params: other connection/behavior parameters
        :param map: map of known resources
        :param policy: data format policy to be used with the policy_check function
        :param policy_file: path to a JSON file containing the data format policy to load
            (the keyword "default" loads the default policy from the CKAN server).
            If provided, the loaded policy replaces the policy argument.
        :param identifier: identifier of the ckan client
        """
        super().__init__(url=url, proxies=proxies, apikey_file=apikey_file, apikey=apikey,
                         owner_org=owner_org, map=map, identifier=identifier)
        self.policy: Union[CkanPackageDataFormatPolicy,None] = policy
        self.policy_source: Union[str,None] = None
        # install the parameters before loading the policy file,
        # so the caller-supplied params are the ones used during the initial load
        if params is None:
            params = CkanApiPolicyParams()
        if proxies is not None:
            params.proxies = proxies
        self.params: CkanApiPolicyParams = params
        if policy_file is not None:
            self.load_policy(policy_file, base_dir=None)
        # set after the initial load: loading the default policy goes through map_resources(load_policy=False),
        # which modifies this flag
        self.default_policy_load_on_map: bool = True

    def copy(self, new_identifier: str = None, *, dest=None):
        """
        Copy this client into dest (a new CkanApiPolicy if dest is None) and return it.

        :param new_identifier: forwarded to the parent copy method
        :param dest: object receiving the copy (optional)
        :return: the destination object
        """
        if dest is None:
            dest = CkanApiPolicy()
        super().copy(new_identifier=new_identifier, dest=dest)
        if self.policy is not None:
            dest.policy = self.policy.copy()
            # keep the origin together with the policy, so that the copy
            # does not reload a default policy it already holds
            dest.policy_source = self.policy_source
        dest.default_policy_load_on_map = self.default_policy_load_on_map
        return dest

    def set_verbosity(self, verbosity:bool=True, verbose_extra:bool=None) -> None:
        """
        Enable/disable full verbose output

        :param verbosity: boolean. Cannot be None
        :param verbose_extra: forwarded to the parent method. If not None, it is also applied
            to the verbosity of the policy checks (params.verbose_policy)
        :return:
        """
        super().set_verbosity(verbosity=verbosity, verbose_extra=verbose_extra)
        if verbose_extra is not None:
            self.params.verbose_policy = verbose_extra

    def _setup_cli_ckan_parser(self, parser:argparse.ArgumentParser=None) -> argparse.ArgumentParser:
        """
        Overload adding support to load a policy from a file (--policy-file argument).

        :param parser: parser to complete, forwarded to the parent method
        :return: the parser returned by the parent method, with the extra argument
        """
        parser = super()._setup_cli_ckan_parser(parser=parser)
        parser.add_argument("--policy-file", type=str,
                            help="Path to a file containing the CKAN data format policy (json format)")
        return parser

    def _cli_ckan_args_apply(self, args: argparse.Namespace, *, base_dir:str=None,
                             error_not_found:bool=True, default_proxies:dict=None, proxy_headers:dict=None,
                             proxies:dict=None, headers:dict=None) -> None:
        """
        Overload adding support to load a policy from a file (--policy-file argument).

        :param args: parsed command line arguments
        :param base_dir: base directory, used if the policy file is a relative path
        :param error_not_found: forwarded to the parent method and to load_policy
        :param default_proxies: forwarded to the parent method
        :param proxy_headers: forwarded to the parent method
        :param proxies: proxies used to load the policy file (params.proxies if None)
        :param headers: headers used to load the policy file
        """
        super()._cli_ckan_args_apply(args=args, base_dir=base_dir, error_not_found=error_not_found,
                                     default_proxies=default_proxies, proxy_headers=proxy_headers)
        if proxies is None:
            proxies = self.params.proxies
        if args.policy_file is not None:
            # base_dir is forwarded so that a relative policy file path is resolved like the other CLI paths
            self.load_policy(args.policy_file, base_dir=base_dir, proxies=proxies, headers=headers,
                             error_not_found=error_not_found)

    def query_default_policy(self, *, error_not_found:bool=False, load_error:bool=True) -> Union[CkanPackageDataFormatPolicy,None]:
        """
        Download default policy and return it without loading it in the policy attribute.
        The resource holding the policy is designated by ckan_configuration
        (policy_resource in the package configuration_package_name).

        Side effect: the package is mapped with load_policy=False, which also sets
        default_policy_load_on_map to False (see set_default_map_mode).

        :param error_not_found: forwarded to the mapping functions when the package/resource is not found
        :param load_error: forwarded to CkanPackageDataFormatPolicy.from_jsons
        :return: the policy, or None if the resource holding it is not mapped
        """
        self.map_resources(ckan_configuration.configuration_package_name, error_not_found=error_not_found, load_policy=False)  # load_policy=False or else infinite loop
        resource_info = self.map.get_resource_info(ckan_configuration.policy_resource, ckan_configuration.configuration_package_name,
                                                   error_not_mapped=error_not_found)
        if resource_info is None:
            return None
        # the download url is only recorded as the source of the policy; the download uses the resource id
        _, response = self.resource_download(resource_info.id)
        return CkanPackageDataFormatPolicy.from_jsons(response.text, source_file=resource_info.download_url,
                                                      load_error=load_error)

    def load_default_policy(self, *,
                            error_not_found:bool=False, load_error:bool=True, cancel_if_present:bool=False,
                            force:bool=False) -> Union[CkanPackageDataFormatPolicy,None]:
        """
        Function to load the default data format policy from the CKAN server.
        The default policy is defined in ckan_configuration

        :param error_not_found: forwarded to query_default_policy
        :param load_error: forwarded to query_default_policy
        :param cancel_if_present: if True, do nothing when the last policy loaded is already the default one.
            NOTE(review): a policy coming from a file or from the constructor is still replaced - confirm this is intended.
        :param force: reload the policy in any case (takes precedence over cancel_if_present)
        :return: the policy now in the policy attribute (None if the default policy was not found)
        """
        if force:
            self.policy = None
        elif cancel_if_present and self.policy_source == ckan_default_policy_keyword:
            # default policy already requested: keep it.
            # Not evaluated when force is set, otherwise the policy cleared above would be returned without reload.
            return self.policy
        self.policy = self.query_default_policy(error_not_found=error_not_found, load_error=load_error)
        self.policy_source = ckan_default_policy_keyword
        return self.policy

    def load_policy(self, policy_file: str, base_dir: str = None, proxies:dict=None, headers:dict=None,
                    error_not_found: bool = True) -> Union[CkanPackageDataFormatPolicy,None]:
        """
        Load the CKAN data format policy from file (JSON format).
        If policy_file is None or equals the keyword "default" (case-insensitive),
        the default policy is loaded from the CKAN server instead.

        :param policy_file: path to the policy file
        :param base_dir: base directory, if the policy_file is a relative path
        :param proxies: proxies used to load the file (params.proxies if None)
        :param headers: headers used to load the file
        :param error_not_found: forwarded to the loading functions
        :return: the policy now in the policy attribute
        """
        if proxies is None:
            proxies = self.params.proxies
        if policy_file is None:
            policy_file = ckan_default_policy_keyword  # set to "default"
        if policy_file.lower() == ckan_default_policy_keyword:  # if equals "default"
            return self.load_default_policy(error_not_found=error_not_found, force=True, cancel_if_present=False)
        self.policy = CkanPackageDataFormatPolicy.from_json(policy_file, base_dir=base_dir, proxies=proxies, headers=headers,
                                                            error_not_found=error_not_found)
        self.policy_source = policy_file
        return self.policy

    def policy_check(self, package_list:Union[str,List[str]]=None, policy: CkanPackageDataFormatPolicy=None,
                     *, buffer:Dict[str, List[DataPolicyError]]=None, raise_error:bool=False,
                     verbose:bool=None) -> bool:
        """
        Enforce policy on mapped packages

        :param package_list: package or list of packages to check (all the mapped packages if None)
        :param policy: policy to check (the policy attribute if None)
        :param buffer: if provided, receives the list of messages of each package, indexed by package name
        :param raise_error: forwarded to the policy check of each package
        :param verbose: print progress messages (params.verbose_policy if None)
        :return: True if every package passed the check, or if there is no policy to check
        """
        success = True
        if package_list is None:
            package_list = self.map.packages.keys()  # check on all packages
        elif isinstance(package_list, str):
            package_list = [package_list]
        if policy is None:
            policy = self.policy
        if verbose is None:
            verbose = self.params.verbose_policy
        if policy is None:
            # no policy loaded at all
            return True
        if verbose:
            print(f"Testing policy {policy.label}")
        for package_name in package_list:
            package_info = self.get_package_info_or_request(package_name)
            package_buffer: List[DataPolicyError] = []
            success &= policy.policy_check_package(package_info, display_message=verbose,
                                                   package_buffer=package_buffer, raise_error=raise_error)
            if buffer is not None:
                buffer[package_info.name] = package_buffer
        if verbose:
            print(f"Data format policy {policy.label} success: {success}")
        return success

    def set_default_map_mode(self, datastore_info:bool=None, resource_view_list:bool=None,
                             organization_info:bool=None, license_list:bool=None,
                             load_policy:bool=None) -> None:
        """
        Overload adding the default behavior for loading the default policy when mapping.

        :param datastore_info: forwarded to the parent method
        :param resource_view_list: forwarded to the parent method
        :param organization_info: forwarded to the parent method
        :param license_list: forwarded to the parent method
        :param load_policy: new value of default_policy_load_on_map (unchanged if None)
        """
        super().set_default_map_mode(datastore_info=datastore_info, resource_view_list=resource_view_list,
                                     organization_info=organization_info, license_list=license_list)
        if load_policy is None:
            load_policy = self.default_policy_load_on_map
        self.default_policy_load_on_map = load_policy

    def map_resources(self, package_list:Union[str, List[str]]=None, *, params:dict=None,
                      datastore_info:bool=None, resource_view_list:bool=None, organization_info:bool=None, license_list:bool=None,
                      only_missing:bool=True, error_not_found:bool=True,
                      owner_org:str=None, load_policy:bool=None) -> CkanMap:
        """
        Overload including a call to load the default data format policy.

        :param load_policy: if not None, becomes the new default (default_policy_load_on_map).
            When the resulting default is True, the default policy is loaded after mapping,
            unless it was already the last policy loaded.
        :return: the map returned by the parent method
        (the other arguments are forwarded to the parent method)
        """
        self.set_default_map_mode(load_policy=load_policy)
        ckan_map = super().map_resources(package_list=package_list, params=params, datastore_info=datastore_info,
                                         resource_view_list=resource_view_list, organization_info=organization_info,
                                         license_list=license_list, only_missing=only_missing, error_not_found=error_not_found,
                                         owner_org=owner_org)
        # the flag is read after the parent call, as in the original implementation
        load_policy = self.default_policy_load_on_map
        if load_policy:
            self.load_default_policy(cancel_if_present=True, load_error=False)
        return ckan_map
229
+