ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ckanapi_harvesters/__init__.py +32 -10
- ckanapi_harvesters/auxiliary/__init__.py +26 -0
- ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
- ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
- ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
- ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
- ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
- ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
- ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
- ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
- ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
- ckanapi_harvesters/auxiliary/deprecated.py +82 -0
- ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
- ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
- ckanapi_harvesters/auxiliary/list_records.py +60 -0
- ckanapi_harvesters/auxiliary/login.py +163 -0
- ckanapi_harvesters/auxiliary/path.py +208 -0
- ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
- ckanapi_harvesters/auxiliary/urls.py +40 -0
- ckanapi_harvesters/builder/__init__.py +40 -0
- ckanapi_harvesters/builder/builder_aux.py +20 -0
- ckanapi_harvesters/builder/builder_ckan.py +238 -0
- ckanapi_harvesters/builder/builder_errors.py +36 -0
- ckanapi_harvesters/builder/builder_field.py +122 -0
- ckanapi_harvesters/builder/builder_package.py +9 -0
- ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
- ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
- ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
- ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
- ckanapi_harvesters/builder/builder_resource.py +589 -0
- ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
- ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
- ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
- ckanapi_harvesters/builder/builder_resource_init.py +126 -0
- ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
- ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
- ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
- ckanapi_harvesters/builder/example/__init__.py +21 -0
- ckanapi_harvesters/builder/example/builder_example.py +21 -0
- ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
- ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
- ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
- ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
- ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
- ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
- ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
- ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
- ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
- ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
- ckanapi_harvesters/builder/mapper_datastore.py +93 -0
- ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
- ckanapi_harvesters/builder/specific/__init__.py +11 -0
- ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
- ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
- ckanapi_harvesters/ckan_api/__init__.py +20 -0
- ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
- ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
- ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
- ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
- ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
- ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
- ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
- ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
- ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
- ckanapi_harvesters/harvesters/__init__.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
- ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
- ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
- ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
- ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
- ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
- ckanapi_harvesters/harvesters/harvester_init.py +30 -0
- ckanapi_harvesters/harvesters/harvester_model.py +49 -0
- ckanapi_harvesters/harvesters/harvester_params.py +323 -0
- ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
- ckanapi_harvesters/harvesters/postgre_params.py +86 -0
- ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
- ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
- ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
- ckanapi_harvesters/policies/__init__.py +20 -0
- ckanapi_harvesters/policies/data_format_policy.py +269 -0
- ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
- ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
- ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
- ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
- ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
- ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
- ckanapi_harvesters/reports/__init__.py +11 -0
- ckanapi_harvesters/reports/admin_report.py +292 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.2.dist-info}/METADATA +74 -38
- ckanapi_harvesters-0.0.2.dist-info/RECORD +105 -0
- ckanapi_harvesters/divider/__init__.py +0 -27
- ckanapi_harvesters/divider/divider.py +0 -53
- ckanapi_harvesters/divider/divider_error.py +0 -59
- ckanapi_harvesters/main.py +0 -30
- ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.2.dist-info}/WHEEL +0 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.2.dist-info}/top_level.txt +0 -0
ckanapi_harvesters/__init__.py
CHANGED
|
@@ -1,15 +1,37 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Package with helper function for CKAN requests using pandas DataFrames.
|
|
1
5
|
"""
|
|
2
|
-
Module for demonstrating a simple 'Hello World' function.
|
|
3
6
|
|
|
4
|
-
|
|
5
|
-
|
|
7
|
+
# builder_file_format_version = "0.0.1"
|
|
8
|
+
try:
|
|
9
|
+
from importlib.metadata import version, PackageNotFoundError
|
|
10
|
+
except ImportError: # Python <3.8
|
|
11
|
+
from importlib_metadata import version, PackageNotFoundError
|
|
6
12
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
13
|
+
try:
|
|
14
|
+
__version__ = version("ckanapi_harvesters")
|
|
15
|
+
except PackageNotFoundError:
|
|
16
|
+
__version__ = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
import os
|
|
20
|
+
self_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
from . import auxiliary
|
|
24
|
+
from . import policies
|
|
25
|
+
from . import harvesters
|
|
26
|
+
from . import ckan_api
|
|
27
|
+
from . import builder
|
|
28
|
+
from . import reports
|
|
29
|
+
|
|
30
|
+
# usage shortcuts
|
|
31
|
+
from .auxiliary import CkanMap
|
|
32
|
+
from .policies import CkanPackageDataFormatPolicy
|
|
33
|
+
from .ckan_api import CkanApi, CKAN_API_VERSION
|
|
34
|
+
from .builder import BUILDER_FILE_FORMAT_VERSION
|
|
35
|
+
from .builder import BuilderPackage, BuilderDataStoreMultiABC, BuilderDataStoreFolder, RequestFileMapperIndexKeys
|
|
12
36
|
|
|
13
|
-
from .main import hello_world
|
|
14
37
|
|
|
15
|
-
__all__ = ['hello_world']
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Package with helper function for CKAN requests using pandas DataFrames.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from . import ckan_defs
|
|
8
|
+
from . import path
|
|
9
|
+
from . import login
|
|
10
|
+
from . import urls
|
|
11
|
+
from . import proxy_config
|
|
12
|
+
from . import external_code_import
|
|
13
|
+
from . import list_records
|
|
14
|
+
from . import ckan_action
|
|
15
|
+
from . import ckan_errors
|
|
16
|
+
from . import ckan_configuration
|
|
17
|
+
from . import ckan_api_key
|
|
18
|
+
from . import ckan_model
|
|
19
|
+
from . import ckan_map
|
|
20
|
+
from . import ckan_vocabulary_deprecated
|
|
21
|
+
from . import ckan_auxiliary
|
|
22
|
+
from . import deprecated
|
|
23
|
+
|
|
24
|
+
from .ckan_map import CkanMap
|
|
25
|
+
from .external_code_import import unlock_external_code_execution
|
|
26
|
+
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Action response common treatments
|
|
5
|
+
"""
|
|
6
|
+
from typing import Union
|
|
7
|
+
import json
|
|
8
|
+
|
|
9
|
+
import requests
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class CkanActionResponse:
    """
    Class which decodes and checks the response of a CKAN request.

    The body of the response is parsed as JSON. The request is considered successful when the HTTP status
    code is 200 and the decoded object contains a true "success" field and a "result" field.
    In any other case, error_message holds the "error" field of the response, or the raw body.
    """
    def __init__(self, response: requests.Response, dry_run: bool=False):
        """
        :param response: response object to decode. A response without content and without request is
            treated as the placeholder of a request which was not sent (dry run).
        :param dry_run: indicates that the request was not sent
        """
        self.response: requests.Response = response  # for debug purposes
        self.response_dict: Union[dict,None] = None
        self.status_code: int = response.status_code
        self.success: bool = False
        self.success_json_loads: bool = False
        self.result: Union[dict,None] = None
        self.error_message: Union[None,str,dict] = None
        self.len: Union[int,None] = None
        self.dry_run: bool = dry_run

        content = response.content
        if content is None and response.request is None:
            # dry run
            assert(dry_run)
            self.success = True
            self.success_json_loads = False
            self.status_code = 1
            self.error_message = "Request not sent: dry run mode"
            self.len = 0
            return

        # Decode once, leniently: a body which is not valid UTF-8 must end up in the error message
        # instead of raising a second time while the message is being built.
        raw_text = content.decode(errors="replace") if content is not None else ""
        try:
            response_dict = json.loads(raw_text)
        except ValueError as json_error:
            self.error_message = f"JSON decode error {json_error} & CKAN error {raw_text}"
            return
        self.response_dict = response_dict
        self.success_json_loads = True
        if not isinstance(response_dict, dict):
            # valid JSON, but not the object expected from an action call
            self.error_message = f"Unexpected JSON response (not an object): {raw_text}"
            return
        if (response.status_code == 200 and "success" in response_dict and "result" in response_dict
                and response_dict["success"]):
            self.success = True
            self.result = response_dict["result"]
        elif "error" in response_dict:
            self.error_message = response_dict["error"]
        else:
            self.error_message = raw_text

    def __len__(self):
        if self.len is None:
            raise RuntimeError("queried len but does not have len")
        return self.len

    def default_error(self, ckan) -> "CkanActionError":
        """
        Build the exception matching the response (the exception is returned, not raised).

        :param ckan: object forwarded to the exception constructors
        :return: CkanNotFoundError for a 404 "Not Found Error", CkanAuthorizationError for a 403
            "Authorization Error", CkanActionError otherwise
        """
        # error_message can be a plain string or a dict without "__type": only inspect the type of real dicts
        error_type = None
        if self.success_json_loads and isinstance(self.error_message, dict):
            error_type = self.error_message.get("__type")
        if self.status_code == 404 and error_type == "Not Found Error":
            return CkanNotFoundError(ckan, "(Generic)", self)
        elif self.status_code == 403 and error_type == "Authorization Error":
            return CkanAuthorizationError(ckan, self)
        else:
            return CkanActionError(ckan, self)
|
|
67
|
+
|
|
68
|
+
## action error codes
|
|
69
|
+
class CkanActionError(Exception):
    """
    Exception built from the response of a CKAN action.

    The exception message is the error_message of the response. The response and its status code
    are kept as attributes.
    """
    def __init__(self, ckan, response: CkanActionResponse, display_request:bool=True):
        """
        :param ckan: object whose _error_print_debug_response method is called when display_request is set
        :param response: decoded response the exception is built from
        :param display_request: option to pass the underlying response object to ckan._error_print_debug_response
        """
        message = response.error_message
        super().__init__(message)
        self.response = response
        self.status_code = response.status_code
        if not display_request:
            return
        ckan._error_print_debug_response(response.response)

    def __str__(self):
        # prefix the base message with the status code
        base_message = super().__str__()
        return f"Server code [{self.status_code}]: {base_message}"
|
|
79
|
+
|
|
80
|
+
class CkanNotFoundError(CkanActionError):
    """
    Action error for an object which was not found.

    The error_message of the response is rewritten in place to mention the object type.
    """
    def __init__(self, ckan, object_type:str, response: CkanActionResponse, display_request:bool=True):
        """
        :param ckan: forwarded to CkanActionError
        :param object_type: label of the kind of object which was looked up (used in the message)
        :param response: decoded response; its error_message attribute is modified
        :param display_request: forwarded to CkanActionError
        """
        # the message must be rewritten before the base constructor reads it
        previous_message = response.error_message
        response.error_message = f"{object_type} not found: {previous_message}"
        super().__init__(ckan, response, display_request=display_request)
        self.object_type = object_type
|
|
85
|
+
|
|
86
|
+
class CkanAuthorizationError(CkanActionError):
    """
    Action error without additional behavior, used to distinguish authorization failures.
    """
|
|
88
|
+
|
|
89
|
+
class CkanSqlCapabilityError(CkanActionError):
    """
    Action error with a fixed message about SQL capabilities not being activated on the server.

    The error_message of the response is overwritten in place.
    """
    def __init__(self, ckan, response: CkanActionResponse, display_request:bool=True):
        """
        :param ckan: forwarded to CkanActionError
        :param response: decoded response; its error_message attribute is replaced
        :param display_request: forwarded to CkanActionError
        """
        fixed_message = ("sql capabilities are not activated on CKAN server. "
                         "See documentation for option ckan.datastore.sqlsearch.enabled")
        response.error_message = fixed_message
        super().__init__(ckan, response, display_request=display_request)
|
|
93
|
+
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Methods to load an API key
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os.path
|
|
8
|
+
from warnings import warn
|
|
9
|
+
from typing import Dict, Union, Iterable
|
|
10
|
+
import getpass
|
|
11
|
+
import argparse
|
|
12
|
+
|
|
13
|
+
from ckanapi_harvesters.auxiliary.ckan_errors import ApiKeyFileError
|
|
14
|
+
from ckanapi_harvesters.auxiliary.path import sanitize_path, path_rel_to_dir
|
|
15
|
+
from ckanapi_harvesters.auxiliary.ckan_defs import environ_keyword
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ApiKey:
    """
    API key storage class.

    The key is held in memory and can be provided directly, read from the first line of a file,
    typed in the console or given through command line arguments.
    """
    CKAN_API_KEY_HEADER_NAME = {"Authorization", "X-CKAN-API-Key"}  # match apikey_header_name of your CKAN instance
    CKAN_API_KEY_ENVIRON = "CKAN_API_KEY"  # not recommended to store sensitive information in environment variables
    API_KEY_FILE_ENVIRON = "CKAN_API_KEY_FILE"

    def __init__(self, *, apikey:str=None, apikey_file:str=None,
                 api_key_header_name:Union[str, Iterable[str]]=None):
        """
        CKAN Database API key storage class.

        :param apikey: way to provide the API key directly (optional)
        :param apikey_file: path to a file containing a valid API key in the first line of text (optional)
        :param api_key_header_name: name, or names, of the HTTP header(s) carrying the key.
            Defaults to "Authorization".
        """
        if api_key_header_name is None:
            api_key_header_name = "Authorization"
        self.apikey_file: str = apikey_file  # path to a file containing a valid API key in the first line of text (optional)
        self._apikey: str = apikey  # API key used for restricted package access
        self.api_key_header_name = api_key_header_name

    def __del__(self):
        # drop the reference to the key when the object is destroyed
        self.clear()

    def __copy__(self):
        return self.copy()

    def copy(self, *, dest=None):
        """
        Copy the key, the key file path and the header name(s) to another object.

        :param dest: object to fill. A new ApiKey is created if not provided.
        :return: the destination object
        """
        if dest is None:
            dest = ApiKey()
        dest.apikey_file = self.apikey_file
        dest._apikey = self._apikey
        # the header name is part of the state: without it a copy would authenticate with another header
        dest.api_key_header_name = self.api_key_header_name
        return dest

    def __str__(self):
        # never reveal the key value
        if self._apikey is None:
            return "None"
        elif self._apikey == "":
            return "<empty string>"
        else:
            return "*****"

    @property
    def value(self) -> Union[str,None]:
        return self._apikey

    @value.setter
    def value(self, value:Union[str,None]):
        self._apikey = value

    def is_empty(self):
        """
        :return: True if no key is stored (an empty string counts as a stored key)
        """
        return self._apikey is None

    def clear(self) -> None:
        self._apikey = None

    def load_from_environ(self, *, error_not_found:bool=False) -> bool:
        """
        Load CKAN API key from environment variables, by order of priority:

        By default, no environment variables are used.

        :param error_not_found: not used in this implementation
        :return: always False in this implementation
        """
        return False

    def load_apikey(self, apikey_file:str=None, *, base_dir:str=None, error_not_found:bool=True) -> bool:
        """
        Load the API key from file.
        The file should contain a valid API key in the first line of text.

        :param apikey_file: path to the API key file. If None, the path stored in the object is used.
            The following keywords are accepted:
            - "environ": the API key will be looked up in the environment variable with load_from_environ
        :param base_dir: base directory to find the API key file, if a relative path is provided
        :param error_not_found: option to raise an exception if the API key file is not found.
            If False, a warning is emitted and False is returned.
        :return: True if a key was loaded
        :raises ApiKeyFileError: if no file path is available
        """
        if apikey_file is None:
            apikey_file = self.apikey_file
        apikey_file = path_rel_to_dir(apikey_file, base_dir=base_dir, keyword_exceptions={environ_keyword})
        if apikey_file is None:
            raise ApiKeyFileError('apikey_file is required')
        api_keyword = apikey_file.strip().lower()
        if api_keyword == environ_keyword:
            return self.load_from_environ(error_not_found=error_not_found)
        if not(os.path.isfile(apikey_file)) and not error_not_found:
            msg = f"API key file does not exist: {apikey_file}"
            warn(msg)
            return False
        # if the file is missing at this point, open() raises the error requested by error_not_found
        with open(apikey_file, 'r') as f:
            apikey = f.readline().strip()
        self.value = apikey
        self.apikey_file = apikey_file
        return True

    def get_auth_header(self) -> Dict[str, str]:
        """
        Returns the correct header with the API key for the requests needing it.
        If no API key was loaded, returns an empty dictionary.
        If several header names are configured, the key is set for each of them.
        """
        if self.value is None:
            return {}
        apikey_encoded = self.value
        if isinstance(self.api_key_header_name, str):
            return {self.api_key_header_name: apikey_encoded}
        return {key: apikey_encoded for key in self.api_key_header_name}

    def input(self):
        """
        Prompt the user to input the API key in the console window.

        :return:
        """
        api_key = getpass.getpass("Please enter the API key: ")
        self._apikey = api_key

    @staticmethod
    def _setup_cli_parser(parser:argparse.ArgumentParser=None) -> argparse.ArgumentParser:
        """
        Add the arguments --apikey and --apikey-file to a parser.

        :param parser: parser to complete. A new parser is created if not provided.
        :return: the parser
        """
        if parser is None:
            parser = argparse.ArgumentParser(description="API key initialization")
        parser.add_argument("--apikey", type=str,
                            help="API key")
        parser.add_argument("--apikey-file", type=str,
                            help="Path to a file containing the API key (first line)")
        return parser

    def _cli_args_apply(self, args: argparse.Namespace, *, base_dir: str = None, error_not_found: bool = True) -> None:
        """
        Apply the arguments defined by _setup_cli_parser. The key file is applied after the direct key.

        :param args: parsed arguments
        :param base_dir: forwarded to load_apikey
        :param error_not_found: forwarded to load_apikey
        """
        if args.apikey is not None:
            self.value = args.apikey
        if args.apikey_file is not None:
            self.load_apikey(args.apikey_file, base_dir=base_dir, error_not_found=error_not_found)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class CkanApiKey(ApiKey):
    """
    CKAN Database API key storage class.

    The key is sent with the header names listed in CKAN_API_KEY_HEADER_NAME and can be loaded
    from the environment variables CKAN_API_KEY and CKAN_API_KEY_FILE.
    """

    def __init__(self, *, apikey:str=None, apikey_file:str=None):
        """
        CKAN Database API key storage class.

        :param apikey: way to provide the API key directly (optional)
        :param apikey_file: path to a file containing a valid API key in the first line of text (optional)
        """
        super().__init__(apikey=apikey, apikey_file=apikey_file, api_key_header_name=self.CKAN_API_KEY_HEADER_NAME)

    def copy(self, *, dest=None) -> "CkanApiKey":
        """
        Copy the object.

        :param dest: object to fill. A new CkanApiKey is created if not provided.
        :return: the destination object
        """
        if dest is None:
            dest = CkanApiKey()
        super().copy(dest=dest)
        return dest

    def load_from_environ(self, *, error_not_found:bool=False) -> bool:
        """
        Load CKAN API key from environment variables, by order of priority:

        - `CKAN_API_KEY`: for the raw API key (it is not recommended to store API key in an environment variable)
        - `CKAN_API_KEY_FILE`: path to a file containing a valid API key in the first line of text

        :param error_not_found: raise an error if the API key file was not found
        :return: True if a key was loaded
        :raises ApiKeyFileError: if CKAN_API_KEY_FILE contains the keyword redirecting to the environment
        """
        apikey = os.environ.get(self.CKAN_API_KEY_ENVIRON)  # "CKAN_API_KEY"
        apikey_file = sanitize_path(os.environ.get(self.API_KEY_FILE_ENVIRON))  # "CKAN_API_KEY_FILE"
        if apikey is not None:
            msg = f"It is not recommended to store sensitive information in environment variables such as the API key ({self.CKAN_API_KEY_ENVIRON})"
            warn(msg)
            self.value = apikey
            return True
        elif apikey_file is not None:
            # load_apikey would call this method again for this keyword: this value would create an infinite loop.
            # An explicit exception is used because assert statements are removed in optimized mode.
            if apikey_file.strip().lower() == environ_keyword:
                raise ApiKeyFileError(f"Environment variable {self.API_KEY_FILE_ENVIRON} must contain a file path, not the keyword '{environ_keyword}'")
            return self.load_apikey(apikey_file, error_not_found=error_not_found)
        else:
            msg = f"No API key was found in the environment variable {self.CKAN_API_KEY_ENVIRON}"
            warn(msg)
            return False

    def input(self):
        """
        Prompt the user to input the API key in the console window.

        :return:
        """
        api_key = getpass.getpass("Please enter the CKAN API key: ")
        self._apikey = api_key

    @staticmethod
    def _setup_cli_parser(parser:argparse.ArgumentParser=None) -> argparse.ArgumentParser:
        """
        Add the API key arguments to a parser.

        :param parser: parser to complete. A new parser is created if not provided.
        :return: the parser
        """
        if parser is None:
            parser = argparse.ArgumentParser(description="CKAN API key initialization")
        ApiKey._setup_cli_parser(parser=parser)
        return parser
|
|
213
|
+
|
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Data model to represent a CKAN database architecture
|
|
5
|
+
"""
|
|
6
|
+
from typing import Iterable, Union, Set, Tuple, final
|
|
7
|
+
from enum import IntEnum
|
|
8
|
+
import json
|
|
9
|
+
import numbers
|
|
10
|
+
import os
|
|
11
|
+
import io
|
|
12
|
+
import shlex
|
|
13
|
+
import argparse
|
|
14
|
+
import re
|
|
15
|
+
|
|
16
|
+
import pandas as pd
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
from ckanapi_harvesters.auxiliary.path import path_rel_to_dir, make_path_relative
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Regular expression (as a string) matching names made only of digits, lowercase ASCII letters, "-" and "_"
# (the empty string also matches)
ckan_package_name_re = "^[0-9a-z-_]*$"
# Name of the "_id" column
# NOTE(review): presumably the row identifier column of DataStore tables - confirm against callers
datastore_id_col = "_id"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class CkanIdFieldTreatment(IntEnum):
    """
    Options for the treatment of the id field.

    NOTE(review): presumably applies to the "_id" column of DataFrames - confirm against callers.
    """
    Keep = 0      # leave the field as it is
    SetIndex = 1  # use the field as index
    Remove = 2    # drop the field
|
|
30
|
+
|
|
31
|
+
re_geometry = r"geometry\((\w+),\s*(\d+)\)"
def parse_geometry_native_type(geometry_type:str) -> Tuple[str,int]:
    """
    Extract the geometry type and the EPSG code from a type of the form "geometry(<type>, <epsg>)".

    The keyword "geometry" is matched without case sensitivity.

    :param geometry_type: type specification, e.g. "geometry(Point, 4326)"
    :return: Tuple geometry type (as written in the input), EPSG code
    :raises ValueError: if the string does not contain a geometry specification
    """
    match = re.search(re_geometry, geometry_type, flags=re.IGNORECASE)
    if match is None:
        # explicit error instead of an AttributeError on None
        raise ValueError(f"Not a geometry type specification: {geometry_type!r}")
    return match.group(1), int(match.group(2))
|
|
37
|
+
|
|
38
|
+
class CkanFieldInternalAttrs:
    """
    Custom information for internal use

    Holds the geometry-related attributes of a field. All attributes default to None.
    """
    def __init__(self):
        self.geometry_as_source: Union[bool, None] = None
        self.geometry_type: Union[str, None] = None  # set by init_from_native_type
        self.epsg_target:Union[int,None] = None  # set by init_from_native_type or update_from_ckan
        self.epsg_source:Union[int,None] = None  # set by the --epsg-src option

    def __copy__(self):
        return self.copy()

    def __eq__(self, other):
        # comparison of all attributes; other types are left to Python's default handling
        if not isinstance(other, CkanFieldInternalAttrs):
            return NotImplemented
        return self.__dict__ == other.__dict__

    def copy(self) -> "CkanFieldInternalAttrs":
        """
        :return: a new object with the same attribute values
        """
        dest = CkanFieldInternalAttrs()
        # every attribute compared by __eq__ must be copied, otherwise a copy differs from its source
        dest.geometry_as_source = self.geometry_as_source
        # from: native type (geometries)
        dest.geometry_type = self.geometry_type
        dest.epsg_target = self.epsg_target
        # user options
        dest.epsg_source = self.epsg_source
        return dest

    def merge(self, new_values: "CkanFieldInternalAttrs") -> "CkanFieldInternalAttrs":
        """
        Combine two objects without modifying them.

        :param new_values: values taking precedence over the current ones, when they are not None
        :return: a new object
        """
        dest = self.copy()
        if new_values.geometry_as_source is not None:
            dest.geometry_as_source = new_values.geometry_as_source
        if new_values.geometry_type is not None:
            dest.geometry_type = new_values.geometry_type
        if new_values.epsg_source is not None:
            dest.epsg_source = new_values.epsg_source
        if new_values.epsg_target is not None:
            dest.epsg_target = new_values.epsg_target
        return dest

    @staticmethod
    def _setup_cli_ckan_parser(parser:argparse.ArgumentParser=None) -> argparse.ArgumentParser:
        """
        Add the argument --epsg-src to a parser.

        :param parser: parser to complete. A new parser is created if not provided.
        :return: the parser
        """
        if parser is None:
            parser = argparse.ArgumentParser(description="CKAN internal field parameters")
        parser.add_argument("--epsg-src", type=int,
                            help="Source EPSG (geographic coordinate system) for the column, used by data_cleaner")
        return parser

    def _cli_ckan_args_apply(self, args: argparse.Namespace) -> None:
        # a value of 0 is ignored, like an absent option
        if args.epsg_src:
            self.epsg_source = args.epsg_src

    def init_from_options_string(self, options_string:str) -> None:
        """
        Apply options written like command line arguments (see _setup_cli_ckan_parser).

        :param options_string: options, e.g. "--epsg-src 4326". Nothing is done if None.
        """
        if options_string is None:
            return
        parser = self._setup_cli_ckan_parser()
        args = parser.parse_args(shlex.split(options_string))
        self._cli_ckan_args_apply(args)

    def init_from_native_type(self, native_type:str) -> None:
        """
        Set geometry_type and epsg_target from a native type starting with "geometry(".
        Other types are ignored.

        :param native_type: native type of the field. Nothing is done if None.
        """
        if native_type is None:
            return
        if native_type.lower().strip().startswith("geometry("):
            geometry_type, geo_epsg = parse_geometry_native_type(native_type)
            self.geometry_type = geometry_type
            self.epsg_target = geo_epsg

    def update_from_ckan(self, ckan):
        """
        If a source EPSG is defined, set the target EPSG to ckan.params.ckan_default_target_epsg.

        :param ckan: object providing params.ckan_default_target_epsg
        """
        if self.epsg_source is not None:
            self.epsg_target = ckan.params.ckan_default_target_epsg
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
## Requests ------------------
|
|
106
|
+
json_headers = {"Content-Type": "application/json", 'Accept': 'text/plain'}
max_len_debug_print = 5000


def json_encode_params(params:dict) -> Tuple[str, dict]:
    """
    Encode request parameters as a compact JSON string, to be given to the data argument of requests
    (instead of the json argument), together with the headers to use.

    This is needed for upload requests with a records field: NaN values are not supported
    by the encoder used by requests, whereas they are written as NaN here.

    ___Requirement___: add headers=json_headers !!!

    :param params: parameters to encode
    :return: Tuple encoded payload, headers (the module-level json_headers object)
    """
    compact_separators = (',', ':')  # no whitespace between items
    encoded = json.dumps(params, separators=compact_separators)
    return encoded, json_headers
|
|
123
|
+
|
|
124
|
+
class RequestType(IntEnum):
    """
    Kind of request: Get or Post.
    """
    Get = 1
    Post = 2
|
|
127
|
+
|
|
128
|
+
def requests_multipart_data(json_dict:dict, files:dict) -> dict:
    """
    Build the multipart data of a request containing both json and files.
    The result is meant for the files argument of requests.post.
    json_headers must not be used with it.

    :param json_dict: content of the "json" part
    :param files: file parts to add; must be a dict without a "json" key
    :return: dict with the "json" part followed by the entries of files
    :raises ValueError: if files is not a dict or already has a "json" key
    """
    encoded_json = json.dumps(json_dict)
    parts = {"json": (None, encoded_json, "application/json")}
    # the "json" part is reserved: refuse anything which could overwrite it
    if not isinstance(files, dict) or "json" in files.keys():
        raise ValueError("files")
    parts.update(files)
    return parts
|
|
143
|
+
|
|
144
|
+
df_upload_to_csv_kwargs = dict()
df_download_to_csv_kwargs = dict()

def upload_prepare_requests_files_arg(*, files:dict=None, file_path:str=None, df:pd.DataFrame=None,
                                      payload:Union[bytes, io.BufferedIOBase]=None, payload_name:str=None) -> Union[dict, None]:
    """
    Create files argument for requests.post, by order of priority:

    :param files: files pass through argument to the requests.post function. Use to send other data formats.
    :param payload: bytes to upload as a file
    :param payload_name: name of the payload to use (associated with the payload argument) - this determines the format recognized in CKAN viewers.
    :param file_path: path of the file to transmit (binary and text files are supported here).
        The file is opened in binary mode and the handle is part of the result: closing it is up to the caller.
    :param df: pandas DataFrame to replace resource. It is sent as a CSV file named "file.csv",
        using the module-level df_upload_to_csv_kwargs options.

    :return: the files argument, or None if no source was given
    :raises AssertionError: if more than one source is given
    """
    if files is not None:
        assert (file_path is None and df is None and payload is None)
    elif payload is not None:
        assert (file_path is None and df is None)
        if payload_name is not None:
            files = {"upload": (payload_name, payload)}
        else:
            files = {"upload": payload}
    elif file_path is not None:
        assert (df is None)
        payload_file_name = os.path.basename(file_path)
        # binary mode: the bytes are sent as they are on disk, without decoding or newline translation,
        # which is what makes binary files work
        files = {"upload": (payload_file_name, open(file_path, "rb"))}
    elif df is not None:
        payload_file_name = "file.csv"
        files = {"upload": (payload_file_name, df.to_csv(index=False, **df_upload_to_csv_kwargs), "text/plain")}
    else:
        files = None
    return files
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
## Path for specific objects ------------------
|
|
185
|
+
def ca_file_rel_to_dir(ca_file:Union[str,None], base_dir:str=None) -> Tuple[Union[bool,str,None], Union[str,None]]:
    """
    Interpret a certificate setting given as a string.

    :param ca_file: "true" / "false" (case and surrounding spaces ignored), a path, or None
    :param base_dir: base directory given to path_rel_to_dir for paths
    :return: Tuple value, source string:
        - (True, None) or (False, None) for the boolean keywords
        - (result of path_rel_to_dir, ca_file) for a path
        - (None, None) if ca_file is None
    """
    if ca_file is None:
        return None, None
    keyword = ca_file.strip().lower()
    if keyword in ("true", "false"):
        return keyword == "true", None
    return path_rel_to_dir(ca_file, base_dir), ca_file
|
|
196
|
+
|
|
197
|
+
def ca_arg_to_str(ca_cert:Union[bool,str,None], base_dir:str=None, source_string:str=None) -> Union[str,None]:
    """
    Convert a certificate setting to a string.

    :param ca_cert: boolean, path to a certificate file, or None
    :param base_dir: forwarded to make_path_relative for paths
    :param source_string: forwarded to make_path_relative for paths
    :return: "False" for False, the result of make_path_relative for a string, None otherwise (including True)
    """
    if ca_cert is False:
        return "False"
    if isinstance(ca_cert, str):
        return make_path_relative(ca_cert, base_dir, source_string=source_string)
    return None
|
|
204
|
+
|
|
205
|
+
def ssl_arguments_decompose(ca_cert:Union[bool,str,None], *, default_ssl:bool=True) -> Tuple[bool, Union[str,None]]:
    """
    Decompose a certificate setting (boolean or path) into a boolean and a path to a certificate file.

    :param ca_cert: boolean, path to a certificate file, or None
    :param default_ssl: option to indicate if SSL should be enabled if ca_cert is None
    :return: Tuple ssl, ssl_certfile
    :raises TypeError: if ca_cert is of another type
    """
    if ca_cert is None:
        return default_ssl, None
    elif isinstance(ca_cert, bool):
        return ca_cert, None
    elif isinstance(ca_cert, str):
        return True, ca_cert
    # falling through would return None, which cannot be unpacked by the caller
    raise TypeError(f"ca_cert must be a bool, a str or None, not {type(ca_cert).__name__}")
|
|
219
|
+
|
|
220
|
+
## Auxiliary functions ------------------
|
|
221
|
+
def assert_or_raise(condition: bool, e: Exception) -> None:
    """
    Raise the given exception unless the condition holds.

    :param condition: condition to verify
    :param e: exception to raise if the condition is false
    """
    if condition:
        return
    raise e
|
|
224
|
+
|
|
225
|
+
def find_duplicates(list_str:Iterable) -> list:
    """
    List the elements which appear more than once.

    :param list_str: iterable of hashable elements
    :return: the repeated occurrences, in order of appearance: an element present n times is listed n-1 times
    """
    seen = set()
    duplicates = []
    for x in list_str:
        if x in seen:
            duplicates.append(x)
        else:
            seen.add(x)
    return duplicates
|
|
236
|
+
|
|
237
|
+
def dict_recursive_update(d:dict,u:dict) -> dict:
    """
    Update a dictionary in place with the values of another one. Nested dictionaries are merged
    instead of being replaced.

    :param d: dictionary to update (modified)
    :param u: new values
    :return: d
    """
    for k,v in u.items():
        if isinstance(v, dict):
            current = d.get(k)
            if not isinstance(current, dict):
                # absent key, or existing value which is not a dict and cannot be merged into: start from an empty dict
                current = {}
            d[k] = dict_recursive_update(current, v)
        else:
            d[k] = v
    return d
|
|
244
|
+
|
|
245
|
+
def _bool_from_string(string:str, default_value:Union[bool,None]=False) -> Union[bool,None]:
    """
    Convert the keywords "true" / "false" (case and surrounding spaces ignored) to a boolean.

    :param string: value to convert. Booleans are returned unchanged.
    :param default_value: value returned for any other input (other strings, None, other types)
    :return: boolean, or default_value
    """
    if isinstance(string, bool):
        return string
    if not isinstance(string, str):
        # e.g. None for a missing cell: there is no keyword to read
        return default_value
    keyword = string.lower().strip()
    if keyword == "true":
        return True
    elif keyword == "false":
        return False
    else:
        return default_value
|
|
256
|
+
|
|
257
|
+
def _string_from_element(element: pd.Series, empty_value=None) -> str:
    """
    Return the value of an element, replacing empty values.

    :param element: a value, or a pandas Series of which the first value is used
    :param empty_value: value returned when the value is None, a NaN number or an empty string
    :return: the value, or empty_value
    """
    value = element.values[0] if isinstance(element, pd.Series) else element
    if value is None:
        return empty_value
    if isinstance(value, numbers.Number) and np.isnan(value):
        return empty_value
    if isinstance(value, str) and len(value) == 0:
        return empty_value
    return value
|
|
268
|
+
|
|
269
|
+
def bytes_to_megabytes(size_bytes:int) -> float:
    """
    Convert a size in bytes to megabytes (1024 * 1024 bytes), rounded to 2 decimals.

    :param size_bytes: size in bytes
    :return: size in megabytes
    """
    kilobytes = size_bytes / 1024
    megabytes = kilobytes / 1024
    return round(megabytes, 2)
|
|
271
|
+
|
|
272
|
+
## json
|
|
273
|
+
# A JSON string literal, escaped characters included
_JSONS_STRING_PATTERN = r'"(?:\\.|[^"\\])*"'
# Either a whole string literal or a run of whitespace
_jsons_re_string_or_space = re.compile(_JSONS_STRING_PATTERN + r"|\s+")
# Either a whole string literal or a list which contains no other list / object
_jsons_re_string_or_flat_list = re.compile(
    _JSONS_STRING_PATTERN + r'|\[(?:[^\[\]{}"]|' + _JSONS_STRING_PATTERN + r")*\]")


def _jsons_collapse_whitespace(text:str) -> str:
    # Replace each run of whitespace located outside of string literals by one space and trim the ends.
    # String literals are copied as they are: their content is data.
    def _keep_strings(match):
        token = match.group()
        return token if token.startswith('"') else " "
    return _jsons_re_string_or_space.sub(_keep_strings, text).strip()

def _jsons_repl_func(match):
    # Put the matched json fragment on one line
    return _jsons_collapse_whitespace(match.group())

def _jsons_flat_list_repl(match):
    # String literals are matched only so that brackets they contain are not taken for lists
    token = match.group()
    if token.startswith('"'):
        return token
    return "[" + _jsons_collapse_whitespace(token[1:-1]) + "]"

def to_jsons_indent_lists_single_line(obj, *args, reduced_size:bool=False, **kwargs) -> str:
    """
    Modified json representation of an object.
    Lists with strings / integers are displayed on one line.
    Lists containing other lists or objects stay on several lines. The content of strings is never modified.

    :param obj: object to encode
    :param args: args to pass to json.dumps()
    :param reduced_size: option to not indent the json output (not human-readable)
    :param kwargs: kwargs to pass to json.dumps(). An indent of 4 is used unless specified here.
    :return: json string
    """
    if reduced_size:
        return json.dumps(obj, *args, **kwargs)
    # default value instead of a fixed keyword, so that the caller can give its own indent
    kwargs.setdefault("indent", 4)
    output = json.dumps(obj, *args, **kwargs)
    return _jsons_re_string_or_flat_list.sub(_jsons_flat_list_repl, output)
|
|
293
|
+
|