ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110):
  1. ckanapi_harvesters/__init__.py +32 -10
  2. ckanapi_harvesters/auxiliary/__init__.py +26 -0
  3. ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
  4. ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
  5. ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
  6. ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
  7. ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
  8. ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
  9. ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
  10. ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
  11. ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
  12. ckanapi_harvesters/auxiliary/deprecated.py +82 -0
  13. ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
  14. ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
  15. ckanapi_harvesters/auxiliary/list_records.py +60 -0
  16. ckanapi_harvesters/auxiliary/login.py +163 -0
  17. ckanapi_harvesters/auxiliary/path.py +208 -0
  18. ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
  19. ckanapi_harvesters/auxiliary/urls.py +40 -0
  20. ckanapi_harvesters/builder/__init__.py +40 -0
  21. ckanapi_harvesters/builder/builder_aux.py +20 -0
  22. ckanapi_harvesters/builder/builder_ckan.py +238 -0
  23. ckanapi_harvesters/builder/builder_errors.py +36 -0
  24. ckanapi_harvesters/builder/builder_field.py +122 -0
  25. ckanapi_harvesters/builder/builder_package.py +9 -0
  26. ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
  27. ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
  28. ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
  29. ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
  30. ckanapi_harvesters/builder/builder_resource.py +589 -0
  31. ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
  32. ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
  33. ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
  34. ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
  35. ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
  36. ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
  37. ckanapi_harvesters/builder/builder_resource_init.py +126 -0
  38. ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
  39. ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
  40. ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
  41. ckanapi_harvesters/builder/example/__init__.py +21 -0
  42. ckanapi_harvesters/builder/example/builder_example.py +21 -0
  43. ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
  44. ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
  45. ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
  46. ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
  47. ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
  48. ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
  49. ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
  50. ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
  51. ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
  52. ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
  53. ckanapi_harvesters/builder/mapper_datastore.py +93 -0
  54. ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
  55. ckanapi_harvesters/builder/specific/__init__.py +11 -0
  56. ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
  57. ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
  58. ckanapi_harvesters/ckan_api/__init__.py +20 -0
  59. ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
  60. ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
  61. ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
  62. ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
  63. ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
  64. ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
  65. ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
  66. ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
  67. ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
  68. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
  69. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
  70. ckanapi_harvesters/harvesters/__init__.py +23 -0
  71. ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
  72. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
  73. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
  74. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
  75. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
  76. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
  77. ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
  78. ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
  79. ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
  80. ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
  81. ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
  82. ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
  83. ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
  84. ckanapi_harvesters/harvesters/harvester_init.py +30 -0
  85. ckanapi_harvesters/harvesters/harvester_model.py +49 -0
  86. ckanapi_harvesters/harvesters/harvester_params.py +323 -0
  87. ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
  88. ckanapi_harvesters/harvesters/postgre_params.py +86 -0
  89. ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
  90. ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
  91. ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
  92. ckanapi_harvesters/policies/__init__.py +20 -0
  93. ckanapi_harvesters/policies/data_format_policy.py +269 -0
  94. ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
  95. ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
  96. ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
  97. ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
  98. ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
  99. ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
  100. ckanapi_harvesters/reports/__init__.py +11 -0
  101. ckanapi_harvesters/reports/admin_report.py +292 -0
  102. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.2.dist-info}/METADATA +74 -38
  103. ckanapi_harvesters-0.0.2.dist-info/RECORD +105 -0
  104. ckanapi_harvesters/divider/__init__.py +0 -27
  105. ckanapi_harvesters/divider/divider.py +0 -53
  106. ckanapi_harvesters/divider/divider_error.py +0 -59
  107. ckanapi_harvesters/main.py +0 -30
  108. ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
  109. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.2.dist-info}/WHEEL +0 -0
  110. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.2.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,37 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Package with helper function for CKAN requests using pandas DataFrames.
1
5
  """
2
- Module for demonstrating a simple 'Hello World' function.
3
6
 
4
- This module imports the hello_world function from the main module
5
- and makes it available for public use.
7
+ # builder_file_format_version = "0.0.1"
8
+ try:
9
+ from importlib.metadata import version, PackageNotFoundError
10
+ except ImportError: # Python <3.8
11
+ from importlib_metadata import version, PackageNotFoundError
6
12
 
7
- Functions
8
- ---------
9
- hello_world()
10
- Print 'Hello World!' to the console.
11
- """
13
+ try:
14
+ __version__ = version("ckanapi_harvesters")
15
+ except PackageNotFoundError:
16
+ __version__ = None
17
+
18
+
19
+ import os
20
+ self_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
21
+
22
+
23
+ from . import auxiliary
24
+ from . import policies
25
+ from . import harvesters
26
+ from . import ckan_api
27
+ from . import builder
28
+ from . import reports
29
+
30
+ # usage shortcuts
31
+ from .auxiliary import CkanMap
32
+ from .policies import CkanPackageDataFormatPolicy
33
+ from .ckan_api import CkanApi, CKAN_API_VERSION
34
+ from .builder import BUILDER_FILE_FORMAT_VERSION
35
+ from .builder import BuilderPackage, BuilderDataStoreMultiABC, BuilderDataStoreFolder, RequestFileMapperIndexKeys
12
36
 
13
- from .main import hello_world
14
37
 
15
- __all__ = ['hello_world']
@@ -0,0 +1,26 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Package with helper function for CKAN requests using pandas DataFrames.
5
+ """
6
+
7
+ from . import ckan_defs
8
+ from . import path
9
+ from . import login
10
+ from . import urls
11
+ from . import proxy_config
12
+ from . import external_code_import
13
+ from . import list_records
14
+ from . import ckan_action
15
+ from . import ckan_errors
16
+ from . import ckan_configuration
17
+ from . import ckan_api_key
18
+ from . import ckan_model
19
+ from . import ckan_map
20
+ from . import ckan_vocabulary_deprecated
21
+ from . import ckan_auxiliary
22
+ from . import deprecated
23
+
24
+ from .ckan_map import CkanMap
25
+ from .external_code_import import unlock_external_code_execution
26
+
@@ -0,0 +1,93 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Action response common treatments
5
+ """
6
+ from typing import Union
7
+ import json
8
+
9
+ import requests
10
+
11
+
12
class CkanActionResponse:
    """
    Class which decodes and checks the response of a CKAN request.

    The constructor parses the response body as JSON and fills:

    - ``response``: the raw response object (kept for debug purposes)
    - ``response_dict``: the decoded JSON body, or None if it was not decoded
    - ``status_code``: HTTP status code (set to 1 in dry run mode)
    - ``success``: True if the status is 200 and the body holds a truthy "success" and a "result" key
    - ``success_json_loads``: True if the body was decoded as JSON
    - ``result``: value of the "result" key on success
    - ``error_message``: value of the "error" key (may be a dict or a str), or the raw body text
    - ``len``: value returned by ``len()``; only set to 0 here, in dry run mode
    - ``dry_run``: copy of the constructor argument
    """
    def __init__(self, response: "requests.Response", dry_run: bool=False):
        """
        :param response: response object; ``status_code``, ``content`` and ``request`` are read
        :param dry_run: indicates that no request was actually sent
        """
        self.response = response  # for debug purposes
        self.response_dict: Union[dict, None] = None
        self.status_code: int = response.status_code
        self.success: bool = False
        self.success_json_loads: bool = False
        self.result: Union[dict, None] = None
        self.error_message: Union[None, str, dict] = None
        self.len: Union[int, None] = None
        self.dry_run: bool = dry_run

        if response.content is None and response.request is None:
            # dry run: a placeholder response without content and without request
            assert(dry_run)
            self.success = True
            self.success_json_loads = False
            self.status_code = 1
            self.error_message = "Request not sent: dry run mode"
            self.len = 0
        else:
            try:
                response_dict = json.loads(response.content.decode())
                self.response_dict = response_dict
                self.success_json_loads = True
                is_dict = isinstance(response_dict, dict)
                if (is_dict and response.status_code == 200
                        and "success" in response_dict and "result" in response_dict
                        and response_dict["success"]):
                    self.success = True
                    self.result = response_dict["result"]
                elif is_dict and "error" in response_dict:
                    self.error_message = response_dict["error"]
                else:
                    # valid JSON without an "error" entry (or not a JSON object): keep the raw text
                    self.error_message = response.content.decode()
            except Exception as json_error:
                # decode leniently here: the body may be the very reason of the failure (not UTF-8),
                # and raising again from this handler would hide the server response
                raw_text = response.content.decode(errors="replace")
                self.error_message = f"JSON decode error {json_error} & CKAN error {raw_text}"

    def __len__(self):
        if self.len is None:
            raise RuntimeError("queried len but does not have len")
        return self.len

    def _error_type(self) -> Union[str, None]:
        """
        Return the "__type" entry of the error message, or None when the body was not decoded
        as JSON, the error message is not a dict, or the entry is absent.
        """
        if self.success_json_loads and isinstance(self.error_message, dict):
            return self.error_message.get("__type")
        return None

    def default_error(self, ckan) -> "CkanActionError":
        """
        Build (without raising) the exception matching the response:

        - status 404 with error type "Not Found Error": CkanNotFoundError
        - status 403 with error type "Authorization Error": CkanAuthorizationError
        - otherwise: CkanActionError

        :param ckan: object forwarded to the exception constructors
        :return: the exception instance, to be raised by the caller
        """
        error_type = self._error_type()
        if self.status_code == 404 and error_type == "Not Found Error":
            return CkanNotFoundError(ckan, "(Generic)", self)
        elif self.status_code == 403 and error_type == "Authorization Error":
            return CkanAuthorizationError(ckan, self)
        else:
            return CkanActionError(ckan, self)
67
+
68
+ ## action error codes
69
class CkanActionError(Exception):
    """
    Base exception for a failed CKAN action.

    The exception message is the error message of the response; the response and its
    status code are kept as attributes.
    """
    def __init__(self, ckan, response: CkanActionResponse, display_request:bool=True):
        """
        :param ckan: object whose ``_error_print_debug_response`` method is called when display_request is set
        :param response: decoded response which triggered the error
        :param display_request: option to hand the raw response to ``ckan`` for a debug print
        """
        message = response.error_message
        super().__init__(message)
        self.response = response
        self.status_code = response.status_code
        if not display_request:
            return
        ckan._error_print_debug_response(response.response)

    def __str__(self):
        prefix = f"Server code [{self.status_code}]: "
        return prefix + super().__str__()
79
+
80
class CkanNotFoundError(CkanActionError):
    """
    Error for an object which was not found.

    Note: the constructor rewrites ``response.error_message`` in place, prefixing it with
    the object type.
    """
    def __init__(self, ckan, object_type:str, response: CkanActionResponse, display_request:bool=True):
        """
        :param ckan: forwarded to CkanActionError
        :param object_type: label of the kind of object which was looked up
        :param response: decoded response which triggered the error
        :param display_request: forwarded to CkanActionError
        """
        original_message = response.error_message
        response.error_message = f"{object_type} not found: {original_message}"
        super().__init__(ckan, response, display_request=display_request)
        self.object_type = object_type
85
+
86
class CkanAuthorizationError(CkanActionError):
    """
    Error type used for authorization failures; adds nothing to CkanActionError.
    """
88
+
89
class CkanSqlCapabilityError(CkanActionError):
    """
    Error reporting that SQL queries are not enabled on the CKAN server.

    Note: the constructor replaces ``response.error_message`` with a fixed explanation.
    """
    def __init__(self, ckan, response: CkanActionResponse, display_request:bool=True):
        explanation = "sql capabilities are not activated on CKAN server. See documentation for option ckan.datastore.sqlsearch.enabled"
        response.error_message = explanation
        super().__init__(ckan, response, display_request=display_request)
93
+
@@ -0,0 +1,213 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Methods to load an API key
5
+ """
6
+
7
+ import os.path
8
+ from warnings import warn
9
+ from typing import Dict, Union, Iterable
10
+ import getpass
11
+ import argparse
12
+
13
+ from ckanapi_harvesters.auxiliary.ckan_errors import ApiKeyFileError
14
+ from ckanapi_harvesters.auxiliary.path import sanitize_path, path_rel_to_dir
15
+ from ckanapi_harvesters.auxiliary.ckan_defs import environ_keyword
16
+
17
+
18
+
19
class ApiKey:
    """
    API key storage class.

    The key is kept in a private attribute and masked by ``str()``.
    """
    CKAN_API_KEY_HEADER_NAME = {"Authorization", "X-CKAN-API-Key"}  # match apikey_header_name of your CKAN instance
    CKAN_API_KEY_ENVIRON = "CKAN_API_KEY"  # not recommended to store sensitive information in environment variables
    API_KEY_FILE_ENVIRON = "CKAN_API_KEY_FILE"

    def __init__(self, *, apikey:str=None, apikey_file:str=None,
                 api_key_header_name:Union[str, Iterable[str]]=None):
        """
        API key storage class.

        :param apikey: way to provide the API key directly (optional)
        :param apikey_file: path to a file containing a valid API key in the first line of text (optional)
        :param api_key_header_name: name, or collection of names, of the HTTP header(s) which carry the key.
            Defaults to "Authorization".
        """
        if api_key_header_name is None:
            api_key_header_name = "Authorization"
        self.apikey_file: str = apikey_file  # path to a file containing a valid API key in the first line of text (optional)
        self._apikey: str = apikey  # API key used for restricted package access
        self.api_key_header_name = api_key_header_name

    def __del__(self):
        # forget the key when the object is discarded
        self.clear()

    def __copy__(self):
        return self.copy()

    def copy(self, *, dest=None):
        """
        Copy the key, its source file path and the header name(s) to another object.

        :param dest: object to fill; a new ApiKey is created if None
        :return: dest
        """
        if dest is None:
            dest = ApiKey()
        dest.apikey_file = self.apikey_file
        dest._apikey = self._apikey
        # without this line, a copy silently fell back to the header name chosen by dest's constructor
        dest.api_key_header_name = self.api_key_header_name
        return dest

    def __str__(self):
        # never reveal the key
        if self._apikey is None:
            return "None"
        elif self._apikey == "":
            return "<empty string>"
        else:
            return "*****"

    @property
    def value(self) -> Union[str,None]:
        return self._apikey
    @value.setter
    def value(self, value:Union[str,None]):
        self._apikey = value

    def is_empty(self):
        """
        :return: True if no key is stored (an empty string counts as a stored key)
        """
        return self._apikey is None

    def clear(self) -> None:
        self._apikey = None

    def load_from_environ(self, *, error_not_found:bool=False) -> bool:
        """
        Load CKAN API key from environment variables, by order of priority:

        By default, no environment variables are used.

        :return: always False in this base implementation
        """
        return False

    def load_apikey(self, apikey_file:str=None, *, base_dir:str=None, error_not_found:bool=True) -> bool:
        """
        Load the API key from file.
        The file should contain a valid API key in the first line of text.

        :param apikey_file: path to the API key file (defaults to the apikey_file attribute).
            The following keywords are accepted:
            - "environ": the API key will be looked up in the environment variable with load_from_environ
        :param base_dir: base directory to find the API key file, if a relative path is provided
        :param error_not_found: if False, a missing file only triggers a warning and False is returned;
            if True, the error raised when opening the missing file is propagated
        :return: True if a key was loaded
        :raises ApiKeyFileError: if no file path is available
        """
        if apikey_file is None:
            apikey_file = self.apikey_file
        apikey_file = path_rel_to_dir(apikey_file, base_dir=base_dir, keyword_exceptions={environ_keyword})
        if apikey_file is None:
            raise ApiKeyFileError('apikey_file is required')
        api_keyword = apikey_file.strip().lower()
        if api_keyword == environ_keyword:
            return self.load_from_environ(error_not_found=error_not_found)
        if not(os.path.isfile(apikey_file)) and not error_not_found:
            msg = f"API key file does not exist: {apikey_file}"
            warn(msg)
            return False
        with open(apikey_file, 'r') as f:
            # only the first line is used; the context manager closes the file
            apikey = f.readline().strip()
        self.value = apikey
        self.apikey_file = apikey_file
        return True

    def get_auth_header(self) -> Dict[str, str]:
        """
        Returns the correct header with the API key for the requests needing it.
        If no API key was loaded, returns an empty dictionary.
        When several header names are configured, the key is repeated under each of them.
        """
        if self.value is not None:
            apikey_encoded = self.value
            if isinstance(self.api_key_header_name, str):
                return {self.api_key_header_name: apikey_encoded}
            else:
                return {key: apikey_encoded for key in self.api_key_header_name}
        else:
            return {}

    def input(self):
        """
        Prompt the user to input the API key in the console window.

        :return:
        """
        api_key = getpass.getpass("Please enter the API key: ")
        self._apikey = api_key

    @staticmethod
    def _setup_cli_parser(parser:argparse.ArgumentParser=None) -> argparse.ArgumentParser:
        """
        Add the --apikey and --apikey-file arguments to a parser (created if None).
        """
        if parser is None:
            parser = argparse.ArgumentParser(description="API key initialization")
        parser.add_argument("--apikey", type=str,
                            help="API key")
        parser.add_argument("--apikey-file", type=str,
                            help="Path to a file containing the API key (first line)")
        return parser

    def _cli_args_apply(self, args: argparse.Namespace, *, base_dir: str = None, error_not_found: bool = True) -> None:
        """
        Apply parsed CLI arguments: --apikey first, then --apikey-file (which overrides the value if loaded).
        """
        if args.apikey is not None:
            self.value = args.apikey
        if args.apikey_file is not None:
            self.load_apikey(args.apikey_file, base_dir=base_dir, error_not_found=error_not_found)
151
+
152
+
153
class CkanApiKey(ApiKey):
    """
    CKAN Database API key storage class.
    """

    def __init__(self, *, apikey:str=None, apikey_file:str=None):
        """
        CKAN Database API key storage class.
        The key is sent under every header name listed in CKAN_API_KEY_HEADER_NAME.

        :param apikey: way to provide the API key directly (optional)
        :param apikey_file: path to a file containing a valid API key in the first line of text (optional)
        """
        header_names = self.CKAN_API_KEY_HEADER_NAME
        super().__init__(apikey=apikey, apikey_file=apikey_file, api_key_header_name=header_names)

    def copy(self, *, dest=None) -> "CkanApiKey":
        """
        :param dest: object to fill; a new CkanApiKey is created if None
        :return: the filled object
        """
        target = CkanApiKey() if dest is None else dest
        super().copy(dest=target)
        return target

    def load_from_environ(self, *, error_not_found:bool=False) -> bool:
        """
        Load CKAN API key from environment variables, by order of priority:

        - `CKAN_API_KEY`: for the raw API key (it is not recommended to store API key in an environment variable)
        - `CKAN_API_KEY_FILE`: path to a file containing a valid API key in the first line of text

        :param error_not_found: raise an error if the API key file was not found
        :return: True if a key was loaded
        """
        raw_key = os.environ.get(self.CKAN_API_KEY_ENVIRON)  # "CKAN_API_KEY"
        key_file = sanitize_path(os.environ.get(self.API_KEY_FILE_ENVIRON))  # "CKAN_API_KEY_FILE"
        if raw_key is not None:
            warn(f"It is not recommended to store sensitive information in environment variables such as the API key ({self.CKAN_API_KEY_ENVIRON})")
            self.value = raw_key
            return True
        if key_file is None:
            warn(f"No API key was found in the environment variable {self.CKAN_API_KEY_ENVIRON}")
            return False
        assert not key_file.strip().lower() == environ_keyword  # this value would create an infinite loop
        return self.load_apikey(key_file, error_not_found=error_not_found)

    def input(self):
        """
        Prompt the user to input the API key in the console window.

        :return:
        """
        self._apikey = getpass.getpass("Please enter the CKAN API key: ")

    @staticmethod
    def _setup_cli_parser(parser:argparse.ArgumentParser=None) -> argparse.ArgumentParser:
        """
        Same arguments as ApiKey._setup_cli_parser, with a CKAN-specific description for a new parser.
        """
        cli_parser = parser
        if cli_parser is None:
            cli_parser = argparse.ArgumentParser(description="CKAN API key initialization")
        ApiKey._setup_cli_parser(parser=cli_parser)
        return cli_parser
213
+
@@ -0,0 +1,293 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Data model to represent a CKAN database architecture
5
+ """
6
+ from typing import Iterable, Union, Set, Tuple, final
7
+ from enum import IntEnum
8
+ import json
9
+ import numbers
10
+ import os
11
+ import io
12
+ import shlex
13
+ import argparse
14
+ import re
15
+
16
+ import pandas as pd
17
+ import numpy as np
18
+
19
+ from ckanapi_harvesters.auxiliary.path import path_rel_to_dir, make_path_relative
20
+
21
+
22
# regular expression accepting only digits, lowercase ASCII letters, "-" and "_" (the empty string also matches)
ckan_package_name_re = "^[0-9a-z-_]*$"
# name of the id column - presumably the row identifier column of a DataStore table (to be confirmed)
datastore_id_col = "_id"
24
+
25
+
26
class CkanIdFieldTreatment(IntEnum):
    """
    Options for the treatment of the id field (keep it, use it as index, or remove it).
    """
    Keep = 0
    SetIndex = 1
    Remove = 2
30
+
31
re_geometry = r"geometry\((\w+),\s*(\d+)\)"
def parse_geometry_native_type(geometry_type:str) -> Tuple[str,int]:
    """
    Extract the geometry type and the EPSG code from a native type written ``geometry(<type>, <epsg>)``.

    The ``geometry`` keyword is matched case-insensitively; the geometry type is returned as written.

    :param geometry_type: native type string, e.g. "geometry(Point, 4326)"
    :return: Tuple geometry type, EPSG code
    :raises ValueError: if the string does not follow the expected pattern
    """
    match = re.search(re_geometry, geometry_type, flags=re.IGNORECASE)
    if match is None:
        # previously failed with an AttributeError on None
        raise ValueError(f"Not a geometry native type (expected 'geometry(<type>, <epsg>)'): {geometry_type}")
    return match.group(1), int(match.group(2))
37
+
38
class CkanFieldInternalAttrs:
    """
    Custom information for internal use
    """
    def __init__(self):
        self.geometry_as_source: Union[bool, None] = None
        self.geometry_type: Union[str, None] = None
        self.epsg_target: Union[int, None] = None
        self.epsg_source: Union[int, None] = None

    def __copy__(self):
        return self.copy()

    def __eq__(self, other):
        # comparing with a foreign object (e.g. None) must answer "not equal" instead of failing
        if not isinstance(other, CkanFieldInternalAttrs):
            return NotImplemented
        return self.__dict__ == other.__dict__

    def copy(self) -> "CkanFieldInternalAttrs":
        """
        :return: a new object with the same attribute values
        """
        dest = CkanFieldInternalAttrs()
        # geometry_as_source was previously dropped, which made a copy compare unequal to its source
        dest.geometry_as_source = self.geometry_as_source
        # from: native type (geometries)
        dest.geometry_type = self.geometry_type
        dest.epsg_target = self.epsg_target
        # user options
        dest.epsg_source = self.epsg_source
        return dest

    def merge(self, new_values: "CkanFieldInternalAttrs") -> "CkanFieldInternalAttrs":
        """
        Return a copy of this object where each attribute which is not None in new_values
        is replaced by the value of new_values. Neither operand is modified.
        """
        dest = self.copy()
        if new_values.geometry_as_source is not None:
            dest.geometry_as_source = new_values.geometry_as_source
        if new_values.geometry_type is not None:
            dest.geometry_type = new_values.geometry_type
        if new_values.epsg_source is not None:
            dest.epsg_source = new_values.epsg_source
        if new_values.epsg_target is not None:
            dest.epsg_target = new_values.epsg_target
        return dest

    @staticmethod
    def _setup_cli_ckan_parser(parser:argparse.ArgumentParser=None) -> argparse.ArgumentParser:
        """
        Add the --epsg-src argument to a parser (created if None).
        """
        if parser is None:
            parser = argparse.ArgumentParser(description="CKAN internal field parameters")
        parser.add_argument("--epsg-src", type=int,
                            help="Source EPSG (geographic coordinate system) for the column, used by data_cleaner")
        return parser

    def _cli_ckan_args_apply(self, args: argparse.Namespace) -> None:
        # a missing option (None) leaves the current value untouched
        if args.epsg_src:
            self.epsg_source = args.epsg_src

    def init_from_options_string(self, options_string:str) -> None:
        """
        Read the options written in command-line style, e.g. "--epsg-src 4326".

        :param options_string: options string, split with shlex; None is ignored
        """
        if options_string is None:
            return
        parser = self._setup_cli_ckan_parser()
        args = parser.parse_args(shlex.split(options_string))
        self._cli_ckan_args_apply(args)

    def init_from_native_type(self, native_type:str) -> None:
        """
        Fill geometry_type and epsg_target from a native type starting with "geometry(".
        Other native types, and None, are ignored.
        """
        if native_type is None:
            return
        if native_type.lower().strip().startswith("geometry("):
            geometry_type, geo_epsg = parse_geometry_native_type(native_type)
            self.geometry_type = geometry_type
            self.epsg_target = geo_epsg

    def update_from_ckan(self, ckan):
        """
        When a source EPSG is defined, set the target EPSG to ``ckan.params.ckan_default_target_epsg``.
        """
        if self.epsg_source is not None:
            self.epsg_target = ckan.params.ckan_default_target_epsg
103
+
104
+
105
+ ## Requests ------------------
106
json_headers = {"Content-Type": "application/json", 'Accept': 'text/plain'}
max_len_debug_print = 5000


def json_encode_params(params:dict) -> Tuple[str, dict]:
    """
    For upload requests, with a records field, it is necessary to specify the params in the data argument
    instead of the json argument of requests.
    In the case there are NaN values, these are not supported by the requests encoder.
    Here the payload is produced by json.dumps in compact form (no spaces after separators).

    ___Requirement___: add headers=json_headers !!!

    :param params: parameters to encode
    :return: Tuple payload string, headers to send with it. The headers are a fresh copy of json_headers
        at each call, so that the caller can add entries without altering the module-level default.
    """
    data_payload = json.dumps(params, separators=(',', ':'))
    return data_payload, dict(json_headers)
123
+
124
class RequestType(IntEnum):
    """
    Kind of request (Get or Post).
    """
    Get = 1
    Post = 2
127
+
128
def requests_multipart_data(json_dict:dict, files:dict) -> dict:
    """
    Build the multipart mapping for a request carrying both json and file parts.
    The result is meant for the files argument of requests.post; json_headers must not be used with it.

    :param json_dict: content of the "json" part, encoded with json.dumps
    :param files: file parts; must be a dict without a "json" key
    :return: dict with the "json" part first, followed by the entries of files
    :raises ValueError: if files is not a dict or already holds a "json" key
    """
    encoded_json = json.dumps(json_dict)
    files_accepted = isinstance(files, dict) and "json" not in files
    if not files_accepted:
        raise ValueError("files")
    return {"json": (None, encoded_json, "application/json"), **files}
143
+
144
# extra keyword arguments for DataFrame.to_csv (the upload one is used below)
df_upload_to_csv_kwargs = dict()
df_download_to_csv_kwargs = dict()

def upload_prepare_requests_files_arg(*, files:dict=None, file_path:str=None, df:pd.DataFrame=None,
                                      payload:Union[bytes, io.BufferedIOBase]=None, payload_name:str=None) -> dict:
    """
    Create files argument for requests.post, by order of priority:

    :param files: files pass through argument to the requests.post function. Use to send other data formats.
    :param payload: bytes to upload as a file
    :param payload_name: name of the payload to use (associated with the payload argument) - this determines the format recognized in CKAN viewers.
    :param file_path: path of the file to transmit (binary and text files are supported here)
    :param df: pandas DataFrame to replace resource

    The sources are mutually exclusive (checked with assertions).
    With file_path, the returned dict holds an open file object: closing it is left to the caller.

    :return: dict for the files argument, or None if no source was given
    """
    if files is not None:
        assert (file_path is None and df is None and payload is None)
    elif payload is not None:
        assert (file_path is None and df is None)
        if payload_name is not None:
            files = {"upload": (payload_name, payload)}
        else:
            files = {"upload": payload}
    elif file_path is not None:
        assert (df is None)
        payload_file_name = os.path.basename(file_path)
        # binary mode: the bytes are sent as stored on disk (no decoding, no newline translation),
        # which also makes binary files work
        files = {"upload": (payload_file_name, open(file_path, "rb"))}
    elif df is not None:
        payload_file_name = "file.csv"
        files = {"upload": (payload_file_name, df.to_csv(index=False, **df_upload_to_csv_kwargs), "text/plain")}
    else:
        files = None
    return files
182
+
183
+
184
+ ## Path for specific objects ------------------
185
def ca_file_rel_to_dir(ca_file:Union[str,None], base_dir:str=None) -> Tuple[Union[bool,str,None], Union[str,None]]:
    """
    Interpret a CA certificate setting which is either a boolean keyword or a file path.

    :param ca_file: "true" / "false" (case and surrounding spaces ignored), a path, or None
    :param base_dir: base directory handed to path_rel_to_dir for a path
    :return: Tuple (value, source string): (True, None) or (False, None) for the keywords,
        (result of path_rel_to_dir, ca_file) for a path, (None, None) for None
    """
    if ca_file is None:
        return None, None
    keyword = ca_file.strip().lower()
    if keyword == "true":
        return True, None
    if keyword == "false":
        return False, None
    return path_rel_to_dir(ca_file, base_dir), ca_file
196
+
197
def ca_arg_to_str(ca_cert:Union[bool,str,None], base_dir:str=None, source_string:str=None) -> Union[str,None]:
    """
    Convert a CA certificate argument to a string.

    :param ca_cert: False, a path, or any other value
    :param base_dir: handed to make_path_relative for a path
    :param source_string: handed to make_path_relative for a path
    :return: "False" for the boolean False, the result of make_path_relative for a string,
        None otherwise (including True and None)
    """
    if isinstance(ca_cert, bool):
        return None if ca_cert else "False"
    if isinstance(ca_cert, str):
        return make_path_relative(ca_cert, base_dir, source_string=source_string)
    return None
204
+
205
def ssl_arguments_decompose(ca_cert:Union[bool,str,None], *, default_ssl:bool=True) -> Tuple[bool, Union[str,None]]:
    """
    Split a certificate argument (presumably the verify argument of requests - to be confirmed)
    into a boolean and a path to a certificate file.

    :param ca_cert: None, a boolean or a path to a certificate file
    :param default_ssl: option to indicate if SSL should be enabled if ca_cert is None
    :return: Tuple ssl, ssl_certfile. Note: a value of any other type yields None.
    """
    if ca_cert is None:
        return default_ssl, None
    if isinstance(ca_cert, bool):
        return ca_cert, None
    if isinstance(ca_cert, str):
        return True, ca_cert
    return None
219
+
220
+ ## Auxiliary functions ------------------
221
def assert_or_raise(condition: bool, e: Exception) -> None:
    """
    Raise the given exception unless the condition is truthy.

    :param condition: condition to check
    :param e: exception to raise if the condition is not met
    """
    if condition:
        return
    raise e
224
+
225
def find_duplicates(list_str:Iterable) -> list:
    """
    List the repeated occurrences of an iterable of hashable items.

    :param list_str: items to inspect
    :return: every occurrence after the first one, in encounter order
        (an item present three times is returned twice)
    """
    already_seen = set()
    repeated = []
    for item in list_str:
        if item in already_seen:
            repeated.append(item)
        else:
            already_seen.add(item)
    return repeated
236
+
237
def dict_recursive_update(d:dict,u:dict) -> dict:
    """
    Update a dictionary in place with the values of another one, merging nested dictionaries
    instead of replacing them.

    :param d: dictionary to update (modified in place)
    :param u: new values; a dict value is merged into the matching entry of d
    :return: d
    """
    for k, v in u.items():
        if isinstance(v, dict):
            current = d.get(k)
            if not isinstance(current, dict):
                # absent, or a scalar about to be replaced by a dict: start from an empty dict
                # (merging into a non-dict value used to fail)
                current = {}
            d[k] = dict_recursive_update(current, v)
        else:
            d[k] = v
    return d
244
+
245
+ def _bool_from_string(string:str, default_value:Union[bool,None]=False) -> Union[bool,None]:
246
+ if isinstance(string, bool):
247
+ return string
248
+ else:
249
+ keyword = string.lower().strip()
250
+ if keyword == "true":
251
+ return True
252
+ elif keyword == "false":
253
+ return False
254
+ else:
255
+ return default_value
256
+
257
+ def _string_from_element(element: pd.Series, empty_value=None) -> str:
258
+ if isinstance(element, pd.Series):
259
+ value = element.values[0]
260
+ else:
261
+ value = element
262
+ if ((value is None)
263
+ or (isinstance(value, numbers.Number) and np.isnan(value))
264
+ or (isinstance(value, str) and len(value) == 0)):
265
+ return empty_value
266
+ else:
267
+ return value
268
+
269
def bytes_to_megabytes(size_bytes:int) -> float:
    """
    Convert a size in bytes to megabytes (1024 * 1024 bytes), rounded to 2 decimals.
    """
    kilobytes = size_bytes / 1024
    megabytes = kilobytes / 1024
    return round(megabytes, 2)
271
+
272
+ ## json
273
def _jsons_repl_func(match):
    # collapse every run of whitespace (including line breaks) of the matched text into one space
    return " ".join(match.group().split())
def to_jsons_indent_lists_single_line(obj, *args, reduced_size:bool=False, **kwargs) -> str:
    """
    Modified json representation of an object.
    Lists with strings / integers are displayed on one line.

    :param obj: object to encode
    :param args: args to pass to json.dumps()
    :param reduced_size: option to not indent the json output (not human-readable)
    :param kwargs: kwargs to pass to json.dumps(). An indent given here replaces the default of 4
        (it used to fail with a duplicate keyword error).
    :return: json string
    """
    if reduced_size:
        return json.dumps(obj, *args, **kwargs)
    kwargs.setdefault("indent", 4)
    output = json.dumps(obj, *args, **kwargs)
    # text between "[" and "]" holding no other bracket or brace: a list of scalars
    output = re.sub(r"(?<=\[)[^\[\]\{\}]+(?=\])", _jsons_repl_func, output)
    return output
293
+