ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ckanapi_harvesters/__init__.py +32 -10
- ckanapi_harvesters/auxiliary/__init__.py +26 -0
- ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
- ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
- ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
- ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
- ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
- ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
- ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
- ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
- ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
- ckanapi_harvesters/auxiliary/deprecated.py +82 -0
- ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
- ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
- ckanapi_harvesters/auxiliary/list_records.py +60 -0
- ckanapi_harvesters/auxiliary/login.py +163 -0
- ckanapi_harvesters/auxiliary/path.py +208 -0
- ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
- ckanapi_harvesters/auxiliary/urls.py +40 -0
- ckanapi_harvesters/builder/__init__.py +40 -0
- ckanapi_harvesters/builder/builder_aux.py +20 -0
- ckanapi_harvesters/builder/builder_ckan.py +238 -0
- ckanapi_harvesters/builder/builder_errors.py +36 -0
- ckanapi_harvesters/builder/builder_field.py +122 -0
- ckanapi_harvesters/builder/builder_package.py +9 -0
- ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
- ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
- ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
- ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
- ckanapi_harvesters/builder/builder_resource.py +589 -0
- ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
- ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
- ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
- ckanapi_harvesters/builder/builder_resource_init.py +126 -0
- ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
- ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
- ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
- ckanapi_harvesters/builder/example/__init__.py +21 -0
- ckanapi_harvesters/builder/example/builder_example.py +21 -0
- ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
- ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
- ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
- ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
- ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
- ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
- ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
- ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
- ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
- ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
- ckanapi_harvesters/builder/mapper_datastore.py +93 -0
- ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
- ckanapi_harvesters/builder/specific/__init__.py +11 -0
- ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
- ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
- ckanapi_harvesters/ckan_api/__init__.py +20 -0
- ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
- ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
- ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
- ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
- ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
- ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
- ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
- ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
- ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
- ckanapi_harvesters/harvesters/__init__.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
- ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
- ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
- ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
- ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
- ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
- ckanapi_harvesters/harvesters/harvester_init.py +30 -0
- ckanapi_harvesters/harvesters/harvester_model.py +49 -0
- ckanapi_harvesters/harvesters/harvester_params.py +323 -0
- ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
- ckanapi_harvesters/harvesters/postgre_params.py +86 -0
- ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
- ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
- ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
- ckanapi_harvesters/policies/__init__.py +20 -0
- ckanapi_harvesters/policies/data_format_policy.py +269 -0
- ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
- ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
- ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
- ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
- ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
- ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
- ckanapi_harvesters/reports/__init__.py +11 -0
- ckanapi_harvesters/reports/admin_report.py +292 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/METADATA +84 -38
- ckanapi_harvesters-0.0.3.dist-info/RECORD +105 -0
- ckanapi_harvesters/divider/__init__.py +0 -27
- ckanapi_harvesters/divider/divider.py +0 -53
- ckanapi_harvesters/divider/divider_error.py +0 -59
- ckanapi_harvesters/main.py +0 -30
- ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/WHEEL +0 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,896 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
from abc import ABC
|
|
7
|
+
from typing import List, Dict, Callable, Union, Any, Generator, Sequence, Tuple, Collection
|
|
8
|
+
from collections import OrderedDict
|
|
9
|
+
import time
|
|
10
|
+
import copy
|
|
11
|
+
from warnings import warn
|
|
12
|
+
import argparse
|
|
13
|
+
import shlex
|
|
14
|
+
import os
|
|
15
|
+
|
|
16
|
+
import requests
|
|
17
|
+
from requests.auth import AuthBase
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
from ckanapi_harvesters.auxiliary.error_level_message import ContextErrorLevelMessage, ErrorLevel
|
|
21
|
+
from ckanapi_harvesters.auxiliary.external_code_import import unlock_external_code_execution
|
|
22
|
+
from ckanapi_harvesters.auxiliary.ckan_configuration import download_external_resource_urls, \
|
|
23
|
+
unlock_external_url_resource_download, allow_no_ca, unlock_no_ca
|
|
24
|
+
from ckanapi_harvesters.auxiliary.ckan_defs import environ_keyword
|
|
25
|
+
from ckanapi_harvesters.auxiliary.path import path_rel_to_dir
|
|
26
|
+
from ckanapi_harvesters.auxiliary.urls import urlsep, url_join
|
|
27
|
+
from ckanapi_harvesters.auxiliary.ckan_auxiliary import RequestType, max_len_debug_print, assert_or_raise
|
|
28
|
+
from ckanapi_harvesters.auxiliary.proxy_config import ProxyConfig
|
|
29
|
+
from ckanapi_harvesters.auxiliary.ckan_action import CkanActionResponse, CkanActionError, CkanNotFoundError
|
|
30
|
+
from ckanapi_harvesters.auxiliary.ckan_errors import (MaxRequestsCountError, UnexpectedError, InvalidParameterError,
|
|
31
|
+
ExternalUrlLockedError, UrlError, NoCAVerificationError, RequestError)
|
|
32
|
+
from ckanapi_harvesters.auxiliary.ckan_map import CkanMap
|
|
33
|
+
from ckanapi_harvesters.auxiliary.ckan_api_key import CkanApiKey
|
|
34
|
+
from ckanapi_harvesters.ckan_api.ckan_api_params import CkanApiParamsBasic, CkanApiDebug
|
|
35
|
+
|
|
36
|
+
CKAN_API_VERSION = 3

# When True, the owner_org field of CkanApi is meant to provide the default owner
# organization (otherwise it should be None).
use_ckan_owner_org_as_default: bool = True
# When True, the CkanApi requests-proxy method fills in authentication headers by
# default for requests that target the same domain as the CKAN server.
ckan_request_proxy_default_auth_if_ckan: bool = True
|
|
40
|
+
|
|
41
|
+
## Abstract class
|
|
42
|
+
class CkanApiABC(ABC):
    """Abstract root of the CKAN API classes; it declares no members of its own."""
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
## Main class ------------------
|
|
48
|
+
class CkanApiBase(CkanApiABC):
|
|
49
|
+
"""
|
|
50
|
+
CKAN Database API interface to CKAN server with helper functions using pandas DataFrames.
|
|
51
|
+
This class implements the basic parameters and request functions.
|
|
52
|
+
"""
|
|
53
|
+
CKAN_URL_ENVIRON = "CKAN_URL"
|
|
54
|
+
|
|
55
|
+
def __init__(self, url:str=None, *, proxies:Union[str,dict,ProxyConfig]=None,
             apikey:Union[str,CkanApiKey]=None, apikey_file:str=None,
             owner_org:str=None, params:CkanApiParamsBasic=None,
             identifier=None):
    """
    Create a client for a CKAN server.

    :param url: url of the CKAN server (assigned through the ``url`` property)
    :param proxies: proxies to use for requests; when given, stored on ``params.proxies``
    :param apikey: API key, either as a raw value or as a CkanApiKey object (optional)
    :param apikey_file: path to a file containing a valid API key in the first line of text (optional)
    :param owner_org: name of the organization to limit package_search (optional)
    :param params: other connection/behavior parameters; a CkanApiParamsBasic is created when omitted
    :param identifier: identifier of the ckan client, for debugging purposes (defaults to "")
    """
    # --- normalise the optional arguments ---
    identifier = "" if identifier is None else identifier
    if not isinstance(apikey, CkanApiKey):
        # covers both None and a raw key value: wrap it in a key holder
        apikey = CkanApiKey(apikey=apikey)
    if apikey_file is not None:
        apikey.apikey_file = apikey_file
    if params is None:
        params = CkanApiParamsBasic()
    if proxies is not None:
        params.proxies = proxies

    # --- instance state ---
    self.identifier = identifier
    self._ckan_url: str = ""
    self.apikey: CkanApiKey = apikey
    self.owner_org: Union[str, None] = owner_org
    self.params: CkanApiParamsBasic = params
    # sessions are created on demand by _init_session
    self.ckan_session: Union[requests.Session, None] = None
    self.extern_session: Union[requests.Session, None] = None
    # NOTE(review): apikey is always a CkanApiKey at this point, so this branch
    # never runs; the key file is only read by an explicit load_apikey() call.
    # Confirm whether eager loading was intended before changing it.
    if apikey_file is not None and apikey is None:
        self.load_apikey()
    self.debug: CkanApiDebug = CkanApiDebug()

    # assigned last: goes through the url property setter
    self.url = url
|
|
91
|
+
|
|
92
|
+
def __del__(self):
    """
    Release the sessions and the API key holder when the instance is destroyed.

    The finalizer also runs for instances whose ``__init__`` raised part-way,
    in which case the attributes used here may not exist yet; each step is
    therefore skipped when its attribute is missing.
    """
    # disconnect() reads both session attributes: only call it when they exist
    if hasattr(self, "ckan_session") and hasattr(self, "extern_session"):
        self.disconnect()
    apikey = getattr(self, "apikey", None)
    if apikey is not None:
        apikey.__del__()
|
|
95
|
+
|
|
96
|
+
def __copy__(self):
    """Support ``copy.copy`` by delegating to :meth:`copy` with its default arguments."""
    duplicate = self.copy()
    return duplicate
|
|
98
|
+
|
|
99
|
+
def copy(self, new_identifier:str=None, *, dest=None):
    """
    Returns a copy of the current instance.
    Useful to use an initialized ckan object in a multithreaded context. Each thread would have its own copy.

    The url, parameters (copied), owner organization and API key (copied) are
    transferred to the destination. The destination gets a fresh debug object,
    is purged, and has no session: the sessions of the original instance are
    neither shared nor closed.

    :param new_identifier: identifier given to the copy; when None the identifier of ``dest`` is left as it is
    :param dest: existing object to fill in; a new CkanApiBase is created when None
    :return: the destination object
    """
    if dest is None:
        dest = CkanApiBase()
    dest._ckan_url = self._ckan_url
    dest.params = self.params.copy()
    dest.owner_org = self.owner_org
    dest.debug = CkanApiDebug()
    dest.apikey = self.apikey.copy()
    # The copy starts without sessions. This only resets the attributes of the
    # copy; the original instance's sessions stay open.
    dest.ckan_session = None
    dest.extern_session = None
    # post-copy operations
    if new_identifier is not None:
        dest.identifier = new_identifier
    dest.purge()
    return dest
|
|
122
|
+
|
|
123
|
+
def __str__(self) -> str:
    """
    Short text describing this client, for debugging purposes.

    :return: the CKAN server URL between angle brackets followed by the identifier
    """
    server = self.url
    label = str(self.identifier)
    return f"CKAN <{server}> {label}"
|
|
130
|
+
|
|
131
|
+
@property
def url(self) -> str:
    """URL of the CKAN server as stored by the setter (None when it was set to None)."""
    return self._ckan_url
@url.setter
def url(self, url:str) -> None:
    # The stored url always ends with the url separator (see is_url_internal).
    if url is None:
        self._ckan_url = None
        return
    if url.lower().strip() == environ_keyword:
        # keyword: read the url from the environment instead, leaving the API key untouched
        self.init_from_environ(init_api_key=False)
        return
    self._ckan_url = url if url.endswith(urlsep) else url + urlsep
|
|
145
|
+
|
|
146
|
+
def _init_session(self, *, internal:bool=False):
    """
    Create, if not already present, the object used to perform requests.
    This method can be overloaded to fit your needs (proxies, certificates, cookies, headers, etc.).

    :param internal: True to prepare the session used for the CKAN server,
        False to prepare the object used for external URLs
    :return:
    """
    if not internal:
        if self.extern_session is None:
            # The requests module itself is used rather than a Session:
            # do not persist cookies between domains & requests to external
            # resources are not meant to be numerous.
            self.extern_session = requests
        return
    if self.ckan_session is not None:
        return
    # the use of a session object will improve performance
    self.ckan_session = session = requests.Session()
    proxies = self.params.proxies
    if proxies is not None:
        session.proxies.update(proxies)
    # NOTE(review): assumed to apply whether or not proxies are configured - confirm
    session.auth = self.params.proxy_auth
    session.verify = self.params.ckan_ca
    session.headers = self.params.ckan_headers
    # API key is applied in the headers of each request
|
|
173
|
+
|
|
174
|
+
def connect(self):
    """Check the connection by calling ``test_ckan_login`` with ``raise_error=True``."""
    self.test_ckan_login(raise_error=True)
|
|
176
|
+
|
|
177
|
+
def disconnect(self):
    """Close the open sessions, if any, and reset both session attributes to None."""
    if self.ckan_session is not None:
        self.ckan_session.close()
    # The external "session" may be the requests module itself (see _init_session),
    # which has nothing to close: only real Session objects are closed.
    external = self.extern_session
    if external is not None and isinstance(external, requests.Session):
        external.close()
    self.ckan_session = None
    self.extern_session = None
|
|
184
|
+
|
|
185
|
+
def full_unlock(self, unlock:bool=True,
                *, no_ca:bool=None, external_url_resource_download:bool=None) -> None:
    """
    Function to unlock capabilities of the CKAN API which are locked by default.

    :param unlock: NOTE(review): currently not used by this function - only the
        keyword arguments below have an effect; confirm the intended meaning
    :param no_ca: when not None, value forwarded to ``unlock_no_ca``
    :param external_url_resource_download: when not None, value forwarded to
        ``unlock_external_url_resource_download``
    :return:
    """
    if no_ca is not None:
        unlock_no_ca(no_ca)
    if external_url_resource_download is not None:
        unlock_external_url_resource_download(external_url_resource_download)
|
|
197
|
+
|
|
198
|
+
def prepare_for_multithreading(self, mode_reduced:bool=True) -> None:
    """
    Switch the reduced-writes mode on or off.
    In reduced mode the last response and its debug information are no longer
    stored in the debug object and the current sessions are closed.
    It is recommended to enable the reduced writes mode in a multithreaded context.
    Do not forget to reset sessions at the beginning of each thread.

    :param mode_reduced: True to enable the reduced-writes mode, False to store responses again
    :return:
    """
    keep_responses = not mode_reduced
    debug = self.debug
    debug.store_last_response = keep_responses
    debug.store_last_response_debug_info = keep_responses
    if mode_reduced:
        self.disconnect()
|
|
211
|
+
|
|
212
|
+
def purge(self) -> None:
    """
    Erase temporary data stored in this object: the last response is dropped
    and the request counters and timing of the debug object are reset.
    """
    debug = self.debug
    debug.last_response = None
    zeroed_counters = (
        "ckan_request_counter",
        "extern_request_counter",
        "last_response_request_count",
        "multi_requests_last_successful_offset",
    )
    for counter_name in zeroed_counters:
        setattr(debug, counter_name, 0)
    debug.last_response_elapsed_time = 0.0
|
|
224
|
+
|
|
225
|
+
def set_limits(self, limit_read:Union[int,None]) -> None:
    """
    Set the default query limits. The single value given is applied to both
    ``default_limit_read`` and ``default_limit_list`` of the parameters.

    :param limit_read: default limit for read requests
    :return:
    """
    params = self.params
    params.default_limit_read = params.default_limit_list = limit_read
|
|
234
|
+
|
|
235
|
+
def set_verbosity(self, verbosity:bool=True, verbose_extra:bool=None) -> None:
    """
    Enable/disable full verbose output

    :param verbosity: boolean applied to the multi-requests, request and request-error flags. Cannot be None
    :param verbose_extra: when not None, new value of the ``verbose_extra`` flag (left unchanged otherwise)
    :return:
    """
    params = self.params
    for flag_name in ("verbose_multi_requests", "verbose_request", "verbose_request_error"):
        setattr(params, flag_name, verbosity)
    if verbose_extra is not None:
        params.verbose_extra = verbose_extra
|
|
246
|
+
|
|
247
|
+
def set_proxies(self, proxies:Union[str,dict,ProxyConfig], *, default_proxies:dict=None, proxy_headers:dict=None) -> None:
    """
    Set up the proxy configuration

    :param proxies: string or proxies dict or ProxyConfig object.
        If a string is provided, it must be an url to a proxy or one of the following values:
        - "environ": use the proxies specified in the environment variables "http_proxy" and "https_proxy"
        - "noproxy": do not use any proxies
        - "unspecified": do not specify the proxies
        - "default": use value provided by default_proxies
    :param default_proxies: proxies used if proxies="default"
    :param proxy_headers: headers used to access the proxies, generally for authentication
    :return:
    """
    proxy_config = ProxyConfig.from_str_or_config(
        proxies, default_proxies=default_proxies, proxy_headers=proxy_headers)
    self.params._proxy_config = proxy_config
|
|
263
|
+
|
|
264
|
+
def init_from_environ(self, *, init_api_key:bool=True, error_not_found:bool=False) -> None:
    """
    Initialize CKAN from environment variables.

    - `CKAN_URL` for the url of the CKAN server.

    And optionally:
    - `CKAN_API_KEY`: for the raw API key (it is not recommended to store API key in an environment variable)
    - `CKAN_API_KEY_FILE`: path to a file containing a valid API key in the first line of text

    :param init_api_key: also initialize the API key from the environment
    :param error_not_found: raise an error if the API key file was not found
    :raises AssertionError: if the url environment variable itself holds the
        "environ" keyword
    :return:
    """
    ckan_url = os.environ.get(self.CKAN_URL_ENVIRON)  # "CKAN_URL"
    if ckan_url is not None:
        if ckan_url.lower().strip() == environ_keyword:
            # The url setter would call this method again for the keyword and
            # loop forever. Raised explicitly (not with an assert statement)
            # so the guard is still enforced when Python runs with -O.
            raise AssertionError(
                f"Environment variable {self.CKAN_URL_ENVIRON} must contain a URL, not the keyword '{ckan_url}'")
        self.url = ckan_url
    if init_api_key:
        self.apikey.load_from_environ(error_not_found=error_not_found)
|
|
283
|
+
|
|
284
|
+
def _setup_cli_ckan_parser(self, parser:argparse.ArgumentParser=None) -> argparse.ArgumentParser:
    """
    Define or add CLI arguments to initialize a CKAN API connection
    parser help message:

    CKAN API connection parameters initialization

    options:
      -h, --help            show this help message and exit
      --ckan-url CKAN_URL   CKAN URL
      --apikey APIKEY       CKAN API key
      --apikey-file APIKEY_FILE
                            Path to a file containing the CKAN API key (first line)
      --policy-file POLICY_FILE
                            Path to a file containing the CKAN data format policy (json format)
      --owner-org OWNER_ORG
                            CKAN Owner Organization
      --default-limit DEFAULT_LIMIT
                            Default number of rows per request
      --verbose VERBOSE     Option to set verbosity

    :param parser: option to provide an existing parser to add the specific fields needed to initialize a CKAN API connection
    :return: the parser which received the arguments
    """
    cli_parser = parser
    if cli_parser is None:
        cli_parser = argparse.ArgumentParser(description="CKAN API connection parameters initialization")
    cli_parser.add_argument("--ckan-url", type=str, help="CKAN URL")
    # add arguments --apikey-file --apikey
    CkanApiKey._setup_cli_parser(cli_parser)
    # the remaining arguments are declared by the parameters object
    self.params._setup_cli_ckan_parser__params(cli_parser)
    # parser.add_argument("--external-code", action="store_true",
    #                     help="Enable external code execution for builder (only enable for trusted sources)")
    return cli_parser
|
|
317
|
+
|
|
318
|
+
def _cli_ckan_args_apply(self, args: argparse.Namespace, *, base_dir:str=None, error_not_found:bool=True,
                         default_proxies:dict=None, proxy_headers:dict=None) -> None:
    """
    Apply the arguments parsed by the argument parser defined by _setup_cli_ckan_parser

    The parsed arguments are echoed on the standard output at the end, with the
    value of ``--apikey`` masked so the secret does not end up in consoles or logs.

    :param args: namespace produced by the parser
    :param base_dir: base directory to find the CKAN API key file, if a relative path is provided
        (recommended: leave None to use cwd)
    :param error_not_found: option to raise an exception if the CKAN API key file is not found
    :param default_proxies: proxies used if proxies="default"
    :param proxy_headers: headers used to access the proxies, generally for authentication
    :return:
    """
    if args.ckan_url is not None:
        self.url = args.ckan_url
    self.apikey._cli_args_apply(args, base_dir=base_dir, error_not_found=error_not_found)
    self.params._cli_ckan_args_apply(args, base_dir=base_dir, error_not_found=error_not_found,
                                     default_proxies=default_proxies, proxy_headers=proxy_headers)
    if args.default_limit is not None:
        self.set_limits(args.default_limit)
    if args.verbose is not None:
        self.set_verbosity(args.verbose)
    # if args.external_code:
    #     unlock_external_code_execution()
    # Echo a redacted copy: the namespace given by the caller is left untouched.
    echoed = dict(vars(args))
    if echoed.get("apikey") is not None:
        echoed["apikey"] = "***"
    print(argparse.Namespace(**echoed))
|
|
343
|
+
|
|
344
|
+
def initialize_from_cli_args(self, *, args:Sequence[str]=None, base_dir:str=None,
                             error_not_found:bool=True, parser:argparse.ArgumentParser=None,
                             default_proxies:dict=None, proxy_headers:dict=None) -> None:
    """
    Initialize the CKAN API connection from command line arguments.

    :param args: Option to provide arguments from another source (passed to ``parse_args``).
    :param base_dir: forwarded to ``_cli_ckan_args_apply``
    :param error_not_found: forwarded to ``_cli_ckan_args_apply``
    :param parser: existing parser to complete, forwarded to ``_setup_cli_ckan_parser``
    :param default_proxies: forwarded to ``_cli_ckan_args_apply``
    :param proxy_headers: forwarded to ``_cli_ckan_args_apply``
    :return:
    """
    cli_parser = self._setup_cli_ckan_parser(parser)
    namespace = cli_parser.parse_args(args)
    self._cli_ckan_args_apply(
        namespace,
        base_dir=base_dir,
        error_not_found=error_not_found,
        default_proxies=default_proxies,
        proxy_headers=proxy_headers,
    )
|
|
357
|
+
|
|
358
|
+
def input_cli_args(self, *, base_dir:str=None, error_not_found:bool=True, only_if_necessary:bool=False,
                   default_proxies:dict=None, proxy_headers:dict=None):
    """
    Ask in the console window for initialization parameters in the command-line
    format, then apply them with ``initialize_from_options_string``.

    :param base_dir: forwarded to ``initialize_from_options_string``
    :param error_not_found: forwarded to ``initialize_from_options_string``
    :param only_if_necessary: skip the prompt when the url is set and the API key is not empty
    :param default_proxies: forwarded to ``initialize_from_options_string``
    :param proxy_headers: forwarded to ``initialize_from_options_string``
    :return:
    """
    if only_if_necessary:
        # nothing to ask when both the url and the API key are already known
        if self.url is not None and not self.apikey.is_empty():  # and self.proxy_object.is_defined()):
            return
    typed_options = input("Please enter CKAN connection CLI arguments: ")
    self.initialize_from_options_string(
        typed_options,
        base_dir=base_dir,
        error_not_found=error_not_found,
        default_proxies=default_proxies,
        proxy_headers=proxy_headers,
    )
|
|
370
|
+
|
|
371
|
+
def initialize_from_options_string(self, options_string:str=None, base_dir:str=None,
                                   error_not_found:bool=True, parser:argparse.ArgumentParser=None,
                                   default_proxies:dict=None, proxy_headers:dict=None) -> None:
    """
    Initialize the CKAN API connection from a string in the command-line format.

    :param options_string: arguments as they would be typed in a shell;
        None or an empty string means "no arguments"
    :param base_dir: forwarded to ``_cli_ckan_args_apply``
    :param error_not_found: forwarded to ``_cli_ckan_args_apply``
    :param parser: existing parser to complete, forwarded to ``_setup_cli_ckan_parser``
    :param default_proxies: forwarded to ``_cli_ckan_args_apply``
    :param proxy_headers: forwarded to ``_cli_ckan_args_apply``
    :return:
    """
    parser = self._setup_cli_ckan_parser(parser)
    # shlex.split(None) reads the standard input on older Python versions and
    # raises on recent ones: the default value None is treated as no arguments.
    tokens = shlex.split(options_string) if options_string else []
    args = parser.parse_args(tokens)
    self._cli_ckan_args_apply(args, base_dir=base_dir, error_not_found=error_not_found,
                              default_proxies=default_proxies, proxy_headers=proxy_headers)
|
|
378
|
+
|
|
379
|
+
def input_missing_info(self, *, base_dir:str=None, input_args:bool=False, input_args_if_necessary:bool=False,
                       input_apikey:bool=True, error_not_found:bool=True):
    """
    Ask user information in the console window.

    :param base_dir: forwarded to ``input_cli_args``
    :param input_args: option to ask for connection arguments in the command-line format
    :param input_args_if_necessary: same as ``input_args``; its value is also passed
        to ``input_cli_args`` as ``only_if_necessary``
    :param input_apikey: option to ask for the API key when it is empty
    :param error_not_found: forwarded to ``input_cli_args``
    :return:
    """
    if input_args or input_args_if_necessary:
        self.input_cli_args(base_dir=base_dir, error_not_found=error_not_found,
                            only_if_necessary=input_args_if_necessary)
    if self.url is None:
        self.url = input("Please enter the CKAN URL: ")
    key_missing = self.apikey.is_empty()
    if key_missing and input_apikey:
        self.apikey.input()
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
## Error management ------------------
|
|
397
|
+
def _error_print_debug_response(self, response:"requests.Response", *,
                                url:str=None, params:dict=None, json:dict=None, error:Exception=None, headers:dict=None):
    """
    Print the details of a failed request, if ``params.verbose_request_error`` is enabled.

    When a response was obtained, its status code, content, request URL, request
    body and text are printed. When no response was obtained (``response`` is None),
    the url, params and json of the request given as arguments are printed instead.
    Long texts are cut at ``max_len_debug_print`` characters and followed by "[...]".

    :param response: response of the problematic request, or None if there was none
    :param url: url of the request, printed when there is no response
    :param params: request parameters, printed when there is no response
    :param json: request json body, printed when there is no response
    :param error: exception associated with the failure, if any
    :param headers: not used by this function
    :return:
    """
    if not self.params.verbose_request_error:
        return

    def as_text(value) -> str:
        # Printable form of a body; undecodable bytes are replaced so that
        # reporting an error cannot itself fail on a binary payload.
        if value is None:
            return "None"
        if isinstance(value, bytes):
            return value.decode(errors="replace")
        return value if isinstance(value, str) else str(value)

    def print_truncated(text:str) -> None:
        print(text[:max_len_debug_print])
        if len(text) > max_len_debug_print:
            print("[...]")

    print(f"{self.identifier} CKAN Response error details ({str(self)})")
    print(" ")
    if response is None:
        # there is no content to show in this case
        print(f"Problematic request did not obtain response ({url})")
    else:
        print(f"Problematic response code {response.status_code}:")
        print_truncated(as_text(response.content))
    if error is not None:
        print(" ")
        print("Exception error message:")
        print(str(error))
    if response is None:
        if params is not None:
            print(" ")
            print("Request params:")
            print(params)
        if json is not None:
            print(" ")
            print("Request json:")
            print(json)
    else:
        print(" ")
        print("Request URL:")
        print(response.request.url)
        print(" ")
        print("Request body:")
        print_truncated(as_text(response.request.body))
        print(" ")
        print("Response body:")
        print_truncated(as_text(response.text))
    print(" ")
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
## Authentification ------------------
|
|
456
|
+
def load_apikey(self, apikey_file:str=None, base_dir:str=None, error_not_found:bool=True):
    """
    Load the CKAN API key from file, by delegating to the API key object.
    The file should contain a valid API key in the first line of text.

    :param apikey_file: API key file (optional if specified at the creation of the object)
    :param base_dir: base directory, if the apikey_file is a relative path
    :param error_not_found: forwarded to the API key object
    :return:
    """
    key_holder = self.apikey
    key_holder.load_apikey(apikey_file=apikey_file, base_dir=base_dir, error_not_found=error_not_found)
|
|
466
|
+
|
|
467
|
+
def _prepare_headers(self, headers:dict=None, include_ckan_auth:bool=False) -> dict:
    """
    Prepare headers for a request. If the request is destined to the CKAN server,
    include authentication headers, if API key was provided.

    The result is always a new dict: the dict given by the caller is not modified,
    so authentication headers added for one request cannot leak into a later
    request that reuses the same initial headers (e.g. towards an external URL).

    :param headers: initial headers
    :param include_ckan_auth: boolean to include CKAN authentication headers
    :return: the initial headers completed with the user agent, proxy headers,
        http headers and, on demand, the CKAN headers and authentication header
    """
    headers = {} if headers is None else dict(headers)
    if self.params.user_agent is not None:
        headers["User-Agent"] = str(self.params.user_agent)
    headers.update(self.params._proxy_config.proxy_headers)
    headers.update(self.params.http_headers)
    if include_ckan_auth:
        headers.update(self.params.ckan_headers)
        headers.update(self.apikey.get_auth_header())
    return headers
|
|
486
|
+
|
|
487
|
+
@staticmethod
def unlock_no_ca(value:bool=True):
    """
    This function enables you to disable the CA verification of the CKAN server.

    __Warning__:
    Only allow in a local environment!

    :param value: value forwarded to the ``unlock_no_ca`` function imported from ckan_configuration
    """
    # As a method of the class, the name below resolves to the function imported
    # at the top of the file, not to this method.
    unlock_no_ca(value)
|
|
496
|
+
|
|
497
|
+
@staticmethod
def unlock_external_url_resource_download(value:bool=True):
    """
    This function enables the download of resources external from the CKAN server.

    :param value: value forwarded to the ``unlock_external_url_resource_download``
        function imported from ckan_configuration
    """
    # As a method of the class, the name below resolves to the function imported
    # at the top of the file, not to this method.
    unlock_external_url_resource_download(value)
|
|
503
|
+
|
|
504
|
+
def prepare_arguments_for_url_download_request(self, url:str, *,
                                               auth_if_ckan:bool=None, headers:dict=None, verify:Union[bool,str,None]=None) \
        -> Tuple[bool, dict]:
    """
    Build the ``headers`` and ``verify`` arguments of a download request.

    CKAN authentication headers are only added when the URL is recognized as
    pointing to the CKAN server. A URL outside of the CKAN server is refused
    unless external downloads have been unlocked, and is reported with a warning.

    :param url: target URL
    :param headers: initial headers
    :param auth_if_ckan: option to include CKAN authentication headers if the url is recognized
        as part of the CKAN server. If None, the module default is used.
    :param verify: CA verification argument; if None, the CKAN or external setting of the
        client parameters is used depending on the target
    :return: tuple (url is internal and authentication was requested, keyword arguments for the request)
    :raises ExternalUrlLockedError: if the URL is external and external downloads are locked
    """
    if auth_if_ckan is None:
        auth_if_ckan = ckan_request_proxy_default_auth_if_ckan
    url_is_internal = self.is_url_internal(url)
    if not url_is_internal and not download_external_resource_urls:
        raise ExternalUrlLockedError(url)
    if url_is_internal:
        prepared_headers = self._prepare_headers(headers, include_ckan_auth=auth_if_ckan)
        verify_ca = self.params.ckan_ca if verify is None else verify
    else:
        # never send CKAN credentials to a foreign host
        prepared_headers = self._prepare_headers(headers, include_ckan_auth=False)
        verify_ca = self.params.extern_ca if verify is None else verify
        msg = f"Request to external url: {url}"
        warn(msg)
    request_kwargs = {"headers": prepared_headers, "verify": verify_ca}
    return url_is_internal and auth_if_ckan, request_kwargs
|
|
533
|
+
|
|
534
|
+
def download_url_proxy(self, url:str, *, method:str=None, auth_if_ckan:bool=None,
                       proxies:dict=None, headers:dict=None, auth: Union[AuthBase, Tuple[str,str]]=None, verify:Union[bool,str,None]=None) -> requests.Response:
    """
    Download a URL using the CKAN parameters (proxy, authentication etc.)

    The request is sent through the CKAN session when the URL is internal and CKAN
    authentication is requested, otherwise through the external session.
    In dry-run mode, no request is sent and an empty response object is returned.

    :param url: target URL
    :param method: HTTP method, "GET" if not specified
    :param auth_if_ckan: option to include CKAN authentication headers if the url is recognized
        as part of the CKAN server
    :param proxies: proxies to use, taken from the client parameters if not specified
    :param headers: initial headers
    :param auth: authentication passed to the request, the proxy authentication of the client
        parameters if not specified
    :param verify: CA verification argument, resolved from the client parameters if not specified
    :return: the response of the request
    """
    if proxies is None:
        proxies = self.params.proxies
    if method is None:
        method = "GET"
    if auth is None:
        auth = self.params.proxy_auth
    url_is_internal_auth, request_kwargs = self.prepare_arguments_for_url_download_request(
        url, auth_if_ckan=auth_if_ckan, headers=headers, verify=verify)
    response = None
    self._init_session(internal=url_is_internal_auth)
    try:
        if self.params.dry_run:
            response = requests.Response()
        elif url_is_internal_auth:
            self.debug.ckan_request_counter += 1
            response = self.ckan_session.request(method, url, timeout=self.params.requests_timeout,
                                                 proxies=proxies, **request_kwargs, auth=auth)
        else:
            self.debug.extern_request_counter += 1
            response = self.extern_session.request(method, url, timeout=self.params.requests_timeout,
                                                   proxies=proxies, **request_kwargs, auth=auth)
    except Exception as e:
        # the headers given by the caller are reported here, not the prepared ones
        self._error_print_debug_response(response, url=url, headers=headers, error=e)
        # bare raise: re-raise the active exception without making it its own cause
        raise
    self.debug.last_response_request_count = 1
    if self.params.store_last_response:
        self.debug.last_response = response
    return response
|
|
571
|
+
|
|
572
|
+
def download_url_proxy_test_head(self, url:str, *, raise_error:bool=False, auth_if_ckan:bool=None,
                                 proxies:dict=None, headers:dict=None, auth: Union[AuthBase, Tuple[str,str]]=None,
                                 verify:Union[bool,str,None]=None, context:str=None) \
        -> Union[None,ContextErrorLevelMessage]:
    """
    This sends a HEAD request to the url using the CKAN connection parameters via download_url_proxy.
    The resource is not downloaded but the headers indicate if the url is valid.
    Only a response with status code 200 is considered successful.

    :param url: URL to test
    :param raise_error: if True, a failure raises an exception instead of returning a message
    :param auth_if_ckan: forwarded to download_url_proxy
    :param proxies: forwarded to download_url_proxy
    :param headers: forwarded to download_url_proxy
    :param auth: forwarded to download_url_proxy
    :param verify: forwarded to download_url_proxy
    :param context: context label of the returned message, "URL" if not specified
    :return: None if successful, otherwise an error message with the given context
    :raises RequestError: if raise_error is True and the status is not 200
    """
    if context is None:
        context = "URL"
    try:
        response = self.download_url_proxy(url, method="HEAD", auth_if_ckan=auth_if_ckan, proxies=proxies,
                                           headers=headers, auth=auth, verify=verify)
    except Exception as e:
        if raise_error:
            # bare raise: re-raise the active exception without making it its own cause
            raise
        return ContextErrorLevelMessage(context, ErrorLevel.Error, f"Failed to query url {url}: {str(e)}")
    if response.ok and response.status_code == 200:
        return None
    if raise_error:
        raise RequestError(f"Failed to query url {url}: status {response.status_code} {response.reason}")
    return ContextErrorLevelMessage(context, ErrorLevel.Error, f"Failed to query url: {url}: status {response.status_code} {response.reason}")
|
|
596
|
+
|
|
597
|
+
## API calls ------------------
|
|
598
|
+
def _error_empty_url(self, raise_error:bool=True) -> bool:
    """
    Check whether the CKAN URL is missing (None or empty string).

    :param raise_error: if True, a missing URL raises an error instead of returning True
    :return: True if the URL is missing, False otherwise
    :raises UrlError: if the URL is missing and raise_error is True
    """
    url_is_specified = self.url is not None and self.url != ""
    if url_is_specified:
        return False
    if raise_error:
        raise UrlError("CKAN URL was not specified")
    return True
|
|
604
|
+
|
|
605
|
+
def _get_api_url(self, category:str=None):
    """
    Build the URL of the API (version 3) of the CKAN server, optionally followed by a category.

    :param category: usually, "action"; nothing is appended if None
    :return: the API url
    """
    # raises if the CKAN URL was not specified
    self._error_empty_url()
    api_root = url_join(self.url, "api/3")
    if category is None:
        return api_root
    return api_root + urlsep + category
|
|
618
|
+
|
|
619
|
+
def _api_action_request(self, action:str, *, method:RequestType, params:dict=None,
                        headers:dict=None,
                        data:Union[dict,str,bytes]=None, json:dict=None, files:List[tuple]=None) -> CkanActionResponse:
    """
    Send API action request and return response.

    The request is sent through the CKAN session with the CKAN authentication headers.
    In dry-run mode, no request is sent and an empty response object is wrapped instead.

    :param action: action name
    :param method: GET / POST
    :param params: params to set in the url
    :param data: information to encode in the request body (only for POST method)
    :param json: information to encode as JSON in the request json (only for POST method)
    :param files: files to upload in the request (only for POST method)
    :param headers: headers for the request (authentication tokens are added by the function)
    :return: the response wrapped in a CkanActionResponse
    """
    if params is None:
        params = {}
    base = self._get_api_url("action")
    url = base + urlsep + action
    headers = self._prepare_headers(headers, include_ckan_auth=True)
    if self.params.verbose_request:
        # summary of the arguments, truncated to max_len_debug_print characters
        if json is not None:
            params_str = "json=" + str(json) + " / "
        else:
            params_str = ""
        params_str = params_str + str(params)
        if data is not None:
            params_str = params_str + "data=" + str(data)[:max_len_debug_print] + " / "
            if len(str(data)) > max_len_debug_print:
                params_str = params_str + "[...]"
        params_str = params_str[:max_len_debug_print]
        print(f"{self.identifier} API action '{action}' with arguments {params_str}")
    start = time.time()
    self.debug.ckan_request_counter += 1
    response = None
    self._init_session(internal=True)
    try:
        if self.params.dry_run:
            response = requests.Response()
        elif method == RequestType.Get:
            # a request body is not expected with the GET method
            assert_or_raise(data is None, UnexpectedError("data"))
            response = self.ckan_session.get(url, params=params, headers=headers, timeout=self.params.requests_timeout,
                                             proxies=self.params.proxies, verify=self.params.ckan_ca, auth=self.params.proxy_auth)
        else:
            response = self.ckan_session.post(url, data=data, headers=headers, params=params, files=files, json=json,
                                              timeout=self.params.requests_timeout,
                                              proxies=self.params.proxies, verify=self.params.ckan_ca, auth=self.params.proxy_auth)
    except Exception as e:
        self._error_print_debug_response(response, url=url, params=params, headers=headers, json=json, error=e)
        # bare raise: re-raise the active exception without making it its own cause
        raise
    end = time.time()
    # the placeholder response of the dry-run mode has no content to measure
    if self.params.verbose_request and not self.params.dry_run:
        print(f"{self.identifier} API action '{action}' done in {end-start} seconds. Received {len(response.content)} bytes")
    if self.params.store_last_response:
        self.debug.last_response = response
    if self.params.store_last_response_debug_info:
        self.debug.last_response_elapsed_time = end - start
        self.debug.last_response_request_count = 1
    return CkanActionResponse(response, self.params.dry_run)
|
|
677
|
+
|
|
678
|
+
def api_action_call(self, action:str, *, method:RequestType, params:dict=None,
                    headers:dict=None,
                    data:dict=None, json:dict=None, files:List[tuple]=None) -> CkanActionResponse:
    """
    Public alias of _api_action_request: every argument is forwarded unchanged.

    :param action: action name
    :param method: GET / POST
    :param params: params to set in the url
    :param headers: headers for the request
    :param data: information to encode in the request body
    :param json: information to encode as JSON
    :param files: files to upload in the request
    :return: the return value of _api_action_request
    """
    forwarded = dict(action=action, method=method, params=params, headers=headers,
                     data=data, json=json, files=files)
    return self._api_action_request(**forwarded)
|
|
683
|
+
|
|
684
|
+
def _url_request(self, path:str, *, method:RequestType, params:dict=None, headers:dict=None,
                 data:dict=None, json:dict=None, files:List[tuple]=None) -> requests.Response:
    """
    Send request to server and return response.

    The request is sent through the CKAN session with the CKAN authentication headers.
    In dry-run mode, no request is sent and an empty response object is returned.

    :param path: relative path to server url
    :param method: GET / POST
    :param params: params to set in the url
    :param data: information to encode in the request body (only for POST method)
    :param json: information to encode as JSON in the request (only for POST method)
    :param files: files to upload in the request (only for POST method)
    :param headers: headers for the request (authentication tokens are added by the function)
    :return: the response of the request
    """
    if params is None:
        params = {}
    self._error_empty_url()
    url = url_join(self.url, path)
    headers = self._prepare_headers(headers, include_ckan_auth=True)
    if self.params.verbose_request:
        # summary of the arguments, truncated to max_len_debug_print characters
        if json is not None:
            params_str = str(json) + " / "
        else:
            params_str = ""
        params_str = params_str + str(params)
        params_str = params_str[:min(len(params_str), max_len_debug_print)]
        print(f"{self.identifier} URL call {url} with arguments {params_str}")
    start = time.time()
    self.debug.ckan_request_counter += 1
    response = None
    self._init_session(internal=True)
    try:
        if self.params.dry_run:
            response = requests.Response()
        elif method == RequestType.Get:
            response = self.ckan_session.get(url, params=params, headers=headers, timeout=self.params.requests_timeout,
                                             proxies=self.params.proxies, verify=self.params.ckan_ca, auth=self.params.proxy_auth)
        else:
            response = self.ckan_session.post(url, data=data, headers=headers, params=params, timeout=self.params.requests_timeout,
                                              json=json, files=files,
                                              proxies=self.params.proxies, verify=self.params.ckan_ca, auth=self.params.proxy_auth)
    except Exception as e:
        self._error_print_debug_response(response, url=url, params=params, headers=headers, json=json, error=e)
        # bare raise: re-raise the active exception without making it its own cause
        raise
    end = time.time()
    # Same guard as in _api_action_request: the placeholder response of the
    # dry-run mode has no content, so its length must not be evaluated.
    if self.params.verbose_request and not self.params.dry_run:
        print(f"{self.identifier} URL call {url} done in {end-start} seconds. Received {len(response.content)} bytes")
    if self.params.store_last_response:
        self.debug.last_response = response
    if self.params.store_last_response_debug_info:
        self.debug.last_response_elapsed_time = end - start
        self.debug.last_response_request_count = 1
    return response
|
|
734
|
+
|
|
735
|
+
def api_help_show(self, action_name:str, *, print_output:bool=True) -> str:
    """
    Query the documentation of an API action with the "help_show" action.

    :param action_name: name of the action to document
    :param print_output: Option to print the output in the command line
    :return: the result of the help_show request
    :raises CkanNotFoundError: if the server answers 404 with a "Not Found Error" message
    """
    response = self._api_action_request("help_show", method=RequestType.Get, params={"name": action_name})
    if not response.success:
        action_not_found = (response.status_code == 404 and response.success_json_loads
                            and response.error_message["__type"] == "Not Found Error")
        if action_not_found:
            raise CkanNotFoundError(self, "Action", response)
        if print_output:
            print(f"No documentation found for action '{action_name}'")
        # any other failure is reported with the default error of the response
        raise response.default_error(self)
    if print_output:
        print(action_name + " help:")
        print(response.result)
    return response.result
|
|
755
|
+
|
|
756
|
+
|
|
757
|
+
## Multiple queries with limited responses until full contents are obtained ------------------
|
|
758
|
+
def _request_all_results_generator(self, api_fun:Callable, *, params:dict=None,
                                   limit:int=None, offset:int=0, search_all:bool=True,
                                   **kwargs) -> Generator[Any, Any, None]:
    """
    Multiply request with a limited length until no more data is transmitted thanks to the offset parameter.
    Lazy auxiliary function which yields a result for each request.

    The requests stop after the first one if search_all is False, or as soon as a request
    returns an empty result. An error is only raised when more data was still expected
    and the time budget or the maximum number of requests is exhausted.

    :param api_fun: function to call, typically a unitary request function
    :param params: api_fun must accept params argument in order to transmit other values and enforce the offset parameter
    :param limit: api_fun must accept limit argument in order to update the limit value
    :param offset: api_fun must accept offset argument in order to update the offset value
    :param search_all: if False, only the first request is operated
    :param kwargs: additional keyword arguments to pass to api_fun
    :return: generator yielding the return value of each call to api_fun
    :raises TimeoutError: if data was still expected after multi_requests_timeout seconds
    :raises MaxRequestsCountError: if data was still expected after max_requests_count requests
    """
    if params is None:
        params = {}
    if limit is None:
        limit = self.params.default_limit_read
    if limit is not None:
        assert_or_raise(limit > 0, InvalidParameterError("limit"))
    if offset is None:
        offset = 0
    if self.params.store_last_response_debug_info:
        self.debug.multi_requests_last_successful_offset = offset
    start = time.time()
    requests_count = 0
    n_received = 0
    while True:
        # pause between two consecutive requests, not before the first one
        if requests_count > 0 and self.params.multi_requests_time_between_requests > 0:
            time.sleep(self.params.multi_requests_time_between_requests)
        requests_count += 1
        if self.params.verbose_multi_requests:
            print(f"{self.identifier} Multi-requests no. {requests_count} - Requesting {limit} results from {api_fun.__name__}...")
        result_add: Union[pd.DataFrame, CkanActionResponse, Collection] = api_fun(params=params, limit=limit, offset=offset, **kwargs)
        if self.params.store_last_response_debug_info:
            # offset used by the request which has just succeeded
            self.debug.multi_requests_last_successful_offset = offset
            self.debug.last_response_request_count = requests_count
        n_new = len(result_add)
        offset += n_new
        n_received += n_new
        yield result_add
        if not (search_all and n_new > 0):
            # Normal end: a single request was asked or the last page was empty.
            # The limits below must not turn a complete result into an error.
            break
        # the elapsed time includes the time spent by the consumer of the generator
        if (time.time() - start) > self.params.multi_requests_timeout:
            raise TimeoutError()
        if requests_count >= self.params.max_requests_count:
            raise MaxRequestsCountError()
    current = time.time()
    if self.params.verbose_multi_requests:
        print(f"{self.identifier} Multi-requests {api_fun.__name__} done in {requests_count} calls and {round(current - start, 2)} seconds. {n_received} lines received.")
    return
|
|
825
|
+
|
|
826
|
+
def _request_all_results_df(self, api_fun:Callable, *, params:dict=None, list_attrs:bool=True,
                            limit:int=None, offset:int=0, search_all:bool=True,
                            **kwargs) -> pd.DataFrame:
    """
    Multiply request with a limited length until no more data is transmitted thanks to the offset parameter.
    DataFrame implementation: the DataFrames of the unitary calls are concatenated in one DataFrame.
    The number of requests and the elapsed time are stored in the attrs of the result.

    :param api_fun: function to call, typically a unitary request function
    :param params: api_fun must accept params argument in order to transmit other values and enforce the offset parameter
    :param limit: api_fun must accept limit argument in order to update the limit value
    :param offset: api_fun must accept offset argument in order to update the offset value
    :param search_all: if False, only the first request is operated
    :param list_attrs: option to aggregate DataFrame attrs field into lists. # False not tested
    :param kwargs: additional keyword arguments to pass to api_fun
    :return: the concatenated DataFrame
    """
    t_start = time.time()
    pages = self._request_all_results_generator(api_fun=api_fun, params=params,
                                                limit=limit, offset=offset, search_all=search_all, **kwargs)
    df = next(pages)
    if list_attrs:
        # each attribute becomes a list with one item per concatenated DataFrame
        df.attrs = {key: [value] for key, value in df.attrs.items()}
    requests_count = 1
    for requests_count, df_add in enumerate(pages, start=2):
        if len(df_add) == 0:
            # an empty page is counted as a request but adds neither rows nor attrs
            continue
        if list_attrs:
            attrs = df.attrs
        df = pd.concat([df, df_add])
        if list_attrs:
            df.attrs = {key: value + [df_add.attrs[key]] for key, value in attrs.items()}
    t_end = time.time()
    df.attrs["requests_count"] = requests_count
    df.attrs["elapsed_time"] = (t_end - t_start)
    return df
|
|
861
|
+
|
|
862
|
+
def _request_all_results_list(self, api_fun:Callable, *, params:dict=None,
                              limit:int=None, offset:int=0, search_all:bool=True, **kwargs) -> Union[List[CkanActionResponse], list]:
    """
    Multiply request with a limited length until no more data is transmitted thanks to the offset parameter.
    List implementation: the return values of the unitary calls are collected in a list, one item per request.

    :param api_fun: function to call, typically a unitary request function
    :param params: api_fun must accept params argument in order to transmit other values and enforce the offset parameter
    :param limit: api_fun must accept limit argument in order to update the limit value
    :param offset: api_fun must accept offset argument in order to update the offset value
    :param search_all: if False, only the first request is operated
    :param kwargs: additional keyword arguments to pass to api_fun
    :return: list of the values yielded by _request_all_results_generator
    """
    pages = self._request_all_results_generator(api_fun=api_fun, params=params, limit=limit, offset=offset,
                                                search_all=search_all, **kwargs)
    # exhaust the lazy generator: every request is sent before returning
    return list(pages)
|
|
878
|
+
|
|
879
|
+
def is_url_internal(self, url:str) -> bool:
    """
    Tests whether a url points to the same server as the CKAN url.

    The scheme, host and port of both URLs must be identical (scheme and host are compared
    case-insensitively, default ports of http/https are implied) and the path of the url
    must be the path of the CKAN url or located below it. A plain prefix comparison is
    not used because it would accept look-alike hosts such as
    "https://ckan.example.com.evil.org" or "https://ckan.example.com@evil.org",
    and CKAN credentials are sent to internal URLs.

    :param url: URL to test
    :return: True if the url belongs to the CKAN server. False if one of the URLs is
        empty or cannot be parsed.
    """
    from urllib.parse import urlsplit

    if not url or not self.url:
        return False
    default_ports = {"http": 80, "https": 443}

    def split_origin_and_path(value):
        # origin = (scheme, host, port) normalized for comparison
        parts = urlsplit(value)
        scheme = parts.scheme.lower()
        host = (parts.hostname or "").lower()
        port = parts.port if parts.port is not None else default_ports.get(scheme)
        return (scheme, host, port), parts.path

    try:
        base_origin, base_path = split_origin_and_path(self.url)
        target_origin, target_path = split_origin_and_path(url)
    except ValueError:
        # malformed URL (e.g. invalid port): do not treat it as internal
        return False
    if base_origin != target_origin:
        return False
    # match on a path segment boundary: "/ckan" must not match "/ckanother"
    base_path = base_path.rstrip("/")
    return target_path == base_path or target_path.startswith(base_path + "/")
|
|
888
|
+
|
|
889
|
+
def test_ckan_url_reachable(self, raise_error:bool=False) -> bool:
|
|
890
|
+
"""
|
|
891
|
+
Test if the CKAN URL is reachable with a HEAD request.
|
|
892
|
+
This does not check it is really a CKAN server and does not check authentication.
|
|
893
|
+
"""
|
|
894
|
+
error_message = self.download_url_proxy_test_head(self.url, raise_error=raise_error, context="CKAN URL test")
|
|
895
|
+
return error_message is None
|
|
896
|
+
|