ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. ckanapi_harvesters/__init__.py +32 -10
  2. ckanapi_harvesters/auxiliary/__init__.py +26 -0
  3. ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
  4. ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
  5. ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
  6. ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
  7. ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
  8. ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
  9. ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
  10. ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
  11. ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
  12. ckanapi_harvesters/auxiliary/deprecated.py +82 -0
  13. ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
  14. ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
  15. ckanapi_harvesters/auxiliary/list_records.py +60 -0
  16. ckanapi_harvesters/auxiliary/login.py +163 -0
  17. ckanapi_harvesters/auxiliary/path.py +208 -0
  18. ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
  19. ckanapi_harvesters/auxiliary/urls.py +40 -0
  20. ckanapi_harvesters/builder/__init__.py +40 -0
  21. ckanapi_harvesters/builder/builder_aux.py +20 -0
  22. ckanapi_harvesters/builder/builder_ckan.py +238 -0
  23. ckanapi_harvesters/builder/builder_errors.py +36 -0
  24. ckanapi_harvesters/builder/builder_field.py +122 -0
  25. ckanapi_harvesters/builder/builder_package.py +9 -0
  26. ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
  27. ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
  28. ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
  29. ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
  30. ckanapi_harvesters/builder/builder_resource.py +589 -0
  31. ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
  32. ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
  33. ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
  34. ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
  35. ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
  36. ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
  37. ckanapi_harvesters/builder/builder_resource_init.py +126 -0
  38. ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
  39. ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
  40. ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
  41. ckanapi_harvesters/builder/example/__init__.py +21 -0
  42. ckanapi_harvesters/builder/example/builder_example.py +21 -0
  43. ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
  44. ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
  45. ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
  46. ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
  47. ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
  48. ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
  49. ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
  50. ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
  51. ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
  52. ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
  53. ckanapi_harvesters/builder/mapper_datastore.py +93 -0
  54. ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
  55. ckanapi_harvesters/builder/specific/__init__.py +11 -0
  56. ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
  57. ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
  58. ckanapi_harvesters/ckan_api/__init__.py +20 -0
  59. ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
  60. ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
  61. ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
  62. ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
  63. ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
  64. ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
  65. ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
  66. ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
  67. ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
  68. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
  69. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
  70. ckanapi_harvesters/harvesters/__init__.py +23 -0
  71. ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
  72. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
  73. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
  74. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
  75. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
  76. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
  77. ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
  78. ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
  79. ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
  80. ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
  81. ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
  82. ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
  83. ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
  84. ckanapi_harvesters/harvesters/harvester_init.py +30 -0
  85. ckanapi_harvesters/harvesters/harvester_model.py +49 -0
  86. ckanapi_harvesters/harvesters/harvester_params.py +323 -0
  87. ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
  88. ckanapi_harvesters/harvesters/postgre_params.py +86 -0
  89. ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
  90. ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
  91. ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
  92. ckanapi_harvesters/policies/__init__.py +20 -0
  93. ckanapi_harvesters/policies/data_format_policy.py +269 -0
  94. ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
  95. ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
  96. ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
  97. ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
  98. ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
  99. ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
  100. ckanapi_harvesters/reports/__init__.py +11 -0
  101. ckanapi_harvesters/reports/admin_report.py +292 -0
  102. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/METADATA +84 -38
  103. ckanapi_harvesters-0.0.3.dist-info/RECORD +105 -0
  104. ckanapi_harvesters/divider/__init__.py +0 -27
  105. ckanapi_harvesters/divider/divider.py +0 -53
  106. ckanapi_harvesters/divider/divider_error.py +0 -59
  107. ckanapi_harvesters/main.py +0 -30
  108. ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
  109. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/WHEEL +0 -0
  110. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,896 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+
5
+ """
6
+ from abc import ABC
7
+ from typing import List, Dict, Callable, Union, Any, Generator, Sequence, Tuple, Collection
8
+ from collections import OrderedDict
9
+ import time
10
+ import copy
11
+ from warnings import warn
12
+ import argparse
13
+ import shlex
14
+ import os
15
+
16
+ import requests
17
+ from requests.auth import AuthBase
18
+ import pandas as pd
19
+
20
+ from ckanapi_harvesters.auxiliary.error_level_message import ContextErrorLevelMessage, ErrorLevel
21
+ from ckanapi_harvesters.auxiliary.external_code_import import unlock_external_code_execution
22
+ from ckanapi_harvesters.auxiliary.ckan_configuration import download_external_resource_urls, \
23
+ unlock_external_url_resource_download, allow_no_ca, unlock_no_ca
24
+ from ckanapi_harvesters.auxiliary.ckan_defs import environ_keyword
25
+ from ckanapi_harvesters.auxiliary.path import path_rel_to_dir
26
+ from ckanapi_harvesters.auxiliary.urls import urlsep, url_join
27
+ from ckanapi_harvesters.auxiliary.ckan_auxiliary import RequestType, max_len_debug_print, assert_or_raise
28
+ from ckanapi_harvesters.auxiliary.proxy_config import ProxyConfig
29
+ from ckanapi_harvesters.auxiliary.ckan_action import CkanActionResponse, CkanActionError, CkanNotFoundError
30
+ from ckanapi_harvesters.auxiliary.ckan_errors import (MaxRequestsCountError, UnexpectedError, InvalidParameterError,
31
+ ExternalUrlLockedError, UrlError, NoCAVerificationError, RequestError)
32
+ from ckanapi_harvesters.auxiliary.ckan_map import CkanMap
33
+ from ckanapi_harvesters.auxiliary.ckan_api_key import CkanApiKey
34
+ from ckanapi_harvesters.ckan_api.ckan_api_params import CkanApiParamsBasic, CkanApiDebug
35
+
36
# CKAN API version number.
CKAN_API_VERSION = 3

# When True, the owner_org field of CkanApi is meant to provide the default owner organization
# (otherwise owner_org should be None).
use_ckan_owner_org_as_default: bool = True
# When True, the CkanApi request proxy method fills in the authentication headers by default
# for requests which target the same domain as the CKAN server.
ckan_request_proxy_default_auth_if_ckan: bool = True
40
+
41
+ ## Abstract class
42
class CkanApiABC(ABC):
    """Abstract root of the CKAN API class hierarchy; it declares no members of its own."""
    pass
44
+
45
+
46
+
47
+ ## Main class ------------------
48
+ class CkanApiBase(CkanApiABC):
49
+ """
50
+ CKAN Database API interface to CKAN server with helper functions using pandas DataFrames.
51
+ This class implements the basic parameters and request functions.
52
+ """
53
+ CKAN_URL_ENVIRON = "CKAN_URL"
54
+
55
def __init__(self, url:str=None, *, proxies:Union[str,dict,ProxyConfig]=None,
             apikey:Union[str,CkanApiKey]=None, apikey_file:str=None,
             owner_org:str=None, params:CkanApiParamsBasic=None,
             identifier=None):
    """
    CKAN Database API interface to CKAN server with helper functions using pandas DataFrames.

    :param url: url of the CKAN server (assigned through the ``url`` property setter)
    :param proxies: proxies to use for requests; when given, it is assigned to ``params.proxies``
    :param apikey: API key, either a raw value or a CkanApiKey object (optional)
    :param apikey_file: path to a file containing a valid API key in the first line of text (optional).
        The path is stored on the key object; the file is not read by this constructor.
    :param owner_org: name of the organization to limit package_search (optional)
    :param params: other connection/behavior parameters (a CkanApiParamsBasic is created if None)
    :param identifier: identifier of the ckan client, for debugging purposes (defaults to "")
    """
    if identifier is None:
        identifier = ""
    # Always hold a CkanApiKey object: None or a raw key value is wrapped.
    if not isinstance(apikey, CkanApiKey):
        apikey = CkanApiKey(apikey=apikey)
    if apikey_file is not None:
        apikey.apikey_file = apikey_file
    if params is None:
        params = CkanApiParamsBasic()
    if proxies is not None:
        params.proxies = proxies
    self.identifier = identifier  # variable for debugging purposes
    self._ckan_url: str = ""
    self.apikey: CkanApiKey = apikey
    self.owner_org: Union[str, None] = owner_org  # name of the organization to limit package_search (optional)
    self.params: CkanApiParamsBasic = params
    self.ckan_session: Union[requests.Session, None] = None
    self.extern_session: Union[requests.Session, None] = None
    # NOTE(review): the previous code called self.load_apikey() here under the condition
    # `apikey_file is not None and apikey is None`. That condition could never hold because
    # `apikey` is always a CkanApiKey object at this point, so the call was unreachable and has
    # been removed. Call load_apikey() explicitly if the key file must be read eagerly.
    self.debug: CkanApiDebug = CkanApiDebug()
    # properties
    self.url = url  # url of the CKAN server (property)
91
+
92
+ def __del__(self):
93
+ self.disconnect()
94
+ self.apikey.__del__()
95
+
96
+ def __copy__(self):
97
+ return self.copy()
98
+
99
def copy(self, new_identifier:str=None, *, dest=None):
    """
    Returns a copy of the current instance.
    Useful to use an initialized ckan object in a multithreaded context. Each thread would have its own copy.
    It is recommended to purge the last response before doing a copy.

    The url, the owner organization, a copy of the parameters and a copy of the API key are transferred.
    The sessions are not shared: the copy starts without any session and with a new debug object.

    :param new_identifier: identifier given to the copy. If None, the identifier of ``dest`` is left untouched.
    :param dest: object to fill in (a new CkanApiBase is created if None)
    :return: the filled object
    """
    if dest is None:
        dest = CkanApiBase()
    dest._ckan_url = self._ckan_url
    dest.params = self.params.copy()
    dest.owner_org = self.owner_org
    dest.debug = CkanApiDebug()
    dest.apikey = self.apikey.copy()
    # post-copy operations
    if new_identifier is not None:
        dest.identifier = new_identifier
    dest.purge()
    # This only sets the session attributes of the copy to None: the sessions of the original
    # instance remain open. (The previous code assigned a stray `dest.session` attribute
    # instead of `dest.ckan_session`.)
    dest.ckan_session = None
    dest.extern_session = None
    return dest
122
+
123
+ def __str__(self) -> str:
124
+ """
125
+ String representation of the instance, for debugging purposes.
126
+
127
+ :return: URL representing the CKAN server
128
+ """
129
+ return f"CKAN <{self.url}> {str(self.identifier)}"
130
+
131
+ @property
132
+ def url(self) -> str:
133
+ return self._ckan_url
134
+ @url.setter
135
+ def url(self, url:str) -> None:
136
+ # ensure the ckan url ends with '/' (see is_url_internal)
137
+ if url is None:
138
+ self._ckan_url = None
139
+ elif url.lower().strip() == environ_keyword: # keyword
140
+ self.init_from_environ(init_api_key=False)
141
+ elif not url.endswith(urlsep):
142
+ self._ckan_url = url + urlsep
143
+ else:
144
+ self._ckan_url = url
145
+
146
+ def _init_session(self, *, internal:bool=False):
147
+ """
148
+ Initialize the session objects which are used to perform requests with this CKAN instance.
149
+ This method can be overloaded to fit your needs (proxies, certificates, cookies, headers, etc.).
150
+
151
+ :param internal:
152
+ :return:
153
+ """
154
+ if internal:
155
+ if self.ckan_session is None:
156
+ # the use of a session object will improve performance
157
+ self.ckan_session = requests.Session()
158
+ if self.params.proxies is not None:
159
+ self.ckan_session.proxies.update(self.params.proxies)
160
+ self.ckan_session.auth = self.params.proxy_auth
161
+ self.ckan_session.verify = self.params.ckan_ca
162
+ self.ckan_session.headers = self.params.ckan_headers
163
+ # API key is applied in the headers of each request
164
+ else:
165
+ if self.extern_session is None:
166
+ self.extern_session = requests # do not persist cookies between domains & requests to external resources are not meant to be numerous
167
+ # self.extern_session = requests.Session()
168
+ # if self.params.proxies is not None:
169
+ # self.extern_session.proxies.update(self.params.proxies)
170
+ # self.extern_session.auth = self.params.proxy_auth
171
+ # self.extern_session.verify = self.params.extern_ca
172
+ # self.extern_session.headers = self.params.http_headers
173
+
174
+ def connect(self):
175
+ self.test_ckan_login(raise_error=True)
176
+
177
+ def disconnect(self):
178
+ if self.ckan_session is not None:
179
+ self.ckan_session.close()
180
+ if self.extern_session is not None and isinstance(self.extern_session, requests.Session):
181
+ self.extern_session.close()
182
+ self.ckan_session = None
183
+ self.extern_session = None
184
+
185
+ def full_unlock(self, unlock:bool=True,
186
+ *, no_ca:bool=None, external_url_resource_download:bool=None) -> None:
187
+ """
188
+ Function to unlock full capabilities of the CKAN API
189
+
190
+ :param unlock:
191
+ :return:
192
+ """
193
+ if no_ca is not None:
194
+ unlock_no_ca(no_ca)
195
+ if external_url_resource_download is not None:
196
+ unlock_external_url_resource_download(external_url_resource_download)
197
+
198
+ def prepare_for_multithreading(self, mode_reduced:bool=True) -> None:
199
+ """
200
+ This method disables unnecessary writes to this object.
201
+ It is recommended to enable the reduced writes mode in a multithreaded context.
202
+ Do not forget to reset sessions at the beginning of each thread.
203
+
204
+ :param mode_reduced:
205
+ :return:
206
+ """
207
+ self.debug.store_last_response = not mode_reduced
208
+ self.debug.store_last_response_debug_info = not mode_reduced
209
+ if mode_reduced:
210
+ self.disconnect()
211
+
212
+ def purge(self) -> None:
213
+ """
214
+ Erase temporary data stored in this object
215
+
216
+ :param purge_map: whether to purge the map created with map_resources
217
+ """
218
+ self.debug.last_response = None
219
+ self.debug.ckan_request_counter = 0
220
+ self.debug.extern_request_counter = 0
221
+ self.debug.last_response_request_count = 0
222
+ self.debug.multi_requests_last_successful_offset = 0
223
+ self.debug.last_response_elapsed_time = 0.0
224
+
225
+ def set_limits(self, limit_read:Union[int,None]) -> None:
226
+ """
227
+ Set default query limits. If only one argument is provided, it applies to both limits.
228
+
229
+ :param limit_read: default limit for read requests
230
+ :return:
231
+ """
232
+ self.params.default_limit_read = limit_read
233
+ self.params.default_limit_list = limit_read
234
+
235
+ def set_verbosity(self, verbosity:bool=True, verbose_extra:bool=None) -> None:
236
+ """
237
+ Enable/disable full verbose output
238
+ :param verbosity: boolean. Cannot be None
239
+ :return:
240
+ """
241
+ self.params.verbose_multi_requests = verbosity
242
+ self.params.verbose_request = verbosity
243
+ self.params.verbose_request_error = verbosity
244
+ if verbose_extra is not None:
245
+ self.params.verbose_extra = verbose_extra
246
+
247
def set_proxies(self, proxies:Union[str,dict,ProxyConfig], *, default_proxies:dict=None, proxy_headers:dict=None) -> None:
    """
    Set up the proxy configuration

    :param proxies: string or proxies dict or ProxyConfig object.
        If a string is provided, it must be an url to a proxy or one of the following values:
            - "environ": use the proxies specified in the environment variables "http_proxy" and "https_proxy"
            - "noproxy": do not use any proxies
            - "unspecified": do not specify the proxies
            - "default": use value provided by default_proxies
    :param default_proxies: proxies used if proxies="default"
    :param proxy_headers: headers used to access the proxies, generally for authentication
    :return: None
    """
    proxy_config = ProxyConfig.from_str_or_config(
        proxies, default_proxies=default_proxies, proxy_headers=proxy_headers)
    self.params._proxy_config = proxy_config
263
+
264
+ def init_from_environ(self, *, init_api_key:bool=True, error_not_found:bool=False) -> None:
265
+ """
266
+ Initialize CKAN from environment variables.
267
+
268
+ - `CKAN_URL` for the url of the CKAN server.
269
+
270
+ And optionally:
271
+ - `CKAN_API_KEY`: for the raw API key (it is not recommended to store API key in an environment variable)
272
+ - `CKAN_API_KEY_FILE`: path to a file containing a valid API key in the first line of text
273
+
274
+ :param error_not_found: raise an error if the API key file was not found
275
+ :return:
276
+ """
277
+ ckan_url = os.environ.get(self.CKAN_URL_ENVIRON) # "CKAN_URL"
278
+ if ckan_url is not None:
279
+ assert not ckan_url.lower().strip() == environ_keyword # this value would create an infinite loop
280
+ self.url = ckan_url
281
+ if init_api_key:
282
+ self.apikey.load_from_environ(error_not_found=error_not_found)
283
+
284
def _setup_cli_ckan_parser(self, parser:argparse.ArgumentParser=None) -> argparse.ArgumentParser:
    """
    Define or add CLI arguments to initialize a CKAN API connection
    parser help message:

    CKAN API connection parameters initialization

    options:
      -h, --help            show this help message and exit
      --ckan-url CKAN_URL   CKAN URL
      --apikey APIKEY       CKAN API key
      --apikey-file APIKEY_FILE
                            Path to a file containing the CKAN API key (first line)
      --policy-file POLICY_FILE
                            Path to a file containing the CKAN data format policy (json format)
      --owner-org OWNER_ORG
                            CKAN Owner Organization
      --default-limit DEFAULT_LIMIT
                            Default number of rows per request
      --verbose VERBOSE     Option to set verbosity

    :param parser: option to provide an existing parser to add the specific fields needed to initialize a CKAN API connection
    :return: the parser (the one provided, or a new one)
    """
    cli_parser = parser
    if cli_parser is None:
        cli_parser = argparse.ArgumentParser(description="CKAN API connection parameters initialization")
    cli_parser.add_argument("--ckan-url", type=str, help="CKAN URL")
    CkanApiKey._setup_cli_parser(cli_parser)  # add arguments --apikey-file --apikey
    self.params._setup_cli_ckan_parser__params(cli_parser)
    # An "--external-code" flag (external code execution for the builder) is left disabled here.
    return cli_parser
317
+
318
+ def _cli_ckan_args_apply(self, args: argparse.Namespace, *, base_dir:str=None, error_not_found:bool=True,
319
+ default_proxies:dict=None, proxy_headers:dict=None) -> None:
320
+ """
321
+ Apply the arguments parsed by the argument parser defined by _setup_cli_ckan_parser
322
+
323
+ :param args:
324
+ :param base_dir: base directory to find the CKAN API key file, if a relative path is provided
325
+ (recommended: leave None to use cwd)
326
+ :param error_not_found: option to raise an exception if the CKAN API key file is not found
327
+ :param default_proxies: proxies used if proxies="default"
328
+ :param proxy_headers: headers used to access the proxies, generally for authentication
329
+ :return:
330
+ """
331
+ if args.ckan_url is not None:
332
+ self.url = args.ckan_url
333
+ self.apikey._cli_args_apply(args, base_dir=base_dir, error_not_found=error_not_found)
334
+ self.params._cli_ckan_args_apply(args, base_dir=base_dir, error_not_found=error_not_found,
335
+ default_proxies=default_proxies, proxy_headers=proxy_headers)
336
+ if args.default_limit is not None:
337
+ self.set_limits(args.default_limit)
338
+ if args.verbose is not None:
339
+ self.set_verbosity(args.verbose)
340
+ # if args.external_code:
341
+ # unlock_external_code_execution()
342
+ print(args)
343
+
344
+ def initialize_from_cli_args(self, *, args:Sequence[str]=None, base_dir:str=None,
345
+ error_not_found:bool=True, parser:argparse.ArgumentParser=None,
346
+ default_proxies:dict=None, proxy_headers:dict=None) -> None:
347
+ """
348
+ Intialize the CKAN API connection from command line arguments.
349
+
350
+ :param args: Option to provide arguments from another source.
351
+ :return:
352
+ """
353
+ parser = self._setup_cli_ckan_parser(parser)
354
+ args_parsed = parser.parse_args(args)
355
+ self._cli_ckan_args_apply(args_parsed, base_dir=base_dir, error_not_found=error_not_found,
356
+ default_proxies=default_proxies, proxy_headers=proxy_headers)
357
+
358
+ def input_cli_args(self, *, base_dir:str=None, error_not_found:bool=True, only_if_necessary:bool=False,
359
+ default_proxies:dict=None, proxy_headers:dict=None):
360
+ """
361
+ Initialize the query for initialization parameters in the command-line format in the console window.
362
+
363
+ :return:
364
+ """
365
+ if only_if_necessary and (self.url is not None and not self.apikey.is_empty()): # and self.proxy_object.is_defined()):
366
+ return
367
+ options_string = input("Please enter CKAN connection CLI arguments: ")
368
+ self.initialize_from_options_string(options_string, base_dir=base_dir, error_not_found=error_not_found,
369
+ default_proxies=default_proxies, proxy_headers=proxy_headers)
370
+
371
+ def initialize_from_options_string(self, options_string:str=None, base_dir:str=None,
372
+ error_not_found:bool=True, parser:argparse.ArgumentParser=None,
373
+ default_proxies:dict=None, proxy_headers:dict=None) -> None:
374
+ parser = self._setup_cli_ckan_parser(parser)
375
+ args = parser.parse_args(shlex.split(options_string))
376
+ self._cli_ckan_args_apply(args, base_dir=base_dir, error_not_found=error_not_found,
377
+ default_proxies=default_proxies, proxy_headers=proxy_headers)
378
+
379
+ def input_missing_info(self, *, base_dir:str=None, input_args:bool=False, input_args_if_necessary:bool=False,
380
+ input_apikey:bool=True, error_not_found:bool=True):
381
+ """
382
+ Ask user information in the console window.
383
+
384
+ :param input_owner_org: option to ask for the owner organization.
385
+ :return:
386
+ """
387
+ if input_args or input_args_if_necessary:
388
+ self.input_cli_args(base_dir=base_dir, error_not_found=error_not_found, only_if_necessary=input_args_if_necessary)
389
+ if self.url is None:
390
+ ckan_url = input("Please enter the CKAN URL: ")
391
+ self.url = ckan_url
392
+ if self.apikey.is_empty() and input_apikey:
393
+ self.apikey.input()
394
+
395
+
396
+ ## Error management ------------------
397
def _error_print_debug_response(self, response:requests.Response, *,
                                url:str=None, params:dict=None, json:dict=None, error:Exception=None, headers:dict=None):
    """
    Print the details of a failed request, if the request error verbosity is enabled.

    :param response: response obtained, or None if the request did not obtain any response
    :param url: requested url (printed when there is no response)
    :param params: request params (printed when there is no response)
    :param json: request json (printed when there is no response)
    :param error: exception raised by the request, if any
    :param headers: accepted for convenience; not printed by this method
    :return: None
    """
    if not self.params.verbose_request_error:
        return

    def print_truncated(text) -> None:
        # Print at most max_len_debug_print characters and mark the truncation.
        if text is None:
            text = "None"
        elif isinstance(text, bytes):
            # undecodable bytes must not raise a second error while reporting the first one
            text = text.decode(errors="replace")
        print(text[:max_len_debug_print])
        if len(text) > max_len_debug_print:
            print("[...]")

    print(f"{self.identifier} CKAN Response error details ({str(self)})")
    print(" ")
    if response is None:
        # there is no content to print (the previous code used an unassigned variable here)
        print(f"Problematic request did not obtain response ({url})")
    else:
        print(f"Problematic response code {response.status_code}:")
        print_truncated(response.content)
    if error is not None:
        print(" ")
        print("Exception error message:")
        print(str(error))
    if response is None:
        if params is not None:
            print(" ")
            print("Request params:")
            print(params)
        if json is not None:
            print(" ")
            print("Request json:")
            print(json)
    else:
        print(" ")
        print("Request URL:")
        print(response.request.url)
        print(" ")
        print("Request body:")
        print_truncated(response.request.body)
        print(" ")
        print("Response body:")
        print_truncated(response.text)
    print(" ")
453
+
454
+
455
+ ## Authentication ------------------
456
+ def load_apikey(self, apikey_file:str=None, base_dir:str=None, error_not_found:bool=True):
457
+ """
458
+ Load the CKAN API key from file.
459
+ The file should contain a valid API key in the first line of text.
460
+
461
+ :param apikey_file: API key file (optional if specified at the creation of the object)
462
+ :param base_dir: base directory, if the apikey_file is a relative path
463
+ :return:
464
+ """
465
+ self.apikey.load_apikey(apikey_file=apikey_file, base_dir=base_dir, error_not_found=error_not_found)
466
+
467
+ def _prepare_headers(self, headers:dict=None, include_ckan_auth:bool=False) -> dict:
468
+ """
469
+ Prepare headers for a request. If the request is destined to the CKAN server,
470
+ include authentication headers, if API key was provided.
471
+
472
+ :param headers: initial headers
473
+ :param include_ckan_auth: boolean to include CKAN authentication headers
474
+ :return:
475
+ """
476
+ if headers is None:
477
+ headers = {}
478
+ if self.params.user_agent is not None:
479
+ headers["User-Agent"] = str(self.params.user_agent)
480
+ headers.update(self.params._proxy_config.proxy_headers)
481
+ headers.update(self.params.http_headers)
482
+ if include_ckan_auth:
483
+ headers.update(self.params.ckan_headers)
484
+ headers.update(self.apikey.get_auth_header())
485
+ return headers
486
+
487
@staticmethod
def unlock_no_ca(value:bool=True):
    """
    Allow (or forbid again) disabling the CA verification of the CKAN server.
    The value is forwarded to the configuration function of the same name.

    __Warning__:
    Only allow in a local environment!

    :param value: True to unlock, False to lock
    """
    unlock_no_ca(value)
496
+
497
@staticmethod
def unlock_external_url_resource_download(value:bool=True):
    """
    Allow (or forbid again) the download of resources external from the CKAN server.
    The value is forwarded to the configuration function of the same name.

    :param value: True to unlock, False to lock
    """
    unlock_external_url_resource_download(value)
503
+
504
def prepare_arguments_for_url_download_request(self, url:str, *,
                                               auth_if_ckan:bool=None, headers:dict=None, verify:Union[bool,str,None]=None) \
        -> Tuple[bool, dict]:
    """
    Prepare the headers and the CA verification argument for a request to an url.
    Include CKAN authentication headers only if the URL points to the CKAN server.

    :param url: target URL
    :param headers: initial headers
    :param auth_if_ckan: option to include CKAN authentication headers if the url is recognized as part of the CKAN server.
        If None, the module default ckan_request_proxy_default_auth_if_ckan is used.
    :param verify: CA verification argument. If None, the CKAN or external value of the parameters is used,
        depending on the url.
    :raises ExternalUrlLockedError: if the url is external and external downloads are locked
    :return: tuple (url is internal and authentication is enabled, keyword arguments for the request)
    """
    # The lock is read from the configuration module at call time: a name imported with
    # `from ... import download_external_resource_urls` keeps the value it had at import time
    # and would not follow a later rebinding of the flag in that module.
    # NOTE(review): assumes unlock_external_url_resource_download rebinds this module attribute - confirm.
    from ckanapi_harvesters.auxiliary import ckan_configuration
    if auth_if_ckan is None:
        auth_if_ckan = ckan_request_proxy_default_auth_if_ckan
    verify_ca = verify
    url_is_internal = self.is_url_internal(url)
    if url_is_internal:
        headers = self._prepare_headers(headers, include_ckan_auth=auth_if_ckan)
        if verify is None:
            verify_ca = self.params.ckan_ca
    elif not ckan_configuration.download_external_resource_urls:
        raise ExternalUrlLockedError(url)
    else:
        headers = self._prepare_headers(headers, include_ckan_auth=False)
        if verify is None:
            verify_ca = self.params.extern_ca
        msg = f"Request to external url: {url}"
        warn(msg)
    request_kwargs = dict(headers=headers, verify=verify_ca)
    return url_is_internal and auth_if_ckan, request_kwargs
533
+
534
def download_url_proxy(self, url:str, *, method:str=None, auth_if_ckan:bool=None,
                       proxies:dict=None, headers:dict=None, auth: Union[AuthBase, Tuple[str,str]]=None, verify:Union[bool,str,None]=None) -> requests.Response:
    """
    Download a URL using the CKAN parameters (proxy, authentication etc.)

    :param url: target URL
    :param method: HTTP method ("GET" if None)
    :param auth_if_ckan: option to include CKAN authentication headers if the url is part of the CKAN server
    :param proxies: proxies for this request (the proxies of the parameters if None)
    :param headers: initial headers
    :param auth: authentication for this request (the proxy authentication of the parameters if None)
    :param verify: CA verification argument (see prepare_arguments_for_url_download_request)
    :return: the response; an empty requests.Response object in dry run mode
    """
    if proxies is None:
        proxies = self.params.proxies
    if method is None:
        method = "GET"
    if auth is None:
        auth = self.params.proxy_auth
    url_is_internal_auth, request_kwargs = self.prepare_arguments_for_url_download_request(
        url, auth_if_ckan=auth_if_ckan, headers=headers, verify=verify)
    response = None
    self._init_session(internal=url_is_internal_auth)
    try:
        if self.params.dry_run:
            response = requests.Response()
        elif url_is_internal_auth:
            self.debug.ckan_request_counter += 1
            response = self.ckan_session.request(method, url, timeout=self.params.requests_timeout,
                                                 proxies=proxies, **request_kwargs, auth=auth)
        else:
            self.debug.extern_request_counter += 1
            response = self.extern_session.request(method, url, timeout=self.params.requests_timeout,
                                                   proxies=proxies, **request_kwargs, auth=auth)
    except Exception as e:
        self._error_print_debug_response(response, url=url, headers=headers, error=e)
        # bare raise: keeps the original traceback (`raise e from e` made the exception its own cause)
        raise
    self.debug.last_response_request_count = 1
    # NOTE(review): this flag is read from params, whereas prepare_for_multithreading
    # sets debug.store_last_response - confirm which object is meant to hold it.
    if self.params.store_last_response:
        self.debug.last_response = response
    return response
571
+
572
def download_url_proxy_test_head(self, url:str, *, raise_error:bool=False, auth_if_ckan:bool=None,
                                 proxies:dict=None, headers:dict=None, auth: Union[AuthBase, Tuple[str,str]]=None,
                                 verify:Union[bool,str,None]=None, context:str=None) \
        -> Union[None,ContextErrorLevelMessage]:
    """
    This sends a HEAD request to the url using the CKAN connection parameters via download_url_proxy.
    The resource is not downloaded but the headers indicate if the url is valid.
    Only a response with status code 200 is considered successful.

    :param url: target URL
    :param raise_error: raise an exception instead of returning an error message
    :param auth_if_ckan: forwarded to download_url_proxy
    :param proxies: forwarded to download_url_proxy
    :param headers: forwarded to download_url_proxy
    :param auth: forwarded to download_url_proxy
    :param verify: forwarded to download_url_proxy
    :param context: context of the returned error message ("URL" if None)
    :raises RequestError: if raise_error is set and the response is not successful
    :return: None if successful, an error message otherwise
    """
    if context is None:
        context = "URL"
    try:
        response = self.download_url_proxy(url, method="HEAD", auth_if_ckan=auth_if_ckan, proxies=proxies,
                                           headers=headers, auth=auth, verify=verify)
    except Exception as e:
        if raise_error:
            # bare raise: keeps the original traceback (`raise e from e` made the exception its own cause)
            raise
        return ContextErrorLevelMessage(context, ErrorLevel.Error, f"Failed to query url {url}: {str(e)}")
    if response.ok and response.status_code == 200:
        return None
    if raise_error:
        raise RequestError(f"Failed to query url {url}: status {response.status_code} {response.reason}")
    return ContextErrorLevelMessage(context, ErrorLevel.Error, f"Failed to query url: {url}: status {response.status_code} {response.reason}")
596
+
597
+ ## API calls ------------------
598
def _error_empty_url(self, raise_error:bool=True) -> bool:
    """
    Check whether the CKAN url is missing (None or empty string).

    :param raise_error: if True, a missing url raises an error instead of returning True
    :return: False if the url is set, True if it is missing and raise_error is False
    :raises UrlError: if the url is missing and raise_error is True
    """
    url_is_set = not (self.url is None or self.url == "")
    if url_is_set:
        return False
    if not raise_error:
        return True
    raise UrlError("CKAN URL was not specified")
604
+
605
def _get_api_url(self, category:str=None):
    """
    Build the base API url ("api/3" joined to the CKAN url), optionally followed by a category.
    The CKAN url is checked with _error_empty_url first.

    :param category: usually, "action"; if None, the base API url is returned as is
    :return: the API url
    """
    self._error_empty_url()
    api_root = url_join(self.url, "api/3")
    if category is None:
        return api_root
    return api_root + urlsep + category
618
+
619
def _api_action_request(self, action:str, *, method:RequestType, params:dict=None,
                        headers:dict=None,
                        data:Union[dict,str,bytes]=None, json:dict=None, files:List[tuple]=None) -> CkanActionResponse:
    """
    Send an API action request through the CKAN session and wrap the response.

    :param action: action name, appended to the "action" API url
    :param method: GET / POST (any value other than RequestType.Get is sent as POST)
    :param params: params to set in the url
    :param headers: headers for the request (they are passed through _prepare_headers with include_ckan_auth=True)
    :param data: information to encode in the request body (only for POST method, must be None for GET)
    :param json: information to encode as JSON in the request (only for POST method)
    :param files: files to upload in the request (only for POST method)
    :return: the response wrapped in a CkanActionResponse
    """
    if params is None:
        params = {}
    url = self._get_api_url("action") + urlsep + action
    headers = self._prepare_headers(headers, include_ckan_auth=True)

    if self.params.verbose_request:
        # assemble a size-limited preview of the arguments for the console
        preview_parts = ["" if json is None else "json=" + str(json) + " / ", str(params)]
        if data is not None:
            data_text = str(data)
            preview_parts.append("data=" + data_text[:max_len_debug_print] + " / ")
            if len(data_text) > max_len_debug_print:
                preview_parts.append("[...]")
        arguments_preview = "".join(preview_parts)[:max_len_debug_print]
        print(f"{self.identifier} API action '{action}' with arguments {arguments_preview}")

    started_at = time.time()
    self.debug.ckan_request_counter += 1
    response = None
    self._init_session(internal=True)
    try:
        if self.params.dry_run:
            # dry run: nothing is sent, an unsent placeholder response is used
            response = requests.Response()
        else:
            is_get = method == RequestType.Get
            if is_get:
                assert_or_raise(data is None, UnexpectedError("data"))
            shared_kwargs = dict(params=params, headers=headers, timeout=self.params.requests_timeout,
                                 proxies=self.params.proxies, verify=self.params.ckan_ca,
                                 auth=self.params.proxy_auth)
            if is_get:
                response = self.ckan_session.get(url, **shared_kwargs)
            else:
                response = self.ckan_session.post(url, data=data, files=files, json=json, **shared_kwargs)
    except Exception as e:
        # print the debug information of the failed request, then propagate the same exception
        self._error_print_debug_response(response, url=url, params=params, headers=headers, json=json, error=e)
        raise e from e
    elapsed = time.time() - started_at

    if self.params.verbose_request and not self.params.dry_run:
        print(f"{self.identifier} API action '{action}' done in {elapsed} seconds. Received {len(response.content)} bytes")
    if self.params.store_last_response:
        self.debug.last_response = response
    if self.params.store_last_response_debug_info:
        self.debug.last_response_elapsed_time = elapsed
        self.debug.last_response_request_count = 1
    return CkanActionResponse(response, self.params.dry_run)
677
+
678
def api_action_call(self, action:str, *, method:RequestType, params:dict=None,
                    headers:dict=None,
                    data:dict=None, json:dict=None, files:List[tuple]=None) -> CkanActionResponse:
    """
    Public alias of _api_action_request: every argument is forwarded unchanged.

    :return: the value returned by _api_action_request
    """
    forwarded = dict(method=method, params=params, headers=headers, data=data, json=json, files=files)
    return self._api_action_request(action=action, **forwarded)
683
+
684
def _url_request(self, path:str, *, method:RequestType, params:dict=None, headers:dict=None,
                 data:dict=None, json:dict=None, files:List[tuple]=None) -> requests.Response:
    """
    Send request to server and return response.

    :param path: relative path to server url
    :param method: GET / POST (any value other than RequestType.Get is sent as POST)
    :param params: params to set in the url
    :param headers: headers for the request (they are passed through _prepare_headers with include_ckan_auth=True)
    :param data: information to encode in the request body (only for POST method)
    :param json: information to encode as JSON in the request (only for POST method)
    :param files: files to upload in the request (only for POST method)
    :return: the raw response (an unsent requests.Response placeholder in dry-run mode)
    """
    if params is None:
        params = {}
    self._error_empty_url()
    url = url_join(self.url, path)
    headers = self._prepare_headers(headers, include_ckan_auth=True)
    if self.params.verbose_request:
        if json is not None:
            params_str = str(json) + " / "
        else:
            params_str = ""
        params_str = params_str + str(params)
        params_str = params_str[:max_len_debug_print]
        print(f"{self.identifier} URL call {url} with arguments {params_str}")
    start = time.time()
    self.debug.ckan_request_counter += 1
    response = None
    self._init_session(internal=True)
    try:
        if self.params.dry_run:
            response = requests.Response()
        elif method == RequestType.Get:
            response = self.ckan_session.get(url, params=params, headers=headers, timeout=self.params.requests_timeout,
                                             proxies=self.params.proxies, verify=self.params.ckan_ca, auth=self.params.proxy_auth)
        else:
            response = self.ckan_session.post(url, data=data, headers=headers, params=params, timeout=self.params.requests_timeout,
                                              json=json, files=files,
                                              proxies=self.params.proxies, verify=self.params.ckan_ca, auth=self.params.proxy_auth)
    except Exception as e:
        # print the debug information of the failed request, then propagate the same exception
        self._error_print_debug_response(response, url=url, params=params, headers=headers, json=json, error=e)
        raise e from e
    end = time.time()
    # The dry-run placeholder response has no content (response.content is None), so its size
    # cannot be printed: skip the message in that case, as _api_action_request does.
    if self.params.verbose_request and not self.params.dry_run:
        print(f"{self.identifier} URL call {url} done in {end-start} seconds. Received {len(response.content)} bytes")
    if self.params.store_last_response:
        self.debug.last_response = response
    if self.params.store_last_response_debug_info:
        self.debug.last_response_elapsed_time = end - start
        self.debug.last_response_request_count = 1
    return response
734
+
735
def api_help_show(self, action_name:str, *, print_output:bool=True) -> str:
    """
    Call the "help_show" API action for a given action name.

    :param action_name: name of the action to document
    :param print_output: Option to print the output in the command line
    :return: the result field of the response
    :raises CkanNotFoundError: if the server answers 404 with a "Not Found Error" message
    """
    response = self._api_action_request("help_show", method=RequestType.Get, params={"name": action_name})
    if not response.success:
        action_not_found = (response.status_code == 404 and response.success_json_loads
                            and response.error_message["__type"] == "Not Found Error")
        if action_not_found:
            raise CkanNotFoundError(self, "Action", response)
        if print_output:
            print(f"No documentation found for action '{action_name}'")
        # any other failure: raise the default error built by the response
        raise response.default_error(self)
    help_text = response.result
    if print_output:
        print(action_name + " help:")
        print(help_text)
    return help_text
755
+
756
+
757
+ ## Multiple queries with limited responses until full contents are obtained ------------------
758
def _request_all_results_generator(self, api_fun:Callable, *, params:dict=None,
                                   limit:int=None, offset:int=0, search_all:bool=True,
                                   **kwargs) -> Generator[Any, Any, None]:
    """
    Repeat a request with a limited length, moving the offset forward, until no more data is transmitted.
    Lazy auxiliary function which yields the result of each request (including the last, empty one).

    The time limit (params.multi_requests_timeout) and the request limit (params.max_requests_count)
    only raise an error if more data is still expected, i.e. if the last result was not empty
    and search_all is True.

    :param api_fun: function to call, typically a unitary request function.
        It is called with the keyword arguments params, limit, offset and kwargs, and its result must support len()
    :param params: transmitted to api_fun as is (an empty dict if None)
    :param limit: transmitted to api_fun; defaults to params.default_limit_read; must be positive if not None
    :param offset: offset of the first request; increased by the length of each result
    :param search_all: if False, only the first request is operated
    :param kwargs: additional keyword arguments to pass to api_fun
    :return: generator of the api_fun results
    :raises TimeoutError: if the time limit is exceeded before the last result was received
    :raises MaxRequestsCountError: if the request limit is reached before the last result was received
    """
    if params is None:
        params = {}
    if limit is None:
        limit = self.params.default_limit_read
    if limit is not None:
        assert_or_raise(limit > 0, InvalidParameterError("limit"))
    if offset is None:
        offset = 0
    if self.params.store_last_response_debug_info:
        self.debug.multi_requests_last_successful_offset = offset
    start = time.time()
    requests_count = 0
    n_received = 0
    while True:
        # optional pause between two consecutive requests (not before the first one)
        if requests_count > 0 and self.params.multi_requests_time_between_requests > 0:
            time.sleep(self.params.multi_requests_time_between_requests)
        requests_count += 1
        if self.params.verbose_multi_requests:
            print(f"{self.identifier} Multi-requests no. {requests_count} - Requesting {limit} results from {api_fun.__name__}...")
        result_add = api_fun(params=params, limit=limit, offset=offset, **kwargs)
        if self.params.store_last_response_debug_info:
            self.debug.multi_requests_last_successful_offset = offset
            self.debug.last_response_request_count = requests_count
        n_added = len(result_add)
        offset += n_added
        n_received += n_added
        yield result_add
        if not search_all or n_added == 0:
            # everything requested has been received: the limits below no longer matter
            break
        # more data is expected: enforce the limits before sending another request
        if (time.time() - start) > self.params.multi_requests_timeout:
            raise TimeoutError()
        if requests_count >= self.params.max_requests_count:
            raise MaxRequestsCountError()
    current = time.time()
    if self.params.verbose_multi_requests:
        print(f"{self.identifier} Multi-requests {api_fun.__name__} done in {requests_count} calls and {round(current - start, 2)} seconds. {n_received} lines received.")
    return
825
+
826
def _request_all_results_df(self, api_fun:Callable, *, params:dict=None, list_attrs:bool=True,
                            limit:int=None, offset:int=0, search_all:bool=True,
                            **kwargs) -> pd.DataFrame:
    """
    Repeat a request with a limited length, moving the offset forward, until no more data is transmitted.
    DataFrame implementation: returns the concatenation of the DataFrames returned by the unitary function calls.
    The fields "requests_count" and "elapsed_time" are added to the attrs of the returned DataFrame.

    :param api_fun: function to call, typically a unitary request function returning a DataFrame
    :param params: transmitted to _request_all_results_generator
    :param limit: transmitted to _request_all_results_generator
    :param offset: transmitted to _request_all_results_generator
    :param search_all: if False, only the first request is operated
    :param list_attrs: option to aggregate DataFrame attrs field into lists
        (one item for the first result and one per following non-empty result). # False not tested
    :param kwargs: additional keyword arguments to pass to api_fun
    :return: the concatenated DataFrame
    """
    start = time.time()
    results = self._request_all_results_generator(api_fun=api_fun, params=params,
                                                  limit=limit, offset=offset, search_all=search_all, **kwargs)
    first = next(results)
    requests_count = 1
    frames = [first]
    attrs = {key: [value] for key, value in first.attrs.items()} if list_attrs else None
    for df_add in results:
        requests_count += 1
        if len(df_add) > 0:
            frames.append(df_add)
            if list_attrs:
                # only the keys present in the first result are aggregated
                for key, values in attrs.items():
                    values.append(df_add.attrs[key])
    # concatenate once at the end instead of re-copying the accumulated data for each result
    df = pd.concat(frames) if len(frames) > 1 else first
    if list_attrs:
        df.attrs = attrs
    current = time.time()
    df.attrs["requests_count"] = requests_count
    df.attrs["elapsed_time"] = (current - start)
    return df
861
+
862
def _request_all_results_list(self, api_fun:Callable, *, params:dict=None,
                              limit:int=None, offset:int=0, search_all:bool=True, **kwargs) -> Union[List[CkanActionResponse], list]:
    """
    Repeat a request with a limited length, moving the offset forward, until no more data is transmitted.
    List implementation: exhausts _request_all_results_generator and returns one item per unitary function call.

    :param api_fun: function to call, typically a unitary request function
    :param params: transmitted to _request_all_results_generator
    :param limit: transmitted to _request_all_results_generator
    :param offset: transmitted to _request_all_results_generator
    :param search_all: if False, only the first request is operated
    :param kwargs: additional keyword arguments to pass to api_fun
    :return: list of the values returned by the unitary function calls
    """
    results = self._request_all_results_generator(api_fun=api_fun, params=params, limit=limit,
                                                  offset=offset, search_all=search_all, **kwargs)
    return [*results]
878
+
879
def is_url_internal(self, url:str) -> bool:
    """
    Tests whether a url points to the same server as the CKAN url.

    The scheme, host name and port (80 / 443 assumed for http / https when omitted) must be equal,
    and the path of the url must be located under the path of the CKAN url.
    A plain string prefix test is not used because it would also accept other hosts whose
    address merely begins with the CKAN url (e.g. "https://ckan.example.com.evil.org/").

    :param url: url to test
    :return: True if the url belongs to the CKAN server; False otherwise,
        or if either url is empty or cannot be parsed
    """
    from urllib.parse import urlsplit  # local import: only needed by this test

    if not url or not self.url:
        return False
    default_ports = {"http": 80, "https": 443}
    try:
        base = urlsplit(self.url)
        target = urlsplit(url)
        base_scheme = base.scheme.lower()
        target_scheme = target.scheme.lower()
        # hostname ignores user info and is lower-cased; port raises ValueError if malformed
        base_origin = (base_scheme, base.hostname, base.port or default_ports.get(base_scheme))
        target_origin = (target_scheme, target.hostname, target.port or default_ports.get(target_scheme))
    except ValueError:
        return False
    if base_origin != target_origin:
        return False
    # the path must match at a segment boundary: "/ckan" contains "/ckan/api" but not "/ckan-admin"
    base_path = base.path.rstrip("/")
    return target.path == base_path or target.path.startswith(base_path + "/")
888
+
889
def test_ckan_url_reachable(self, raise_error:bool=False) -> bool:
    """
    Test if the CKAN URL is reachable with a HEAD request (sent by download_url_proxy_test_head).
    This does not check it is really a CKAN server and does not check authentication.

    :param raise_error: forwarded to download_url_proxy_test_head
    :return: True if download_url_proxy_test_head reported no error
    """
    failure = self.download_url_proxy_test_head(self.url, raise_error=raise_error, context="CKAN URL test")
    if failure is not None:
        return False
    return True
896
+