ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ckanapi_harvesters/__init__.py +32 -10
- ckanapi_harvesters/auxiliary/__init__.py +26 -0
- ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
- ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
- ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
- ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
- ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
- ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
- ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
- ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
- ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
- ckanapi_harvesters/auxiliary/deprecated.py +82 -0
- ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
- ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
- ckanapi_harvesters/auxiliary/list_records.py +60 -0
- ckanapi_harvesters/auxiliary/login.py +163 -0
- ckanapi_harvesters/auxiliary/path.py +208 -0
- ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
- ckanapi_harvesters/auxiliary/urls.py +40 -0
- ckanapi_harvesters/builder/__init__.py +40 -0
- ckanapi_harvesters/builder/builder_aux.py +20 -0
- ckanapi_harvesters/builder/builder_ckan.py +238 -0
- ckanapi_harvesters/builder/builder_errors.py +36 -0
- ckanapi_harvesters/builder/builder_field.py +122 -0
- ckanapi_harvesters/builder/builder_package.py +9 -0
- ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
- ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
- ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
- ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
- ckanapi_harvesters/builder/builder_resource.py +589 -0
- ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
- ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
- ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
- ckanapi_harvesters/builder/builder_resource_init.py +126 -0
- ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
- ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
- ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
- ckanapi_harvesters/builder/example/__init__.py +21 -0
- ckanapi_harvesters/builder/example/builder_example.py +21 -0
- ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
- ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
- ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
- ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
- ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
- ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
- ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
- ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
- ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
- ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
- ckanapi_harvesters/builder/mapper_datastore.py +93 -0
- ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
- ckanapi_harvesters/builder/specific/__init__.py +11 -0
- ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
- ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
- ckanapi_harvesters/ckan_api/__init__.py +20 -0
- ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
- ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
- ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
- ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
- ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
- ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
- ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
- ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
- ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
- ckanapi_harvesters/harvesters/__init__.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
- ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
- ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
- ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
- ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
- ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
- ckanapi_harvesters/harvesters/harvester_init.py +30 -0
- ckanapi_harvesters/harvesters/harvester_model.py +49 -0
- ckanapi_harvesters/harvesters/harvester_params.py +323 -0
- ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
- ckanapi_harvesters/harvesters/postgre_params.py +86 -0
- ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
- ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
- ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
- ckanapi_harvesters/policies/__init__.py +20 -0
- ckanapi_harvesters/policies/data_format_policy.py +269 -0
- ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
- ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
- ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
- ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
- ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
- ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
- ckanapi_harvesters/reports/__init__.py +11 -0
- ckanapi_harvesters/reports/admin_report.py +292 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/METADATA +84 -38
- ckanapi_harvesters-0.0.3.dist-info/RECORD +105 -0
- ckanapi_harvesters/divider/__init__.py +0 -27
- ckanapi_harvesters/divider/divider.py +0 -53
- ckanapi_harvesters/divider/divider_error.py +0 -59
- ckanapi_harvesters/main.py +0 -30
- ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/WHEEL +0 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1028 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
from typing import List, Dict, Callable, Union, Any, Generator, Sequence, Tuple, Collection
|
|
7
|
+
from collections import OrderedDict
|
|
8
|
+
import time
|
|
9
|
+
import copy
|
|
10
|
+
from warnings import warn
|
|
11
|
+
import argparse
|
|
12
|
+
|
|
13
|
+
from ckanapi_harvesters.auxiliary.ckan_model import (CkanPackageInfo, CkanLicenseInfo, CkanDataStoreInfo, CkanResourceInfo,
|
|
14
|
+
CkanOrganizationInfo, CkanViewInfo, CkanField, CkanUserInfo,
|
|
15
|
+
CkanGroupInfo, CkanCollaboration, CkanCapacity)
|
|
16
|
+
from ckanapi_harvesters.auxiliary.urls import urlsep, url_join
|
|
17
|
+
from ckanapi_harvesters.auxiliary.ckan_auxiliary import RequestType
|
|
18
|
+
from ckanapi_harvesters.auxiliary.proxy_config import ProxyConfig
|
|
19
|
+
from ckanapi_harvesters.auxiliary.ckan_action import CkanActionResponse, CkanActionError, CkanNotFoundError
|
|
20
|
+
from ckanapi_harvesters.auxiliary.ckan_map import CkanMap
|
|
21
|
+
from ckanapi_harvesters.auxiliary.ckan_api_key import CkanApiKey
|
|
22
|
+
from ckanapi_harvesters.ckan_api.ckan_api_params import CkanApiParamsBasic
|
|
23
|
+
from ckanapi_harvesters.ckan_api.ckan_api_0_base import CkanApiBase, use_ckan_owner_org_as_default
|
|
24
|
+
from ckanapi_harvesters.harvesters.data_cleaner.data_cleaner_upload_2_geom import CkanDataCleanerUploadGeom
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
## Main class ------------------
|
|
28
|
+
class CkanApiMap(CkanApiBase):
|
|
29
|
+
"""
|
|
30
|
+
CKAN Database API interface to CKAN server with helper functions using pandas DataFrames.
|
|
31
|
+
This class implements the resource mapping capabilities to obtain resource ids necessary for the requests.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
def __init__(self, url:str=None, *, proxies:Union[str,dict,ProxyConfig]=None,
             apikey:Union[str,CkanApiKey]=None, apikey_file:str=None,
             owner_org:str=None, params:CkanApiParamsBasic=None,
             map:CkanMap=None,
             identifier=None):
    """
    Initialize the CKAN client and attach a resource map to it.

    Every argument except ``map`` is forwarded unchanged to the parent constructor.

    :param url: url of the CKAN server
    :param proxies: proxies to use for requests
    :param apikey: way to provide the API key directly (optional)
    :param apikey_file: path to a file containing a valid API key in the first line of text (optional)
    :param owner_org: name of the organization to limit package_search (optional)
    :param params: other connection/behavior parameters
    :param map: map of known resources; a new empty CkanMap is created when omitted
    :param identifier: identifier of the ckan client
    """
    super().__init__(url=url, proxies=proxies, apikey=apikey, apikey_file=apikey_file,
                     owner_org=owner_org, params=params, identifier=identifier)
    # `map` shadows the builtin only because it is part of the public keyword interface.
    self.map: CkanMap = CkanMap() if map is None else map
|
|
56
|
+
|
|
57
|
+
def copy(self, new_identifier:str=None, *, dest=None):
    """
    Return a copy of the current instance, including a copy of its map.

    Useful to share an initialized ckan object in a multithreaded context: each thread works on its own copy.
    It is recommended to purge the last response before doing a copy (with purge_map=False).

    :param new_identifier: NOTE(review): not used in this method and not forwarded to the parent ``copy``
        - confirm whether this is intended.
    :param dest: object to fill in; a new CkanApiMap is created when omitted
    :return: the populated ``dest`` object
    """
    target = CkanApiMap() if dest is None else dest
    super().copy(dest=target)
    # The copy gets its own map object, obtained from the map's copy() method.
    target.map = self.map.copy()
    return target
|
|
68
|
+
|
|
69
|
+
def purge(self, purge_map:bool=False) -> None:
    """
    Erase the temporary data stored in this object.

    The parent purge always runs; the map is only purged on request.

    :param purge_map: whether to also purge the map created with map_resources
    """
    super().purge()
    if not purge_map:
        return
    if self.params.verbose_request:
        print("CKAN purge map")
    self.map.purge()
|
|
80
|
+
|
|
81
|
+
def set_owner_org(self, owner_org:str, *, error_not_found:bool=True) -> None:
    """
    Set the default owner organization.

    The organization is resolved with get_organization_info_or_request and the value returned by its
    get_owner_org() method is stored in ``self.owner_org``.

    :param owner_org: owner organization name, title or id. None clears the default owner organization.
    :param error_not_found: forwarded to get_organization_info_or_request. When that lookup returns None,
        the default owner organization is cleared.
    :return:
    """
    if owner_org is None:
        self.owner_org = None
        return
    org_info = self.get_organization_info_or_request(owner_org, error_not_found=error_not_found)
    if org_info is None:
        self.owner_org = None
    else:
        self.owner_org = org_info.get_owner_org()
|
|
93
|
+
|
|
94
|
+
def _setup_cli_ckan_parser(self, parser:argparse.ArgumentParser=None) -> argparse.ArgumentParser:
    """
    Extend the command-line parser of the parent class with the ``--owner-org`` option.

    :param parser: parser handed to the parent implementation
    :return: the parser returned by the parent implementation, with ``--owner-org`` added
    """
    cli_parser = super()._setup_cli_ckan_parser(parser=parser)
    cli_parser.add_argument("--owner-org", type=str, help="CKAN Owner Organization")
    return cli_parser
|
|
99
|
+
|
|
100
|
+
def _cli_ckan_args_apply(self, args: argparse.Namespace, *, base_dir:str=None, error_not_found:bool=True,
                         default_proxies:dict=None, proxy_headers:dict=None) -> None:
    """
    Apply parsed command-line arguments to this object.

    The parent implementation runs first, then the ``--owner-org`` option is applied through set_owner_org.

    :param args: parsed command-line arguments
    :param base_dir: forwarded to the parent implementation
    :param error_not_found: forwarded to the parent implementation.
        NOTE(review): it is not forwarded to set_owner_org, which therefore uses its own default - confirm.
    :param default_proxies: forwarded to the parent implementation
    :param proxy_headers: forwarded to the parent implementation
    """
    super()._cli_ckan_args_apply(args=args, base_dir=base_dir, error_not_found=error_not_found,
                                 default_proxies=default_proxies, proxy_headers=proxy_headers)
    # getattr: tolerate a Namespace produced by a parser that does not define --owner-org.
    owner_org = getattr(args, "owner_org", None)
    if owner_org is not None:
        self.set_owner_org(owner_org)
    # The full Namespace is only echoed in verbose mode, like the other console messages of this class.
    # NOTE(review): the Namespace may hold credentials defined by the parent parser - confirm before
    # enabling verbose output in shared logs.
    if self.params.verbose_request:
        print(args)
|
|
107
|
+
|
|
108
|
+
def input_missing_info(self, *, base_dir:str=None, input_args:bool=False, input_args_if_necessary:bool=False,
                       input_apikey:bool=True, error_not_found:bool=True, input_owner_org:bool=False):
    """
    Ask the user for missing information in the console window.

    The parent implementation runs first with every argument except ``input_owner_org``.
    The owner organization is then prompted for, only if requested and not already set.

    :param base_dir: forwarded to the parent implementation
    :param input_args: forwarded to the parent implementation
    :param input_args_if_necessary: forwarded to the parent implementation
    :param input_apikey: forwarded to the parent implementation
    :param error_not_found: forwarded to the parent implementation.
        NOTE(review): it is not forwarded to set_owner_org - confirm whether this is intended.
    :param input_owner_org: option to ask for the owner organization.
    :return:
    """
    super().input_missing_info(base_dir=base_dir, input_args=input_args,
                               input_args_if_necessary=input_args_if_necessary,
                               input_apikey=input_apikey, error_not_found=error_not_found)
    owner_org_missing = self.owner_org is None
    if owner_org_missing and input_owner_org:
        answer = input("Please enter owner organization name or title: ")
        self.set_owner_org(answer)
|
|
122
|
+
|
|
123
|
+
## Resource mapping ------------------
|
|
124
|
+
def _enrich_resource_info(self, resource_info:CkanResourceInfo, *,
                          datastore_info:bool=False, resource_view_list:bool=False) -> None:
    """
    Perform additional optional queries to add more information on a resource.

    The resource object is updated in place.

    :param resource_info: resource to complete
    :param datastore_info: option to query datastore_info. Any exception raised by the query is caught and
        recorded as ``{"error": str(e)}`` in ``resource_info.datastore_info_error``.
        When False, ``resource_info.datastore_info`` is reset to None.
    :param resource_view_list: option to query resource_view_list.
        When False, ``resource_info.views`` is reset to None.
    :return:
    """
    resource_id = resource_info.id
    if datastore_info:
        try:
            db_info = self.datastore_info(resource_id, display_request_not_found=False)
        except Exception as e:
            # Deliberate best-effort: a failed query is stored on the resource instead of
            # interrupting the caller.
            resource_info.datastore_info = None
            resource_info.datastore_info_error = {"error": str(e)}
        else:
            resource_info.datastore_info = db_info
            resource_info.datastore_info_error = None
    else:
        resource_info.datastore_info = None
    if resource_view_list:
        # view_list=True is passed because the complete list of views is provided.
        resource_info.update_view(self.resource_view_list(resource_id), view_list=True)
    else:
        resource_info.views = None
|
|
150
|
+
|
|
151
|
+
def set_default_map_mode(self, datastore_info:bool=None, resource_view_list:bool=None,
                         organization_info:bool=None, license_list:bool=None) -> None:
    """
    Set up the optional queries orchestrated by the map_resources function.

    Each option is stored on the map object. An option left to None keeps the value currently stored.

    :param datastore_info: new value for the map's ``_mapping_query_datastore_info`` option
    :param resource_view_list: new value for the map's ``_mapping_query_resource_view_list`` option
    :param organization_info: new value for the map's ``_mapping_query_organization_info`` option
    :param license_list: new value for the map's ``_mapping_query_license_list`` option
    :return:
    """
    ckan_map = self.map
    requested = {
        "_mapping_query_datastore_info": datastore_info,
        "_mapping_query_resource_view_list": resource_view_list,
        "_mapping_query_license_list": license_list,
        "_mapping_query_organization_info": organization_info,
    }
    # First resolve every option (None falls back to the stored value), then write them all back.
    resolved = {attr: (getattr(ckan_map, attr) if value is None else value)
                for attr, value in requested.items()}
    for attr, value in resolved.items():
        setattr(ckan_map, attr, value)
|
|
174
|
+
|
|
175
|
+
def complete_package_list(self, package_list:Union[str, List[str]]=None,
                          *, owner_org:str=None, params:dict=None) -> List[str]:
    """
    Auxiliary function to initialize a package_list argument.

    :param package_list: None to list the packages with package_search_all, a single name (wrapped in a list),
        or a list which is returned as is
    :param owner_org: forwarded to package_search_all (only used when package_list is None)
    :param params: forwarded to package_search_all (only used when package_list is None)
    :return: list of package ids when the search is used, otherwise the given names as a list
    """
    if package_list is None:
        found = self.package_search_all(owner_org=owner_org, params=params)
        return [package_info.id for package_info in found]
    return [package_list] if isinstance(package_list, str) else package_list
|
|
187
|
+
|
|
188
|
+
def map_resources(self, package_list:Union[str, List[str]]=None, *, params:dict=None,
                  datastore_info:bool=None, resource_view_list:bool=None, organization_info:bool=None, license_list:bool=None,
                  only_missing:bool=True, error_not_found:bool=True,
                  owner_org:str=None) -> CkanMap:
    """
    Map the resources of the given packages to obtain the resource ids associated with the package name
    and the resources within the package.
    NB: Packages were previously referred to as DataSets in previous CKAN implementations.
    A same name can be shared between multiple resources within a package. The first occurrence is used as a reference
    and a warning is issued in this case.

    :param package_list: list of packages to request (optional, by default, the result of package_search is used)
    :param params: optional parameters to pass to API calls (not recommended)
    :param datastore_info: option to enable the request of api_datastore_info. This will return information about
        the DataStore fields, aliases and row count. It is required to enable search of a DataStore by alias.
    :param resource_view_list: option to enable the request of view_list API for each resource
    :param organization_info: option to enable the request of organization_list API before any other request
    :param license_list: option to enable the request of license_list API
    :param only_missing: option to disable the request of already mapped packages
    :param error_not_found: option to ignore the packages which were not found by the API (do not raise an error)
    :param owner_org: option to filter packages of a given organization (only if package_search is used)
    :return: a copy of the updated map
    :raises CkanNotFoundError: if a package is not found and error_not_found is True
    """
    start = time.time()
    # Explicit options are stored on the map; options left to None take the value stored there.
    self.set_default_map_mode(datastore_info=datastore_info, resource_view_list=resource_view_list,
                              organization_info=organization_info, license_list=license_list)
    datastore_info = self.map._mapping_query_datastore_info
    resource_view_list = self.map._mapping_query_resource_view_list
    license_list = self.map._mapping_query_license_list
    organization_info = self.map._mapping_query_organization_info

    if organization_info:
        if owner_org is None:
            self.organization_list_all(cancel_if_present=True)
        else:
            self.get_organization_info_or_request(owner_org)

    package_list = self.complete_package_list(package_list=package_list, owner_org=owner_org, params=params)

    for name in package_list:
        pkg_info = self.map.get_package_info(name, error_not_mapped=False)
        # A mapped package is requested again when DataStore information is wanted
        # but was not requested the last time it was mapped.
        needs_request = ((not only_missing) or pkg_info is None
                         or (datastore_info and not pkg_info.requested_datastore_info))
        if not needs_request:
            continue
        try:
            pkg_info = self.package_show(name, params=params)
        except CkanNotFoundError:
            if error_not_found:
                raise  # re-raise the active exception without chaining it to itself
            continue
        pkg_info.resources_id_index = {}  # reset
        for j, resource_info in enumerate(pkg_info.package_resources.values()):
            resource_name = resource_info.name
            resource_info.index_in_package = j
            self._enrich_resource_info(resource_info, datastore_info=datastore_info, resource_view_list=resource_view_list)
            if resource_name not in pkg_info.resources_id_index:
                # first occurrence of the name: it becomes the reference for this name
                pkg_info.resources_id_index[resource_name] = resource_info.id
                pkg_info.resources_id_index_counts[resource_name] = 1
            else:
                pkg_info.resources_id_index_counts[resource_name] += 1
                msg = "Two or more resources with same name {} were found during mapping.".format(resource_name)
                warn(msg)
        pkg_info.requested_datastore_info = datastore_info
        if pkg_info.organization_info is not None:
            self.map._update_organization_info(pkg_info.organization_info)
        self.map.packages_id_index[pkg_info.name] = pkg_info.id
        self.map.packages[pkg_info.id] = pkg_info
        self.map.resources.update({resource_info.id: resource_info for resource_info in pkg_info.package_resources.values()})
    if license_list:
        self._api_license_list(params=params)
    current = time.time()
    if self.params.verbose_multi_requests:
        print(f"{self.identifier} Resources mapped in {current - start} seconds")
    return self.map.copy()
|
|
264
|
+
|
|
265
|
+
def remap_resources(self, *, params=None, purge:bool=True,
                    datastore_info:bool=None, resource_view_list:bool=None, organization_info:bool=None, license_list:bool=None):
    """
    Perform a new request on previously mapped packages.

    :param params: optional parameters to pass to API calls
    :param purge: option to reset the map before remapping.
    :param datastore_info: enforce the request of api_datastore_info
    :param resource_view_list: enforce the request of view_list API for each resource
    :param organization_info: forwarded to map_resources
    :param license_list: enforce the request of license_list API
    :return: the value returned by map_resources
    """
    # The package names must be collected before the map is purged.
    package_list = list(self.map.packages_id_index.keys())
    if purge:
        self.map.purge()
    # only_missing=False: without it, packages still present in the map (purge=False)
    # would be skipped by map_resources and no new request would be performed.
    return self.map_resources(package_list, params=params,
                              datastore_info=datastore_info, resource_view_list=resource_view_list,
                              organization_info=organization_info, license_list=license_list,
                              only_missing=False)
|
|
283
|
+
|
|
284
|
+
def get_resource_id_or_request(self, resource_name:str, package_name:str, *,
                               request_missing:bool=True, error_not_mapped:bool=False,
                               error_not_found:bool=True) -> Union[str,None]:
    """
    Get the id of a resource if present in the map or perform a request.

    :param resource_name: resource name or id
    :param package_name: package name or id. When given and the resource is not mapped, the package is mapped
        with map_resources. When None, ``resource_name`` is passed to resource_show.
    :param request_missing: confirm to perform the request if the information is missing
    :param error_not_mapped: forwarded to the map lookups
    :param error_not_found: raise an error if the API does not find the requested object (otherwise return None)
    :return: resource id, or None if it could not be determined
    :raises CkanNotFoundError: if the API does not find the requested object and error_not_found is True
    """
    resource_id = self.map.get_resource_id(resource_name, package_name, error_not_mapped=error_not_mapped)
    if resource_id is not None or not request_missing:
        return resource_id
    if package_name is not None:
        self.map_resources(package_name, error_not_found=error_not_found)
        return self.map.get_resource_id(resource_name, package_name, error_not_mapped=error_not_mapped)
    try:
        # resource_id is None at this point: the caller-provided identifier must be requested.
        return self.resource_show(resource_name).id
    except CkanNotFoundError:
        if error_not_found:
            raise
        return None
|
|
302
|
+
|
|
303
|
+
def get_resource_info_or_request(self, resource_name:str, package_name:str=None, *,
                                 request_missing:bool=True, error_not_mapped:bool=False,
                                 error_not_found:bool=True) -> Union[CkanResourceInfo,None]:
    """
    Get information on a resource designated by its name and package.

    The resource id is first resolved with get_resource_id_or_request, then the information is obtained
    with get_resource_info_or_request_of_id. The three options are forwarded to both calls.

    :param resource_name: forwarded to get_resource_id_or_request
    :param package_name: forwarded to get_resource_id_or_request
    :param request_missing: forwarded
    :param error_not_mapped: forwarded
    :param error_not_found: forwarded
    :return: resource information, or None if the resource id could not be resolved
    """
    options = dict(request_missing=request_missing, error_not_mapped=error_not_mapped,
                   error_not_found=error_not_found)
    resource_id = self.get_resource_id_or_request(resource_name, package_name, **options)
    if resource_id is not None:
        return self.get_resource_info_or_request_of_id(resource_id, **options)
    return None
|
|
312
|
+
|
|
313
|
+
def get_resource_info_or_request_of_id(self, resource_id:str, *,
                                       request_missing:bool=True, error_not_mapped:bool=False,
                                       error_not_found:bool=True) -> Union[CkanResourceInfo,None]:
    """
    Get information on a resource if present in the map or perform request.
    Recommended: self.map.get_resource_info() rather than this for this usage because resource information is returned
    when calling package_info during the mapping process.

    :param resource_id: resource id
    :param request_missing: confirm to perform the request if the information is missing
    :param error_not_mapped: raise error if the resource is not mapped
    :param error_not_found: raise an error if the API does not find the resource (otherwise return None)
    :return: resource information, or None if it is neither mapped nor obtained by request
    :raises CkanNotFoundError: if the API does not find the resource and error_not_found is True
    """
    resource_info = self.map.get_resource_info(resource_id, error_not_mapped=error_not_mapped)
    if resource_info is not None or not request_missing:
        return resource_info
    try:
        return self.resource_show(resource_id)
    except CkanNotFoundError:
        if error_not_found:
            raise  # re-raise the active exception without chaining it to itself
        return None
|
|
339
|
+
|
|
340
|
+
def get_datastore_info_or_request(self, resource_name:str, package_name:str=None, *,
                                  request_missing:bool=True, error_not_mapped:bool=False,
                                  error_not_found:bool=True) -> Union[CkanDataStoreInfo,None]:
    """
    Get information on a DataStore if present in the map or perform request.

    :param resource_name: resource name or id
    :param package_name: package name or id (required if the resource name is provided)
    :param request_missing: confirm to perform the request if the information is missing
    :param error_not_mapped: raise error if the resource is not mapped
    :param error_not_found: forwarded to map_resources and get_datastore_info_or_request_of_id
    :return: DataStore information, or None if the resource id could not be resolved
    """
    resource_id = self.map.get_resource_id(resource_name, package_name, error_not_mapped=error_not_mapped)
    may_map_package = request_missing and package_name is not None
    if resource_id is None and may_map_package:
        # Map the package, then look the resource up a second time.
        self.map_resources(package_name, error_not_found=error_not_found)
        resource_id = self.map.get_resource_id(resource_name, package_name, error_not_mapped=error_not_mapped)
    if resource_id is None:
        return None  # resource not mapped
    return self.get_datastore_info_or_request_of_id(resource_id, request_missing=request_missing, error_not_found=error_not_found)
|
|
360
|
+
|
|
361
|
+
def get_datastore_info_or_request_of_id(self, resource_id: str, *,
                                        request_missing: bool = True, error_not_mapped: bool = False,
                                        error_not_found: bool = True) -> Union[CkanDataStoreInfo, None]:
    """
    Get information on a DataStore if present in the map or perform request.

    :param resource_id: resource id
    :param request_missing: confirm to perform the request if the information is missing
    :param error_not_mapped: NOTE(review): accepted but not used, the map lookup is always performed with
        error_not_mapped=False - confirm whether this is intended.
    :param error_not_found: raise an error if the API does not find the DataStore (otherwise return None)
    :return: DataStore information, or None if it is neither mapped nor obtained by request
    :raises CkanNotFoundError: if the API does not find the DataStore and error_not_found is True
    """
    datastore_info = self.map.get_datastore_info(resource_id, error_not_mapped=False)
    if datastore_info is not None or not request_missing:
        return datastore_info
    try:
        return self.datastore_info(resource_id)
    except CkanNotFoundError:
        if error_not_found:
            raise  # re-raise the active exception without chaining it to itself
        return None
|
|
385
|
+
|
|
386
|
+
def get_datastore_fields_or_request(self, resource_id:str, *,
                                    request_missing:bool=True, error_not_mapped:bool=False,
                                    error_not_found:bool=True, return_list:bool=False) -> Union[List[dict], OrderedDict[str,CkanField],None]:
    """
    Get the fields of a DataStore, from the information obtained with get_datastore_info_or_request_of_id.

    :param resource_id: resource id
    :param request_missing: forwarded to get_datastore_info_or_request_of_id
    :param error_not_mapped: forwarded to get_datastore_info_or_request_of_id
    :param error_not_found: forwarded to get_datastore_info_or_request_of_id
    :param return_list: when True, return the list of to_ckan_dict() results of the fields
        instead of the ``fields_dict`` object itself
    :return: the fields, or None if the DataStore information or its ``fields_dict`` is not available
    """
    datastore_info = self.get_datastore_info_or_request_of_id(resource_id, error_not_mapped=error_not_mapped,
                                                              request_missing=request_missing, error_not_found=error_not_found)
    if datastore_info is None or datastore_info.fields_dict is None:
        return None
    fields = datastore_info.fields_dict
    if return_list:
        return [field.to_ckan_dict() for field in fields.values()]
    return fields
|
|
398
|
+
|
|
399
|
+
def get_resource_view_list_or_request(self, resource_id:str, error_not_found:bool=True) -> Union[List[CkanViewInfo],None]:
    """
    Returns either the resource view list which was already received or emits a new query for this information.

    :param resource_id: resource id
    :param error_not_found: forwarded to get_resource_info_or_request_of_id
    :return: list of the views of the resource, or None if the resource information is not available
    """
    resource_info = self.get_resource_info_or_request_of_id(resource_id, error_not_found=error_not_found)
    if resource_info is None:
        return None
    if not resource_info.view_is_full_list:
        # The complete list is requested here, so it is passed with view_list=True,
        # as done in _enrich_resource_info.
        resource_info.update_view(self.resource_view_list(resource_id), view_list=True)
    return list(resource_info.views.values())
|
|
413
|
+
|
|
414
|
+
def get_package_info_or_request(self, package_name:str, *,
                                request_missing:bool=True, error_not_mapped:bool=False, error_not_found:bool=True,
                                datastore_info:bool=None, resource_view_list:bool=None, organization_info:bool=None,
                                license_list:bool=None,) -> Union[CkanPackageInfo,None]:
    """
    Get information on a Package if present in the map or perform request.

    :param package_name: package name or id
    :param request_missing: confirm to perform the request if the information is missing
    :param error_not_mapped: raise error if the package is not mapped
    :param error_not_found: forwarded to map_resources
    :param datastore_info: forwarded to map_resources
    :param resource_view_list: forwarded to map_resources
    :param organization_info: forwarded to map_resources
    :param license_list: forwarded to map_resources
    :return: package information, or None if it is not available
    """
    package_info = self.map.get_package_info(package_name, error_not_mapped=error_not_mapped)
    if package_info is not None or not request_missing:
        return package_info
    # request DataStore information if parameterized for
    self.map_resources(package_name, error_not_found=error_not_found,
                       datastore_info=datastore_info, resource_view_list=resource_view_list,
                       organization_info=organization_info, license_list=license_list)
    return self.map.get_package_info(package_name, error_not_mapped=error_not_mapped)
|
|
436
|
+
|
|
437
|
+
def get_organization_info_or_request(self, organization_name:str, *,
                                     request_missing:bool=True, error_not_mapped:bool=False,
                                     error_not_found:bool=True) -> Union[CkanOrganizationInfo,None]:
    """
    Get information on an Organization if present in the map or perform request.

    :param organization_name: organization name or id
    :param request_missing: confirm to perform the request if the information is missing
    :param error_not_mapped: raise error if the organization is not mapped
    :param error_not_found: raise an error if the API does not find the organization (otherwise return None)
    :return: organization information, or None if it is neither mapped nor obtained by request
    :raises CkanNotFoundError: if the API does not find the organization and error_not_found is True
    """
    organization_info = self.map.get_organization_info(organization_name, error_not_mapped=error_not_mapped)
    if organization_info is not None or not request_missing:
        return organization_info
    try:
        return self.organization_show(organization_name)
    except CkanNotFoundError:
        if error_not_found:
            raise  # re-raise the active exception without chaining it to itself
        return None
|
|
461
|
+
|
|
462
|
+
## API calls needed to make the map and auxiliary API functions ------------------
|
|
463
|
+
def _api_package_search(self, *, params:dict=None, owner_org:str=None, filter:dict=None, q:str=None,
                        include_private:bool=True, include_drafts:bool=False, sort:str=None,
                        facet:bool=False, limit:int=None, offset:int=None) -> List[CkanPackageInfo]:
    """
    API call to package_search.

    The dicts given as params and filter are copied before being completed, so the caller's
    objects are left untouched.

    :param owner_org: ability to filter packages by owner_org
    :param filter: dict of filters to apply, which translate to the API fq argument
        fq documentation: any filter queries to apply. Note: +site_id:{ckan_site_id} is added to this string prior to the query being executed.
    :param q: the solr query. Optional. Default is '*:*'
    :param include_private: if True, private datasets will be included in the results. Only private datasets from the user’s organizations will be returned and sysadmins will be returned all private datasets. Optional, the default is False in the API
    :param include_drafts: if True, draft datasets will be included in the results. A user will only be returned their own draft datasets, and a sysadmin will be returned all draft datasets. Optional, the default is False.
    :param sort: sorting of the search results. Optional. Default: 'score desc, metadata_modified desc'. As per the solr documentation, this is a comma-separated string of field names and sort-orderings.
    :param facet: whether to enable faceted results. Default: True in API.
    :param limit: maximum number of results to return. Translates to the API rows argument.
        If None, self.params.default_limit_list is used.
    :param offset: the offset in the complete result for where the set of returned datasets should begin. Translates to the API start argument.
    :param params: other parameters to pass to package_search
    :return: list of the packages found (empty list for a dry run)
    """
    # work on copies: this method is called repeatedly with the same dicts when paginating
    params = {} if params is None else dict(params)
    if filter is not None:
        filter = dict(filter)
    if limit is None: limit = self.params.default_limit_list
    if limit is not None:
        params["rows"] = limit
    if offset is not None:
        params["start"] = offset
    if owner_org is None and use_ckan_owner_org_as_default:
        owner_org = self.owner_org
    if owner_org is not None:
        # the filter is applied on the organization id, whatever identifier was given
        owner_org_info = self.get_organization_info_or_request(owner_org)
        owner_org = owner_org_info.id
        if filter is None: filter = {}
        filter["owner_org"] = owner_org
    if q is not None:
        params["q"] = q
    if filter is not None:
        params["fq"] = '+'.join([f"{key}:{value}" for key, value in filter.items()])
    if sort is not None:
        params["sort"] = sort
    if facet is not None:
        params["facet"] = facet
    if include_private is not None:
        params["include_private"] = include_private
    if include_drafts is not None:
        params["include_drafts"] = include_drafts
    response = self._api_action_request("package_search", method=RequestType.Get, params=params)
    if response.dry_run:
        return []
    elif response.success:
        package_info_list = [CkanPackageInfo(e) for e in response.result["results"]]
        # update map
        self.map._update_package_info(package_info_list)
        return package_info_list
    else:
        raise response.default_error(self)
|
|
516
|
+
|
|
517
|
+
def _api_package_search_all(self, *, params:dict=None, owner_org:str=None, filter:dict=None, q:str=None,
                            include_private:bool=True, include_drafts:bool=False, sort:str=None,
                            facet:bool=False, limit:int=None, offset:int=None, search_all:bool=True) -> List[CkanPackageInfo]:
    """
    Repeat the package_search API call through _request_all_results_list and
    concatenate the lists obtained into a single list.

    :see: _api_package_search()
    :param owner_org: ability to filter packages by owner_org
    :param filter: dict of filters to apply, which translate to the API fq argument
    :param q: the solr query. Optional. Default is '*:*'
    :param include_private: if True, private datasets will be included in the results
    :param include_drafts: if True, draft datasets will be included in the results
    :param sort: sorting of the search results (comma-separated field names and sort-orderings)
    :param facet: whether to enable faceted results
    :param limit: maximum number of results per request. Translates to the API rows argument.
    :param offset: offset of the first result. Translates to the API start argument.
    :param search_all: forwarded to _request_all_results_list (presumably disables the repetition when False - to be confirmed)
    :param params: other parameters to pass to package_search
    :return: concatenation of the lists returned by each call
    """
    if params is None:
        params = {}
    search_arguments = dict(owner_org=owner_org, filter=filter, q=q, sort=sort, facet=facet,
                            include_private=include_private, include_drafts=include_drafts)
    pages = self._request_all_results_list(self._api_package_search, params=params, limit=limit, offset=offset,
                                           search_all=search_all, **search_arguments)
    merged = []
    for page in pages:
        merged = merged + page
    return merged
|
|
543
|
+
|
|
544
|
+
def package_search_all(self, *, params:dict=None, owner_org:str=None, filter:dict=None, q:str=None,
                       include_private:bool=True, include_drafts:bool=False, sort:str=None,
                       facet:bool=False, limit:int=None, offset:int=None, search_all:bool=True) -> List[CkanPackageInfo]:
    """
    Public alias of _api_package_search_all(): every argument is forwarded unchanged.

    :see: _api_package_search_all()
    """
    forwarded = dict(params=params, owner_org=owner_org, filter=filter, q=q,
                     include_private=include_private, include_drafts=include_drafts, sort=sort,
                     facet=facet, limit=limit, offset=offset, search_all=search_all)
    return self._api_package_search_all(**forwarded)
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
def _api_package_show(self, package_id, *, params:dict=None) -> CkanPackageInfo:
    """
    API call to package_show. Returns the information on the package and the resources contained in the package.
    Not recommended for outer use because this method does not return information about the DataStores. Prefer the map_resources method.

    :see: map_resources()
    :param package_id: package id.
    :param params: See API documentation. The "id" key is set to package_id.
    :return: a copy of the package information recorded in the map
    :raises CkanNotFoundError: on a 404 response whose error type is "Not Found Error"
    """
    if params is None:
        params = {}
    params["id"] = package_id
    response = self._api_action_request("package_show", method=RequestType.Get, params=params)
    if not response.success:
        not_found = (response.status_code == 404 and response.success_json_loads
                     and response.error_message["__type"] == "Not Found Error")
        if not_found:
            raise CkanNotFoundError(self, "Package", response)
        raise response.default_error(self)
    package_info = CkanPackageInfo(response.result)
    # update map
    self.map._update_package_info(package_info)
    return package_info.copy()
|
|
575
|
+
|
|
576
|
+
def package_show(self, package_id, *, params:dict=None) -> CkanPackageInfo:
    """
    Public alias of _api_package_show().

    :see: _api_package_show()
    """
    package_info = self._api_package_show(package_id=package_id, params=params)
    return package_info
|
|
579
|
+
|
|
580
|
+
def _api_resource_show(self, resource_id, *, params:dict=None) -> CkanResourceInfo:
    """
    API call to resource_show. Returns the metadata on a resource.

    :param resource_id: resource id.
    :param params: See API documentation. The "id" key is set to resource_id.
    :return: the resource information built from the response
    :raises CkanNotFoundError: on a 404 response whose error type is "Not Found Error"
    """
    if params is None:
        params = {}
    params["id"] = resource_id
    response = self._api_action_request("resource_show", method=RequestType.Get, params=params)
    if not response.success:
        not_found = (response.status_code == 404 and response.success_json_loads
                     and response.error_message["__type"] == "Not Found Error")
        if not_found:
            raise CkanNotFoundError(self, "Resource", response)
        raise response.default_error(self)
    return CkanResourceInfo(response.result)
|
|
597
|
+
|
|
598
|
+
def resource_show(self, resource_id, *, params:dict=None) -> CkanResourceInfo:
    """
    Public alias of _api_resource_show().

    :see: _api_resource_show()
    """
    resource_info = self._api_resource_show(resource_id=resource_id, params=params)
    return resource_info
|
|
601
|
+
|
|
602
|
+
def _api_datastore_info(self, resource_id:str, *, params:dict=None, display_request_not_found:bool=True) -> CkanDataStoreInfo:
    """
    API call to datastore_info. Returns the information on the DataStore. Used to know the number of rows in a DataStore.

    :param resource_id: resource id.
    :param params: N/A. The "id" key is set to resource_id.
    :param display_request_not_found: whether to display the request in the command window, in case of a CkanNotFoundError.
        This option is recommended if you are testing whether the resource has a DataStore or not.
    :return: a copy of the DataStore information recorded in the map
    :raises CkanNotFoundError: on a 404 response whose error type is "Not Found Error"
    """
    if params is None:
        params = {}
    params["id"] = resource_id
    response = self._api_action_request("datastore_info", method=RequestType.Post, json=params)
    if not response.success:
        not_found = (response.status_code == 404 and response.success_json_loads
                     and response.error_message["__type"] == "Not Found Error")
        if not_found:
            raise CkanNotFoundError(self, "DataStore", response, display_request=display_request_not_found)
        raise response.default_error(self)
    datastore_info = CkanDataStoreInfo(response.result)
    # update map
    self.map._update_datastore_info(datastore_info)
    return datastore_info.copy()
|
|
623
|
+
|
|
624
|
+
def datastore_info(self, resource_id:str, *, params:dict=None, display_request_not_found:bool=True) -> CkanDataStoreInfo:
    """
    Public alias of _api_datastore_info().

    :see: _api_datastore_info()
    """
    info = self._api_datastore_info(resource_id=resource_id, params=params,
                                    display_request_not_found=display_request_not_found)
    return info
|
|
627
|
+
|
|
628
|
+
def _api_resource_view_list(self, resource_id:str, *, params:dict=None) -> List[CkanViewInfo]:
    """
    API call to resource_view_list.

    :param resource_id: resource id, sent as the "id" parameter
    :param params: other parameters to pass to resource_view_list
    :return: deep copy of the list of views recorded in the map
    """
    params = {} if params is None else params
    params["id"] = resource_id
    response = self._api_action_request("resource_view_list", method=RequestType.Get, params=params)
    if not response.success:
        raise response.default_error(self)
    views = []
    for view_dict in response.result:
        views.append(CkanViewInfo(view_dict))
    # update map
    self.map._update_view_info(views, view_list=True)
    return copy.deepcopy(views)
|
|
645
|
+
|
|
646
|
+
def resource_view_list(self, resource_id:str, *, params:dict=None) -> List[CkanViewInfo]:
    """
    Public alias of _api_resource_view_list().

    :see: _api_resource_view_list()
    """
    view_info_list = self._api_resource_view_list(resource_id=resource_id, params=params)
    return view_info_list
|
|
649
|
+
|
|
650
|
+
def _api_organization_show(self, id:str, *, params:dict=None) -> CkanOrganizationInfo:
    """
    API call to organization_show.

    :param id: organization id or name. Not added to the request parameters if None.
    :param params: other parameters to pass to organization_show
    :return: a copy of the organization information recorded in the map
    :raises CkanNotFoundError: on a 404 response whose error type is "Not Found Error"
    """
    if params is None:
        params = {}
    if id is not None:
        params["id"] = id
    response = self._api_action_request("organization_show", method=RequestType.Get, params=params)
    if not response.success:
        not_found = (response.status_code == 404 and response.success_json_loads
                     and response.error_message["__type"] == "Not Found Error")
        if not_found:
            raise CkanNotFoundError(self, "Organization", response)
        raise response.default_error(self)
    organization_info = CkanOrganizationInfo(response.result)
    # update map
    self.map._update_organization_info(organization_info)
    return organization_info.copy()
|
|
671
|
+
|
|
672
|
+
def organization_show(self, id:str, *, params:dict=None) -> CkanOrganizationInfo:
    """
    Public alias of _api_organization_show().

    :see: _api_organization_show()
    """
    organization_info = self._api_organization_show(id=id, params=params)
    return organization_info
|
|
675
|
+
|
|
676
|
+
def _api_organization_list(self, *, params:dict=None, all_fields:bool=True,
                           include_users:bool=False,
                           limit:int=None, offset:int=None) -> Union[List[CkanOrganizationInfo], List[str]]:
    """
    API call to organization_list.

    :param params: other parameters to pass to organization_list
    :param all_fields: whether to return full information or only the organization names in a list
    :param include_users: sent as the include_users API argument
    :param limit: sent as the limit API argument. If None, self.params.default_limit_list is used.
    :param offset: sent as the offset API argument, if not None
    :return: deep copy of the organization information list if all_fields, else the raw result of the API (list of names)
    """
    if params is None:
        params = {}
    if limit is None:
        limit = self.params.default_limit_list
    if limit is not None:
        params["limit"] = limit
    if offset is not None:
        params["offset"] = offset
    params["all_fields"] = all_fields
    params["include_users"] = include_users
    response = self._api_action_request("organization_list", method=RequestType.Get, params=params)
    if not response.success:
        raise response.default_error(self)
    if not all_fields:
        # the API returned a list of organization names
        return response.result
    # the API returned a list of dicts
    organizations = [CkanOrganizationInfo(organization_dict) for organization_dict in response.result]
    # update map
    self.map._update_organization_info(organizations)
    return copy.deepcopy(organizations)
|
|
707
|
+
|
|
708
|
+
def _api_organization_list_all(self, *, params:dict=None, all_fields:bool=True,
                               include_users:bool=False,
                               limit:int=None, offset:int=None) -> Union[List[CkanOrganizationInfo], List[str]]:
    """
    Repeat the organization_list API call through _request_all_results_list and
    concatenate the lists obtained. The map is then flagged as listing all organizations.

    :see: _api_organization_list()
    :param params: other parameters to pass to organization_list
    :param all_fields: whether to return full information or only the organization names
    :param include_users: sent as the include_users API argument; also recorded in the map
    :param limit: limit per request
    :param offset: offset of the first request
    :return: concatenation of the lists returned by each call
    """
    if params is None:
        params = {}
    pages = self._request_all_results_list(self._api_organization_list, params=params, limit=limit, offset=offset,
                                           all_fields=all_fields, include_users=include_users)
    # remember that the complete list was requested, and with which include_users option
    self.map.organizations_listed_all = True
    self.map.organizations_listed_all_users = include_users
    merged = []
    for page in pages:
        merged = merged + page
    return merged
|
|
724
|
+
|
|
725
|
+
def organization_list_all(self, *, cancel_if_present:bool=False, params:dict=None,
                          all_fields:bool=True, include_users:bool=False,
                          limit:int=None, offset:int=None) -> Union[List[CkanOrganizationInfo], List[str]]:
    """
    API call to organization_list, repeated through _api_organization_list_all().
    The call can be canceled if the list is already present (not recommended, rather use get_organization_info_or_request).

    :see: _api_organization_list_all()
    :param cancel_if_present: option to cancel when list is already present. The mapped organizations are
        returned only if they were all listed with the same include_users option.
    :param params: other parameters to pass to organization_list
    :param all_fields: whether to return full information or only the organization names
    :param include_users: sent as the include_users API argument
    :param limit: limit per request
    :param offset: offset of the first request
    :return: the organizations of the map if the call is canceled, else the result of the requests
    """
    reuse_map = (self.map.organizations_listed_all and cancel_if_present
                 and self.map.organizations_listed_all_users == include_users)
    if not reuse_map:
        return self._api_organization_list_all(params=params, all_fields=all_fields, include_users=include_users,
                                               limit=limit, offset=offset)
    return list(self.map.organizations.values())
|
|
741
|
+
|
|
742
|
+
def _api_license_list(self, *, params:dict=None) -> List[CkanLicenseInfo]:
    """
    API call to license_list.

    :param params: parameters sent as the JSON body of the request
    :return: deep copy of the list of licenses recorded in the map
    """
    params = {} if params is None else params
    response = self._api_action_request("license_list", method=RequestType.Post, json=params)
    if not response.success:
        raise response.default_error(self)
    licenses = []
    for license_dict in response.result:
        licenses.append(CkanLicenseInfo(license_dict))
    # update map
    self.map._update_license_info(licenses)
    return copy.deepcopy(licenses)
|
|
758
|
+
|
|
759
|
+
def license_list(self, *, cancel_if_present:bool=True, params:dict=None) -> List[CkanLicenseInfo]:
    """
    API call to license_list. The call can be canceled if the list is already present.

    :param cancel_if_present: option to cancel when list is already present (the map holds at least one license).
    :param params: parameters forwarded to _api_license_list()
    :return: the licenses of the map if the call is canceled, else the result of the request
    """
    licenses_mapped = len(self.map.licenses) > 0
    if not (licenses_mapped and cancel_if_present):
        return self._api_license_list(params=params)
    return list(self.map.licenses.values())
|
|
771
|
+
|
|
772
|
+
def resource_is_datastore(self, resource_id:str) -> bool:
    """
    Basic test to know whether a resource is DataStore: the datastore_info request
    is attempted and a CkanNotFoundError is interpreted as a negative answer.

    :param resource_id: resource id
    :return: False if datastore_info raised CkanNotFoundError, True otherwise
    """
    try:
        # the request is not displayed if the DataStore is not found: this is an expected outcome here
        self.datastore_info(resource_id, display_request_not_found=False)
    except CkanNotFoundError:
        return False
    else:
        return True
|
|
784
|
+
|
|
785
|
+
def get_package_page_url(self, package_name:str, *, error_not_found:bool=True) -> str:
    """
    Get URL of package presentation page in CKAN (landing page).

    :param package_name: package identifier, resolved with get_package_info_or_request()
    :param error_not_found: forwarded to get_package_info_or_request()
    :return: the URL, or None if no package information was obtained
    """
    self._error_empty_url()
    package_info = self.get_package_info_or_request(package_name, error_not_found=error_not_found)
    if package_info is None:
        return None
    page_path = "dataset" + urlsep + package_info.name
    return url_join(self.url, page_path)
|
|
800
|
+
|
|
801
|
+
def get_resource_page_url(self, resource_name:str, package_name:str=None,
                          *, error_not_mapped:bool=True) -> str:
    """
    Get URL of resource presentation page in CKAN (landing page).
    Only the map is consulted: no request is performed by this method.

    :param resource_name: resource identifier, looked up in the map
    :param package_name: package identifier passed to the map lookup of the resource
    :param error_not_mapped: forwarded to the map lookup of the resource
    :return: the URL, or None if the resource is not in the map
    """
    self._error_empty_url()
    resource_info = self.map.get_resource_info(resource_name, package_name=package_name, error_not_mapped=error_not_mapped)
    if resource_info is None:
        return None
    package_info = self.map.get_package_info(resource_info.package_id)
    page_path = "dataset" + urlsep + package_info.name + urlsep + "resource" + urlsep + resource_info.id
    return url_join(self.url, page_path)
|
|
817
|
+
|
|
818
|
+
def test_ckan_connection(self, raise_error:bool=False) -> bool:
    """
    Test if the CKAN URL aims to a CKAN server by testing the package_search API.
    This does not check authentication.

    :param raise_error: if True, the CkanActionError with status code 220 is raised instead of returning False
    :return: True if the request did not raise a CkanActionError, False if it raised one with
        status code 220 and raise_error is False
    :raises CkanActionError: for any other status code, whatever the value of raise_error
    """
    try:
        self.package_search_all(limit=1, search_all=False)
    except CkanActionError as e:
        # NOTE(review): 220 is not a standard HTTP status code - confirm the intended value
        if e.status_code == 220 and not raise_error:
            return False
        # bare raise re-raises the active exception with its original traceback and chain
        # (the former "raise e from e" made the exception its own __cause__)
        raise
    return True
|
|
834
|
+
|
|
835
|
+
def _api_user_show(self, *, params:dict=None) -> Union[CkanUserInfo,None]:
    """
    API call to user_show. With no params, returns the name of the current user logged in.

    :param params: parameters to pass to user_show
    :return: a copy of the user information recorded in the map
    :raises CkanNotFoundError: on a 404 response whose error type is "Not Found Error"
    """
    if params is None:
        params = {}
    response = self._api_action_request("user_show", method=RequestType.Get, params=params)
    if not response.success:
        not_found = (response.status_code == 404 and response.success_json_loads
                     and response.error_message["__type"] == "Not Found Error")
        if not_found:
            raise CkanNotFoundError(self, "User", response)
        raise response.default_error(self)
    user_info = CkanUserInfo(response.result)
    # update map
    self.map._update_user_info(user_info)
    return user_info.copy()
|
|
851
|
+
|
|
852
|
+
def query_current_user(self, *, verbose:bool=None, error_not_found:bool=False) -> Union[CkanUserInfo,None]:
    """
    Query the user_show API without parameters to identify the current user.

    :param verbose: print the outcome of the query. If None, self.params.verbose_extra is used.
    :param error_not_found: if True, the CkanNotFoundError raised by the request is propagated;
        if False, the user is considered as not authenticated and None is returned
    :return: information on the current user, or None if the request raised CkanNotFoundError
    """
    if verbose is None:
        verbose = self.params.verbose_extra
    try:
        user_info = self._api_user_show()
    except CkanNotFoundError:
        if error_not_found:
            # bare raise re-raises the active exception with its original traceback and chain
            # (the former "raise e from e" made the exception its own __cause__)
            raise
        user_info = None
    if verbose:
        if user_info is not None:
            print("Authenticated as " + user_info.name)
        else:
            print("User not authenticated")
    return user_info
|
|
868
|
+
|
|
869
|
+
def test_ckan_login(self, *, raise_error:bool=False, verbose:bool=None,
                    empty_key_connected:bool=True) -> bool:
    """
    Test whether the current API key authenticates a user, using query_current_user().

    :param raise_error: raise an error instead of returning False when a non-empty API key did not authenticate a user
    :param verbose: forwarded to query_current_user()
    :param empty_key_connected: value returned when no user is identified and the API key is empty
    :return: True if a user was identified; empty_key_connected if not and the API key is empty; False otherwise
    :raises ConnectionError: if raise_error and a non-empty API key did not authenticate a user
    """
    # a not-found error is only requested from the query when an empty key is not accepted
    propagate_not_found = raise_error and not empty_key_connected
    user_info = self.query_current_user(verbose=verbose, error_not_found=propagate_not_found)
    if user_info is not None:
        return True
    if self.apikey.is_empty():
        return empty_key_connected
    if raise_error:
        raise ConnectionError("The current API key did not authenticate a user")
    return False
|
|
881
|
+
|
|
882
|
+
|
|
883
|
+
## List users and groups
|
|
884
|
+
def _api_user_list(self, *, params:dict=None) -> List[CkanUserInfo]:
    """
    API call to user_list. The map is updated and flagged as listing all users.

    :param params: parameters sent as the JSON body of the request
    :return: deep copy of the list of users recorded in the map
    """
    params = {} if params is None else params
    response = self._api_action_request("user_list", method=RequestType.Post, json=params)
    if not response.success:
        raise response.default_error(self)
    users = []
    for user_dict in response.result:
        users.append(CkanUserInfo(user_dict))
    # update map
    self.map._update_user_info(users)
    self.map.users_listed_all = True
    return copy.deepcopy(users)
|
|
901
|
+
|
|
902
|
+
def user_list(self, *, cancel_if_present:bool=False, params:dict=None) -> List[CkanUserInfo]:
    """
    API call to user_list. The call can be canceled if the list is already present.

    :param cancel_if_present: option to cancel when list is already present (the map is flagged as listing all users).
    :param params: parameters forwarded to _api_user_list()
    :return: the users of the map if the call is canceled, else the result of the request
    """
    # users_listed_all is a flag: test its truth value, as done for the other *_listed_all flags
    if self.map.users_listed_all and cancel_if_present:
        return list(self.map.users.values())
    else:
        return self._api_user_list(params=params)
|
|
914
|
+
|
|
915
|
+
def _api_package_collaborator_list(self, package_id:str, *, params:dict=None,
                                   cancel_if_present:bool=False) -> Dict[str,CkanCollaboration]:
    """
    API call to package_collaborator_list. The collaborators are stored in the
    package information obtained with get_package_info_or_request().

    :param package_id: package id, sent as the "id" parameter and compared to the package_id of each result
    :param params: other parameters sent in the JSON body of the request
    :param cancel_if_present: return the collaborators already stored in the package information, if any, without request
    :return: dict of the collaborations of the package, indexed by user id
    """
    if cancel_if_present:
        known_package = self.get_package_info_or_request(package_id)
        if known_package.collaborators is not None:
            return known_package.collaborators
    if params is None:
        params = {}
    params["id"] = package_id
    response = self._api_action_request("package_collaborator_list", method=RequestType.Post, json=params)
    if not response.success:
        raise response.default_error(self)
    package_info = self.get_package_info_or_request(package_id)
    # the previous collaborators are discarded before reading the response
    package_info.collaborators = {}
    for collaborator_dict in response.result:
        assert (collaborator_dict["package_id"] == package_id)
        user_id = collaborator_dict["user_id"]
        package_info.collaborators[user_id] = CkanCollaboration(d=collaborator_dict)
    return package_info.collaborators
|
|
939
|
+
|
|
940
|
+
def package_collaborator_list(self, package_id:str, *, params:dict=None,
                              cancel_if_present:bool=False) -> Dict[str,CkanCollaboration]:
    """
    Public alias of _api_package_collaborator_list().

    :see: _api_package_collaborator_list()
    """
    collaborators = self._api_package_collaborator_list(package_id=package_id, params=params,
                                                        cancel_if_present=cancel_if_present)
    return collaborators
|
|
944
|
+
|
|
945
|
+
def _api_group_list(self, *, limit:int=None, offset:int=0,
                    all_fields:bool=True, include_users:bool=True,
                    params:dict=None) -> Union[List[CkanGroupInfo], List[str]]:
    """
    API call to group_list.

    :param limit: sent as the limit API argument. If None, self.params.default_limit_list is used.
    :param offset: sent as the offset API argument, if not None
    :param all_fields: whether to return full information or only the group names. Forced to True if include_users.
    :param include_users: sent as the include_users API argument. The users of each group are then
        recorded in the map and their capacity stored in the user_members attribute of the group.
    :param params: other parameters sent in the JSON body of the request
    :return: deep copy of the group information list if all fields are requested, else the raw result of the API (list of names)
    """
    if params is None:
        params = {}
    if limit is None:
        limit = self.params.default_limit_list
    if limit is not None:
        params["limit"] = limit
    if offset is not None:
        params["offset"] = offset
    params["include_users"] = include_users
    # the users are only obtained with the full information on the groups
    all_fields = all_fields or include_users
    params["all_fields"] = all_fields
    response = self._api_action_request("group_list", method=RequestType.Post, json=params)
    if not response.success:
        raise response.default_error(self)
    if not all_fields:
        return response.result  # list of names
    groups = [CkanGroupInfo(group_dict) for group_dict in response.result]
    # update map
    if include_users:
        for group_info in groups:
            member_dicts = group_info.details["users"]
            members = [CkanUserInfo(member_dict) for member_dict in member_dicts]
            self.map._update_user_info(members)
            group_info.user_members = {member.id: CkanCapacity.from_str(member_dict["capacity"])
                                       for member, member_dict in zip(members, group_info.details["users"])}
    self.map._update_group_info(groups)
    return copy.deepcopy(groups)
|
|
980
|
+
|
|
981
|
+
def _api_group_list_all(self, *, all_fields:bool=True, include_users:bool=True, params:dict=None,
                        limit:int=None, offset:int=None) -> Union[List[CkanGroupInfo], List[str]]:
    """
    Repeat the group_list API call through _request_all_results_list and
    concatenate the lists obtained. The map is then flagged as listing all groups.

    :see: _api_group_list()
    :param all_fields: whether to return full information or only the group names
    :param include_users: sent as the include_users API argument
    :param params: other parameters to pass to group_list
    :param limit: limit per request
    :param offset: offset of the first request
    :return: concatenation of the lists returned by each call
    """
    if params is None: params = {}
    responses = self._request_all_results_list(self._api_group_list, params=params, limit=limit, offset=offset,
                                               all_fields=all_fields, include_users=include_users)
    self.map.groups_listed_all = True
    # flatten in linear time (sum(responses, []) re-copies the accumulated list for every page)
    merged = []
    for response_list in responses:
        merged.extend(response_list)
    return merged
|
|
995
|
+
|
|
996
|
+
def group_list_all(self, *, all_fields:bool=True, include_users:bool=True,
                   cancel_if_present:bool=False, params:dict=None,
                   limit:int=None, offset:int=None) -> Union[List[CkanGroupInfo], List[str]]:
    """
    API call to group_list, repeated through _api_group_list_all().
    The call can be canceled if the list is already present.

    :see: _api_group_list_all()
    :param all_fields: whether to return full information or only the group names
    :param include_users: sent as the include_users API argument
    :param cancel_if_present: option to cancel when list is already present (the map is flagged as listing all groups).
    :param params: other parameters to pass to group_list
    :param limit: limit per request
    :param offset: offset of the first request
    :return: the groups of the map if the call is canceled, else the result of the requests
    """
    reuse_map = self.map.groups_listed_all and cancel_if_present
    if not reuse_map:
        return self._api_group_list_all(params=params, all_fields=all_fields, include_users=include_users,
                                        limit=limit, offset=offset)
    return list(self.map.groups.values())
|
|
1011
|
+
|
|
1012
|
+
def map_user_rights(self, cancel_if_present:bool=True):
    """
    Map user and group access rights to the packages currently mapped by CKAN.
    For each package, user_access is set to a copy of the collaborators, completed with
    the members of the groups of the package who are not already present.

    :param cancel_if_present: forwarded to group_list_all(), user_list() and package_collaborator_list()
    :return: the map
    """
    self.group_list_all(cancel_if_present=cancel_if_present)
    self.user_list(cancel_if_present=cancel_if_present)
    for package_id, package_info in self.map.packages.items():
        self.package_collaborator_list(package_id, cancel_if_present=cancel_if_present)
        # merge collaborators with groups of the package
        package_info.user_access = package_info.collaborators.copy()
        for group in package_info.groups:
            members = self.map.groups[group.id].user_members
            for user_id, user_capacity in members.items():
                if user_id in package_info.user_access:
                    # access rights already recorded for this user are kept
                    continue
                package_info.user_access[user_id] = CkanCollaboration(user_capacity, None, group_id=group.id)
    return self.map
|