ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ckanapi_harvesters/__init__.py +32 -10
- ckanapi_harvesters/auxiliary/__init__.py +26 -0
- ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
- ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
- ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
- ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
- ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
- ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
- ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
- ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
- ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
- ckanapi_harvesters/auxiliary/deprecated.py +82 -0
- ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
- ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
- ckanapi_harvesters/auxiliary/list_records.py +60 -0
- ckanapi_harvesters/auxiliary/login.py +163 -0
- ckanapi_harvesters/auxiliary/path.py +208 -0
- ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
- ckanapi_harvesters/auxiliary/urls.py +40 -0
- ckanapi_harvesters/builder/__init__.py +40 -0
- ckanapi_harvesters/builder/builder_aux.py +20 -0
- ckanapi_harvesters/builder/builder_ckan.py +238 -0
- ckanapi_harvesters/builder/builder_errors.py +36 -0
- ckanapi_harvesters/builder/builder_field.py +122 -0
- ckanapi_harvesters/builder/builder_package.py +9 -0
- ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
- ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
- ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
- ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
- ckanapi_harvesters/builder/builder_resource.py +589 -0
- ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
- ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
- ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
- ckanapi_harvesters/builder/builder_resource_init.py +126 -0
- ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
- ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
- ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
- ckanapi_harvesters/builder/example/__init__.py +21 -0
- ckanapi_harvesters/builder/example/builder_example.py +21 -0
- ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
- ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
- ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
- ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
- ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
- ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
- ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
- ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
- ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
- ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
- ckanapi_harvesters/builder/mapper_datastore.py +93 -0
- ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
- ckanapi_harvesters/builder/specific/__init__.py +11 -0
- ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
- ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
- ckanapi_harvesters/ckan_api/__init__.py +20 -0
- ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
- ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
- ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
- ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
- ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
- ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
- ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
- ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
- ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
- ckanapi_harvesters/harvesters/__init__.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
- ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
- ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
- ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
- ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
- ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
- ckanapi_harvesters/harvesters/harvester_init.py +30 -0
- ckanapi_harvesters/harvesters/harvester_model.py +49 -0
- ckanapi_harvesters/harvesters/harvester_params.py +323 -0
- ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
- ckanapi_harvesters/harvesters/postgre_params.py +86 -0
- ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
- ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
- ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
- ckanapi_harvesters/policies/__init__.py +20 -0
- ckanapi_harvesters/policies/data_format_policy.py +269 -0
- ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
- ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
- ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
- ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
- ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
- ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
- ckanapi_harvesters/reports/__init__.py +11 -0
- ckanapi_harvesters/reports/admin_report.py +292 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/METADATA +84 -38
- ckanapi_harvesters-0.0.3.dist-info/RECORD +105 -0
- ckanapi_harvesters/divider/__init__.py +0 -27
- ckanapi_harvesters/divider/divider.py +0 -53
- ckanapi_harvesters/divider/divider_error.py +0 -59
- ckanapi_harvesters/main.py +0 -30
- ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/WHEEL +0 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,509 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Data model to represent a CKAN database architecture
|
|
5
|
+
"""
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
from typing import Dict, List, Union
|
|
8
|
+
from warnings import warn
|
|
9
|
+
import copy
|
|
10
|
+
|
|
11
|
+
from ckanapi_harvesters.auxiliary.ckan_model import CkanPackageInfo, CkanResourceInfo, CkanState, CkanDataStoreInfo, \
|
|
12
|
+
CkanOrganizationInfo, CkanLicenseInfo, CkanViewInfo, CkanGroupInfo, CkanUserInfo
|
|
13
|
+
from ckanapi_harvesters.auxiliary.ckan_errors import NotMappedObjectNameError, IntegrityError
|
|
14
|
+
from ckanapi_harvesters.auxiliary.ckan_auxiliary import assert_or_raise
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class CkanMapABC(ABC):
    """
    Abstract interface of a CKAN map object.

    Concrete maps (see CkanMap) must provide the five operations declared here:
    purge, copy, to_dict, update_from_dict and the static constructor from_dict.
    """

    @abstractmethod
    def purge(self):
        """
        Abstract method. CkanMap implements it by erasing its known mappings.
        """
        raise NotImplementedError

    @abstractmethod
    def copy(self):
        """
        Abstract method. CkanMap implements it by returning a deep copy of itself.
        """
        raise NotImplementedError

    @abstractmethod
    def to_dict(self) -> dict:
        """
        Abstract method: export the content of the map as a dictionary.

        :return: dictionary representation of the map
        """
        raise NotImplementedError

    @abstractmethod
    def update_from_dict(self, data: dict) -> None:
        """
        Abstract method: load the content of a dictionary into the map.

        :param data: dictionary representation of a map
        """
        raise NotImplementedError

    @staticmethod
    @abstractmethod
    def from_dict(d: dict) -> "CkanMap":
        """
        Abstract static constructor: build a map from a dictionary.

        :param d: dictionary representation of a map
        :return: the new map object
        """
        raise NotImplementedError
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class CkanMap(CkanMapABC):
    """
    Class to store an image of the CKAN database architecture
    Auxiliary class of CkanApi

    The map keeps one dictionary per object type (packages, resources, licenses,
    organizations, users, groups), keyed by object id, plus secondary indexes
    translating names, titles or aliases into ids.
    """

    def __init__(self):
        self.packages: Dict[str, CkanPackageInfo] = {}  # package id -> info
        self.packages_id_index: Dict[str, str] = {}  # package name -> id
        self.packages_title_index: Dict[str, str] = {}  # package title -> id
        self.resources: Dict[str, CkanResourceInfo] = {}  # resource id -> info
        self.resource_alias_index: Dict[str, str] = {}  # resource alias -> id
        self.licenses: Dict[str, CkanLicenseInfo] = {}  # license id -> info
        self.licenses_title_index: Dict[str, str] = {}  # license title -> id
        self.organizations: Dict[str, CkanOrganizationInfo] = {}  # organization id -> info
        self.organizations_id_index: Dict[str, str] = {}  # organization name -> id
        self.organizations_title_index: Dict[str, str] = {}  # organization title -> id
        self.users: Dict[str, CkanUserInfo] = {}  # user id -> info
        self.users_id_index: Dict[str, str] = {}  # user name -> id
        self.groups: Dict[str, CkanGroupInfo] = {}  # group id -> info
        self.groups_id_index: Dict[str, str] = {}  # group name -> id
        self.groups_title_index: Dict[str, str] = {}  # group title -> id
        self.organizations_listed_all: bool = False
        self.organizations_listed_all_users: bool = False
        self.users_listed_all: bool = False
        self.groups_listed_all: bool = False
        self._mapping_query_datastore_info = False  # default request for datastore_info during mapping operations
        self._mapping_query_resource_view_list = False  # False: do not request resource_view_list by default
        self._mapping_query_license_list = False  # False by default
        self._mapping_query_organization_info = False  # False by default

    def purge(self):
        """
        Erase known package mappings.

        Packages, resources, licenses and organizations are forgotten, together with their indexes.
        Users and groups are left untouched.

        :return:
        """
        self.packages = {}  # package id -> info
        self.packages_id_index = {}  # package name -> id
        self.packages_title_index = {}  # package title -> id
        self.resources = {}  # resource id -> info
        self.resource_alias_index = {}  # resource alias -> id
        self.licenses = {}  # license id -> info
        self.licenses_title_index = {}  # license title -> id
        self.organizations = {}  # organization id -> info
        self.organizations_id_index = {}  # organization name -> id
        self.organizations_title_index = {}  # organization title -> id
        # the organizations were erased: neither "listed all" indicator can remain set
        self.organizations_listed_all = False
        self.organizations_listed_all_users = False

    def copy(self) -> "CkanMap":
        """
        Return a deep copy of the map.
        """
        return copy.deepcopy(self)

    def to_dict(self) -> dict:
        """
        Export the mapped packages, licenses and organizations (each through its own to_dict method).

        :return: dictionary with the keys "packages", "licenses" and "organizations"
        """
        return {
            "packages": [package_info.to_dict() for package_info in self.packages.values()],
            "licenses": [license_info.to_dict() for license_info in self.licenses.values()],
            "organizations": [organization_info.to_dict() for organization_info in self.organizations.values()],
        }

    def update_from_dict(self, data: dict) -> None:
        """
        Load the packages, licenses and organizations of a dictionary produced by to_dict.

        :param data: dictionary with the keys "packages", "licenses" and "organizations"
        """
        for package_dict in data["packages"]:
            self._update_package_info(CkanPackageInfo.from_dict(package_dict))
        for license_dict in data["licenses"]:
            self._update_license_info(CkanLicenseInfo.from_dict(license_dict))
        for org_dict in data["organizations"]:
            self._update_organization_info(CkanOrganizationInfo.from_dict(org_dict))

    @staticmethod
    def from_dict(d: dict) -> "CkanMap":
        """
        Create a new map from a dictionary produced by to_dict.

        :param d: dictionary with the keys "packages", "licenses" and "organizations"
        :return: the new map
        """
        ckan_map = CkanMap()
        ckan_map.update_from_dict(d)
        return ckan_map

    ## Resource ID Map navigation ------------------
    def get_package_id(self, package_name: str, *, error_not_mapped: bool = True, search_title: bool = True) -> Union[str, None]:
        """
        Retrieve the package id for a given package name based on the package map.

        :param package_name: package name or id (or title, if search_title is True).
        :param error_not_mapped: raise NotMappedObjectNameError if the package is unknown. If False, None is returned instead.
        :param search_title: also look for package_name in the package titles.
        :return:
        """
        if package_name is None:
            raise ValueError("package_name cannot be None")
        if package_name in self.packages:
            # recognized package_id
            return package_name
        if package_name in self.packages_id_index:
            return self.packages_id_index[package_name]
        if search_title and package_name in self.packages_title_index:
            return self.packages_title_index[package_name]
        if error_not_mapped:
            raise NotMappedObjectNameError(f"Package {package_name} is not mapped or does not exist.")
        return None

    def get_resource_id(self, resource_name: str, package_name: str = None, *, error_not_mapped: bool = True) -> Union[str, None]:
        """
        Retrieve the resource id for a given combination of (package name and resource name) based on the package map.

        :param resource_name: resource alias, name or id.
        :param package_name: package name or id (required if resource_name is a resource name). An integrity check is performed if given.
        :param error_not_mapped: raise an error if the resource cannot be found. If False, None is returned instead.
        :return:
        """
        if resource_name in self.resources:
            # recognized resource_id
            resource_id = resource_name
        elif resource_name in self.resource_alias_index:
            # found resource alias
            resource_id = self.resource_alias_index[resource_name]
        else:
            if package_name is None and not error_not_mapped:
                # a resource name cannot be resolved without its package:
                # report "not mapped" instead of failing on the missing package name
                return None
            package_id = self.get_package_id(package_name, error_not_mapped=error_not_mapped)
            if package_id is None and not error_not_mapped:
                return None
            package_index = self.packages[package_id].resources_id_index
            if resource_name in package_index:
                resource_id = package_index[resource_name]
            elif error_not_mapped:
                raise NotMappedObjectNameError(f"Resource {resource_name} is not mapped or does not exist.")
            else:
                resource_id = None
        # sanity check
        if package_name is not None and resource_id is not None:
            resource_info = self.resources[resource_id]
            map_package_id = self.get_package_id(package_name, error_not_mapped=error_not_mapped)
            if map_package_id is not None:
                assert_or_raise(map_package_id == resource_info.package_id, IntegrityError("package_id"))
        return resource_id

    def get_organization_id(self, organization_name: str, *, error_not_mapped: bool = True, search_title: bool = True) -> Union[str, None]:
        """
        Retrieve the organization id for a given organization name based on the mapped data.

        :param organization_name: organization name, title or id.
        :param error_not_mapped: raise NotMappedObjectNameError if the organization is unknown. If False, None is returned instead.
        :param search_title: also look for organization_name in the organization titles.
        :return:
        """
        if organization_name is None:
            raise ValueError("organization_name cannot be None")
        if organization_name in self.organizations:
            # recognized organization_id
            return organization_name
        if organization_name in self.organizations_id_index:
            return self.organizations_id_index[organization_name]
        if search_title and organization_name in self.organizations_title_index:
            return self.organizations_title_index[organization_name]
        if error_not_mapped:
            raise NotMappedObjectNameError(f"Organization {organization_name} is not mapped or does not exist.")
        return None

    def get_resource_info(self, resource_name: str, package_name: str = None, *, error_not_mapped: bool = True) -> Union[CkanResourceInfo, None]:
        """
        Retrieve the information on a given resource.

        :param resource_name: resource name or id.
        :param package_name: package name or id (required if resource_name is a resource name). An integrity check is performed if given.
        :param error_not_mapped: raise an error if the resource cannot be found. If False, None is returned instead.
        :return:
        """
        resource_id = self.get_resource_id(resource_name, package_name, error_not_mapped=error_not_mapped)
        if resource_id is None:
            return None
        return self.resources[resource_id]

    def get_package_info(self, package_name: str, *, error_not_mapped: bool = True) -> Union[CkanPackageInfo, None]:
        """
        Retrieve the package info for a given package name based on the package map.

        :param package_name: package name or id.
        :param error_not_mapped: raise an error if the package cannot be found. If False, None is returned instead.
        :return:
        """
        package_id = self.get_package_id(package_name, error_not_mapped=error_not_mapped)
        if package_id is None:
            return None
        return self.packages[package_id]

    def get_organization_info(self, organization_name: str, *, error_not_mapped: bool = True) -> Union[CkanOrganizationInfo, None]:
        """
        Retrieve the organization info for a given organization name based on the mapped data.

        :param organization_name: organization name or id.
        :param error_not_mapped: raise an error if the organization cannot be found. If False, None is returned instead.
        :return:
        """
        organization_id = self.get_organization_id(organization_name, error_not_mapped=error_not_mapped)
        if organization_id is None:
            return None
        return self.organizations[organization_id]

    def get_organization_for_owner_org(self, organization_name: str, *, error_not_mapped: bool = True) -> Union[CkanOrganizationInfo, None]:
        """
        Retrieve the organization name for a given organization name based on the mapped data.
        This is the field usually used for the owner_org argument. Calls CkanOrganizationInfo.get_owner_org

        :param organization_name: organization name or id.
        :param error_not_mapped: raise an error if the organization cannot be found. If False, None is returned instead.
        :return: the value returned by CkanOrganizationInfo.get_owner_org, or None if the organization is not mapped
        """
        # NOTE(review): the return annotation announces a CkanOrganizationInfo but the value comes from
        # get_owner_org(), presumably a string - confirm against CkanOrganizationInfo before changing the annotation.
        organization_info = self.get_organization_info(organization_name, error_not_mapped=error_not_mapped)
        if organization_info is None:
            return None
        return organization_info.get_owner_org()

    def get_resource_package_id(self, resource_name: str, package_name: str = None, *, error_not_mapped: bool = True) -> Union[str, None]:
        """
        Retrieve the package id of a given resource.

        :param resource_name: resource name or id.
        :param package_name: package name or id (required if resource_name is a resource name). An integrity check is performed if given.
        :param error_not_mapped: raise an error if the resource cannot be found. If False, None is returned instead.
        :return:
        """
        resource_info = self.get_resource_info(resource_name, package_name, error_not_mapped=error_not_mapped)
        if resource_info is None:
            return None
        return resource_info.package_id

    def get_datastore_info(self, resource_name: str, package_name: str = None, *, error_not_mapped: bool = True) -> Union[CkanDataStoreInfo, None]:
        """
        Retrieve the DataStore information attached to a given resource.

        :param resource_name: resource name or id.
        :param package_name: package name or id (required if resource_name is a resource name). An integrity check is performed if given.
        :param error_not_mapped: raise an error if the resource or its DataStore information is not mapped. If False, None is returned instead.
        :return:
        """
        resource_info = self.get_resource_info(resource_name, package_name, error_not_mapped=error_not_mapped)
        if resource_info is None:
            return None
        if resource_info.datastore_info is not None:
            return resource_info.datastore_info
        if error_not_mapped:
            raise NotMappedObjectNameError(f"DataStore of resource {resource_name} is not mapped or does not exist.")
        return None

    def get_datastore_len(self, resource_name: str, package_name: str = None, *, error_not_mapped: bool = True) -> Union[int, None]:
        """
        Retrieve the number of rows in a DataStore from the mapped data. This requires the map_resources to be called with the option datastore_info=True.

        :param resource_name: resource name or id.
        :param package_name: package name or id (required if resource_name is a resource name). An integrity check is performed if given.
        :param error_not_mapped: raise an error if the DataStore information is not mapped. If False, None is returned instead.
        :return:
        """
        datastore_info = self.get_datastore_info(resource_name, package_name, error_not_mapped=error_not_mapped)
        if datastore_info is None:
            return None
        return datastore_info.row_count

    def _forget_resource_aliases(self, resource_id: str) -> None:
        """
        Internal function to remove every alias pointing to a resource from the alias index.

        :param resource_id: resource id.
        """
        stale_aliases = [alias for alias, target_id in self.resource_alias_index.items() if target_id == resource_id]
        for alias in stale_aliases:
            del self.resource_alias_index[alias]

    def _update_datastore_len(self, resource_id: str, new_len: int) -> None:
        """
        Internal function to update the length of a DataStore without making a request.

        The row count is written in the resource map and in the resource entry of the owning package, if mapped.

        :param resource_id: resource id.
        :param new_len: value to replace
        """
        resource_info = self.resources[resource_id]
        resource_info.datastore_info.row_count = new_len
        resource_info.datastore_info.details["meta"]["count"] = new_len
        # a resource can be mapped without its package (see _update_resource_info)
        package_info = self.packages.get(resource_info.package_id)
        if package_info is not None and resource_id in package_info.package_resources:
            package_datastore_info = package_info.package_resources[resource_id].datastore_info
            package_datastore_info.row_count = new_len
            package_datastore_info.details["meta"]["count"] = new_len

    def _update_datastore_info(self, datastore_info: CkanDataStoreInfo) -> None:
        """
        Internal function to attach DataStore information to the mapped resource it refers to.

        The resource entry of the owning package and the alias index are updated as well.
        The alias index is updated even if the resource itself is not mapped.

        :param datastore_info: DataStore information (cannot be None: its resource_id is read)
        """
        resource_id = datastore_info.resource_id
        if resource_id in self.resources:
            resource_info = self.resources[resource_id]
            resource_info.datastore_info = datastore_info
            # a resource can be mapped without its package (see _update_resource_info)
            package_info = self.packages.get(resource_info.package_id)
            if package_info is not None:
                if resource_id in package_info.package_resources:
                    package_info.package_resources[resource_id].datastore_info = datastore_info
                package_info.resources_id_index[resource_info.name] = resource_id
        if datastore_info.aliases is not None:
            self.resource_alias_index.update({alias: resource_id for alias in datastore_info.aliases})

    def _update_resource_info(self, resource_info: Union[CkanResourceInfo, List[CkanResourceInfo]]) -> None:
        """
        Internal function to update the information of one or several resources.

        Each resource is stored in the resource map, forwarded to its package if the package is mapped,
        and its DataStore aliases (if any) are indexed. The attribute index_in_package is reset to None.

        :param resource_info: resource information or list of resource information
        """
        if not isinstance(resource_info, list):
            resource_info = [resource_info]
        for res_info in resource_info:
            res_info.index_in_package = None
            if res_info.package_id in self.packages:
                self.packages[res_info.package_id].update_resource(res_info)
            self.resources[res_info.id] = res_info
            datastore_info = res_info.datastore_info
            if datastore_info is not None and datastore_info.aliases is not None:
                self.resource_alias_index.update({alias: res_info.id for alias in datastore_info.aliases})

    def _update_view_info(self, view_info: Union[CkanViewInfo, List[CkanViewInfo]], view_list: bool = False) -> None:
        """
        Internal function to forward view information to the mapped resources the views belong to.

        :param view_info: view information or list of view information
        :param view_list: forwarded as is to CkanResourceInfo.update_view
        """
        if isinstance(view_info, CkanViewInfo):
            view_info = [view_info]
        for view_info_update in view_info:
            self.resources[view_info_update.resource_id].update_view(view_info_update, view_list=view_list)

    def _update_package_info(self, package_info: Union[CkanPackageInfo, List[CkanPackageInfo]]) -> None:
        """
        Internal function to update the information of a package.

        The resources, DataStore aliases, organization and groups carried by the package are mapped as well.

        NB: the indicator pkg_info.requested_datastore_info remains False until map_resources is called.

        :param package_info: package information or list of package information
        """
        if not isinstance(package_info, list):
            package_info = [package_info]
        # NB: pkg_info.resources_id_index is not rebuilt here (the original author notes
        # it is already done by __init__, presumably CkanPackageInfo.__init__).
        self.packages.update({pkg_info.id: pkg_info for pkg_info in package_info})
        self.packages_id_index.update({pkg_info.name: pkg_info.id for pkg_info in package_info})
        self.packages_title_index.update({pkg_info.title: pkg_info.id for pkg_info in package_info})
        for pkg_info in package_info:
            self.resources.update({resource_info.id: resource_info for resource_info in pkg_info.package_resources.values()})
            for resource_info in pkg_info.package_resources.values():
                datastore_info = resource_info.datastore_info
                if datastore_info is not None and datastore_info.aliases is not None:
                    self.resource_alias_index.update({alias: resource_info.id for alias in datastore_info.aliases})
        for pkg_info in package_info:
            if pkg_info.organization_info is not None:
                self._update_organization_info(pkg_info.organization_info)
            if pkg_info.groups is not None:
                self._update_group_info(pkg_info.groups)

    def get_license_id(self, license_name: str, *, error_not_mapped: bool = True) -> Union[str, None]:
        """
        Retrieve the ID of a license based on the mapped data.

        :param license_name: license title or id.
        :param error_not_mapped: raise NotMappedObjectNameError if the license is unknown. If False, None is returned instead.
        :return:
        """
        if license_name is None:
            raise ValueError("license_name cannot be None")
        if license_name in self.licenses:
            # recognized license_id
            return license_name
        if license_name in self.licenses_title_index:
            return self.licenses_title_index[license_name]
        if error_not_mapped:
            raise NotMappedObjectNameError(f"License {license_name} is not mapped or does not exist.")
        return None

    def get_license_info(self, license_name: str, *, error_not_mapped: bool = True) -> Union[CkanLicenseInfo, None]:
        """
        Retrieve the information on a license based on the mapped data.

        :param license_name: license title or id.
        :param error_not_mapped: raise an error if the license cannot be found. If False, None is returned instead.
        :return:
        """
        license_id = self.get_license_id(license_name, error_not_mapped=error_not_mapped)
        if license_id is None:
            return None
        return self.licenses[license_id]

    def _update_license_info(self, license_info: Union[CkanLicenseInfo, List[CkanLicenseInfo]]) -> None:
        """
        Internal function to update the information on a license.

        :param license_info: license information or list of license information
        """
        if not isinstance(license_info, list):
            license_info = [license_info]
        self.licenses.update({info.id: info for info in license_info})
        self.licenses_title_index.update({info.title: info.id for info in license_info})

    ## Package record changes ------------------
    def _record_package_update(self, pkg_info: CkanPackageInfo) -> None:
        """
        Internal function to record the update of an already mapped package.

        :param pkg_info: new package information, merged through CkanPackageInfo.update
        """
        package_id = pkg_info.id
        self.packages[package_id].update(pkg_info)
        self.packages_id_index[pkg_info.name] = package_id
        self.packages_title_index[pkg_info.title] = package_id

    def _record_package_create(self, pkg_info: CkanPackageInfo) -> None:
        """
        Internal function to record the creation of a package.

        :param pkg_info: information on the new package
        """
        package_id = pkg_info.id
        self.packages[package_id] = pkg_info
        self.packages_id_index[pkg_info.name] = package_id
        self.packages_title_index[pkg_info.title] = package_id

    def _record_package_delete_state(self, package_id: str) -> None:
        """
        Internal function to mark a mapped package as deleted. Nothing is done if the package is not mapped.

        :param package_id: package name or id
        """
        # only pass in delete state
        pkg_info = self.get_package_info(package_id, error_not_mapped=False)
        if pkg_info is not None:
            pkg_info.state = CkanState.Deleted

    def _record_package_purge_removal(self, package_id: str) -> None:
        """
        Internal function to remove a package from the map. Nothing is done if the package is not mapped.

        The package, its index entries, its resources and their aliases are forgotten.

        :param package_id: package name or id
        """
        # purge = full removal
        pkg_info = self.get_package_info(package_id, error_not_mapped=False)
        if pkg_info is None:
            return
        # the argument may be a name or a title: always work with the id of the mapped package
        mapped_id = pkg_info.id
        self.packages.pop(mapped_id, None)
        # only drop the index entries which still point to this package
        if self.packages_id_index.get(pkg_info.name) == mapped_id:
            del self.packages_id_index[pkg_info.name]
        if self.packages_title_index.get(pkg_info.title) == mapped_id:
            del self.packages_title_index[pkg_info.title]
        # the resources of a purged package cannot be left in the map
        for resource_id in list(pkg_info.package_resources.keys()):
            self.resources.pop(resource_id, None)
            self._forget_resource_aliases(resource_id)

    ## Resource record changes ------------------
    def _store_resource_record(self, resource_info: CkanResourceInfo) -> None:
        """
        Internal function shared by _record_resource_update and _record_resource_create.

        The resource map is always updated. The owning package is only updated if the resource was not mapped yet.
        """
        # NOTE(review): for an already mapped resource, the entry held by the package is not refreshed - confirm this is intended.
        resource_id = resource_info.id
        is_new_resource = resource_id not in self.resources
        self.resources[resource_id] = resource_info
        if is_new_resource:
            package_info = self.packages[resource_info.package_id]
            package_info.package_resources[resource_id] = resource_info
            package_info.resources_id_index[resource_info.name] = resource_id

    def _record_resource_update(self, resource_info: CkanResourceInfo) -> None:
        """
        Internal function to record the update of a resource.

        :param resource_info: new resource information
        """
        self._store_resource_record(resource_info)

    def _record_resource_create(self, resource_info: CkanResourceInfo) -> None:
        """
        Internal function to record the creation of a resource.

        :param resource_info: information on the new resource
        """
        self._store_resource_record(resource_info)

    def _record_resource_delete(self, resource_id: str) -> None:
        """
        Internal function to remove a resource from the map. A warning is issued if the resource is not mapped.

        :param resource_id: resource id
        """
        if resource_id not in self.resources:
            msg = f"Resource {resource_id} not found in mapped objects"
            warn(msg)
            return
        resource_info = self.resources.pop(resource_id)
        self._forget_resource_aliases(resource_id)
        # a resource can be mapped without its package (see _update_resource_info)
        package_info = self.packages.get(resource_info.package_id)
        if package_info is None:
            return
        if resource_id in package_info.package_resources.keys():
            package_info.package_resources.pop(resource_id)
        if resource_info.name in package_info.resources_id_index.keys():
            package_info.resources_id_index.pop(resource_info.name)

    def _record_datastore_delete(self, resource_id: str) -> None:
        """
        Internal function to forget the DataStore of a resource. A warning is issued if the resource is not mapped.

        :param resource_id: resource id
        """
        if resource_id not in self.resources:
            msg = f"DataStore {resource_id} not found in mapped objects"
            warn(msg)
            return
        resource_info = self.resources[resource_id]
        resource_info.datastore_info = None
        self._forget_resource_aliases(resource_id)
        # a resource can be mapped without its package (see _update_resource_info)
        package_info = self.packages.get(resource_info.package_id)
        if package_info is not None and resource_id in package_info.package_resources.keys():
            package_info.package_resources[resource_id].datastore_info = None

    ## Organization record changes ------------------
    def _update_organization_info(self, organization_info: Union[CkanOrganizationInfo, List[CkanOrganizationInfo]]) -> None:
        """
        Internal function to update information on an organization.

        :param organization_info: organization information or list of organization information
        """
        if not isinstance(organization_info, list):
            organization_info = [organization_info]
        self.organizations.update({info.id: info for info in organization_info})
        self.organizations_id_index.update({info.name: info.id for info in organization_info})
        self.organizations_title_index.update({info.title: info.id for info in organization_info})

    ## Group and users record changes ------------------
    def _update_group_info(self, group_info: Union[CkanGroupInfo, List[CkanGroupInfo]]) -> None:
        """
        Internal function to update information on a group.

        :param group_info: group information or list of group information
        """
        if not isinstance(group_info, list):
            group_info = [group_info]
        self.groups.update({info.id: info for info in group_info})
        self.groups_id_index.update({info.name: info.id for info in group_info})
        self.groups_title_index.update({info.title: info.id for info in group_info})

    def _update_user_info(self, user_info: Union[CkanUserInfo, List[CkanUserInfo]]) -> None:
        """
        Internal function to update information on a user.

        :param user_info: user information or list of user information
        """
        if not isinstance(user_info, list):
            user_info = [user_info]
        self.users.update({info.id: info for info in user_info})
        self.users_id_index.update({info.name: info.id for info in user_info})
|
|
508
|
+
|
|
509
|
+
|