ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. ckanapi_harvesters/__init__.py +32 -10
  2. ckanapi_harvesters/auxiliary/__init__.py +26 -0
  3. ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
  4. ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
  5. ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
  6. ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
  7. ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
  8. ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
  9. ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
  10. ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
  11. ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
  12. ckanapi_harvesters/auxiliary/deprecated.py +82 -0
  13. ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
  14. ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
  15. ckanapi_harvesters/auxiliary/list_records.py +60 -0
  16. ckanapi_harvesters/auxiliary/login.py +163 -0
  17. ckanapi_harvesters/auxiliary/path.py +208 -0
  18. ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
  19. ckanapi_harvesters/auxiliary/urls.py +40 -0
  20. ckanapi_harvesters/builder/__init__.py +40 -0
  21. ckanapi_harvesters/builder/builder_aux.py +20 -0
  22. ckanapi_harvesters/builder/builder_ckan.py +238 -0
  23. ckanapi_harvesters/builder/builder_errors.py +36 -0
  24. ckanapi_harvesters/builder/builder_field.py +122 -0
  25. ckanapi_harvesters/builder/builder_package.py +9 -0
  26. ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
  27. ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
  28. ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
  29. ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
  30. ckanapi_harvesters/builder/builder_resource.py +589 -0
  31. ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
  32. ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
  33. ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
  34. ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
  35. ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
  36. ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
  37. ckanapi_harvesters/builder/builder_resource_init.py +126 -0
  38. ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
  39. ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
  40. ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
  41. ckanapi_harvesters/builder/example/__init__.py +21 -0
  42. ckanapi_harvesters/builder/example/builder_example.py +21 -0
  43. ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
  44. ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
  45. ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
  46. ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
  47. ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
  48. ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
  49. ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
  50. ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
  51. ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
  52. ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
  53. ckanapi_harvesters/builder/mapper_datastore.py +93 -0
  54. ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
  55. ckanapi_harvesters/builder/specific/__init__.py +11 -0
  56. ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
  57. ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
  58. ckanapi_harvesters/ckan_api/__init__.py +20 -0
  59. ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
  60. ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
  61. ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
  62. ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
  63. ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
  64. ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
  65. ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
  66. ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
  67. ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
  68. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
  69. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
  70. ckanapi_harvesters/harvesters/__init__.py +23 -0
  71. ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
  72. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
  73. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
  74. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
  75. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
  76. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
  77. ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
  78. ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
  79. ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
  80. ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
  81. ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
  82. ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
  83. ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
  84. ckanapi_harvesters/harvesters/harvester_init.py +30 -0
  85. ckanapi_harvesters/harvesters/harvester_model.py +49 -0
  86. ckanapi_harvesters/harvesters/harvester_params.py +323 -0
  87. ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
  88. ckanapi_harvesters/harvesters/postgre_params.py +86 -0
  89. ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
  90. ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
  91. ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
  92. ckanapi_harvesters/policies/__init__.py +20 -0
  93. ckanapi_harvesters/policies/data_format_policy.py +269 -0
  94. ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
  95. ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
  96. ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
  97. ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
  98. ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
  99. ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
  100. ckanapi_harvesters/reports/__init__.py +11 -0
  101. ckanapi_harvesters/reports/admin_report.py +292 -0
  102. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.2.dist-info}/METADATA +74 -38
  103. ckanapi_harvesters-0.0.2.dist-info/RECORD +105 -0
  104. ckanapi_harvesters/divider/__init__.py +0 -27
  105. ckanapi_harvesters/divider/divider.py +0 -53
  106. ckanapi_harvesters/divider/divider_error.py +0 -59
  107. ckanapi_harvesters/main.py +0 -30
  108. ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
  109. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.2.dist-info}/WHEEL +0 -0
  110. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,509 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Data model to represent a CKAN database architecture
5
+ """
6
+ from abc import ABC, abstractmethod
7
+ from typing import Dict, List, Union
8
+ from warnings import warn
9
+ import copy
10
+
11
+ from ckanapi_harvesters.auxiliary.ckan_model import CkanPackageInfo, CkanResourceInfo, CkanState, CkanDataStoreInfo, \
12
+ CkanOrganizationInfo, CkanLicenseInfo, CkanViewInfo, CkanGroupInfo, CkanUserInfo
13
+ from ckanapi_harvesters.auxiliary.ckan_errors import NotMappedObjectNameError, IntegrityError
14
+ from ckanapi_harvesters.auxiliary.ckan_auxiliary import assert_or_raise
15
+
16
+
17
class CkanMapABC(ABC):
    """
    Abstract interface of a CKAN map.

    A concrete map must be able to reset itself, duplicate itself and be converted
    to / from a plain dictionary.
    """

    @abstractmethod
    def purge(self):
        """Reset the mapped data."""
        raise NotImplementedError()

    @abstractmethod
    def copy(self):
        """Return a duplicate of the map."""
        raise NotImplementedError()

    @abstractmethod
    def to_dict(self) -> dict:
        """Export the map as a dictionary."""
        raise NotImplementedError()

    @abstractmethod
    def update_from_dict(self, data:dict) -> None:
        """
        Merge the content of a dictionary into the map.

        :param data: dictionary to read
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def from_dict(d: dict) -> "CkanMap":
        """
        Build a map from a dictionary.

        :param d: dictionary to read
        :return: the new map
        """
        raise NotImplementedError()
38
+
39
+
40
class CkanMap(CkanMapABC):
    """
    In-memory image of the CKAN database architecture.
    Auxiliary class of CkanApi.
    """

    def __init__(self):
        """
        Create an empty map: no object is known and no complete listing has been recorded.
        """
        # packages
        self.packages:Dict[str,CkanPackageInfo] = dict()  # package id -> info
        self.packages_id_index:Dict[str, str] = dict()  # package name -> id
        self.packages_title_index:Dict[str, str] = dict()  # package title -> id
        # resources
        self.resources:Dict[str,CkanResourceInfo] = dict()  # resource id -> info
        self.resource_alias_index:Dict[str,str] = dict()  # resource alias -> id
        # licenses
        self.licenses:Dict[str,CkanLicenseInfo] = dict()  # license id -> info
        self.licenses_title_index:Dict[str, str] = dict()  # license title -> id
        # organizations
        self.organizations:Dict[str,CkanOrganizationInfo] = dict()  # organization id -> info
        self.organizations_id_index:Dict[str, str] = dict()  # organization name -> id
        self.organizations_title_index:Dict[str, str] = dict()  # organization title -> id
        # users
        self.users:Dict[str,CkanUserInfo] = dict()  # user id -> info
        self.users_id_index:Dict[str, str] = dict()  # user name -> id
        # groups
        self.groups:Dict[str,CkanGroupInfo] = dict()  # group id -> info
        self.groups_id_index:Dict[str, str] = dict()  # group name -> id
        self.groups_title_index:Dict[str, str] = dict()  # group title -> id
        # flags recording whether a complete listing was obtained
        self.organizations_listed_all:bool = False
        self.organizations_listed_all_users:bool = False
        self.users_listed_all:bool = False
        self.groups_listed_all:bool = False
        # default options of the mapping operations (all disabled)
        self._mapping_query_datastore_info = False  # request datastore_info during mapping operations
        self._mapping_query_resource_view_list = False  # request resource_view_list
        self._mapping_query_license_list = False
        self._mapping_query_organization_info = False
70
+
71
def purge(self):
    """
    Erase every mapped object: packages, resources, licenses, organizations, users and groups.

    The flags recording that a complete listing was obtained are reset as well, so that
    they never describe objects which are no longer in the map.
    The ``_mapping_query_*`` options are settings rather than mapped data and are kept.

    :return:
    """
    self.packages:Dict[str,CkanPackageInfo] = {}  # package id -> info
    self.packages_id_index:Dict[str, str] = {}  # package name -> id
    self.packages_title_index:Dict[str, str] = {}  # package title -> id
    self.resources:Dict[str,CkanResourceInfo] = {}  # resource id -> info
    self.resource_alias_index:Dict[str,str] = {}  # resource alias -> id
    self.licenses:Dict[str,CkanLicenseInfo] = {}  # license id -> info
    self.licenses_title_index:Dict[str, str] = {}  # license title -> id
    self.organizations:Dict[str,CkanOrganizationInfo] = {}  # organization id -> info
    self.organizations_id_index:Dict[str, str] = {}  # organization name -> id
    self.organizations_title_index:Dict[str, str] = {}  # organization title -> id
    self.users:Dict[str,CkanUserInfo] = {}  # user id -> info
    self.users_id_index:Dict[str, str] = {}  # user name -> id
    self.groups:Dict[str,CkanGroupInfo] = {}  # group id -> info
    self.groups_id_index:Dict[str, str] = {}  # group name -> id
    self.groups_title_index:Dict[str, str] = {}  # group title -> id
    self.organizations_listed_all = False
    self.organizations_listed_all_users = False
    self.users_listed_all = False
    self.groups_listed_all = False
88
+
89
def copy(self) -> "CkanMap":
    """
    Produce an independent duplicate of the map.

    :return: deep copy of this instance
    """
    # aliased to keep the module visually distinct from this method's own name
    import copy as copy_module
    duplicate = copy_module.deepcopy(self)
    return duplicate
91
+
92
def to_dict(self) -> dict:
    """
    Export the mapped packages, licenses and organizations.

    :return: dictionary with the keys "packages", "licenses" and "organizations", each holding
        the list of the ``to_dict()`` exports of the corresponding mapped objects
    """
    exported = {}
    exported["packages"] = [package_info.to_dict() for package_info in self.packages.values()]
    exported["licenses"] = [license_info.to_dict() for license_info in self.licenses.values()]
    exported["organizations"] = [organization_info.to_dict() for organization_info in self.organizations.values()]
    return exported
97
+
98
def update_from_dict(self, data:dict) -> None:
    """
    Merge the objects described by a dictionary into the map.

    :param data: dictionary with the keys "packages", "licenses" and "organizations",
        each holding a list of dictionaries (a missing key raises KeyError)
    """
    for raw_package in data["packages"]:
        package_info = CkanPackageInfo.from_dict(raw_package)
        self._update_package_info(package_info)
    for raw_license in data["licenses"]:
        license_info = CkanLicenseInfo.from_dict(raw_license)
        self._update_license_info(license_info)
    for raw_organization in data["organizations"]:
        organization_info = CkanOrganizationInfo.from_dict(raw_organization)
        self._update_organization_info(organization_info)
105
+
106
@staticmethod
def from_dict(d: dict) -> "CkanMap":
    """
    Build a new map from a dictionary.

    :param d: dictionary accepted by update_from_dict
    :return: the new map
    """
    ckan_map = CkanMap()
    ckan_map.update_from_dict(d)
    return ckan_map
111
+
112
+ ## Resource ID Map navigation ------------------
113
def get_package_id(self, package_name:str, *, error_not_mapped:bool=True, search_title:bool=True) -> Union[str,None]:
    """
    Resolve a package designation into a package id using the mapped data.

    The designation is tried as an id first, then as a name and finally, if enabled, as a title.

    :param package_name: package name, title or id.
    :param error_not_mapped: raise NotMappedObjectNameError when the package is unknown (otherwise return None)
    :param search_title: also look the designation up among the package titles
    :return: package id, or None if unknown and error_not_mapped is False
    """
    if package_name is None:
        raise ValueError("package_name cannot be None")
    if package_name in self.packages:
        # the designation already is a package id
        return package_name
    if package_name in self.packages_id_index:
        return self.packages_id_index[package_name]
    if search_title and package_name in self.packages_title_index:
        return self.packages_title_index[package_name]
    if error_not_mapped:
        raise NotMappedObjectNameError(f"Package {package_name} is not mapped or does not exist.")
    return None
134
+
135
def get_resource_id(self, resource_name:str, package_name:str=None, *, error_not_mapped:bool=True) -> Union[str,None]:
    """
    Retrieve the resource id for a given combination of (package name and resource name) based on the package map.

    The designation is tried as a resource id first, then as a DataStore alias and finally as a
    resource name within the given package.

    :param resource_name: resource alias, name or id.
    :param package_name: package name or id (required if resource_name is a resource name). An integrity check is performed if given.
    :param error_not_mapped: raise an error when the resource cannot be resolved (otherwise return None)
    :return: resource id, or None if unknown and error_not_mapped is False
    :raises ValueError: the resource must be searched by name but no package was given (only if error_not_mapped is True)
    :raises NotMappedObjectNameError: the package or the resource is unknown (only if error_not_mapped is True)
    """
    if resource_name in self.resources:
        # recognized resource_id
        resource_id = resource_name
    elif resource_name in self.resource_alias_index:
        # found resource alias
        resource_id = self.resource_alias_index[resource_name]
    else:
        if package_name is None and not error_not_mapped:
            # A name can only be resolved within a package: without one the resource is simply unknown.
            # The caller asked for None rather than an error, so do not let get_package_id raise ValueError.
            return None
        package_id = self.get_package_id(package_name, error_not_mapped=error_not_mapped)
        if package_id is None:
            return None
        name_index = self.packages[package_id].resources_id_index
        if resource_name in name_index:
            resource_id = name_index[resource_name]
        elif error_not_mapped:
            raise NotMappedObjectNameError(f"Resource {resource_name} is not mapped or does not exist.")
        else:
            return None
    # sanity check: the resource must belong to the package designated by the caller
    if package_name is not None:
        resource_info = self.resources[resource_id]
        map_package_id = self.get_package_id(package_name, error_not_mapped=error_not_mapped)
        if map_package_id is not None:
            assert_or_raise(map_package_id == resource_info.package_id, IntegrityError("package_id"))
    return resource_id
166
+
167
def get_organization_id(self, organization_name:str, *, error_not_mapped:bool=True, search_title:bool=True) -> Union[str,None]:
    """
    Resolve an organization designation into an organization id using the mapped data.

    The designation is tried as an id first, then as a name and finally, if enabled, as a title.

    :param organization_name: organization name, title or id.
    :param error_not_mapped: raise NotMappedObjectNameError when the organization is unknown (otherwise return None)
    :param search_title: also look the designation up among the organization titles
    :return: organization id, or None if unknown and error_not_mapped is False
    """
    if organization_name is None:
        raise ValueError("organization_name cannot be None")
    if organization_name in self.organizations:
        # the designation already is an organization id
        return organization_name
    if organization_name in self.organizations_id_index:
        return self.organizations_id_index[organization_name]
    if search_title and organization_name in self.organizations_title_index:
        return self.organizations_title_index[organization_name]
    if error_not_mapped:
        raise NotMappedObjectNameError(f"Organization {organization_name} is not mapped or does not exist.")
    return None
188
+
189
def get_resource_info(self, resource_name:str, package_name:str=None, *, error_not_mapped:bool=True) -> Union[CkanResourceInfo,None]:
    """
    Look up the mapped information of a resource.

    :param resource_name: resource name or id.
    :param package_name: package name or id (required if resource_name is a resource name). An integrity check is performed if given.
    :param error_not_mapped: forwarded to get_resource_id
    :return: the resource information, or None if get_resource_id returned None
    """
    resource_id = self.get_resource_id(resource_name, package_name, error_not_mapped=error_not_mapped)
    return None if resource_id is None else self.resources[resource_id]
202
+
203
def get_package_info(self, package_name:str, *, error_not_mapped:bool=True) -> Union[CkanPackageInfo,None]:
    """
    Look up the mapped information of a package.

    :param package_name: package name or id.
    :param error_not_mapped: forwarded to get_package_id
    :return: the package information, or None if get_package_id returned None
    """
    package_id = self.get_package_id(package_name, error_not_mapped=error_not_mapped)
    return None if package_id is None else self.packages[package_id]
215
+
216
def get_organization_info(self, organization_name:str, *, error_not_mapped:bool=True) -> Union[CkanOrganizationInfo,None]:
    """
    Look up the mapped information of an organization.

    :param organization_name: organization name or id.
    :param error_not_mapped: forwarded to get_organization_id
    :return: the organization information, or None if get_organization_id returned None
    """
    organization_id = self.get_organization_id(organization_name, error_not_mapped=error_not_mapped)
    return None if organization_id is None else self.organizations[organization_id]
228
+
229
def get_organization_for_owner_org(self, organization_name:str, *, error_not_mapped:bool=True) -> Union[CkanOrganizationInfo,None]:
    """
    Retrieve the value to use for the owner_org argument for a given organization, based on the mapped data.
    The value is obtained by calling CkanOrganizationInfo.get_owner_org.

    NOTE(review): the declared return type is CkanOrganizationInfo but the returned value is whatever
    get_owner_org() yields, presumably a string designating the organization - confirm.

    :param organization_name: organization name or id.
    :param error_not_mapped: forwarded to get_organization_info
    :return: result of get_owner_org(), or None if the organization is not mapped
    """
    organization_info = self.get_organization_info(organization_name, error_not_mapped=error_not_mapped)
    if organization_info is None:
        return None
    return organization_info.get_owner_org()
242
+
243
def get_resource_package_id(self, resource_name:str, package_name:str=None, *, error_not_mapped:bool=True) -> Union[str,None]:
    """
    Retrieve the id of the package which holds a given resource.

    :param resource_name: resource name or id.
    :param package_name: package name or id (required if resource_name is a resource name). An integrity check is performed if given.
    :param error_not_mapped: forwarded to get_resource_info
    :return: package id, or None if the resource is not mapped
    """
    resource_info = self.get_resource_info(resource_name, package_name, error_not_mapped=error_not_mapped)
    if resource_info is None:
        return None
    return resource_info.package_id
256
+
257
def get_datastore_info(self, resource_name:str, package_name:str=None, *, error_not_mapped:bool=True) -> Union[CkanDataStoreInfo,None]:
    """
    Retrieve the mapped DataStore information of a resource.

    :param resource_name: resource name or id.
    :param package_name: package name or id (required if resource_name is a resource name). An integrity check is performed if given.
    :param error_not_mapped: raise NotMappedObjectNameError when the resource is mapped without
        DataStore information (also forwarded to get_resource_info)
    :return: the DataStore information, or None if it is not available
    """
    resource_info = self.get_resource_info(resource_name, package_name, error_not_mapped=error_not_mapped)
    if resource_info is None:
        return None
    datastore_info = resource_info.datastore_info
    if datastore_info is not None:
        return datastore_info
    if error_not_mapped:
        raise NotMappedObjectNameError(f"DataStore of resource {resource_name} is not mapped or does not exist.")
    return None
273
+
274
def get_datastore_len(self, resource_name:str, package_name:str=None, *, error_not_mapped:bool=True) -> Union[int,None]:
    """
    Retrieve the number of rows in a DataStore from the mapped data. This requires the map_resources to be called with the option datastore_info=True.

    :param resource_name: resource name or id.
    :param package_name: package name or id (required if resource_name is a resource name). An integrity check is performed if given.
    :param error_not_mapped: forwarded to get_datastore_info
    :return: the mapped row count, or None if the DataStore information is not available
    """
    datastore_info = self.get_datastore_info(resource_name, package_name, error_not_mapped=error_not_mapped)
    if datastore_info is None:
        return None
    return datastore_info.row_count
287
+
288
def _update_datastore_len(self, resource_id:str, new_len:int) -> None:
    """
    Internal function to update the length of a DataStore without making a request.

    The row count is written both in the global resource map and in the copy of the resource
    held by its package. A resource may be mapped without its package (see _update_resource_info):
    in that case only the global resource map is updated.

    :param resource_id: resource id (must be mapped, with DataStore information).
    :param new_len: value to replace
    """
    resource_info = self.resources[resource_id]
    datastore_info = resource_info.datastore_info
    datastore_info.row_count = new_len
    datastore_info.details["meta"]["count"] = new_len
    package_info = self.packages.get(resource_info.package_id)
    if package_info is None:
        return
    package_resource = package_info.package_resources.get(resource_id)
    if package_resource is None:
        return
    package_datastore_info = package_resource.datastore_info
    # the package usually shares the object updated above: only write again if it holds a distinct one
    if package_datastore_info is not None and package_datastore_info is not datastore_info:
        package_datastore_info.row_count = new_len
        package_datastore_info.details["meta"]["count"] = new_len
301
+
302
def _update_datastore_info(self, datastore_info:CkanDataStoreInfo) -> None:
    """
    Internal function to record the DataStore information of a resource.

    If the resource is mapped, the information is attached to it, and to the copy held by its
    package when that package is mapped as well. The aliases of the DataStore are registered
    in the alias index.

    :param datastore_info: DataStore information to record
    """
    resource_id = datastore_info.resource_id
    if resource_id in self.resources:
        resource_info = self.resources[resource_id]
        resource_info.datastore_info = datastore_info
        # a resource may be mapped without its package (see _update_resource_info)
        package_info = self.packages.get(resource_info.package_id)
        if package_info is not None:
            package_resource = package_info.package_resources.get(resource_id)
            if package_resource is not None:
                package_resource.datastore_info = datastore_info
            package_info.resources_id_index[resource_info.name] = resource_id
    if datastore_info.aliases is not None:
        self.resource_alias_index.update({alias: resource_id for alias in datastore_info.aliases})
315
+
316
def _update_resource_info(self, resource_info:Union[CkanResourceInfo, List[CkanResourceInfo]]) -> None:
    """
    Internal function to record the information of one or several resources.

    Each resource is stored in the resource map, handed to its package (if that package is mapped)
    and the aliases of its DataStore, if any, are registered in the alias index.

    :param resource_info: a resource information object or a list of them
    """
    infos = resource_info if isinstance(resource_info, list) else [resource_info]
    for info in infos:
        # the position of the resource within its package is reset before the update
        info.index_in_package = None
        if info.package_id in self.packages:
            self.packages[info.package_id].update_resource(info)
        self.resources[info.id] = info
        datastore_info = info.datastore_info
        if datastore_info is not None and datastore_info.aliases is not None:
            for alias in datastore_info.aliases:
                self.resource_alias_index[alias] = info.id
331
+
332
def _update_view_info(self, view_info:Union[CkanViewInfo, List[CkanViewInfo]], view_list:bool=False) -> None:
    """
    Internal function to record the information of one or several resource views.

    Each view is handed to the update_view method of the mapped resource it belongs to.

    :param view_info: a view information object or a list of them
    :param view_list: forwarded to CkanResourceInfo.update_view
    """
    views = [view_info] if isinstance(view_info, CkanViewInfo) else view_info
    for view in views:
        self.resources[view.resource_id].update_view(view, view_list=view_list)
338
+
339
def _update_package_info(self, package_info:Union[CkanPackageInfo, List[CkanPackageInfo]]) -> None:
    """
    Internal function to record the information of one or several packages.

    The packages, their resources, the DataStore aliases of these resources, and the organization
    and groups attached to the packages are all registered in the map.

    NB: the indicator pkg_info.requested_datastore_info remains False until map_resources is called.

    :param package_info: a package information object or a list of them
    """
    pkg_infos = package_info if isinstance(package_info, list) else [package_info]
    # the resources_id_index of each package does not need to be filled here: already done by __init__
    for pkg_info in pkg_infos:
        self.packages[pkg_info.id] = pkg_info
    for pkg_info in pkg_infos:
        self.packages_id_index[pkg_info.name] = pkg_info.id
    for pkg_info in pkg_infos:
        self.packages_title_index[pkg_info.title] = pkg_info.id
    for pkg_info in pkg_infos:
        for res_info in pkg_info.package_resources.values():
            self.resources[res_info.id] = res_info
        for res_info in pkg_info.package_resources.values():
            datastore_info = res_info.datastore_info
            if datastore_info is None or datastore_info.aliases is None:
                continue
            for alias in datastore_info.aliases:
                self.resource_alias_index[alias] = res_info.id
    for pkg_info in pkg_infos:
        if pkg_info.organization_info is not None:
            self._update_organization_info(pkg_info.organization_info)
        if pkg_info.groups is not None:
            self._update_group_info(pkg_info.groups)
363
+
364
+
365
def get_license_id(self, license_name: str, *, error_not_mapped: bool = True) -> Union[str,None]:
    """
    Retrieve the ID of a license based on the mapped data.

    The designation is tried as a license id first, then as a license title.

    :param license_name: license title or id.
    :param error_not_mapped: raise NotMappedObjectNameError when the license is unknown (otherwise return None)
    :return: license id, or None if unknown and error_not_mapped is False
    """
    if license_name is None:
        raise ValueError("license_name cannot be None")
    if license_name in self.licenses:
        # recognized license_id
        return license_name
    if license_name in self.licenses_title_index:
        return self.licenses_title_index[license_name]
    if error_not_mapped:
        raise NotMappedObjectNameError(f"License {license_name} is not mapped or does not exist.")
    return None
384
+
385
def get_license_info(self, license_name: str, *, error_not_mapped: bool = True) -> Union[CkanLicenseInfo,None]:
    """
    Look up the mapped information of a license.

    :param license_name: license title or id.
    :param error_not_mapped: forwarded to get_license_id
    :return: the license information, or None if get_license_id returned None
    """
    license_id = self.get_license_id(license_name, error_not_mapped=error_not_mapped)
    return None if license_id is None else self.licenses[license_id]
397
+
398
def _update_license_info(self, license_info: Union[CkanLicenseInfo, List[CkanLicenseInfo]]) -> None:
    """
    Internal function to record the information of one or several licenses.

    :param license_info: a license information object or a list of them
    """
    entries = license_info if isinstance(license_info, list) else [license_info]
    for entry in entries:
        self.licenses[entry.id] = entry
    for entry in entries:
        self.licenses_title_index[entry.title] = entry.id
406
+
407
+ ## Package record changes ------------------
408
def _record_package_update(self, pkg_info: CkanPackageInfo) -> None:
    """
    Record the update of a mapped package.

    The mapped package information is updated in place and the name and title indexes are refreshed.
    If the update changed the name or the title of the package, the previous entry is removed from
    the index so that the old designation does not keep resolving to this package.

    :param pkg_info: new information of the package (its id must already be mapped)
    """
    package_id = pkg_info.id
    mapped_info = self.packages[package_id]
    # read the previous designations before the in-place update overwrites them
    previous_name = mapped_info.name
    previous_title = mapped_info.title
    mapped_info.update(pkg_info)
    if previous_name != pkg_info.name and self.packages_id_index.get(previous_name) == package_id:
        del self.packages_id_index[previous_name]
    if previous_title != pkg_info.title and self.packages_title_index.get(previous_title) == package_id:
        del self.packages_title_index[previous_title]
    self.packages_id_index[pkg_info.name] = package_id
    self.packages_title_index[pkg_info.title] = package_id
414
+
415
def _record_package_create(self, pkg_info: CkanPackageInfo) -> None:
    """
    Record the creation of a package: store its information and index its name and title.

    :param pkg_info: information of the created package
    """
    new_id = pkg_info.id
    self.packages[new_id] = pkg_info
    self.packages_id_index[pkg_info.name] = new_id
    self.packages_title_index[pkg_info.title] = new_id
421
+
422
def _record_package_delete_state(self, package_id: str) -> None:
    """
    Record the deletion of a package: the package stays in the map and only passes in delete state.
    Nothing is done if the package is not mapped.

    :param package_id: designation of the package, resolved by get_package_info
    """
    pkg_info = self.get_package_info(package_id, error_not_mapped=False)
    if pkg_info is None:
        return
    pkg_info.state = CkanState.Deleted
427
+
428
def _record_package_purge_removal(self, package_id:str) -> None:
    """
    Record the purge (full removal) of a package.

    The package, its name and title index entries, its resources and the aliases of these
    resources are removed from the map. Nothing is done if the package is not mapped.

    :param package_id: designation of the package, resolved by get_package_info
        (the id of the mapped package is used for the removal, whatever designation was given)
    """
    # purge = full removal
    pkg_info = self.get_package_info(package_id, error_not_mapped=False)
    if pkg_info is None:
        return
    mapped_id = pkg_info.id
    self.packages.pop(mapped_id, None)
    # only remove index entries which designate this package: a title may be shared with another package
    if self.packages_id_index.get(pkg_info.name) == mapped_id:
        del self.packages_id_index[pkg_info.name]
    if self.packages_title_index.get(pkg_info.title) == mapped_id:
        del self.packages_title_index[pkg_info.title]
    # the resources of a purged package do not exist anymore
    removed_resource_ids = set(pkg_info.package_resources)
    for resource_id in removed_resource_ids:
        self.resources.pop(resource_id, None)
    stale_aliases = [alias for alias, target_id in self.resource_alias_index.items()
                     if target_id in removed_resource_ids]
    for alias in stale_aliases:
        del self.resource_alias_index[alias]
438
+
439
+ ## Resource record changes ------------------
440
def _record_resource_update(self, resource_info:CkanResourceInfo) -> None:
    """
    Record the update of a resource.

    The resource map and the view of the resource held by its package are both refreshed, so that
    they never diverge. If the resource was renamed, the previous name is removed from the name
    index of the package. A resource may be mapped without its package (see _update_resource_info):
    in that case only the resource map is updated.

    :param resource_info: new information of the resource
    """
    resource_id = resource_info.id
    previous_info = self.resources.get(resource_id)
    self.resources[resource_id] = resource_info
    package_info = self.packages.get(resource_info.package_id)
    if package_info is None:
        return
    name_index = package_info.resources_id_index
    if (previous_info is not None and previous_info.name != resource_info.name
            and name_index.get(previous_info.name) == resource_id):
        # the resource was renamed: the old name must not resolve to it anymore
        del name_index[previous_info.name]
    package_info.package_resources[resource_id] = resource_info
    name_index[resource_info.name] = resource_id
447
+
448
def _record_resource_create(self, resource_info:CkanResourceInfo) -> None:
    """
    Record the creation of a resource.

    The resource is stored in the resource map and attached to its package, together with its
    name in the name index of the package. A resource may be mapped without its package
    (see _update_resource_info): in that case only the resource map is updated.

    :param resource_info: information of the created resource
    """
    resource_id = resource_info.id
    self.resources[resource_id] = resource_info
    package_info = self.packages.get(resource_info.package_id)
    if package_info is None:
        return
    package_info.package_resources[resource_id] = resource_info
    package_info.resources_id_index[resource_info.name] = resource_id
455
+
456
def _record_resource_delete(self, resource_id:str) -> None:
    """
    Record the deletion of a resource.

    The resource is removed from the resource map, from the alias index and from its package
    (if that package is mapped). A warning is emitted if the resource is not mapped.

    :param resource_id: id of the deleted resource
    """
    if resource_id not in self.resources:
        msg = f"Resource {resource_id} not found in mapped objects"
        warn(msg)
        return
    resource_info = self.resources.pop(resource_id)
    # aliases of a deleted resource must not resolve to an id which is no longer mapped
    stale_aliases = [alias for alias, target_id in self.resource_alias_index.items() if target_id == resource_id]
    for alias in stale_aliases:
        del self.resource_alias_index[alias]
    # a resource may be mapped without its package (see _update_resource_info)
    package_info = self.packages.get(resource_info.package_id)
    if package_info is None:
        return
    package_info.package_resources.pop(resource_id, None)
    # the name may designate another resource of the package: only remove the entry of this resource
    if package_info.resources_id_index.get(resource_info.name) == resource_id:
        del package_info.resources_id_index[resource_info.name]
467
+
468
def _record_datastore_delete(self, resource_id:str) -> None:
    """
    Record the deletion of the DataStore of a resource.

    The DataStore information is detached from the resource, and from the copy held by its
    package (if that package is mapped). The aliases pointing to the resource are removed from
    the alias index. A warning is emitted if the resource is not mapped.

    :param resource_id: id of the resource whose DataStore was deleted
    """
    if resource_id not in self.resources:
        msg = f"DataStore {resource_id} not found in mapped objects"
        warn(msg)
        return
    resource_info = self.resources[resource_id]
    resource_info.datastore_info = None
    # aliases designate the DataStore: they disappear with it
    stale_aliases = [alias for alias, target_id in self.resource_alias_index.items() if target_id == resource_id]
    for alias in stale_aliases:
        del self.resource_alias_index[alias]
    # a resource may be mapped without its package (see _update_resource_info)
    package_info = self.packages.get(resource_info.package_id)
    if package_info is not None and resource_id in package_info.package_resources:
        package_info.package_resources[resource_id].datastore_info = None
477
+
478
+ ## Organization record changes ------------------
479
def _update_organization_info(self, organization_info:Union[CkanOrganizationInfo, List[CkanOrganizationInfo]]) -> None:
    """
    Internal function to record the information of one or several organizations,
    and to index their names and titles.

    :param organization_info: an organization information object or a list of them
    """
    entries = organization_info if isinstance(organization_info, list) else [organization_info]
    for entry in entries:
        self.organizations[entry.id] = entry
    for entry in entries:
        self.organizations_id_index[entry.name] = entry.id
    for entry in entries:
        self.organizations_title_index[entry.title] = entry.id
488
+
489
+ ## Group and users record changes ------------------
490
def _update_group_info(self, group_info:Union[CkanGroupInfo, List[CkanGroupInfo]]) -> None:
    """
    Internal function to record the information of one or several groups,
    and to index their names and titles.

    :param group_info: a group information object or a list of them
    """
    entries = group_info if isinstance(group_info, list) else [group_info]
    for entry in entries:
        self.groups[entry.id] = entry
    for entry in entries:
        self.groups_id_index[entry.name] = entry.id
    for entry in entries:
        self.groups_title_index[entry.title] = entry.id
499
+
500
def _update_user_info(self, user_info:Union[CkanUserInfo, List[CkanUserInfo]]) -> None:
    """
    Internal function to record the information of one or several users, and to index their names.

    :param user_info: a user information object or a list of them
    """
    entries = user_info if isinstance(user_info, list) else [user_info]
    for entry in entries:
        self.users[entry.id] = entry
    for entry in entries:
        self.users_id_index[entry.name] = entry.id
508
+
509
+