ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. ckanapi_harvesters/__init__.py +32 -10
  2. ckanapi_harvesters/auxiliary/__init__.py +26 -0
  3. ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
  4. ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
  5. ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
  6. ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
  7. ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
  8. ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
  9. ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
  10. ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
  11. ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
  12. ckanapi_harvesters/auxiliary/deprecated.py +82 -0
  13. ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
  14. ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
  15. ckanapi_harvesters/auxiliary/list_records.py +60 -0
  16. ckanapi_harvesters/auxiliary/login.py +163 -0
  17. ckanapi_harvesters/auxiliary/path.py +208 -0
  18. ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
  19. ckanapi_harvesters/auxiliary/urls.py +40 -0
  20. ckanapi_harvesters/builder/__init__.py +40 -0
  21. ckanapi_harvesters/builder/builder_aux.py +20 -0
  22. ckanapi_harvesters/builder/builder_ckan.py +238 -0
  23. ckanapi_harvesters/builder/builder_errors.py +36 -0
  24. ckanapi_harvesters/builder/builder_field.py +122 -0
  25. ckanapi_harvesters/builder/builder_package.py +9 -0
  26. ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
  27. ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
  28. ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
  29. ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
  30. ckanapi_harvesters/builder/builder_resource.py +589 -0
  31. ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
  32. ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
  33. ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
  34. ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
  35. ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
  36. ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
  37. ckanapi_harvesters/builder/builder_resource_init.py +126 -0
  38. ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
  39. ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
  40. ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
  41. ckanapi_harvesters/builder/example/__init__.py +21 -0
  42. ckanapi_harvesters/builder/example/builder_example.py +21 -0
  43. ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
  44. ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
  45. ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
  46. ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
  47. ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
  48. ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
  49. ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
  50. ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
  51. ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
  52. ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
  53. ckanapi_harvesters/builder/mapper_datastore.py +93 -0
  54. ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
  55. ckanapi_harvesters/builder/specific/__init__.py +11 -0
  56. ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
  57. ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
  58. ckanapi_harvesters/ckan_api/__init__.py +20 -0
  59. ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
  60. ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
  61. ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
  62. ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
  63. ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
  64. ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
  65. ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
  66. ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
  67. ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
  68. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
  69. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
  70. ckanapi_harvesters/harvesters/__init__.py +23 -0
  71. ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
  72. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
  73. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
  74. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
  75. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
  76. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
  77. ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
  78. ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
  79. ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
  80. ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
  81. ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
  82. ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
  83. ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
  84. ckanapi_harvesters/harvesters/harvester_init.py +30 -0
  85. ckanapi_harvesters/harvesters/harvester_model.py +49 -0
  86. ckanapi_harvesters/harvesters/harvester_params.py +323 -0
  87. ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
  88. ckanapi_harvesters/harvesters/postgre_params.py +86 -0
  89. ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
  90. ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
  91. ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
  92. ckanapi_harvesters/policies/__init__.py +20 -0
  93. ckanapi_harvesters/policies/data_format_policy.py +269 -0
  94. ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
  95. ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
  96. ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
  97. ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
  98. ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
  99. ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
  100. ckanapi_harvesters/reports/__init__.py +11 -0
  101. ckanapi_harvesters/reports/admin_report.py +292 -0
  102. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/METADATA +84 -38
  103. ckanapi_harvesters-0.0.3.dist-info/RECORD +105 -0
  104. ckanapi_harvesters/divider/__init__.py +0 -27
  105. ckanapi_harvesters/divider/divider.py +0 -53
  106. ckanapi_harvesters/divider/divider_error.py +0 -59
  107. ckanapi_harvesters/main.py +0 -30
  108. ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
  109. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/WHEEL +0 -0
  110. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1225 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+
5
+ """
6
+ from typing import List, Union, Tuple, Dict
7
+ from collections import OrderedDict
8
+ import copy
9
+ import re
10
+ from warnings import warn
11
+ import argparse
12
+ import io
13
+ import hashlib
14
+
15
+ import pandas as pd
16
+
17
+ from ckanapi_harvesters.auxiliary.ckan_defs import ckan_tags_sep
18
+ from ckanapi_harvesters.auxiliary.ckan_auxiliary import json_encode_params
19
+ from ckanapi_harvesters.auxiliary.ckan_configuration import default_ckan_has_postgis, default_ckan_target_epsg
20
+ from ckanapi_harvesters.auxiliary.proxy_config import ProxyConfig
21
+ from ckanapi_harvesters.auxiliary.ckan_model import CkanPackageInfo, CkanResourceInfo, CkanViewInfo, CkanField
22
+ from ckanapi_harvesters.auxiliary.ckan_model import CkanState
23
+ from ckanapi_harvesters.auxiliary.ckan_auxiliary import assert_or_raise, ckan_package_name_re, datastore_id_col
24
+ from ckanapi_harvesters.auxiliary.ckan_auxiliary import dict_recursive_update
25
+ from ckanapi_harvesters.auxiliary.ckan_auxiliary import upload_prepare_requests_files_arg, RequestType
26
+ from ckanapi_harvesters.auxiliary.ckan_action import CkanNotFoundError
27
+ from ckanapi_harvesters.auxiliary.ckan_errors import (ReadOnlyError, AdminFeatureLockedError, NoDefaultView,
28
+ InvalidParameterError, CkanMandatoryArgumentError,
29
+ IntegrityError)
30
+ from ckanapi_harvesters.policies.data_format_policy import CkanPackageDataFormatPolicy
31
+ from ckanapi_harvesters.harvesters.data_cleaner.data_cleaner_abc import CkanDataCleanerABC
32
+ from ckanapi_harvesters.ckan_api.ckan_api_1_map import use_ckan_owner_org_as_default
33
+
34
+ from ckanapi_harvesters.auxiliary.ckan_map import CkanMap
35
+ from ckanapi_harvesters.auxiliary.ckan_api_key import CkanApiKey
36
+ from ckanapi_harvesters.ckan_api.ckan_api_4_readwrite import CkanApiReadWriteParams
37
+ from ckanapi_harvesters.ckan_api.ckan_api_4_readwrite import CkanApiReadWrite
38
+
39
+
40
default_alias_package_resource_sep: str = "."

# PostgreSQL limits identifier length; the same limit applies to DataStore aliases.
ckan_table_name_max_len: int = 63
alias_name_max_len: Union[int, None] = ckan_table_name_max_len

# When a name is longer than the maximum length, a hash of the full name is used.
# That hash should therefore stay the same if the resource is re-created.
#   True:  the name is replaced by the full hash
#   False: only the exceeding characters are replaced, using the two settings below
default_alias_hash_replace: bool = True
default_alias_hash_len: int = 6
default_alias_hash_sep: str = ":"
47
+
48
# Raw string: the backslash classes must reach the regex engine untouched. In a
# normal string literal they are invalid escape sequences, which recent Python
# versions report with a warning.
# Matches any character that is neither a word character nor a hyphen, or the
# empty position at the start of a name that begins with a digit.
table_name_subs_re = r'[^\w-]|^(?=\d)'


def clean_table_name(variable_name: str) -> str:
    """
    Replace unwanted characters and spaces to generate a valid table name.

    Each character that is neither a word character nor a hyphen is replaced by an underscore.
    If the name starts with a digit, an underscore is inserted in front of it.

    :param variable_name: name to clean
    :return: the cleaned name
    """
    return re.sub(table_name_subs_re, '_', variable_name)
55
+
56
+
57
class CkanApiManageParams(CkanApiReadWriteParams):
    """
    Parameters of the read/write client extended with the admin-mode switch.
    """
    # False: disable advanced admin operations by default such as resource/package deletion
    default_enable_admin: bool = False
    # if True, always add the default alias when calling datastore_create
    default_alias_enforce: bool = True

    def __init__(self, *, proxies: Union[str, dict, ProxyConfig] = None,
                 ckan_headers: dict = None, http_headers: dict = None):
        """
        :param proxies: forwarded to the parent constructor
        :param ckan_headers: forwarded to the parent constructor
        :param http_headers: forwarded to the parent constructor
        """
        super().__init__(proxies=proxies, ckan_headers=ckan_headers, http_headers=http_headers)
        self.enable_admin: bool = self.default_enable_admin

    def copy(self, new_identifier: str = None, *, dest=None):
        """
        Copy the parameters into dest, or into a new CkanApiManageParams when dest is not given.

        :param new_identifier: accepted but not forwarded to the parent implementation
        :param dest: object receiving the copied values
        :return: the object which received the values
        """
        target = CkanApiManageParams() if dest is None else dest
        super().copy(dest=target)
        target.enable_admin = self.enable_admin
        return target

    def _setup_cli_ckan_parser__params(self, parser: argparse.ArgumentParser = None) -> argparse.ArgumentParser:
        """
        Overload which adds the --admin flag to the parser prepared by the parent class.
        """
        cli_parser = super()._setup_cli_ckan_parser__params(parser=parser)
        cli_parser.add_argument("--admin", action="store_true", help="Option to enable admin mode")
        return cli_parser

    def _cli_ckan_args_apply(self, args: argparse.Namespace, *, base_dir: str = None,
                             error_not_found: bool = True, default_proxies: dict = None,
                             proxy_headers: dict = None,
                             proxies: dict = None, headers: dict = None) -> None:
        """
        Overload which enables admin mode when the --admin flag was given.

        The proxies and headers arguments are accepted but are not forwarded to the parent implementation.
        """
        forwarded = dict(base_dir=base_dir, error_not_found=error_not_found,
                         default_proxies=default_proxies, proxy_headers=proxy_headers)
        super()._cli_ckan_args_apply(args=args, **forwarded)
        if not args.admin:
            # flag absent: keep the current setting
            return
        self.enable_admin = args.admin
88
+
89
+
90
class CkanApiExtendedParams(CkanApiManageParams):
    """
    Parameters of the management client extended with the PostGIS flag and the default target EPSG.
    """

    def __init__(self, *, proxies: Union[str, dict, ProxyConfig] = None,
                 ckan_headers: dict = None, http_headers: dict = None):
        """
        :param proxies: forwarded to the parent constructor
        :param ckan_headers: forwarded to the parent constructor
        :param http_headers: forwarded to the parent constructor
        """
        super().__init__(proxies=proxies, ckan_headers=ckan_headers, http_headers=http_headers)
        self.ckan_has_postgis: bool = default_ckan_has_postgis
        self.ckan_default_target_epsg: Union[int, None] = default_ckan_target_epsg

    def copy(self, new_identifier: str = None, *, dest=None):
        """
        Copy the parameters into dest, or into a new CkanApiExtendedParams when dest is not given.

        :param new_identifier: accepted but not forwarded to the parent implementation
        :param dest: object receiving the copied values
        :return: the object which received the values
        """
        target = CkanApiExtendedParams() if dest is None else dest
        super().copy(dest=target)
        target.ckan_has_postgis = self.ckan_has_postgis
        target.ckan_default_target_epsg = self.ckan_default_target_epsg
        return target

    def _setup_cli_ckan_parser__params(self, parser: argparse.ArgumentParser = None) -> argparse.ArgumentParser:
        """
        Overload which adds the --ckan-postgis and --ckan-epsg options to the parser of the parent class.
        """
        cli_parser = super()._setup_cli_ckan_parser__params(parser=parser)
        # store_true flag: its default is False, not default_ckan_has_postgis
        cli_parser.add_argument("--ckan-postgis", action="store_true",
                                help="Option to notify that CKAN is compatible with PostGIS")
        cli_parser.add_argument("--ckan-epsg", type=int,
                                help="Default EPSG for CKAN", default=default_ckan_target_epsg)
        return cli_parser

    def _cli_ckan_args_apply(self, args: argparse.Namespace, *, base_dir: str = None,
                             error_not_found: bool = True, default_proxies: dict = None,
                             proxy_headers: dict = None,
                             proxies: dict = None, headers: dict = None) -> None:
        """
        Overload which applies the PostGIS flag and the default EPSG given on the command line.

        Values which evaluate to False leave the current setting unchanged.
        The proxies and headers arguments are accepted but are not forwarded to the parent implementation.
        """
        forwarded = dict(base_dir=base_dir, error_not_found=error_not_found,
                         default_proxies=default_proxies, proxy_headers=proxy_headers)
        super()._cli_ckan_args_apply(args=args, **forwarded)
        postgis_flag = args.ckan_postgis
        if postgis_flag:
            self.ckan_has_postgis = postgis_flag
        epsg_code = args.ckan_epsg
        if epsg_code:
            self.ckan_default_target_epsg = epsg_code
124
+
125
+
126
+ class CkanApiManage(CkanApiReadWrite):
127
+ """
128
+ CKAN Database API interface to CKAN server with helper functions using pandas DataFrames.
129
+ This class implements more advanced requests to manage packages, resources and DataStores on the CKAN server.
130
+ """
131
+
132
def __init__(self, url: str = None, *, proxies: Union[str, dict, ProxyConfig] = None,
             apikey: Union[str, CkanApiKey] = None, apikey_file: str = None,
             owner_org: str = None, params: CkanApiExtendedParams = None,
             map: CkanMap = None, policy: CkanPackageDataFormatPolicy = None, policy_file: str = None,
             data_cleaner_upload: CkanDataCleanerABC = None,
             identifier=None):
    """
    CKAN Database API interface to CKAN server with helper functions using pandas DataFrames.

    :param url: url of the CKAN server
    :param proxies: proxies to use for requests. When given, it is also written to params.proxies
    :param apikey: way to provide the API key directly (optional)
    :param apikey_file: path to a file containing a valid API key in the first line of text (optional)
    :param owner_org: name of the organization to limit package_search (optional)
    :param params: other connection/behavior parameters. A CkanApiExtendedParams is created if omitted.
        This argument is not passed to the parent constructor: it is assigned to self.params afterwards.
    :param map: map of known resources
    :param policy: data format policy to be used with the policy_check function
    :param policy_file: path to a JSON file containing the data format policy to load
    :param data_cleaner_upload: data cleaner object to use before uploading to a CKAN DataStore
    :param identifier: identifier of the ckan client
    """
    super().__init__(url=url, proxies=proxies, apikey=apikey, apikey_file=apikey_file,
                     owner_org=owner_org, map=map, policy=policy, policy_file=policy_file,
                     data_cleaner_upload=data_cleaner_upload, identifier=identifier)
    client_params = CkanApiExtendedParams() if params is None else params
    if proxies is not None:
        # NB: this also modifies a params object supplied by the caller
        client_params.proxies = proxies
    self.params: CkanApiExtendedParams = client_params
163
+
164
def copy(self, new_identifier: str = None, *, dest=None):
    """
    Copy this client into dest, or into a new CkanApiManage when dest is not given.

    :param new_identifier: forwarded to the parent implementation
    :param dest: object receiving the copy
    :return: the object which received the copy
    """
    target = CkanApiManage() if dest is None else dest
    super().copy(new_identifier=new_identifier, dest=target)
    return target
169
+
170
def full_unlock(self, unlock: bool = True,
                *, no_ca: bool = None, external_url_resource_download: bool = None) -> None:
    """
    Function to unlock full capabilities of the CKAN API.
    On top of the parent implementation, the value of unlock is written to params.enable_admin.

    :param unlock: value given to the parent implementation and assigned to enable_admin
    :param no_ca: forwarded to the parent implementation
    :param external_url_resource_download: forwarded to the parent implementation
    :return:
    """
    parent_options = dict(no_ca=no_ca, external_url_resource_download=external_url_resource_download)
    super().full_unlock(unlock, **parent_options)
    self.params.enable_admin = unlock
180
+
181
def _setup_cli_ckan_parser(self, parser: argparse.ArgumentParser = None) -> argparse.ArgumentParser:
    """
    Overload which returns the parser prepared by the parent class. No argument is added at this level.
    """
    return super()._setup_cli_ckan_parser(parser=parser)
185
+
186
def _cli_ckan_args_apply(self, args: argparse.Namespace, *, base_dir: str = None,
                         error_not_found: bool = True, default_proxies: dict = None,
                         proxy_headers: dict = None,
                         proxies: dict = None, headers: dict = None) -> None:
    """
    Overload which delegates to the parent implementation. No argument is handled at this level.

    The proxies and headers arguments are accepted but are not forwarded to the parent implementation.
    """
    forwarded = dict(base_dir=base_dir, error_not_found=error_not_found,
                     default_proxies=default_proxies, proxy_headers=proxy_headers)
    super()._cli_ckan_args_apply(args=args, **forwarded)
192
+
193
+ ## Field modification ------------------
194
@staticmethod
def datastore_field_dict(fields:Union[List[Union[CkanField,dict]], OrderedDict[str,Union[CkanField,dict]]]=None,
                         fields_merge:Union[List[Union[CkanField,dict]], OrderedDict[str,Union[CkanField,dict]]]=None,
                         fields_update:Union[List[Union[CkanField,dict]], OrderedDict[str,Union[CkanField,dict]]]=None, *,
                         fields_type_override:Dict[str,str]=None, fields_description:Dict[str,str]=None,
                         fields_label:Dict[str,str]=None, return_list:bool=False) \
        -> Union[Dict[str, CkanField], List[dict]]:
    """
    Initialization of the `fields` parameter for datastore_create.
    Only parts used by this package are present.
    To complete the field dictionaries, refer to datastore_field_patch_dict.

    The three sources are read in the order fields, fields_merge, fields_update. An entry found in a
    later source replaces the entry of the same field id coming from an earlier one.

    NB: CkanField objects given in the sources are stored as is (no copy). The overrides applied
    afterwards (type override, description, label, name) therefore modify these objects.

    :param fields: first source of field information, usually the fields from the DataStore
    :param fields_merge: second source. Values from this source overwrite those of fields
    :param fields_update: third source. Values from this source take priority over all others
    :param fields_type_override: type override per field id. Unknown field ids are created with this type
    :param fields_description: description (notes) per field id. Unknown field ids are created
    :param fields_label: label per field id. Unknown field ids are created
    :param return_list: True to return a list of CKAN dictionaries instead of a dict of CkanField
    :return: dict if return_list is False, list if return_list is True.
        You can easily transform the dict to a list with the following code:
        ```python
        fields = list(fields_update.values())
        ```
    :raises TypeError: if a source is neither a list nor a dict, or if an item is neither a CkanField nor a dict
    """
    fields_updated: OrderedDict[str, CkanField] = OrderedDict()

    def _absorb(source) -> None:
        # Register every field of one source in fields_updated, replacing entries with the same id.
        if source is None:
            return
        if isinstance(source, list):
            for field_info in source:
                if isinstance(field_info, CkanField):
                    fields_updated[field_info.name] = field_info
                elif isinstance(field_info, dict):
                    fields_updated[field_info["id"]] = CkanField.from_ckan_dict(field_info)
                else:
                    raise TypeError(f"Field type {type(field_info)} not supported.")
        elif isinstance(source, dict):
            for field_name, field_info in source.items():
                # the key of the dictionary must agree with the id carried by the field itself
                if isinstance(field_info, CkanField):
                    fields_updated[field_info.name] = field_info
                    assert_or_raise(field_name == field_info.name, IntegrityError(f"Field {field_name} does not match its id ({field_info.name})"))
                elif isinstance(field_info, dict):
                    fields_updated[field_info["id"]] = CkanField.from_ckan_dict(field_info)
                    assert_or_raise(field_name == field_info["id"], IntegrityError(f"Field {field_name} does not match its id ({field_info['id']})"))
                else:
                    raise TypeError(f"Field type {type(field_info)} not supported for {field_name}.")
        else:
            raise TypeError(f"Field type {type(source)} not supported.")

    for source in (fields, fields_merge, fields_update):
        _absorb(source)

    if fields_type_override is not None:
        for field_name, field_type in fields_type_override.items():
            if field_name in fields_updated:
                fields_updated[field_name].type_override = field_type
            else:
                fields_updated[field_name] = CkanField(field_name, field_type)
    if fields_description is not None:
        for field_name, description in fields_description.items():
            if field_name not in fields_updated:
                fields_updated[field_name] = CkanField(field_name, None)
            fields_updated[field_name].notes = description
    if fields_label is not None:
        # iterate over the labels themselves (the descriptions were used here by mistake)
        for field_name, label in fields_label.items():
            if field_name not in fields_updated:
                fields_updated[field_name] = CkanField(field_name, None)
            fields_updated[field_name].label = label
    # make sure each field carries the id under which it is registered
    for field_id, field_info in fields_updated.items():
        field_info.name = field_id
    if return_list:
        return [field_info.to_ckan_dict() for field_info in fields_updated.values()]
    return fields_updated
313
+
314
def datastore_field_patch_dict(self, fields_merge:Union[List[Union[CkanField,dict]], OrderedDict[str,Union[CkanField,dict]]]=None,
                               fields_update:Union[List[Union[CkanField,dict]], OrderedDict[str,Union[CkanField,dict]]]=None, *,
                               fields_type_override:Dict[str,str]=None, fields_description:Dict[str,str]=None,
                               fields_label:Dict[str,str]=None, return_list:bool=False,
                               datastore_merge:bool=True, resource_id:str=None, error_not_found:bool=True) \
        -> Tuple[Union[bool,None], Union[Dict[str, CkanField], List[dict]]]:
    """
    Calls datastore_field_dict and merges attributes with those found in datastore_info if datastore_merge=True.

    :param fields_merge: passed to datastore_field_dict
    :param fields_update: passed to datastore_field_dict
    :param fields_type_override: passed to datastore_field_dict
    :param fields_description: passed to datastore_field_dict
    :param fields_label: passed to datastore_field_dict
    :param return_list: True to return the fields as a list of CKAN dictionaries
    :param datastore_merge: True to merge the requested fields with the fields of the DataStore
    :param resource_id: required if datastore_merge=True
    :param error_not_found: passed to get_datastore_info_or_request_of_id. If True, resource_id is asserted not None
    :return: a tuple (update_needed, fields).
        update_needed is None if datastore_merge=False and False if no DataStore information was obtained.
    """
    requested: Dict[str, CkanField] = CkanApiManage.datastore_field_dict(
        fields=None, fields_merge=fields_merge, fields_update=fields_update,
        fields_type_override=fields_type_override, fields_description=fields_description,
        fields_label=fields_label, return_list=False)

    def _render(field_map):
        # output format selected by return_list
        if return_list:
            return [field_info.to_ckan_dict() for field_info in field_map.values()]
        return field_map

    if not datastore_merge:
        return None, _render(requested)

    if error_not_found:
        assert(resource_id is not None)
    datastore_info = self.get_datastore_info_or_request_of_id(resource_id, error_not_found=error_not_found)
    if datastore_info is None:
        # no DataStore information: the requested fields are returned unmerged
        return False, _render(requested)

    current = copy.deepcopy(datastore_info.fields_dict)
    if len(current) == 0:
        msg = f"No fields found for {resource_id}"
        warn(msg)
    merged = copy.deepcopy(current)
    update_needed = False
    for field_name, field_info in requested.items():
        if field_name in current.keys():
            merged[field_name] = current[field_name].merge(field_info)
            # a change is needed as soon as one merged field differs from the current one
            update_needed |= not merged[field_name] == current[field_name]
        else:
            # field unknown to the DataStore
            merged[field_name] = field_info
            update_needed = True
    return update_needed, _render(merged)
365
+
366
def datastore_field_patch(self, resource_id:str, fields_merge:Union[List[Union[CkanField,dict]], OrderedDict[str,Union[CkanField,dict]]]=None,
                          fields_update:Union[List[Union[CkanField,dict]], OrderedDict[str,Union[CkanField,dict]]]=None, *,
                          only_if_needed:bool=False, fields:Union[List[Union[CkanField,dict]], OrderedDict[str,Union[CkanField,dict]]]=None,
                          fields_type_override:Dict[str,str]=None, field_description:Dict[str,str]=None,
                          fields_label:Dict[str,str]=None) -> Tuple[bool, List[dict], Union[dict,bool,None]]:
    """
    Function helper call to API datastore_create in order to update the parameters of some fields. The initial field
    configuration is taken from the mapped information or requested.
    Typically, this could be used to enforce a data type on a field. In this case, it is required to resubmit the
    resource data with the API resource_patch.
    The fields_update argument would be e.g. fields_update={"id": {"info": {"type_override": "text"}}}
    This is equivalent to the option fields_type_override={"id": "text"}

    __NB__: it is not possible to rename a field after creation through the API. To do this, the change must be done in the database.

    :param resource_id: resource id
    :param fields_merge: passed to datastore_field_patch_dict
    :param fields_update: dictionary of field id and properties to change.
        This field can be overridden by the values given in fields_type_override, field_description, or fields_label.
    :param only_if_needed: Cancels the request if the changes do not affect the current configuration
    :param fields: not used by this function
    :param fields_type_override: argument to simplify the edition of the info.type_override value for each field id.
    :param field_description: argument to simplify the edition of the info.notes value for each field id
    :param fields_label: argument to simplify the edition of the info.label value for each field id
    :return: a tuple (update_needed, fields_new, update_dict). update_dict is None if the request was cancelled.
    """
    update_needed, fields_new = self.datastore_field_patch_dict(
        fields_merge=fields_merge, fields_update=fields_update,
        fields_type_override=fields_type_override,
        fields_description=field_description, fields_label=fields_label,
        datastore_merge=True, resource_id=resource_id, return_list=True)
    if only_if_needed and not update_needed:
        # nothing would change: skip the call to datastore_create
        return update_needed, fields_new, None
    update_dict = self.datastore_create(resource_id, fields=fields_new)
    return update_needed, fields_new, update_dict
399
+
400
+
401
+ ## Data deletions ------------------
402
def _api_datastore_delete(self, resource_id:str, *, params:dict=None,
                          force:bool=None) -> dict:
    """
    Function to delete rows of a DataStore using the API action datastore_delete.
    If no filter is given, the whole database will be erased.
    This function is private and should not be called directly.

    :param resource_id: id of the resource owning the DataStore
    :param params: additional parameters of the request. This dictionary is copied and left unchanged.
    :param force: set to True to edit a read-only resource. If not provided, this is overridden by self.params.default_force
    :return: the result of the API call
    :raises ReadOnlyError: if the client is in read-only mode
    :raises CkanNotFoundError: if the server answered 404 with a "Not Found Error"
    """
    assert_or_raise(not self.params.read_only, ReadOnlyError())
    # build the payload in a new dictionary so that the dictionary of the caller is not modified
    params = {} if params is None else dict(params)
    if force is None:
        force = self.params.default_force
    params["resource_id"] = resource_id
    params["force"] = force
    response = self._api_action_request("datastore_delete", method=RequestType.Post, json=params)
    if response.success:
        return response.result
    elif response.status_code == 404 and response.success_json_loads and response.error_message["__type"] == "Not Found Error":
        # will trigger another error if resource does not exist
        self.resource_show(resource_id)
        raise CkanNotFoundError(self, "DataStore", response)
    else:
        raise response.default_error(self)
427
+
428
def datastore_delete_rows(self, resource_id:str, filters:dict, *, params:dict=None,
                          force:bool=None, calculate_record_count:bool=True) -> dict:
    """
    Function to delete certain rows of a DataStore using _api_datastore_delete.
    The filters are mandatory here.
    If not given, the whole database would be erased. Prefer using datastore_clear for this usage.

    :see: _api_datastore_delete()
    :param resource_id: id of the resource owning the DataStore
    :param filters: filters selecting the rows to delete. Must not be empty.
    :param params: additional parameters of the request. This dictionary is copied and left unchanged.
    :param force: set to True to edit a read-only resource. If not provided, this is overridden by self.params.default_force
    :param calculate_record_count: value sent in the calculate_record_count parameter of the request
    :return: the result of _api_datastore_delete
    :raises InvalidParameterError: if filters is None or empty
    """
    # validate before building the request: a missing filter would erase the whole DataStore
    assert_or_raise(filters is not None and len(filters) > 0, InvalidParameterError("filters"))
    # build the payload in a new dictionary so that the dictionary of the caller is not modified
    params = {} if params is None else dict(params)
    if force is None:
        force = self.params.default_force
    params["filters"] = filters
    params["calculate_record_count"] = calculate_record_count
    return self._api_datastore_delete(resource_id, params=params, force=force)
449
+
450
def datastore_clear(self, resource_id:str, *, error_not_found:bool=True, params:dict=None,
                    force:bool=None, bypass_admin:bool=False) -> Union[dict,None]:
    """
    Function to clear data in a DataStore using _api_datastore_delete. Requires enable_admin=True.
    This implementation adds the option error_not_found. If set to False, no error is raised if the resource is found but the DataStore is not.

    :see: _api_datastore_delete()
    :param resource_id: id of the resource owning the DataStore
    :param error_not_found: if False, does not raise an exception if the resource exists but there is no DataStore
    :param params: additional parameters of the request. This dictionary is copied and left unchanged.
    :param force: set to True to edit a read-only resource. If not provided, this is overridden by self.params.default_force
    :param bypass_admin: option to bypass check of enable_admin
    :return: the result of _api_datastore_delete, or None if the missing DataStore was tolerated
    :raises AdminFeatureLockedError: if enable_admin is False and bypass_admin is False
    :raises CkanNotFoundError: if the object was not found and the error is not tolerated
    """
    if not bypass_admin:
        assert_or_raise(self.params.enable_admin, AdminFeatureLockedError())
    # hand over a copy so that the dictionary of the caller is not modified by the request helper
    params = {} if params is None else dict(params)
    if force is None:
        force = self.params.default_force
    try:
        result = self._api_datastore_delete(resource_id, params=params, force=force)
        self.map._record_datastore_delete(resource_id)
        return result
    except CkanNotFoundError as e:
        if error_not_found or e.object_type != "DataStore":
            # re-raise the exception as is (it must not become its own cause)
            raise
        msg = f"Tried to delete DataStore of existing resource_id {resource_id} but there is no DataStore."
        if self.params.verbose_request:
            print(msg)
        return None
479
+
480
def _api_resource_delete(self, resource_id:str, *, params:dict=None,
                         force:bool=None, bypass_admin:bool=False) -> dict:
    """
    API call to resource_delete. This fully removes the resource, definitively.
    Requires enable_admin=True unless bypass_admin is set.

    :param resource_id: id of the resource to delete
    :param params: additional parameters of the API call (the dict is completed in place)
    :param force: resolved against self.params.default_force but currently not transmitted (see the commented line below)
    :param bypass_admin: option to bypass the check of enable_admin
    :return: the result field of the API response
    """
    admin_check_required = not bypass_admin
    if admin_check_required:
        assert_or_raise(self.params.enable_admin, AdminFeatureLockedError())
    assert_or_raise(not self.params.read_only, ReadOnlyError())
    params = {} if params is None else params
    force = self.params.default_force if force is None else force
    params["id"] = resource_id
    # params["force"] = force
    response = self._api_action_request("resource_delete", method=RequestType.Post, json=params)
    if not response.success:
        raise response.default_error(self)
    # keep the local map in sync with the server
    self.map._record_resource_delete(resource_id)
    return response.result
504
+
505
def resource_delete(self, resource_id:str, *, params:dict=None,
                    force:bool=None, bypass_admin:bool=False) -> dict:
    """
    Public alias of _api_resource_delete: every argument is forwarded unchanged.

    :see: _api_resource_delete()
    """
    forwarded_options = dict(params=params, force=force, bypass_admin=bypass_admin)
    return self._api_resource_delete(resource_id, **forwarded_options)
509
+
510
+
511
+ ## Datastore creation ------------------
512
+ @staticmethod
513
+ def default_resource_view(resource_format:str) -> Tuple[str,str]:
514
+ """
515
+ Definition of the default resource view based on the resource format.
516
+
517
+ :param resource_format:
518
+ :return:
519
+ """
520
+ if resource_format is None:
521
+ resource_format = "unknown"
522
+ resource_format = resource_format.lower()
523
+ if resource_format == "csv":
524
+ title = "Table"
525
+ view_type = "recline_view" # Data Explorer
526
+ elif resource_format in {"json", "txt", "py"}:
527
+ title = "Text"
528
+ view_type = "text_view"
529
+ elif resource_format in {"png", "svg"}:
530
+ title = "Image"
531
+ view_type = "image_view"
532
+ else:
533
+ title = None
534
+ view_type = None
535
+ return title, view_type
536
+
537
def _api_resource_view_create(self, resource_id:str, title:Union[str,List[str]]=None, *,
                              view_type:Union[str,List[str]]=None, params:dict=None) -> List[CkanViewInfo]:
    """
    API call to resource_view_create, issued once per (title, view_type) pair.

    title and view_type must have the same length if specified as lists.

    :param resource_id: resource id
    :param title: title of the view, or list of titles
    :param view_type: type of view, typically recline_view for Data Explorer, or list of types
    :param params: additional parameters of the API call (the same dict is reused for each request)
    :return: list of copies of the created view infos, in creation order
    :raises ValueError: if title or view_type is None
    """
    assert_or_raise(not self.params.read_only, ReadOnlyError())
    if params is None:
        params = {}
    if title is None or view_type is None:
        raise ValueError("title and view_type must be specified together")
    view_types = [view_type] if isinstance(view_type, str) else view_type
    titles = [title] if isinstance(title, str) else title
    assert len(titles) == len(view_types)
    params["resource_id"] = resource_id
    created_views = []
    for current_title, current_type in zip(titles, view_types):
        params["title"] = current_title
        params["view_type"] = current_type
        response = self._api_action_request("resource_view_create", method=RequestType.Post, json=params)
        if not response.success:
            raise response.default_error(self)
        created = CkanViewInfo(response.result)
        self.map._update_view_info(created)
        created_views.append(created.copy())
    return created_views
572
+
573
def resource_view_create(self, resource_id:str, title:Union[str,List[str]]=None, *,
                         view_type:Union[str,List[str]]=None, params:dict=None,
                         error_no_default_view_type:bool=False, cancel_if_exists:bool=True) -> List[CkanViewInfo]:
    """
    Encapsulation of the API resource_view_create. If no resource view is provided to create (None),
    the function looks up the default view defined in default_resource_view.
    This function also looks at the existing views and cancels the creation of those which have the same title.
    If provided as a list, title and view_type must have the same length. The lists given by the caller
    are never modified.

    :param resource_id: resource id
    :param title: title of the view, or list of titles
    :param view_type: type of view, or list of types
    :param params: additional parameters forwarded to _api_resource_view_create
    :param error_no_default_view_type: if True, raise NoDefaultView when no default view exists for the resource format; otherwise only warn
    :param cancel_if_exists: skip the creation of the views whose title already exists on the resource
    :return: list of the created view infos (empty if nothing had to be created)
    :raises ValueError: if only one of title and view_type is given
    """
    if title is None and view_type is None:
        resource_info = self.get_resource_info_or_request_of_id(resource_id)
        resource_format = resource_info.format
        title, view_type = self.default_resource_view(resource_format)
        if title is None:
            msg = NoDefaultView(resource_format)
            if error_no_default_view_type:
                raise msg
            warn(str(msg))
            return []
    if title is None or view_type is None:
        # same contract and message as _api_resource_view_create
        raise ValueError("title and view_type must be specified together")
    # work on copies: filtering below must not alter the caller's lists
    view_type = [view_type] if isinstance(view_type, str) else list(view_type)
    title = [title] if isinstance(title, str) else list(title)
    assert(len(title) == len(view_type))
    if cancel_if_exists:
        existing_titles = {view_info.title for view_info in self.get_resource_view_list_or_request(resource_id)}
        remaining = [(view_title, view_kind) for view_title, view_kind in zip(title, view_type)
                     if view_title not in existing_titles]
        title = [view_title for view_title, _ in remaining]
        view_type = [view_kind for _, view_kind in remaining]
    if len(title) > 0:
        return self._api_resource_view_create(resource_id, title, view_type=view_type, params=params)
    return []
621
+
622
def _api_resource_create(self, package_id:str, name:str, *, format:str=None, description:str=None,
                         state:CkanState=None,
                         df:pd.DataFrame=None, file_path:str=None, url:str=None, files=None,
                         payload:Union[bytes, io.BufferedIOBase]=None, payload_name:str=None,
                         params:dict=None) -> CkanResourceInfo:
    """
    API call to resource_create.

    :see: _api_resource_patch
    :see: resource_create
    :param package_id: package receiving the new resource
    :param name: name of the resource
    :param format: format of the resource (only sent if not None)
    :param description: description of the resource (only sent if not None)
    :param state: state of the resource, sent as str(state) (only sent if not None)
    :param url: url of the resource; cannot be combined with uploaded file data
    :param params: additional parameters such as resource_type can be set

    For file uploads, the following parameters are taken, by order of priority:
    See upload_prepare_requests_files_arg for an example of formatting.

    :param files: files pass through argument to the requests.post function. Use to send other data formats.
    :param payload: bytes to upload as a file
    :param payload_name: name of the payload to use (associated with the payload argument) - this determines the format recognized in CKAN viewers.
    :param file_path: path of the file to transmit (binary and text files are supported here)
    :param df: pandas DataFrame to upload as the resource

    :return: copy of the info of the created resource
    """
    assert_or_raise(not self.params.read_only, ReadOnlyError())
    if params is None:
        params = {}
    params["package_id"] = package_id
    params["name"] = name
    optional_fields = (
        ("format", format),
        ("state", None if state is None else str(state)),
        ("description", description),
    )
    for field_name, field_value in optional_fields:
        if field_value is not None:
            params[field_name] = field_value
    files = upload_prepare_requests_files_arg(files=files, file_path=file_path, df=df, payload=payload, payload_name=payload_name)
    if url is not None:
        params["url"] = url
        assert files is None
    if files is None:
        response = self._api_action_request("resource_create", method=RequestType.Post, json=params)
    else:
        # file uploads are sent as form data next to the files
        response = self._api_action_request("resource_create", method=RequestType.Post,
                                            files=files, data=params)
    if not response.success:
        raise response.default_error(self)
    # keep the local map in sync with the server
    created_info = CkanResourceInfo(response.result)
    self.map._record_resource_create(created_info)
    return created_info.copy()
675
+
676
def datastore_default_alias(self, resource_name:str, package_name:str, *,
                            query_names:bool=True, error_not_found:bool=True) -> str:
    """
    Default DataStore alias of a resource.

    :param resource_name: name of the resource
    :param package_name: name of the package
    :param query_names: if True, the package and resource infos are looked up first and their names are used
    :param error_not_found: forwarded to get_resource_info_or_request
    :return: alias built from the looked-up infos, or from the given names if the lookup is disabled or returned no resource
    """
    if not query_names:
        return CkanApiManage.datastore_default_alias_of_names(resource_name, package_name)
    package_info = self.get_package_info_or_request(package_name)
    resource_info = self.get_resource_info_or_request(resource_name, package_name, error_not_found=error_not_found)
    if resource_info is None:
        return CkanApiManage.datastore_default_alias_of_names(resource_name, package_name)
    return CkanApiManage.datastore_default_alias_of_info(resource_info, package_info)
684
+
685
@staticmethod
def datastore_default_alias_of_info(resource_info:CkanResourceInfo, package_info:CkanPackageInfo) -> str:
    """
    Default DataStore alias built from the name attributes of the given infos.

    :see: datastore_default_alias_of_names()
    """
    return CkanApiManage.datastore_default_alias_of_names(package_name=package_info.name,
                                                          resource_name=resource_info.name)
690
+
691
@staticmethod
def datastore_default_alias_of_names(resource_name:str, package_name:str) -> str:
    """
    Default DataStore alias built from a resource name and a package name.

    The alias is the package name, default_alias_package_resource_sep and the cleaned
    (lower-cased, stripped, passed through clean_table_name) resource name.
    If alias_name_max_len is set and exceeded, the alias is either replaced by
    "alias" + separator + SHA-1 hex digest (default_alias_hash_replace), or truncated
    and suffixed with the first default_alias_hash_len characters of the digest.

    :param resource_name: name of the resource
    :param package_name: name of the package
    :return: alias name
    """
    cleaned_resource = clean_table_name(resource_name.lower().strip())
    full_alias = package_name + default_alias_package_resource_sep + cleaned_resource
    too_long = alias_name_max_len is not None and len(full_alias) > alias_name_max_len
    if not too_long:
        return full_alias
    digest = hashlib.sha1(full_alias.encode("utf-8")).hexdigest()
    if default_alias_hash_replace:
        return "alias" + default_alias_hash_sep + digest
    kept_length = alias_name_max_len-default_alias_hash_len-len(default_alias_hash_sep)
    return full_alias[:kept_length] + default_alias_hash_sep + digest[:default_alias_hash_len]
704
+
705
def resource_create(self, package_id:str, name:str, *, format:str=None, description:str=None, state:CkanState=None,
                    params:dict=None,
                    url:str=None,
                    files=None, file_path:str=None, df:pd.DataFrame=None,
                    payload:Union[bytes, io.BufferedIOBase]=None, payload_name:str=None,
                    cancel_if_exists:bool=True, update_if_exists:bool=False, reupload:bool=False, create_default_view:bool=True, auto_submit:bool=False,
                    datastore_create:bool=False, records:Union[dict, List[dict], pd.DataFrame]=None, fields:List[dict]=None,
                    primary_key: Union[str, List[str]] = None, indexes: Union[str, List[str]] = None,
                    aliases: Union[str, List[str]] = None, data_cleaner:CkanDataCleanerABC=None) -> CkanResourceInfo:
    """
    Proxy to API call resource_create verifying if a resource with the same name already exists and adding the default view.
    Optionally patches the existing resource, submits it to the DataStore and creates the DataStore.

    :param package_id: package holding the resource (also used as key of self.map.packages)
    :param name: name of the resource (mandatory)
    :param format: forwarded to resource_patch / _api_resource_create
    :param description: forwarded to resource_patch / _api_resource_create
    :param state: forwarded to resource_patch / _api_resource_create
    :param url: forwarded to resource_patch / _api_resource_create
    :param params: additional parameters, forwarded to _api_resource_create only (not used when an existing resource is patched)
    :param cancel_if_exists: check if a resource with the same name already exists in the package on CKAN server
        If a resource with the same name already exists, the info for this resource is returned
    :param update_if_exists: If a resource with the same name already exists (and cancel_if_exists=True), a call to resource_patch is performed.
    :param reupload: re-upload the resource if a resource with the same name already exists and cancel_if_exists=True and update_if_exists=True
    :param create_default_view: call resource_view_create without title/view_type after the resource was created or patched
    :param auto_submit: call datastore_submit when file data was sent
    :param datastore_create: call datastore_create after the resource was created or patched
    :param records: forwarded to datastore_create
    :param fields: forwarded to datastore_create
    :param primary_key: forwarded to datastore_create
    :param indexes: forwarded to datastore_create
    :param aliases: forwarded to datastore_create
    :param data_cleaner: forwarded to datastore_create

    For file uploads, the following parameters are taken, by order of priority:
    See upload_prepare_requests_files_arg for an example of formatting.

    :param files: files pass through argument to the requests.post function. Use to send other data formats.
    :param payload: bytes to upload as a file
    :param payload_name: name of the payload to use (associated with the payload argument) - this determines the format recognized in CKAN viewers.
    :param file_path: path of the file to transmit (binary and text files are supported here)
    :param df: pandas DataFrame to replace resource

    :return: info of the existing, patched or newly created resource, with newly_created / newly_updated set
    :raises CkanMandatoryArgumentError: if name is None or empty
    """
    assert_or_raise(not self.params.read_only, ReadOnlyError())
    has_file_data = (files is not None or df is not None or file_path is not None or payload is not None)
    has_records = records is not None
    # the previous DataStore is only cleared when new data replaces it on request
    delete_previous_datastore = (has_file_data or has_records) and reupload and datastore_create
    if name is None or name == "":
        raise CkanMandatoryArgumentError("resource_create", "name")
    if df is not None and datastore_id_col in df.columns:
        # only a warning: the call proceeds with the DataFrame unchanged
        msg = f"You cannot initiate a resource with an {datastore_id_col} column if you are creating a DataStore. You risk to create a conflict error."
        warn(msg)
    if cancel_if_exists:
        self.map_resources(package_id, only_missing=True)
        if name in self.map.packages[package_id].resources_id_index.keys():
            # a resource with this name already exists: reuse it
            resource_info = self.map.get_resource_info(name, package_id)
            resource_id = resource_info.id
            resource_info.newly_created = False
            resource_info.newly_updated = False
            delete_previous_datastore = delete_previous_datastore # and self.resource_is_datastore(resource_id)
            if update_if_exists:
                if has_file_data and not reupload:
                    # cancel reupload if not enabled
                    has_file_data = False
                    df = None
                    file_path = None
                    files = None
                    payload = None
                if has_records and not reupload:
                    # records are not re-sent either when reupload is disabled
                    records = None
                    has_records = False
                if delete_previous_datastore:
                    # if there already was a datastore, clear it
                    self.datastore_clear(resource_id, error_not_found=False, bypass_admin=True)
                resource_info.update(self.resource_patch(resource_id, name=name, format=format, state=state,
                                                         description=description, url=url,
                                                         df=df, file_path=file_path, files=files,
                                                         payload=payload, payload_name=payload_name))
                if create_default_view:
                    view_info_list = self.resource_view_create(resource_info.id)
                    resource_info.update_view(view_info_list)
                if has_file_data:
                    resource_info.newly_updated = True
                    if auto_submit:
                        self.datastore_submit(resource_info.id)
                if datastore_create or delete_previous_datastore:
                    # delete_previous=False: the clearing, if any, was already done above
                    info = self.datastore_create(resource_info.id, records=records, fields=fields, primary_key=primary_key,
                                                 indexes=indexes, aliases=aliases, delete_previous=False, data_cleaner=data_cleaner)
                return resource_info
            else:
                # existing resource is returned untouched
                return resource_info
    # here: the resource does not exist => create a new one
    resource_info = self._api_resource_create(package_id, name, format=format, description=description, state=state,
                                              url=url,
                                              files=files, file_path=file_path, df=df,
                                              payload=payload, payload_name=payload_name, params=params)
    resource_info.newly_created = True
    resource_info.newly_updated = False
    if create_default_view:
        view_info_list = self.resource_view_create(resource_info.id)
        resource_info.update_view(view_info_list)
    if auto_submit and has_file_data:
        self.datastore_submit(resource_info.id)
    if datastore_create:
        info = self.datastore_create(resource_info.id, records=records, fields=fields, primary_key=primary_key,
                                     indexes=indexes, aliases=aliases, delete_previous=False, data_cleaner=data_cleaner)
    return resource_info
802
+
803
def _api_datastore_create(self, resource_id:str, *, records:Union[dict, List[dict], pd.DataFrame]=None,
                          fields:List[Union[dict, CkanField]]=None,
                          primary_key:Union[str, List[str]]=None, indexes:Union[str, List[str]]=None,
                          aliases: Union[str, List[str]]=None,
                          params:dict=None,force:bool=None) -> dict:
    """
    API call to datastore_create.
    This endpoint also supports altering tables, aliases and indexes and bulk insertion.

    :param resource_id: resource id
    :param records: records to insert; a DataFrame is converted with to_dict(orient='records')
    :param fields: field definitions; CkanField items are converted with to_ckan_dict()
    :param primary_key: list of field names, or a string split on ckan_tags_sep
    :param indexes: list of field names, or a string split on ckan_tags_sep
    :param aliases: list of alias names, or a string split on ckan_tags_sep
    :param params: additional parameters of the API call (the dict is completed in place)
    :param force: if not provided, this is overridden by self.params.default_force
    :return: the result field of the API response
    """
    # assert_or_raise(self.enable_admin, AdvancedFeatureLockedError())
    assert_or_raise(not self.params.read_only, ReadOnlyError())
    if params is None:
        params = {}
    if force is None:
        force = self.params.default_force
    params["resource_id"] = resource_id
    params["force"] = force
    name_list_arguments = (("primary_key", primary_key), ("indexes", indexes), ("aliases", aliases))
    for param_key, names in name_list_arguments:
        if names is None:
            continue
        params[param_key] = names.split(ckan_tags_sep) if isinstance(names, str) else names
    if records is not None:
        params["records"] = records.to_dict(orient='records') if isinstance(records, pd.DataFrame) else records
    if fields is not None:
        # list of dicts
        params["fields"] = [item.to_ckan_dict() if isinstance(item, CkanField) else item for item in fields]
    data_payload, json_headers = json_encode_params(params)
    response = self._api_action_request("datastore_create", method=RequestType.Post,
                                        data=data_payload, headers=json_headers)
    # response = self._api_action_request(f"datastore_create", method=RequestType.Post, json=params)
    if not response.success:
        raise response.default_error(self)
    return response.result
856
+
857
def datastore_create(self, resource_id:str, *, delete_previous:bool=False, bypass_admin:bool=False,
                     records:Union[dict, List[dict], pd.DataFrame]=None,
                     fields:List[Union[dict,CkanField]]=None,
                     primary_key:Union[str, List[str]]=None, indexes:Union[str, List[str]]=None,
                     aliases: Union[str, List[str]]=None,
                     params:dict=None,force:bool=None, data_cleaner:CkanDataCleanerABC=None) -> dict:
    """
    Encapsulation of the datastore_create API call.
    This function can optionally clear the DataStore before creating it, clean the records
    with a data cleaner and enforce the presence of the default alias.

    :param resource_id: resource id
    :param delete_previous: option to delete the previous datastore, if exists (default:False)
    :param bypass_admin: forwarded to datastore_clear when delete_previous is set
    :param records: records to insert
    :param fields: field definitions
    :param primary_key: if None and records were cleaned, the data cleaner suggestion is used
    :param indexes: if None and records were cleaned, the data cleaner suggestion is used
    :param aliases: alias names (list, or string split on ckan_tags_sep); the argument is never modified in place
    :param params: additional parameters forwarded to _api_datastore_create
    :param force: forwarded to _api_datastore_create
    :param data_cleaner: cleaner applied to the records; defaults to self.data_cleaner_upload
    :return: the value returned by _api_datastore_create
    """
    if delete_previous:
        self.datastore_clear(resource_id, error_not_found=False, bypass_admin=bypass_admin)
    if data_cleaner is None:
        data_cleaner = self.data_cleaner_upload
    if data_cleaner is not None:
        if not delete_previous:
            fields_for_cleaner_dict = CkanApiManage.datastore_field_dict(fields=fields)
            fields_for_cleaner = OrderedDict([(field_name, CkanField.from_ckan_dict(field_dict)) for field_name, field_dict in fields_for_cleaner_dict.items()])
        else:
            fields_for_cleaner = None
        records = data_cleaner.clean_records(records, known_fields=fields_for_cleaner, inplace=True)
        # guard against a cleaner handing back None when no records were given
        if records is not None and len(records) > 0:
            if primary_key is None:
                primary_key = data_cleaner.field_suggested_primary_key
            if indexes is None:
                indexes = list(data_cleaner.field_suggested_index)
            fields = data_cleaner.merge_field_changes(fields)
    if self.params.default_alias_enforce:
        resource_info = self.get_resource_info_or_request_of_id(resource_id)
        package_info = self.get_package_info_or_request(resource_info.package_id)
        if aliases is None and not delete_previous:
            datastore_info = self.get_datastore_info_or_request_of_id(resource_id, error_not_found=False)
            if datastore_info is not None:
                aliases = datastore_info.aliases  # when aliases argument is None, the aliases are not modified => keep existing aliases from server
        default_alias_name = self.datastore_default_alias_of_info(resource_info, package_info)
        if isinstance(aliases, str):
            # same string convention as _api_datastore_create
            aliases = aliases.split(ckan_tags_sep)
        # build a new list: neither the caller's list nor the list held by
        # datastore_info may be altered by appending the default alias
        merged_aliases = [] if aliases is None else list(aliases)
        merged_aliases.append(default_alias_name)
        aliases = list(dict.fromkeys(merged_aliases))  # keep unique values, in a stable order
    return self._api_datastore_create(resource_id, records=records, fields=fields,
                                      primary_key=primary_key, indexes=indexes, aliases=aliases,
                                      params=params, force=force)
910
+
911
+
912
+ ## Package creation/deletion/edit ------------------
913
def _api_package_patch(self, package_id: str, package_name:str=None, private:bool=None, *, title:str=None, notes:str=None, owner_org:str=None,
                       state:Union[CkanState,str]=None, license_id:str=None, tags:List[str]=None, tags_list_dict:List[Dict[str, str]]=None,
                       url:str=None, version:str=None, custom_fields:dict=None,
                       author:str=None, author_email:str=None, maintainer:str=None, maintainer_email:str=None,
                       params:dict=None) -> CkanPackageInfo:
    """
    API call to package_patch. Use to change the properties of a package.
    This method is preferred to package_update which requires to resend the full package configuration.
    (API doc for package_update: It is recommended to call package_show(),
    make the desired changes to the result, and then call package_update() with it.)

    Only the arguments which are not None are transmitted, so the properties which are
    not specified are left out of the request.

    :param package_id: id of the package to patch
    :param package_name: new name of the package (2 to 100 characters matching ckan_package_name_re)
    :param private: new visibility of the package; not transmitted if None
    :param title:
    :param notes:
    :param owner_org: if None and use_ckan_owner_org_as_default is set, self.owner_org is used
    :param state: sent as a string
    :param license_id:
    :param tags: tag names, appended as {"name": tag} after the entries of tags_list_dict
    :param tags_list_dict: tags in the list-of-dicts form; the argument is never modified in place
    :param url:
    :param version:
    :param custom_fields: sent as the "extras" list of {"key", "value"} dicts; None values are replaced by ""
    :param author:
    :param author_email:
    :param maintainer:
    :param maintainer_email:
    :param params: additional parameters of the API call (the dict is completed in place)
    :return: copy of the info of the patched package
    """
    assert_or_raise(not self.params.read_only, ReadOnlyError())
    if params is None: params = {}
    params["id"] = package_id
    if private is not None:
        # an unspecified visibility must not be sent as an explicit null
        # (e.g. when only the state is patched)
        params["private"] = private
    if owner_org is None and use_ckan_owner_org_as_default:
        owner_org = self.owner_org
    if owner_org is not None:
        params["owner_org"] = owner_org
    if package_name is not None:
        assert (2 <= len(package_name) <= 100 and re.match(ckan_package_name_re, package_name))
        params["name"] = package_name
    for key, value in (("title", title), ("notes", notes), ("url", url), ("version", version)):
        if value is not None:
            params[key] = value
    if tags is not None:
        # build a new list so the caller's tags_list_dict is left untouched
        tags_list_dict = [] if tags_list_dict is None else list(tags_list_dict)
        tags_list_dict.extend({"name": tag} for tag in tags)
    if tags_list_dict is not None:
        params["tags"] = tags_list_dict
    if custom_fields is not None:
        params["extras"] = [{"key": key, "value": value if value is not None else ""} for key, value in custom_fields.items()]
    for key, value in (("author", author), ("author_email", author_email),
                       ("maintainer", maintainer), ("maintainer_email", maintainer_email)):
        if value is not None:
            params[key] = value
    if state is not None:
        params["state"] = state if isinstance(state, str) else str(state)
    if license_id is not None:
        params["license_id"] = license_id
    response = self._api_action_request("package_patch", method=RequestType.Post, json=params)
    if response.success:
        # update map
        pkg_info = CkanPackageInfo(response.result)
        self.map._record_package_update(pkg_info)
        return pkg_info.copy()
    else:
        raise response.default_error(self)
986
+
987
def package_patch(self, package_id: str, package_name:str=None, private:bool=None, *, title:str=None, notes:str=None, owner_org:str=None,
                  state:Union[CkanState,str]=None, license_id:str=None, tags:List[str]=None, tags_list_dict:List[Dict[str, str]]=None,
                  url:str=None, version:str=None, custom_fields:dict=None,
                  author:str=None, author_email:str=None, maintainer:str=None, maintainer_email:str=None,
                  params:dict=None) -> CkanPackageInfo:
    """
    Public alias of _api_package_patch: every argument is forwarded unchanged.

    :see: _api_package_patch()
    """
    patch_arguments = dict(
        package_id=package_id, package_name=package_name, private=private,
        title=title, notes=notes, owner_org=owner_org, state=state,
        license_id=license_id, tags=tags, tags_list_dict=tags_list_dict, url=url, version=version,
        custom_fields=custom_fields, author=author, author_email=author_email,
        maintainer=maintainer, maintainer_email=maintainer_email,
        params=params,
    )
    return self._api_package_patch(**patch_arguments)
999
+
1000
def package_state_change(self, package_id:str, state:CkanState) -> CkanPackageInfo:
    """
    Change the state of a package through package_patch; only the state argument is passed.

    :param package_id: id of the package
    :param state: new state of the package
    :return: the value returned by package_patch
    """
    patched_info = self.package_patch(package_id, state=state)
    return patched_info
1009
+
1010
def _api_package_create(self, name:str, private:bool, *, title:str=None, notes:str=None, owner_org:str=None,
                        state: Union[CkanState, str] = None, license_id: str = None, tags: List[str] = None, tags_list_dict:List[Dict[str, str]]=None,
                        url: str = None, version: str = None, custom_fields: dict = None,
                        author: str = None, author_email: str = None,
                        maintainer: str = None, maintainer_email: str = None,
                        params:dict=None) -> CkanPackageInfo:
    """
    API call to package_create.

    :param name: name of the package (2 to 100 characters matching ckan_package_name_re)
    :param private: visibility of the package, always transmitted
    :param title:
    :param notes:
    :param owner_org: if None and use_ckan_owner_org_as_default is set, self.owner_org is used
    :param state: sent as a string
    :param license_id:
    :param tags: tag names, appended as {"name": tag} after the entries of tags_list_dict
    :param tags_list_dict: tags in the list-of-dicts form; the argument is never modified in place
    :param url:
    :param version:
    :param custom_fields: sent as the "extras" list of {"key", "value"} dicts; None values are replaced by ""
    :param author:
    :param author_email:
    :param maintainer:
    :param maintainer_email:
    :param params: additional parameters of the API call (the dict is completed in place)
    :return: copy of the info of the created package
    """
    assert_or_raise(not self.params.read_only, ReadOnlyError())
    if params is None: params = {}
    assert(2 <= len(name) <= 100 and re.match(ckan_package_name_re, name))
    params["name"] = name
    params["private"] = private
    if owner_org is None and use_ckan_owner_org_as_default:
        owner_org = self.owner_org
    if owner_org is not None:
        params["owner_org"] = owner_org
    for key, value in (("title", title), ("notes", notes), ("url", url), ("version", version)):
        if value is not None:
            params[key] = value
    if tags is not None:
        # build a new list so the caller's tags_list_dict is left untouched
        tags_list_dict = [] if tags_list_dict is None else list(tags_list_dict)
        tags_list_dict.extend({"name": tag} for tag in tags)
    if tags_list_dict is not None:
        params["tags"] = tags_list_dict
    if custom_fields is not None:
        params["extras"] = [{"key": key, "value": value if value is not None else ""} for key, value in custom_fields.items()]
    for key, value in (("author", author), ("author_email", author_email),
                       ("maintainer", maintainer), ("maintainer_email", maintainer_email)):
        if value is not None:
            params[key] = value
    if state is not None:
        params["state"] = state if isinstance(state, str) else str(state)
    if license_id is not None:
        params["license_id"] = license_id
    response = self._api_action_request("package_create", method=RequestType.Post, json=params)
    if response.success:
        # update map
        pkg_info = CkanPackageInfo(response.result)
        self.map._record_package_create(pkg_info)
        return pkg_info.copy()
    else:
        raise response.default_error(self)
1079
+
1080
def package_create(self, package_name:str, private:bool=True, *, title:str=None, notes:str=None, owner_org:str=None,
                   state: Union[CkanState, str] = None, license_id: str = None, tags: List[str] = None, tags_list_dict:List[Dict[str, str]]=None,
                   url: str = None, version: str = None, custom_fields: dict = None,
                   author: str = None, author_email: str = None,
                   maintainer: str = None, maintainer_email: str = None,
                   params:dict=None, cancel_if_exists:bool=True, update_if_exists=True) -> CkanPackageInfo:
    """
    Helper function to create a new package. This first checks if the package already exists.

    :see: _api_package_create()
    :param package_name: name of the package (mandatory)
    :param private: visibility of the package
    :param title:
    :param notes:
    :param owner_org:
    :param license_id:
    :param state:
    :param params:
    :param cancel_if_exists: if the package is already known, do not call package_create
    :param update_if_exists: if the package is already known (and cancel_if_exists=True), call package_patch with the given properties
    :return: package info, with newly_created telling whether package_create was called
    :raises CkanMandatoryArgumentError: if package_name is None or empty
    """
    assert_or_raise(not self.params.read_only, ReadOnlyError())
    if package_name is None or package_name == "":
        raise CkanMandatoryArgumentError("package_create", "package_name")
    self.map_resources(package_name, only_missing=True, error_not_found=False)
    pkg_info = self.map.get_package_info(package_name, error_not_mapped=False)
    # properties shared by the patch and the create calls
    package_properties = dict(
        title=title, notes=notes,
        owner_org=owner_org, state=state, license_id=license_id, tags=tags,
        tags_list_dict=tags_list_dict,
        url=url, version=version, custom_fields=custom_fields,
        author=author, author_email=author_email,
        maintainer=maintainer, maintainer_email=maintainer_email,
        params=params,
    )
    reuse_existing = pkg_info is not None and cancel_if_exists
    if not reuse_existing:
        pkg_info = self._api_package_create(package_name, private, **package_properties)
        pkg_info.newly_created = True
        return pkg_info
    if update_if_exists:
        pkg_info = self.package_patch(pkg_info.id, package_name, private=private, **package_properties)
    pkg_info.newly_created = False
    return pkg_info
1128
+
1129
def _api_package_delete(self, package_id:str,
                        *, params:dict=None) -> dict:
    """
    API call to package_delete. Requires enable_admin=True.
    This marks the package as deleted and does not remove data.

    :param package_id: id of the package to delete
    :param params: additional parameters of the API call (the dict is completed in place)
    :return: the result field of the API response
    """
    assert_or_raise(self.params.enable_admin, AdminFeatureLockedError())
    assert_or_raise(not self.params.read_only, ReadOnlyError())
    params = {} if params is None else params
    params["id"] = package_id
    response = self._api_action_request("package_delete", method=RequestType.Post, json=params)
    if not response.success:
        raise response.default_error(self)
    # keep the local map in sync with the server
    self.map._record_package_delete_state(package_id)
    return response.result
1150
+
1151
def _api_package_resource_reorder(self, package_id:str, resource_ids: List[str],
                                  *, params:dict=None) -> dict:
    """
    API call to package_resource_reorder. Reorders resources within a package.
    If only partial resource ids are supplied then these are assumed to be first and the other resources will stay in their original order.

    :param package_id: the id or name of the package to update
    :param resource_ids: a list of resource ids in the order needed
    :param params: additional parameters of the API call (the dict is completed in place)
    :return: the result field of the API response
    """
    assert_or_raise(not self.params.read_only, ReadOnlyError())
    params = {} if params is None else params
    params["id"] = package_id
    params["order"] = resource_ids
    response = self._api_action_request("package_resource_reorder", method=RequestType.Post, json=params)
    if not response.success:
        raise response.default_error(self)
    return response.result

# public alias of the API call
package_resource_reorder = _api_package_resource_reorder
1173
+
1174
def _api_dataset_purge(self, package_id:str,
                       *, params:dict=None) -> dict:
    """
    API call to dataset_purge.
    This fully removes the package.
    This action is not reversible.
    It requires an admin account.
    The request is only sent if admin features are enabled and the client is not in read-only mode.
    On success, the removal is recorded in the local map.

    :param package_id: value sent as the "id" field of the request
    :param params: optional additional fields for the request body. The dict is copied: the caller's object is not modified.
    :return: the result field of the API response
    :raises AdminFeatureLockedError: expected when self.params.enable_admin is false (checked through assert_or_raise)
    :raises ReadOnlyError: expected when self.params.read_only is true (checked through assert_or_raise)
    """
    assert_or_raise(self.params.enable_admin, AdminFeatureLockedError())
    assert_or_raise(not self.params.read_only, ReadOnlyError())
    # work on a copy so that the "id" key is never written into the caller's dict
    params = {} if params is None else dict(params)
    params["id"] = package_id
    response = self._api_action_request("dataset_purge", method=RequestType.Post, json=params)
    if response.success:
        # update map
        self.map._record_package_purge_removal(package_id)
        return response.result
    else:
        # the response object builds the error to raise
        raise response.default_error(self)
1197
+
1198
def package_delete_resources(self, package_name:str, *, bypass_admin:bool=False):
    """
    Definitively delete all resources associated with the package.
    The package is looked up in the local map and resource_delete is called once per listed resource.

    :param package_name: key passed to self.map.get_package_info
    :param bypass_admin: forwarded unchanged to each resource_delete call
    :return:
    """
    pkg = self.map.get_package_info(package_name)
    # gather every id before the first deletion is requested
    ids_to_delete = tuple(res.id for res in pkg.package_resources.values())
    for res_id in ids_to_delete:
        self.resource_delete(res_id, bypass_admin=bypass_admin)
1209
+
1210
def package_delete(self, package_id:str, definitive_delete:bool=False, *, params:dict=None) -> dict:
    """
    Single entry point for package removal, dispatching to one of two API calls.

    :param package_id: forwarded to the selected API call
    :param definitive_delete: True: calls dataset_purge (action not reversible), False: calls API package_delete
        (only marks the package for deletion).
    :param params: forwarded unchanged to the selected API call
    :return: whatever the selected API call returns
    """
    # select the bound method first, then make a single call
    removal_call = self._api_dataset_purge if definitive_delete else self._api_package_delete
    return removal_call(package_id, params=params)
1224
+
1225
+