ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. ckanapi_harvesters/__init__.py +32 -10
  2. ckanapi_harvesters/auxiliary/__init__.py +26 -0
  3. ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
  4. ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
  5. ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
  6. ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
  7. ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
  8. ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
  9. ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
  10. ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
  11. ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
  12. ckanapi_harvesters/auxiliary/deprecated.py +82 -0
  13. ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
  14. ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
  15. ckanapi_harvesters/auxiliary/list_records.py +60 -0
  16. ckanapi_harvesters/auxiliary/login.py +163 -0
  17. ckanapi_harvesters/auxiliary/path.py +208 -0
  18. ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
  19. ckanapi_harvesters/auxiliary/urls.py +40 -0
  20. ckanapi_harvesters/builder/__init__.py +40 -0
  21. ckanapi_harvesters/builder/builder_aux.py +20 -0
  22. ckanapi_harvesters/builder/builder_ckan.py +238 -0
  23. ckanapi_harvesters/builder/builder_errors.py +36 -0
  24. ckanapi_harvesters/builder/builder_field.py +122 -0
  25. ckanapi_harvesters/builder/builder_package.py +9 -0
  26. ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
  27. ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
  28. ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
  29. ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
  30. ckanapi_harvesters/builder/builder_resource.py +589 -0
  31. ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
  32. ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
  33. ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
  34. ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
  35. ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
  36. ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
  37. ckanapi_harvesters/builder/builder_resource_init.py +126 -0
  38. ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
  39. ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
  40. ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
  41. ckanapi_harvesters/builder/example/__init__.py +21 -0
  42. ckanapi_harvesters/builder/example/builder_example.py +21 -0
  43. ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
  44. ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
  45. ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
  46. ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
  47. ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
  48. ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
  49. ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
  50. ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
  51. ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
  52. ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
  53. ckanapi_harvesters/builder/mapper_datastore.py +93 -0
  54. ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
  55. ckanapi_harvesters/builder/specific/__init__.py +11 -0
  56. ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
  57. ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
  58. ckanapi_harvesters/ckan_api/__init__.py +20 -0
  59. ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
  60. ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
  61. ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
  62. ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
  63. ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
  64. ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
  65. ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
  66. ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
  67. ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
  68. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
  69. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
  70. ckanapi_harvesters/harvesters/__init__.py +23 -0
  71. ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
  72. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
  73. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
  74. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
  75. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
  76. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
  77. ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
  78. ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
  79. ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
  80. ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
  81. ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
  82. ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
  83. ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
  84. ckanapi_harvesters/harvesters/harvester_init.py +30 -0
  85. ckanapi_harvesters/harvesters/harvester_model.py +49 -0
  86. ckanapi_harvesters/harvesters/harvester_params.py +323 -0
  87. ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
  88. ckanapi_harvesters/harvesters/postgre_params.py +86 -0
  89. ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
  90. ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
  91. ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
  92. ckanapi_harvesters/policies/__init__.py +20 -0
  93. ckanapi_harvesters/policies/data_format_policy.py +269 -0
  94. ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
  95. ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
  96. ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
  97. ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
  98. ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
  99. ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
  100. ckanapi_harvesters/reports/__init__.py +11 -0
  101. ckanapi_harvesters/reports/admin_report.py +292 -0
  102. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/METADATA +84 -38
  103. ckanapi_harvesters-0.0.3.dist-info/RECORD +105 -0
  104. ckanapi_harvesters/divider/__init__.py +0 -27
  105. ckanapi_harvesters/divider/divider.py +0 -53
  106. ckanapi_harvesters/divider/divider_error.py +0 -59
  107. ckanapi_harvesters/main.py +0 -30
  108. ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
  109. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/WHEEL +0 -0
  110. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,192 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Basic parameters for the CkanApi class
5
+ """
6
+ from typing import Union, Tuple
7
+ import copy
8
+ from warnings import warn
9
+ import argparse
10
+
11
+ import requests
12
+ from requests.auth import AuthBase
13
+
14
+ from ckanapi_harvesters.auxiliary.proxy_config import ProxyConfig
15
+ from ckanapi_harvesters.auxiliary.ckan_auxiliary import CkanIdFieldTreatment
16
+ from ckanapi_harvesters.auxiliary.ckan_configuration import allow_no_ca
17
+ from ckanapi_harvesters.auxiliary.ckan_errors import NoCAVerificationError
18
+ from ckanapi_harvesters.auxiliary.path import path_rel_to_dir
19
+
20
+ default_df_download_id_field_treatment: CkanIdFieldTreatment = CkanIdFieldTreatment.SetIndex
21
+
22
+
23
class CkanApiParamsBasic:
    """
    Basic parameters for the CkanApi class: HTTP settings (proxies, headers, CA certificates),
    debug options, dry-run mode, request limits, timeouts and verbosity flags.
    """

    def __init__(self, *, proxies:Union[str,dict,ProxyConfig]=None,
                 ckan_headers:dict=None, http_headers:dict=None):
        """
        :param proxies: proxies to use for requests
        :param ckan_headers: headers to use for requests, only to the CKAN server
        :param http_headers: headers to use for requests, for all requests, including external requests and to the CKAN server
        """
        if ckan_headers is None: ckan_headers = {}
        if http_headers is None: http_headers = {}
        # HTTP parameters
        self._proxy_config: ProxyConfig = ProxyConfig.from_str_or_config(proxies)
        self.user_agent: Union[str,None] = None
        self.http_headers: dict = http_headers
        self.ckan_headers: dict = ckan_headers
        self._ckan_ca: Union[bool, str, None] = True  # use to specify a path to a custom CA certificate for the CKAN server (see also environment variable REQUESTS_CA_BUNDLE)
        self._extern_ca: Union[bool, str, None] = True  # use to specify a path to a custom CA certificate for external connections (see also environment variable REQUESTS_CA_BUNDLE)
        # debug parameters
        self.store_last_response: bool = True
        self.store_last_response_debug_info: bool = True
        # modes
        self.dry_run: bool = False  # if True, no requests are sent to the server (for debugging purposes)
        # limits
        self.default_limit_list:Union[int,None] = 100  # limit the number of entries per list response (used as default value)
        self.default_limit_read:Union[int,None] = 5000  # limit the number of entries per response (used as default value)
        self.max_requests_count:int = 1000  # when automating multiple requests, the number of requests is limited by this parameter
        # timeouts
        self.multi_requests_timeout:float = 60  # when automating multiple requests, the total time elapsed is limited by this parameter (evaluated between each request)
        self.multi_requests_time_between_requests:float = 0  # when automating multiple requests, wait this additional time (in seconds) between each request
        self.requests_timeout:Union[float,None] = 100  # timeout per request sent to the requests module
        # verbosity
        self.verbose_multi_requests:bool = False
        self.verbose_request:bool = False
        self.verbose_request_error:bool = True
        self.verbose_extra:bool = True

    def copy(self, *, dest=None):
        """
        Copy the parameters to another object.

        :param dest: object receiving the values; a new CkanApiParamsBasic is created if None
        :return: dest
        """
        if dest is None:
            dest = CkanApiParamsBasic()
        dest._proxy_config = self._proxy_config.copy()
        dest.user_agent = copy.deepcopy(self.user_agent)
        dest.http_headers = copy.deepcopy(self.http_headers)
        dest.ckan_headers = copy.deepcopy(self.ckan_headers)
        # the private fields are assigned directly: the values were already validated by the setters of self
        dest._ckan_ca = self._ckan_ca
        dest._extern_ca = self._extern_ca
        dest.dry_run = self.dry_run
        dest.store_last_response = self.store_last_response
        dest.store_last_response_debug_info = self.store_last_response_debug_info
        dest.default_limit_list = self.default_limit_list
        dest.default_limit_read = self.default_limit_read
        dest.max_requests_count = self.max_requests_count
        dest.multi_requests_timeout = self.multi_requests_timeout
        dest.multi_requests_time_between_requests = self.multi_requests_time_between_requests
        dest.requests_timeout = self.requests_timeout
        dest.verbose_multi_requests = self.verbose_multi_requests
        dest.verbose_request = self.verbose_request
        dest.verbose_request_error = self.verbose_request_error
        dest.verbose_extra = self.verbose_extra
        return dest

    # proxy settings are delegated to the ProxyConfig object
    @property
    def proxies(self) -> dict:
        return self._proxy_config.proxies
    @proxies.setter
    def proxies(self, proxies:dict) -> None:
        self._proxy_config.proxies = proxies
    @property
    def proxy_string(self) -> str:
        return self._proxy_config.proxy_string
    @proxy_string.setter
    def proxy_string(self, proxies:str) -> None:
        self._proxy_config.proxy_string = proxies
    @property
    def proxy_auth(self) -> Union[AuthBase, Tuple[str,str]]:
        return self._proxy_config.proxy_auth
    @proxy_auth.setter
    def proxy_auth(self, proxy_auth:Union[AuthBase, Tuple[str,str]]) -> None:
        self._proxy_config.proxy_auth = proxy_auth

    @staticmethod
    def _check_ca_verification(ca_cert:Union[bool,str,None]) -> None:
        """
        Shared validation for the ckan_ca and extern_ca setters.
        Only the literal value False (CA verification disabled) is treated specially.

        :param ca_cert: value about to be assigned
        :raises NoCAVerificationError: if ca_cert is False and allow_no_ca is not set
        """
        if ca_cert is False:
            if not allow_no_ca:
                raise NoCAVerificationError()
            msg = "CA verification has been disabled. Only allow in a local environment!"
            # stacklevel=3: helper -> property setter -> code which performed the assignment
            warn(msg, stacklevel=3)

    @property
    def ckan_ca(self) -> Union[bool,str,None]:
        return self._ckan_ca
    @ckan_ca.setter
    def ckan_ca(self, ca_cert:Union[bool,str,None]) -> None:
        self._check_ca_verification(ca_cert)
        self._ckan_ca = ca_cert
    @property
    def extern_ca(self) -> Union[bool,str,None]:
        return self._extern_ca
    @extern_ca.setter
    def extern_ca(self, ca_cert:Union[bool,str,None]) -> None:
        self._check_ca_verification(ca_cert)
        self._extern_ca = ca_cert

    @staticmethod
    def _setup_cli_ckan_parser__params(parser:argparse.ArgumentParser=None) -> argparse.ArgumentParser:
        """
        Define or add CLI arguments to initialize a CKAN API connection
        parser help message:

        CKAN API connection parameters initialization

        :param parser: option to provide an existing parser to add the specific fields needed to initialize a CKAN API connection
        :return: the parser (created here if none was provided)
        """
        if parser is None:
            parser = argparse.ArgumentParser(description="CKAN API connection parameters initialization")
        ProxyConfig._setup_cli_proxy_parser(parser)  # add arguments --proxy --http-proxy --https-proxy --no-proxy --proxy-auth-file
        parser.add_argument("--ckan-ca", type=str,
                            help="CKAN CA certificate location (.pem file)")
        parser.add_argument("--extern-ca", type=str,
                            help="CA certificate location for extern connexions (.pem file)")
        parser.add_argument("--user-agent", type=str,
                            help="User agent for HTTP requests")
        parser.add_argument("-l", "--default-limit", type=int,
                            help="Default number of rows per request")
        parser.add_argument("-v", "--verbose",
                            help="Option to set verbosity", action="store_true", default=False)
        # parser.add_argument("--external-code", action="store_true",
        #                     help="Enable external code execution for builder (only enable for trusted sources)")
        return parser

    def _cli_ckan_args_apply(self, args: argparse.Namespace, *, base_dir:str=None, error_not_found:bool=True,
                             default_proxies:dict=None, proxy_headers:dict=None) -> None:
        """
        Apply the arguments parsed by the argument parser defined by _setup_cli_ckan_parser__params

        :param args: parsed arguments
        :param base_dir: base directory used to resolve the CA certificate paths if relative paths are provided,
            also forwarded to ProxyConfig.from_cli_args (recommended: leave None to use cwd)
        :param error_not_found: forwarded to ProxyConfig.from_cli_args
        :param default_proxies: proxies used if proxies="default"
        :param proxy_headers: headers used to access the proxies, generally for authentication
        :return:
        """
        proxy_config = ProxyConfig.from_cli_args(args, base_dir=base_dir, error_not_found=error_not_found,
                                                 default_proxies=default_proxies, proxy_headers=proxy_headers)
        if proxy_config is not None:
            self._proxy_config = proxy_config
        if args.ckan_ca is not None:
            self.ckan_ca = path_rel_to_dir(args.ckan_ca, base_dir=base_dir)
        if args.extern_ca is not None:
            self.extern_ca = path_rel_to_dir(args.extern_ca, base_dir=base_dir)
        if args.user_agent is not None:
            self.user_agent = args.user_agent
        # NOTE(review): --default-limit and --verbose are parsed but not applied in this class;
        # the calls below are disabled here (presumably handled elsewhere - TODO confirm).
        # if args.default_limit is not None:
        #     self.set_limits(args.default_limit)
        # if args.verbose is not None:
        #     self.set_verbosity(args.verbose)
        # if args.external_code:
        #     unlock_external_code_execution()
        # The parsed namespace is intentionally not printed: the former debug print(args)
        # wrote every CLI value to stdout each time the arguments were applied.
184
+
185
class CkanApiDebug:
    """
    Container for debug information: request counters, the last response received
    and the last successful offset of a multi-request sequence.
    """

    def __init__(self):
        # field containing the last response, for debug purposes
        self.last_response: Union[requests.Response, None] = None
        self.last_response_request_count: int = 0
        # request counters, one for the CKAN server and one for external requests
        self.ckan_request_counter: int = 0
        self.extern_request_counter: int = 0
        # last used offset when multiple queries are performed.
        # This can be used in order to restart an update/download sequence in case of an error.
        self.multi_requests_last_successful_offset: int = 0
192
+
@@ -0,0 +1,9 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Deprecated code
5
+ """
6
+
7
+ from . import ckan_api_deprecated
8
+ from . import ckan_api_deprecated_vocabularies
9
+
@@ -0,0 +1,267 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+
5
+ """
6
+ from typing import List
7
+ import copy
8
+ from warnings import warn
9
+
10
+ from ckanapi_harvesters.auxiliary.ckan_model import CkanPackageInfo, CkanResourceInfo, CkanViewInfo
11
+ from ckanapi_harvesters.auxiliary.ckan_auxiliary import RequestType, assert_or_raise
12
+ from ckanapi_harvesters.auxiliary.ckan_action import CkanNotFoundError
13
+ from ckanapi_harvesters.auxiliary.ckan_errors import ReadOnlyError
14
+ from ckanapi_harvesters.ckan_api.ckan_api_1_map import use_ckan_owner_org_as_default
15
+
16
+ from ckanapi_harvesters.ckan_api.ckan_api_5_manage import CkanApiManage
17
+
18
+
19
+
20
class CkanApiDeprecated(CkanApiManage):
    """
    CKAN Database API interface to CKAN server with helper functions using pandas DataFrames.
    This class implements API calls which are not recommended to use.
    """

    def copy(self, new_identifier: str = None, *, dest=None):
        """
        Copy this object.

        :param new_identifier: forwarded to the parent class copy
        :param dest: object receiving the copy; a new CkanApiDeprecated is created if None
        :return: dest
        """
        if dest is None:
            dest = CkanApiDeprecated()
        super().copy(new_identifier=new_identifier, dest=dest)
        return dest

    ## Not recommended mapping functions ------------------v
    def _api_package_list(self, *, params:dict=None, owner_org:str=None, limit:int=None, offset:int=None) -> List[str]:
        """
        __Not recommended__
        API call to package_list.

        :param params: additional parameters to pass to package_list (the dict is completed in place)
        :param owner_org: the request can be limited to an organization with this parameter.
            If None and use_ckan_owner_org_as_default is set, self.owner_org is used.
        :param limit: sent as the limit parameter (default: self.params.default_limit_list)
        :param offset: sent as the offset parameter
        :return: the result field of the API response
        """
        msg = DeprecationWarning("Prefer using package_search rather than package_list because this API does not list private packages")
        warn(msg)
        if params is None: params = {}
        if limit is None: limit = self.params.default_limit_list
        if limit is not None:
            params["limit"] = limit
        if offset is not None:
            params["offset"] = offset
        if owner_org is None and use_ckan_owner_org_as_default:
            owner_org = self.owner_org
        if owner_org is not None:
            params["owner_org"] = owner_org
        response = self._api_action_request("package_list", method=RequestType.Get, params=params)
        if response.success:
            return response.result
        else:
            raise response.default_error(self)

    def _api_package_list_all(self, *, params:dict=None, owner_org:str=None, limit:int=None, offset:int=None) -> List[str]:
        """
        __Not recommended__
        API call to package_list until an empty list is received.

        :see: _api_package_list()
        :param params: additional parameters to pass to package_list
        :param owner_org: see _api_package_list()
        :param limit: see _api_package_list()
        :param offset: see _api_package_list()
        :return: concatenation of the lists returned by the successive requests
        """
        msg = DeprecationWarning("Prefer using package_search rather than package_list because this API does not list private packages")
        warn(msg)
        if params is None: params = {}
        responses = self._request_all_results_list(self._api_package_list, params=params, owner_org=owner_org, limit=limit, offset=offset)
        # flatten the pages (linear, unlike sum(responses, []))
        return [item for page in responses for item in page]

    package_list_all = _api_package_list_all  # function alias


    def _api_resource_search(self, query:str=None, *, order_by:str=None, limit:int=None, offset:int=None,
                             resource_name:str=None,
                             datastore_info:bool=None, resource_view_list:bool=None,
                             params:dict=None) -> List[CkanResourceInfo]:
        """
        __Not recommended__
        API call to resource_search. It is more recommended to use the package_show API because it is not possible to
        filter the resources by package name here. Moreover, it does not return information on private resources.

        :see: map_resources()
        :param query: (string or list of strings of the form {field}:{term1}) – The search criteria.
            A list given by the caller is copied and never modified.
        :param order_by: A field on the Resource model that orders the results.
        :param limit: sent as the limit parameter (default: self.params.default_limit_list)
        :param offset: sent as the offset parameter
        :param resource_name: a shortcut to add the filter "name:{resource_name}"
        :param datastore_info: an option to query the datastore info for all the resources found.
            If not provided, the last value for this option used with map_resources will be used.
        :param resource_view_list: an option to query the resource views list for all the resources found.
            If not provided, the last value for this option used with map_resources will be used.
        :param params: additional parameters to pass to resource_search (the dict is completed in place)
        :return: deep copy of the list of resources found
        """
        msg = DeprecationWarning("Prefer using package_search rather than resource_search because resource_search cannot filter per package")
        warn(msg)
        if datastore_info is None:
            datastore_info = self.map._mapping_query_datastore_info
        if resource_view_list is None:
            resource_view_list = self.map._mapping_query_resource_view_list
        if params is None: params = {}
        if limit is None: limit = self.params.default_limit_list
        if limit is not None:
            params["limit"] = limit
        if offset is not None:
            params["offset"] = offset
        if query is None:
            query = []
        elif isinstance(query, str):
            query = [query]
        else:
            # work on a copy: appending the name filter to the caller's list would make the filter
            # accumulate when this function is called repeatedly with the same list (paginated requests)
            query = list(query)
        if resource_name is not None:
            query.append(f"name:{resource_name}")
        params["query"] = query
        if order_by is not None:
            params["order_by"] = order_by
        response = self._api_action_request("resource_search", method=RequestType.Get, params=params)
        if response.success:
            resource_info_list = [CkanResourceInfo(e) for e in response.result["results"]]
            for resource_info in resource_info_list:
                self._enrich_resource_info(resource_info, datastore_info=datastore_info,
                                           resource_view_list=resource_view_list)
            self.map._update_resource_info(resource_info_list)
            return copy.deepcopy(resource_info_list)
        else:
            raise response.default_error(self)

    def _api_resource_search_all(self, query: str = None, *, order_by: str = None, limit: int = None, offset: int = None,
                                 resource_name: str = None,
                                 datastore_info: bool = None, resource_view_list: bool = None,
                                 params: dict = None) -> List[CkanResourceInfo]:
        """
        __Not recommended__
        API call to resource_search until an empty list is received. It is more recommended to use the package_show API because it is not possible to
        filter the resources by package name here. Moreover, it does not return information on private resources.

        :see: map_resources()
        :see: _api_resource_search()
        :param query: (string or list of strings of the form {field}:{term1}) – The search criteria.
        :param order_by: A field on the Resource model that orders the results.
        :param limit: maximum number of results to return.
        :param offset: the offset in the complete result for where the set of returned datasets should begin.
        :param resource_name: a shortcut to add the filter "name:{resource_name}"
        :param datastore_info: an option to query the datastore info for all the resources found.
            If not provided, the last value for this option used with map_resources will be used.
        :param resource_view_list: an option to query the resource views list for all the resources found.
            If not provided, the last value for this option used with map_resources will be used.
        :param params: additional parameters to pass to resource_search
        :return: concatenation of the lists returned by the successive requests
        """
        msg = DeprecationWarning("Prefer using package_search rather than resource_search because resource_search cannot filter per package")
        warn(msg)
        if params is None: params = {}
        responses = self._request_all_results_list(self._api_resource_search, params=params, limit=limit, offset=offset,
                                                   query=query, order_by=order_by,
                                                   resource_name=resource_name,
                                                   datastore_info=datastore_info, resource_view_list=resource_view_list)
        # flatten the pages (linear, unlike sum(responses, []))
        return [item for page in responses for item in page]

    resource_search_all = _api_resource_search_all  # function alias


    def _api_group_package_show(self, group_name: str, *, params:dict=None, owner_org:str=None,
                                include_private:bool=True, include_drafts:bool=False, sort:str=None,
                                limit:int=None, offset:int=None) -> List[CkanPackageInfo]:
        """
        __Not recommended__
        API call to group_package_show. Return the datasets (packages) of a group.

        :param group_name: group name or id
        :param owner_org: ability to filter packages by owner_org. The organization is resolved to its id before the request.
            If None and use_ckan_owner_org_as_default is set, self.owner_org is used.
        :param include_private: if True, private datasets will be included in the results. Only private datasets from the user’s organizations will be returned and sysadmins will be returned all private datasets. Optional, the default is False in the API
        :param include_drafts: if True, draft datasets will be included in the results. A user will only be returned their own draft datasets, and a sysadmin will be returned all draft datasets. Optional, the default is False.
        :param sort: sorting of the search results. Optional. Default: 'score desc, metadata_modified desc'. As per the solr documentation, this is a comma-separated string of field names and sort-orderings.
        :param limit: maximum number of results to return. Sent as the limit parameter (default: self.params.default_limit_list).
        :param offset: the offset in the complete result for where the set of returned datasets should begin. Sent as the offset parameter.
        :param params: other parameters to pass to group_package_show (the dict is completed in place)
        :return: list of the packages found
        :raises CkanNotFoundError: if the server answers 404 with a "Not Found Error"
        """
        msg = DeprecationWarning("Prefer using package_search rather than group_package_show knowing the name of the package because this API does not list private packages")
        warn(msg)
        if params is None: params = {}
        params["id"] = group_name
        if limit is None: limit = self.params.default_limit_list
        if limit is not None:
            params["limit"] = limit
        if offset is not None:
            params["offset"] = offset
        if owner_org is None and use_ckan_owner_org_as_default:
            owner_org = self.owner_org
        if owner_org is not None:
            owner_org_info = self.get_organization_info_or_request(owner_org)
            owner_org = owner_org_info.id
            params["owner_org"] = owner_org
        if sort is not None:
            params["sort"] = sort
        if include_private is not None:
            params["include_private"] = include_private
        if include_drafts is not None:
            params["include_drafts"] = include_drafts
        response = self._api_action_request("group_package_show", method=RequestType.Get, params=params)
        if response.success:
            package_info_list = [CkanPackageInfo(e) for e in response.result]
            self.map._update_package_info(package_info_list)
            return package_info_list
        elif response.status_code == 404 and response.success_json_loads and response.error_message["__type"] == "Not Found Error":
            raise CkanNotFoundError(self, "Group", response)
        else:
            raise response.default_error(self)

    def _api_group_package_show_all(self, group_name: str, *, params:dict=None, owner_org:str=None,
                                    include_private:bool=True, include_drafts:bool=False, sort:str=None,
                                    limit:int=None, offset:int=None) -> List[CkanPackageInfo]:
        """
        __Not recommended__
        API call to group_package_show until an empty list is received.

        :see: _api_group_package_show()
        :param group_name: group name or id
        :param owner_org: ability to filter packages by owner_org
        :param include_private: if True, private datasets will be included in the results. Only private datasets from the user’s organizations will be returned and sysadmins will be returned all private datasets. Optional, the default is False in the API
        :param include_drafts: if True, draft datasets will be included in the results. A user will only be returned their own draft datasets, and a sysadmin will be returned all draft datasets. Optional, the default is False.
        :param sort: sorting of the search results. Optional. Default: 'score desc, metadata_modified desc'. As per the solr documentation, this is a comma-separated string of field names and sort-orderings.
        :param limit: maximum number of results to return.
        :param offset: the offset in the complete result for where the set of returned datasets should begin.
        :param params: other parameters to pass to API
        :return: concatenation of the lists returned by the successive requests
        """
        msg = DeprecationWarning("Prefer using package_search rather than group_package_show knowing the name of the package because this API does not list private packages")
        warn(msg)
        if params is None: params = {}
        # sort is forwarded like the other options (it was previously accepted but silently dropped)
        responses = self._request_all_results_list(self._api_group_package_show, params=params, limit=limit, offset=offset,
                                                   group_name=group_name, owner_org=owner_org,
                                                   include_private=include_private, include_drafts=include_drafts,
                                                   sort=sort)
        # flatten the pages (linear, unlike sum(responses, []))
        return [item for page in responses for item in page]

    group_package_show_all = _api_group_package_show_all  # function alias

    ## resource view
    def _api_resource_create_default_resource_views(self, resource_id:str, *, create_datastore_views:bool=None,
                                                    params:dict=None) -> List[CkanViewInfo]:
        """
        API call to resource_create_default_resource_views

        :param resource_id: resource id
        :param create_datastore_views: whether to create views that rely on data being on the DataStore
            (optional, API defaults to False). If None, the result of resource_is_datastore(resource_id) is used.
        :param params: additional parameters to pass to the API (the dict is completed in place)
        :return: deep copy of the list of views returned by the API
        """
        msg = DeprecationWarning("Prefer using resource_view_create rather than resource_create_default_resource_views")
        warn(msg)
        # write operation: checked against the read_only parameter
        assert_or_raise(not self.params.read_only, ReadOnlyError())
        if params is None: params = {}
        resource_info = self.resource_show(resource_id)
        resource_dict = resource_info.details
        if create_datastore_views is None:
            create_datastore_views = self.resource_is_datastore(resource_id)
        params["resource"] = resource_dict
        params["create_datastore_views"] = create_datastore_views
        response = self._api_action_request("resource_create_default_resource_views", method=RequestType.Post, json=params)
        if response.success:
            view_info_list = [CkanViewInfo(view_dict) for view_dict in response.result]
            self.map._update_view_info(view_info_list)
            return copy.deepcopy(view_info_list)
        else:
            raise response.default_error(self)

    resource_create_default_resource_views = _api_resource_create_default_resource_views  # function alias
266
+
267
+
@@ -0,0 +1,189 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+
5
+ """
6
+ from typing import List, Dict, Union
7
+ import copy
8
+ from warnings import warn
9
+
10
+ from ckanapi_harvesters.auxiliary.proxy_config import ProxyConfig
11
+ from ckanapi_harvesters.auxiliary.ckan_auxiliary import RequestType, assert_or_raise
12
+ from ckanapi_harvesters.auxiliary.ckan_map import CkanMap
13
+ from ckanapi_harvesters.auxiliary.ckan_errors import MandatoryAttributeError
14
+ from ckanapi_harvesters.auxiliary.ckan_vocabulary_deprecated import CkanTagVocabularyInfo, CkanVocabularyMap
15
+ from ckanapi_harvesters.policies.data_format_policy import CkanPackageDataFormatPolicy
16
+ from ckanapi_harvesters.policies.data_format_policy_tag_groups import TagListPolicy
17
+
18
+ from ckanapi_harvesters.ckan_api.deprecated.ckan_api_deprecated import CkanApiDeprecated
19
+
20
+
21
class CkanApiVocabulariesDeprecated(CkanApiDeprecated):
    """
    Deprecated extension of CkanApiDeprecated adding calls to the CKAN tag vocabulary actions
    (vocabulary_list, vocabulary_create, vocabulary_update, vocabulary_delete) and a local
    map of the vocabularies (self.map_vocabulary).

    Every entry point emits a DeprecationWarning: according to these messages, the vocabulary
    functions did not work when tested.
    """

    def __init__(self, url:str=None, *, proxies:Union[str,dict,ProxyConfig]=None,
                 ckan_headers:dict=None, http_headers:dict=None,
                 apikey:str=None, apikey_file:str=None,
                 owner_org:str=None,
                 policy:CkanPackageDataFormatPolicy=None, policy_file:str=None,
                 identifier=None):
        """
        Build the API object and an empty vocabulary map. All arguments are forwarded
        unchanged to the parent constructor.

        :param url: url of the CKAN server
        :param proxies: proxies to use for requests
        :param ckan_headers: headers to use for requests, only to the CKAN server
        :param http_headers: headers to use for requests, for all requests, including external requests and to the CKAN server
        :param apikey: way to provide the API key directly (optional)
        :param apikey_file: path to a file containing a valid API key in the first line of text (optional)
        :param owner_org: name of the organization to limit package_search (optional)
        :param policy: data format policy to use with policy_check function
        :param policy_file: path to a JSON file containing the data format policy to use with policy_check function
        :param identifier: forwarded to the parent constructor
        """
        # stacklevel=2 attributes the warning to the code instantiating the class, so that the
        # default warning filters (which hide DeprecationWarning from library frames) can show it.
        warn(DeprecationWarning("Vocabularies are used to define custom fields which accept specific values and require to implement an IDatasetForm extension. This is not covered in this package."),
             stacklevel=2)
        super().__init__(url=url, proxies=proxies, apikey=apikey, apikey_file=apikey_file,
                         ckan_headers=ckan_headers, http_headers=http_headers,
                         owner_org=owner_org, policy=policy, policy_file=policy_file, identifier=identifier)
        self.map_vocabulary: CkanVocabularyMap = CkanVocabularyMap()

    def copy(self, new_identifier: str = None, *, dest=None):
        """
        Copy this object into dest, including a copy of the vocabulary map.

        :param new_identifier: forwarded to the parent copy method
        :param dest: object receiving the copy; a new CkanApiVocabulariesDeprecated is created when None
        :return: dest
        """
        if dest is None:
            dest = CkanApiVocabulariesDeprecated()
        super().copy(new_identifier=new_identifier, dest=dest)
        dest.map_vocabulary = self.map_vocabulary.copy()
        return dest

    def set_default_map_mode(self, datastore_info:bool=None, resource_view_list:bool=None,
                             organization_info:bool=None, license_list:bool=None,
                             load_policy:bool=None, vocabulary_list:bool=None) -> None:
        """
        Set the default mapping options. The vocabulary_list option is stored in the vocabulary map;
        every other option is forwarded to the parent implementation.

        :param vocabulary_list: whether map_resources should also list the vocabularies (None keeps the current value)
        """
        super().set_default_map_mode(datastore_info=datastore_info, resource_view_list=resource_view_list,
                                     organization_info=organization_info, license_list=license_list,
                                     load_policy=load_policy)
        if vocabulary_list is None:
            vocabulary_list = self.map_vocabulary._mapping_query_vocabulary_list
        self.map_vocabulary._mapping_query_vocabulary_list = vocabulary_list

    def map_resources(self, package_list:Union[str, List[str]]=None, *, params:dict=None,
                      datastore_info:bool=None, resource_view_list:bool=None, organization_info:bool=None, license_list:bool=None,
                      only_missing:bool=True, error_not_found:bool=True,
                      owner_org:str=None, load_policy:bool=None, vocabulary_list:bool=None) -> CkanMap:
        """
        Overload of the parent map_resources which additionally lists the vocabularies
        when the vocabulary_list mapping option is enabled.

        :param vocabulary_list: overrides the stored vocabulary_list option when not None
        :return: the map returned by the parent implementation
        """
        self.set_default_map_mode(vocabulary_list=vocabulary_list)
        # named ckan_map to avoid shadowing the builtin map
        ckan_map = super().map_resources(package_list=package_list, params=params, datastore_info=datastore_info,
                                         resource_view_list=resource_view_list, organization_info=organization_info,
                                         license_list=license_list, only_missing=only_missing, error_not_found=error_not_found,
                                         owner_org=owner_org, load_policy=load_policy)
        if self.map_vocabulary._mapping_query_vocabulary_list:
            self.vocabulary_list(cancel_if_present=True)
        return ckan_map


    ## Vocabulary management (requires sysadmin rights)  --------------
    def _api_vocabulary_list(self, *, params:dict=None) -> List[CkanTagVocabularyInfo]:
        """
        API call to vocabulary_list. The vocabulary map is updated with the result.

        :param params: parameters sent with the request, passed as is (optional)
        :return: deep copies of the vocabulary info objects built from the response
        :raises: response.default_error(self) when the API reports a failure
        """
        warn(DeprecationWarning("Vocabulary functions did not work when tested"), stacklevel=2)
        response = self._api_action_request("vocabulary_list", method=RequestType.Post, json=params)
        if not response.success:
            raise response.default_error(self)
        vocabulary_list = [CkanTagVocabularyInfo(vocabulary_dict) for vocabulary_dict in response.result]
        self.map_vocabulary._update_vocabulary_info(vocabulary_list, vocabularies_listed=True)  # update map
        return copy.deepcopy(vocabulary_list)

    def vocabulary_list(self, cancel_if_present:bool=True) -> List[CkanTagVocabularyInfo]:
        """
        List the vocabularies, using the vocabulary map when it was already filled.

        :param cancel_if_present: when True and the vocabularies were already listed, no request is sent
        :return: list of vocabulary info objects
        """
        if self.map_vocabulary.vocabularies_listed and cancel_if_present:
            # NOTE(review): this path returns the objects held by the map, whereas the API path
            # returns deep copies - confirm whether callers may modify the result.
            return list(self.map_vocabulary.vocabularies.values())
        return self._api_vocabulary_list()

    def _api_vocabulary_create(self, vocabulary_name: str, tags_list_dict: List[Dict[str, str]], *, params:dict=None) -> CkanTagVocabularyInfo:
        """
        API call to vocabulary_create. The vocabulary map is updated with the result.

        :param vocabulary_name: sent as the "name" field of the request
        :param tags_list_dict: sent as the "tags" field of the request
        :param params: additional parameters (optional); the dictionary is copied, not modified
        :return: a deep copy of the vocabulary info object built from the response
        :raises: response.default_error(self) when the API reports a failure
        """
        warn(DeprecationWarning("Vocabulary functions did not work when tested"), stacklevel=2)
        # copy: the keys added below must not leak into the caller's dictionary
        request_params = dict(params) if params is not None else {}
        request_params["name"] = vocabulary_name
        request_params["tags"] = tags_list_dict
        response = self._api_action_request("vocabulary_create", method=RequestType.Post, json=request_params)
        if not response.success:
            raise response.default_error(self)
        vocabulary_info = CkanTagVocabularyInfo(response.result)
        self.map_vocabulary._update_vocabulary_info(vocabulary_info)
        return copy.deepcopy(vocabulary_info)

    def _api_vocabulary_update(self, vocabulary_id: str, tags_list_dict: List[Dict[str, str]], *, params:dict=None) -> CkanTagVocabularyInfo:
        """
        API call to vocabulary_update. The vocabulary map is updated with the result.

        :param vocabulary_id: sent as the "id" field of the request
        :param tags_list_dict: sent as the "tags" field of the request
        :param params: additional parameters (optional); the dictionary is copied, not modified
        :return: a deep copy of the vocabulary info object built from the response
        :raises: response.default_error(self) when the API reports a failure
        """
        warn(DeprecationWarning("Vocabulary functions did not work when tested"), stacklevel=2)
        # copy: the keys added below must not leak into the caller's dictionary
        request_params = dict(params) if params is not None else {}
        request_params["id"] = vocabulary_id
        request_params["tags"] = tags_list_dict
        response = self._api_action_request("vocabulary_update", method=RequestType.Post, json=request_params)
        if not response.success:
            raise response.default_error(self)
        vocabulary_info = CkanTagVocabularyInfo(response.result)
        self.map_vocabulary._update_vocabulary_info(vocabulary_info)
        return copy.deepcopy(vocabulary_info)

    def vocabulary_update(self, vocabulary_name: str, tags_list_dict: List[Dict[str, str]]) -> CkanTagVocabularyInfo:
        """
        Create the vocabulary if its name is not in the vocabulary map, update it otherwise.

        :param vocabulary_name: name of the vocabulary
        :param tags_list_dict: tags of the vocabulary
        :return: the vocabulary info returned by the create or update call
        """
        vocabulary_id = self.map_vocabulary.get_vocabulary_id(vocabulary_name, error_not_mapped=False)
        if vocabulary_id is None:
            return self._api_vocabulary_create(vocabulary_name=vocabulary_name, tags_list_dict=tags_list_dict)
        return self._api_vocabulary_update(vocabulary_id, tags_list_dict=tags_list_dict)

    def _api_vocabulary_delete(self, vocabulary_id: str, *, params:dict=None) -> bool:
        """
        API call to vocabulary_delete.

        :param vocabulary_id: sent as the "id" field of the request
        :param params: additional parameters (optional); the dictionary is copied, not modified
        :return: True if success
        :raises: response.default_error(self) when the API reports a failure
        """
        warn(DeprecationWarning("Vocabulary functions did not work when tested"), stacklevel=2)
        # copy: the key added below must not leak into the caller's dictionary
        request_params = dict(params) if params is not None else {}
        request_params["id"] = vocabulary_id
        response = self._api_action_request("vocabulary_delete", method=RequestType.Post, json=request_params)
        if not response.success:
            raise response.default_error(self)
        # NOTE(review): the vocabulary map is not updated here, so it still references the
        # deleted vocabulary - confirm whether CkanVocabularyMap offers a removal method.
        return True

    def vocabulary_delete(self, vocabulary_id: str) -> bool:
        """
        Delete a vocabulary by id (see _api_vocabulary_delete).

        :return: True if success
        """
        return self._api_vocabulary_delete(vocabulary_id)

    def vocabularies_clear(self):
        """
        Delete every vocabulary known to the vocabulary map, listing them first if necessary.
        """
        self.vocabulary_list(cancel_if_present=True)
        # snapshot of the ids, taken before the delete calls
        vocabulary_ids = list(self.map_vocabulary.vocabularies.keys())
        for vocabulary_id in vocabulary_ids:
            self._api_vocabulary_delete(vocabulary_id)

    def initiate_vocabularies_from_policy(self, policy:CkanPackageDataFormatPolicy, *, remove_others:bool=False):
        """
        Create or update one vocabulary per value group of policy.package_tags.

        :param policy: data format policy; policy.package_tags.value_group_specs provides the vocabularies
        :param remove_others: when True, vocabularies of the map which are not named in the policy are deleted first
        :raises: the exception given to assert_or_raise (MandatoryAttributeError) when a value group has no
            group_name. The check is done before any request is sent, so an invalid policy cannot
            leave the server partially modified.
        """
        vocabulary_specs: List[TagListPolicy] = list(policy.package_tags.value_group_specs)
        # Validate every name before any destructive call.
        for vocabulary_spec in vocabulary_specs:
            assert_or_raise(vocabulary_spec.group_name is not None, MandatoryAttributeError("Tag vocabulary", "vocabulary_name"))
        if remove_others:
            vocabulary_names = {vocabulary_spec.group_name for vocabulary_spec in vocabulary_specs}
            self.vocabulary_list(cancel_if_present=True)
            current_vocabularies = set(self.map_vocabulary.vocabulary_id_index.keys())
            for vocabulary_name in current_vocabularies - vocabulary_names:
                vocabulary_id = self.map_vocabulary.vocabulary_id_index[vocabulary_name]
                self._api_vocabulary_delete(vocabulary_id)
        for vocabulary_spec in vocabulary_specs:
            self.vocabulary_update(vocabulary_name=vocabulary_spec.group_name,
                                   tags_list_dict=vocabulary_spec.get_tags_list_dict())