ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ckanapi_harvesters/__init__.py +32 -10
- ckanapi_harvesters/auxiliary/__init__.py +26 -0
- ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
- ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
- ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
- ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
- ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
- ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
- ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
- ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
- ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
- ckanapi_harvesters/auxiliary/deprecated.py +82 -0
- ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
- ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
- ckanapi_harvesters/auxiliary/list_records.py +60 -0
- ckanapi_harvesters/auxiliary/login.py +163 -0
- ckanapi_harvesters/auxiliary/path.py +208 -0
- ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
- ckanapi_harvesters/auxiliary/urls.py +40 -0
- ckanapi_harvesters/builder/__init__.py +40 -0
- ckanapi_harvesters/builder/builder_aux.py +20 -0
- ckanapi_harvesters/builder/builder_ckan.py +238 -0
- ckanapi_harvesters/builder/builder_errors.py +36 -0
- ckanapi_harvesters/builder/builder_field.py +122 -0
- ckanapi_harvesters/builder/builder_package.py +9 -0
- ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
- ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
- ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
- ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
- ckanapi_harvesters/builder/builder_resource.py +589 -0
- ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
- ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
- ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
- ckanapi_harvesters/builder/builder_resource_init.py +126 -0
- ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
- ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
- ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
- ckanapi_harvesters/builder/example/__init__.py +21 -0
- ckanapi_harvesters/builder/example/builder_example.py +21 -0
- ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
- ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
- ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
- ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
- ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
- ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
- ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
- ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
- ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
- ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
- ckanapi_harvesters/builder/mapper_datastore.py +93 -0
- ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
- ckanapi_harvesters/builder/specific/__init__.py +11 -0
- ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
- ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
- ckanapi_harvesters/ckan_api/__init__.py +20 -0
- ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
- ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
- ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
- ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
- ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
- ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
- ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
- ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
- ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
- ckanapi_harvesters/harvesters/__init__.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
- ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
- ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
- ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
- ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
- ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
- ckanapi_harvesters/harvesters/harvester_init.py +30 -0
- ckanapi_harvesters/harvesters/harvester_model.py +49 -0
- ckanapi_harvesters/harvesters/harvester_params.py +323 -0
- ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
- ckanapi_harvesters/harvesters/postgre_params.py +86 -0
- ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
- ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
- ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
- ckanapi_harvesters/policies/__init__.py +20 -0
- ckanapi_harvesters/policies/data_format_policy.py +269 -0
- ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
- ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
- ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
- ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
- ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
- ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
- ckanapi_harvesters/reports/__init__.py +11 -0
- ckanapi_harvesters/reports/admin_report.py +292 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/METADATA +84 -38
- ckanapi_harvesters-0.0.3.dist-info/RECORD +105 -0
- ckanapi_harvesters/divider/__init__.py +0 -27
- ckanapi_harvesters/divider/divider.py +0 -53
- ckanapi_harvesters/divider/divider_error.py +0 -59
- ckanapi_harvesters/main.py +0 -30
- ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/WHEEL +0 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Harvester parameters. The base names of the parameters are shared between harvesters.
|
|
5
|
+
"""
|
|
6
|
+
from typing import Union, Tuple, List, Any, Callable
|
|
7
|
+
from collections import OrderedDict
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
import argparse
|
|
10
|
+
import shlex
|
|
11
|
+
from warnings import warn
|
|
12
|
+
import copy
|
|
13
|
+
|
|
14
|
+
import pandas as pd
|
|
15
|
+
from requests.auth import AuthBase
|
|
16
|
+
|
|
17
|
+
from ckanapi_harvesters.auxiliary.ckan_configuration import default_ckan_has_postgis, default_ckan_target_epsg
|
|
18
|
+
from ckanapi_harvesters.auxiliary.ckan_configuration import unlock_external_url_resource_download, allow_no_ca, unlock_no_ca
|
|
19
|
+
from ckanapi_harvesters.auxiliary.ckan_errors import NoCAVerificationError
|
|
20
|
+
from ckanapi_harvesters.auxiliary.ckan_auxiliary import ca_file_rel_to_dir, assert_or_raise
|
|
21
|
+
from ckanapi_harvesters.auxiliary.proxy_config import ProxyConfig
|
|
22
|
+
from ckanapi_harvesters.auxiliary.ckan_api_key import ApiKey
|
|
23
|
+
from ckanapi_harvesters.auxiliary.login import Login, SSHLogin
|
|
24
|
+
from ckanapi_harvesters.harvesters.harvester_errors import HarvestMethodRequiredError
|
|
25
|
+
|
|
26
|
+
# Module-wide default for the ``enforce_ca_safety`` argument of DatabaseParams.set_verify_ca().
# When the effective value is True, passing ``False`` as CA certificate raises NoCAVerificationError
# unless ``allow_no_ca`` is set; otherwise only a warning is emitted.
harvester_enforce_ca_verification: bool = False
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class DatabaseParams:
    """
    Class representing parameters to connect to a database.
    This class manages the connection parameters such as proxy and CA.
    It also manages authentication parameters.
    """
    def __init__(self, source: "DatabaseParams" = None):
        """
        :param source: optional instance whose values are copied into the new object (see :meth:`copy`).
        """
        self.options_string: Union[str,None] = None  # raw string last given to parse_options_string
        self.file_url_attr: Union[str, None] = None  # value given to parse_options_string
        self.base_dir: Union[str,None] = None  # directory forwarded to the CA / proxy / key / login file lookups
        self.harvest_method: str = ""  # value of --harvester
        self._proxy_config: ProxyConfig = ProxyConfig()
        self._verify_ca: Union[str, bool, None] = None  # only modified through set_verify_ca
        self._verify_ca_src: Union[str, None] = None  # second value returned by ca_file_rel_to_dir
        self.timeout: Union[float, None] = None
        self.host: Union[str, None] = None
        self.port: Union[int, None] = None
        self.auth_url_suffix: Union[str, None] = None
        self.auth_url: Union[str, None] = None
        self.url: Union[str, None] = None
        self.apikey: ApiKey = ApiKey()
        self.login: Login = Login()
        self.database: Union[str, None] = None
        self.verbose_harvester: bool = True
        self.ckan_postgis: Union[bool,None] = default_ckan_has_postgis
        self.ckan_default_target_epsg:Union[int,None] = default_ckan_target_epsg
        if source is not None:
            source.copy(dest=self)

    # NOTE: the class does not use ABCMeta, so this decorator does not prevent instantiation.
    # It is kept as a marker that subclasses are expected to override copy().
    @abstractmethod
    def copy(self, *, dest=None):
        """
        Copy the parameters of this object to ``dest``.

        The proxy configuration, API key and login objects are assigned by reference:
        after the copy, both objects share the same instances.

        :param dest: object receiving the values. A new DatabaseParams is created if omitted.
        :return: ``dest``
        """
        if dest is None:
            # the signature advertises dest=None: honour it like the subclasses do
            dest = DatabaseParams()
        dest.options_string = self.options_string
        dest.file_url_attr = self.file_url_attr
        dest.base_dir = self.base_dir
        dest.harvest_method = self.harvest_method
        dest._proxy_config = self._proxy_config
        dest._verify_ca = self._verify_ca
        dest._verify_ca_src = self._verify_ca_src
        dest.timeout = self.timeout
        dest.host = self.host
        dest.port = self.port
        dest.auth_url_suffix = self.auth_url_suffix
        dest.auth_url = self.auth_url
        dest.url = self.url
        dest.apikey = self.apikey
        dest.login = self.login
        dest.database = self.database
        dest.verbose_harvester = self.verbose_harvester
        dest.ckan_postgis = self.ckan_postgis
        # this attribute was previously left out: a copy silently fell back to the default EPSG
        dest.ckan_default_target_epsg = self.ckan_default_target_epsg
        return dest

    @staticmethod
    def setup_cli_harvester_parser(parser: argparse.ArgumentParser = None) -> argparse.ArgumentParser:
        """
        Register the connection and authentication options on an argument parser.

        :param parser: parser to extend. A new one is created if omitted.
        :return: the parser holding the options
        """
        if parser is None:
            parser = argparse.ArgumentParser(description="Harvester parameters")
        parser.add_argument("--harvester", type=str,
                            help="Type of harvester to use", required=True)
        ProxyConfig._setup_cli_proxy_parser(parser)  # add arguments --proxy --http-proxy --https-proxy --no-proxy --proxy-auth-file
        parser.add_argument("--ca", type=str,
                            help="Server CA certificate location (.pem file)")
        parser.add_argument("--timeout", type=float,
                            help="Server timeout (seconds)")
        parser.add_argument("--host", type=str,
                            help="Host for queries")
        parser.add_argument("--port", type=int,
                            help="Port for queries")
        parser.add_argument("--auth-url-suffix", type=str,
                            help="URL suffix used to authenticate user")
        parser.add_argument("--auth-url", type=str,
                            help="URL to authenticate user")
        parser.add_argument("--url", type=str,
                            help="Base URL for queries")
        ApiKey._setup_cli_parser(parser)  # add arguments --apikey-file --apikey
        Login._setup_cli_parser(parser)  # add argument --login-file
        parser.add_argument("-v", "--verbose",
                            help="Option to set verbosity", action="store_true", default=False)
        parser.add_argument("--database", type=str,
                            help="Database name")
        parser.add_argument("--ckan-postgis", action="store_true",
                            help="Option to use CKAN with PostGIS geometric types")  # default=default_ckan_has_postgis
        parser.add_argument("--ckan-epsg", type=int,
                            help="Default EPSG for CKAN", default=default_ckan_target_epsg)
        return parser

    def initialize_from_cli_args(self, args: argparse.Namespace, base_dir: str = None, error_not_found: bool = True,
                                 default_proxies: dict = None, proxy_headers: dict = None) -> None:
        """
        Load the attributes of this object from parsed command-line arguments.

        :param args: namespace produced by a parser set up with :meth:`setup_cli_harvester_parser`
        :param base_dir: forwarded to the proxy, CA, API key and login helpers
        :param error_not_found: forwarded to the proxy, API key and login helpers
        :param default_proxies: forwarded to ProxyConfig.from_cli_args
        :param proxy_headers: forwarded to ProxyConfig.from_cli_args
        """
        self.harvest_method = args.harvester
        proxy_config = ProxyConfig.from_cli_args(args, base_dir=base_dir, error_not_found=error_not_found,
                                                 default_proxies=default_proxies, proxy_headers=proxy_headers)
        if proxy_config is not None:
            self._proxy_config = proxy_config
        ca_cert = args.ca
        verify_ca, self._verify_ca_src = ca_file_rel_to_dir(ca_cert, base_dir=base_dir)
        self.set_verify_ca(verify_ca)
        self.timeout = args.timeout
        self.host = args.host
        self.port = args.port
        self.auth_url_suffix = args.auth_url_suffix
        self.auth_url = args.auth_url
        self.url = args.url
        self.apikey._cli_args_apply(args, base_dir=base_dir, error_not_found=error_not_found)
        self.login._cli_args_apply(args, base_dir=base_dir, error_not_found=error_not_found)
        self.database = args.database
        # --verbose is a store_true flag (default False): with the parser above, this always applies
        if args.verbose is not None:
            self.verbose_harvester = args.verbose
        # only an explicit flag overrides the current value
        if args.ckan_postgis:
            self.ckan_postgis = args.ckan_postgis
        if args.ckan_epsg:
            self.ckan_default_target_epsg = args.ckan_epsg

    def _update_from_ckan(self, ckan):
        """
        Fill the CKAN-related values still set to None from ``ckan.params``.
        """
        # aim: make these values accessible to the harvester algorithms (for the rest, Harvesters are independent of CkanApi)
        if self.ckan_postgis is None:
            self.ckan_postgis = ckan.params.ckan_has_postgis
        if self.ckan_default_target_epsg is None:
            self.ckan_default_target_epsg = ckan.params.ckan_default_target_epsg

    @staticmethod
    def parse_harvest_method(options_string: str) -> str:
        """
        Extract the value of ``--harvester`` from an options string, ignoring all other options.

        :param options_string: command-line style options, split with shlex
        :return: the harvester name, lower-cased and stripped
        :raises HarvestMethodRequiredError: presumably raised by assert_or_raise if --harvester is absent
        """
        # parser = DatabaseParams.setup_cli_harvester_parser()
        parser = argparse.ArgumentParser(description="Harvester selection")
        # Not declared as required: argparse would exit the process (SystemExit) on a missing
        # option and the dedicated error below could never be reached.
        parser.add_argument("--harvester", type=str,
                            help="Type of harvester to use", required=False)
        args, _ = parser.parse_known_args(shlex.split(options_string))
        assert_or_raise(args.harvester is not None, HarvestMethodRequiredError())
        return args.harvester.lower().strip()

    def parse_options_string(self, options_string: str, *, base_dir: str = None, file_url_attr: str=None,
                             parser:argparse.ArgumentParser=None):
        """
        Parse a command-line style options string and initialize this object from it.
        Options unknown to the parser are ignored.

        :param options_string: options, split with shlex
        :param base_dir: stored in ``base_dir`` and forwarded to :meth:`initialize_from_cli_args`
        :param file_url_attr: stored in ``file_url_attr``
        :param parser: parser to extend with the options of this class. A new one is created if omitted.
        """
        self.file_url_attr = file_url_attr
        # called through self so that the override of the actual class is used
        parser = self.setup_cli_harvester_parser(parser)
        args, _ = parser.parse_known_args(shlex.split(options_string))
        self.options_string = options_string
        self.base_dir = base_dir
        self.initialize_from_cli_args(args, base_dir=base_dir)

    @property
    def proxies(self) -> dict:
        """Proxies held by the proxy configuration."""
        return self._proxy_config.proxies

    @proxies.setter
    def proxies(self, proxies: dict) -> None:
        self._proxy_config.proxies = proxies

    @property
    def proxy_string(self) -> str:
        """Proxy string held by the proxy configuration."""
        return self._proxy_config.proxy_string

    @proxy_string.setter
    def proxy_string(self, proxies: str) -> None:
        self._proxy_config.proxy_string = proxies

    @property
    def proxy_auth(self) -> Union[AuthBase, Tuple[str, str]]:
        """Proxy authentication held by the proxy configuration."""
        return self._proxy_config.proxy_auth

    @proxy_auth.setter
    def proxy_auth(self, proxy_auth: Union[AuthBase, Tuple[str, str]]) -> None:
        self._proxy_config.proxy_auth = proxy_auth

    @property
    def verify_ca(self) -> Union[bool, str, None]:
        """CA verification setting. Use :meth:`set_verify_ca` to change it."""
        return self._verify_ca

    def set_verify_ca(self, ca_cert: Union[bool, str, None], enforce_ca_safety: bool = None) -> None:
        """
        Set the CA verification setting.

        :param ca_cert: value to store. ``False`` disables the CA verification.
        :param enforce_ca_safety: when True, disabling the verification is refused unless it has been unlocked.
            Defaults to the module variable ``harvester_enforce_ca_verification``.
        :raises NoCAVerificationError: if the verification is disabled while enforced and not unlocked
        """
        if enforce_ca_safety is None:
            enforce_ca_safety = harvester_enforce_ca_verification
        if ca_cert is False:
            # Read the flag from its module at call time. A name obtained with "from ... import"
            # is bound once at import; presumably unlock_no_ca() rebinds the module variable,
            # which such a binding would not see.
            from ckanapi_harvesters.auxiliary import ckan_configuration
            if enforce_ca_safety and not ckan_configuration.allow_no_ca:
                raise NoCAVerificationError()
            else:
                msg = "CA verification has been disabled. Only allow in a local environment!"
                warn(msg)
        self._verify_ca = ca_cert

    @staticmethod
    def unlock_no_ca(value: bool = True):
        """
        This function enables you to disable the CA verification of the CKAN server.

        __Warning__:
        Only allow in a local environment!
        """
        # resolves to the module-level function imported from ckan_configuration (not to this method)
        unlock_no_ca(value)

    @staticmethod
    def unlock_external_url_resource_download(value: bool = True):
        """
        This function enables the download of resources external from the CKAN server.
        """
        # resolves to the module-level function imported from ckan_configuration (not to this method)
        unlock_external_url_resource_download(value)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
class DatasetParams(DatabaseParams):
    """
    Database parameters completed with the name of a dataset (option ``--dataset``).
    """
    def __init__(self, source: "DatasetParams" =None):
        """
        :param source: optional instance whose values are copied into the new object
        """
        super().__init__(source)
        self.dataset: Union[str, None] = None
        if source is None:
            return
        # the assignment above has reset the dataset name: apply the source values again
        source.copy(dest=self)

    def copy(self, *, dest=None):
        """
        Copy the parameters of this object, dataset name included.

        :param dest: object receiving the values. A new DatasetParams is created if omitted.
        :return: the object which received the values
        """
        target = DatasetParams() if dest is None else dest
        super().copy(dest=target)
        target.dataset = self.dataset
        return target

    @staticmethod
    def setup_cli_harvester_parser(parser: argparse.ArgumentParser = None) -> argparse.ArgumentParser:
        """
        Register the DatabaseParams options followed by ``--dataset``.

        :param parser: parser to extend. A new one is created if omitted.
        :return: the parser holding the options
        """
        cli = parser if parser is not None else argparse.ArgumentParser(description="Harvester parameters")
        DatabaseParams.setup_cli_harvester_parser(parser=cli)
        cli.add_argument("--dataset", type=str, help="Dataset name")
        return cli

    def initialize_from_cli_args(self, args: argparse.Namespace, base_dir: str = None, error_not_found: bool = True,
                                 default_proxies: dict = None, proxy_headers: dict = None) -> None:
        """
        Load the parent attributes from the parsed arguments, then the dataset name.
        """
        forwarded = dict(base_dir=base_dir, error_not_found=error_not_found,
                         default_proxies=default_proxies, proxy_headers=proxy_headers)
        super().initialize_from_cli_args(args, **forwarded)
        self.dataset = args.dataset
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
class TableParams(DatasetParams):
    """
    Dataset parameters completed with the options used to harvest one table:
    table name, query, number of rows per request and download options.
    """
    def __init__(self, source: "TableParams" =None):
        """
        :param source: optional instance whose values are copied into the new object
        """
        super().__init__(source)
        self.output_dir: Union[str, None] = None  # value of --output-dir
        self.enable_download: Union[bool, None] = None  # negation of --no-download, None if the option is absent
        self.resource_url: Union[str, None] = None  # value of --resource-url
        self.table: Union[str, None] = None  # value of --table
        self.query_string: Union[str, None] = None  # value of --query
        self.limit: Union[int, None] = None  # value of --limit (rows per request)
        self.single_request: bool = False  # value of --once
        if source is not None:
            # the assignments above have reset the table attributes: apply the source values again
            source.copy(dest=self)

    def copy(self, *, dest=None):
        """
        Copy the parameters of this object, table attributes included.

        :param dest: object receiving the values. A new TableParams is created if omitted.
        :return: the object which received the values
        """
        if dest is None:
            dest = TableParams()
        super().copy(dest=dest)
        dest.output_dir = self.output_dir
        dest.enable_download = self.enable_download
        dest.resource_url = self.resource_url
        dest.table = self.table
        dest.query_string = self.query_string
        dest.limit = self.limit
        dest.single_request = self.single_request
        # CKAN geometry settings: the target EPSG was previously not carried over by a copy
        dest.ckan_postgis = self.ckan_postgis
        dest.ckan_default_target_epsg = self.ckan_default_target_epsg
        return dest

    @staticmethod
    def setup_cli_harvester_parser(parser: argparse.ArgumentParser = None) -> argparse.ArgumentParser:
        """
        Register the DatasetParams options followed by the table options.

        :param parser: parser to extend. A new one is created if omitted.
        :return: the parser holding the options
        """
        def parse_bool(text: str) -> bool:
            # type=bool would treat every non-empty string, "False" included, as True
            normalized = text.strip().lower()
            if normalized in ("1", "true", "yes", "y", "on"):
                return True
            if normalized in ("", "0", "false", "no", "n", "off"):
                return False
            raise argparse.ArgumentTypeError("boolean value expected, got %r" % text)

        if parser is None:
            parser = argparse.ArgumentParser(description="Harvester parameters")
        # DatasetParams registers the DatabaseParams options itself. Registering them here as well
        # declared every option twice, which argparse rejects ("conflicting option string").
        DatasetParams.setup_cli_harvester_parser(parser)
        parser.add_argument("-o", "--output-dir", type=str,
                            help="Output directory of download, relative to the download directory (normally provided by File/URL attribute)")  # applies to parent (builder)
        # usable as a bare flag (--no-download) or with an explicit value (--no-download false);
        # the default stays None so that an absent option leaves enable_download undefined
        parser.add_argument("--no-download", type=parse_bool, nargs="?", const=True, default=None,
                            help="Option to disable download")  # applies to parent (builder)
        parser.add_argument("--resource-url", type=str,
                            help="URL of resource")
        parser.add_argument("--table", type=str,
                            help="Table name")  # normally specified in the File/URL attribute of builder
        parser.add_argument("--query", type=str,
                            help="Query to restrict the lines of the table")
        parser.add_argument("-l", "--limit", type=int,
                            help="Number of rows per request", default=10000)
        parser.add_argument("--once",
                            help="Option to perform only one request with the default limit. This will limit the size of the Data.",
                            action="store_true", default=False)
        return parser

    def initialize_from_cli_args(self, args: argparse.Namespace, base_dir: str = None, error_not_found: bool = True,
                                 default_proxies: dict = None, proxy_headers: dict = None) -> None:
        """
        Load the parent attributes from the parsed arguments, then the table attributes.
        """
        super().initialize_from_cli_args(args, base_dir=base_dir, error_not_found=error_not_found,
                                         default_proxies=default_proxies, proxy_headers=proxy_headers)
        self.output_dir = args.output_dir  # applies to parent (builder)
        self.enable_download = not args.no_download if args.no_download is not None else None  # applies to parent (builder)
        self.resource_url = args.resource_url
        self.table = args.table
        # --query was parsed but never stored; getattr tolerates a namespace built without this option
        self.query_string = getattr(args, "query", None)
        self.limit = args.limit
        if args.once is not None:
            self.single_request = args.once

    def parse_options_string(self, options_string: str, *, base_dir: str = None, file_url_attr: str=None,
                             parser:argparse.ArgumentParser=None):
        """
        Parse a command-line style options string and initialize this object from it.
        The string is parsed with parse_args: an option unknown to the parser is an error.

        :param options_string: options, split with shlex
        :param base_dir: stored in ``base_dir`` and forwarded to :meth:`initialize_from_cli_args`
        :param file_url_attr: stored in ``file_url_attr``
        :param parser: parser to extend with the options of this class. A new one is created if omitted.
        """
        self.file_url_attr = file_url_attr
        # called through self so that the override of the actual class is used
        parser = self.setup_cli_harvester_parser(parser)
        args = parser.parse_args(shlex.split(options_string))
        self.options_string = options_string
        self.base_dir = base_dir
        self.initialize_from_cli_args(args, base_dir=base_dir)
|
|
322
|
+
|
|
323
|
+
|