ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ckanapi_harvesters/__init__.py +32 -10
- ckanapi_harvesters/auxiliary/__init__.py +26 -0
- ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
- ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
- ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
- ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
- ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
- ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
- ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
- ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
- ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
- ckanapi_harvesters/auxiliary/deprecated.py +82 -0
- ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
- ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
- ckanapi_harvesters/auxiliary/list_records.py +60 -0
- ckanapi_harvesters/auxiliary/login.py +163 -0
- ckanapi_harvesters/auxiliary/path.py +208 -0
- ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
- ckanapi_harvesters/auxiliary/urls.py +40 -0
- ckanapi_harvesters/builder/__init__.py +40 -0
- ckanapi_harvesters/builder/builder_aux.py +20 -0
- ckanapi_harvesters/builder/builder_ckan.py +238 -0
- ckanapi_harvesters/builder/builder_errors.py +36 -0
- ckanapi_harvesters/builder/builder_field.py +122 -0
- ckanapi_harvesters/builder/builder_package.py +9 -0
- ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
- ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
- ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
- ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
- ckanapi_harvesters/builder/builder_resource.py +589 -0
- ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
- ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
- ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
- ckanapi_harvesters/builder/builder_resource_init.py +126 -0
- ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
- ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
- ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
- ckanapi_harvesters/builder/example/__init__.py +21 -0
- ckanapi_harvesters/builder/example/builder_example.py +21 -0
- ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
- ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
- ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
- ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
- ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
- ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
- ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
- ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
- ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
- ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
- ckanapi_harvesters/builder/mapper_datastore.py +93 -0
- ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
- ckanapi_harvesters/builder/specific/__init__.py +11 -0
- ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
- ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
- ckanapi_harvesters/ckan_api/__init__.py +20 -0
- ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
- ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
- ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
- ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
- ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
- ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
- ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
- ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
- ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
- ckanapi_harvesters/harvesters/__init__.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
- ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
- ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
- ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
- ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
- ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
- ckanapi_harvesters/harvesters/harvester_init.py +30 -0
- ckanapi_harvesters/harvesters/harvester_model.py +49 -0
- ckanapi_harvesters/harvesters/harvester_params.py +323 -0
- ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
- ckanapi_harvesters/harvesters/postgre_params.py +86 -0
- ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
- ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
- ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
- ckanapi_harvesters/policies/__init__.py +20 -0
- ckanapi_harvesters/policies/data_format_policy.py +269 -0
- ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
- ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
- ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
- ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
- ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
- ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
- ckanapi_harvesters/reports/__init__.py +11 -0
- ckanapi_harvesters/reports/admin_report.py +292 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/METADATA +84 -38
- ckanapi_harvesters-0.0.3.dist-info/RECORD +105 -0
- ckanapi_harvesters/divider/__init__.py +0 -27
- ckanapi_harvesters/divider/divider.py +0 -53
- ckanapi_harvesters/divider/divider_error.py +0 -59
- ckanapi_harvesters/main.py +0 -30
- ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/WHEEL +0 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,505 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Code to upload metadata to the CKAN server to create/update an existing package
|
|
5
|
+
The metadata is defined by the user in an Excel worksheet
|
|
6
|
+
This file implements the basic resources. See builder_datastore for specific functions to initiate datastores.
|
|
7
|
+
"""
|
|
8
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
9
|
+
import threading
|
|
10
|
+
from threading import current_thread
|
|
11
|
+
from typing import Any, Generator, Union, Callable, Set, List, Dict, Tuple
|
|
12
|
+
from abc import ABC, abstractmethod
|
|
13
|
+
import io
|
|
14
|
+
import os
|
|
15
|
+
import glob
|
|
16
|
+
import fnmatch
|
|
17
|
+
from warnings import warn
|
|
18
|
+
import copy
|
|
19
|
+
|
|
20
|
+
import pandas as pd
|
|
21
|
+
import requests
|
|
22
|
+
|
|
23
|
+
from ckanapi_harvesters.auxiliary.error_level_message import ContextErrorLevelMessage, ErrorLevel
|
|
24
|
+
from ckanapi_harvesters.auxiliary.ckan_auxiliary import _string_from_element
|
|
25
|
+
from ckanapi_harvesters.ckan_api import CkanApi
|
|
26
|
+
from ckanapi_harvesters.auxiliary.ckan_model import CkanResourceInfo
|
|
27
|
+
from ckanapi_harvesters.auxiliary.path import resolve_rel_path, glob_rm_glob, glob_name
|
|
28
|
+
from ckanapi_harvesters.builder.builder_aux import positive_end_index
|
|
29
|
+
from ckanapi_harvesters.builder.builder_errors import ResourceFileNotExistMessage
|
|
30
|
+
from ckanapi_harvesters.builder.builder_resource_multi_abc import BuilderMultiABC
|
|
31
|
+
from ckanapi_harvesters.builder.builder_resource import BuilderResourceABC
|
|
32
|
+
|
|
33
|
+
multi_file_exclude_other_files:bool = True
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def default_progress_callback(index:int, total:int, info:Any, *, context:str=None, **kwargs) -> None:
    """
    Default progress reporter: prints one line per call on the standard output.

    :param index: index of the current request; when it equals ``total`` the final "Finished" line is printed
    :param total: total number of requests
    :param info: description of the current request. A string is printed as is, a DataFrame is represented
        by its ``attrs["source"]`` entry when present (``<DataFrame>`` otherwise), any other object is
        converted with ``str``. With ``None`` only the progress is printed.
    :param context: prefix of the printed line (``None`` is replaced by an empty string)
    :param kwargs: accepted but not used
    """
    prefix = "" if context is None else context
    if index == total:
        # final call: info is not used
        print(f"{prefix} Finished {index}/{total} (100%)")
        return
    if info is None:
        print(f"{prefix} Request {index}/{total} ({index/total*100.0:.2f}%)")
        return
    if isinstance(info, str):
        description = info
    elif isinstance(info, pd.DataFrame):
        description = str(info.attrs["source"]) if "source" in info.attrs else "<DataFrame>"
    else:
        description = str(info)
    print(f"{prefix} Request {index}/{total} ({index/total*100.0:.2f}%): " + description)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class BuilderMultiFile(BuilderResourceABC, BuilderMultiABC):
|
|
58
|
+
"""
|
|
59
|
+
Class to manage a set of files to upload as separate resources
|
|
60
|
+
"""
|
|
61
|
+
def __init__(self, *, name:str=None, format:str=None, description:str=None,
             resource_id:str=None, download_url:str=None, dir_name:str=None):
    """
    Initialise the builder of a multi-file resource.

    :param name: forwarded to the parent constructor
    :param format: forwarded to the parent constructor
    :param description: forwarded to the parent constructor
    :param resource_id: forwarded to the parent constructor
    :param download_url: forwarded to the parent constructor
    :param dir_name: value of the File/URL field, used as search pattern for the local files
    """
    super().__init__(name=name, format=format, description=description,
                     resource_id=resource_id, download_url=download_url)
    # local files: search pattern and cache maintained by list_local_files
    self.dir_name: str = dir_name
    self.local_file_list_base_dir: str = ""
    self.local_file_list: Union[List[str], None] = None
    self.excluded_files: Set[str] = set()
    # remote resources: cache maintained by list_remote_resources
    self.remote_resource_names: Union[List[str], None] = None
    self.excluded_resource_names: Set[str] = set()
    # BuilderMultiABC: multi-threading state and progress reporting
    self.stop_event = threading.Event()
    self.thread_ckan: Dict[str, CkanApi] = {}
    self.progress_callback: Union[Callable[[int, int, Any], None], None] = default_progress_callback
    self.progress_callback_kwargs: dict = {}
    self.enable_multi_threaded_upload:bool = True
    self.enable_multi_threaded_download:bool = True
|
|
77
|
+
|
|
78
|
+
@staticmethod
|
|
79
|
+
def resource_mode_str() -> str:
|
|
80
|
+
return "MultiFile"
|
|
81
|
+
|
|
82
|
+
def copy(self, *, dest=None):
    """
    Copy the configuration of this builder to another object.

    :param dest: object receiving the attributes; a new BuilderMultiFile is created when None
    :return: the object which received the attributes
    """
    target = BuilderMultiFile() if dest is None else dest
    super().copy(dest=target)
    target.dir_name = self.dir_name
    # BuilderMultiABC attributes (stop_event is intentionally not copied)
    target.progress_callback = self.progress_callback
    target.progress_callback_kwargs = copy.deepcopy(self.progress_callback_kwargs)
    target.enable_multi_threaded_upload = self.enable_multi_threaded_upload
    target.enable_multi_threaded_download = self.enable_multi_threaded_download
    return target
|
|
94
|
+
|
|
95
|
+
def _load_from_df_row(self, row: pd.Series, base_dir:str=None):
    """
    Load the resource definition from a row of the resources table.

    :param row: row of the table; its "file/url" cell becomes ``dir_name`` (empty string when the cell is empty)
    :param base_dir: not used by this implementation
    """
    super()._load_from_df_row(row=row)
    file_url_cell = row["file/url"]
    self.dir_name = _string_from_element(file_url_cell, empty_value="")
|
|
98
|
+
|
|
99
|
+
def _to_dict(self, include_id:bool=True) -> dict:
    """
    Export the resource definition as a dictionary.

    :param include_id: forwarded to the parent implementation
    :return: the dictionary of the parent implementation, completed with the "File/URL" entry
    """
    fields = super()._to_dict(include_id=include_id)
    fields["File/URL"] = self.dir_name
    return fields
|
|
103
|
+
|
|
104
|
+
def get_or_query_resource_id(self, ckan: CkanApi, cancel_if_present:bool=True, error_not_found:bool=True) -> Union[None,str]:
    """
    This implementation always returns None: no argument is used.

    :return: None
    """
    return None
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
## upload --------------------------------------------------------------------
|
|
109
|
+
def patch_request(self, ckan: CkanApi, package_id: str, *, reupload: bool = None, resources_base_dir:str=None,
                  payload:Union[bytes, io.BufferedIOBase]=None) -> Union[None, CkanResourceInfo]:
    """
    This implementation performs no request: no argument is used.

    :return: None
    """
    return None
|
|
112
|
+
|
|
113
|
+
def upload_request_final(self, ckan:CkanApi, *, force:bool=False) -> None:
    """
    Final step of an upload. This implementation does nothing.

    :param ckan: not used
    :param force: not used
    """
    return None
|
|
115
|
+
|
|
116
|
+
@staticmethod
|
|
117
|
+
def sample_file_path_is_url() -> bool:
|
|
118
|
+
return False
|
|
119
|
+
|
|
120
|
+
def get_sample_file_path(self, resources_base_dir:str, file_index:int=0) -> Union[str,None]:
    """
    Return the path of one of the local files of the resource.

    :param resources_base_dir: base directory forwarded to list_local_files
    :param file_index: position of the file in the list of local files
    :return: the path found at position ``file_index``
    """
    # refresh the list of local files before reading it
    self.list_local_files(resources_base_dir=resources_base_dir)
    sample_path = self.local_file_list[file_index]
    return sample_path
|
|
123
|
+
|
|
124
|
+
def load_sample_data(self, resources_base_dir:str, file_index:int=0) -> Union[bytes,None]:
    """
    Read the contents of one of the local files of the resource.

    :param resources_base_dir: base directory forwarded to get_sample_file_path
    :param file_index: position of the file in the list of local files
    :return: the raw bytes of the file
    """
    sample_path:str = self.get_sample_file_path(resources_base_dir, file_index=file_index)
    with open(sample_path, "rb") as stream:
        payload = stream.read()
    return payload
|
|
128
|
+
|
|
129
|
+
def list_local_files(self, resources_base_dir:str, cancel_if_present:bool=True,
                     excluded_files:Set[str]=None) -> Union[List[str],None]:
    """
    List the local files matching the File/URL pattern of the resource, without the excluded files.

    The result is stored in the object together with the base directory and the excluded files.
    It is reused by the next call when these two arguments are unchanged and cancel_if_present is set.

    :param resources_base_dir: base directory against which the File/URL pattern is resolved
    :param cancel_if_present: return the stored list when it was built with the same arguments
    :param excluded_files: files from mono-resources, removed from the result (None: nothing is excluded)
    :return: sorted list of the matching paths
    """
    excluded = set() if excluded_files is None else excluded_files
    if (cancel_if_present
            and self.local_file_list is not None
            and self.local_file_list_base_dir == resources_base_dir
            and self.excluded_files == excluded):
        return self.local_file_list
    pattern = resolve_rel_path(resources_base_dir, self.dir_name, field=f"File/URL of resource {self.name}")
    matches = sorted(set(glob.glob(pattern)) - excluded)
    # remember the result and the arguments it was obtained with
    self.local_file_list = matches
    self.local_file_list_base_dir = resources_base_dir
    self.excluded_files = excluded
    return matches
|
|
155
|
+
|
|
156
|
+
def init_local_files_list(self, resources_base_dir:str, cancel_if_present:bool=True, excluded_files:Set[str]=None, **kwargs) -> List[str]:
    """
    Prepare the list of local files: thin wrapper around list_local_files.

    :param resources_base_dir: forwarded to list_local_files
    :param cancel_if_present: forwarded to list_local_files
    :param excluded_files: forwarded to list_local_files
    :param kwargs: accepted but not used
    :return: result of list_local_files
    """
    local_files = self.list_local_files(
        resources_base_dir=resources_base_dir,
        cancel_if_present=cancel_if_present,
        excluded_files=excluded_files,
    )
    return local_files
|
|
159
|
+
|
|
160
|
+
def get_local_file_len(self) -> int:
    """
    Number of local files found by the last call to list_local_files.

    :return: length of the stored list of local files
    :raises RuntimeError: if the list of local files was not initialised
    """
    local_files = self.local_file_list
    if local_files is not None:
        return len(local_files)
    raise RuntimeError("You must call list_local_files first")
|
|
164
|
+
|
|
165
|
+
def get_local_file_generator(self, resources_base_dir:str, excluded_files:Set[str]=None, **kwargs) -> Generator[str, None, None]:
    """
    Iterate over the local files of the resource.

    :param resources_base_dir: forwarded to list_local_files
    :param excluded_files: forwarded to list_local_files
    :param kwargs: accepted but not used
    :return: generator yielding the paths of the stored list of local files
    """
    # make sure the stored list corresponds to the arguments before iterating
    self.list_local_files(resources_base_dir=resources_base_dir, excluded_files=excluded_files)
    yield from self.local_file_list
|
|
169
|
+
|
|
170
|
+
def upload_file_checks(self, *, resources_base_dir: str = None, ckan: CkanApi = None, excluded_files:Set[str]=None, **kwargs) \
        -> Union[None, ContextErrorLevelMessage]:
    """
    Check that the directory of the resource exists and contains at least one file to upload.

    :param resources_base_dir: base directory against which the File/URL field is resolved
    :param ckan: not used
    :param excluded_files: forwarded to list_local_files
    :param kwargs: accepted but not used
    :return: None when files were found, an error message object otherwise
    """
    # the directory is the File/URL field after glob_rm_glob
    directory = resolve_rel_path(resources_base_dir, glob_rm_glob(self.dir_name), field=f"File/URL of resource {self.name}")
    if not os.path.isdir(directory):
        return ResourceFileNotExistMessage(self.name, ErrorLevel.Error,
            f"Missing directory for multi-file resource {self.name}: {os.path.join(resources_base_dir, self.dir_name)}")
    local_files = self.list_local_files(resources_base_dir=resources_base_dir, excluded_files=excluded_files)
    if len(local_files) > 0:
        return None
    return ResourceFileNotExistMessage(self.name, ErrorLevel.Error,
        f"Empty resource directory for multi-file resource {self.name}: {os.path.join(resources_base_dir, self.dir_name)}")
|
|
181
|
+
|
|
182
|
+
def upload_file(self, ckan:CkanApi, package_id:str, file_path:str, *,
                reupload:bool=False, cancel_if_present:bool=True) -> CkanResourceInfo:
    """
    Upload a file as a resource named after the base name of the file.

    :param ckan: CKAN API object; its map is consulted before ``resource_create`` is called
    :param package_id: package receiving the resource
    :param file_path: path of the file to upload
    :param reupload: forwarded to ``resource_create``; when set, a resource already mapped is not skipped
    :param cancel_if_present: when the resource is already mapped and reupload is not set,
        return the mapped information without any request
    :return: the mapped resource information (flags newly_created and newly_updated reset to False)
        when the upload is skipped, the result of ``resource_create`` otherwise
    """
    resource_name = os.path.basename(file_path)
    known_info = ckan.map.get_resource_info(resource_name, package_name=package_id, error_not_mapped=False)
    skip_upload = known_info is not None and cancel_if_present and not reupload
    if skip_upload:
        known_info.newly_created = False
        known_info.newly_updated = False
        return known_info
    created_info = ckan.resource_create(
        package_id, resource_name,
        format=self.format, description=self.description, state=self.state,
        file_path=file_path, reupload=reupload,
        cancel_if_exists=True, update_if_exists=True,
        create_default_view=True, auto_submit=False,
    )
    return created_info
|
|
196
|
+
|
|
197
|
+
def _unit_upload_apply(self, *, ckan:CkanApi, file:str,
|
|
198
|
+
index:int, start_index:int, end_index:int, total:int,
|
|
199
|
+
package_id:str, reupload:bool, only_missing:bool, excluded_files:Set[str]) -> None:
|
|
200
|
+
# For each file, this function initiates its own FileStore.
|
|
201
|
+
file_path = file
|
|
202
|
+
_, file_name = os.path.split(file_path)
|
|
203
|
+
if start_index <= index and index < end_index and file_path not in excluded_files:
|
|
204
|
+
self._call_progress_callback(index, total, info=file_path,
|
|
205
|
+
context=f"{ckan.identifier} single-thread upload")
|
|
206
|
+
self.upload_file(ckan=ckan, package_id=package_id, file_path=file_path,
|
|
207
|
+
reupload=reupload, cancel_if_present=only_missing)
|
|
208
|
+
else:
|
|
209
|
+
# self._call_progress_callback(index, total, info=df_upload_local, context=f"{ckan.identifier} single-thread skip")
|
|
210
|
+
pass
|
|
211
|
+
|
|
212
|
+
def upload_request_full(self, ckan:CkanApi, resources_base_dir:str, *,
                        threads:int=1, external_stop_event=None,
                        start_index:int=0, end_index:int=None,
                        reupload:bool=False, only_missing:bool=False, excluded_files:Set[str]=None) -> None:
    """
    Upload the files of the resource: the work is delegated to the parent implementation,
    which additionally receives the package id and the set of excluded files.

    :param ckan: CKAN API object, also used to obtain the package id
    :param resources_base_dir: forwarded to the parent implementation
    :param threads: forwarded to the parent implementation
    :param external_stop_event: forwarded to the parent implementation
    :param start_index: forwarded to the parent implementation
    :param end_index: forwarded to the parent implementation
    :param reupload: forwarded to the parent implementation
    :param only_missing: forwarded to the parent implementation
    :param excluded_files: files which must not be uploaded (None: nothing is excluded)
    """
    files_to_skip = set() if excluded_files is None else excluded_files
    package_id = self.get_or_query_package_id(ckan)
    super().upload_request_full(
        ckan=ckan, resources_base_dir=resources_base_dir,
        threads=threads, external_stop_event=external_stop_event,
        start_index=start_index, end_index=end_index,
        reupload=reupload, only_missing=only_missing,
        package_id=package_id, excluded_files=files_to_skip,
    )
|
|
223
|
+
# if threads < 0:
|
|
224
|
+
# # cancel large uploads in this case
|
|
225
|
+
# return None
|
|
226
|
+
# elif threads is None or threads > 1:
|
|
227
|
+
# return self.upload_request_full_multi_threaded(ckan=ckan, resources_base_dir=resources_base_dir,
|
|
228
|
+
# threads=threads, external_stop_event=external_stop_event,
|
|
229
|
+
# start_index=start_index, end_index=end_index,
|
|
230
|
+
# reupload=reupload, only_missing=only_missing,
|
|
231
|
+
# excluded_files=excluded_files)
|
|
232
|
+
# else:
|
|
233
|
+
# self.init_local_files_list(resources_base_dir=resources_base_dir, cancel_if_present=True, excluded_files=excluded_files)
|
|
234
|
+
# package_id = self.get_or_query_package_id(ckan)
|
|
235
|
+
# if ckan.verbose_extra:
|
|
236
|
+
# print(f"Launching single-threaded upload of multi-file resource {self.name}")
|
|
237
|
+
# total = self.get_local_file_len()
|
|
238
|
+
# end_index = positive_end_index(end_index, total)
|
|
239
|
+
# for index, file_path in enumerate(self.get_local_file_generator(resources_base_dir=resources_base_dir, excluded_files=excluded_files)):
|
|
240
|
+
# if external_stop_event is not None and external_stop_event.is_set():
|
|
241
|
+
# print(f"{ckan.identifier} Interrupted")
|
|
242
|
+
# return
|
|
243
|
+
# self._unit_upload_apply(ckan, file=file_path, package_id=package_id,
|
|
244
|
+
# reupload=reupload, only_missing=only_missing,
|
|
245
|
+
# index=index, start_index=start_index, end_index=end_index, total=total,
|
|
246
|
+
# excluded_files=excluded_files)
|
|
247
|
+
# self._call_progress_callback(total, total, context=f"{ckan.identifier} single-thread upload")
|
|
248
|
+
# # at last, apply final actions:
|
|
249
|
+
# self.upload_request_final(ckan)
|
|
250
|
+
|
|
251
|
+
# def upload_request_graceful(self, ckan:CkanApi, file_path: str, *, index:int, package_id:str,
|
|
252
|
+
# external_stop_event=None,
|
|
253
|
+
# start_index:int=0, end_index:int=None,
|
|
254
|
+
# reupload:bool=False, only_missing:bool=False, excluded_files:Set[str]=None) -> None:
|
|
255
|
+
# """
|
|
256
|
+
# Calls upload_file with checks specific to multi-threading.
|
|
257
|
+
#
|
|
258
|
+
# :return:
|
|
259
|
+
# """
|
|
260
|
+
# # ckan.session_reset()
|
|
261
|
+
# # ckan.identifier = current_thread().name
|
|
262
|
+
# ckan = self.thread_ckan[current_thread().name]
|
|
263
|
+
# total = self.get_local_file_len()
|
|
264
|
+
# end_index = positive_end_index(end_index, total)
|
|
265
|
+
# if self.stop_event.is_set():
|
|
266
|
+
# return
|
|
267
|
+
# if external_stop_event is not None and external_stop_event.is_set():
|
|
268
|
+
# print(f"{ckan.identifier} Interrupted")
|
|
269
|
+
# return
|
|
270
|
+
# try:
|
|
271
|
+
# self._unit_upload_apply(ckan, file=file_path, package_id=package_id,
|
|
272
|
+
# reupload=reupload, only_missing=only_missing,
|
|
273
|
+
# index=index, start_index=start_index, end_index=end_index, total=total,
|
|
274
|
+
# excluded_files=excluded_files)
|
|
275
|
+
# except Exception as e:
|
|
276
|
+
# self.stop_event.set() # Ensure all threads stop
|
|
277
|
+
# if ckan.verbose_extra:
|
|
278
|
+
# print(f"Stopping all threads because an exception occurred in thread: {e}")
|
|
279
|
+
# raise e from e
|
|
280
|
+
|
|
281
|
+
# def upload_request_full_multi_threaded(self, ckan:CkanApi, resources_base_dir:str,
|
|
282
|
+
# threads:int=1, external_stop_event=None,
|
|
283
|
+
# start_index:int=0, end_index:int=None,
|
|
284
|
+
# reupload:bool=False, only_missing:bool=False, excluded_files:Set[str]=None):
|
|
285
|
+
# """
|
|
286
|
+
# Multi-threaded implementation of upload_request_full, using ThreadPoolExecutor.
|
|
287
|
+
# """
|
|
288
|
+
# self.init_local_files_list(resources_base_dir=resources_base_dir, cancel_if_present=True, excluded_files=excluded_files)
|
|
289
|
+
# package_id = self.get_or_query_package_id(ckan)
|
|
290
|
+
# self._prepare_for_multithreading(ckan)
|
|
291
|
+
# try:
|
|
292
|
+
# with ThreadPoolExecutor(max_workers=threads, initializer=self._init_thread, initargs=(ckan,)) as executor:
|
|
293
|
+
# if ckan.verbose_extra:
|
|
294
|
+
# print(f"Launching multi-threaded upload of multi-file resource {self.name}")
|
|
295
|
+
# futures = [executor.submit(self.upload_request_graceful, ckan=ckan, file_path=file_path, index=index, package_id=package_id,
|
|
296
|
+
# start_index=start_index, end_index=end_index, external_stop_event=external_stop_event,
|
|
297
|
+
# excluded_files=excluded_files, reupload=reupload, only_missing=only_missing)
|
|
298
|
+
# for index, file_path in enumerate(self.get_local_file_generator(resources_base_dir=resources_base_dir, excluded_files=excluded_files))]
|
|
299
|
+
# for future in futures:
|
|
300
|
+
# future.result() # This will propagate the exception
|
|
301
|
+
# total = self.get_local_file_len()
|
|
302
|
+
# self._call_progress_callback(total, total, context=f"{ckan.identifier} multi-thread upload")
|
|
303
|
+
# except Exception as e:
|
|
304
|
+
# self.stop_event.set() # Ensure all threads stop
|
|
305
|
+
# if ckan.verbose_extra:
|
|
306
|
+
# print(f"Stopping all threads because an exception occurred: {e}")
|
|
307
|
+
# raise e from e
|
|
308
|
+
# finally:
|
|
309
|
+
# self.stop_event.set() # Ensure all threads stop
|
|
310
|
+
# if ckan.verbose_extra:
|
|
311
|
+
# print("End of multi-threaded upload...")
|
|
312
|
+
# # at last, apply final actions:
|
|
313
|
+
# self.upload_request_final(ckan)
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
## download ------------------------------------------------
|
|
317
|
+
def list_remote_resources(self, ckan:CkanApi, *, excluded_resource_names:Set[str]=None,
                          cancel_if_present: bool = True) -> List[str]:
    """
    Defines the list of resources to download that correspond to the definition and are not used in mono-resources.

    The keys of the ``resources_id_index`` of the package are filtered with the wildcard pattern
    obtained from the File/URL field (``glob_name``). The result is stored in the object and reused
    by the next call when the excluded names are unchanged and cancel_if_present is set.

    :param ckan: CKAN API object used to obtain the package information
    :param excluded_resource_names: resource names of mono-resources (None: nothing is excluded)
    :param cancel_if_present: return the stored list when it was built with the same excluded names
    :return: sorted list of the matching resource names
    """
    # Normalise before testing the cache: the stored value is always a set, so comparing it
    # with None would never match and the package would be queried again at each call.
    if excluded_resource_names is None:
        excluded_resource_names = set()
    if (cancel_if_present and self.remote_resource_names is not None
            and self.excluded_resource_names == excluded_resource_names):
        return self.remote_resource_names
    package_info = ckan.get_package_info_or_request(self.package_name)
    resource_names = set(package_info.resources_id_index.keys())
    resource_name_glob = glob_name(self.dir_name)
    # apply the name as wildcard filter, then remove the names used by mono-resources
    filtered_resource_names = set(fnmatch.filter(resource_names, resource_name_glob)) - excluded_resource_names
    self.remote_resource_names = sorted(filtered_resource_names)
    self.excluded_resource_names = excluded_resource_names
    return self.remote_resource_names
|
|
340
|
+
|
|
341
|
+
def list_remote_resource_ids(self, ckan:CkanApi, *, excluded_resource_names:Set[str]=None,
                             cancel_if_present: bool = True) -> List[str]:
    """
    Same selection as list_remote_resources, expressed as resource ids.

    :param ckan: CKAN API object; its map converts each resource name to an id
    :param excluded_resource_names: forwarded to list_remote_resources
    :param cancel_if_present: forwarded to list_remote_resources
    :return: resource ids, in the order of the resource names
    """
    selected_names = self.list_remote_resources(ckan, excluded_resource_names=excluded_resource_names,
                                                cancel_if_present=cancel_if_present)
    resource_ids = []
    for selected_name in selected_names:
        resource_ids.append(ckan.map.get_resource_id(selected_name, package_name=self.package_name))
    return resource_ids
|
|
347
|
+
|
|
348
|
+
def init_download_file_query_list(self, ckan: CkanApi, out_dir: str=None,
                                  cancel_if_present: bool = True,
                                  excluded_resource_names:Set[str]=None, **kwargs) -> List[str]:
    """
    Prepare a download: create the output directory if requested and list the remote resources.

    :param ckan: forwarded to list_remote_resources
    :param out_dir: base output directory; when None no directory is created
    :param cancel_if_present: forwarded to list_remote_resources
    :param excluded_resource_names: forwarded to list_remote_resources
    :param kwargs: accepted but not used
    :return: result of list_remote_resources
    """
    create_directory = out_dir is not None
    if create_directory:
        download_dir = resolve_rel_path(out_dir, glob_rm_glob(self.dir_name, default_rec_dir=self.name),
                                        field=f"File/URL of resource {self.name}")
        os.makedirs(download_dir, exist_ok=True)
    remote_names = self.list_remote_resources(ckan, cancel_if_present=cancel_if_present,
                                              excluded_resource_names=excluded_resource_names)
    return remote_names
|
|
355
|
+
|
|
356
|
+
def get_file_query_generator(self) -> Generator[str, Any, None]:
    """
    Iterate over the names of the remote resources to download.

    :return: generator yielding the stored resource names
    :raises RuntimeError: on the first iteration, if the list of remote resources was not initialised
        (same error as get_file_query_len, instead of a TypeError on None)
    """
    if self.remote_resource_names is None:
        raise RuntimeError("init_download_file_query_list must be called first")
    yield from self.remote_resource_names
|
|
359
|
+
|
|
360
|
+
def get_file_query_len(self) -> int:
    """
    Number of remote resources selected for the download.

    :return: length of the stored list of remote resource names
    :raises RuntimeError: if the list of remote resources was not initialised
    """
    remote_names = self.remote_resource_names
    if remote_names is not None:
        return len(remote_names)
    raise RuntimeError("init_download_file_query_list must be called first")
|
|
364
|
+
|
|
365
|
+
def download_file_query_item(self, ckan: CkanApi, out_dir: str, file_query_item: str) \
        -> Tuple[Union[str,None], Union[requests.Response,None]]:
    """
    Download one remote resource and, when an output directory is given, save it under its resource name.

    :param ckan: CKAN API object used to obtain the resource id and to download the resource
    :param out_dir: base output directory; when None nothing is written
    :param file_query_item: name of the resource to download
    :return: tuple (path of the output file or None, response of the download).
        The response is None when the download was skipped because the output file already exists
        and ``download_skip_existing`` is set.
    """
    resource_name = file_query_item
    write_to_disk = out_dir is not None
    file_out = None
    if write_to_disk:
        file_out = resolve_rel_path(out_dir, glob_rm_glob(self.dir_name, default_rec_dir=self.name), resource_name,
                                    field=f"File/URL of resource {self.name}")
        if self.download_skip_existing and os.path.exists(file_out):
            if ckan.params.verbose_extra:
                print(f"Skipping existing file {file_out}")
            return file_out, None
    resource_id = ckan.map.get_resource_id(resource_name, package_name=self.package_name)
    _, response = ckan.resource_download(resource_id)
    if write_to_disk:
        with open(file_out, 'wb') as stream:
            stream.write(response.content)
    return file_out, response
|
|
383
|
+
|
|
384
|
+
def download_request_generator(self, ckan: CkanApi, out_dir: str,
                               excluded_resource_names:Set[str]=None) -> Generator[Tuple[Union[str,None], Union[requests.Response,None]], Any, None]:
    """
    Download the remote resources one by one, on demand.

    :param ckan: forwarded to init_download_file_query_list and download_file_query_item
    :param out_dir: forwarded to init_download_file_query_list and download_file_query_item
    :param excluded_resource_names: forwarded to init_download_file_query_list
    :return: generator yielding the result of download_file_query_item for each resource
    """
    self.init_download_file_query_list(ckan=ckan, out_dir=out_dir, cancel_if_present=True,
                                       excluded_resource_names=excluded_resource_names)
    pending_items = self.get_file_query_generator()
    for pending_item in pending_items:
        download_result = self.download_file_query_item(ckan=ckan, out_dir=out_dir, file_query_item=pending_item)
        yield download_result
|
|
390
|
+
|
|
391
|
+
def _unit_download_apply(self, ckan:CkanApi, file_query_item:Any, out_dir:str,
|
|
392
|
+
index:int, start_index:int, end_index:int, total:int, excluded_resource_names:Set[str]) -> Any:
|
|
393
|
+
if start_index <= index and index < end_index and file_query_item not in excluded_resource_names:
|
|
394
|
+
self._call_progress_callback(index, total, info=file_query_item,
|
|
395
|
+
context=f"{ckan.identifier} single-thread download")
|
|
396
|
+
self.download_file_query_item(ckan=ckan, out_dir=out_dir, file_query_item=file_query_item)
|
|
397
|
+
else:
|
|
398
|
+
pass
|
|
399
|
+
# self._call_progress_callback(index, total, info=file_query_item, context=f"{ckan.identifier} single-thread skip")
|
|
400
|
+
|
|
401
|
+
def download_request_full(self, ckan: CkanApi, out_dir: str, threads:int=1, external_stop_event=None,
                          start_index:int=0, end_index:int=None, force:bool=False,
                          excluded_resource_names:Set[str]=None) -> None:
    """
    Forward the full download request, unchanged, to the parent class implementation.

    Every argument is passed through by keyword and the parent's return value is returned.
    """
    forwarded_args = dict(
        ckan=ckan,
        out_dir=out_dir,
        threads=threads,
        external_stop_event=external_stop_event,
        start_index=start_index,
        end_index=end_index,
        force=force,
        excluded_resource_names=excluded_resource_names,
    )
    return super().download_request_full(**forwarded_args)
|
|
408
|
+
# if (not self.enable_download) and (not force):
|
|
409
|
+
# msg = f"Did not download resource {self.name} because download was disabled."
|
|
410
|
+
# warn(msg)
|
|
411
|
+
# return None
|
|
412
|
+
# if threads < 0:
|
|
413
|
+
# # do not download large datasets in this case
|
|
414
|
+
# return None
|
|
415
|
+
# elif threads is None or threads > 1:
|
|
416
|
+
# return self.download_request_full_multi_threaded(ckan=ckan, out_dir=out_dir,
|
|
417
|
+
# threads=threads, external_stop_event=external_stop_event,
|
|
418
|
+
# start_index=start_index, end_index=end_index,
|
|
419
|
+
# excluded_resource_names=excluded_resource_names)
|
|
420
|
+
# else:
|
|
421
|
+
# self.init_download_file_query_list(ckan=ckan, out_dir=out_dir, cancel_if_present=True,
|
|
422
|
+
# excluded_resource_names=excluded_resource_names)
|
|
423
|
+
# if ckan.verbose_extra:
|
|
424
|
+
# print(f"Launching single-threaded download of multi-file resource {self.name}")
|
|
425
|
+
# total = self.get_file_query_len()
|
|
426
|
+
# end_index = positive_end_index(end_index, total)
|
|
427
|
+
# for index, file_query_item in enumerate(self.get_file_query_generator()):
|
|
428
|
+
# if external_stop_event is not None and external_stop_event.is_set():
|
|
429
|
+
# print(f"{ckan.identifier} Interrupted")
|
|
430
|
+
# return
|
|
431
|
+
# self._unit_download_apply(ckan=ckan, file_query_item=file_query_item, out_dir=out_dir,
|
|
432
|
+
# index=index, start_index=start_index, end_index=end_index, total=total,
|
|
433
|
+
# excluded_resource_names=excluded_resource_names)
|
|
434
|
+
# self._call_progress_callback(total, total, context=f"{ckan.identifier} single-thread download")
|
|
435
|
+
|
|
436
|
+
# def download_file_query_item_graceful(self, ckan: CkanApi, out_dir: str, resource_name: str, index:int,
|
|
437
|
+
# external_stop_event=None, start_index:int=0, end_index:int=None,
|
|
438
|
+
# excluded_resource_names:Set[str]=None) -> None:
|
|
439
|
+
# """
|
|
440
|
+
# Implementation of download_file_query_item with checks for a multi-threaded download.
|
|
441
|
+
# """
|
|
442
|
+
# # ckan.session_reset()
|
|
443
|
+
# # ckan.identifier = current_thread().name
|
|
444
|
+
# ckan = self.thread_ckan[current_thread().name]
|
|
445
|
+
# total = self.get_file_query_len()
|
|
446
|
+
# end_index = positive_end_index(end_index, total)
|
|
447
|
+
# if self.stop_event.is_set():
|
|
448
|
+
# return
|
|
449
|
+
# if external_stop_event is not None and external_stop_event.is_set():
|
|
450
|
+
# print(f"{ckan.identifier} Interrupted")
|
|
451
|
+
# return
|
|
452
|
+
# try:
|
|
453
|
+
# self._unit_download_apply(ckan=ckan, file_query_item=file_query_item, out_dir=out_dir,
|
|
454
|
+
# index=index, start_index=start_index, end_index=end_index, total=total,
|
|
455
|
+
# excluded_resource_names=excluded_resource_names)
|
|
456
|
+
# except Exception as e:
|
|
457
|
+
# self.stop_event.set() # Ensure all threads stop
|
|
458
|
+
# if ckan.verbose_extra:
|
|
459
|
+
# print(f"Stopping all threads because an exception occurred in thread: {e}")
|
|
460
|
+
# raise e from e
|
|
461
|
+
|
|
462
|
+
# def download_request_full_multi_threaded(self, ckan: CkanApi, out_dir: str,
|
|
463
|
+
# threads: int = None, external_stop_event=None,
|
|
464
|
+
# start_index:int=0, end_index:int=-1,
|
|
465
|
+
# excluded_resource_names:Set[str]=None) -> None:
|
|
466
|
+
# """
|
|
467
|
+
# Multi-threaded implementation of download_request_full using ThreadPoolExecutor.
|
|
468
|
+
# """
|
|
469
|
+
# self.init_download_file_query_list(ckan=ckan, out_dir=out_dir, cancel_if_present=True, excluded_resource_names=excluded_resource_names)
|
|
470
|
+
# self._prepare_for_multithreading(ckan)
|
|
471
|
+
# try:
|
|
472
|
+
# with ThreadPoolExecutor(max_workers=threads, initializer=self._init_thread, initargs=(ckan,)) as executor:
|
|
473
|
+
# if ckan.verbose_extra:
|
|
474
|
+
# print(f"Launching multi-threaded download of multi-file resource {self.name}")
|
|
475
|
+
# futures = [executor.submit(self.download_file_query_item_graceful, ckan=ckan, out_dir=out_dir, resource_name=resource_name,
|
|
476
|
+
# index=index, external_stop_event=external_stop_event, start_index=start_index, end_index=end_index,
|
|
477
|
+
# excluded_resource_names=excluded_resource_names)
|
|
478
|
+
# for index, resource_name in enumerate(self.get_file_query_generator())]
|
|
479
|
+
# for future in futures:
|
|
480
|
+
# future.result() # This will propagate the exception
|
|
481
|
+
# total = self.get_file_query_len()
|
|
482
|
+
# self._call_progress_callback(total, total, context=f"multi-thread download")
|
|
483
|
+
# except Exception as e:
|
|
484
|
+
# self.stop_event.set() # Ensure all threads stop
|
|
485
|
+
# if ckan.verbose_extra:
|
|
486
|
+
# print(f"Stopping all threads because an exception occurred: {e}")
|
|
487
|
+
# raise e from e
|
|
488
|
+
# finally:
|
|
489
|
+
# self.stop_event.set() # Ensure all threads stop
|
|
490
|
+
# if ckan.verbose_extra:
|
|
491
|
+
# print("End of multi-threaded download...")
|
|
492
|
+
|
|
493
|
+
def download_sample(self, ckan:CkanApi, full_download:bool=True, **kwargs) -> Union[bytes, None]:
    """
    Sample download is not provided by this class: None is returned unconditionally.

    All arguments are accepted and ignored.
    """
    return None
|
|
495
|
+
|
|
496
|
+
def download_request(self, ckan: CkanApi, out_dir: str, *, full_download:bool=True, threads:int=1,
                     force:bool=False, excluded_resource_names:Set[str]=None, **kwargs) -> None:
    """
    Trigger a download only when a full download is requested.

    When full_download is False nothing is done and None is returned.
    Otherwise the call is delegated to download_request_full, with any extra
    keyword arguments passed along, and its result is returned.
    """
    if not full_download:
        return None
    return self.download_request_full(
        ckan=ckan,
        out_dir=out_dir,
        threads=threads,
        force=force,
        excluded_resource_names=excluded_resource_names,
        **kwargs,
    )
|
|
501
|
+
|
|
502
|
+
def resource_info_request(self, ckan:CkanApi, error_not_found:bool=True) -> Union[CkanResourceInfo, None]:
    """
    Always return None.

    There are multiple resource ids behind this object, so no single resource info is returned.
    The arguments are accepted and ignored.
    """
    return None
|
|
504
|
+
def _to_ckan_resource_info(self, package_id:str, check_id:bool=True) -> CkanResourceInfo:
    """
    Always return None: no CkanResourceInfo object is built here.

    The arguments are accepted and ignored.
    NOTE(review): the return annotation does not mention None - confirm the intended contract.
    """
    return None
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Section of the package dedicated to the initialization of a CKAN package
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
|
|
9
|
+
# usage shortcuts
|
|
10
|
+
# Absolute path of the directory holding this module (symbolic links resolved).
self_dir = os.path.abspath(
    os.path.dirname(os.path.realpath(__file__))
)
# "package" sub-directory located next to this module.
example_package_dir = os.path.join(self_dir, "package")
|
|
12
|
+
from ..builder_package import example_package_xls
|
|
13
|
+
|
|
14
|
+
from . import builder_example
|
|
15
|
+
from . import builder_example_aux_fun
|
|
16
|
+
from . import builder_example_generate_data
|
|
17
|
+
from . import builder_example_patch_upload
|
|
18
|
+
from . import builder_example_tests
|
|
19
|
+
from . import builder_example_policy
|
|
20
|
+
from . import builder_example_download
|
|
21
|
+
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Function to load the example package
|
|
5
|
+
"""
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
from ckanapi_harvesters.builder.builder_package import BuilderPackage
|
|
9
|
+
from ckanapi_harvesters.builder.example import example_package_xls
|
|
10
|
+
|
|
11
|
+
def load_example_package() -> BuilderPackage:
    """
    Load the example package definition from the example Excel workbook.

    External code execution is unlocked on BuilderPackage first, then the workbook
    is read with BuilderPackage.from_excel and a new BuilderPackage built from
    that object (src=...) is returned.
    """
    BuilderPackage.unlock_external_code_execution()
    loaded_from_excel = BuilderPackage.from_excel(example_package_xls)
    package_copy = BuilderPackage(src=loaded_from_excel)
    return package_copy
|
|
15
|
+
|
|
16
|
+
def load_help_page_df(*, engine:str=None) -> pd.DataFrame:
    """
    Read the "help" sheet of the example Excel workbook into a DataFrame.

    The sheet is read without a header row (header=None).

    :param engine: Excel engine passed to pandas.ExcelFile (None lets pandas choose)
    :return: content of the "help" sheet
    """
    # The context manager closes the workbook on exit: no explicit close() is needed.
    with pd.ExcelFile(example_package_xls, engine=engine) as help_file:
        return pd.read_excel(help_file, sheet_name="help", header=None)
|
|
21
|
+
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Auxiliary functions for package upload/download example
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
def users_upload(df_users: pd.DataFrame, file_name:str, **kwargs) -> pd.DataFrame:
    """
    Example upload function: report the user ids and the file name, then return the DataFrame as-is.

    :param df_users: DataFrame with a "user_id" column
    :param file_name: name of the file, only printed
    :param kwargs: accepted and ignored
    :return: the same df_users object, unmodified
    """
    joined_ids = ",".join(map(str, df_users["user_id"].to_list()))
    print("<<< Upload function example called on users dataframe containing ids " + joined_ids)
    print(f"<<< File {file_name}")
    return df_users
|
|
13
|
+
|
|
14
|
+
def users_download(df_users: pd.DataFrame, file_query, **kwargs) -> pd.DataFrame:
    """
    Example download function: report the user ids and the file query, then return the DataFrame as-is.

    :param df_users: DataFrame with a "user_id" column
    :param file_query: file query, only printed
    :param kwargs: accepted and ignored
    :return: the same df_users object, unmodified
    """
    joined_ids = ",".join(map(str, df_users["user_id"].to_list()))
    print("<<< Download function example called on users dataframe containing ids " + joined_ids)
    print(f"<<< File query {file_query}")
    return df_users
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
if __name__ == '__main__':
    # Small self-test of the example upload function.
    df_users = pd.DataFrame({"user_id": [1, 2, 3]})
    # users_upload requires a file name as second argument; it is only printed.
    df_users = users_upload(df_users, "users.csv")
    print(df_users)
|
|
24
|
+
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Example code to download the builder example from a CKAN server
|
|
5
|
+
"""
|
|
6
|
+
from typing import Tuple
|
|
7
|
+
import os
|
|
8
|
+
import re
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
import numpy as np
|
|
12
|
+
|
|
13
|
+
from ckanapi_harvesters.builder.builder_package import BuilderPackage
|
|
14
|
+
from ckanapi_harvesters.ckan_api import CkanApi
|
|
15
|
+
|
|
16
|
+
from ckanapi_harvesters.builder.example import example_package_xls
|
|
17
|
+
# Absolute path of the directory holding this module (symbolic links resolved).
self_dir = os.path.abspath(
    os.path.dirname(os.path.realpath(__file__))
)
# Download target: "package_download" resolved against the current working directory.
example_package_download_dir = os.path.abspath("package_download")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def run(ckan:CkanApi = None):
    """
    Download the builder example package into example_package_download_dir.

    :param ckan: optional CKAN API client, passed to the package's init_ckan
    """
    BuilderPackage.unlock_external_code_execution()

    # load the package definition and prepare the CKAN client
    package_builder = BuilderPackage.from_excel(example_package_xls)
    ckan = package_builder.init_ckan(ckan)
    ckan.input_missing_info(input_args_if_necessary=True, input_owner_org=True)
    ckan.set_verbosity(True)

    # download into example_package_download_dir
    n_threads = 3  # > 1: number of threads to download large datasets
    package_builder.download_request_full(
        ckan,
        example_package_download_dir,
        full_download=True,
        threads=n_threads,
        skip_existing=False,
        rm_dir=True,
    )

    print("Package downloaded in")
    print(example_package_download_dir)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
if __name__ == '__main__':
    # Script entry point: create a client, configure it from the
    # command-line arguments, then run the download example.
    ckan = CkanApi(None)
    ckan.initialize_from_cli_args()
    run(ckan)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
|